Coverage Report

Created: 2024-01-20 12:31

/src/libxml2/parser.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3
 *            implemented on top of the SAX interfaces
4
 *
5
 * References:
6
 *   The XML specification:
7
 *     http://www.w3.org/TR/REC-xml
8
 *   Original 1.0 version:
9
 *     http://www.w3.org/TR/1998/REC-xml-19980210
10
 *   XML second edition working draft
11
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
12
 *
13
 * Okay this is a big file, the parser core is around 7000 lines, then it
14
 * is followed by the progressive parser top routines, then the various
15
 * high level APIs to call the parser and a few miscellaneous functions.
16
 * A number of helper functions and deprecated ones have been moved to
17
 * parserInternals.c to reduce this file size.
18
 * As much as possible the functions are associated with their relative
19
 * production in the XML specification. A few productions defining the
20
 * different ranges of character are actually implemented either in
21
 * parserInternals.h or parserInternals.c
22
 * The DOM tree build is realized from the default SAX callbacks in
23
 * the module SAX.c.
24
 * The routines doing the validation checks are in valid.c and called either
25
 * from the SAX callbacks or as standalone functions using a preparsed
26
 * document.
27
 *
28
 * See Copyright for the status of this software.
29
 *
30
 * daniel@veillard.com
31
 */
32
33
/* To avoid EBCDIC trouble when parsing on zOS */
34
#if defined(__MVS__)
35
#pragma convert("ISO8859-1")
36
#endif
37
38
#define IN_LIBXML
39
#include "libxml.h"
40
41
#if defined(_WIN32)
42
#define XML_DIR_SEP '\\'
43
#else
44
#define XML_DIR_SEP '/'
45
#endif
46
47
#include <stdlib.h>
48
#include <limits.h>
49
#include <string.h>
50
#include <stdarg.h>
51
#include <stddef.h>
52
#include <ctype.h>
53
#include <stdlib.h>
54
#include <libxml/xmlmemory.h>
55
#include <libxml/threads.h>
56
#include <libxml/globals.h>
57
#include <libxml/tree.h>
58
#include <libxml/parser.h>
59
#include <libxml/parserInternals.h>
60
#include <libxml/HTMLparser.h>
61
#include <libxml/valid.h>
62
#include <libxml/entities.h>
63
#include <libxml/xmlerror.h>
64
#include <libxml/encoding.h>
65
#include <libxml/xmlIO.h>
66
#include <libxml/uri.h>
67
#ifdef LIBXML_CATALOG_ENABLED
68
#include <libxml/catalog.h>
69
#endif
70
#ifdef LIBXML_SCHEMAS_ENABLED
71
#include <libxml/xmlschemastypes.h>
72
#include <libxml/relaxng.h>
73
#endif
74
#if defined(LIBXML_XPATH_ENABLED) || defined(LIBXML_SCHEMAS_ENABLED)
75
#include <libxml/xpath.h>
76
#endif
77
78
#include "private/buf.h"
79
#include "private/dict.h"
80
#include "private/enc.h"
81
#include "private/entities.h"
82
#include "private/error.h"
83
#include "private/globals.h"
84
#include "private/html.h"
85
#include "private/io.h"
86
#include "private/memory.h"
87
#include "private/parser.h"
88
#include "private/threads.h"
89
#include "private/xpath.h"
90
91
struct _xmlStartTag {
92
    const xmlChar *prefix;
93
    const xmlChar *URI;
94
    int line;
95
    int nsNr;
96
};
97
98
static xmlParserCtxtPtr
99
xmlCreateEntityParserCtxtInternal(xmlSAXHandlerPtr sax, void *userData,
100
        const xmlChar *URL, const xmlChar *ID, const xmlChar *base,
101
        xmlParserCtxtPtr pctx);
102
103
static void xmlHaltParser(xmlParserCtxtPtr ctxt);
104
105
static int
106
xmlParseElementStart(xmlParserCtxtPtr ctxt);
107
108
static void
109
xmlParseElementEnd(xmlParserCtxtPtr ctxt);
110
111
/************************************************************************
112
 *                  *
113
 *  Arbitrary limits set in the parser. See XML_PARSE_HUGE    *
114
 *                  *
115
 ************************************************************************/
116
117
3.44M
#define XML_MAX_HUGE_LENGTH 1000000000
118
119
#define XML_PARSER_BIG_ENTITY 1000
120
#define XML_PARSER_LOT_ENTITY 5000
121
122
/*
123
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
124
 *    replacement over the size in byte of the input indicates that you have
125
 *    an exponential behaviour. A value of 10 corresponds to at least 3 entity
126
 *    replacement per byte of input.
127
 */
128
606
#define XML_PARSER_NON_LINEAR 10
129
130
32.8M
#define XML_ENT_FIXED_COST 50
131
132
/**
133
 * xmlParserMaxDepth:
134
 *
135
 * arbitrary depth limit for the XML documents that we allow to
136
 * process. This is not a limitation of the parser but a safety
137
 * boundary feature. It can be disabled with the XML_PARSE_HUGE
138
 * parser option.
139
 */
140
unsigned int xmlParserMaxDepth = 256;
141
142
143
144
#define SAX2 1
145
156M
#define XML_PARSER_BIG_BUFFER_SIZE 300
146
17.4G
#define XML_PARSER_BUFFER_SIZE 100
147
707k
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
148
149
/**
150
 * XML_PARSER_CHUNK_SIZE
151
 *
152
 * When calling GROW that's the minimal amount of data
153
 * the parser expected to have received. It is not a hard
154
 * limit but an optimization when reading strings like Names
155
 * It is not strictly needed as long as inputs available characters
156
 * are followed by 0, which should be provided by the I/O level
157
 */
158
49.5M
#define XML_PARSER_CHUNK_SIZE 100
159
160
/*
161
 * List of XML prefixed PI allowed by W3C specs
162
 */
163
164
static const char* const xmlW3CPIs[] = {
165
    "xml-stylesheet",
166
    "xml-model",
167
    NULL
168
};
169
170
171
/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
172
static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
173
                                              const xmlChar **str);
174
175
static xmlParserErrors
176
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
177
                xmlSAXHandlerPtr sax,
178
          void *user_data, int depth, const xmlChar *URL,
179
          const xmlChar *ID, xmlNodePtr *list);
180
181
static int
182
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
183
                          const char *encoding);
184
#ifdef LIBXML_LEGACY_ENABLED
185
static void
186
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
187
                      xmlNodePtr lastNode);
188
#endif /* LIBXML_LEGACY_ENABLED */
189
190
static xmlParserErrors
191
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
192
          const xmlChar *string, void *user_data, xmlNodePtr *lst);
193
194
static int
195
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
196
197
/************************************************************************
198
 *                  *
199
 *    Some factorized error routines        *
200
 *                  *
201
 ************************************************************************/
202
203
/**
204
 * xmlErrAttributeDup:
205
 * @ctxt:  an XML parser context
206
 * @prefix:  the attribute prefix
207
 * @localname:  the attribute localname
208
 *
209
 * Handle a redefinition of attribute error
210
 */
211
static void
212
xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
213
                   const xmlChar * localname)
214
107k
{
215
107k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
216
107k
        (ctxt->instate == XML_PARSER_EOF))
217
0
  return;
218
107k
    if (ctxt != NULL)
219
107k
  ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
220
221
107k
    if (prefix == NULL)
222
75.3k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
223
75.3k
                        XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
224
75.3k
                        (const char *) localname, NULL, NULL, 0, 0,
225
75.3k
                        "Attribute %s redefined\n", localname);
226
32.2k
    else
227
32.2k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
228
32.2k
                        XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
229
32.2k
                        (const char *) prefix, (const char *) localname,
230
32.2k
                        NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
231
32.2k
                        localname);
232
107k
    if (ctxt != NULL) {
233
107k
  ctxt->wellFormed = 0;
234
107k
  if (ctxt->recovery == 0)
235
23.0k
      ctxt->disableSAX = 1;
236
107k
    }
237
107k
}
238
239
/**
240
 * xmlFatalErr:
241
 * @ctxt:  an XML parser context
242
 * @error:  the error number
243
 * @extra:  extra information string
244
 *
245
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
246
 */
247
static void
248
xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
249
5.64M
{
250
5.64M
    const char *errmsg;
251
252
5.64M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
253
5.64M
        (ctxt->instate == XML_PARSER_EOF))
254
28.7k
  return;
255
5.62M
    switch (error) {
256
105k
        case XML_ERR_INVALID_HEX_CHARREF:
257
105k
            errmsg = "CharRef: invalid hexadecimal value";
258
105k
            break;
259
204k
        case XML_ERR_INVALID_DEC_CHARREF:
260
204k
            errmsg = "CharRef: invalid decimal value";
261
204k
            break;
262
0
        case XML_ERR_INVALID_CHARREF:
263
0
            errmsg = "CharRef: invalid value";
264
0
            break;
265
2.41M
        case XML_ERR_INTERNAL_ERROR:
266
2.41M
            errmsg = "internal error";
267
2.41M
            break;
268
0
        case XML_ERR_PEREF_AT_EOF:
269
0
            errmsg = "PEReference at end of document";
270
0
            break;
271
0
        case XML_ERR_PEREF_IN_PROLOG:
272
0
            errmsg = "PEReference in prolog";
273
0
            break;
274
0
        case XML_ERR_PEREF_IN_EPILOG:
275
0
            errmsg = "PEReference in epilog";
276
0
            break;
277
0
        case XML_ERR_PEREF_NO_NAME:
278
0
            errmsg = "PEReference: no name";
279
0
            break;
280
15.2k
        case XML_ERR_PEREF_SEMICOL_MISSING:
281
15.2k
            errmsg = "PEReference: expecting ';'";
282
15.2k
            break;
283
1.98k
        case XML_ERR_ENTITY_LOOP:
284
1.98k
            errmsg = "Detected an entity reference loop";
285
1.98k
            break;
286
0
        case XML_ERR_ENTITY_NOT_STARTED:
287
0
            errmsg = "EntityValue: \" or ' expected";
288
0
            break;
289
6.28k
        case XML_ERR_ENTITY_PE_INTERNAL:
290
6.28k
            errmsg = "PEReferences forbidden in internal subset";
291
6.28k
            break;
292
5.18k
        case XML_ERR_ENTITY_NOT_FINISHED:
293
5.18k
            errmsg = "EntityValue: \" or ' expected";
294
5.18k
            break;
295
139k
        case XML_ERR_ATTRIBUTE_NOT_STARTED:
296
139k
            errmsg = "AttValue: \" or ' expected";
297
139k
            break;
298
566k
        case XML_ERR_LT_IN_ATTRIBUTE:
299
566k
            errmsg = "Unescaped '<' not allowed in attributes values";
300
566k
            break;
301
14.1k
        case XML_ERR_LITERAL_NOT_STARTED:
302
14.1k
            errmsg = "SystemLiteral \" or ' expected";
303
14.1k
            break;
304
18.2k
        case XML_ERR_LITERAL_NOT_FINISHED:
305
18.2k
            errmsg = "Unfinished System or Public ID \" or ' expected";
306
18.2k
            break;
307
188k
        case XML_ERR_MISPLACED_CDATA_END:
308
188k
            errmsg = "Sequence ']]>' not allowed in content";
309
188k
            break;
310
12.3k
        case XML_ERR_URI_REQUIRED:
311
12.3k
            errmsg = "SYSTEM or PUBLIC, the URI is missing";
312
12.3k
            break;
313
1.84k
        case XML_ERR_PUBID_REQUIRED:
314
1.84k
            errmsg = "PUBLIC, the Public Identifier is missing";
315
1.84k
            break;
316
126k
        case XML_ERR_HYPHEN_IN_COMMENT:
317
126k
            errmsg = "Comment must not contain '--' (double-hyphen)";
318
126k
            break;
319
85.1k
        case XML_ERR_PI_NOT_STARTED:
320
85.1k
            errmsg = "xmlParsePI : no target name";
321
85.1k
            break;
322
16.8k
        case XML_ERR_RESERVED_XML_NAME:
323
16.8k
            errmsg = "Invalid PI name";
324
16.8k
            break;
325
1.76k
        case XML_ERR_NOTATION_NOT_STARTED:
326
1.76k
            errmsg = "NOTATION: Name expected here";
327
1.76k
            break;
328
11.7k
        case XML_ERR_NOTATION_NOT_FINISHED:
329
11.7k
            errmsg = "'>' required to close NOTATION declaration";
330
11.7k
            break;
331
12.5k
        case XML_ERR_VALUE_REQUIRED:
332
12.5k
            errmsg = "Entity value required";
333
12.5k
            break;
334
3.24k
        case XML_ERR_URI_FRAGMENT:
335
3.24k
            errmsg = "Fragment not allowed";
336
3.24k
            break;
337
17.4k
        case XML_ERR_ATTLIST_NOT_STARTED:
338
17.4k
            errmsg = "'(' required to start ATTLIST enumeration";
339
17.4k
            break;
340
1.89k
        case XML_ERR_NMTOKEN_REQUIRED:
341
1.89k
            errmsg = "NmToken expected in ATTLIST enumeration";
342
1.89k
            break;
343
5.50k
        case XML_ERR_ATTLIST_NOT_FINISHED:
344
5.50k
            errmsg = "')' required to finish ATTLIST enumeration";
345
5.50k
            break;
346
5.32k
        case XML_ERR_MIXED_NOT_STARTED:
347
5.32k
            errmsg = "MixedContentDecl : '|' or ')*' expected";
348
5.32k
            break;
349
0
        case XML_ERR_PCDATA_REQUIRED:
350
0
            errmsg = "MixedContentDecl : '#PCDATA' expected";
351
0
            break;
352
12.0k
        case XML_ERR_ELEMCONTENT_NOT_STARTED:
353
12.0k
            errmsg = "ContentDecl : Name or '(' expected";
354
12.0k
            break;
355
21.8k
        case XML_ERR_ELEMCONTENT_NOT_FINISHED:
356
21.8k
            errmsg = "ContentDecl : ',' '|' or ')' expected";
357
21.8k
            break;
358
0
        case XML_ERR_PEREF_IN_INT_SUBSET:
359
0
            errmsg =
360
0
                "PEReference: forbidden within markup decl in internal subset";
361
0
            break;
362
548k
        case XML_ERR_GT_REQUIRED:
363
548k
            errmsg = "expected '>'";
364
548k
            break;
365
417
        case XML_ERR_CONDSEC_INVALID:
366
417
            errmsg = "XML conditional section '[' expected";
367
417
            break;
368
20.9k
        case XML_ERR_EXT_SUBSET_NOT_FINISHED:
369
20.9k
            errmsg = "Content error in the external subset";
370
20.9k
            break;
371
1.62k
        case XML_ERR_CONDSEC_INVALID_KEYWORD:
372
1.62k
            errmsg =
373
1.62k
                "conditional section INCLUDE or IGNORE keyword expected";
374
1.62k
            break;
375
2.05k
        case XML_ERR_CONDSEC_NOT_FINISHED:
376
2.05k
            errmsg = "XML conditional section not closed";
377
2.05k
            break;
378
521
        case XML_ERR_XMLDECL_NOT_STARTED:
379
521
            errmsg = "Text declaration '<?xml' required";
380
521
            break;
381
170k
        case XML_ERR_XMLDECL_NOT_FINISHED:
382
170k
            errmsg = "parsing XML declaration: '?>' expected";
383
170k
            break;
384
0
        case XML_ERR_EXT_ENTITY_STANDALONE:
385
0
            errmsg = "external parsed entities cannot be standalone";
386
0
            break;
387
360k
        case XML_ERR_ENTITYREF_SEMICOL_MISSING:
388
360k
            errmsg = "EntityRef: expecting ';'";
389
360k
            break;
390
46.5k
        case XML_ERR_DOCTYPE_NOT_FINISHED:
391
46.5k
            errmsg = "DOCTYPE improperly terminated";
392
46.5k
            break;
393
0
        case XML_ERR_LTSLASH_REQUIRED:
394
0
            errmsg = "EndTag: '</' not found";
395
0
            break;
396
11.9k
        case XML_ERR_EQUAL_REQUIRED:
397
11.9k
            errmsg = "expected '='";
398
11.9k
            break;
399
34.7k
        case XML_ERR_STRING_NOT_CLOSED:
400
34.7k
            errmsg = "String not closed expecting \" or '";
401
34.7k
            break;
402
8.56k
        case XML_ERR_STRING_NOT_STARTED:
403
8.56k
            errmsg = "String not started expecting ' or \"";
404
8.56k
            break;
405
1.03k
        case XML_ERR_ENCODING_NAME:
406
1.03k
            errmsg = "Invalid XML encoding name";
407
1.03k
            break;
408
5.61k
        case XML_ERR_STANDALONE_VALUE:
409
5.61k
            errmsg = "standalone accepts only 'yes' or 'no'";
410
5.61k
            break;
411
30.1k
        case XML_ERR_DOCUMENT_EMPTY:
412
30.1k
            errmsg = "Document is empty";
413
30.1k
            break;
414
246k
        case XML_ERR_DOCUMENT_END:
415
246k
            errmsg = "Extra content at the end of the document";
416
246k
            break;
417
4.47k
        case XML_ERR_NOT_WELL_BALANCED:
418
4.47k
            errmsg = "chunk is not well balanced";
419
4.47k
            break;
420
0
        case XML_ERR_EXTRA_CONTENT:
421
0
            errmsg = "extra content at the end of well balanced chunk";
422
0
            break;
423
98.1k
        case XML_ERR_VERSION_MISSING:
424
98.1k
            errmsg = "Malformed declaration expecting version";
425
98.1k
            break;
426
42
        case XML_ERR_NAME_TOO_LONG:
427
42
            errmsg = "Name too long";
428
42
            break;
429
#if 0
430
        case:
431
            errmsg = "";
432
            break;
433
#endif
434
15.4k
        default:
435
15.4k
            errmsg = "Unregistered error message";
436
5.62M
    }
437
5.62M
    if (ctxt != NULL)
438
5.62M
  ctxt->errNo = error;
439
5.62M
    if (info == NULL) {
440
3.20M
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
441
3.20M
                        XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s\n",
442
3.20M
                        errmsg);
443
3.20M
    } else {
444
2.41M
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
445
2.41M
                        XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s: %s\n",
446
2.41M
                        errmsg, info);
447
2.41M
    }
448
5.62M
    if (ctxt != NULL) {
449
5.62M
  ctxt->wellFormed = 0;
450
5.62M
  if (ctxt->recovery == 0)
451
728k
      ctxt->disableSAX = 1;
452
5.62M
    }
453
5.62M
}
454
455
/**
456
 * xmlFatalErrMsg:
457
 * @ctxt:  an XML parser context
458
 * @error:  the error number
459
 * @msg:  the error message
460
 *
461
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
462
 */
463
static void LIBXML_ATTR_FORMAT(3,0)
464
xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
465
               const char *msg)
466
8.59M
{
467
8.59M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
468
8.59M
        (ctxt->instate == XML_PARSER_EOF))
469
52
  return;
470
8.59M
    if (ctxt != NULL)
471
8.59M
  ctxt->errNo = error;
472
8.59M
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
473
8.59M
                    XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
474
8.59M
    if (ctxt != NULL) {
475
8.59M
  ctxt->wellFormed = 0;
476
8.59M
  if (ctxt->recovery == 0)
477
1.01M
      ctxt->disableSAX = 1;
478
8.59M
    }
479
8.59M
}
480
481
/**
482
 * xmlWarningMsg:
483
 * @ctxt:  an XML parser context
484
 * @error:  the error number
485
 * @msg:  the error message
486
 * @str1:  extra data
487
 * @str2:  extra data
488
 *
489
 * Handle a warning.
490
 */
491
static void LIBXML_ATTR_FORMAT(3,0)
492
xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
493
              const char *msg, const xmlChar *str1, const xmlChar *str2)
494
273k
{
495
273k
    xmlStructuredErrorFunc schannel = NULL;
496
497
273k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
498
273k
        (ctxt->instate == XML_PARSER_EOF))
499
0
  return;
500
273k
    if ((ctxt != NULL) && (ctxt->sax != NULL) &&
501
273k
        (ctxt->sax->initialized == XML_SAX2_MAGIC))
502
175k
        schannel = ctxt->sax->serror;
503
273k
    if (ctxt != NULL) {
504
273k
        __xmlRaiseError(schannel,
505
273k
                    (ctxt->sax) ? ctxt->sax->warning : NULL,
506
273k
                    ctxt->userData,
507
273k
                    ctxt, NULL, XML_FROM_PARSER, error,
508
273k
                    XML_ERR_WARNING, NULL, 0,
509
273k
        (const char *) str1, (const char *) str2, NULL, 0, 0,
510
273k
        msg, (const char *) str1, (const char *) str2);
511
273k
    } else {
512
0
        __xmlRaiseError(schannel, NULL, NULL,
513
0
                    ctxt, NULL, XML_FROM_PARSER, error,
514
0
                    XML_ERR_WARNING, NULL, 0,
515
0
        (const char *) str1, (const char *) str2, NULL, 0, 0,
516
0
        msg, (const char *) str1, (const char *) str2);
517
0
    }
518
273k
}
519
520
/**
521
 * xmlValidityError:
522
 * @ctxt:  an XML parser context
523
 * @error:  the error number
524
 * @msg:  the error message
525
 * @str1:  extra data
526
 *
527
 * Handle a validity error.
528
 */
529
static void LIBXML_ATTR_FORMAT(3,0)
530
xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
531
              const char *msg, const xmlChar *str1, const xmlChar *str2)
532
19.0k
{
533
19.0k
    xmlStructuredErrorFunc schannel = NULL;
534
535
19.0k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
536
19.0k
        (ctxt->instate == XML_PARSER_EOF))
537
0
  return;
538
19.0k
    if (ctxt != NULL) {
539
19.0k
  ctxt->errNo = error;
540
19.0k
  if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
541
10.5k
      schannel = ctxt->sax->serror;
542
19.0k
    }
543
19.0k
    if (ctxt != NULL) {
544
19.0k
        __xmlRaiseError(schannel,
545
19.0k
                    ctxt->vctxt.error, ctxt->vctxt.userData,
546
19.0k
                    ctxt, NULL, XML_FROM_DTD, error,
547
19.0k
                    XML_ERR_ERROR, NULL, 0, (const char *) str1,
548
19.0k
        (const char *) str2, NULL, 0, 0,
549
19.0k
        msg, (const char *) str1, (const char *) str2);
550
19.0k
  ctxt->valid = 0;
551
19.0k
    } else {
552
0
        __xmlRaiseError(schannel, NULL, NULL,
553
0
                    ctxt, NULL, XML_FROM_DTD, error,
554
0
                    XML_ERR_ERROR, NULL, 0, (const char *) str1,
555
0
        (const char *) str2, NULL, 0, 0,
556
0
        msg, (const char *) str1, (const char *) str2);
557
0
    }
558
19.0k
}
559
560
/**
561
 * xmlFatalErrMsgInt:
562
 * @ctxt:  an XML parser context
563
 * @error:  the error number
564
 * @msg:  the error message
565
 * @val:  an integer value
566
 *
567
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
568
 */
569
static void LIBXML_ATTR_FORMAT(3,0)
570
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
571
                  const char *msg, int val)
572
13.4M
{
573
13.4M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
574
13.4M
        (ctxt->instate == XML_PARSER_EOF))
575
0
  return;
576
13.4M
    if (ctxt != NULL)
577
13.4M
  ctxt->errNo = error;
578
13.4M
    __xmlRaiseError(NULL, NULL, NULL,
579
13.4M
                    ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
580
13.4M
                    NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
581
13.4M
    if (ctxt != NULL) {
582
13.4M
  ctxt->wellFormed = 0;
583
13.4M
  if (ctxt->recovery == 0)
584
453k
      ctxt->disableSAX = 1;
585
13.4M
    }
586
13.4M
}
587
588
/**
589
 * xmlFatalErrMsgStrIntStr:
590
 * @ctxt:  an XML parser context
591
 * @error:  the error number
592
 * @msg:  the error message
593
 * @str1:  an string info
594
 * @val:  an integer value
595
 * @str2:  an string info
596
 *
597
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
598
 */
599
static void LIBXML_ATTR_FORMAT(3,0)
600
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
601
                  const char *msg, const xmlChar *str1, int val,
602
      const xmlChar *str2)
603
2.35M
{
604
2.35M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
605
2.35M
        (ctxt->instate == XML_PARSER_EOF))
606
0
  return;
607
2.35M
    if (ctxt != NULL)
608
2.35M
  ctxt->errNo = error;
609
2.35M
    __xmlRaiseError(NULL, NULL, NULL,
610
2.35M
                    ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
611
2.35M
                    NULL, 0, (const char *) str1, (const char *) str2,
612
2.35M
        NULL, val, 0, msg, str1, val, str2);
613
2.35M
    if (ctxt != NULL) {
614
2.35M
  ctxt->wellFormed = 0;
615
2.35M
  if (ctxt->recovery == 0)
616
423k
      ctxt->disableSAX = 1;
617
2.35M
    }
618
2.35M
}
619
620
/**
621
 * xmlFatalErrMsgStr:
622
 * @ctxt:  an XML parser context
623
 * @error:  the error number
624
 * @msg:  the error message
625
 * @val:  a string value
626
 *
627
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
628
 */
629
static void LIBXML_ATTR_FORMAT(3,0)
630
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
631
                  const char *msg, const xmlChar * val)
632
9.17M
{
633
9.17M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
634
9.17M
        (ctxt->instate == XML_PARSER_EOF))
635
20
  return;
636
9.17M
    if (ctxt != NULL)
637
9.17M
  ctxt->errNo = error;
638
9.17M
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
639
9.17M
                    XML_FROM_PARSER, error, XML_ERR_FATAL,
640
9.17M
                    NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
641
9.17M
                    val);
642
9.17M
    if (ctxt != NULL) {
643
9.17M
  ctxt->wellFormed = 0;
644
9.17M
  if (ctxt->recovery == 0)
645
2.70M
      ctxt->disableSAX = 1;
646
9.17M
    }
647
9.17M
}
648
649
/**
650
 * xmlErrMsgStr:
651
 * @ctxt:  an XML parser context
652
 * @error:  the error number
653
 * @msg:  the error message
654
 * @val:  a string value
655
 *
656
 * Handle a non fatal parser error
657
 */
658
static void LIBXML_ATTR_FORMAT(3,0)
659
xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
660
                  const char *msg, const xmlChar * val)
661
261k
{
662
261k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
663
261k
        (ctxt->instate == XML_PARSER_EOF))
664
0
  return;
665
261k
    if (ctxt != NULL)
666
261k
  ctxt->errNo = error;
667
261k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
668
261k
                    XML_FROM_PARSER, error, XML_ERR_ERROR,
669
261k
                    NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
670
261k
                    val);
671
261k
}
672
673
/**
674
 * xmlNsErr:
675
 * @ctxt:  an XML parser context
676
 * @error:  the error number
677
 * @msg:  the message
678
 * @info1:  extra information string
679
 * @info2:  extra information string
680
 *
681
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
682
 */
683
static void LIBXML_ATTR_FORMAT(3,0)
684
xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
685
         const char *msg,
686
         const xmlChar * info1, const xmlChar * info2,
687
         const xmlChar * info3)
688
1.73M
{
689
1.73M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
690
1.73M
        (ctxt->instate == XML_PARSER_EOF))
691
111
  return;
692
1.73M
    if (ctxt != NULL)
693
1.73M
  ctxt->errNo = error;
694
1.73M
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
695
1.73M
                    XML_ERR_ERROR, NULL, 0, (const char *) info1,
696
1.73M
                    (const char *) info2, (const char *) info3, 0, 0, msg,
697
1.73M
                    info1, info2, info3);
698
1.73M
    if (ctxt != NULL)
699
1.73M
  ctxt->nsWellFormed = 0;
700
1.73M
}
701
702
/**
703
 * xmlNsWarn
704
 * @ctxt:  an XML parser context
705
 * @error:  the error number
706
 * @msg:  the message
707
 * @info1:  extra information string
708
 * @info2:  extra information string
709
 *
710
 * Handle a namespace warning error
711
 */
712
static void LIBXML_ATTR_FORMAT(3,0)
713
xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
714
         const char *msg,
715
         const xmlChar * info1, const xmlChar * info2,
716
         const xmlChar * info3)
717
93.0k
{
718
93.0k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
719
93.0k
        (ctxt->instate == XML_PARSER_EOF))
720
0
  return;
721
93.0k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
722
93.0k
                    XML_ERR_WARNING, NULL, 0, (const char *) info1,
723
93.0k
                    (const char *) info2, (const char *) info3, 0, 0, msg,
724
93.0k
                    info1, info2, info3);
725
93.0k
}
726
727
/*
 * Add val to *dst, clamping the result to ULONG_MAX instead of letting
 * the unsigned addition wrap around.
 */
static void
xmlSaturatedAdd(unsigned long *dst, unsigned long val) {
    /* Largest amount that can still be added without wrapping. */
    const unsigned long room = ULONG_MAX - *dst;

    *dst = (val > room) ? ULONG_MAX : (*dst + val);
}
734
735
/*
 * Add a size_t quantity to *dst, clamping the result to ULONG_MAX.
 *
 * The value is taken as size_t rather than unsigned long so that, on
 * platforms where size_t is wider than unsigned long, an oversized
 * value saturates instead of being silently truncated at the call.
 */
static void
xmlSaturatedAddSizeT(unsigned long *dst, size_t val) {
    if (val > ULONG_MAX - *dst)
        *dst = ULONG_MAX;
    else
        *dst += (unsigned long) val; /* fits: val <= ULONG_MAX - *dst */
}
742
743
/**
744
 * xmlParserEntityCheck:
745
 * @ctxt:  parser context
746
 * @extra:  sum of unexpanded entity sizes
747
 *
748
 * Check for non-linear entity expansion behaviour.
749
 *
750
 * In some cases like xmlStringDecodeEntities, this function is called
751
 * for each, possibly nested entity and its unexpanded content length.
752
 *
753
 * In other cases like xmlParseReference, it's only called for each
754
 * top-level entity with its unexpanded content length plus the sum of
755
 * the unexpanded content lengths (plus fixed cost) of all nested
756
 * entities.
757
 *
758
 * Summing the unexpanded lengths also adds the length of the reference.
759
 * This is by design. Taking the length of the entity name into account
760
 * discourages attacks that try to waste CPU time with abusively long
761
 * entity names. See test/recurse/lol6.xml for example. Each call also
762
 * adds some fixed cost XML_ENT_FIXED_COST to discourage attacks with
763
 * short entities.
764
 *
765
 * Returns 1 on error, 0 on success.
766
 */
767
static int
768
xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long extra)
769
32.8M
{
770
32.8M
    unsigned long consumed;
771
32.8M
    xmlParserInputPtr input = ctxt->input;
772
32.8M
    xmlEntityPtr entity = input->entity;
773
774
    /*
775
     * Compute total consumed bytes so far, including input streams of
776
     * external entities.
777
     */
778
32.8M
    consumed = input->parentConsumed;
779
32.8M
    if ((entity == NULL) ||
780
32.8M
        ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
781
23.7M
         ((entity->flags & XML_ENT_PARSED) == 0))) {
782
23.7M
        xmlSaturatedAdd(&consumed, input->consumed);
783
23.7M
        xmlSaturatedAddSizeT(&consumed, input->cur - input->base);
784
23.7M
    }
785
32.8M
    xmlSaturatedAdd(&consumed, ctxt->sizeentities);
786
787
    /*
788
     * Add extra cost and some fixed cost.
789
     */
790
32.8M
    xmlSaturatedAdd(&ctxt->sizeentcopy, extra);
791
32.8M
    xmlSaturatedAdd(&ctxt->sizeentcopy, XML_ENT_FIXED_COST);
792
793
    /*
794
     * It's important to always use saturation arithmetic when tracking
795
     * entity sizes to make the size checks reliable. If "sizeentcopy"
796
     * overflows, we have to abort.
797
     */
798
32.8M
    if ((ctxt->sizeentcopy > XML_MAX_TEXT_LENGTH) &&
799
32.8M
        ((ctxt->sizeentcopy >= ULONG_MAX) ||
800
606
         (ctxt->sizeentcopy / XML_PARSER_NON_LINEAR > consumed))) {
801
606
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_LOOP,
802
606
                       "Maximum entity amplification factor exceeded");
803
606
        xmlHaltParser(ctxt);
804
606
        return(1);
805
606
    }
806
807
32.8M
    return(0);
808
32.8M
}
809
810
/************************************************************************
811
 *                  *
812
 *    Library wide options          *
813
 *                  *
814
 ************************************************************************/
815
816
/**
817
  * xmlHasFeature:
818
  * @feature: the feature to be examined
819
  *
820
  * Examines if the library has been compiled with a given feature.
821
  *
822
  * Returns a non-zero value if the feature exist, otherwise zero.
823
  * Returns zero (0) if the feature does not exist or an unknown
824
  * feature is requested, non-zero otherwise.
825
  */
826
int
827
xmlHasFeature(xmlFeature feature)
828
0
{
829
0
    switch (feature) {
830
0
  case XML_WITH_THREAD:
831
0
#ifdef LIBXML_THREAD_ENABLED
832
0
      return(1);
833
#else
834
      return(0);
835
#endif
836
0
        case XML_WITH_TREE:
837
0
#ifdef LIBXML_TREE_ENABLED
838
0
            return(1);
839
#else
840
            return(0);
841
#endif
842
0
        case XML_WITH_OUTPUT:
843
0
#ifdef LIBXML_OUTPUT_ENABLED
844
0
            return(1);
845
#else
846
            return(0);
847
#endif
848
0
        case XML_WITH_PUSH:
849
0
#ifdef LIBXML_PUSH_ENABLED
850
0
            return(1);
851
#else
852
            return(0);
853
#endif
854
0
        case XML_WITH_READER:
855
0
#ifdef LIBXML_READER_ENABLED
856
0
            return(1);
857
#else
858
            return(0);
859
#endif
860
0
        case XML_WITH_PATTERN:
861
0
#ifdef LIBXML_PATTERN_ENABLED
862
0
            return(1);
863
#else
864
            return(0);
865
#endif
866
0
        case XML_WITH_WRITER:
867
0
#ifdef LIBXML_WRITER_ENABLED
868
0
            return(1);
869
#else
870
            return(0);
871
#endif
872
0
        case XML_WITH_SAX1:
873
0
#ifdef LIBXML_SAX1_ENABLED
874
0
            return(1);
875
#else
876
            return(0);
877
#endif
878
0
        case XML_WITH_FTP:
879
#ifdef LIBXML_FTP_ENABLED
880
            return(1);
881
#else
882
0
            return(0);
883
0
#endif
884
0
        case XML_WITH_HTTP:
885
#ifdef LIBXML_HTTP_ENABLED
886
            return(1);
887
#else
888
0
            return(0);
889
0
#endif
890
0
        case XML_WITH_VALID:
891
0
#ifdef LIBXML_VALID_ENABLED
892
0
            return(1);
893
#else
894
            return(0);
895
#endif
896
0
        case XML_WITH_HTML:
897
0
#ifdef LIBXML_HTML_ENABLED
898
0
            return(1);
899
#else
900
            return(0);
901
#endif
902
0
        case XML_WITH_LEGACY:
903
#ifdef LIBXML_LEGACY_ENABLED
904
            return(1);
905
#else
906
0
            return(0);
907
0
#endif
908
0
        case XML_WITH_C14N:
909
0
#ifdef LIBXML_C14N_ENABLED
910
0
            return(1);
911
#else
912
            return(0);
913
#endif
914
0
        case XML_WITH_CATALOG:
915
0
#ifdef LIBXML_CATALOG_ENABLED
916
0
            return(1);
917
#else
918
            return(0);
919
#endif
920
0
        case XML_WITH_XPATH:
921
0
#ifdef LIBXML_XPATH_ENABLED
922
0
            return(1);
923
#else
924
            return(0);
925
#endif
926
0
        case XML_WITH_XPTR:
927
0
#ifdef LIBXML_XPTR_ENABLED
928
0
            return(1);
929
#else
930
            return(0);
931
#endif
932
0
        case XML_WITH_XINCLUDE:
933
0
#ifdef LIBXML_XINCLUDE_ENABLED
934
0
            return(1);
935
#else
936
            return(0);
937
#endif
938
0
        case XML_WITH_ICONV:
939
0
#ifdef LIBXML_ICONV_ENABLED
940
0
            return(1);
941
#else
942
            return(0);
943
#endif
944
0
        case XML_WITH_ISO8859X:
945
0
#ifdef LIBXML_ISO8859X_ENABLED
946
0
            return(1);
947
#else
948
            return(0);
949
#endif
950
0
        case XML_WITH_UNICODE:
951
0
#ifdef LIBXML_UNICODE_ENABLED
952
0
            return(1);
953
#else
954
            return(0);
955
#endif
956
0
        case XML_WITH_REGEXP:
957
0
#ifdef LIBXML_REGEXP_ENABLED
958
0
            return(1);
959
#else
960
            return(0);
961
#endif
962
0
        case XML_WITH_AUTOMATA:
963
0
#ifdef LIBXML_AUTOMATA_ENABLED
964
0
            return(1);
965
#else
966
            return(0);
967
#endif
968
0
        case XML_WITH_EXPR:
969
#ifdef LIBXML_EXPR_ENABLED
970
            return(1);
971
#else
972
0
            return(0);
973
0
#endif
974
0
        case XML_WITH_SCHEMAS:
975
0
#ifdef LIBXML_SCHEMAS_ENABLED
976
0
            return(1);
977
#else
978
            return(0);
979
#endif
980
0
        case XML_WITH_SCHEMATRON:
981
0
#ifdef LIBXML_SCHEMATRON_ENABLED
982
0
            return(1);
983
#else
984
            return(0);
985
#endif
986
0
        case XML_WITH_MODULES:
987
0
#ifdef LIBXML_MODULES_ENABLED
988
0
            return(1);
989
#else
990
            return(0);
991
#endif
992
0
        case XML_WITH_DEBUG:
993
#ifdef LIBXML_DEBUG_ENABLED
994
            return(1);
995
#else
996
0
            return(0);
997
0
#endif
998
0
        case XML_WITH_DEBUG_MEM:
999
#ifdef DEBUG_MEMORY_LOCATION
1000
            return(1);
1001
#else
1002
0
            return(0);
1003
0
#endif
1004
0
        case XML_WITH_DEBUG_RUN:
1005
0
            return(0);
1006
0
        case XML_WITH_ZLIB:
1007
0
#ifdef LIBXML_ZLIB_ENABLED
1008
0
            return(1);
1009
#else
1010
            return(0);
1011
#endif
1012
0
        case XML_WITH_LZMA:
1013
0
#ifdef LIBXML_LZMA_ENABLED
1014
0
            return(1);
1015
#else
1016
            return(0);
1017
#endif
1018
0
        case XML_WITH_ICU:
1019
#ifdef LIBXML_ICU_ENABLED
1020
            return(1);
1021
#else
1022
0
            return(0);
1023
0
#endif
1024
0
        default:
1025
0
      break;
1026
0
     }
1027
0
     return(0);
1028
0
}
1029
1030
/************************************************************************
1031
 *                  *
1032
 *    SAX2 defaulted attributes handling      *
1033
 *                  *
1034
 ************************************************************************/
1035
1036
/**
1037
 * xmlDetectSAX2:
1038
 * @ctxt:  an XML parser context
1039
 *
1040
 * Do the SAX2 detection and specific initialization
1041
 */
1042
static void
1043
1.66M
xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1044
1.66M
    xmlSAXHandlerPtr sax;
1045
1046
    /* Avoid unused variable warning if features are disabled. */
1047
1.66M
    (void) sax;
1048
1049
1.66M
    if (ctxt == NULL) return;
1050
1.66M
    sax = ctxt->sax;
1051
1.66M
#ifdef LIBXML_SAX1_ENABLED
1052
1.66M
    if ((sax) &&  (sax->initialized == XML_SAX2_MAGIC) &&
1053
1.66M
        ((sax->startElementNs != NULL) ||
1054
1.08M
         (sax->endElementNs != NULL) ||
1055
1.08M
         ((sax->startElement == NULL) && (sax->endElement == NULL))))
1056
1.08M
        ctxt->sax2 = 1;
1057
#else
1058
    ctxt->sax2 = 1;
1059
#endif /* LIBXML_SAX1_ENABLED */
1060
1061
1.66M
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1062
1.66M
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1063
1.66M
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
1064
1.66M
    if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1065
1.66M
    (ctxt->str_xml_ns == NULL)) {
1066
0
        xmlErrMemory(ctxt, NULL);
1067
0
    }
1068
1.66M
}
1069
1070
typedef struct _xmlDefAttrs xmlDefAttrs;
1071
typedef xmlDefAttrs *xmlDefAttrsPtr;
1072
struct _xmlDefAttrs {
1073
    int nbAttrs;  /* number of defaulted attributes on that element */
1074
    int maxAttrs;       /* the size of the array */
1075
#if __STDC_VERSION__ >= 199901L
1076
    /* Using a C99 flexible array member avoids UBSan errors. */
1077
    const xmlChar *values[]; /* array of localname/prefix/values/external */
1078
#else
1079
    const xmlChar *values[5];
1080
#endif
1081
};
1082
1083
/**
1084
 * xmlAttrNormalizeSpace:
1085
 * @src: the source string
1086
 * @dst: the target string
1087
 *
1088
 * Normalize the space in non CDATA attribute values:
1089
 * If the attribute type is not CDATA, then the XML processor MUST further
1090
 * process the normalized attribute value by discarding any leading and
1091
 * trailing space (#x20) characters, and by replacing sequences of space
1092
 * (#x20) characters by a single space (#x20) character.
1093
 * Note that the size of dst need to be at least src, and if one doesn't need
1094
 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1095
 * passing src as dst is just fine.
1096
 *
1097
 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1098
 *         is needed.
1099
 */
1100
static xmlChar *
1101
xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1102
157k
{
1103
157k
    if ((src == NULL) || (dst == NULL))
1104
0
        return(NULL);
1105
1106
192k
    while (*src == 0x20) src++;
1107
7.11M
    while (*src != 0) {
1108
6.96M
  if (*src == 0x20) {
1109
773k
      while (*src == 0x20) src++;
1110
217k
      if (*src != 0)
1111
184k
    *dst++ = 0x20;
1112
6.74M
  } else {
1113
6.74M
      *dst++ = *src++;
1114
6.74M
  }
1115
6.96M
    }
1116
157k
    *dst = 0;
1117
157k
    if (dst == src)
1118
106k
       return(NULL);
1119
51.5k
    return(dst);
1120
157k
}
1121
1122
/**
1123
 * xmlAttrNormalizeSpace2:
1124
 * @src: the source string
1125
 *
1126
 * Normalize the space in non CDATA attribute values, a slightly more complex
1127
 * front end to avoid allocation problems when running on attribute values
1128
 * coming from the input.
1129
 *
1130
 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1131
 *         is needed.
1132
 */
1133
static const xmlChar *
1134
xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
1135
79.4k
{
1136
79.4k
    int i;
1137
79.4k
    int remove_head = 0;
1138
79.4k
    int need_realloc = 0;
1139
79.4k
    const xmlChar *cur;
1140
1141
79.4k
    if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1142
0
        return(NULL);
1143
79.4k
    i = *len;
1144
79.4k
    if (i <= 0)
1145
5.28k
        return(NULL);
1146
1147
74.1k
    cur = src;
1148
89.5k
    while (*cur == 0x20) {
1149
15.3k
        cur++;
1150
15.3k
  remove_head++;
1151
15.3k
    }
1152
1.64M
    while (*cur != 0) {
1153
1.58M
  if (*cur == 0x20) {
1154
114k
      cur++;
1155
114k
      if ((*cur == 0x20) || (*cur == 0)) {
1156
14.2k
          need_realloc = 1;
1157
14.2k
    break;
1158
14.2k
      }
1159
114k
  } else
1160
1.46M
      cur++;
1161
1.58M
    }
1162
74.1k
    if (need_realloc) {
1163
14.2k
        xmlChar *ret;
1164
1165
14.2k
  ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1166
14.2k
  if (ret == NULL) {
1167
0
      xmlErrMemory(ctxt, NULL);
1168
0
      return(NULL);
1169
0
  }
1170
14.2k
  xmlAttrNormalizeSpace(ret, ret);
1171
14.2k
  *len = strlen((const char *)ret);
1172
14.2k
        return(ret);
1173
59.9k
    } else if (remove_head) {
1174
2.60k
        *len -= remove_head;
1175
2.60k
        memmove(src, src + remove_head, 1 + *len);
1176
2.60k
  return(src);
1177
2.60k
    }
1178
57.3k
    return(NULL);
1179
74.1k
}
1180
1181
/**
1182
 * xmlAddDefAttrs:
1183
 * @ctxt:  an XML parser context
1184
 * @fullname:  the element fullname
1185
 * @fullattr:  the attribute fullname
1186
 * @value:  the attribute value
1187
 *
1188
 * Add a defaulted attribute for an element
1189
 */
1190
static void
1191
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1192
               const xmlChar *fullname,
1193
               const xmlChar *fullattr,
1194
157k
               const xmlChar *value) {
1195
157k
    xmlDefAttrsPtr defaults;
1196
157k
    int len;
1197
157k
    const xmlChar *name;
1198
157k
    const xmlChar *prefix;
1199
1200
    /*
1201
     * Allows to detect attribute redefinitions
1202
     */
1203
157k
    if (ctxt->attsSpecial != NULL) {
1204
122k
        if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1205
33.2k
      return;
1206
122k
    }
1207
1208
124k
    if (ctxt->attsDefault == NULL) {
1209
40.6k
        ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1210
40.6k
  if (ctxt->attsDefault == NULL)
1211
0
      goto mem_error;
1212
40.6k
    }
1213
1214
    /*
1215
     * split the element name into prefix:localname , the string found
1216
     * are within the DTD and then not associated to namespace names.
1217
     */
1218
124k
    name = xmlSplitQName3(fullname, &len);
1219
124k
    if (name == NULL) {
1220
111k
        name = xmlDictLookup(ctxt->dict, fullname, -1);
1221
111k
  prefix = NULL;
1222
111k
    } else {
1223
12.7k
        name = xmlDictLookup(ctxt->dict, name, -1);
1224
12.7k
  prefix = xmlDictLookup(ctxt->dict, fullname, len);
1225
12.7k
    }
1226
1227
    /*
1228
     * make sure there is some storage
1229
     */
1230
124k
    defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1231
124k
    if (defaults == NULL) {
1232
69.2k
        defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
1233
69.2k
                     (4 * 5) * sizeof(const xmlChar *));
1234
69.2k
  if (defaults == NULL)
1235
0
      goto mem_error;
1236
69.2k
  defaults->nbAttrs = 0;
1237
69.2k
  defaults->maxAttrs = 4;
1238
69.2k
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1239
69.2k
                          defaults, NULL) < 0) {
1240
0
      xmlFree(defaults);
1241
0
      goto mem_error;
1242
0
  }
1243
69.2k
    } else if (defaults->nbAttrs >= defaults->maxAttrs) {
1244
2.93k
        xmlDefAttrsPtr temp;
1245
1246
2.93k
        temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
1247
2.93k
           (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1248
2.93k
  if (temp == NULL)
1249
0
      goto mem_error;
1250
2.93k
  defaults = temp;
1251
2.93k
  defaults->maxAttrs *= 2;
1252
2.93k
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1253
2.93k
                          defaults, NULL) < 0) {
1254
0
      xmlFree(defaults);
1255
0
      goto mem_error;
1256
0
  }
1257
2.93k
    }
1258
1259
    /*
1260
     * Split the element name into prefix:localname , the string found
1261
     * are within the DTD and hen not associated to namespace names.
1262
     */
1263
124k
    name = xmlSplitQName3(fullattr, &len);
1264
124k
    if (name == NULL) {
1265
91.2k
        name = xmlDictLookup(ctxt->dict, fullattr, -1);
1266
91.2k
  prefix = NULL;
1267
91.2k
    } else {
1268
33.2k
        name = xmlDictLookup(ctxt->dict, name, -1);
1269
33.2k
  prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1270
33.2k
    }
1271
1272
124k
    defaults->values[5 * defaults->nbAttrs] = name;
1273
124k
    defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1274
    /* intern the string and precompute the end */
1275
124k
    len = xmlStrlen(value);
1276
124k
    value = xmlDictLookup(ctxt->dict, value, len);
1277
124k
    if (value == NULL)
1278
0
        goto mem_error;
1279
124k
    defaults->values[5 * defaults->nbAttrs + 2] = value;
1280
124k
    defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1281
124k
    if (ctxt->external)
1282
44.2k
        defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1283
80.2k
    else
1284
80.2k
        defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1285
124k
    defaults->nbAttrs++;
1286
1287
124k
    return;
1288
1289
0
mem_error:
1290
0
    xmlErrMemory(ctxt, NULL);
1291
0
    return;
1292
124k
}
1293
1294
/**
1295
 * xmlAddSpecialAttr:
1296
 * @ctxt:  an XML parser context
1297
 * @fullname:  the element fullname
1298
 * @fullattr:  the attribute fullname
1299
 * @type:  the attribute type
1300
 *
1301
 * Register this attribute type
1302
 */
1303
static void
1304
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1305
      const xmlChar *fullname,
1306
      const xmlChar *fullattr,
1307
      int type)
1308
1.00M
{
1309
1.00M
    if (ctxt->attsSpecial == NULL) {
1310
82.4k
        ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1311
82.4k
  if (ctxt->attsSpecial == NULL)
1312
0
      goto mem_error;
1313
82.4k
    }
1314
1315
1.00M
    if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1316
103k
        return;
1317
1318
903k
    xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1319
903k
                     (void *) (ptrdiff_t) type);
1320
903k
    return;
1321
1322
0
mem_error:
1323
0
    xmlErrMemory(ctxt, NULL);
1324
0
    return;
1325
1.00M
}
1326
1327
/**
1328
 * xmlCleanSpecialAttrCallback:
1329
 *
1330
 * Removes CDATA attributes from the special attribute table
1331
 */
1332
static void
1333
xmlCleanSpecialAttrCallback(void *payload, void *data,
1334
                            const xmlChar *fullname, const xmlChar *fullattr,
1335
646k
                            const xmlChar *unused ATTRIBUTE_UNUSED) {
1336
646k
    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1337
1338
646k
    if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) {
1339
207k
        xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1340
207k
    }
1341
646k
}
1342
1343
/**
1344
 * xmlCleanSpecialAttr:
1345
 * @ctxt:  an XML parser context
1346
 *
1347
 * Trim the list of attributes defined to remove all those of type
1348
 * CDATA as they are not special. This call should be done when finishing
1349
 * to parse the DTD and before starting to parse the document root.
1350
 */
1351
static void
1352
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1353
277k
{
1354
277k
    if (ctxt->attsSpecial == NULL)
1355
223k
        return;
1356
1357
54.0k
    xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1358
1359
54.0k
    if (xmlHashSize(ctxt->attsSpecial) == 0) {
1360
11.3k
        xmlHashFree(ctxt->attsSpecial, NULL);
1361
11.3k
        ctxt->attsSpecial = NULL;
1362
11.3k
    }
1363
54.0k
    return;
1364
277k
}
1365
1366
/**
1367
 * xmlCheckLanguageID:
1368
 * @lang:  pointer to the string value
1369
 *
1370
 * Checks that the value conforms to the LanguageID production:
1371
 *
1372
 * NOTE: this is somewhat deprecated, those productions were removed from
1373
 *       the XML Second edition.
1374
 *
1375
 * [33] LanguageID ::= Langcode ('-' Subcode)*
1376
 * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
1377
 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1378
 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1379
 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1380
 * [38] Subcode ::= ([a-z] | [A-Z])+
1381
 *
1382
 * The current REC reference the successors of RFC 1766, currently 5646
1383
 *
1384
 * http://www.rfc-editor.org/rfc/rfc5646.txt
1385
 * langtag       = language
1386
 *                 ["-" script]
1387
 *                 ["-" region]
1388
 *                 *("-" variant)
1389
 *                 *("-" extension)
1390
 *                 ["-" privateuse]
1391
 * language      = 2*3ALPHA            ; shortest ISO 639 code
1392
 *                 ["-" extlang]       ; sometimes followed by
1393
 *                                     ; extended language subtags
1394
 *               / 4ALPHA              ; or reserved for future use
1395
 *               / 5*8ALPHA            ; or registered language subtag
1396
 *
1397
 * extlang       = 3ALPHA              ; selected ISO 639 codes
1398
 *                 *2("-" 3ALPHA)      ; permanently reserved
1399
 *
1400
 * script        = 4ALPHA              ; ISO 15924 code
1401
 *
1402
 * region        = 2ALPHA              ; ISO 3166-1 code
1403
 *               / 3DIGIT              ; UN M.49 code
1404
 *
1405
 * variant       = 5*8alphanum         ; registered variants
1406
 *               / (DIGIT 3alphanum)
1407
 *
1408
 * extension     = singleton 1*("-" (2*8alphanum))
1409
 *
1410
 *                                     ; Single alphanumerics
1411
 *                                     ; "x" reserved for private use
1412
 * singleton     = DIGIT               ; 0 - 9
1413
 *               / %x41-57             ; A - W
1414
 *               / %x59-5A             ; Y - Z
1415
 *               / %x61-77             ; a - w
1416
 *               / %x79-7A             ; y - z
1417
 *
1418
 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1419
 * The parser below doesn't try to cope with extension or privateuse
1420
 * that could be added but that's not interoperable anyway
1421
 *
1422
 * Returns 1 if correct 0 otherwise
1423
 **/
1424
int
1425
xmlCheckLanguageID(const xmlChar * lang)
1426
168k
{
1427
168k
    const xmlChar *cur = lang, *nxt;
1428
1429
168k
    if (cur == NULL)
1430
2.87k
        return (0);
1431
165k
    if (((cur[0] == 'i') && (cur[1] == '-')) ||
1432
165k
        ((cur[0] == 'I') && (cur[1] == '-')) ||
1433
165k
        ((cur[0] == 'x') && (cur[1] == '-')) ||
1434
165k
        ((cur[0] == 'X') && (cur[1] == '-'))) {
1435
        /*
1436
         * Still allow IANA code and user code which were coming
1437
         * from the previous version of the XML-1.0 specification
1438
         * it's deprecated but we should not fail
1439
         */
1440
11.4k
        cur += 2;
1441
67.3k
        while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1442
67.3k
               ((cur[0] >= 'a') && (cur[0] <= 'z')))
1443
55.8k
            cur++;
1444
11.4k
        return(cur[0] == 0);
1445
11.4k
    }
1446
154k
    nxt = cur;
1447
598k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1448
598k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1449
444k
           nxt++;
1450
154k
    if (nxt - cur >= 4) {
1451
        /*
1452
         * Reserved
1453
         */
1454
15.5k
        if ((nxt - cur > 8) || (nxt[0] != 0))
1455
10.5k
            return(0);
1456
5.01k
        return(1);
1457
15.5k
    }
1458
138k
    if (nxt - cur < 2)
1459
15.7k
        return(0);
1460
    /* we got an ISO 639 code */
1461
123k
    if (nxt[0] == 0)
1462
9.91k
        return(1);
1463
113k
    if (nxt[0] != '-')
1464
9.28k
        return(0);
1465
1466
103k
    nxt++;
1467
103k
    cur = nxt;
1468
    /* now we can have extlang or script or region or variant */
1469
103k
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1470
9.36k
        goto region_m49;
1471
1472
429k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1473
429k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1474
334k
           nxt++;
1475
94.5k
    if (nxt - cur == 4)
1476
26.6k
        goto script;
1477
67.9k
    if (nxt - cur == 2)
1478
13.2k
        goto region;
1479
54.6k
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1480
9.99k
        goto variant;
1481
44.6k
    if (nxt - cur != 3)
1482
12.0k
        return(0);
1483
    /* we parsed an extlang */
1484
32.6k
    if (nxt[0] == 0)
1485
1.46k
        return(1);
1486
31.1k
    if (nxt[0] != '-')
1487
6.30k
        return(0);
1488
1489
24.8k
    nxt++;
1490
24.8k
    cur = nxt;
1491
    /* now we can have script or region or variant */
1492
24.8k
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1493
3.28k
        goto region_m49;
1494
1495
153k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1496
153k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1497
131k
           nxt++;
1498
21.5k
    if (nxt - cur == 2)
1499
4.08k
        goto region;
1500
17.4k
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1501
4.75k
        goto variant;
1502
12.7k
    if (nxt - cur != 4)
1503
8.89k
        return(0);
1504
    /* we parsed a script */
1505
30.4k
script:
1506
30.4k
    if (nxt[0] == 0)
1507
3.44k
        return(1);
1508
27.0k
    if (nxt[0] != '-')
1509
3.49k
        return(0);
1510
1511
23.5k
    nxt++;
1512
23.5k
    cur = nxt;
1513
    /* now we can have region or variant */
1514
23.5k
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1515
1.88k
        goto region_m49;
1516
1517
156k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1518
156k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1519
134k
           nxt++;
1520
1521
21.6k
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1522
7.79k
        goto variant;
1523
13.8k
    if (nxt - cur != 2)
1524
10.3k
        return(0);
1525
    /* we parsed a region */
1526
24.9k
region:
1527
24.9k
    if (nxt[0] == 0)
1528
5.72k
        return(1);
1529
19.2k
    if (nxt[0] != '-')
1530
9.94k
        return(0);
1531
1532
9.28k
    nxt++;
1533
9.28k
    cur = nxt;
1534
    /* now we can just have a variant */
1535
95.3k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1536
95.3k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1537
86.0k
           nxt++;
1538
1539
9.28k
    if ((nxt - cur < 5) || (nxt - cur > 8))
1540
7.30k
        return(0);
1541
1542
    /* we parsed a variant */
1543
24.5k
variant:
1544
24.5k
    if (nxt[0] == 0)
1545
6.08k
        return(1);
1546
18.4k
    if (nxt[0] != '-')
1547
14.0k
        return(0);
1548
    /* extensions and private use subtags not checked */
1549
4.42k
    return (1);
1550
1551
14.5k
region_m49:
1552
14.5k
    if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1553
14.5k
        ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1554
4.07k
        nxt += 3;
1555
4.07k
        goto region;
1556
4.07k
    }
1557
10.4k
    return(0);
1558
14.5k
}
1559
1560
/************************************************************************
1561
 *                  *
1562
 *    Parser stacks related functions and macros    *
1563
 *                  *
1564
 ************************************************************************/
1565
1566
static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1567
                                            const xmlChar ** str);
1568
1569
#ifdef SAX2
1570
/**
1571
 * nsPush:
1572
 * @ctxt:  an XML parser context
1573
 * @prefix:  the namespace prefix or NULL
1574
 * @URL:  the namespace name
1575
 *
1576
 * Pushes a new parser namespace on top of the ns stack
1577
 *
1578
 * Returns -1 in case of error, -2 if the namespace should be discarded
1579
 *     and the index in the stack otherwise.
1580
 */
1581
static int
1582
nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1583
599k
{
1584
599k
    if (ctxt->options & XML_PARSE_NSCLEAN) {
1585
250k
        int i;
1586
773k
  for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
1587
677k
      if (ctxt->nsTab[i] == prefix) {
1588
    /* in scope */
1589
154k
          if (ctxt->nsTab[i + 1] == URL)
1590
73.5k
        return(-2);
1591
    /* out of scope keep it */
1592
80.8k
    break;
1593
154k
      }
1594
677k
  }
1595
250k
    }
1596
526k
    if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1597
84.1k
  ctxt->nsMax = 10;
1598
84.1k
  ctxt->nsNr = 0;
1599
84.1k
  ctxt->nsTab = (const xmlChar **)
1600
84.1k
                xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1601
84.1k
  if (ctxt->nsTab == NULL) {
1602
0
      xmlErrMemory(ctxt, NULL);
1603
0
      ctxt->nsMax = 0;
1604
0
            return (-1);
1605
0
  }
1606
442k
    } else if (ctxt->nsNr >= ctxt->nsMax) {
1607
16.1k
        const xmlChar ** tmp;
1608
16.1k
        ctxt->nsMax *= 2;
1609
16.1k
        tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1610
16.1k
            ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1611
16.1k
        if (tmp == NULL) {
1612
0
            xmlErrMemory(ctxt, NULL);
1613
0
      ctxt->nsMax /= 2;
1614
0
            return (-1);
1615
0
        }
1616
16.1k
  ctxt->nsTab = tmp;
1617
16.1k
    }
1618
526k
    ctxt->nsTab[ctxt->nsNr++] = prefix;
1619
526k
    ctxt->nsTab[ctxt->nsNr++] = URL;
1620
526k
    return (ctxt->nsNr);
1621
526k
}
1622
/**
1623
 * nsPop:
1624
 * @ctxt: an XML parser context
1625
 * @nr:  the number to pop
1626
 *
1627
 * Pops the top @nr parser prefix/namespace from the ns stack
1628
 *
1629
 * Returns the number of namespaces removed
1630
 */
1631
static int
1632
nsPop(xmlParserCtxtPtr ctxt, int nr)
1633
201k
{
1634
201k
    int i;
1635
1636
201k
    if (ctxt->nsTab == NULL) return(0);
1637
201k
    if (ctxt->nsNr < nr) {
1638
0
        xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1639
0
        nr = ctxt->nsNr;
1640
0
    }
1641
201k
    if (ctxt->nsNr <= 0)
1642
0
        return (0);
1643
1644
671k
    for (i = 0;i < nr;i++) {
1645
469k
         ctxt->nsNr--;
1646
469k
   ctxt->nsTab[ctxt->nsNr] = NULL;
1647
469k
    }
1648
201k
    return(nr);
1649
201k
}
1650
#endif
1651
1652
static int
1653
162k
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1654
162k
    const xmlChar **atts;
1655
162k
    int *attallocs;
1656
162k
    int maxatts;
1657
1658
162k
    if (nr + 5 > ctxt->maxatts) {
1659
162k
  maxatts = ctxt->maxatts == 0 ? 55 : (nr + 5) * 2;
1660
162k
  atts = (const xmlChar **) xmlMalloc(
1661
162k
             maxatts * sizeof(const xmlChar *));
1662
162k
  if (atts == NULL) goto mem_error;
1663
162k
  attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1664
162k
                               (maxatts / 5) * sizeof(int));
1665
162k
  if (attallocs == NULL) {
1666
0
            xmlFree(atts);
1667
0
            goto mem_error;
1668
0
        }
1669
162k
        if (ctxt->maxatts > 0)
1670
895
            memcpy(atts, ctxt->atts, ctxt->maxatts * sizeof(const xmlChar *));
1671
162k
        xmlFree(ctxt->atts);
1672
162k
  ctxt->atts = atts;
1673
162k
  ctxt->attallocs = attallocs;
1674
162k
  ctxt->maxatts = maxatts;
1675
162k
    }
1676
162k
    return(ctxt->maxatts);
1677
0
mem_error:
1678
0
    xmlErrMemory(ctxt, NULL);
1679
0
    return(-1);
1680
162k
}
1681
1682
/**
1683
 * inputPush:
1684
 * @ctxt:  an XML parser context
1685
 * @value:  the parser input
1686
 *
1687
 * Pushes a new parser input on top of the input stack
1688
 *
1689
 * Returns -1 in case of error, the index in the stack otherwise
1690
 */
1691
int
1692
inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1693
10.1M
{
1694
10.1M
    if ((ctxt == NULL) || (value == NULL))
1695
0
        return(-1);
1696
10.1M
    if (ctxt->inputNr >= ctxt->inputMax) {
1697
209
        size_t newSize = ctxt->inputMax * 2;
1698
209
        xmlParserInputPtr *tmp;
1699
1700
209
        tmp = (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1701
209
                                               newSize * sizeof(*tmp));
1702
209
        if (tmp == NULL) {
1703
0
            xmlErrMemory(ctxt, NULL);
1704
0
            return (-1);
1705
0
        }
1706
209
        ctxt->inputTab = tmp;
1707
209
        ctxt->inputMax = newSize;
1708
209
    }
1709
10.1M
    ctxt->inputTab[ctxt->inputNr] = value;
1710
10.1M
    ctxt->input = value;
1711
10.1M
    return (ctxt->inputNr++);
1712
10.1M
}
1713
/**
1714
 * inputPop:
1715
 * @ctxt: an XML parser context
1716
 *
1717
 * Pops the top parser input from the input stack
1718
 *
1719
 * Returns the input just removed
1720
 */
1721
xmlParserInputPtr
1722
inputPop(xmlParserCtxtPtr ctxt)
1723
12.1M
{
1724
12.1M
    xmlParserInputPtr ret;
1725
1726
12.1M
    if (ctxt == NULL)
1727
0
        return(NULL);
1728
12.1M
    if (ctxt->inputNr <= 0)
1729
2.07M
        return (NULL);
1730
10.1M
    ctxt->inputNr--;
1731
10.1M
    if (ctxt->inputNr > 0)
1732
9.11M
        ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1733
1.00M
    else
1734
1.00M
        ctxt->input = NULL;
1735
10.1M
    ret = ctxt->inputTab[ctxt->inputNr];
1736
10.1M
    ctxt->inputTab[ctxt->inputNr] = NULL;
1737
10.1M
    return (ret);
1738
12.1M
}
1739
/**
1740
 * nodePush:
1741
 * @ctxt:  an XML parser context
1742
 * @value:  the element node
1743
 *
1744
 * Pushes a new element node on top of the node stack
1745
 *
1746
 * Returns -1 in case of error, the index in the stack otherwise
1747
 */
1748
int
1749
nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1750
11.1M
{
1751
11.1M
    if (ctxt == NULL) return(0);
1752
11.1M
    if (ctxt->nodeNr >= ctxt->nodeMax) {
1753
75.6k
        xmlNodePtr *tmp;
1754
1755
75.6k
  tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1756
75.6k
                                      ctxt->nodeMax * 2 *
1757
75.6k
                                      sizeof(ctxt->nodeTab[0]));
1758
75.6k
        if (tmp == NULL) {
1759
0
            xmlErrMemory(ctxt, NULL);
1760
0
            return (-1);
1761
0
        }
1762
75.6k
        ctxt->nodeTab = tmp;
1763
75.6k
  ctxt->nodeMax *= 2;
1764
75.6k
    }
1765
11.1M
    if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1766
11.1M
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
1767
231
  xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1768
231
     "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1769
231
        xmlParserMaxDepth);
1770
231
  xmlHaltParser(ctxt);
1771
231
  return(-1);
1772
231
    }
1773
11.1M
    ctxt->nodeTab[ctxt->nodeNr] = value;
1774
11.1M
    ctxt->node = value;
1775
11.1M
    return (ctxt->nodeNr++);
1776
11.1M
}
1777
1778
/**
1779
 * nodePop:
1780
 * @ctxt: an XML parser context
1781
 *
1782
 * Pops the top element node from the node stack
1783
 *
1784
 * Returns the node just removed
1785
 */
1786
xmlNodePtr
1787
nodePop(xmlParserCtxtPtr ctxt)
1788
9.30M
{
1789
9.30M
    xmlNodePtr ret;
1790
1791
9.30M
    if (ctxt == NULL) return(NULL);
1792
9.30M
    if (ctxt->nodeNr <= 0)
1793
424k
        return (NULL);
1794
8.88M
    ctxt->nodeNr--;
1795
8.88M
    if (ctxt->nodeNr > 0)
1796
7.48M
        ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1797
1.40M
    else
1798
1.40M
        ctxt->node = NULL;
1799
8.88M
    ret = ctxt->nodeTab[ctxt->nodeNr];
1800
8.88M
    ctxt->nodeTab[ctxt->nodeNr] = NULL;
1801
8.88M
    return (ret);
1802
9.30M
}
1803
1804
/**
1805
 * nameNsPush:
1806
 * @ctxt:  an XML parser context
1807
 * @value:  the element name
1808
 * @prefix:  the element prefix
1809
 * @URI:  the element namespace name
1810
 * @line:  the current line number for error messages
1811
 * @nsNr:  the number of namespaces pushed on the namespace table
1812
 *
1813
 * Pushes a new element name/prefix/URL on top of the name stack
1814
 *
1815
 * Returns -1 in case of error, the index in the stack otherwise
1816
 */
1817
static int
1818
nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1819
           const xmlChar *prefix, const xmlChar *URI, int line, int nsNr)
1820
10.8M
{
1821
10.8M
    xmlStartTag *tag;
1822
1823
10.8M
    if (ctxt->nameNr >= ctxt->nameMax) {
1824
158k
        const xmlChar * *tmp;
1825
158k
        xmlStartTag *tmp2;
1826
158k
        ctxt->nameMax *= 2;
1827
158k
        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1828
158k
                                    ctxt->nameMax *
1829
158k
                                    sizeof(ctxt->nameTab[0]));
1830
158k
        if (tmp == NULL) {
1831
0
      ctxt->nameMax /= 2;
1832
0
      goto mem_error;
1833
0
        }
1834
158k
  ctxt->nameTab = tmp;
1835
158k
        tmp2 = (xmlStartTag *) xmlRealloc((void * *)ctxt->pushTab,
1836
158k
                                    ctxt->nameMax *
1837
158k
                                    sizeof(ctxt->pushTab[0]));
1838
158k
        if (tmp2 == NULL) {
1839
0
      ctxt->nameMax /= 2;
1840
0
      goto mem_error;
1841
0
        }
1842
158k
  ctxt->pushTab = tmp2;
1843
10.6M
    } else if (ctxt->pushTab == NULL) {
1844
587k
        ctxt->pushTab = (xmlStartTag *) xmlMalloc(ctxt->nameMax *
1845
587k
                                            sizeof(ctxt->pushTab[0]));
1846
587k
        if (ctxt->pushTab == NULL)
1847
0
            goto mem_error;
1848
587k
    }
1849
10.8M
    ctxt->nameTab[ctxt->nameNr] = value;
1850
10.8M
    ctxt->name = value;
1851
10.8M
    tag = &ctxt->pushTab[ctxt->nameNr];
1852
10.8M
    tag->prefix = prefix;
1853
10.8M
    tag->URI = URI;
1854
10.8M
    tag->line = line;
1855
10.8M
    tag->nsNr = nsNr;
1856
10.8M
    return (ctxt->nameNr++);
1857
0
mem_error:
1858
0
    xmlErrMemory(ctxt, NULL);
1859
0
    return (-1);
1860
10.8M
}
1861
#ifdef LIBXML_PUSH_ENABLED
/**
 * nameNsPop:
 * @ctxt: an XML parser context
 *
 * Removes the topmost element name from the name stack and makes the
 * entry below it (if any) the current name.
 *
 * Returns the name just removed, or NULL if the stack is empty
 */
static const xmlChar *
nameNsPop(xmlParserCtxtPtr ctxt)
{
    const xmlChar *popped;
    int top;

    if (ctxt->nameNr <= 0)
        return(NULL);

    top = --ctxt->nameNr;
    ctxt->name = (top > 0) ? ctxt->nameTab[top - 1] : NULL;

    popped = ctxt->nameTab[top];
    ctxt->nameTab[top] = NULL;
    return(popped);
}
#endif /* LIBXML_PUSH_ENABLED */
1887
1888
/**
1889
 * namePush:
1890
 * @ctxt:  an XML parser context
1891
 * @value:  the element name
1892
 *
1893
 * Pushes a new element name on top of the name stack
1894
 *
1895
 * Returns -1 in case of error, the index in the stack otherwise
1896
 */
1897
int
1898
namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1899
0
{
1900
0
    if (ctxt == NULL) return (-1);
1901
1902
0
    if (ctxt->nameNr >= ctxt->nameMax) {
1903
0
        const xmlChar * *tmp;
1904
0
        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1905
0
                                    ctxt->nameMax * 2 *
1906
0
                                    sizeof(ctxt->nameTab[0]));
1907
0
        if (tmp == NULL) {
1908
0
      goto mem_error;
1909
0
        }
1910
0
  ctxt->nameTab = tmp;
1911
0
        ctxt->nameMax *= 2;
1912
0
    }
1913
0
    ctxt->nameTab[ctxt->nameNr] = value;
1914
0
    ctxt->name = value;
1915
0
    return (ctxt->nameNr++);
1916
0
mem_error:
1917
0
    xmlErrMemory(ctxt, NULL);
1918
0
    return (-1);
1919
0
}
1920
/**
1921
 * namePop:
1922
 * @ctxt: an XML parser context
1923
 *
1924
 * Pops the top element name from the name stack
1925
 *
1926
 * Returns the name just removed
1927
 */
1928
const xmlChar *
1929
namePop(xmlParserCtxtPtr ctxt)
1930
4.30M
{
1931
4.30M
    const xmlChar *ret;
1932
1933
4.30M
    if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1934
0
        return (NULL);
1935
4.30M
    ctxt->nameNr--;
1936
4.30M
    if (ctxt->nameNr > 0)
1937
4.15M
        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1938
158k
    else
1939
158k
        ctxt->name = NULL;
1940
4.30M
    ret = ctxt->nameTab[ctxt->nameNr];
1941
4.30M
    ctxt->nameTab[ctxt->nameNr] = NULL;
1942
4.30M
    return (ret);
1943
4.30M
}
1944
1945
13.1M
/*
 * spacePush:
 * Pushes @val on top of the space stack, doubling the table when it is
 * full, and points ctxt->space at the new top entry.
 *
 * Returns the index of the pushed entry, or -1 on allocation failure.
 */
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
    int *top;

    if (ctxt->spaceNr >= ctxt->spaceMax) {
        int *grown;

        ctxt->spaceMax *= 2;
        grown = (int *) xmlRealloc(ctxt->spaceTab,
                                   ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
        if (grown == NULL) {
            xmlErrMemory(ctxt, NULL);
            /* Restore the previous capacity: the old table is still in use. */
            ctxt->spaceMax /= 2;
            return(-1);
        }
        ctxt->spaceTab = grown;
    }

    top = &ctxt->spaceTab[ctxt->spaceNr];
    *top = val;
    ctxt->space = top;
    return(ctxt->spaceNr++);
}
1963
1964
10.8M
/*
 * spacePop:
 * Pops the top entry of the space stack. ctxt->space is moved to the
 * entry below, or to the first slot of the table when the stack becomes
 * empty. The vacated slot is overwritten with -1.
 *
 * Returns the popped value, or 0 if the stack was already empty.
 */
static int spacePop(xmlParserCtxtPtr ctxt) {
    int popped;
    int top;

    if (ctxt->spaceNr <= 0)
        return(0);

    top = --ctxt->spaceNr;
    ctxt->space = &ctxt->spaceTab[(top > 0) ? (top - 1) : 0];

    popped = ctxt->spaceTab[top];
    ctxt->spaceTab[top] = -1;
    return(popped);
}
1976
1977
/*
1978
 * Macros for accessing the content. Those should be used only by the parser,
1979
 * and not exported.
1980
 *
1981
 * Dirty macros, i.e. one often need to make assumption on the context to
1982
 * use them
1983
 *
1984
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
1985
 *           To be used with extreme caution since operations consuming
1986
 *           characters may move the input buffer to a different location !
1987
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
1988
 *           This should be used internally by the parser
1989
 *           only to compare to ASCII values, otherwise it would break when
1990
 *           running with UTF-8 encoding.
1991
 *   RAW     same as CUR but in the input buffer, bypass any token
1992
 *           extraction that may have been done
1993
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used only
1994
 *           to compare against ASCII based substrings.
1995
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
1996
 *           strings without newlines within the parser.
1997
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
1998
 *           defined char within the parser.
1999
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
2000
 *
2001
 *   NEXT    Skip to the next character, this does the proper decoding
2002
 *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
2003
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
2004
 *   CUR_CHAR(l) returns the current unicode character (int), set l
2005
 *           to the number of xmlChars used for the encoding [0-5].
2006
 *   CUR_SCHAR  same but operate on a string instead of the context
2007
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
2008
 *            the index
2009
 *   GROW, SHRINK  handling of input buffers
2010
 */
2011
2012
289M
/* Byte at the current input position (RAW and CUR expand identically). */
#define RAW (*(ctxt->input->cur))
#define CUR (*(ctxt->input->cur))
/* Byte at offset val from the current input position. */
#define NXT(val) (ctxt->input->cur[(val)])
/* Current position in, and start of, the current input buffer. */
#define CUR_PTR (ctxt->input->cur)
#define BASE_PTR (ctxt->input->base)
2017
2018
/*
 * CMPn(s, c1, ..., cn): true when the first n bytes at s equal c1..cn.
 * Bytes are compared as unsigned char. Every argument is parenthesized
 * so that any expression can be passed; note that s is evaluated once
 * per compared byte, so it must not have side effects.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((const unsigned char *) (s))[ 0 ] == (c1) && \
    ((const unsigned char *) (s))[ 1 ] == (c2) && \
    ((const unsigned char *) (s))[ 2 ] == (c3) && \
    ((const unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && \
    ((const unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && \
    ((const unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && \
    ((const unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((const unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((const unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((const unsigned char *) (s))[ 9 ] == (c10) )
2035
2036
68.6M
/*
 * SKIP(val): advance the input position and the column counter by val,
 * then ask for more input if a 0 byte is found at the new position.
 * val is evaluated twice.
 */
#define SKIP(val) do {							\
    ctxt->input->cur += (val);						\
    ctxt->input->col += (val);						\
    if (*ctxt->input->cur == 0)						\
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
  } while (0)
2041
2042
128k
/*
 * SKIPL(val): advance the input position by val bytes one at a time,
 * keeping the line/column counters up to date ('\n' starts a new line
 * at column 1), then ask for more input if a 0 byte is found at the new
 * position.
 *
 * val is parenthesized so that any expression (e.g. a conditional) can
 * be passed; it is still re-evaluated on every loop iteration.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == 0)						\
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
  } while (0)
2053
2054
131M
/*
 * SHRINK: outside progressive mode, call xmlSHRINK() once more than
 * 2 * INPUT_CHUNK bytes lie before the current position and fewer than
 * 2 * INPUT_CHUNK bytes remain after it.
 * The expansion is a bare "if" statement that already ends in ';'.
 */
#define SHRINK								\
    if ((ctxt->progressive == 0) &&					\
        (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) &&	\
        (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK))	\
        xmlSHRINK (ctxt);
2058
2059
2.77M
/*
 * xmlSHRINK:
 * Shrinks the current input when it is backed by a buffer that has an
 * encoder or a read callback, then requests more data if a 0 byte is
 * found at the current position.
 */
static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
    xmlParserInputPtr in = ctxt->input;

    /* Don't shrink memory buffers. */
    if ((in->buf != NULL) &&
        ((in->buf->encoder != NULL) || (in->buf->readcallback != NULL)))
        xmlParserInputShrink(in);

    if (*in->cur == 0)
        xmlParserInputGrow(in, INPUT_CHUNK);
}
2067
2068
385M
/*
 * GROW: outside progressive mode, call xmlGROW() when fewer than
 * INPUT_CHUNK bytes remain after the current position.
 * The expansion is a bare "if" statement that already ends in ';'.
 */
#define GROW								\
    if ((ctxt->progressive == 0) &&					\
        (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK))		\
        xmlGROW (ctxt);
2071
2072
34.4M
/*
 * xmlGROW:
 * Requests more data for the current input.
 *
 * Unless XML_PARSE_HUGE is set, an input fed by a read callback whose
 * current position is more than XML_MAX_LOOKUP_LIMIT bytes away from
 * either end of the buffer raises "Huge input lookup" and halts the
 * parser. The parser is also halted if the current position lies
 * outside the buffer after growing.
 */
static void xmlGROW (xmlParserCtxtPtr ctxt) {
    ptrdiff_t ahead = ctxt->input->end - ctxt->input->cur;
    ptrdiff_t behind = ctxt->input->cur - ctxt->input->base;
    int tooFar = ((ahead > XML_MAX_LOOKUP_LIMIT) ||
                  (behind > XML_MAX_LOOKUP_LIMIT));

    if (tooFar &&
        (ctxt->input->buf != NULL) &&
        (ctxt->input->buf->readcallback != NULL) &&
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
        xmlHaltParser(ctxt);
        return;
    }

    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);

    if ((ctxt->input->cur > ctxt->input->end) ||
        (ctxt->input->cur < ctxt->input->base)) {
        xmlHaltParser(ctxt);
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "cur index out of bound");
        return;
    }

    /* Still at a 0 byte: try once more to get data. */
    if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0))
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
}
2095
2096
83.5M
/* Skip blanks at the current position, see xmlSkipBlankChars(). */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* Move to the next character through xmlNextChar(). */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: advance by exactly one byte and one column, then ask for more
 * input if a 0 byte is found at the new position. No newline handling.
 */
#define NEXT1 {								\
	ctxt->input->cur++;						\
	ctxt->input->col++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }
2106
2107
331M
/*
 * NEXTL(l): step over the current character, which is l bytes long.
 * A '\n' at the current position starts a new line at column 1; any
 * other character advances the column by one.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) != '\n') {					\
	ctxt->input->col++;						\
    } else {								\
	ctxt->input->line++;						\
	ctxt->input->col = 1;						\
    }									\
    ctxt->input->cur += (l);						\
  } while (0)
2113
2114
361M
/* Current character of the parser input; its length is stored in l. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
/* Same as CUR_CHAR but reading from the string s. */
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))
2116
2117
/*
 * COPY_BUF(l,b,i,v): append character v to buffer b at index i.
 * When l is 1 the value is stored as a single byte; otherwise it is
 * written through xmlCopyCharMultiByte(). i is advanced by the number of
 * bytes written.
 *
 * The expansion is a bare if/else without a trailing ';', exactly like
 * the historical definition, so existing call sites keep working. The
 * arguments are parenthesized so that expressions can be passed safely.
 */
#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (v);					\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
2120
2121
/**
2122
 * xmlSkipBlankChars:
2123
 * @ctxt:  the XML parser context
2124
 *
2125
 * skip all blanks character found at that point in the input streams.
2126
 * It pops up finished entities in the process if allowable at that point.
2127
 *
2128
 * Returns the number of space chars skipped
2129
 */
2130
2131
int
2132
83.5M
xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2133
83.5M
    int res = 0;
2134
2135
    /*
2136
     * It's Okay to use CUR/NEXT here since all the blanks are on
2137
     * the ASCII range.
2138
     */
2139
83.5M
    if (((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) ||
2140
83.5M
        (ctxt->instate == XML_PARSER_START)) {
2141
44.2M
  const xmlChar *cur;
2142
  /*
2143
   * if we are in the document content, go really fast
2144
   */
2145
44.2M
  cur = ctxt->input->cur;
2146
44.2M
  while (IS_BLANK_CH(*cur)) {
2147
17.3M
      if (*cur == '\n') {
2148
1.73M
    ctxt->input->line++; ctxt->input->col = 1;
2149
15.5M
      } else {
2150
15.5M
    ctxt->input->col++;
2151
15.5M
      }
2152
17.3M
      cur++;
2153
17.3M
      if (res < INT_MAX)
2154
17.3M
    res++;
2155
17.3M
      if (*cur == 0) {
2156
58.4k
    ctxt->input->cur = cur;
2157
58.4k
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2158
58.4k
    cur = ctxt->input->cur;
2159
58.4k
      }
2160
17.3M
  }
2161
44.2M
  ctxt->input->cur = cur;
2162
44.2M
    } else {
2163
39.2M
        int expandPE = ((ctxt->external != 0) || (ctxt->inputNr != 1));
2164
2165
115M
  while (ctxt->instate != XML_PARSER_EOF) {
2166
115M
            if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2167
58.1M
    NEXT;
2168
58.1M
      } else if (CUR == '%') {
2169
                /*
2170
                 * Need to handle support of entities branching here
2171
                 */
2172
9.74M
          if ((expandPE == 0) || (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2173
559k
                    break;
2174
9.18M
          xmlParsePEReference(ctxt);
2175
47.7M
            } else if (CUR == 0) {
2176
9.16M
                unsigned long consumed;
2177
9.16M
                xmlEntityPtr ent;
2178
2179
9.16M
                if (ctxt->inputNr <= 1)
2180
62.6k
                    break;
2181
2182
9.10M
                consumed = ctxt->input->consumed;
2183
9.10M
                xmlSaturatedAddSizeT(&consumed,
2184
9.10M
                                     ctxt->input->cur - ctxt->input->base);
2185
2186
                /*
2187
                 * Add to sizeentities when parsing an external entity
2188
                 * for the first time.
2189
                 */
2190
9.10M
                ent = ctxt->input->entity;
2191
9.10M
                if ((ent->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
2192
9.10M
                    ((ent->flags & XML_ENT_PARSED) == 0)) {
2193
2.71k
                    ent->flags |= XML_ENT_PARSED;
2194
2195
2.71k
                    xmlSaturatedAdd(&ctxt->sizeentities, consumed);
2196
2.71k
                }
2197
2198
9.10M
                xmlParserEntityCheck(ctxt, consumed);
2199
2200
9.10M
                xmlPopInput(ctxt);
2201
38.6M
            } else {
2202
38.6M
                break;
2203
38.6M
            }
2204
2205
            /*
2206
             * Also increase the counter when entering or exiting a PERef.
2207
             * The spec says: "When a parameter-entity reference is recognized
2208
             * in the DTD and included, its replacement text MUST be enlarged
2209
             * by the attachment of one leading and one following space (#x20)
2210
             * character."
2211
             */
2212
76.4M
      if (res < INT_MAX)
2213
76.4M
    res++;
2214
76.4M
        }
2215
39.2M
    }
2216
83.5M
    return(res);
2217
83.5M
}
2218
2219
/************************************************************************
2220
 *                  *
2221
 *    Commodity functions to handle entities      *
2222
 *                  *
2223
 ************************************************************************/
2224
2225
/**
2226
 * xmlPopInput:
2227
 * @ctxt:  an XML parser context
2228
 *
2229
 * xmlPopInput: the current input pointed by ctxt->input came to an end
2230
 *          pop it and return the next char.
2231
 *
2232
 * Returns the current xmlChar in the parser context
2233
 */
2234
xmlChar
2235
9.10M
xmlPopInput(xmlParserCtxtPtr ctxt) {
2236
9.10M
    xmlParserInputPtr input;
2237
2238
9.10M
    if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2239
9.10M
    if (xmlParserDebugEntities)
2240
0
  xmlGenericError(xmlGenericErrorContext,
2241
0
    "Popping input %d\n", ctxt->inputNr);
2242
9.10M
    if ((ctxt->inputNr > 1) && (ctxt->inSubset == 0) &&
2243
9.10M
        (ctxt->instate != XML_PARSER_EOF))
2244
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2245
0
                    "Unfinished entity outside the DTD");
2246
9.10M
    input = inputPop(ctxt);
2247
9.10M
    if (input->entity != NULL)
2248
9.10M
        input->entity->flags &= ~XML_ENT_EXPANDING;
2249
9.10M
    xmlFreeInputStream(input);
2250
9.10M
    if (*ctxt->input->cur == 0)
2251
4.29M
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2252
9.10M
    return(CUR);
2253
9.10M
}
2254
2255
/**
2256
 * xmlPushInput:
2257
 * @ctxt:  an XML parser context
2258
 * @input:  an XML parser input fragment (entity, XML fragment ...).
2259
 *
2260
 * xmlPushInput: switch to a new input stream which is stacked on top
2261
 *               of the previous one(s).
2262
 * Returns -1 in case of error or the index in the input stack
2263
 */
2264
int
2265
9.18M
xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2266
9.18M
    int ret;
2267
9.18M
    if (input == NULL) return(-1);
2268
2269
9.16M
    if (xmlParserDebugEntities) {
2270
0
  if ((ctxt->input != NULL) && (ctxt->input->filename))
2271
0
      xmlGenericError(xmlGenericErrorContext,
2272
0
        "%s(%d): ", ctxt->input->filename,
2273
0
        ctxt->input->line);
2274
0
  xmlGenericError(xmlGenericErrorContext,
2275
0
    "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2276
0
    }
2277
9.16M
    if (((ctxt->inputNr > 40) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2278
9.16M
        (ctxt->inputNr > 100)) {
2279
0
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2280
0
        while (ctxt->inputNr > 1)
2281
0
            xmlFreeInputStream(inputPop(ctxt));
2282
0
  return(-1);
2283
0
    }
2284
9.16M
    ret = inputPush(ctxt, input);
2285
9.16M
    if (ctxt->instate == XML_PARSER_EOF)
2286
0
        return(-1);
2287
9.16M
    GROW;
2288
9.16M
    return(ret);
2289
9.16M
}
2290
2291
/**
2292
 * xmlParseCharRef:
2293
 * @ctxt:  an XML parser context
2294
 *
2295
 * DEPRECATED: Internal function, don't use.
2296
 *
2297
 * Parse a numeric character reference. Always consumes '&'.
2298
 *
2299
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2300
 *                  '&#x' [0-9a-fA-F]+ ';'
2301
 *
2302
 * [ WFC: Legal Character ]
2303
 * Characters referred to using character references must match the
2304
 * production for Char.
2305
 *
2306
 * Returns the value parsed (as an int), 0 in case of error
2307
 */
2308
int
2309
1.72M
xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2310
1.72M
    int val = 0;
2311
1.72M
    int count = 0;
2312
2313
    /*
2314
     * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2315
     */
2316
1.72M
    if ((RAW == '&') && (NXT(1) == '#') &&
2317
1.72M
        (NXT(2) == 'x')) {
2318
485k
  SKIP(3);
2319
485k
  GROW;
2320
1.77M
  while (RAW != ';') { /* loop blocked by count */
2321
1.38M
      if (count++ > 20) {
2322
36.7k
    count = 0;
2323
36.7k
    GROW;
2324
36.7k
                if (ctxt->instate == XML_PARSER_EOF)
2325
0
                    return(0);
2326
36.7k
      }
2327
1.38M
      if ((RAW >= '0') && (RAW <= '9'))
2328
786k
          val = val * 16 + (CUR - '0');
2329
603k
      else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2330
357k
          val = val * 16 + (CUR - 'a') + 10;
2331
246k
      else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2332
146k
          val = val * 16 + (CUR - 'A') + 10;
2333
100k
      else {
2334
100k
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2335
100k
    val = 0;
2336
100k
    break;
2337
100k
      }
2338
1.28M
      if (val > 0x110000)
2339
426k
          val = 0x110000;
2340
2341
1.28M
      NEXT;
2342
1.28M
      count++;
2343
1.28M
  }
2344
485k
  if (RAW == ';') {
2345
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2346
385k
      ctxt->input->col++;
2347
385k
      ctxt->input->cur++;
2348
385k
  }
2349
1.23M
    } else if  ((RAW == '&') && (NXT(1) == '#')) {
2350
1.23M
  SKIP(2);
2351
1.23M
  GROW;
2352
4.50M
  while (RAW != ';') { /* loop blocked by count */
2353
3.45M
      if (count++ > 20) {
2354
25.4k
    count = 0;
2355
25.4k
    GROW;
2356
25.4k
                if (ctxt->instate == XML_PARSER_EOF)
2357
0
                    return(0);
2358
25.4k
      }
2359
3.45M
      if ((RAW >= '0') && (RAW <= '9'))
2360
3.26M
          val = val * 10 + (CUR - '0');
2361
191k
      else {
2362
191k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2363
191k
    val = 0;
2364
191k
    break;
2365
191k
      }
2366
3.26M
      if (val > 0x110000)
2367
270k
          val = 0x110000;
2368
2369
3.26M
      NEXT;
2370
3.26M
      count++;
2371
3.26M
  }
2372
1.23M
  if (RAW == ';') {
2373
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2374
1.04M
      ctxt->input->col++;
2375
1.04M
      ctxt->input->cur++;
2376
1.04M
  }
2377
1.23M
    } else {
2378
0
        if (RAW == '&')
2379
0
            SKIP(1);
2380
0
        xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2381
0
    }
2382
2383
    /*
2384
     * [ WFC: Legal Character ]
2385
     * Characters referred to using character references must match the
2386
     * production for Char.
2387
     */
2388
1.72M
    if (val >= 0x110000) {
2389
9.11k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2390
9.11k
                "xmlParseCharRef: character reference out of bounds\n",
2391
9.11k
          val);
2392
1.71M
    } else if (IS_CHAR(val)) {
2393
1.37M
        return(val);
2394
1.37M
    } else {
2395
339k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2396
339k
                          "xmlParseCharRef: invalid xmlChar value %d\n",
2397
339k
                    val);
2398
339k
    }
2399
348k
    return(0);
2400
1.72M
}
2401
2402
/**
2403
 * xmlParseStringCharRef:
2404
 * @ctxt:  an XML parser context
2405
 * @str:  a pointer to an index in the string
2406
 *
2407
 * parse Reference declarations, variant parsing from a string rather
2408
 * than an an input flow.
2409
 *
2410
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2411
 *                  '&#x' [0-9a-fA-F]+ ';'
2412
 *
2413
 * [ WFC: Legal Character ]
2414
 * Characters referred to using character references must match the
2415
 * production for Char.
2416
 *
2417
 * Returns the value parsed (as an int), 0 in case of error, str will be
2418
 *         updated to the current value of the index
2419
 */
2420
static int
2421
249k
xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2422
249k
    const xmlChar *ptr;
2423
249k
    xmlChar cur;
2424
249k
    int val = 0;
2425
2426
249k
    if ((str == NULL) || (*str == NULL)) return(0);
2427
249k
    ptr = *str;
2428
249k
    cur = *ptr;
2429
249k
    if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2430
48.5k
  ptr += 3;
2431
48.5k
  cur = *ptr;
2432
186k
  while (cur != ';') { /* Non input consuming loop */
2433
143k
      if ((cur >= '0') && (cur <= '9'))
2434
61.5k
          val = val * 16 + (cur - '0');
2435
82.1k
      else if ((cur >= 'a') && (cur <= 'f'))
2436
13.0k
          val = val * 16 + (cur - 'a') + 10;
2437
69.1k
      else if ((cur >= 'A') && (cur <= 'F'))
2438
63.5k
          val = val * 16 + (cur - 'A') + 10;
2439
5.55k
      else {
2440
5.55k
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2441
5.55k
    val = 0;
2442
5.55k
    break;
2443
5.55k
      }
2444
138k
      if (val > 0x110000)
2445
72.2k
          val = 0x110000;
2446
2447
138k
      ptr++;
2448
138k
      cur = *ptr;
2449
138k
  }
2450
48.5k
  if (cur == ';')
2451
42.9k
      ptr++;
2452
201k
    } else if  ((cur == '&') && (ptr[1] == '#')){
2453
201k
  ptr += 2;
2454
201k
  cur = *ptr;
2455
723k
  while (cur != ';') { /* Non input consuming loops */
2456
534k
      if ((cur >= '0') && (cur <= '9'))
2457
521k
          val = val * 10 + (cur - '0');
2458
12.3k
      else {
2459
12.3k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2460
12.3k
    val = 0;
2461
12.3k
    break;
2462
12.3k
      }
2463
521k
      if (val > 0x110000)
2464
38.5k
          val = 0x110000;
2465
2466
521k
      ptr++;
2467
521k
      cur = *ptr;
2468
521k
  }
2469
201k
  if (cur == ';')
2470
188k
      ptr++;
2471
201k
    } else {
2472
0
  xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2473
0
  return(0);
2474
0
    }
2475
249k
    *str = ptr;
2476
2477
    /*
2478
     * [ WFC: Legal Character ]
2479
     * Characters referred to using character references must match the
2480
     * production for Char.
2481
     */
2482
249k
    if (val >= 0x110000) {
2483
466
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2484
466
                "xmlParseStringCharRef: character reference out of bounds\n",
2485
466
                val);
2486
249k
    } else if (IS_CHAR(val)) {
2487
224k
        return(val);
2488
224k
    } else {
2489
24.7k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2490
24.7k
        "xmlParseStringCharRef: invalid xmlChar value %d\n",
2491
24.7k
        val);
2492
24.7k
    }
2493
25.2k
    return(0);
2494
249k
}
2495
2496
/**
2497
 * xmlParserHandlePEReference:
2498
 * @ctxt:  the parser context
2499
 *
2500
 * [69] PEReference ::= '%' Name ';'
2501
 *
2502
 * [ WFC: No Recursion ]
2503
 * A parsed entity must not contain a recursive
2504
 * reference to itself, either directly or indirectly.
2505
 *
2506
 * [ WFC: Entity Declared ]
2507
 * In a document without any DTD, a document with only an internal DTD
2508
 * subset which contains no parameter entity references, or a document
2509
 * with "standalone='yes'", ...  ... The declaration of a parameter
2510
 * entity must precede any reference to it...
2511
 *
2512
 * [ VC: Entity Declared ]
2513
 * In a document with an external subset or external parameter entities
2514
 * with "standalone='no'", ...  ... The declaration of a parameter entity
2515
 * must precede any reference to it...
2516
 *
2517
 * [ WFC: In DTD ]
2518
 * Parameter-entity references may only appear in the DTD.
2519
 * NOTE: misleading but this is handled.
2520
 *
2521
 * A PEReference may have been detected in the current input stream
2522
 * the handling is done accordingly to
2523
 *      http://www.w3.org/TR/REC-xml#entproc
2524
 * i.e.
2525
 *   - Included in literal in entity values
2526
 *   - Included as Parameter Entity reference within DTDs
2527
 */
2528
void
2529
0
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2530
0
    switch(ctxt->instate) {
2531
0
  case XML_PARSER_CDATA_SECTION:
2532
0
      return;
2533
0
        case XML_PARSER_COMMENT:
2534
0
      return;
2535
0
  case XML_PARSER_START_TAG:
2536
0
      return;
2537
0
  case XML_PARSER_END_TAG:
2538
0
      return;
2539
0
        case XML_PARSER_EOF:
2540
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2541
0
      return;
2542
0
        case XML_PARSER_PROLOG:
2543
0
  case XML_PARSER_START:
2544
0
  case XML_PARSER_MISC:
2545
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2546
0
      return;
2547
0
  case XML_PARSER_ENTITY_DECL:
2548
0
        case XML_PARSER_CONTENT:
2549
0
        case XML_PARSER_ATTRIBUTE_VALUE:
2550
0
        case XML_PARSER_PI:
2551
0
  case XML_PARSER_SYSTEM_LITERAL:
2552
0
  case XML_PARSER_PUBLIC_LITERAL:
2553
      /* we just ignore it there */
2554
0
      return;
2555
0
        case XML_PARSER_EPILOG:
2556
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2557
0
      return;
2558
0
  case XML_PARSER_ENTITY_VALUE:
2559
      /*
2560
       * NOTE: in the case of entity values, we don't do the
2561
       *       substitution here since we need the literal
2562
       *       entity value to be able to save the internal
2563
       *       subset of the document.
2564
       *       This will be handled by xmlStringDecodeEntities
2565
       */
2566
0
      return;
2567
0
        case XML_PARSER_DTD:
2568
      /*
2569
       * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2570
       * In the internal DTD subset, parameter-entity references
2571
       * can occur only where markup declarations can occur, not
2572
       * within markup declarations.
2573
       * In that case this is handled in xmlParseMarkupDecl
2574
       */
2575
0
      if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2576
0
    return;
2577
0
      if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2578
0
    return;
2579
0
            break;
2580
0
        case XML_PARSER_IGNORE:
2581
0
            return;
2582
0
    }
2583
2584
0
    xmlParsePEReference(ctxt);
2585
0
}
2586
2587
/*
 * Macro used to grow the current buffer to twice its size plus n bytes.
 * buffer##_size is expected to be a size_t
 * mem_error: is expected to handle memory allocation failures; it is
 * also reached when the new size would be smaller than the current one.
 * n is parenthesized so that any expression can be passed.
 */
#define growBuffer(buffer, n) {						\
    xmlChar *tmp;							\
    size_t new_size = buffer##_size * 2 + (n);                          \
    if (new_size < buffer##_size) goto mem_error;                       \
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);                     \
    if (tmp == NULL) goto mem_error;					\
    buffer = tmp;							\
    buffer##_size = new_size;                                           \
}
2601
2602
/**
2603
 * xmlStringDecodeEntitiesInt:
2604
 * @ctxt:  the parser context
2605
 * @str:  the input string
2606
 * @len: the string length
2607
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2608
 * @end:  an end marker xmlChar, 0 if none
2609
 * @end2:  an end marker xmlChar, 0 if none
2610
 * @end3:  an end marker xmlChar, 0 if none
2611
 * @check:  whether to perform entity checks
2612
 */
2613
static xmlChar *
2614
xmlStringDecodeEntitiesInt(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2615
               int what, xmlChar end, xmlChar  end2, xmlChar end3,
2616
23.5M
                           int check) {
2617
23.5M
    xmlChar *buffer = NULL;
2618
23.5M
    size_t buffer_size = 0;
2619
23.5M
    size_t nbchars = 0;
2620
2621
23.5M
    xmlChar *current = NULL;
2622
23.5M
    xmlChar *rep = NULL;
2623
23.5M
    const xmlChar *last;
2624
23.5M
    xmlEntityPtr ent;
2625
23.5M
    int c,l;
2626
2627
23.5M
    if (str == NULL)
2628
10.4k
        return(NULL);
2629
23.5M
    last = str + len;
2630
2631
23.5M
    if (((ctxt->depth > 40) &&
2632
23.5M
         ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2633
23.5M
  (ctxt->depth > 100)) {
2634
0
  xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_LOOP,
2635
0
                       "Maximum entity nesting depth exceeded");
2636
0
  return(NULL);
2637
0
    }
2638
2639
    /*
2640
     * allocate a translation buffer.
2641
     */
2642
23.5M
    buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2643
23.5M
    buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
2644
23.5M
    if (buffer == NULL) goto mem_error;
2645
2646
    /*
2647
     * OK loop until we reach one of the ending char or a size limit.
2648
     * we are operating on already parsed values.
2649
     */
2650
23.5M
    if (str < last)
2651
23.3M
  c = CUR_SCHAR(str, l);
2652
128k
    else
2653
128k
        c = 0;
2654
3.13G
    while ((c != 0) && (c != end) && /* non input consuming loop */
2655
3.13G
           (c != end2) && (c != end3) &&
2656
3.13G
           (ctxt->instate != XML_PARSER_EOF)) {
2657
2658
3.11G
  if (c == 0) break;
2659
3.11G
        if ((c == '&') && (str[1] == '#')) {
2660
249k
      int val = xmlParseStringCharRef(ctxt, &str);
2661
249k
      if (val == 0)
2662
25.2k
                goto int_error;
2663
224k
      COPY_BUF(0,buffer,nbchars,val);
2664
224k
      if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2665
476
          growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2666
476
      }
2667
3.11G
  } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2668
25.6M
      if (xmlParserDebugEntities)
2669
0
    xmlGenericError(xmlGenericErrorContext,
2670
0
      "String decoding Entity Reference: %.30s\n",
2671
0
      str);
2672
25.6M
      ent = xmlParseStringEntityRef(ctxt, &str);
2673
25.6M
      if ((ent != NULL) &&
2674
25.6M
    (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2675
24.1k
    if (ent->content != NULL) {
2676
24.1k
        COPY_BUF(0,buffer,nbchars,ent->content[0]);
2677
24.1k
        if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2678
1.30k
      growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2679
1.30k
        }
2680
24.1k
    } else {
2681
0
        xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2682
0
          "predefined entity has no content\n");
2683
0
                    goto int_error;
2684
0
    }
2685
25.6M
      } else if ((ent != NULL) && (ent->content != NULL)) {
2686
21.2M
          if ((check) && (xmlParserEntityCheck(ctxt, ent->length)))
2687
307
                    goto int_error;
2688
2689
21.2M
                if (ent->flags & XML_ENT_EXPANDING) {
2690
479
              xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2691
479
                    xmlHaltParser(ctxt);
2692
479
                    ent->content[0] = 0;
2693
479
                    goto int_error;
2694
479
                }
2695
2696
21.2M
                ent->flags |= XML_ENT_EXPANDING;
2697
21.2M
    ctxt->depth++;
2698
21.2M
    rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
2699
21.2M
                        ent->length, what, 0, 0, 0, check);
2700
21.2M
    ctxt->depth--;
2701
21.2M
                ent->flags &= ~XML_ENT_EXPANDING;
2702
2703
21.2M
    if (rep == NULL) {
2704
7.15k
                    ent->content[0] = 0;
2705
7.15k
                    goto int_error;
2706
7.15k
                }
2707
2708
21.2M
                current = rep;
2709
13.0G
                while (*current != 0) { /* non input consuming loop */
2710
13.0G
                    buffer[nbchars++] = *current++;
2711
13.0G
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2712
2.68M
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2713
2.68M
                    }
2714
13.0G
                }
2715
21.2M
                xmlFree(rep);
2716
21.2M
                rep = NULL;
2717
21.2M
      } else if (ent != NULL) {
2718
45.4k
    int i = xmlStrlen(ent->name);
2719
45.4k
    const xmlChar *cur = ent->name;
2720
2721
45.4k
    buffer[nbchars++] = '&';
2722
45.4k
    if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
2723
1.22k
        growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
2724
1.22k
    }
2725
256k
    for (;i > 0;i--)
2726
210k
        buffer[nbchars++] = *cur++;
2727
45.4k
    buffer[nbchars++] = ';';
2728
45.4k
      }
2729
3.08G
  } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2730
370k
      if (xmlParserDebugEntities)
2731
0
    xmlGenericError(xmlGenericErrorContext,
2732
0
      "String decoding PE Reference: %.30s\n", str);
2733
370k
      ent = xmlParseStringPEReference(ctxt, &str);
2734
370k
      if (ent != NULL) {
2735
322k
                if (ent->content == NULL) {
2736
        /*
2737
         * Note: external parsed entities will not be loaded,
2738
         * it is not required for a non-validating parser to
2739
         * complete external PEReferences coming from the
2740
         * internal subset
2741
         */
2742
3.19k
        if (((ctxt->options & XML_PARSE_NOENT) != 0) ||
2743
3.19k
      ((ctxt->options & XML_PARSE_DTDVALID) != 0) ||
2744
3.19k
      (ctxt->validate != 0)) {
2745
2.99k
      xmlLoadEntityContent(ctxt, ent);
2746
2.99k
        } else {
2747
202
      xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
2748
202
      "not validating will not read content for PE entity %s\n",
2749
202
                          ent->name, NULL);
2750
202
        }
2751
3.19k
    }
2752
2753
322k
          if ((check) && (xmlParserEntityCheck(ctxt, ent->length)))
2754
130
                    goto int_error;
2755
2756
322k
                if (ent->flags & XML_ENT_EXPANDING) {
2757
762
              xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2758
762
                    xmlHaltParser(ctxt);
2759
762
                    if (ent->content != NULL)
2760
403
                        ent->content[0] = 0;
2761
762
                    goto int_error;
2762
762
                }
2763
2764
321k
                ent->flags |= XML_ENT_EXPANDING;
2765
321k
    ctxt->depth++;
2766
321k
    rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
2767
321k
                        ent->length, what, 0, 0, 0, check);
2768
321k
    ctxt->depth--;
2769
321k
                ent->flags &= ~XML_ENT_EXPANDING;
2770
2771
321k
    if (rep == NULL) {
2772
2.00k
                    if (ent->content != NULL)
2773
452
                        ent->content[0] = 0;
2774
2.00k
                    goto int_error;
2775
2.00k
                }
2776
319k
                current = rep;
2777
1.25G
                while (*current != 0) { /* non input consuming loop */
2778
1.25G
                    buffer[nbchars++] = *current++;
2779
1.25G
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2780
78.5k
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2781
78.5k
                    }
2782
1.25G
                }
2783
319k
                xmlFree(rep);
2784
319k
                rep = NULL;
2785
319k
      }
2786
3.08G
  } else {
2787
3.08G
      COPY_BUF(l,buffer,nbchars,c);
2788
3.08G
      str += l;
2789
3.08G
      if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2790
909k
          growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2791
909k
      }
2792
3.08G
  }
2793
3.11G
  if (str < last)
2794
3.08G
      c = CUR_SCHAR(str, l);
2795
23.3M
  else
2796
23.3M
      c = 0;
2797
3.11G
    }
2798
23.4M
    buffer[nbchars] = 0;
2799
23.4M
    return(buffer);
2800
2801
0
mem_error:
2802
0
    xmlErrMemory(ctxt, NULL);
2803
36.0k
int_error:
2804
36.0k
    if (rep != NULL)
2805
0
        xmlFree(rep);
2806
36.0k
    if (buffer != NULL)
2807
36.0k
        xmlFree(buffer);
2808
36.0k
    return(NULL);
2809
0
}
2810
2811
/**
2812
 * xmlStringLenDecodeEntities:
2813
 * @ctxt:  the parser context
2814
 * @str:  the input string
2815
 * @len: the string length
2816
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2817
 * @end:  an end marker xmlChar, 0 if none
2818
 * @end2:  an end marker xmlChar, 0 if none
2819
 * @end3:  an end marker xmlChar, 0 if none
2820
 *
2821
 * DEPRECATED: Internal function, don't use.
2822
 *
2823
 * Takes a entity string content and process to do the adequate substitutions.
2824
 *
2825
 * [67] Reference ::= EntityRef | CharRef
2826
 *
2827
 * [69] PEReference ::= '%' Name ';'
2828
 *
2829
 * Returns A newly allocated string with the substitution done. The caller
2830
 *      must deallocate it !
2831
 */
2832
xmlChar *
2833
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2834
                           int what, xmlChar end, xmlChar  end2,
2835
5.52k
                           xmlChar end3) {
2836
5.52k
    if ((ctxt == NULL) || (str == NULL) || (len < 0))
2837
0
        return(NULL);
2838
5.52k
    return(xmlStringDecodeEntitiesInt(ctxt, str, len, what,
2839
5.52k
                                      end, end2, end3, 0));
2840
5.52k
}
2841
2842
/**
2843
 * xmlStringDecodeEntities:
2844
 * @ctxt:  the parser context
2845
 * @str:  the input string
2846
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2847
 * @end:  an end marker xmlChar, 0 if none
2848
 * @end2:  an end marker xmlChar, 0 if none
2849
 * @end3:  an end marker xmlChar, 0 if none
2850
 *
2851
 * DEPRECATED: Internal function, don't use.
2852
 *
2853
 * Takes a entity string content and process to do the adequate substitutions.
2854
 *
2855
 * [67] Reference ::= EntityRef | CharRef
2856
 *
2857
 * [69] PEReference ::= '%' Name ';'
2858
 *
2859
 * Returns A newly allocated string with the substitution done. The caller
2860
 *      must deallocate it !
2861
 */
2862
xmlChar *
2863
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2864
242k
            xmlChar end, xmlChar  end2, xmlChar end3) {
2865
242k
    if ((ctxt == NULL) || (str == NULL)) return(NULL);
2866
242k
    return(xmlStringDecodeEntitiesInt(ctxt, str, xmlStrlen(str), what,
2867
242k
                                      end, end2, end3, 0));
2868
242k
}
2869
2870
/************************************************************************
2871
 *                  *
2872
 *    Commodity functions, cleanup needed ?     *
2873
 *                  *
2874
 ************************************************************************/
2875
2876
/**
2877
 * areBlanks:
2878
 * @ctxt:  an XML parser context
2879
 * @str:  a xmlChar *
2880
 * @len:  the size of @str
2881
 * @blank_chars: we know the chars are blanks
2882
 *
2883
 * Is this a sequence of blank chars that one can ignore ?
2884
 *
2885
 * Returns 1 if ignorable 0 otherwise.
2886
 */
2887
2888
static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2889
7.66M
                     int blank_chars) {
2890
7.66M
    int i, ret;
2891
7.66M
    xmlNodePtr lastChild;
2892
2893
    /*
2894
     * Don't spend time trying to differentiate them, the same callback is
2895
     * used !
2896
     */
2897
7.66M
    if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2898
1.92M
  return(0);
2899
2900
    /*
2901
     * Check for xml:space value.
2902
     */
2903
5.73M
    if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2904
5.73M
        (*(ctxt->space) == -2))
2905
2.54M
  return(0);
2906
2907
    /*
2908
     * Check that the string is made of blanks
2909
     */
2910
3.19M
    if (blank_chars == 0) {
2911
4.20M
  for (i = 0;i < len;i++)
2912
3.77M
      if (!(IS_BLANK_CH(str[i]))) return(0);
2913
1.34M
    }
2914
2915
    /*
2916
     * Look if the element is mixed content in the DTD if available
2917
     */
2918
2.27M
    if (ctxt->node == NULL) return(0);
2919
2.15M
    if (ctxt->myDoc != NULL) {
2920
2.15M
  ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2921
2.15M
        if (ret == 0) return(1);
2922
2.01M
        if (ret == 1) return(0);
2923
2.01M
    }
2924
2925
    /*
2926
     * Otherwise, heuristic :-\
2927
     */
2928
2.00M
    if ((RAW != '<') && (RAW != 0xD)) return(0);
2929
1.93M
    if ((ctxt->node->children == NULL) &&
2930
1.93M
  (RAW == '<') && (NXT(1) == '/')) return(0);
2931
2932
1.91M
    lastChild = xmlGetLastChild(ctxt->node);
2933
1.91M
    if (lastChild == NULL) {
2934
829k
        if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2935
829k
            (ctxt->node->content != NULL)) return(0);
2936
1.08M
    } else if (xmlNodeIsText(lastChild))
2937
116k
        return(0);
2938
968k
    else if ((ctxt->node->children != NULL) &&
2939
968k
             (xmlNodeIsText(ctxt->node->children)))
2940
41.9k
        return(0);
2941
1.75M
    return(1);
2942
1.91M
}
2943
2944
/************************************************************************
2945
 *                  *
2946
 *    Extra stuff for namespace support     *
2947
 *  Relates to http://www.w3.org/TR/WD-xml-names      *
2948
 *                  *
2949
 ************************************************************************/
2950
2951
/**
2952
 * xmlSplitQName:
2953
 * @ctxt:  an XML parser context
2954
 * @name:  an XML parser context
2955
 * @prefix:  a xmlChar **
2956
 *
2957
 * parse an UTF8 encoded XML qualified name string
2958
 *
2959
 * [NS 5] QName ::= (Prefix ':')? LocalPart
2960
 *
2961
 * [NS 6] Prefix ::= NCName
2962
 *
2963
 * [NS 7] LocalPart ::= NCName
2964
 *
2965
 * Returns the local part, and prefix is updated
2966
 *   to get the Prefix if any.
2967
 */
2968
2969
xmlChar *
2970
7.27M
xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2971
7.27M
    xmlChar buf[XML_MAX_NAMELEN + 5];
2972
7.27M
    xmlChar *buffer = NULL;
2973
7.27M
    int len = 0;
2974
7.27M
    int max = XML_MAX_NAMELEN;
2975
7.27M
    xmlChar *ret = NULL;
2976
7.27M
    const xmlChar *cur = name;
2977
7.27M
    int c;
2978
2979
7.27M
    if (prefix == NULL) return(NULL);
2980
7.27M
    *prefix = NULL;
2981
2982
7.27M
    if (cur == NULL) return(NULL);
2983
2984
#ifndef XML_XML_NAMESPACE
2985
    /* xml: prefix is not really a namespace */
2986
    if ((cur[0] == 'x') && (cur[1] == 'm') &&
2987
        (cur[2] == 'l') && (cur[3] == ':'))
2988
  return(xmlStrdup(name));
2989
#endif
2990
2991
    /* nasty but well=formed */
2992
7.27M
    if (cur[0] == ':')
2993
23.5k
  return(xmlStrdup(name));
2994
2995
7.24M
    c = *cur++;
2996
39.9M
    while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2997
32.6M
  buf[len++] = c;
2998
32.6M
  c = *cur++;
2999
32.6M
    }
3000
7.24M
    if (len >= max) {
3001
  /*
3002
   * Okay someone managed to make a huge name, so he's ready to pay
3003
   * for the processing speed.
3004
   */
3005
21.5k
  max = len * 2;
3006
3007
21.5k
  buffer = (xmlChar *) xmlMallocAtomic(max);
3008
21.5k
  if (buffer == NULL) {
3009
0
      xmlErrMemory(ctxt, NULL);
3010
0
      return(NULL);
3011
0
  }
3012
21.5k
  memcpy(buffer, buf, len);
3013
2.59M
  while ((c != 0) && (c != ':')) { /* tested bigname.xml */
3014
2.56M
      if (len + 10 > max) {
3015
6.70k
          xmlChar *tmp;
3016
3017
6.70k
    max *= 2;
3018
6.70k
    tmp = (xmlChar *) xmlRealloc(buffer, max);
3019
6.70k
    if (tmp == NULL) {
3020
0
        xmlFree(buffer);
3021
0
        xmlErrMemory(ctxt, NULL);
3022
0
        return(NULL);
3023
0
    }
3024
6.70k
    buffer = tmp;
3025
6.70k
      }
3026
2.56M
      buffer[len++] = c;
3027
2.56M
      c = *cur++;
3028
2.56M
  }
3029
21.5k
  buffer[len] = 0;
3030
21.5k
    }
3031
3032
7.24M
    if ((c == ':') && (*cur == 0)) {
3033
52.5k
        if (buffer != NULL)
3034
947
      xmlFree(buffer);
3035
52.5k
  *prefix = NULL;
3036
52.5k
  return(xmlStrdup(name));
3037
52.5k
    }
3038
3039
7.19M
    if (buffer == NULL)
3040
7.17M
  ret = xmlStrndup(buf, len);
3041
20.5k
    else {
3042
20.5k
  ret = buffer;
3043
20.5k
  buffer = NULL;
3044
20.5k
  max = XML_MAX_NAMELEN;
3045
20.5k
    }
3046
3047
3048
7.19M
    if (c == ':') {
3049
1.86M
  c = *cur;
3050
1.86M
        *prefix = ret;
3051
1.86M
  if (c == 0) {
3052
0
      return(xmlStrndup(BAD_CAST "", 0));
3053
0
  }
3054
1.86M
  len = 0;
3055
3056
  /*
3057
   * Check that the first character is proper to start
3058
   * a new name
3059
   */
3060
1.86M
  if (!(((c >= 0x61) && (c <= 0x7A)) ||
3061
1.86M
        ((c >= 0x41) && (c <= 0x5A)) ||
3062
1.86M
        (c == '_') || (c == ':'))) {
3063
75.2k
      int l;
3064
75.2k
      int first = CUR_SCHAR(cur, l);
3065
3066
75.2k
      if (!IS_LETTER(first) && (first != '_')) {
3067
26.2k
    xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3068
26.2k
          "Name %s is not XML Namespace compliant\n",
3069
26.2k
          name);
3070
26.2k
      }
3071
75.2k
  }
3072
1.86M
  cur++;
3073
3074
13.7M
  while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3075
11.8M
      buf[len++] = c;
3076
11.8M
      c = *cur++;
3077
11.8M
  }
3078
1.86M
  if (len >= max) {
3079
      /*
3080
       * Okay someone managed to make a huge name, so he's ready to pay
3081
       * for the processing speed.
3082
       */
3083
13.3k
      max = len * 2;
3084
3085
13.3k
      buffer = (xmlChar *) xmlMallocAtomic(max);
3086
13.3k
      if (buffer == NULL) {
3087
0
          xmlErrMemory(ctxt, NULL);
3088
0
    return(NULL);
3089
0
      }
3090
13.3k
      memcpy(buffer, buf, len);
3091
1.28M
      while (c != 0) { /* tested bigname2.xml */
3092
1.26M
    if (len + 10 > max) {
3093
3.71k
        xmlChar *tmp;
3094
3095
3.71k
        max *= 2;
3096
3.71k
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3097
3.71k
        if (tmp == NULL) {
3098
0
      xmlErrMemory(ctxt, NULL);
3099
0
      xmlFree(buffer);
3100
0
      return(NULL);
3101
0
        }
3102
3.71k
        buffer = tmp;
3103
3.71k
    }
3104
1.26M
    buffer[len++] = c;
3105
1.26M
    c = *cur++;
3106
1.26M
      }
3107
13.3k
      buffer[len] = 0;
3108
13.3k
  }
3109
3110
1.86M
  if (buffer == NULL)
3111
1.85M
      ret = xmlStrndup(buf, len);
3112
13.3k
  else {
3113
13.3k
      ret = buffer;
3114
13.3k
  }
3115
1.86M
    }
3116
3117
7.19M
    return(ret);
3118
7.19M
}
3119
3120
/************************************************************************
3121
 *                  *
3122
 *      The parser itself       *
3123
 *  Relates to http://www.w3.org/TR/REC-xml       *
3124
 *                  *
3125
 ************************************************************************/
3126
3127
/************************************************************************
3128
 *                  *
3129
 *  Routines to parse Name, NCName and NmToken      *
3130
 *                  *
3131
 ************************************************************************/
3132
#ifdef DEBUG
/*
 * Counters only compiled in DEBUG builds. The name parsing routines bump
 * the matching counter on entry (e.g. xmlParseName increments nbParseName,
 * xmlParseNameComplex increments nbParseNameComplex).
 */
3133
static unsigned long nbParseName = 0;
3134
static unsigned long nbParseNmToken = 0;
3135
static unsigned long nbParseNCName = 0;
3136
static unsigned long nbParseNCNameComplex = 0;
3137
static unsigned long nbParseNameComplex = 0;
3138
static unsigned long nbParseStringName = 0;
3139
#endif
3140
3141
/*
3142
 * The two following functions are related to the change of accepted
3143
 * characters for Name and NmToken in the Revision 5 of XML-1.0
3144
 * They correspond to the modified production [4] and the new production [4a]
3145
 * changes in that revision. Also note that the macros used for the
3146
 * productions Letter, Digit, CombiningChar and Extender are not needed
3147
 * anymore.
3148
 * We still keep compatibility to pre-revision5 parsing semantic if the
3149
 * new XML_PARSE_OLD10 option is given to the parser.
3150
 */
3151
static int
3152
29.3M
xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3153
29.3M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3154
        /*
3155
   * Use the new checks of production [4] [4a] amd [5] of the
3156
   * Update 5 of XML-1.0
3157
   */
3158
21.4M
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3159
21.4M
      (((c >= 'a') && (c <= 'z')) ||
3160
21.4M
       ((c >= 'A') && (c <= 'Z')) ||
3161
21.4M
       (c == '_') || (c == ':') ||
3162
21.4M
       ((c >= 0xC0) && (c <= 0xD6)) ||
3163
21.4M
       ((c >= 0xD8) && (c <= 0xF6)) ||
3164
21.4M
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3165
21.4M
       ((c >= 0x370) && (c <= 0x37D)) ||
3166
21.4M
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3167
21.4M
       ((c >= 0x200C) && (c <= 0x200D)) ||
3168
21.4M
       ((c >= 0x2070) && (c <= 0x218F)) ||
3169
21.4M
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3170
21.4M
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3171
21.4M
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3172
21.4M
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3173
21.4M
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3174
20.2M
      return(1);
3175
21.4M
    } else {
3176
7.90M
        if (IS_LETTER(c) || (c == '_') || (c == ':'))
3177
7.42M
      return(1);
3178
7.90M
    }
3179
1.68M
    return(0);
3180
29.3M
}
3181
3182
static int
3183
686M
xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3184
686M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3185
        /*
3186
   * Use the new checks of production [4] [4a] amd [5] of the
3187
   * Update 5 of XML-1.0
3188
   */
3189
602M
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3190
602M
      (((c >= 'a') && (c <= 'z')) ||
3191
601M
       ((c >= 'A') && (c <= 'Z')) ||
3192
601M
       ((c >= '0') && (c <= '9')) || /* !start */
3193
601M
       (c == '_') || (c == ':') ||
3194
601M
       (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3195
601M
       ((c >= 0xC0) && (c <= 0xD6)) ||
3196
601M
       ((c >= 0xD8) && (c <= 0xF6)) ||
3197
601M
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3198
601M
       ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3199
601M
       ((c >= 0x370) && (c <= 0x37D)) ||
3200
601M
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3201
601M
       ((c >= 0x200C) && (c <= 0x200D)) ||
3202
601M
       ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3203
601M
       ((c >= 0x2070) && (c <= 0x218F)) ||
3204
601M
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3205
601M
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3206
601M
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3207
601M
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3208
601M
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3209
581M
       return(1);
3210
602M
    } else {
3211
84.2M
        if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3212
84.2M
            (c == '.') || (c == '-') ||
3213
84.2M
      (c == '_') || (c == ':') ||
3214
84.2M
      (IS_COMBINING(c)) ||
3215
84.2M
      (IS_EXTENDER(c)))
3216
76.7M
      return(1);
3217
84.2M
    }
3218
27.7M
    return(0);
3219
686M
}
3220
3221
/*
 * Forward declaration only: the definition of xmlParseAttValueInternal
 * is not part of this section of the file.
 */
static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
3222
                                          int *len, int *alloc, int normalize);
3223
3224
static const xmlChar *
3225
4.07M
xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3226
4.07M
    int len = 0, l;
3227
4.07M
    int c;
3228
4.07M
    int count = 0;
3229
4.07M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3230
1.78M
                    XML_MAX_TEXT_LENGTH :
3231
4.07M
                    XML_MAX_NAME_LENGTH;
3232
3233
#ifdef DEBUG
3234
    nbParseNameComplex++;
3235
#endif
3236
3237
    /*
3238
     * Handler for more complex cases
3239
     */
3240
4.07M
    GROW;
3241
4.07M
    if (ctxt->instate == XML_PARSER_EOF)
3242
27
        return(NULL);
3243
4.07M
    c = CUR_CHAR(l);
3244
4.07M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3245
        /*
3246
   * Use the new checks of production [4] [4a] amd [5] of the
3247
   * Update 5 of XML-1.0
3248
   */
3249
2.25M
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3250
2.25M
      (!(((c >= 'a') && (c <= 'z')) ||
3251
2.12M
         ((c >= 'A') && (c <= 'Z')) ||
3252
2.12M
         (c == '_') || (c == ':') ||
3253
2.12M
         ((c >= 0xC0) && (c <= 0xD6)) ||
3254
2.12M
         ((c >= 0xD8) && (c <= 0xF6)) ||
3255
2.12M
         ((c >= 0xF8) && (c <= 0x2FF)) ||
3256
2.12M
         ((c >= 0x370) && (c <= 0x37D)) ||
3257
2.12M
         ((c >= 0x37F) && (c <= 0x1FFF)) ||
3258
2.12M
         ((c >= 0x200C) && (c <= 0x200D)) ||
3259
2.12M
         ((c >= 0x2070) && (c <= 0x218F)) ||
3260
2.12M
         ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3261
2.12M
         ((c >= 0x3001) && (c <= 0xD7FF)) ||
3262
2.12M
         ((c >= 0xF900) && (c <= 0xFDCF)) ||
3263
2.12M
         ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3264
2.12M
         ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3265
1.51M
      return(NULL);
3266
1.51M
  }
3267
743k
  len += l;
3268
743k
  NEXTL(l);
3269
743k
  c = CUR_CHAR(l);
3270
15.4M
  while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3271
15.4M
         (((c >= 'a') && (c <= 'z')) ||
3272
15.2M
          ((c >= 'A') && (c <= 'Z')) ||
3273
15.2M
          ((c >= '0') && (c <= '9')) || /* !start */
3274
15.2M
          (c == '_') || (c == ':') ||
3275
15.2M
          (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3276
15.2M
          ((c >= 0xC0) && (c <= 0xD6)) ||
3277
15.2M
          ((c >= 0xD8) && (c <= 0xF6)) ||
3278
15.2M
          ((c >= 0xF8) && (c <= 0x2FF)) ||
3279
15.2M
          ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3280
15.2M
          ((c >= 0x370) && (c <= 0x37D)) ||
3281
15.2M
          ((c >= 0x37F) && (c <= 0x1FFF)) ||
3282
15.2M
          ((c >= 0x200C) && (c <= 0x200D)) ||
3283
15.2M
          ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3284
15.2M
          ((c >= 0x2070) && (c <= 0x218F)) ||
3285
15.2M
          ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3286
15.2M
          ((c >= 0x3001) && (c <= 0xD7FF)) ||
3287
15.2M
          ((c >= 0xF900) && (c <= 0xFDCF)) ||
3288
15.2M
          ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3289
15.2M
          ((c >= 0x10000) && (c <= 0xEFFFF))
3290
15.2M
    )) {
3291
14.7M
      if (count++ > XML_PARSER_CHUNK_SIZE) {
3292
56.1k
    count = 0;
3293
56.1k
    GROW;
3294
56.1k
                if (ctxt->instate == XML_PARSER_EOF)
3295
0
                    return(NULL);
3296
56.1k
      }
3297
14.7M
            if (len <= INT_MAX - l)
3298
14.7M
          len += l;
3299
14.7M
      NEXTL(l);
3300
14.7M
      c = CUR_CHAR(l);
3301
14.7M
  }
3302
1.81M
    } else {
3303
1.81M
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3304
1.81M
      (!IS_LETTER(c) && (c != '_') &&
3305
1.71M
       (c != ':'))) {
3306
1.27M
      return(NULL);
3307
1.27M
  }
3308
544k
  len += l;
3309
544k
  NEXTL(l);
3310
544k
  c = CUR_CHAR(l);
3311
3312
10.7M
  while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3313
10.7M
         ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3314
10.5M
    (c == '.') || (c == '-') ||
3315
10.5M
    (c == '_') || (c == ':') ||
3316
10.5M
    (IS_COMBINING(c)) ||
3317
10.5M
    (IS_EXTENDER(c)))) {
3318
10.1M
      if (count++ > XML_PARSER_CHUNK_SIZE) {
3319
37.8k
    count = 0;
3320
37.8k
    GROW;
3321
37.8k
                if (ctxt->instate == XML_PARSER_EOF)
3322
0
                    return(NULL);
3323
37.8k
      }
3324
10.1M
            if (len <= INT_MAX - l)
3325
10.1M
          len += l;
3326
10.1M
      NEXTL(l);
3327
10.1M
      c = CUR_CHAR(l);
3328
10.1M
  }
3329
544k
    }
3330
1.28M
    if (len > maxLength) {
3331
10
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3332
10
        return(NULL);
3333
10
    }
3334
1.28M
    if (ctxt->input->cur - ctxt->input->base < len) {
3335
        /*
3336
         * There were a couple of bugs where PERefs lead to to a change
3337
         * of the buffer. Check the buffer size to avoid passing an invalid
3338
         * pointer to xmlDictLookup.
3339
         */
3340
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3341
0
                    "unexpected change of input buffer");
3342
0
        return (NULL);
3343
0
    }
3344
1.28M
    if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3345
7.03k
        return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3346
1.28M
    return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3347
1.28M
}
3348
3349
/**
3350
 * xmlParseName:
3351
 * @ctxt:  an XML parser context
3352
 *
3353
 * DEPRECATED: Internal function, don't use.
3354
 *
3355
 * parse an XML name.
3356
 *
3357
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3358
 *                  CombiningChar | Extender
3359
 *
3360
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3361
 *
3362
 * [6] Names ::= Name (#x20 Name)*
3363
 *
3364
 * Returns the Name parsed or NULL
3365
 */
3366
3367
const xmlChar *
3368
31.5M
xmlParseName(xmlParserCtxtPtr ctxt) {
3369
31.5M
    const xmlChar *in;
3370
31.5M
    const xmlChar *ret;
3371
31.5M
    size_t count = 0;
3372
31.5M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3373
7.97M
                       XML_MAX_TEXT_LENGTH :
3374
31.5M
                       XML_MAX_NAME_LENGTH;
3375
3376
31.5M
    GROW;
3377
3378
#ifdef DEBUG
3379
    nbParseName++;
3380
#endif
3381
3382
    /*
3383
     * Accelerator for simple ASCII names
3384
     */
3385
31.5M
    in = ctxt->input->cur;
3386
31.5M
    if (((*in >= 0x61) && (*in <= 0x7A)) ||
3387
31.5M
  ((*in >= 0x41) && (*in <= 0x5A)) ||
3388
31.5M
  (*in == '_') || (*in == ':')) {
3389
28.2M
  in++;
3390
136M
  while (((*in >= 0x61) && (*in <= 0x7A)) ||
3391
136M
         ((*in >= 0x41) && (*in <= 0x5A)) ||
3392
136M
         ((*in >= 0x30) && (*in <= 0x39)) ||
3393
136M
         (*in == '_') || (*in == '-') ||
3394
136M
         (*in == ':') || (*in == '.'))
3395
108M
      in++;
3396
28.2M
  if ((*in > 0) && (*in < 0x80)) {
3397
27.4M
      count = in - ctxt->input->cur;
3398
27.4M
            if (count > maxLength) {
3399
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3400
0
                return(NULL);
3401
0
            }
3402
27.4M
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3403
27.4M
      ctxt->input->cur = in;
3404
27.4M
      ctxt->input->col += count;
3405
27.4M
      if (ret == NULL)
3406
0
          xmlErrMemory(ctxt, NULL);
3407
27.4M
      return(ret);
3408
27.4M
  }
3409
28.2M
    }
3410
    /* accelerator for special cases */
3411
4.07M
    return(xmlParseNameComplex(ctxt));
3412
31.5M
}
3413
3414
static const xmlChar *
3415
2.93M
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3416
2.93M
    int len = 0, l;
3417
2.93M
    int c;
3418
2.93M
    int count = 0;
3419
2.93M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3420
866k
                    XML_MAX_TEXT_LENGTH :
3421
2.93M
                    XML_MAX_NAME_LENGTH;
3422
2.93M
    size_t startPosition = 0;
3423
3424
#ifdef DEBUG
3425
    nbParseNCNameComplex++;
3426
#endif
3427
3428
    /*
3429
     * Handler for more complex cases
3430
     */
3431
2.93M
    GROW;
3432
2.93M
    startPosition = CUR_PTR - BASE_PTR;
3433
2.93M
    c = CUR_CHAR(l);
3434
2.93M
    if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3435
2.93M
  (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3436
1.93M
  return(NULL);
3437
1.93M
    }
3438
3439
17.8M
    while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3440
17.8M
     (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3441
16.8M
  if (count++ > XML_PARSER_CHUNK_SIZE) {
3442
50.5k
      count = 0;
3443
50.5k
      GROW;
3444
50.5k
            if (ctxt->instate == XML_PARSER_EOF)
3445
0
                return(NULL);
3446
50.5k
  }
3447
16.8M
        if (len <= INT_MAX - l)
3448
16.8M
      len += l;
3449
16.8M
  NEXTL(l);
3450
16.8M
  c = CUR_CHAR(l);
3451
16.8M
  if (c == 0) {
3452
68.5k
      count = 0;
3453
      /*
3454
       * when shrinking to extend the buffer we really need to preserve
3455
       * the part of the name we already parsed. Hence rolling back
3456
       * by current length.
3457
       */
3458
68.5k
      ctxt->input->cur -= l;
3459
68.5k
      GROW;
3460
68.5k
            if (ctxt->instate == XML_PARSER_EOF)
3461
0
                return(NULL);
3462
68.5k
      ctxt->input->cur += l;
3463
68.5k
      c = CUR_CHAR(l);
3464
68.5k
  }
3465
16.8M
    }
3466
998k
    if (len > maxLength) {
3467
6
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3468
6
        return(NULL);
3469
6
    }
3470
998k
    return(xmlDictLookup(ctxt->dict, (BASE_PTR + startPosition), len));
3471
998k
}
3472
3473
/**
3474
 * xmlParseNCName:
3475
 * @ctxt:  an XML parser context
3476
 * @len:  length of the string parsed
3477
 *
3478
 * parse an XML name.
3479
 *
3480
 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3481
 *                      CombiningChar | Extender
3482
 *
3483
 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3484
 *
3485
 * Returns the Name parsed or NULL
3486
 */
3487
3488
static const xmlChar *
3489
19.1M
xmlParseNCName(xmlParserCtxtPtr ctxt) {
3490
19.1M
    const xmlChar *in, *e;
3491
19.1M
    const xmlChar *ret;
3492
19.1M
    size_t count = 0;
3493
19.1M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3494
4.48M
                       XML_MAX_TEXT_LENGTH :
3495
19.1M
                       XML_MAX_NAME_LENGTH;
3496
3497
#ifdef DEBUG
3498
    nbParseNCName++;
3499
#endif
3500
3501
    /*
3502
     * Accelerator for simple ASCII names
3503
     */
3504
19.1M
    in = ctxt->input->cur;
3505
19.1M
    e = ctxt->input->end;
3506
19.1M
    if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3507
19.1M
   ((*in >= 0x41) && (*in <= 0x5A)) ||
3508
19.1M
   (*in == '_')) && (in < e)) {
3509
16.7M
  in++;
3510
65.3M
  while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3511
65.3M
          ((*in >= 0x41) && (*in <= 0x5A)) ||
3512
65.3M
          ((*in >= 0x30) && (*in <= 0x39)) ||
3513
65.3M
          (*in == '_') || (*in == '-') ||
3514
65.3M
          (*in == '.')) && (in < e))
3515
48.5M
      in++;
3516
16.7M
  if (in >= e)
3517
4.28k
      goto complex;
3518
16.7M
  if ((*in > 0) && (*in < 0x80)) {
3519
16.1M
      count = in - ctxt->input->cur;
3520
16.1M
            if (count > maxLength) {
3521
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3522
0
                return(NULL);
3523
0
            }
3524
16.1M
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3525
16.1M
      ctxt->input->cur = in;
3526
16.1M
      ctxt->input->col += count;
3527
16.1M
      if (ret == NULL) {
3528
0
          xmlErrMemory(ctxt, NULL);
3529
0
      }
3530
16.1M
      return(ret);
3531
16.1M
  }
3532
16.7M
    }
3533
2.93M
complex:
3534
2.93M
    return(xmlParseNCNameComplex(ctxt));
3535
19.1M
}
3536
3537
/**
3538
 * xmlParseNameAndCompare:
3539
 * @ctxt:  an XML parser context
3540
 *
3541
 * parse an XML name and compares for match
3542
 * (specialized for endtag parsing)
3543
 *
3544
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3545
 * and the name for mismatch
3546
 */
3547
3548
static const xmlChar *
3549
2.98M
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3550
2.98M
    register const xmlChar *cmp = other;
3551
2.98M
    register const xmlChar *in;
3552
2.98M
    const xmlChar *ret;
3553
3554
2.98M
    GROW;
3555
2.98M
    if (ctxt->instate == XML_PARSER_EOF)
3556
0
        return(NULL);
3557
3558
2.98M
    in = ctxt->input->cur;
3559
13.8M
    while (*in != 0 && *in == *cmp) {
3560
10.8M
  ++in;
3561
10.8M
  ++cmp;
3562
10.8M
    }
3563
2.98M
    if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3564
  /* success */
3565
2.21M
  ctxt->input->col += in - ctxt->input->cur;
3566
2.21M
  ctxt->input->cur = in;
3567
2.21M
  return (const xmlChar*) 1;
3568
2.21M
    }
3569
    /* failure (or end of input buffer), check with full function */
3570
772k
    ret = xmlParseName (ctxt);
3571
    /* strings coming from the dictionary direct compare possible */
3572
772k
    if (ret == other) {
3573
32.4k
  return (const xmlChar*) 1;
3574
32.4k
    }
3575
740k
    return ret;
3576
772k
}
3577
3578
/**
3579
 * xmlParseStringName:
3580
 * @ctxt:  an XML parser context
3581
 * @str:  a pointer to the string pointer (IN/OUT)
3582
 *
3583
 * parse an XML name.
3584
 *
3585
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3586
 *                  CombiningChar | Extender
3587
 *
3588
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3589
 *
3590
 * [6] Names ::= Name (#x20 Name)*
3591
 *
3592
 * Returns the Name parsed or NULL. The @str pointer
3593
 * is updated to the current location in the string.
3594
 */
3595
3596
static xmlChar *
3597
26.6M
xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3598
26.6M
    xmlChar buf[XML_MAX_NAMELEN + 5];
3599
26.6M
    const xmlChar *cur = *str;
3600
26.6M
    int len = 0, l;
3601
26.6M
    int c;
3602
26.6M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3603
5.18M
                    XML_MAX_TEXT_LENGTH :
3604
26.6M
                    XML_MAX_NAME_LENGTH;
3605
3606
#ifdef DEBUG
3607
    nbParseStringName++;
3608
#endif
3609
3610
26.6M
    c = CUR_SCHAR(cur, l);
3611
26.6M
    if (!xmlIsNameStartChar(ctxt, c)) {
3612
29.0k
  return(NULL);
3613
29.0k
    }
3614
3615
26.6M
    COPY_BUF(l,buf,len,c);
3616
26.6M
    cur += l;
3617
26.6M
    c = CUR_SCHAR(cur, l);
3618
282M
    while (xmlIsNameChar(ctxt, c)) {
3619
257M
  COPY_BUF(l,buf,len,c);
3620
257M
  cur += l;
3621
257M
  c = CUR_SCHAR(cur, l);
3622
257M
  if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3623
      /*
3624
       * Okay someone managed to make a huge name, so he's ready to pay
3625
       * for the processing speed.
3626
       */
3627
1.55M
      xmlChar *buffer;
3628
1.55M
      int max = len * 2;
3629
3630
1.55M
      buffer = (xmlChar *) xmlMallocAtomic(max);
3631
1.55M
      if (buffer == NULL) {
3632
0
          xmlErrMemory(ctxt, NULL);
3633
0
    return(NULL);
3634
0
      }
3635
1.55M
      memcpy(buffer, buf, len);
3636
380M
      while (xmlIsNameChar(ctxt, c)) {
3637
379M
    if (len + 10 > max) {
3638
1.57M
        xmlChar *tmp;
3639
3640
1.57M
        max *= 2;
3641
1.57M
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3642
1.57M
        if (tmp == NULL) {
3643
0
      xmlErrMemory(ctxt, NULL);
3644
0
      xmlFree(buffer);
3645
0
      return(NULL);
3646
0
        }
3647
1.57M
        buffer = tmp;
3648
1.57M
    }
3649
379M
    COPY_BUF(l,buffer,len,c);
3650
379M
    cur += l;
3651
379M
    c = CUR_SCHAR(cur, l);
3652
379M
                if (len > maxLength) {
3653
0
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3654
0
                    xmlFree(buffer);
3655
0
                    return(NULL);
3656
0
                }
3657
379M
      }
3658
1.55M
      buffer[len] = 0;
3659
1.55M
      *str = cur;
3660
1.55M
      return(buffer);
3661
1.55M
  }
3662
257M
    }
3663
25.0M
    if (len > maxLength) {
3664
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3665
0
        return(NULL);
3666
0
    }
3667
25.0M
    *str = cur;
3668
25.0M
    return(xmlStrndup(buf, len));
3669
25.0M
}
3670
3671
/**
3672
 * xmlParseNmtoken:
3673
 * @ctxt:  an XML parser context
3674
 *
3675
 * DEPRECATED: Internal function, don't use.
3676
 *
3677
 * parse an XML Nmtoken.
3678
 *
3679
 * [7] Nmtoken ::= (NameChar)+
3680
 *
3681
 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3682
 *
3683
 * Returns the Nmtoken parsed or NULL
3684
 */
3685
3686
xmlChar *
3687
535k
xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3688
535k
    xmlChar buf[XML_MAX_NAMELEN + 5];
3689
535k
    int len = 0, l;
3690
535k
    int c;
3691
535k
    int count = 0;
3692
535k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3693
153k
                    XML_MAX_TEXT_LENGTH :
3694
535k
                    XML_MAX_NAME_LENGTH;
3695
3696
#ifdef DEBUG
3697
    nbParseNmToken++;
3698
#endif
3699
3700
535k
    GROW;
3701
535k
    if (ctxt->instate == XML_PARSER_EOF)
3702
0
        return(NULL);
3703
535k
    c = CUR_CHAR(l);
3704
3705
3.40M
    while (xmlIsNameChar(ctxt, c)) {
3706
2.87M
  if (count++ > XML_PARSER_CHUNK_SIZE) {
3707
0
      count = 0;
3708
0
      GROW;
3709
0
  }
3710
2.87M
  COPY_BUF(l,buf,len,c);
3711
2.87M
  NEXTL(l);
3712
2.87M
  c = CUR_CHAR(l);
3713
2.87M
  if (c == 0) {
3714
3.28k
      count = 0;
3715
3.28k
      GROW;
3716
3.28k
      if (ctxt->instate == XML_PARSER_EOF)
3717
0
    return(NULL);
3718
3.28k
            c = CUR_CHAR(l);
3719
3.28k
  }
3720
2.87M
  if (len >= XML_MAX_NAMELEN) {
3721
      /*
3722
       * Okay someone managed to make a huge token, so he's ready to pay
3723
       * for the processing speed.
3724
       */
3725
5.02k
      xmlChar *buffer;
3726
5.02k
      int max = len * 2;
3727
3728
5.02k
      buffer = (xmlChar *) xmlMallocAtomic(max);
3729
5.02k
      if (buffer == NULL) {
3730
0
          xmlErrMemory(ctxt, NULL);
3731
0
    return(NULL);
3732
0
      }
3733
5.02k
      memcpy(buffer, buf, len);
3734
1.94M
      while (xmlIsNameChar(ctxt, c)) {
3735
1.93M
    if (count++ > XML_PARSER_CHUNK_SIZE) {
3736
21.6k
        count = 0;
3737
21.6k
        GROW;
3738
21.6k
                    if (ctxt->instate == XML_PARSER_EOF) {
3739
0
                        xmlFree(buffer);
3740
0
                        return(NULL);
3741
0
                    }
3742
21.6k
    }
3743
1.93M
    if (len + 10 > max) {
3744
3.78k
        xmlChar *tmp;
3745
3746
3.78k
        max *= 2;
3747
3.78k
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3748
3.78k
        if (tmp == NULL) {
3749
0
      xmlErrMemory(ctxt, NULL);
3750
0
      xmlFree(buffer);
3751
0
      return(NULL);
3752
0
        }
3753
3.78k
        buffer = tmp;
3754
3.78k
    }
3755
1.93M
    COPY_BUF(l,buffer,len,c);
3756
1.93M
    NEXTL(l);
3757
1.93M
    c = CUR_CHAR(l);
3758
1.93M
                if (len > maxLength) {
3759
0
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3760
0
                    xmlFree(buffer);
3761
0
                    return(NULL);
3762
0
                }
3763
1.93M
      }
3764
5.02k
      buffer[len] = 0;
3765
5.02k
      return(buffer);
3766
5.02k
  }
3767
2.87M
    }
3768
530k
    if (len == 0)
3769
118k
        return(NULL);
3770
411k
    if (len > maxLength) {
3771
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3772
0
        return(NULL);
3773
0
    }
3774
411k
    return(xmlStrndup(buf, len));
3775
411k
}
3776
3777
/**
3778
 * xmlParseEntityValue:
3779
 * @ctxt:  an XML parser context
3780
 * @orig:  if non-NULL store a copy of the original entity value
3781
 *
3782
 * DEPRECATED: Internal function, don't use.
3783
 *
3784
 * parse a value for ENTITY declarations
3785
 *
3786
 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3787
 *                 "'" ([^%&'] | PEReference | Reference)* "'"
3788
 *
3789
 * Returns the EntityValue parsed with reference substituted or NULL
3790
 */
3791
3792
xmlChar *
3793
793k
xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3794
793k
    xmlChar *buf = NULL;
3795
793k
    int len = 0;
3796
793k
    int size = XML_PARSER_BUFFER_SIZE;
3797
793k
    int c, l;
3798
793k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3799
195k
                    XML_MAX_HUGE_LENGTH :
3800
793k
                    XML_MAX_TEXT_LENGTH;
3801
793k
    xmlChar stop;
3802
793k
    xmlChar *ret = NULL;
3803
793k
    const xmlChar *cur = NULL;
3804
793k
    xmlParserInputPtr input;
3805
3806
793k
    if (RAW == '"') stop = '"';
3807
161k
    else if (RAW == '\'') stop = '\'';
3808
0
    else {
3809
0
  xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
3810
0
  return(NULL);
3811
0
    }
3812
793k
    buf = (xmlChar *) xmlMallocAtomic(size);
3813
793k
    if (buf == NULL) {
3814
0
  xmlErrMemory(ctxt, NULL);
3815
0
  return(NULL);
3816
0
    }
3817
3818
    /*
3819
     * The content of the entity definition is copied in a buffer.
3820
     */
3821
3822
793k
    ctxt->instate = XML_PARSER_ENTITY_VALUE;
3823
793k
    input = ctxt->input;
3824
793k
    GROW;
3825
793k
    if (ctxt->instate == XML_PARSER_EOF)
3826
0
        goto error;
3827
793k
    NEXT;
3828
793k
    c = CUR_CHAR(l);
3829
    /*
3830
     * NOTE: 4.4.5 Included in Literal
3831
     * When a parameter entity reference appears in a literal entity
3832
     * value, ... a single or double quote character in the replacement
3833
     * text is always treated as a normal data character and will not
3834
     * terminate the literal.
3835
     * In practice it means we stop the loop only when back at parsing
3836
     * the initial entity and the quote is found
3837
     */
3838
50.9M
    while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3839
50.9M
      (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
3840
50.1M
  if (len + 5 >= size) {
3841
126k
      xmlChar *tmp;
3842
3843
126k
      size *= 2;
3844
126k
      tmp = (xmlChar *) xmlRealloc(buf, size);
3845
126k
      if (tmp == NULL) {
3846
0
    xmlErrMemory(ctxt, NULL);
3847
0
                goto error;
3848
0
      }
3849
126k
      buf = tmp;
3850
126k
  }
3851
50.1M
  COPY_BUF(l,buf,len,c);
3852
50.1M
  NEXTL(l);
3853
3854
50.1M
  GROW;
3855
50.1M
  c = CUR_CHAR(l);
3856
50.1M
  if (c == 0) {
3857
3.36k
      GROW;
3858
3.36k
      c = CUR_CHAR(l);
3859
3.36k
  }
3860
3861
50.1M
        if (len > maxLength) {
3862
0
            xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
3863
0
                           "entity value too long\n");
3864
0
            goto error;
3865
0
        }
3866
50.1M
    }
3867
793k
    buf[len] = 0;
3868
793k
    if (ctxt->instate == XML_PARSER_EOF)
3869
0
        goto error;
3870
793k
    if (c != stop) {
3871
5.18k
        xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3872
5.18k
        goto error;
3873
5.18k
    }
3874
787k
    NEXT;
3875
3876
    /*
3877
     * Raise problem w.r.t. '&' and '%' being used in non-entities
3878
     * reference constructs. Note Charref will be handled in
3879
     * xmlStringDecodeEntities()
3880
     */
3881
787k
    cur = buf;
3882
36.0M
    while (*cur != 0) { /* non input consuming */
3883
35.2M
  if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3884
644k
      xmlChar *name;
3885
644k
      xmlChar tmp = *cur;
3886
644k
            int nameOk = 0;
3887
3888
644k
      cur++;
3889
644k
      name = xmlParseStringName(ctxt, &cur);
3890
644k
            if (name != NULL) {
3891
631k
                nameOk = 1;
3892
631k
                xmlFree(name);
3893
631k
            }
3894
644k
            if ((nameOk == 0) || (*cur != ';')) {
3895
27.1k
    xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3896
27.1k
      "EntityValue: '%c' forbidden except for entities references\n",
3897
27.1k
                            tmp);
3898
27.1k
                goto error;
3899
27.1k
      }
3900
617k
      if ((tmp == '%') && (ctxt->inSubset == 1) &&
3901
617k
    (ctxt->inputNr == 1)) {
3902
6.28k
    xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3903
6.28k
                goto error;
3904
6.28k
      }
3905
611k
      if (*cur == 0)
3906
0
          break;
3907
611k
  }
3908
35.2M
  cur++;
3909
35.2M
    }
3910
3911
    /*
3912
     * Then PEReference entities are substituted.
3913
     *
3914
     * NOTE: 4.4.7 Bypassed
3915
     * When a general entity reference appears in the EntityValue in
3916
     * an entity declaration, it is bypassed and left as is.
3917
     * so XML_SUBSTITUTE_REF is not set here.
3918
     */
3919
754k
    ++ctxt->depth;
3920
754k
    ret = xmlStringDecodeEntitiesInt(ctxt, buf, len, XML_SUBSTITUTE_PEREF,
3921
754k
                                     0, 0, 0, /* check */ 1);
3922
754k
    --ctxt->depth;
3923
3924
754k
    if (orig != NULL) {
3925
754k
        *orig = buf;
3926
754k
        buf = NULL;
3927
754k
    }
3928
3929
793k
error:
3930
793k
    if (buf != NULL)
3931
38.6k
        xmlFree(buf);
3932
793k
    return(ret);
3933
754k
}
3934
3935
/**
3936
 * xmlParseAttValueComplex:
3937
 * @ctxt:  an XML parser context
3938
 * @len:   the resulting attribute len
3939
 * @normalize:  whether to apply the inner normalization
3940
 *
3941
 * parse a value for an attribute, this is the fallback function
3942
 * of xmlParseAttValue() when the attribute parsing requires handling
3943
 * of non-ASCII characters, or normalization compaction.
3944
 *
3945
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3946
 */
3947
static xmlChar *
3948
1.48M
xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
3949
1.48M
    xmlChar limit = 0;
3950
1.48M
    xmlChar *buf = NULL;
3951
1.48M
    xmlChar *rep = NULL;
3952
1.48M
    size_t len = 0;
3953
1.48M
    size_t buf_size = 0;
3954
1.48M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3955
529k
                       XML_MAX_HUGE_LENGTH :
3956
1.48M
                       XML_MAX_TEXT_LENGTH;
3957
1.48M
    int c, l, in_space = 0;
3958
1.48M
    xmlChar *current = NULL;
3959
1.48M
    xmlEntityPtr ent;
3960
3961
1.48M
    if (NXT(0) == '"') {
3962
1.10M
  ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3963
1.10M
  limit = '"';
3964
1.10M
        NEXT;
3965
1.10M
    } else if (NXT(0) == '\'') {
3966
380k
  limit = '\'';
3967
380k
  ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3968
380k
        NEXT;
3969
380k
    } else {
3970
0
  xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3971
0
  return(NULL);
3972
0
    }
3973
3974
    /*
3975
     * allocate a translation buffer.
3976
     */
3977
1.48M
    buf_size = XML_PARSER_BUFFER_SIZE;
3978
1.48M
    buf = (xmlChar *) xmlMallocAtomic(buf_size);
3979
1.48M
    if (buf == NULL) goto mem_error;
3980
3981
    /*
3982
     * OK loop until we reach one of the ending char or a size limit.
3983
     */
3984
1.48M
    c = CUR_CHAR(l);
3985
51.1M
    while (((NXT(0) != limit) && /* checked */
3986
51.1M
            (IS_CHAR(c)) && (c != '<')) &&
3987
51.1M
            (ctxt->instate != XML_PARSER_EOF)) {
3988
49.6M
  if (c == '&') {
3989
2.42M
      in_space = 0;
3990
2.42M
      if (NXT(1) == '#') {
3991
687k
    int val = xmlParseCharRef(ctxt);
3992
3993
687k
    if (val == '&') {
3994
39.7k
        if (ctxt->replaceEntities) {
3995
15.2k
      if (len + 10 > buf_size) {
3996
298
          growBuffer(buf, 10);
3997
298
      }
3998
15.2k
      buf[len++] = '&';
3999
24.5k
        } else {
4000
      /*
4001
       * The reparsing will be done in xmlStringGetNodeList()
4002
       * called by the attribute() function in SAX.c
4003
       */
4004
24.5k
      if (len + 10 > buf_size) {
4005
334
          growBuffer(buf, 10);
4006
334
      }
4007
24.5k
      buf[len++] = '&';
4008
24.5k
      buf[len++] = '#';
4009
24.5k
      buf[len++] = '3';
4010
24.5k
      buf[len++] = '8';
4011
24.5k
      buf[len++] = ';';
4012
24.5k
        }
4013
648k
    } else if (val != 0) {
4014
572k
        if (len + 10 > buf_size) {
4015
5.14k
      growBuffer(buf, 10);
4016
5.14k
        }
4017
572k
        len += xmlCopyChar(0, &buf[len], val);
4018
572k
    }
4019
1.74M
      } else {
4020
1.74M
    ent = xmlParseEntityRef(ctxt);
4021
1.74M
    if ((ent != NULL) &&
4022
1.74M
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
4023
134k
        if (len + 10 > buf_size) {
4024
578
      growBuffer(buf, 10);
4025
578
        }
4026
134k
        if ((ctxt->replaceEntities == 0) &&
4027
134k
            (ent->content[0] == '&')) {
4028
28.0k
      buf[len++] = '&';
4029
28.0k
      buf[len++] = '#';
4030
28.0k
      buf[len++] = '3';
4031
28.0k
      buf[len++] = '8';
4032
28.0k
      buf[len++] = ';';
4033
106k
        } else {
4034
106k
      buf[len++] = ent->content[0];
4035
106k
        }
4036
1.60M
    } else if ((ent != NULL) &&
4037
1.60M
               (ctxt->replaceEntities != 0)) {
4038
911k
        if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
4039
911k
                        if (xmlParserEntityCheck(ctxt, ent->length))
4040
0
                            goto error;
4041
4042
911k
      ++ctxt->depth;
4043
911k
      rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
4044
911k
                                ent->length, XML_SUBSTITUTE_REF, 0, 0, 0,
4045
911k
                                /* check */ 1);
4046
911k
      --ctxt->depth;
4047
911k
      if (rep != NULL) {
4048
901k
          current = rep;
4049
175M
          while (*current != 0) { /* non input consuming */
4050
174M
                                if ((*current == 0xD) || (*current == 0xA) ||
4051
174M
                                    (*current == 0x9)) {
4052
120k
                                    buf[len++] = 0x20;
4053
120k
                                    current++;
4054
120k
                                } else
4055
174M
                                    buf[len++] = *current++;
4056
174M
        if (len + 10 > buf_size) {
4057
39.4k
            growBuffer(buf, 10);
4058
39.4k
        }
4059
174M
          }
4060
901k
          xmlFree(rep);
4061
901k
          rep = NULL;
4062
901k
      }
4063
911k
        } else {
4064
0
      if (len + 10 > buf_size) {
4065
0
          growBuffer(buf, 10);
4066
0
      }
4067
0
      if (ent->content != NULL)
4068
0
          buf[len++] = ent->content[0];
4069
0
        }
4070
911k
    } else if (ent != NULL) {
4071
394k
        int i = xmlStrlen(ent->name);
4072
394k
        const xmlChar *cur = ent->name;
4073
4074
        /*
4075
                     * We also check for recursion and amplification
4076
                     * when entities are not substituted. They're
4077
                     * often expanded later.
4078
         */
4079
394k
        if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
4080
394k
      (ent->content != NULL)) {
4081
359k
                        if ((ent->flags & XML_ENT_CHECKED) == 0) {
4082
11.9k
                            unsigned long oldCopy = ctxt->sizeentcopy;
4083
4084
11.9k
                            ctxt->sizeentcopy = ent->length;
4085
4086
11.9k
                            ++ctxt->depth;
4087
11.9k
                            rep = xmlStringDecodeEntitiesInt(ctxt,
4088
11.9k
                                    ent->content, ent->length,
4089
11.9k
                                    XML_SUBSTITUTE_REF, 0, 0, 0,
4090
11.9k
                                    /* check */ 1);
4091
11.9k
                            --ctxt->depth;
4092
4093
                            /*
4094
                             * If we're parsing DTD content, the entity
4095
                             * might reference other entities which
4096
                             * weren't defined yet, so the check isn't
4097
                             * reliable.
4098
                             */
4099
11.9k
                            if (ctxt->inSubset == 0) {
4100
11.3k
                                ent->flags |= XML_ENT_CHECKED;
4101
11.3k
                                ent->expandedSize = ctxt->sizeentcopy;
4102
11.3k
                            }
4103
4104
11.9k
                            if (rep != NULL) {
4105
11.6k
                                xmlFree(rep);
4106
11.6k
                                rep = NULL;
4107
11.6k
                            } else {
4108
261
                                ent->content[0] = 0;
4109
261
                            }
4110
4111
11.9k
                            if (xmlParserEntityCheck(ctxt, oldCopy))
4112
7
                                goto error;
4113
347k
                        } else {
4114
347k
                            if (xmlParserEntityCheck(ctxt, ent->expandedSize))
4115
0
                                goto error;
4116
347k
                        }
4117
359k
        }
4118
4119
        /*
4120
         * Just output the reference
4121
         */
4122
394k
        buf[len++] = '&';
4123
397k
        while (len + i + 10 > buf_size) {
4124
5.80k
      growBuffer(buf, i + 10);
4125
5.80k
        }
4126
1.07M
        for (;i > 0;i--)
4127
684k
      buf[len++] = *cur++;
4128
394k
        buf[len++] = ';';
4129
394k
    }
4130
1.74M
      }
4131
47.2M
  } else {
4132
47.2M
      if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
4133
3.88M
          if ((len != 0) || (!normalize)) {
4134
3.75M
        if ((!normalize) || (!in_space)) {
4135
3.55M
      COPY_BUF(l,buf,len,0x20);
4136
3.56M
      while (len + 10 > buf_size) {
4137
21.7k
          growBuffer(buf, 10);
4138
21.7k
      }
4139
3.55M
        }
4140
3.75M
        in_space = 1;
4141
3.75M
    }
4142
43.3M
      } else {
4143
43.3M
          in_space = 0;
4144
43.3M
    COPY_BUF(l,buf,len,c);
4145
43.3M
    if (len + 10 > buf_size) {
4146
231k
        growBuffer(buf, 10);
4147
231k
    }
4148
43.3M
      }
4149
47.2M
      NEXTL(l);
4150
47.2M
  }
4151
49.6M
  GROW;
4152
49.6M
  c = CUR_CHAR(l);
4153
49.6M
        if (len > maxLength) {
4154
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4155
0
                           "AttValue length too long\n");
4156
0
            goto mem_error;
4157
0
        }
4158
49.6M
    }
4159
1.48M
    if (ctxt->instate == XML_PARSER_EOF)
4160
779
        goto error;
4161
4162
1.48M
    if ((in_space) && (normalize)) {
4163
77.4k
        while ((len > 0) && (buf[len - 1] == 0x20)) len--;
4164
37.4k
    }
4165
1.48M
    buf[len] = 0;
4166
1.48M
    if (RAW == '<') {
4167
566k
  xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4168
922k
    } else if (RAW != limit) {
4169
251k
  if ((c != 0) && (!IS_CHAR(c))) {
4170
140k
      xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4171
140k
         "invalid character in attribute value\n");
4172
140k
  } else {
4173
111k
      xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4174
111k
         "AttValue: ' expected\n");
4175
111k
        }
4176
251k
    } else
4177
670k
  NEXT;
4178
4179
1.48M
    if (attlen != NULL) *attlen = len;
4180
1.48M
    return(buf);
4181
4182
0
mem_error:
4183
0
    xmlErrMemory(ctxt, NULL);
4184
786
error:
4185
786
    if (buf != NULL)
4186
786
        xmlFree(buf);
4187
786
    if (rep != NULL)
4188
0
        xmlFree(rep);
4189
786
    return(NULL);
4190
0
}
4191
4192
/**
4193
 * xmlParseAttValue:
4194
 * @ctxt:  an XML parser context
4195
 *
4196
 * DEPRECATED: Internal function, don't use.
4197
 *
4198
 * parse a value for an attribute
4199
 * Note: the parser won't do substitution of entities here, this
4200
 * will be handled later in xmlStringGetNodeList
4201
 *
4202
 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4203
 *                   "'" ([^<&'] | Reference)* "'"
4204
 *
4205
 * 3.3.3 Attribute-Value Normalization:
4206
 * Before the value of an attribute is passed to the application or
4207
 * checked for validity, the XML processor must normalize it as follows:
4208
 * - a character reference is processed by appending the referenced
4209
 *   character to the attribute value
4210
 * - an entity reference is processed by recursively processing the
4211
 *   replacement text of the entity
4212
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4213
 *   appending #x20 to the normalized value, except that only a single
4214
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
4215
 *   parsed entity or the literal entity value of an internal parsed entity
4216
 * - other characters are processed by appending them to the normalized value
4217
 * If the declared value is not CDATA, then the XML processor must further
4218
 * process the normalized attribute value by discarding any leading and
4219
 * trailing space (#x20) characters, and by replacing sequences of space
4220
 * (#x20) characters by a single space (#x20) character.
4221
 * All attributes for which no declaration has been read should be treated
4222
 * by a non-validating parser as if declared CDATA.
4223
 *
4224
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4225
 */
4226
4227
4228
xmlChar *
4229
2.45M
xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4230
2.45M
    if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4231
2.45M
    return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
4232
2.45M
}
4233
4234
/**
4235
 * xmlParseSystemLiteral:
4236
 * @ctxt:  an XML parser context
4237
 *
4238
 * DEPRECATED: Internal function, don't use.
4239
 *
4240
 * parse an XML Literal
4241
 *
4242
 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4243
 *
4244
 * Returns the SystemLiteral parsed or NULL
4245
 */
4246
4247
xmlChar *
4248
288k
xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4249
288k
    xmlChar *buf = NULL;
4250
288k
    int len = 0;
4251
288k
    int size = XML_PARSER_BUFFER_SIZE;
4252
288k
    int cur, l;
4253
288k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4254
90.0k
                    XML_MAX_TEXT_LENGTH :
4255
288k
                    XML_MAX_NAME_LENGTH;
4256
288k
    xmlChar stop;
4257
288k
    int state = ctxt->instate;
4258
288k
    int count = 0;
4259
4260
288k
    SHRINK;
4261
288k
    if (RAW == '"') {
4262
210k
        NEXT;
4263
210k
  stop = '"';
4264
210k
    } else if (RAW == '\'') {
4265
66.1k
        NEXT;
4266
66.1k
  stop = '\'';
4267
66.1k
    } else {
4268
12.3k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4269
12.3k
  return(NULL);
4270
12.3k
    }
4271
4272
276k
    buf = (xmlChar *) xmlMallocAtomic(size);
4273
276k
    if (buf == NULL) {
4274
0
        xmlErrMemory(ctxt, NULL);
4275
0
  return(NULL);
4276
0
    }
4277
276k
    ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4278
276k
    cur = CUR_CHAR(l);
4279
12.1M
    while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4280
11.8M
  if (len + 5 >= size) {
4281
18.6k
      xmlChar *tmp;
4282
4283
18.6k
      size *= 2;
4284
18.6k
      tmp = (xmlChar *) xmlRealloc(buf, size);
4285
18.6k
      if (tmp == NULL) {
4286
0
          xmlFree(buf);
4287
0
    xmlErrMemory(ctxt, NULL);
4288
0
    ctxt->instate = (xmlParserInputState) state;
4289
0
    return(NULL);
4290
0
      }
4291
18.6k
      buf = tmp;
4292
18.6k
  }
4293
11.8M
  count++;
4294
11.8M
  if (count > 50) {
4295
151k
      SHRINK;
4296
151k
      GROW;
4297
151k
      count = 0;
4298
151k
            if (ctxt->instate == XML_PARSER_EOF) {
4299
0
          xmlFree(buf);
4300
0
    return(NULL);
4301
0
            }
4302
151k
  }
4303
11.8M
  COPY_BUF(l,buf,len,cur);
4304
11.8M
  NEXTL(l);
4305
11.8M
  cur = CUR_CHAR(l);
4306
11.8M
  if (cur == 0) {
4307
4.58k
      GROW;
4308
4.58k
      SHRINK;
4309
4.58k
      cur = CUR_CHAR(l);
4310
4.58k
  }
4311
11.8M
        if (len > maxLength) {
4312
25
            xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4313
25
            xmlFree(buf);
4314
25
            ctxt->instate = (xmlParserInputState) state;
4315
25
            return(NULL);
4316
25
        }
4317
11.8M
    }
4318
276k
    buf[len] = 0;
4319
276k
    ctxt->instate = (xmlParserInputState) state;
4320
276k
    if (!IS_CHAR(cur)) {
4321
8.25k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4322
268k
    } else {
4323
268k
  NEXT;
4324
268k
    }
4325
276k
    return(buf);
4326
276k
}
4327
4328
/**
4329
 * xmlParsePubidLiteral:
4330
 * @ctxt:  an XML parser context
4331
 *
4332
 * DEPRECATED: Internal function, don't use.
4333
 *
4334
 * parse an XML public literal
4335
 *
4336
 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4337
 *
4338
 * Returns the PubidLiteral parsed or NULL.
4339
 */
4340
4341
xmlChar *
4342
98.2k
xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4343
98.2k
    xmlChar *buf = NULL;
4344
98.2k
    int len = 0;
4345
98.2k
    int size = XML_PARSER_BUFFER_SIZE;
4346
98.2k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4347
29.6k
                    XML_MAX_TEXT_LENGTH :
4348
98.2k
                    XML_MAX_NAME_LENGTH;
4349
98.2k
    xmlChar cur;
4350
98.2k
    xmlChar stop;
4351
98.2k
    int count = 0;
4352
98.2k
    xmlParserInputState oldstate = ctxt->instate;
4353
4354
98.2k
    SHRINK;
4355
98.2k
    if (RAW == '"') {
4356
56.6k
        NEXT;
4357
56.6k
  stop = '"';
4358
56.6k
    } else if (RAW == '\'') {
4359
39.7k
        NEXT;
4360
39.7k
  stop = '\'';
4361
39.7k
    } else {
4362
1.84k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4363
1.84k
  return(NULL);
4364
1.84k
    }
4365
96.4k
    buf = (xmlChar *) xmlMallocAtomic(size);
4366
96.4k
    if (buf == NULL) {
4367
0
  xmlErrMemory(ctxt, NULL);
4368
0
  return(NULL);
4369
0
    }
4370
96.4k
    ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
4371
96.4k
    cur = CUR;
4372
3.42M
    while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
4373
3.33M
  if (len + 1 >= size) {
4374
3.72k
      xmlChar *tmp;
4375
4376
3.72k
      size *= 2;
4377
3.72k
      tmp = (xmlChar *) xmlRealloc(buf, size);
4378
3.72k
      if (tmp == NULL) {
4379
0
    xmlErrMemory(ctxt, NULL);
4380
0
    xmlFree(buf);
4381
0
    return(NULL);
4382
0
      }
4383
3.72k
      buf = tmp;
4384
3.72k
  }
4385
3.33M
  buf[len++] = cur;
4386
3.33M
  count++;
4387
3.33M
  if (count > 50) {
4388
30.2k
      SHRINK;
4389
30.2k
      GROW;
4390
30.2k
      count = 0;
4391
30.2k
            if (ctxt->instate == XML_PARSER_EOF) {
4392
0
    xmlFree(buf);
4393
0
    return(NULL);
4394
0
            }
4395
30.2k
  }
4396
3.33M
  NEXT;
4397
3.33M
  cur = CUR;
4398
3.33M
  if (cur == 0) {
4399
844
      GROW;
4400
844
      SHRINK;
4401
844
      cur = CUR;
4402
844
  }
4403
3.33M
        if (len > maxLength) {
4404
1
            xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4405
1
            xmlFree(buf);
4406
1
            return(NULL);
4407
1
        }
4408
3.33M
    }
4409
96.4k
    buf[len] = 0;
4410
96.4k
    if (cur != stop) {
4411
10.0k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4412
86.4k
    } else {
4413
86.4k
  NEXT;
4414
86.4k
    }
4415
96.4k
    ctxt->instate = oldstate;
4416
96.4k
    return(buf);
4417
96.4k
}
4418
4419
static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt);
4420
4421
/*
 * Lookup table for the inner scanning loop of xmlParseCharData().
 *
 * Indexed by input byte. A non-zero entry (the byte value itself) means
 * the byte may be skipped over as plain character data; a zero entry
 * makes the scanner stop and inspect the byte. Zero entries are:
 *   - 0x00-0x08 and 0x0A-0x1F (every control byte except TAB, so LF
 *     and CR stop the scan),
 *   - '&' (0x26), '<' (0x3C) and ']' (0x5D),
 *   - 0x80-0xFF (non-ASCII), which are not listed below and are
 *     therefore zero-initialized.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 - 0x0F: only TAB is accepted */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x10 - 0x1F: control bytes */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20 - 0x2F: '&' (0x26) stops the scan */
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27,
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30 - 0x3F: '<' (0x3C) stops the scan */
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F,
    /* 0x40 - 0x4F */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50 - 0x5F: ']' (0x5D) stops the scan */
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F,
    /* 0x60 - 0x6F */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 - 0x7F */
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F
    /* 0x80 - 0xFF: left out on purpose, implicitly 0x00 */
};
4458
4459
/**
4460
 * xmlParseCharData:
4461
 * @ctxt:  an XML parser context
4462
 * @cdata:  unused
4463
 *
4464
 * DEPRECATED: Internal function, don't use.
4465
 *
4466
 * Parse character data. Always makes progress if the first char isn't
4467
 * '<' or '&'.
4468
 *
4469
 * if we are within a CDATA section ']]>' marks an end of section.
4470
 *
4471
 * The right angle bracket (>) may be represented using the string "&gt;",
4472
 * and must, for compatibility, be escaped using "&gt;" or a character
4473
 * reference when it appears in the string "]]>" in content, when that
4474
 * string is not marking the end of a CDATA section.
4475
 *
4476
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4477
 */
4478
4479
void
4480
30.8M
xmlParseCharData(xmlParserCtxtPtr ctxt, ATTRIBUTE_UNUSED int cdata) {
4481
30.8M
    const xmlChar *in;
4482
30.8M
    int nbchar = 0;
4483
30.8M
    int line = ctxt->input->line;
4484
30.8M
    int col = ctxt->input->col;
4485
30.8M
    int ccol;
4486
4487
30.8M
    SHRINK;
4488
30.8M
    GROW;
4489
    /*
4490
     * Accelerated common case where input don't need to be
4491
     * modified before passing it to the handler.
4492
     */
4493
30.8M
    in = ctxt->input->cur;
4494
34.1M
    do {
4495
39.2M
get_more_space:
4496
57.6M
        while (*in == 0x20) { in++; ctxt->input->col++; }
4497
39.2M
        if (*in == 0xA) {
4498
5.40M
            do {
4499
5.40M
                ctxt->input->line++; ctxt->input->col = 1;
4500
5.40M
                in++;
4501
5.40M
            } while (*in == 0xA);
4502
5.12M
            goto get_more_space;
4503
5.12M
        }
4504
34.1M
        if (*in == '<') {
4505
5.44M
            nbchar = in - ctxt->input->cur;
4506
5.44M
            if (nbchar > 0) {
4507
5.44M
                const xmlChar *tmp = ctxt->input->cur;
4508
5.44M
                ctxt->input->cur = in;
4509
4510
5.44M
                if ((ctxt->sax != NULL) &&
4511
5.44M
                    (ctxt->sax->ignorableWhitespace !=
4512
5.44M
                     ctxt->sax->characters)) {
4513
2.51M
                    if (areBlanks(ctxt, tmp, nbchar, 1)) {
4514
1.55M
                        if (ctxt->sax->ignorableWhitespace != NULL)
4515
1.55M
                            ctxt->sax->ignorableWhitespace(ctxt->userData,
4516
1.55M
                                                   tmp, nbchar);
4517
1.55M
                    } else {
4518
959k
                        if (ctxt->sax->characters != NULL)
4519
959k
                            ctxt->sax->characters(ctxt->userData,
4520
959k
                                                  tmp, nbchar);
4521
959k
                        if (*ctxt->space == -1)
4522
292k
                            *ctxt->space = -2;
4523
959k
                    }
4524
2.93M
                } else if ((ctxt->sax != NULL) &&
4525
2.93M
                           (ctxt->sax->characters != NULL)) {
4526
2.93M
                    ctxt->sax->characters(ctxt->userData,
4527
2.93M
                                          tmp, nbchar);
4528
2.93M
                }
4529
5.44M
            }
4530
5.44M
            return;
4531
5.44M
        }
4532
4533
33.9M
get_more:
4534
33.9M
        ccol = ctxt->input->col;
4535
276M
        while (test_char_data[*in]) {
4536
242M
            in++;
4537
242M
            ccol++;
4538
242M
        }
4539
33.9M
        ctxt->input->col = ccol;
4540
33.9M
        if (*in == 0xA) {
4541
4.58M
            do {
4542
4.58M
                ctxt->input->line++; ctxt->input->col = 1;
4543
4.58M
                in++;
4544
4.58M
            } while (*in == 0xA);
4545
4.13M
            goto get_more;
4546
4.13M
        }
4547
29.8M
        if (*in == ']') {
4548
1.30M
            if ((in[1] == ']') && (in[2] == '>')) {
4549
134k
                xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4550
134k
                ctxt->input->cur = in + 1;
4551
134k
                return;
4552
134k
            }
4553
1.16M
            in++;
4554
1.16M
            ctxt->input->col++;
4555
1.16M
            goto get_more;
4556
1.30M
        }
4557
28.5M
        nbchar = in - ctxt->input->cur;
4558
28.5M
        if (nbchar > 0) {
4559
13.5M
            if ((ctxt->sax != NULL) &&
4560
13.5M
                (ctxt->sax->ignorableWhitespace !=
4561
13.5M
                 ctxt->sax->characters) &&
4562
13.5M
                (IS_BLANK_CH(*ctxt->input->cur))) {
4563
1.41M
                const xmlChar *tmp = ctxt->input->cur;
4564
1.41M
                ctxt->input->cur = in;
4565
4566
1.41M
                if (areBlanks(ctxt, tmp, nbchar, 0)) {
4567
327k
                    if (ctxt->sax->ignorableWhitespace != NULL)
4568
327k
                        ctxt->sax->ignorableWhitespace(ctxt->userData,
4569
327k
                                                       tmp, nbchar);
4570
1.08M
                } else {
4571
1.08M
                    if (ctxt->sax->characters != NULL)
4572
1.08M
                        ctxt->sax->characters(ctxt->userData,
4573
1.08M
                                              tmp, nbchar);
4574
1.08M
                    if (*ctxt->space == -1)
4575
537k
                        *ctxt->space = -2;
4576
1.08M
                }
4577
1.41M
                line = ctxt->input->line;
4578
1.41M
                col = ctxt->input->col;
4579
12.0M
            } else if (ctxt->sax != NULL) {
4580
12.0M
                if (ctxt->sax->characters != NULL)
4581
12.0M
                    ctxt->sax->characters(ctxt->userData,
4582
12.0M
                                          ctxt->input->cur, nbchar);
4583
12.0M
                line = ctxt->input->line;
4584
12.0M
                col = ctxt->input->col;
4585
12.0M
            }
4586
13.5M
        }
4587
28.5M
        ctxt->input->cur = in;
4588
28.5M
        if (*in == 0xD) {
4589
3.51M
            in++;
4590
3.51M
            if (*in == 0xA) {
4591
3.29M
                ctxt->input->cur = in;
4592
3.29M
                in++;
4593
3.29M
                ctxt->input->line++; ctxt->input->col = 1;
4594
3.29M
                continue; /* while */
4595
3.29M
            }
4596
217k
            in--;
4597
217k
        }
4598
25.2M
        if (*in == '<') {
4599
7.90M
            return;
4600
7.90M
        }
4601
17.3M
        if (*in == '&') {
4602
1.29M
            return;
4603
1.29M
        }
4604
16.0M
        SHRINK;
4605
16.0M
        GROW;
4606
16.0M
        if (ctxt->instate == XML_PARSER_EOF)
4607
0
            return;
4608
16.0M
        in = ctxt->input->cur;
4609
19.3M
    } while (((*in >= 0x20) && (*in <= 0x7F)) ||
4610
19.3M
             (*in == 0x09) || (*in == 0x0a));
4611
16.1M
    ctxt->input->line = line;
4612
16.1M
    ctxt->input->col = col;
4613
16.1M
    xmlParseCharDataComplex(ctxt);
4614
16.1M
}
4615
4616
/**
4617
 * xmlParseCharDataComplex:
4618
 * @ctxt:  an XML parser context
4619
 * @cdata:  int indicating whether we are within a CDATA section
4620
 *
4621
 * Always makes progress if the first char isn't '<' or '&'.
4622
 *
4623
 * parse a CharData section.this is the fallback function
4624
 * of xmlParseCharData() when the parsing requires handling
4625
 * of non-ASCII characters.
4626
 */
4627
static void
4628
16.1M
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt) {
4629
16.1M
    xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4630
16.1M
    int nbchar = 0;
4631
16.1M
    int cur, l;
4632
16.1M
    int count = 0;
4633
4634
16.1M
    SHRINK;
4635
16.1M
    GROW;
4636
16.1M
    cur = CUR_CHAR(l);
4637
126M
    while ((cur != '<') && /* checked */
4638
126M
           (cur != '&') &&
4639
126M
     (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
4640
109M
  if ((cur == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4641
53.6k
      xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4642
53.6k
  }
4643
109M
  COPY_BUF(l,buf,nbchar,cur);
4644
  /* move current position before possible calling of ctxt->sax->characters */
4645
109M
  NEXTL(l);
4646
109M
  cur = CUR_CHAR(l);
4647
109M
  if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4648
72.4k
      buf[nbchar] = 0;
4649
4650
      /*
4651
       * OK the segment is to be consumed as chars.
4652
       */
4653
72.4k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4654
66.5k
    if (areBlanks(ctxt, buf, nbchar, 0)) {
4655
0
        if (ctxt->sax->ignorableWhitespace != NULL)
4656
0
      ctxt->sax->ignorableWhitespace(ctxt->userData,
4657
0
                                     buf, nbchar);
4658
66.5k
    } else {
4659
66.5k
        if (ctxt->sax->characters != NULL)
4660
66.5k
      ctxt->sax->characters(ctxt->userData, buf, nbchar);
4661
66.5k
        if ((ctxt->sax->characters !=
4662
66.5k
             ctxt->sax->ignorableWhitespace) &&
4663
66.5k
      (*ctxt->space == -1))
4664
3.78k
      *ctxt->space = -2;
4665
66.5k
    }
4666
66.5k
      }
4667
72.4k
      nbchar = 0;
4668
            /* something really bad happened in the SAX callback */
4669
72.4k
            if (ctxt->instate != XML_PARSER_CONTENT)
4670
0
                return;
4671
72.4k
  }
4672
109M
  count++;
4673
109M
  if (count > 50) {
4674
1.00M
      SHRINK;
4675
1.00M
      GROW;
4676
1.00M
      count = 0;
4677
1.00M
            if (ctxt->instate == XML_PARSER_EOF)
4678
0
    return;
4679
1.00M
  }
4680
109M
    }
4681
16.1M
    if (nbchar != 0) {
4682
3.96M
        buf[nbchar] = 0;
4683
  /*
4684
   * OK the segment is to be consumed as chars.
4685
   */
4686
3.96M
  if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4687
3.66M
      if (areBlanks(ctxt, buf, nbchar, 0)) {
4688
17.4k
    if (ctxt->sax->ignorableWhitespace != NULL)
4689
17.4k
        ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4690
3.65M
      } else {
4691
3.65M
    if (ctxt->sax->characters != NULL)
4692
3.65M
        ctxt->sax->characters(ctxt->userData, buf, nbchar);
4693
3.65M
    if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4694
3.65M
        (*ctxt->space == -1))
4695
460k
        *ctxt->space = -2;
4696
3.65M
      }
4697
3.66M
  }
4698
3.96M
    }
4699
16.1M
    if ((ctxt->input->cur < ctxt->input->end) && (!IS_CHAR(cur))) {
4700
  /* Generate the error and skip the offending character */
4701
12.9M
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4702
12.9M
                          "PCDATA invalid Char value %d\n",
4703
12.9M
                    cur ? cur : CUR);
4704
12.9M
  NEXT;
4705
12.9M
    }
4706
16.1M
}
4707
4708
/**
4709
 * xmlParseExternalID:
4710
 * @ctxt:  an XML parser context
4711
 * @publicID:  a xmlChar** receiving PubidLiteral
4712
 * @strict: indicate whether we should restrict parsing to only
4713
 *          production [75], see NOTE below
4714
 *
4715
 * DEPRECATED: Internal function, don't use.
4716
 *
4717
 * Parse an External ID or a Public ID
4718
 *
4719
 * NOTE: Productions [75] and [83] interact badly since [75] can generate
4720
 *       'PUBLIC' S PubidLiteral S SystemLiteral
4721
 *
4722
 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4723
 *                   | 'PUBLIC' S PubidLiteral S SystemLiteral
4724
 *
4725
 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4726
 *
4727
 * Returns the function returns SystemLiteral and in the second
4728
 *                case publicID receives PubidLiteral, is strict is off
4729
 *                it is possible to return NULL and have publicID set.
4730
 */
4731
4732
xmlChar *
4733
579k
xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4734
579k
    xmlChar *URI = NULL;
4735
4736
579k
    SHRINK;
4737
4738
579k
    *publicID = NULL;
4739
579k
    if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4740
206k
        SKIP(6);
4741
206k
  if (SKIP_BLANKS == 0) {
4742
1.02k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4743
1.02k
                     "Space required after 'SYSTEM'\n");
4744
1.02k
  }
4745
206k
  URI = xmlParseSystemLiteral(ctxt);
4746
206k
  if (URI == NULL) {
4747
2.62k
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4748
2.62k
        }
4749
372k
    } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4750
98.2k
        SKIP(6);
4751
98.2k
  if (SKIP_BLANKS == 0) {
4752
2.04k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4753
2.04k
        "Space required after 'PUBLIC'\n");
4754
2.04k
  }
4755
98.2k
  *publicID = xmlParsePubidLiteral(ctxt);
4756
98.2k
  if (*publicID == NULL) {
4757
1.84k
      xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4758
1.84k
  }
4759
98.2k
  if (strict) {
4760
      /*
4761
       * We don't handle [83] so "S SystemLiteral" is required.
4762
       */
4763
81.8k
      if (SKIP_BLANKS == 0) {
4764
9.66k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4765
9.66k
      "Space required after the Public Identifier\n");
4766
9.66k
      }
4767
81.8k
  } else {
4768
      /*
4769
       * We handle [83] so we return immediately, if
4770
       * "S SystemLiteral" is not detected. We skip blanks if no
4771
             * system literal was found, but this is harmless since we must
4772
             * be at the end of a NotationDecl.
4773
       */
4774
16.3k
      if (SKIP_BLANKS == 0) return(NULL);
4775
746
      if ((CUR != '\'') && (CUR != '"')) return(NULL);
4776
746
  }
4777
82.2k
  URI = xmlParseSystemLiteral(ctxt);
4778
82.2k
  if (URI == NULL) {
4779
9.72k
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4780
9.72k
        }
4781
82.2k
    }
4782
563k
    return(URI);
4783
579k
}
4784
4785
/**
4786
 * xmlParseCommentComplex:
4787
 * @ctxt:  an XML parser context
4788
 * @buf:  the already parsed part of the buffer
4789
 * @len:  number of bytes in the buffer
4790
 * @size:  allocated size of the buffer
4791
 *
4792
 * Skip an XML (SGML) comment <!-- .... -->
4793
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4794
 *  must not occur within comments. "
4795
 * This is the slow routine in case the accelerator for ascii didn't work
4796
 *
4797
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4798
 */
4799
static void
4800
xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4801
207k
                       size_t len, size_t size) {
4802
207k
    int q, ql;
4803
207k
    int r, rl;
4804
207k
    int cur, l;
4805
207k
    size_t count = 0;
4806
207k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4807
65.7k
                       XML_MAX_HUGE_LENGTH :
4808
207k
                       XML_MAX_TEXT_LENGTH;
4809
207k
    int inputid;
4810
4811
207k
    inputid = ctxt->input->id;
4812
4813
207k
    if (buf == NULL) {
4814
22.5k
        len = 0;
4815
22.5k
  size = XML_PARSER_BUFFER_SIZE;
4816
22.5k
  buf = (xmlChar *) xmlMallocAtomic(size);
4817
22.5k
  if (buf == NULL) {
4818
0
      xmlErrMemory(ctxt, NULL);
4819
0
      return;
4820
0
  }
4821
22.5k
    }
4822
207k
    GROW; /* Assure there's enough input data */
4823
207k
    q = CUR_CHAR(ql);
4824
207k
    if (q == 0)
4825
20.1k
        goto not_terminated;
4826
187k
    if (!IS_CHAR(q)) {
4827
24.7k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4828
24.7k
                          "xmlParseComment: invalid xmlChar value %d\n",
4829
24.7k
                    q);
4830
24.7k
  xmlFree (buf);
4831
24.7k
  return;
4832
24.7k
    }
4833
162k
    NEXTL(ql);
4834
162k
    r = CUR_CHAR(rl);
4835
162k
    if (r == 0)
4836
2.73k
        goto not_terminated;
4837
159k
    if (!IS_CHAR(r)) {
4838
7.42k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4839
7.42k
                          "xmlParseComment: invalid xmlChar value %d\n",
4840
7.42k
                    r);
4841
7.42k
  xmlFree (buf);
4842
7.42k
  return;
4843
7.42k
    }
4844
152k
    NEXTL(rl);
4845
152k
    cur = CUR_CHAR(l);
4846
152k
    if (cur == 0)
4847
2.30k
        goto not_terminated;
4848
17.3M
    while (IS_CHAR(cur) && /* checked */
4849
17.3M
           ((cur != '>') ||
4850
17.2M
      (r != '-') || (q != '-'))) {
4851
17.1M
  if ((r == '-') && (q == '-')) {
4852
126k
      xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4853
126k
  }
4854
17.1M
  if (len + 5 >= size) {
4855
62.1k
      xmlChar *new_buf;
4856
62.1k
            size_t new_size;
4857
4858
62.1k
      new_size = size * 2;
4859
62.1k
      new_buf = (xmlChar *) xmlRealloc(buf, new_size);
4860
62.1k
      if (new_buf == NULL) {
4861
0
    xmlFree (buf);
4862
0
    xmlErrMemory(ctxt, NULL);
4863
0
    return;
4864
0
      }
4865
62.1k
      buf = new_buf;
4866
62.1k
            size = new_size;
4867
62.1k
  }
4868
17.1M
  COPY_BUF(ql,buf,len,q);
4869
17.1M
  q = r;
4870
17.1M
  ql = rl;
4871
17.1M
  r = cur;
4872
17.1M
  rl = l;
4873
4874
17.1M
  count++;
4875
17.1M
  if (count > 50) {
4876
279k
      SHRINK;
4877
279k
      GROW;
4878
279k
      count = 0;
4879
279k
            if (ctxt->instate == XML_PARSER_EOF) {
4880
0
    xmlFree(buf);
4881
0
    return;
4882
0
            }
4883
279k
  }
4884
17.1M
  NEXTL(l);
4885
17.1M
  cur = CUR_CHAR(l);
4886
17.1M
  if (cur == 0) {
4887
19.0k
      SHRINK;
4888
19.0k
      GROW;
4889
19.0k
      cur = CUR_CHAR(l);
4890
19.0k
  }
4891
4892
17.1M
        if (len > maxLength) {
4893
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4894
0
                         "Comment too big found", NULL);
4895
0
            xmlFree (buf);
4896
0
            return;
4897
0
        }
4898
17.1M
    }
4899
150k
    buf[len] = 0;
4900
150k
    if (cur == 0) {
4901
19.0k
  xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4902
19.0k
                       "Comment not terminated \n<!--%.50s\n", buf);
4903
131k
    } else if (!IS_CHAR(cur)) {
4904
31.6k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4905
31.6k
                          "xmlParseComment: invalid xmlChar value %d\n",
4906
31.6k
                    cur);
4907
99.4k
    } else {
4908
99.4k
  if (inputid != ctxt->input->id) {
4909
0
      xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4910
0
               "Comment doesn't start and stop in the same"
4911
0
                           " entity\n");
4912
0
  }
4913
99.4k
        NEXT;
4914
99.4k
  if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4915
99.4k
      (!ctxt->disableSAX))
4916
79.8k
      ctxt->sax->comment(ctxt->userData, buf);
4917
99.4k
    }
4918
150k
    xmlFree(buf);
4919
150k
    return;
4920
25.1k
not_terminated:
4921
25.1k
    xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4922
25.1k
       "Comment not terminated\n", NULL);
4923
25.1k
    xmlFree(buf);
4924
25.1k
    return;
4925
150k
}
4926
4927
/**
4928
 * xmlParseComment:
4929
 * @ctxt:  an XML parser context
4930
 *
4931
 * DEPRECATED: Internal function, don't use.
4932
 *
4933
 * Parse an XML (SGML) comment. Always consumes '<!'.
4934
 *
4935
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4936
 *  must not occur within comments. "
4937
 *
4938
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4939
 */
4940
void
4941
16.0M
xmlParseComment(xmlParserCtxtPtr ctxt) {
4942
16.0M
    xmlChar *buf = NULL;
4943
16.0M
    size_t size = XML_PARSER_BUFFER_SIZE;
4944
16.0M
    size_t len = 0;
4945
16.0M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4946
431k
                       XML_MAX_HUGE_LENGTH :
4947
16.0M
                       XML_MAX_TEXT_LENGTH;
4948
16.0M
    xmlParserInputState state;
4949
16.0M
    const xmlChar *in;
4950
16.0M
    size_t nbchar = 0;
4951
16.0M
    int ccol;
4952
16.0M
    int inputid;
4953
4954
    /*
4955
     * Check that there is a comment right here.
4956
     */
4957
16.0M
    if ((RAW != '<') || (NXT(1) != '!'))
4958
0
        return;
4959
16.0M
    SKIP(2);
4960
16.0M
    if ((RAW != '-') || (NXT(1) != '-'))
4961
312
        return;
4962
16.0M
    state = ctxt->instate;
4963
16.0M
    ctxt->instate = XML_PARSER_COMMENT;
4964
16.0M
    inputid = ctxt->input->id;
4965
16.0M
    SKIP(2);
4966
16.0M
    SHRINK;
4967
16.0M
    GROW;
4968
4969
    /*
4970
     * Accelerated common case where input don't need to be
4971
     * modified before passing it to the handler.
4972
     */
4973
16.0M
    in = ctxt->input->cur;
4974
16.0M
    do {
4975
16.0M
  if (*in == 0xA) {
4976
260k
      do {
4977
260k
    ctxt->input->line++; ctxt->input->col = 1;
4978
260k
    in++;
4979
260k
      } while (*in == 0xA);
4980
151k
  }
4981
19.4M
get_more:
4982
19.4M
        ccol = ctxt->input->col;
4983
136M
  while (((*in > '-') && (*in <= 0x7F)) ||
4984
136M
         ((*in >= 0x20) && (*in < '-')) ||
4985
136M
         (*in == 0x09)) {
4986
117M
        in++;
4987
117M
        ccol++;
4988
117M
  }
4989
19.4M
  ctxt->input->col = ccol;
4990
19.4M
  if (*in == 0xA) {
4991
1.64M
      do {
4992
1.64M
    ctxt->input->line++; ctxt->input->col = 1;
4993
1.64M
    in++;
4994
1.64M
      } while (*in == 0xA);
4995
1.54M
      goto get_more;
4996
1.54M
  }
4997
17.8M
  nbchar = in - ctxt->input->cur;
4998
  /*
4999
   * save current set of data
5000
   */
5001
17.8M
  if (nbchar > 0) {
5002
2.85M
      if ((ctxt->sax != NULL) &&
5003
2.85M
    (ctxt->sax->comment != NULL)) {
5004
2.85M
    if (buf == NULL) {
5005
1.06M
        if ((*in == '-') && (in[1] == '-'))
5006
610k
            size = nbchar + 1;
5007
452k
        else
5008
452k
            size = XML_PARSER_BUFFER_SIZE + nbchar;
5009
1.06M
        buf = (xmlChar *) xmlMallocAtomic(size);
5010
1.06M
        if (buf == NULL) {
5011
0
            xmlErrMemory(ctxt, NULL);
5012
0
      ctxt->instate = state;
5013
0
      return;
5014
0
        }
5015
1.06M
        len = 0;
5016
1.79M
    } else if (len + nbchar + 1 >= size) {
5017
238k
        xmlChar *new_buf;
5018
238k
        size  += len + nbchar + XML_PARSER_BUFFER_SIZE;
5019
238k
        new_buf = (xmlChar *) xmlRealloc(buf, size);
5020
238k
        if (new_buf == NULL) {
5021
0
            xmlFree (buf);
5022
0
      xmlErrMemory(ctxt, NULL);
5023
0
      ctxt->instate = state;
5024
0
      return;
5025
0
        }
5026
238k
        buf = new_buf;
5027
238k
    }
5028
2.85M
    memcpy(&buf[len], ctxt->input->cur, nbchar);
5029
2.85M
    len += nbchar;
5030
2.85M
    buf[len] = 0;
5031
2.85M
      }
5032
2.85M
  }
5033
17.8M
        if (len > maxLength) {
5034
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5035
0
                         "Comment too big found", NULL);
5036
0
            xmlFree (buf);
5037
0
            return;
5038
0
        }
5039
17.8M
  ctxt->input->cur = in;
5040
17.8M
  if (*in == 0xA) {
5041
0
      in++;
5042
0
      ctxt->input->line++; ctxt->input->col = 1;
5043
0
  }
5044
17.8M
  if (*in == 0xD) {
5045
588k
      in++;
5046
588k
      if (*in == 0xA) {
5047
570k
    ctxt->input->cur = in;
5048
570k
    in++;
5049
570k
    ctxt->input->line++; ctxt->input->col = 1;
5050
570k
    goto get_more;
5051
570k
      }
5052
17.9k
      in--;
5053
17.9k
  }
5054
17.3M
  SHRINK;
5055
17.3M
  GROW;
5056
17.3M
        if (ctxt->instate == XML_PARSER_EOF) {
5057
0
            xmlFree(buf);
5058
0
            return;
5059
0
        }
5060
17.3M
  in = ctxt->input->cur;
5061
17.3M
  if (*in == '-') {
5062
17.1M
      if (in[1] == '-') {
5063
15.9M
          if (in[2] == '>') {
5064
15.8M
        if (ctxt->input->id != inputid) {
5065
0
      xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5066
0
                     "comment doesn't start and stop in the"
5067
0
                                       " same entity\n");
5068
0
        }
5069
15.8M
        SKIP(3);
5070
15.8M
        if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5071
15.8M
            (!ctxt->disableSAX)) {
5072
9.52M
      if (buf != NULL)
5073
778k
          ctxt->sax->comment(ctxt->userData, buf);
5074
8.74M
      else
5075
8.74M
          ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5076
9.52M
        }
5077
15.8M
        if (buf != NULL)
5078
877k
            xmlFree(buf);
5079
15.8M
        if (ctxt->instate != XML_PARSER_EOF)
5080
15.8M
      ctxt->instate = state;
5081
15.8M
        return;
5082
15.8M
    }
5083
141k
    if (buf != NULL) {
5084
132k
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5085
132k
                          "Double hyphen within comment: "
5086
132k
                                      "<!--%.50s\n",
5087
132k
              buf);
5088
132k
    } else
5089
8.67k
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5090
8.67k
                          "Double hyphen within comment\n", NULL);
5091
141k
                if (ctxt->instate == XML_PARSER_EOF) {
5092
0
                    xmlFree(buf);
5093
0
                    return;
5094
0
                }
5095
141k
    in++;
5096
141k
    ctxt->input->col++;
5097
141k
      }
5098
1.24M
      in++;
5099
1.24M
      ctxt->input->col++;
5100
1.24M
      goto get_more;
5101
17.1M
  }
5102
17.3M
    } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
5103
207k
    xmlParseCommentComplex(ctxt, buf, len, size);
5104
207k
    ctxt->instate = state;
5105
207k
    return;
5106
16.0M
}
5107
5108
5109
/**
5110
 * xmlParsePITarget:
5111
 * @ctxt:  an XML parser context
5112
 *
5113
 * DEPRECATED: Internal function, don't use.
5114
 *
5115
 * parse the name of a PI
5116
 *
5117
 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5118
 *
5119
 * Returns the PITarget name or NULL
5120
 */
5121
5122
const xmlChar *
5123
660k
xmlParsePITarget(xmlParserCtxtPtr ctxt) {
5124
660k
    const xmlChar *name;
5125
5126
660k
    name = xmlParseName(ctxt);
5127
660k
    if ((name != NULL) &&
5128
660k
        ((name[0] == 'x') || (name[0] == 'X')) &&
5129
660k
        ((name[1] == 'm') || (name[1] == 'M')) &&
5130
660k
        ((name[2] == 'l') || (name[2] == 'L'))) {
5131
212k
  int i;
5132
212k
  if ((name[0] == 'x') && (name[1] == 'm') &&
5133
212k
      (name[2] == 'l') && (name[3] == 0)) {
5134
135k
      xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5135
135k
     "XML declaration allowed only at the start of the document\n");
5136
135k
      return(name);
5137
135k
  } else if (name[3] == 0) {
5138
16.8k
      xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5139
16.8k
      return(name);
5140
16.8k
  }
5141
165k
  for (i = 0;;i++) {
5142
165k
      if (xmlW3CPIs[i] == NULL) break;
5143
113k
      if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5144
8.04k
          return(name);
5145
113k
  }
5146
52.1k
  xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5147
52.1k
          "xmlParsePITarget: invalid name prefix 'xml'\n",
5148
52.1k
          NULL, NULL);
5149
52.1k
    }
5150
499k
    if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5151
35.5k
  xmlNsErr(ctxt, XML_NS_ERR_COLON,
5152
35.5k
     "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5153
35.5k
    }
5154
499k
    return(name);
5155
660k
}
5156
5157
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * Parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document.
 *
 * The value must be: optional blanks, "catalog", optional blanks, '=',
 * optional blanks, a quoted URL, optional blanks. A value where
 * "catalog" is not followed by '=' is ignored silently; any other
 * deviation produces an XML_WAR_CATALOG_PI warning.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *URL = NULL;
    const xmlChar *p = catalog;
    const xmlChar *urlStart;
    xmlChar quote;

    while (IS_BLANK_CH(*p))
        p++;
    if (xmlStrncmp(p, BAD_CAST "catalog", 7) != 0)
        goto error;
    p += 7;

    while (IS_BLANK_CH(*p))
        p++;
    if (*p != '=') {
        /* no warning in this case */
        return;
    }
    p++;

    while (IS_BLANK_CH(*p))
        p++;
    quote = *p;
    if (!((quote == '\'') || (quote == '"')))
        goto error;
    p++;

    /* the URL runs up to the matching quote */
    for (urlStart = p; (*p != 0) && (*p != quote); p++)
        ;
    if (*p == 0)
        goto error;
    URL = xmlStrndup(urlStart, p - urlStart);
    p++;

    /* nothing but blanks may follow the closing quote */
    while (IS_BLANK_CH(*p))
        p++;
    if (*p != 0)
        goto error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
5218
5219
/**
5220
 * xmlParsePI:
5221
 * @ctxt:  an XML parser context
5222
 *
5223
 * DEPRECATED: Internal function, don't use.
5224
 *
5225
 * parse an XML Processing Instruction.
5226
 *
5227
 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5228
 *
5229
 * The processing is transferred to SAX once parsed.
5230
 */
5231
5232
void
5233
660k
xmlParsePI(xmlParserCtxtPtr ctxt) {
5234
660k
    xmlChar *buf = NULL;
5235
660k
    size_t len = 0;
5236
660k
    size_t size = XML_PARSER_BUFFER_SIZE;
5237
660k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5238
230k
                       XML_MAX_HUGE_LENGTH :
5239
660k
                       XML_MAX_TEXT_LENGTH;
5240
660k
    int cur, l;
5241
660k
    const xmlChar *target;
5242
660k
    xmlParserInputState state;
5243
660k
    int count = 0;
5244
5245
660k
    if ((RAW == '<') && (NXT(1) == '?')) {
5246
660k
  int inputid = ctxt->input->id;
5247
660k
  state = ctxt->instate;
5248
660k
        ctxt->instate = XML_PARSER_PI;
5249
  /*
5250
   * this is a Processing Instruction.
5251
   */
5252
660k
  SKIP(2);
5253
660k
  SHRINK;
5254
5255
  /*
5256
   * Parse the target name and check for special support like
5257
   * namespace.
5258
   */
5259
660k
        target = xmlParsePITarget(ctxt);
5260
660k
  if (target != NULL) {
5261
574k
      if ((RAW == '?') && (NXT(1) == '>')) {
5262
99.5k
    if (inputid != ctxt->input->id) {
5263
0
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5264
0
                             "PI declaration doesn't start and stop in"
5265
0
                                   " the same entity\n");
5266
0
    }
5267
99.5k
    SKIP(2);
5268
5269
    /*
5270
     * SAX: PI detected.
5271
     */
5272
99.5k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5273
99.5k
        (ctxt->sax->processingInstruction != NULL))
5274
87.6k
        ctxt->sax->processingInstruction(ctxt->userData,
5275
87.6k
                                         target, NULL);
5276
99.5k
    if (ctxt->instate != XML_PARSER_EOF)
5277
99.5k
        ctxt->instate = state;
5278
99.5k
    return;
5279
99.5k
      }
5280
475k
      buf = (xmlChar *) xmlMallocAtomic(size);
5281
475k
      if (buf == NULL) {
5282
0
    xmlErrMemory(ctxt, NULL);
5283
0
    ctxt->instate = state;
5284
0
    return;
5285
0
      }
5286
475k
      if (SKIP_BLANKS == 0) {
5287
200k
    xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5288
200k
        "ParsePI: PI %s space expected\n", target);
5289
200k
      }
5290
475k
      cur = CUR_CHAR(l);
5291
28.3M
      while (IS_CHAR(cur) && /* checked */
5292
28.3M
       ((cur != '?') || (NXT(1) != '>'))) {
5293
27.8M
    if (len + 5 >= size) {
5294
67.9k
        xmlChar *tmp;
5295
67.9k
                    size_t new_size = size * 2;
5296
67.9k
        tmp = (xmlChar *) xmlRealloc(buf, new_size);
5297
67.9k
        if (tmp == NULL) {
5298
0
      xmlErrMemory(ctxt, NULL);
5299
0
      xmlFree(buf);
5300
0
      ctxt->instate = state;
5301
0
      return;
5302
0
        }
5303
67.9k
        buf = tmp;
5304
67.9k
                    size = new_size;
5305
67.9k
    }
5306
27.8M
    count++;
5307
27.8M
    if (count > 50) {
5308
388k
        SHRINK;
5309
388k
        GROW;
5310
388k
                    if (ctxt->instate == XML_PARSER_EOF) {
5311
0
                        xmlFree(buf);
5312
0
                        return;
5313
0
                    }
5314
388k
        count = 0;
5315
388k
    }
5316
27.8M
    COPY_BUF(l,buf,len,cur);
5317
27.8M
    NEXTL(l);
5318
27.8M
    cur = CUR_CHAR(l);
5319
27.8M
    if (cur == 0) {
5320
34.9k
        SHRINK;
5321
34.9k
        GROW;
5322
34.9k
        cur = CUR_CHAR(l);
5323
34.9k
    }
5324
27.8M
                if (len > maxLength) {
5325
0
                    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5326
0
                                      "PI %s too big found", target);
5327
0
                    xmlFree(buf);
5328
0
                    ctxt->instate = state;
5329
0
                    return;
5330
0
                }
5331
27.8M
      }
5332
475k
      buf[len] = 0;
5333
475k
      if (cur != '?') {
5334
96.7k
    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5335
96.7k
          "ParsePI: PI %s never end ...\n", target);
5336
378k
      } else {
5337
378k
    if (inputid != ctxt->input->id) {
5338
24
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5339
24
                             "PI declaration doesn't start and stop in"
5340
24
                                   " the same entity\n");
5341
24
    }
5342
378k
    SKIP(2);
5343
5344
378k
#ifdef LIBXML_CATALOG_ENABLED
5345
378k
    if (((state == XML_PARSER_MISC) ||
5346
378k
               (state == XML_PARSER_START)) &&
5347
378k
        (xmlStrEqual(target, XML_CATALOG_PI))) {
5348
386
        xmlCatalogAllow allow = xmlCatalogGetDefaults();
5349
386
        if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5350
386
      (allow == XML_CATA_ALLOW_ALL))
5351
386
      xmlParseCatalogPI(ctxt, buf);
5352
386
    }
5353
378k
#endif
5354
5355
5356
    /*
5357
     * SAX: PI detected.
5358
     */
5359
378k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5360
378k
        (ctxt->sax->processingInstruction != NULL))
5361
331k
        ctxt->sax->processingInstruction(ctxt->userData,
5362
331k
                                         target, buf);
5363
378k
      }
5364
475k
      xmlFree(buf);
5365
475k
  } else {
5366
85.1k
      xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5367
85.1k
  }
5368
560k
  if (ctxt->instate != XML_PARSER_EOF)
5369
560k
      ctxt->instate = state;
5370
560k
    }
5371
660k
}
5372
5373
/**
5374
 * xmlParseNotationDecl:
5375
 * @ctxt:  an XML parser context
5376
 *
5377
 * DEPRECATED: Internal function, don't use.
5378
 *
5379
 * Parse a notation declaration. Always consumes '<!'.
5380
 *
5381
 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID |  PublicID) S? '>'
5382
 *
5383
 * Hence there is actually 3 choices:
5384
 *     'PUBLIC' S PubidLiteral
5385
 *     'PUBLIC' S PubidLiteral S SystemLiteral
5386
 * and 'SYSTEM' S SystemLiteral
5387
 *
5388
 * See the NOTE on xmlParseExternalID().
5389
 */
5390
5391
void
5392
37.2k
xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5393
37.2k
    const xmlChar *name;
5394
37.2k
    xmlChar *Pubid;
5395
37.2k
    xmlChar *Systemid;
5396
5397
37.2k
    if ((CUR != '<') || (NXT(1) != '!'))
5398
0
        return;
5399
37.2k
    SKIP(2);
5400
5401
37.2k
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5402
36.0k
  int inputid = ctxt->input->id;
5403
36.0k
  SHRINK;
5404
36.0k
  SKIP(8);
5405
36.0k
  if (SKIP_BLANKS == 0) {
5406
549
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5407
549
         "Space required after '<!NOTATION'\n");
5408
549
      return;
5409
549
  }
5410
5411
35.5k
        name = xmlParseName(ctxt);
5412
35.5k
  if (name == NULL) {
5413
755
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5414
755
      return;
5415
755
  }
5416
34.7k
  if (xmlStrchr(name, ':') != NULL) {
5417
506
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5418
506
         "colons are forbidden from notation names '%s'\n",
5419
506
         name, NULL, NULL);
5420
506
  }
5421
34.7k
  if (SKIP_BLANKS == 0) {
5422
2.95k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5423
2.95k
         "Space required after the NOTATION name'\n");
5424
2.95k
      return;
5425
2.95k
  }
5426
5427
  /*
5428
   * Parse the IDs.
5429
   */
5430
31.8k
  Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5431
31.8k
  SKIP_BLANKS;
5432
5433
31.8k
  if (RAW == '>') {
5434
23.4k
      if (inputid != ctxt->input->id) {
5435
12
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5436
12
                         "Notation declaration doesn't start and stop"
5437
12
                               " in the same entity\n");
5438
12
      }
5439
23.4k
      NEXT;
5440
23.4k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5441
23.4k
    (ctxt->sax->notationDecl != NULL))
5442
18.2k
    ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5443
23.4k
  } else {
5444
8.38k
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5445
8.38k
  }
5446
31.8k
  if (Systemid != NULL) xmlFree(Systemid);
5447
31.8k
  if (Pubid != NULL) xmlFree(Pubid);
5448
31.8k
    }
5449
37.2k
}
5450
5451
/**
5452
 * xmlParseEntityDecl:
5453
 * @ctxt:  an XML parser context
5454
 *
5455
 * DEPRECATED: Internal function, don't use.
5456
 *
5457
 * Parse an entity declaration. Always consumes '<!'.
5458
 *
5459
 * [70] EntityDecl ::= GEDecl | PEDecl
5460
 *
5461
 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5462
 *
5463
 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5464
 *
5465
 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5466
 *
5467
 * [74] PEDef ::= EntityValue | ExternalID
5468
 *
5469
 * [76] NDataDecl ::= S 'NDATA' S Name
5470
 *
5471
 * [ VC: Notation Declared ]
5472
 * The Name must match the declared name of a notation.
5473
 */
5474
5475
void
5476
920k
xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5477
920k
    const xmlChar *name = NULL;
5478
920k
    xmlChar *value = NULL;
5479
920k
    xmlChar *URI = NULL, *literal = NULL;
5480
920k
    const xmlChar *ndata = NULL;
5481
920k
    int isParameter = 0;
5482
920k
    xmlChar *orig = NULL;
5483
5484
920k
    if ((CUR != '<') || (NXT(1) != '!'))
5485
0
        return;
5486
920k
    SKIP(2);
5487
5488
    /* GROW; done in the caller */
5489
920k
    if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5490
917k
  int inputid = ctxt->input->id;
5491
917k
  SHRINK;
5492
917k
  SKIP(6);
5493
917k
  if (SKIP_BLANKS == 0) {
5494
17.9k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5495
17.9k
         "Space required after '<!ENTITY'\n");
5496
17.9k
  }
5497
5498
917k
  if (RAW == '%') {
5499
391k
      NEXT;
5500
391k
      if (SKIP_BLANKS == 0) {
5501
3.57k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5502
3.57k
             "Space required after '%%'\n");
5503
3.57k
      }
5504
391k
      isParameter = 1;
5505
391k
  }
5506
5507
917k
        name = xmlParseName(ctxt);
5508
917k
  if (name == NULL) {
5509
14.1k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5510
14.1k
                     "xmlParseEntityDecl: no name\n");
5511
14.1k
            return;
5512
14.1k
  }
5513
903k
  if (xmlStrchr(name, ':') != NULL) {
5514
3.04k
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5515
3.04k
         "colons are forbidden from entities names '%s'\n",
5516
3.04k
         name, NULL, NULL);
5517
3.04k
  }
5518
903k
  if (SKIP_BLANKS == 0) {
5519
12.7k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5520
12.7k
         "Space required after the entity name\n");
5521
12.7k
  }
5522
5523
903k
  ctxt->instate = XML_PARSER_ENTITY_DECL;
5524
  /*
5525
   * handle the various case of definitions...
5526
   */
5527
903k
  if (isParameter) {
5528
388k
      if ((RAW == '"') || (RAW == '\'')) {
5529
356k
          value = xmlParseEntityValue(ctxt, &orig);
5530
356k
    if (value) {
5531
321k
        if ((ctxt->sax != NULL) &&
5532
321k
      (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5533
296k
      ctxt->sax->entityDecl(ctxt->userData, name,
5534
296k
                        XML_INTERNAL_PARAMETER_ENTITY,
5535
296k
            NULL, NULL, value);
5536
321k
    }
5537
356k
      } else {
5538
31.8k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5539
31.8k
    if ((URI == NULL) && (literal == NULL)) {
5540
2.87k
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5541
2.87k
    }
5542
31.8k
    if (URI) {
5543
28.3k
        xmlURIPtr uri;
5544
5545
28.3k
        uri = xmlParseURI((const char *) URI);
5546
28.3k
        if (uri == NULL) {
5547
3.30k
            xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5548
3.30k
             "Invalid URI: %s\n", URI);
5549
      /*
5550
       * This really ought to be a well formedness error
5551
       * but the XML Core WG decided otherwise c.f. issue
5552
       * E26 of the XML erratas.
5553
       */
5554
25.0k
        } else {
5555
25.0k
      if (uri->fragment != NULL) {
5556
          /*
5557
           * Okay this is foolish to block those but not
5558
           * invalid URIs.
5559
           */
5560
129
          xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5561
24.8k
      } else {
5562
24.8k
          if ((ctxt->sax != NULL) &&
5563
24.8k
        (!ctxt->disableSAX) &&
5564
24.8k
        (ctxt->sax->entityDecl != NULL))
5565
23.4k
        ctxt->sax->entityDecl(ctxt->userData, name,
5566
23.4k
              XML_EXTERNAL_PARAMETER_ENTITY,
5567
23.4k
              literal, URI, NULL);
5568
24.8k
      }
5569
25.0k
      xmlFreeURI(uri);
5570
25.0k
        }
5571
28.3k
    }
5572
31.8k
      }
5573
514k
  } else {
5574
514k
      if ((RAW == '"') || (RAW == '\'')) {
5575
436k
          value = xmlParseEntityValue(ctxt, &orig);
5576
436k
    if ((ctxt->sax != NULL) &&
5577
436k
        (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5578
381k
        ctxt->sax->entityDecl(ctxt->userData, name,
5579
381k
        XML_INTERNAL_GENERAL_ENTITY,
5580
381k
        NULL, NULL, value);
5581
    /*
5582
     * For expat compatibility in SAX mode.
5583
     */
5584
436k
    if ((ctxt->myDoc == NULL) ||
5585
436k
        (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5586
9.10k
        if (ctxt->myDoc == NULL) {
5587
1.61k
      ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5588
1.61k
      if (ctxt->myDoc == NULL) {
5589
0
          xmlErrMemory(ctxt, "New Doc failed");
5590
0
          return;
5591
0
      }
5592
1.61k
      ctxt->myDoc->properties = XML_DOC_INTERNAL;
5593
1.61k
        }
5594
9.10k
        if (ctxt->myDoc->intSubset == NULL)
5595
1.61k
      ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5596
1.61k
              BAD_CAST "fake", NULL, NULL);
5597
5598
9.10k
        xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5599
9.10k
                    NULL, NULL, value);
5600
9.10k
    }
5601
436k
      } else {
5602
78.5k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5603
78.5k
    if ((URI == NULL) && (literal == NULL)) {
5604
9.64k
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5605
9.64k
    }
5606
78.5k
    if (URI) {
5607
66.7k
        xmlURIPtr uri;
5608
5609
66.7k
        uri = xmlParseURI((const char *)URI);
5610
66.7k
        if (uri == NULL) {
5611
5.86k
            xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5612
5.86k
             "Invalid URI: %s\n", URI);
5613
      /*
5614
       * This really ought to be a well formedness error
5615
       * but the XML Core WG decided otherwise c.f. issue
5616
       * E26 of the XML erratas.
5617
       */
5618
60.8k
        } else {
5619
60.8k
      if (uri->fragment != NULL) {
5620
          /*
5621
           * Okay this is foolish to block those but not
5622
           * invalid URIs.
5623
           */
5624
3.11k
          xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5625
3.11k
      }
5626
60.8k
      xmlFreeURI(uri);
5627
60.8k
        }
5628
66.7k
    }
5629
78.5k
    if ((RAW != '>') && (SKIP_BLANKS == 0)) {
5630
11.4k
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5631
11.4k
           "Space required before 'NDATA'\n");
5632
11.4k
    }
5633
78.5k
    if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5634
13.2k
        SKIP(5);
5635
13.2k
        if (SKIP_BLANKS == 0) {
5636
733
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5637
733
               "Space required after 'NDATA'\n");
5638
733
        }
5639
13.2k
        ndata = xmlParseName(ctxt);
5640
13.2k
        if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5641
13.2k
            (ctxt->sax->unparsedEntityDecl != NULL))
5642
11.4k
      ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5643
11.4k
            literal, URI, ndata);
5644
65.2k
    } else {
5645
65.2k
        if ((ctxt->sax != NULL) &&
5646
65.2k
            (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5647
56.4k
      ctxt->sax->entityDecl(ctxt->userData, name,
5648
56.4k
            XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5649
56.4k
            literal, URI, NULL);
5650
        /*
5651
         * For expat compatibility in SAX mode.
5652
         * assuming the entity replacement was asked for
5653
         */
5654
65.2k
        if ((ctxt->replaceEntities != 0) &&
5655
65.2k
      ((ctxt->myDoc == NULL) ||
5656
37.8k
      (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5657
1.96k
      if (ctxt->myDoc == NULL) {
5658
1.00k
          ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5659
1.00k
          if (ctxt->myDoc == NULL) {
5660
0
              xmlErrMemory(ctxt, "New Doc failed");
5661
0
        return;
5662
0
          }
5663
1.00k
          ctxt->myDoc->properties = XML_DOC_INTERNAL;
5664
1.00k
      }
5665
5666
1.96k
      if (ctxt->myDoc->intSubset == NULL)
5667
1.00k
          ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5668
1.00k
            BAD_CAST "fake", NULL, NULL);
5669
1.96k
      xmlSAX2EntityDecl(ctxt, name,
5670
1.96k
                  XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5671
1.96k
                  literal, URI, NULL);
5672
1.96k
        }
5673
65.2k
    }
5674
78.5k
      }
5675
514k
  }
5676
903k
  if (ctxt->instate == XML_PARSER_EOF)
5677
892
      goto done;
5678
902k
  SKIP_BLANKS;
5679
902k
  if (RAW != '>') {
5680
31.5k
      xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5681
31.5k
              "xmlParseEntityDecl: entity %s not terminated\n", name);
5682
31.5k
      xmlHaltParser(ctxt);
5683
870k
  } else {
5684
870k
      if (inputid != ctxt->input->id) {
5685
49
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5686
49
                         "Entity declaration doesn't start and stop in"
5687
49
                               " the same entity\n");
5688
49
      }
5689
870k
      NEXT;
5690
870k
  }
5691
902k
  if (orig != NULL) {
5692
      /*
5693
       * Ugly mechanism to save the raw entity value.
5694
       */
5695
753k
      xmlEntityPtr cur = NULL;
5696
5697
753k
      if (isParameter) {
5698
336k
          if ((ctxt->sax != NULL) &&
5699
336k
        (ctxt->sax->getParameterEntity != NULL))
5700
336k
        cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5701
416k
      } else {
5702
416k
          if ((ctxt->sax != NULL) &&
5703
416k
        (ctxt->sax->getEntity != NULL))
5704
416k
        cur = ctxt->sax->getEntity(ctxt->userData, name);
5705
416k
    if ((cur == NULL) && (ctxt->userData==ctxt)) {
5706
31.9k
        cur = xmlSAX2GetEntity(ctxt, name);
5707
31.9k
    }
5708
416k
      }
5709
753k
            if ((cur != NULL) && (cur->orig == NULL)) {
5710
600k
    cur->orig = orig;
5711
600k
                orig = NULL;
5712
600k
      }
5713
753k
  }
5714
5715
903k
done:
5716
903k
  if (value != NULL) xmlFree(value);
5717
903k
  if (URI != NULL) xmlFree(URI);
5718
903k
  if (literal != NULL) xmlFree(literal);
5719
903k
        if (orig != NULL) xmlFree(orig);
5720
903k
    }
5721
920k
}
5722
5723
/**
5724
 * xmlParseDefaultDecl:
5725
 * @ctxt:  an XML parser context
5726
 * @value:  Receive a possible fixed default value for the attribute
5727
 *
5728
 * DEPRECATED: Internal function, don't use.
5729
 *
5730
 * Parse an attribute default declaration
5731
 *
5732
 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5733
 *
5734
 * [ VC: Required Attribute ]
5735
 * if the default declaration is the keyword #REQUIRED, then the
5736
 * attribute must be specified for all elements of the type in the
5737
 * attribute-list declaration.
5738
 *
5739
 * [ VC: Attribute Default Legal ]
5740
 * The declared default value must meet the lexical constraints of
5741
 * the declared attribute type c.f. xmlValidateAttributeDecl()
5742
 *
5743
 * [ VC: Fixed Attribute Default ]
5744
 * if an attribute has a default value declared with the #FIXED
5745
 * keyword, instances of that attribute must match the default value.
5746
 *
5747
 * [ WFC: No < in Attribute Values ]
5748
 * handled in xmlParseAttValue()
5749
 *
5750
 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5751
 *          or XML_ATTRIBUTE_FIXED.
5752
 */
5753
5754
int
5755
1.44M
xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5756
1.44M
    int val;
5757
1.44M
    xmlChar *ret;
5758
5759
1.44M
    *value = NULL;
5760
1.44M
    if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5761
140k
  SKIP(9);
5762
140k
  return(XML_ATTRIBUTE_REQUIRED);
5763
140k
    }
5764
1.30M
    if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5765
1.02M
  SKIP(8);
5766
1.02M
  return(XML_ATTRIBUTE_IMPLIED);
5767
1.02M
    }
5768
278k
    val = XML_ATTRIBUTE_NONE;
5769
278k
    if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5770
122k
  SKIP(6);
5771
122k
  val = XML_ATTRIBUTE_FIXED;
5772
122k
  if (SKIP_BLANKS == 0) {
5773
839
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5774
839
         "Space required after '#FIXED'\n");
5775
839
  }
5776
122k
    }
5777
278k
    ret = xmlParseAttValue(ctxt);
5778
278k
    ctxt->instate = XML_PARSER_DTD;
5779
278k
    if (ret == NULL) {
5780
9.95k
  xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5781
9.95k
           "Attribute default value declaration error\n");
5782
9.95k
    } else
5783
268k
        *value = ret;
5784
278k
    return(val);
5785
1.30M
}
5786
5787
/**
5788
 * xmlParseNotationType:
5789
 * @ctxt:  an XML parser context
5790
 *
5791
 * DEPRECATED: Internal function, don't use.
5792
 *
5793
 * parse an Notation attribute type.
5794
 *
5795
 * Note: the leading 'NOTATION' S part has already being parsed...
5796
 *
5797
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5798
 *
5799
 * [ VC: Notation Attributes ]
5800
 * Values of this type must match one of the notation names included
5801
 * in the declaration; all notation names in the declaration must be declared.
5802
 *
5803
 * Returns: the notation attribute tree built while parsing
5804
 */
5805
5806
xmlEnumerationPtr
5807
14.1k
xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5808
14.1k
    const xmlChar *name;
5809
14.1k
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5810
5811
14.1k
    if (RAW != '(') {
5812
1.01k
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5813
1.01k
  return(NULL);
5814
1.01k
    }
5815
13.1k
    SHRINK;
5816
21.4k
    do {
5817
21.4k
        NEXT;
5818
21.4k
  SKIP_BLANKS;
5819
21.4k
        name = xmlParseName(ctxt);
5820
21.4k
  if (name == NULL) {
5821
929
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5822
929
         "Name expected in NOTATION declaration\n");
5823
929
            xmlFreeEnumeration(ret);
5824
929
      return(NULL);
5825
929
  }
5826
20.5k
  tmp = ret;
5827
46.6k
  while (tmp != NULL) {
5828
29.5k
      if (xmlStrEqual(name, tmp->name)) {
5829
3.37k
    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5830
3.37k
    "standalone: attribute notation value token %s duplicated\n",
5831
3.37k
         name, NULL);
5832
3.37k
    if (!xmlDictOwns(ctxt->dict, name))
5833
0
        xmlFree((xmlChar *) name);
5834
3.37k
    break;
5835
3.37k
      }
5836
26.1k
      tmp = tmp->next;
5837
26.1k
  }
5838
20.5k
  if (tmp == NULL) {
5839
17.1k
      cur = xmlCreateEnumeration(name);
5840
17.1k
      if (cur == NULL) {
5841
0
                xmlFreeEnumeration(ret);
5842
0
                return(NULL);
5843
0
            }
5844
17.1k
      if (last == NULL) ret = last = cur;
5845
4.68k
      else {
5846
4.68k
    last->next = cur;
5847
4.68k
    last = cur;
5848
4.68k
      }
5849
17.1k
  }
5850
20.5k
  SKIP_BLANKS;
5851
20.5k
    } while (RAW == '|');
5852
12.2k
    if (RAW != ')') {
5853
3.36k
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5854
3.36k
        xmlFreeEnumeration(ret);
5855
3.36k
  return(NULL);
5856
3.36k
    }
5857
8.85k
    NEXT;
5858
8.85k
    return(ret);
5859
12.2k
}
5860
5861
/**
5862
 * xmlParseEnumerationType:
5863
 * @ctxt:  an XML parser context
5864
 *
5865
 * DEPRECATED: Internal function, don't use.
5866
 *
5867
 * parse an Enumeration attribute type.
5868
 *
5869
 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5870
 *
5871
 * [ VC: Enumeration ]
5872
 * Values of this type must match one of the Nmtoken tokens in
5873
 * the declaration
5874
 *
5875
 * Returns: the enumeration attribute tree built while parsing
5876
 */
5877
5878
xmlEnumerationPtr
5879
153k
xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5880
153k
    xmlChar *name;
5881
153k
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5882
5883
153k
    if (RAW != '(') {
5884
17.4k
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5885
17.4k
  return(NULL);
5886
17.4k
    }
5887
136k
    SHRINK;
5888
366k
    do {
5889
366k
        NEXT;
5890
366k
  SKIP_BLANKS;
5891
366k
        name = xmlParseNmtoken(ctxt);
5892
366k
  if (name == NULL) {
5893
1.89k
      xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5894
1.89k
      return(ret);
5895
1.89k
  }
5896
364k
  tmp = ret;
5897
910k
  while (tmp != NULL) {
5898
549k
      if (xmlStrEqual(name, tmp->name)) {
5899
2.97k
    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5900
2.97k
    "standalone: attribute enumeration value token %s duplicated\n",
5901
2.97k
         name, NULL);
5902
2.97k
    if (!xmlDictOwns(ctxt->dict, name))
5903
2.97k
        xmlFree(name);
5904
2.97k
    break;
5905
2.97k
      }
5906
546k
      tmp = tmp->next;
5907
546k
  }
5908
364k
  if (tmp == NULL) {
5909
361k
      cur = xmlCreateEnumeration(name);
5910
361k
      if (!xmlDictOwns(ctxt->dict, name))
5911
361k
    xmlFree(name);
5912
361k
      if (cur == NULL) {
5913
0
                xmlFreeEnumeration(ret);
5914
0
                return(NULL);
5915
0
            }
5916
361k
      if (last == NULL) ret = last = cur;
5917
226k
      else {
5918
226k
    last->next = cur;
5919
226k
    last = cur;
5920
226k
      }
5921
361k
  }
5922
364k
  SKIP_BLANKS;
5923
364k
    } while (RAW == '|');
5924
134k
    if (RAW != ')') {
5925
5.50k
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5926
5.50k
  return(ret);
5927
5.50k
    }
5928
128k
    NEXT;
5929
128k
    return(ret);
5930
134k
}
5931
5932
/**
5933
 * xmlParseEnumeratedType:
5934
 * @ctxt:  an XML parser context
5935
 * @tree:  the enumeration tree built while parsing
5936
 *
5937
 * DEPRECATED: Internal function, don't use.
5938
 *
5939
 * parse an Enumerated attribute type.
5940
 *
5941
 * [57] EnumeratedType ::= NotationType | Enumeration
5942
 *
5943
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5944
 *
5945
 *
5946
 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5947
 */
5948
5949
int
5950
168k
xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5951
168k
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5952
14.5k
  SKIP(8);
5953
14.5k
  if (SKIP_BLANKS == 0) {
5954
412
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5955
412
         "Space required after 'NOTATION'\n");
5956
412
      return(0);
5957
412
  }
5958
14.1k
  *tree = xmlParseNotationType(ctxt);
5959
14.1k
  if (*tree == NULL) return(0);
5960
8.85k
  return(XML_ATTRIBUTE_NOTATION);
5961
14.1k
    }
5962
153k
    *tree = xmlParseEnumerationType(ctxt);
5963
153k
    if (*tree == NULL) return(0);
5964
134k
    return(XML_ATTRIBUTE_ENUMERATION);
5965
153k
}
5966
5967
/**
5968
 * xmlParseAttributeType:
5969
 * @ctxt:  an XML parser context
5970
 * @tree:  the enumeration tree built while parsing
5971
 *
5972
 * DEPRECATED: Internal function, don't use.
5973
 *
5974
 * parse the Attribute list def for an element
5975
 *
5976
 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5977
 *
5978
 * [55] StringType ::= 'CDATA'
5979
 *
5980
 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5981
 *                        'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5982
 *
5983
 * Validity constraints for attribute values syntax are checked in
5984
 * xmlValidateAttributeValue()
5985
 *
5986
 * [ VC: ID ]
5987
 * Values of type ID must match the Name production. A name must not
5988
 * appear more than once in an XML document as a value of this type;
5989
 * i.e., ID values must uniquely identify the elements which bear them.
5990
 *
5991
 * [ VC: One ID per Element Type ]
5992
 * No element type may have more than one ID attribute specified.
5993
 *
5994
 * [ VC: ID Attribute Default ]
5995
 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5996
 *
5997
 * [ VC: IDREF ]
5998
 * Values of type IDREF must match the Name production, and values
5999
 * of type IDREFS must match Names; each IDREF Name must match the value
6000
 * of an ID attribute on some element in the XML document; i.e. IDREF
6001
 * values must match the value of some ID attribute.
6002
 *
6003
 * [ VC: Entity Name ]
6004
 * Values of type ENTITY must match the Name production, values
6005
 * of type ENTITIES must match Names; each Entity Name must match the
6006
 * name of an unparsed entity declared in the DTD.
6007
 *
6008
 * [ VC: Name Token ]
6009
 * Values of type NMTOKEN must match the Nmtoken production; values
6010
 * of type NMTOKENS must match Nmtokens.
6011
 *
6012
 * Returns the attribute type
6013
 */
6014
int
6015
1.47M
xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
6016
1.47M
    SHRINK;
6017
1.47M
    if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
6018
488k
  SKIP(5);
6019
488k
  return(XML_ATTRIBUTE_CDATA);
6020
990k
     } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
6021
25.0k
  SKIP(6);
6022
25.0k
  return(XML_ATTRIBUTE_IDREFS);
6023
965k
     } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
6024
54.8k
  SKIP(5);
6025
54.8k
  return(XML_ATTRIBUTE_IDREF);
6026
910k
     } else if ((RAW == 'I') && (NXT(1) == 'D')) {
6027
360k
        SKIP(2);
6028
360k
  return(XML_ATTRIBUTE_ID);
6029
550k
     } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
6030
11.0k
  SKIP(6);
6031
11.0k
  return(XML_ATTRIBUTE_ENTITY);
6032
539k
     } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
6033
8.15k
  SKIP(8);
6034
8.15k
  return(XML_ATTRIBUTE_ENTITIES);
6035
531k
     } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
6036
66.5k
  SKIP(8);
6037
66.5k
  return(XML_ATTRIBUTE_NMTOKENS);
6038
465k
     } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
6039
296k
  SKIP(7);
6040
296k
  return(XML_ATTRIBUTE_NMTOKEN);
6041
296k
     }
6042
168k
     return(xmlParseEnumeratedType(ctxt, tree));
6043
1.47M
}
6044
6045
/**
6046
 * xmlParseAttributeListDecl:
6047
 * @ctxt:  an XML parser context
6048
 *
6049
 * DEPRECATED: Internal function, don't use.
6050
 *
6051
 * Parse an attribute list declaration for an element. Always consumes '<!'.
6052
 *
6053
 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
6054
 *
6055
 * [53] AttDef ::= S Name S AttType S DefaultDecl
6056
 *
6057
 */
6058
void
6059
738k
xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
6060
738k
    const xmlChar *elemName;
6061
738k
    const xmlChar *attrName;
6062
738k
    xmlEnumerationPtr tree;
6063
6064
738k
    if ((CUR != '<') || (NXT(1) != '!'))
6065
0
        return;
6066
738k
    SKIP(2);
6067
6068
738k
    if (CMP7(CUR_PTR, 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
6069
734k
  int inputid = ctxt->input->id;
6070
6071
734k
  SKIP(7);
6072
734k
  if (SKIP_BLANKS == 0) {
6073
7.41k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6074
7.41k
                     "Space required after '<!ATTLIST'\n");
6075
7.41k
  }
6076
734k
        elemName = xmlParseName(ctxt);
6077
734k
  if (elemName == NULL) {
6078
3.63k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6079
3.63k
         "ATTLIST: no name for Element\n");
6080
3.63k
      return;
6081
3.63k
  }
6082
731k
  SKIP_BLANKS;
6083
731k
  GROW;
6084
2.11M
  while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
6085
1.51M
      int type;
6086
1.51M
      int def;
6087
1.51M
      xmlChar *defaultValue = NULL;
6088
6089
1.51M
      GROW;
6090
1.51M
            tree = NULL;
6091
1.51M
      attrName = xmlParseName(ctxt);
6092
1.51M
      if (attrName == NULL) {
6093
21.5k
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6094
21.5k
             "ATTLIST: no name for Attribute\n");
6095
21.5k
    break;
6096
21.5k
      }
6097
1.49M
      GROW;
6098
1.49M
      if (SKIP_BLANKS == 0) {
6099
15.9k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6100
15.9k
            "Space required after the attribute name\n");
6101
15.9k
    break;
6102
15.9k
      }
6103
6104
1.47M
      type = xmlParseAttributeType(ctxt, &tree);
6105
1.47M
      if (type <= 0) {
6106
24.6k
          break;
6107
24.6k
      }
6108
6109
1.45M
      GROW;
6110
1.45M
      if (SKIP_BLANKS == 0) {
6111
11.0k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6112
11.0k
             "Space required after the attribute type\n");
6113
11.0k
          if (tree != NULL)
6114
8.09k
        xmlFreeEnumeration(tree);
6115
11.0k
    break;
6116
11.0k
      }
6117
6118
1.44M
      def = xmlParseDefaultDecl(ctxt, &defaultValue);
6119
1.44M
      if (def <= 0) {
6120
0
                if (defaultValue != NULL)
6121
0
        xmlFree(defaultValue);
6122
0
          if (tree != NULL)
6123
0
        xmlFreeEnumeration(tree);
6124
0
          break;
6125
0
      }
6126
1.44M
      if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6127
143k
          xmlAttrNormalizeSpace(defaultValue, defaultValue);
6128
6129
1.44M
      GROW;
6130
1.44M
            if (RAW != '>') {
6131
1.19M
    if (SKIP_BLANKS == 0) {
6132
57.7k
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6133
57.7k
      "Space required after the attribute default value\n");
6134
57.7k
        if (defaultValue != NULL)
6135
45.6k
      xmlFree(defaultValue);
6136
57.7k
        if (tree != NULL)
6137
10.5k
      xmlFreeEnumeration(tree);
6138
57.7k
        break;
6139
57.7k
    }
6140
1.19M
      }
6141
1.38M
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6142
1.38M
    (ctxt->sax->attributeDecl != NULL))
6143
1.26M
    ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6144
1.26M
                          type, def, defaultValue, tree);
6145
118k
      else if (tree != NULL)
6146
12.1k
    xmlFreeEnumeration(tree);
6147
6148
1.38M
      if ((ctxt->sax2) && (defaultValue != NULL) &&
6149
1.38M
          (def != XML_ATTRIBUTE_IMPLIED) &&
6150
1.38M
    (def != XML_ATTRIBUTE_REQUIRED)) {
6151
157k
    xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6152
157k
      }
6153
1.38M
      if (ctxt->sax2) {
6154
1.00M
    xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6155
1.00M
      }
6156
1.38M
      if (defaultValue != NULL)
6157
222k
          xmlFree(defaultValue);
6158
1.38M
      GROW;
6159
1.38M
  }
6160
731k
  if (RAW == '>') {
6161
606k
      if (inputid != ctxt->input->id) {
6162
40
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6163
40
                               "Attribute list declaration doesn't start and"
6164
40
                               " stop in the same entity\n");
6165
40
      }
6166
606k
      NEXT;
6167
606k
  }
6168
731k
    }
6169
738k
}
6170
6171
/**
6172
 * xmlParseElementMixedContentDecl:
6173
 * @ctxt:  an XML parser context
6174
 * @inputchk:  the input used for the current entity, needed for boundary checks
6175
 *
6176
 * DEPRECATED: Internal function, don't use.
6177
 *
6178
 * parse the declaration for a Mixed Element content
6179
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6180
 *
6181
 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6182
 *                '(' S? '#PCDATA' S? ')'
6183
 *
6184
 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6185
 *
6186
 * [ VC: No Duplicate Types ]
6187
 * The same name must not appear more than once in a single
6188
 * mixed-content declaration.
6189
 *
6190
 * returns: the list of the xmlElementContentPtr describing the element choices
6191
 */
6192
xmlElementContentPtr
6193
318k
xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6194
318k
    xmlElementContentPtr ret = NULL, cur = NULL, n;
6195
318k
    const xmlChar *elem = NULL;
6196
6197
318k
    GROW;
6198
318k
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6199
318k
  SKIP(7);
6200
318k
  SKIP_BLANKS;
6201
318k
  SHRINK;
6202
318k
  if (RAW == ')') {
6203
232k
      if (ctxt->input->id != inputchk) {
6204
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6205
0
                               "Element content declaration doesn't start and"
6206
0
                               " stop in the same entity\n");
6207
0
      }
6208
232k
      NEXT;
6209
232k
      ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6210
232k
      if (ret == NULL)
6211
0
          return(NULL);
6212
232k
      if (RAW == '*') {
6213
389
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6214
389
    NEXT;
6215
389
      }
6216
232k
      return(ret);
6217
232k
  }
6218
85.3k
  if ((RAW == '(') || (RAW == '|')) {
6219
84.0k
      ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6220
84.0k
      if (ret == NULL) return(NULL);
6221
84.0k
  }
6222
864k
  while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
6223
780k
      NEXT;
6224
780k
      if (elem == NULL) {
6225
83.9k
          ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6226
83.9k
    if (ret == NULL) {
6227
0
        xmlFreeDocElementContent(ctxt->myDoc, cur);
6228
0
                    return(NULL);
6229
0
                }
6230
83.9k
    ret->c1 = cur;
6231
83.9k
    if (cur != NULL)
6232
83.9k
        cur->parent = ret;
6233
83.9k
    cur = ret;
6234
696k
      } else {
6235
696k
          n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6236
696k
    if (n == NULL) {
6237
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6238
0
                    return(NULL);
6239
0
                }
6240
696k
    n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6241
696k
    if (n->c1 != NULL)
6242
696k
        n->c1->parent = n;
6243
696k
          cur->c2 = n;
6244
696k
    if (n != NULL)
6245
696k
        n->parent = cur;
6246
696k
    cur = n;
6247
696k
      }
6248
780k
      SKIP_BLANKS;
6249
780k
      elem = xmlParseName(ctxt);
6250
780k
      if (elem == NULL) {
6251
741
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6252
741
      "xmlParseElementMixedContentDecl : Name expected\n");
6253
741
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6254
741
    return(NULL);
6255
741
      }
6256
779k
      SKIP_BLANKS;
6257
779k
      GROW;
6258
779k
  }
6259
84.5k
  if ((RAW == ')') && (NXT(1) == '*')) {
6260
79.2k
      if (elem != NULL) {
6261
79.2k
    cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6262
79.2k
                                   XML_ELEMENT_CONTENT_ELEMENT);
6263
79.2k
    if (cur->c2 != NULL)
6264
79.2k
        cur->c2->parent = cur;
6265
79.2k
            }
6266
79.2k
            if (ret != NULL)
6267
79.2k
                ret->ocur = XML_ELEMENT_CONTENT_MULT;
6268
79.2k
      if (ctxt->input->id != inputchk) {
6269
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6270
0
                               "Element content declaration doesn't start and"
6271
0
                               " stop in the same entity\n");
6272
0
      }
6273
79.2k
      SKIP(2);
6274
79.2k
  } else {
6275
5.32k
      xmlFreeDocElementContent(ctxt->myDoc, ret);
6276
5.32k
      xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6277
5.32k
      return(NULL);
6278
5.32k
  }
6279
6280
84.5k
    } else {
6281
0
  xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6282
0
    }
6283
79.2k
    return(ret);
6284
318k
}
6285
6286
/**
6287
 * xmlParseElementChildrenContentDeclPriv:
6288
 * @ctxt:  an XML parser context
6289
 * @inputchk:  the input used for the current entity, needed for boundary checks
6290
 * @depth: the level of recursion
6291
 *
6292
 * parse the declaration for a Mixed Element content
6293
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6294
 *
6295
 *
6296
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6297
 *
6298
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6299
 *
6300
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6301
 *
6302
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6303
 *
6304
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6305
 * TODO Parameter-entity replacement text must be properly nested
6306
 *  with parenthesized groups. That is to say, if either of the
6307
 *  opening or closing parentheses in a choice, seq, or Mixed
6308
 *  construct is contained in the replacement text for a parameter
6309
 *  entity, both must be contained in the same replacement text. For
6310
 *  interoperability, if a parameter-entity reference appears in a
6311
 *  choice, seq, or Mixed construct, its replacement text should not
6312
 *  be empty, and neither the first nor last non-blank character of
6313
 *  the replacement text should be a connector (| or ,).
6314
 *
6315
 * Returns the tree of xmlElementContentPtr describing the element
6316
 *          hierarchy.
6317
 */
6318
static xmlElementContentPtr
6319
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6320
1.52M
                                       int depth) {
6321
1.52M
    xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6322
1.52M
    const xmlChar *elem;
6323
1.52M
    xmlChar type = 0;
6324
6325
1.52M
    if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6326
1.52M
        (depth >  2048)) {
6327
192
        xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6328
192
"xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6329
192
                          depth);
6330
192
  return(NULL);
6331
192
    }
6332
1.52M
    SKIP_BLANKS;
6333
1.52M
    GROW;
6334
1.52M
    if (RAW == '(') {
6335
1.09M
  int inputid = ctxt->input->id;
6336
6337
        /* Recurse on first child */
6338
1.09M
  NEXT;
6339
1.09M
  SKIP_BLANKS;
6340
1.09M
        cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6341
1.09M
                                                           depth + 1);
6342
1.09M
        if (cur == NULL)
6343
1.05M
            return(NULL);
6344
33.2k
  SKIP_BLANKS;
6345
33.2k
  GROW;
6346
430k
    } else {
6347
430k
  elem = xmlParseName(ctxt);
6348
430k
  if (elem == NULL) {
6349
9.38k
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6350
9.38k
      return(NULL);
6351
9.38k
  }
6352
421k
        cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6353
421k
  if (cur == NULL) {
6354
0
      xmlErrMemory(ctxt, NULL);
6355
0
      return(NULL);
6356
0
  }
6357
421k
  GROW;
6358
421k
  if (RAW == '?') {
6359
26.8k
      cur->ocur = XML_ELEMENT_CONTENT_OPT;
6360
26.8k
      NEXT;
6361
394k
  } else if (RAW == '*') {
6362
41.3k
      cur->ocur = XML_ELEMENT_CONTENT_MULT;
6363
41.3k
      NEXT;
6364
352k
  } else if (RAW == '+') {
6365
64.0k
      cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6366
64.0k
      NEXT;
6367
288k
  } else {
6368
288k
      cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6369
288k
  }
6370
421k
  GROW;
6371
421k
    }
6372
454k
    SKIP_BLANKS;
6373
454k
    SHRINK;
6374
1.61M
    while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
6375
        /*
6376
   * Each loop we parse one separator and one element.
6377
   */
6378
1.19M
        if (RAW == ',') {
6379
357k
      if (type == 0) type = CUR;
6380
6381
      /*
6382
       * Detect "Name | Name , Name" error
6383
       */
6384
211k
      else if (type != CUR) {
6385
135
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6386
135
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6387
135
                      type);
6388
135
    if ((last != NULL) && (last != ret))
6389
135
        xmlFreeDocElementContent(ctxt->myDoc, last);
6390
135
    if (ret != NULL)
6391
135
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6392
135
    return(NULL);
6393
135
      }
6394
357k
      NEXT;
6395
6396
357k
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6397
357k
      if (op == NULL) {
6398
0
    if ((last != NULL) && (last != ret))
6399
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6400
0
          xmlFreeDocElementContent(ctxt->myDoc, ret);
6401
0
    return(NULL);
6402
0
      }
6403
357k
      if (last == NULL) {
6404
145k
    op->c1 = ret;
6405
145k
    if (ret != NULL)
6406
145k
        ret->parent = op;
6407
145k
    ret = cur = op;
6408
211k
      } else {
6409
211k
          cur->c2 = op;
6410
211k
    if (op != NULL)
6411
211k
        op->parent = cur;
6412
211k
    op->c1 = last;
6413
211k
    if (last != NULL)
6414
211k
        last->parent = op;
6415
211k
    cur =op;
6416
211k
    last = NULL;
6417
211k
      }
6418
836k
  } else if (RAW == '|') {
6419
815k
      if (type == 0) type = CUR;
6420
6421
      /*
6422
       * Detect "Name , Name | Name" error
6423
       */
6424
689k
      else if (type != CUR) {
6425
141
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6426
141
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6427
141
          type);
6428
141
    if ((last != NULL) && (last != ret))
6429
141
        xmlFreeDocElementContent(ctxt->myDoc, last);
6430
141
    if (ret != NULL)
6431
141
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6432
141
    return(NULL);
6433
141
      }
6434
814k
      NEXT;
6435
6436
814k
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6437
814k
      if (op == NULL) {
6438
0
    if ((last != NULL) && (last != ret))
6439
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6440
0
    if (ret != NULL)
6441
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6442
0
    return(NULL);
6443
0
      }
6444
814k
      if (last == NULL) {
6445
125k
    op->c1 = ret;
6446
125k
    if (ret != NULL)
6447
125k
        ret->parent = op;
6448
125k
    ret = cur = op;
6449
689k
      } else {
6450
689k
          cur->c2 = op;
6451
689k
    if (op != NULL)
6452
689k
        op->parent = cur;
6453
689k
    op->c1 = last;
6454
689k
    if (last != NULL)
6455
689k
        last->parent = op;
6456
689k
    cur =op;
6457
689k
    last = NULL;
6458
689k
      }
6459
814k
  } else {
6460
21.8k
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6461
21.8k
      if ((last != NULL) && (last != ret))
6462
9.11k
          xmlFreeDocElementContent(ctxt->myDoc, last);
6463
21.8k
      if (ret != NULL)
6464
21.8k
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6465
21.8k
      return(NULL);
6466
21.8k
  }
6467
1.17M
  GROW;
6468
1.17M
  SKIP_BLANKS;
6469
1.17M
  GROW;
6470
1.17M
  if (RAW == '(') {
6471
62.7k
      int inputid = ctxt->input->id;
6472
      /* Recurse on second child */
6473
62.7k
      NEXT;
6474
62.7k
      SKIP_BLANKS;
6475
62.7k
      last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6476
62.7k
                                                          depth + 1);
6477
62.7k
            if (last == NULL) {
6478
13.1k
    if (ret != NULL)
6479
13.1k
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6480
13.1k
    return(NULL);
6481
13.1k
            }
6482
49.5k
      SKIP_BLANKS;
6483
1.10M
  } else {
6484
1.10M
      elem = xmlParseName(ctxt);
6485
1.10M
      if (elem == NULL) {
6486
2.67k
    xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6487
2.67k
    if (ret != NULL)
6488
2.67k
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6489
2.67k
    return(NULL);
6490
2.67k
      }
6491
1.10M
      last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6492
1.10M
      if (last == NULL) {
6493
0
    if (ret != NULL)
6494
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6495
0
    return(NULL);
6496
0
      }
6497
1.10M
      if (RAW == '?') {
6498
123k
    last->ocur = XML_ELEMENT_CONTENT_OPT;
6499
123k
    NEXT;
6500
983k
      } else if (RAW == '*') {
6501
68.7k
    last->ocur = XML_ELEMENT_CONTENT_MULT;
6502
68.7k
    NEXT;
6503
914k
      } else if (RAW == '+') {
6504
21.9k
    last->ocur = XML_ELEMENT_CONTENT_PLUS;
6505
21.9k
    NEXT;
6506
892k
      } else {
6507
892k
    last->ocur = XML_ELEMENT_CONTENT_ONCE;
6508
892k
      }
6509
1.10M
  }
6510
1.15M
  SKIP_BLANKS;
6511
1.15M
  GROW;
6512
1.15M
    }
6513
416k
    if ((cur != NULL) && (last != NULL)) {
6514
246k
        cur->c2 = last;
6515
246k
  if (last != NULL)
6516
246k
      last->parent = cur;
6517
246k
    }
6518
416k
    if (ctxt->input->id != inputchk) {
6519
53
  xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6520
53
                       "Element content declaration doesn't start and stop in"
6521
53
                       " the same entity\n");
6522
53
    }
6523
416k
    NEXT;
6524
416k
    if (RAW == '?') {
6525
7.94k
  if (ret != NULL) {
6526
7.94k
      if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6527
7.94k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6528
473
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6529
7.46k
      else
6530
7.46k
          ret->ocur = XML_ELEMENT_CONTENT_OPT;
6531
7.94k
  }
6532
7.94k
  NEXT;
6533
408k
    } else if (RAW == '*') {
6534
108k
  if (ret != NULL) {
6535
108k
      ret->ocur = XML_ELEMENT_CONTENT_MULT;
6536
108k
      cur = ret;
6537
      /*
6538
       * Some normalization:
6539
       * (a | b* | c?)* == (a | b | c)*
6540
       */
6541
584k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6542
475k
    if ((cur->c1 != NULL) &&
6543
475k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6544
475k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6545
17.2k
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6546
475k
    if ((cur->c2 != NULL) &&
6547
475k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6548
475k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6549
3.30k
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6550
475k
    cur = cur->c2;
6551
475k
      }
6552
108k
  }
6553
108k
  NEXT;
6554
300k
    } else if (RAW == '+') {
6555
46.5k
  if (ret != NULL) {
6556
46.5k
      int found = 0;
6557
6558
46.5k
      if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6559
46.5k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6560
513
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6561
46.0k
      else
6562
46.0k
          ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6563
      /*
6564
       * Some normalization:
6565
       * (a | b*)+ == (a | b)*
6566
       * (a | b?)+ == (a | b)*
6567
       */
6568
78.0k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6569
31.5k
    if ((cur->c1 != NULL) &&
6570
31.5k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6571
31.5k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6572
1.69k
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6573
1.69k
        found = 1;
6574
1.69k
    }
6575
31.5k
    if ((cur->c2 != NULL) &&
6576
31.5k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6577
31.5k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6578
379
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6579
379
        found = 1;
6580
379
    }
6581
31.5k
    cur = cur->c2;
6582
31.5k
      }
6583
46.5k
      if (found)
6584
1.92k
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6585
46.5k
  }
6586
46.5k
  NEXT;
6587
46.5k
    }
6588
416k
    return(ret);
6589
454k
}
6590
6591
/**
6592
 * xmlParseElementChildrenContentDecl:
6593
 * @ctxt:  an XML parser context
6594
 * @inputchk:  the input used for the current entity, needed for boundary checks
6595
 *
6596
 * DEPRECATED: Internal function, don't use.
6597
 *
6598
 * parse the declaration for a Mixed Element content
6599
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6600
 *
6601
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6602
 *
6603
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6604
 *
6605
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6606
 *
6607
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6608
 *
6609
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6610
 * TODO Parameter-entity replacement text must be properly nested
6611
 *  with parenthesized groups. That is to say, if either of the
6612
 *  opening or closing parentheses in a choice, seq, or Mixed
6613
 *  construct is contained in the replacement text for a parameter
6614
 *  entity, both must be contained in the same replacement text. For
6615
 *  interoperability, if a parameter-entity reference appears in a
6616
 *  choice, seq, or Mixed construct, its replacement text should not
6617
 *  be empty, and neither the first nor last non-blank character of
6618
 *  the replacement text should be a connector (| or ,).
6619
 *
6620
 * Returns the tree of xmlElementContentPtr describing the element
6621
 *          hierarchy.
6622
 */
6623
xmlElementContentPtr
6624
0
xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6625
    /* stub left for API/ABI compat */
6626
0
    return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6627
0
}
6628
6629
/**
6630
 * xmlParseElementContentDecl:
6631
 * @ctxt:  an XML parser context
6632
 * @name:  the name of the element being defined.
6633
 * @result:  the Element Content pointer will be stored here if any
6634
 *
6635
 * DEPRECATED: Internal function, don't use.
6636
 *
6637
 * parse the declaration for an Element content either Mixed or Children,
6638
 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6639
 *
6640
 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6641
 *
6642
 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6643
 */
6644
6645
int
6646
xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6647
686k
                           xmlElementContentPtr *result) {
6648
6649
686k
    xmlElementContentPtr tree = NULL;
6650
686k
    int inputid = ctxt->input->id;
6651
686k
    int res;
6652
6653
686k
    *result = NULL;
6654
6655
686k
    if (RAW != '(') {
6656
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6657
0
    "xmlParseElementContentDecl : %s '(' expected\n", name);
6658
0
  return(-1);
6659
0
    }
6660
686k
    NEXT;
6661
686k
    GROW;
6662
686k
    if (ctxt->instate == XML_PARSER_EOF)
6663
0
        return(-1);
6664
686k
    SKIP_BLANKS;
6665
686k
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6666
318k
        tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6667
318k
  res = XML_ELEMENT_TYPE_MIXED;
6668
367k
    } else {
6669
367k
        tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6670
367k
  res = XML_ELEMENT_TYPE_ELEMENT;
6671
367k
    }
6672
686k
    SKIP_BLANKS;
6673
686k
    *result = tree;
6674
686k
    return(res);
6675
686k
}
6676
6677
/**
6678
 * xmlParseElementDecl:
6679
 * @ctxt:  an XML parser context
6680
 *
6681
 * DEPRECATED: Internal function, don't use.
6682
 *
6683
 * Parse an element declaration. Always consumes '<!'.
6684
 *
6685
 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6686
 *
6687
 * [ VC: Unique Element Type Declaration ]
6688
 * No element type may be declared more than once
6689
 *
6690
 * Returns the type of the element, or -1 in case of error
6691
 */
6692
int
6693
913k
xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6694
913k
    const xmlChar *name;
6695
913k
    int ret = -1;
6696
913k
    xmlElementContentPtr content  = NULL;
6697
6698
913k
    if ((CUR != '<') || (NXT(1) != '!'))
6699
0
        return(ret);
6700
913k
    SKIP(2);
6701
6702
    /* GROW; done in the caller */
6703
913k
    if (CMP7(CUR_PTR, 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6704
909k
  int inputid = ctxt->input->id;
6705
6706
909k
  SKIP(7);
6707
909k
  if (SKIP_BLANKS == 0) {
6708
5.86k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6709
5.86k
               "Space required after 'ELEMENT'\n");
6710
5.86k
      return(-1);
6711
5.86k
  }
6712
903k
        name = xmlParseName(ctxt);
6713
903k
  if (name == NULL) {
6714
6.60k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6715
6.60k
         "xmlParseElementDecl: no name for Element\n");
6716
6.60k
      return(-1);
6717
6.60k
  }
6718
896k
  if (SKIP_BLANKS == 0) {
6719
17.7k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6720
17.7k
         "Space required after the element name\n");
6721
17.7k
  }
6722
896k
  if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6723
183k
      SKIP(5);
6724
      /*
6725
       * Element must always be empty.
6726
       */
6727
183k
      ret = XML_ELEMENT_TYPE_EMPTY;
6728
713k
  } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6729
713k
             (NXT(2) == 'Y')) {
6730
4.31k
      SKIP(3);
6731
      /*
6732
       * Element is a generic container.
6733
       */
6734
4.31k
      ret = XML_ELEMENT_TYPE_ANY;
6735
709k
  } else if (RAW == '(') {
6736
686k
      ret = xmlParseElementContentDecl(ctxt, name, &content);
6737
686k
  } else {
6738
      /*
6739
       * [ WFC: PEs in Internal Subset ] error handling.
6740
       */
6741
23.2k
      if ((RAW == '%') && (ctxt->external == 0) &&
6742
23.2k
          (ctxt->inputNr == 1)) {
6743
929
    xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
6744
929
    "PEReference: forbidden within markup decl in internal subset\n");
6745
22.3k
      } else {
6746
22.3k
    xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6747
22.3k
          "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6748
22.3k
            }
6749
23.2k
      return(-1);
6750
23.2k
  }
6751
6752
873k
  SKIP_BLANKS;
6753
6754
873k
  if (RAW != '>') {
6755
37.5k
      xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6756
37.5k
      if (content != NULL) {
6757
4.16k
    xmlFreeDocElementContent(ctxt->myDoc, content);
6758
4.16k
      }
6759
836k
  } else {
6760
836k
      if (inputid != ctxt->input->id) {
6761
44
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6762
44
                               "Element declaration doesn't start and stop in"
6763
44
                               " the same entity\n");
6764
44
      }
6765
6766
836k
      NEXT;
6767
836k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6768
836k
    (ctxt->sax->elementDecl != NULL)) {
6769
740k
    if (content != NULL)
6770
563k
        content->parent = NULL;
6771
740k
          ctxt->sax->elementDecl(ctxt->userData, name, ret,
6772
740k
                           content);
6773
740k
    if ((content != NULL) && (content->parent == NULL)) {
6774
        /*
6775
         * this is a trick: if xmlAddElementDecl is called,
6776
         * instead of copying the full tree it is plugged directly
6777
         * if called from the parser. Avoid duplicating the
6778
         * interfaces or change the API/ABI
6779
         */
6780
74.2k
        xmlFreeDocElementContent(ctxt->myDoc, content);
6781
74.2k
    }
6782
740k
      } else if (content != NULL) {
6783
77.6k
    xmlFreeDocElementContent(ctxt->myDoc, content);
6784
77.6k
      }
6785
836k
  }
6786
873k
    }
6787
877k
    return(ret);
6788
913k
}
6789
6790
/**
6791
 * xmlParseConditionalSections
6792
 * @ctxt:  an XML parser context
6793
 *
6794
 * Parse a conditional section. Always consumes '<!['.
6795
 *
6796
 * [61] conditionalSect ::= includeSect | ignoreSect
6797
 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6798
 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6799
 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6800
 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6801
 */
6802
6803
static void
6804
7.69k
xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6805
7.69k
    int *inputIds = NULL;
6806
7.69k
    size_t inputIdsSize = 0;
6807
7.69k
    size_t depth = 0;
6808
6809
30.4k
    while (ctxt->instate != XML_PARSER_EOF) {
6810
30.3k
        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6811
16.2k
            int id = ctxt->input->id;
6812
6813
16.2k
            SKIP(3);
6814
16.2k
            SKIP_BLANKS;
6815
6816
16.2k
            if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6817
11.5k
                SKIP(7);
6818
11.5k
                SKIP_BLANKS;
6819
11.5k
                if (RAW != '[') {
6820
228
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6821
228
                    xmlHaltParser(ctxt);
6822
228
                    goto error;
6823
228
                }
6824
11.3k
                if (ctxt->input->id != id) {
6825
14
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6826
14
                                   "All markup of the conditional section is"
6827
14
                                   " not in the same entity\n");
6828
14
                }
6829
11.3k
                NEXT;
6830
6831
11.3k
                if (inputIdsSize <= depth) {
6832
4.09k
                    int *tmp;
6833
6834
4.09k
                    inputIdsSize = (inputIdsSize == 0 ? 4 : inputIdsSize * 2);
6835
4.09k
                    tmp = (int *) xmlRealloc(inputIds,
6836
4.09k
                            inputIdsSize * sizeof(int));
6837
4.09k
                    if (tmp == NULL) {
6838
0
                        xmlErrMemory(ctxt, NULL);
6839
0
                        goto error;
6840
0
                    }
6841
4.09k
                    inputIds = tmp;
6842
4.09k
                }
6843
11.3k
                inputIds[depth] = id;
6844
11.3k
                depth++;
6845
11.3k
            } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6846
3.05k
                size_t ignoreDepth = 0;
6847
6848
3.05k
                SKIP(6);
6849
3.05k
                SKIP_BLANKS;
6850
3.05k
                if (RAW != '[') {
6851
189
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6852
189
                    xmlHaltParser(ctxt);
6853
189
                    goto error;
6854
189
                }
6855
2.86k
                if (ctxt->input->id != id) {
6856
6
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6857
6
                                   "All markup of the conditional section is"
6858
6
                                   " not in the same entity\n");
6859
6
                }
6860
2.86k
                NEXT;
6861
6862
2.49M
                while (RAW != 0) {
6863
2.49M
                    if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6864
9.41k
                        SKIP(3);
6865
9.41k
                        ignoreDepth++;
6866
                        /* Check for integer overflow */
6867
9.41k
                        if (ignoreDepth == 0) {
6868
0
                            xmlErrMemory(ctxt, NULL);
6869
0
                            goto error;
6870
0
                        }
6871
2.48M
                    } else if ((RAW == ']') && (NXT(1) == ']') &&
6872
2.48M
                               (NXT(2) == '>')) {
6873
5.45k
                        if (ignoreDepth == 0)
6874
811
                            break;
6875
4.64k
                        SKIP(3);
6876
4.64k
                        ignoreDepth--;
6877
2.47M
                    } else {
6878
2.47M
                        NEXT;
6879
2.47M
                    }
6880
2.49M
                }
6881
6882
2.86k
    if (RAW == 0) {
6883
2.05k
        xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6884
2.05k
                    goto error;
6885
2.05k
    }
6886
811
                if (ctxt->input->id != id) {
6887
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6888
0
                                   "All markup of the conditional section is"
6889
0
                                   " not in the same entity\n");
6890
0
                }
6891
811
                SKIP(3);
6892
1.62k
            } else {
6893
1.62k
                xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6894
1.62k
                xmlHaltParser(ctxt);
6895
1.62k
                goto error;
6896
1.62k
            }
6897
16.2k
        } else if ((depth > 0) &&
6898
14.0k
                   (RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6899
3.26k
            depth--;
6900
3.26k
            if (ctxt->input->id != inputIds[depth]) {
6901
132
                xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6902
132
                               "All markup of the conditional section is not"
6903
132
                               " in the same entity\n");
6904
132
            }
6905
3.26k
            SKIP(3);
6906
10.8k
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
6907
8.80k
            xmlParseMarkupDecl(ctxt);
6908
8.80k
        } else {
6909
2.03k
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6910
2.03k
            xmlHaltParser(ctxt);
6911
2.03k
            goto error;
6912
2.03k
        }
6913
6914
24.2k
        if (depth == 0)
6915
1.41k
            break;
6916
6917
22.7k
        SKIP_BLANKS;
6918
22.7k
        GROW;
6919
22.7k
    }
6920
6921
7.69k
error:
6922
7.69k
    xmlFree(inputIds);
6923
7.69k
}
6924
6925
/**
6926
 * xmlParseMarkupDecl:
6927
 * @ctxt:  an XML parser context
6928
 *
6929
 * DEPRECATED: Internal function, don't use.
6930
 *
6931
 * Parse markup declarations. Always consumes '<!' or '<?'.
6932
 *
6933
 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6934
 *                     NotationDecl | PI | Comment
6935
 *
6936
 * [ VC: Proper Declaration/PE Nesting ]
6937
 * Parameter-entity replacement text must be properly nested with
6938
 * markup declarations. That is to say, if either the first character
6939
 * or the last character of a markup declaration (markupdecl above) is
6940
 * contained in the replacement text for a parameter-entity reference,
6941
 * both must be contained in the same replacement text.
6942
 *
6943
 * [ WFC: PEs in Internal Subset ]
6944
 * In the internal DTD subset, parameter-entity references can occur
6945
 * only where markup declarations can occur, not within markup declarations.
6946
 * (This does not apply to references that occur in external parameter
6947
 * entities or to the external subset.)
6948
 */
6949
void
6950
18.1M
xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6951
18.1M
    GROW;
6952
18.1M
    if (CUR == '<') {
6953
18.1M
        if (NXT(1) == '!') {
6954
18.1M
      switch (NXT(2)) {
6955
1.83M
          case 'E':
6956
1.83M
        if (NXT(3) == 'L')
6957
913k
      xmlParseElementDecl(ctxt);
6958
922k
        else if (NXT(3) == 'N')
6959
920k
      xmlParseEntityDecl(ctxt);
6960
1.80k
                    else
6961
1.80k
                        SKIP(2);
6962
1.83M
        break;
6963
738k
          case 'A':
6964
738k
        xmlParseAttributeListDecl(ctxt);
6965
738k
        break;
6966
37.2k
          case 'N':
6967
37.2k
        xmlParseNotationDecl(ctxt);
6968
37.2k
        break;
6969
15.5M
          case '-':
6970
15.5M
        xmlParseComment(ctxt);
6971
15.5M
        break;
6972
25.8k
    default:
6973
        /* there is an error but it will be detected later */
6974
25.8k
                    SKIP(2);
6975
25.8k
        break;
6976
18.1M
      }
6977
18.1M
  } else if (NXT(1) == '?') {
6978
30.0k
      xmlParsePI(ctxt);
6979
30.0k
  }
6980
18.1M
    }
6981
6982
    /*
6983
     * detect requirement to exit there and act accordingly
6984
     * and avoid having instate overridden later on
6985
     */
6986
18.1M
    if (ctxt->instate == XML_PARSER_EOF)
6987
32.5k
        return;
6988
6989
18.1M
    ctxt->instate = XML_PARSER_DTD;
6990
18.1M
}
6991
6992
/**
6993
 * xmlParseTextDecl:
6994
 * @ctxt:  an XML parser context
6995
 *
6996
 * DEPRECATED: Internal function, don't use.
6997
 *
6998
 * parse an XML declaration header for external entities
6999
 *
7000
 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
7001
 */
7002
7003
void
7004
13.5k
xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
7005
13.5k
    xmlChar *version;
7006
13.5k
    const xmlChar *encoding;
7007
13.5k
    int oldstate;
7008
7009
    /*
7010
     * We know that '<?xml' is here.
7011
     */
7012
13.5k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
7013
13.0k
  SKIP(5);
7014
13.0k
    } else {
7015
521
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
7016
521
  return;
7017
521
    }
7018
7019
    /* Avoid expansion of parameter entities when skipping blanks. */
7020
13.0k
    oldstate = ctxt->instate;
7021
13.0k
    ctxt->instate = XML_PARSER_START;
7022
7023
13.0k
    if (SKIP_BLANKS == 0) {
7024
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7025
0
           "Space needed after '<?xml'\n");
7026
0
    }
7027
7028
    /*
7029
     * We may have the VersionInfo here.
7030
     */
7031
13.0k
    version = xmlParseVersionInfo(ctxt);
7032
13.0k
    if (version == NULL)
7033
4.29k
  version = xmlCharStrdup(XML_DEFAULT_VERSION);
7034
8.76k
    else {
7035
8.76k
  if (SKIP_BLANKS == 0) {
7036
918
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7037
918
               "Space needed here\n");
7038
918
  }
7039
8.76k
    }
7040
13.0k
    ctxt->input->version = version;
7041
7042
    /*
7043
     * We must have the encoding declaration
7044
     */
7045
13.0k
    encoding = xmlParseEncodingDecl(ctxt);
7046
13.0k
    if (ctxt->instate == XML_PARSER_EOF)
7047
0
        return;
7048
13.0k
    if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7049
  /*
7050
   * The XML REC instructs us to stop parsing right here
7051
   */
7052
280
        ctxt->instate = oldstate;
7053
280
        return;
7054
280
    }
7055
12.7k
    if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
7056
5.09k
  xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
7057
5.09k
           "Missing encoding in text declaration\n");
7058
5.09k
    }
7059
7060
12.7k
    SKIP_BLANKS;
7061
12.7k
    if ((RAW == '?') && (NXT(1) == '>')) {
7062
4.69k
        SKIP(2);
7063
8.08k
    } else if (RAW == '>') {
7064
        /* Deprecated old WD ... */
7065
151
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7066
151
  NEXT;
7067
7.93k
    } else {
7068
7.93k
        int c;
7069
7070
7.93k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7071
455k
        while ((c = CUR) != 0) {
7072
453k
            NEXT;
7073
453k
            if (c == '>')
7074
5.60k
                break;
7075
453k
        }
7076
7.93k
    }
7077
7078
12.7k
    ctxt->instate = oldstate;
7079
12.7k
}
7080
7081
/**
7082
 * xmlParseExternalSubset:
7083
 * @ctxt:  an XML parser context
7084
 * @ExternalID: the external identifier
7085
 * @SystemID: the system identifier (or URL)
7086
 *
7087
 * parse Markup declarations from an external subset
7088
 *
7089
 * [30] extSubset ::= textDecl? extSubsetDecl
7090
 *
7091
 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
7092
 */
7093
void
7094
xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
7095
58.5k
                       const xmlChar *SystemID) {
7096
58.5k
    xmlDetectSAX2(ctxt);
7097
58.5k
    GROW;
7098
7099
58.5k
    if ((ctxt->encoding == NULL) &&
7100
58.5k
        (ctxt->input->end - ctxt->input->cur >= 4)) {
7101
58.2k
        xmlChar start[4];
7102
58.2k
  xmlCharEncoding enc;
7103
7104
58.2k
  start[0] = RAW;
7105
58.2k
  start[1] = NXT(1);
7106
58.2k
  start[2] = NXT(2);
7107
58.2k
  start[3] = NXT(3);
7108
58.2k
  enc = xmlDetectCharEncoding(start, 4);
7109
58.2k
  if (enc != XML_CHAR_ENCODING_NONE)
7110
12.6k
      xmlSwitchEncoding(ctxt, enc);
7111
58.2k
    }
7112
7113
58.5k
    if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
7114
11.0k
  xmlParseTextDecl(ctxt);
7115
11.0k
  if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7116
      /*
7117
       * The XML REC instructs us to stop parsing right here
7118
       */
7119
227
      xmlHaltParser(ctxt);
7120
227
      return;
7121
227
  }
7122
11.0k
    }
7123
58.2k
    if (ctxt->myDoc == NULL) {
7124
0
        ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
7125
0
  if (ctxt->myDoc == NULL) {
7126
0
      xmlErrMemory(ctxt, "New Doc failed");
7127
0
      return;
7128
0
  }
7129
0
  ctxt->myDoc->properties = XML_DOC_INTERNAL;
7130
0
    }
7131
58.2k
    if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
7132
0
        xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
7133
7134
58.2k
    ctxt->instate = XML_PARSER_DTD;
7135
58.2k
    ctxt->external = 1;
7136
58.2k
    SKIP_BLANKS;
7137
7.70M
    while ((ctxt->instate != XML_PARSER_EOF) && (RAW != 0)) {
7138
7.66M
  GROW;
7139
7.66M
        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7140
7.69k
            xmlParseConditionalSections(ctxt);
7141
7.65M
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
7142
7.63M
            xmlParseMarkupDecl(ctxt);
7143
7.63M
        } else {
7144
18.8k
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7145
18.8k
            xmlHaltParser(ctxt);
7146
18.8k
            return;
7147
18.8k
        }
7148
7.64M
        SKIP_BLANKS;
7149
7.64M
    }
7150
7151
39.4k
    if (RAW != 0) {
7152
0
  xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7153
0
    }
7154
7155
39.4k
}
7156
7157
/**
7158
 * xmlParseReference:
7159
 * @ctxt:  an XML parser context
7160
 *
7161
 * DEPRECATED: Internal function, don't use.
7162
 *
7163
 * parse and handle entity references in content, depending on the SAX
7164
 * interface, this may end-up in a call to character() if this is a
7165
 * CharRef, a predefined entity, if there is no reference() callback.
7166
 * or if the parser was asked to switch to that mode.
7167
 *
7168
 * Always consumes '&'.
7169
 *
7170
 * [67] Reference ::= EntityRef | CharRef
7171
 */
7172
void
7173
4.25M
xmlParseReference(xmlParserCtxtPtr ctxt) {
7174
4.25M
    xmlEntityPtr ent;
7175
4.25M
    xmlChar *val;
7176
4.25M
    int was_checked;
7177
4.25M
    xmlNodePtr list = NULL;
7178
4.25M
    xmlParserErrors ret = XML_ERR_OK;
7179
7180
7181
4.25M
    if (RAW != '&')
7182
0
        return;
7183
7184
    /*
7185
     * Simple case of a CharRef
7186
     */
7187
4.25M
    if (NXT(1) == '#') {
7188
1.03M
  int i = 0;
7189
1.03M
  xmlChar out[16];
7190
1.03M
  int hex = NXT(2);
7191
1.03M
  int value = xmlParseCharRef(ctxt);
7192
7193
1.03M
  if (value == 0)
7194
272k
      return;
7195
761k
  if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
7196
      /*
7197
       * So we are using non-UTF-8 buffers
7198
       * Check that the char fit on 8bits, if not
7199
       * generate a CharRef.
7200
       */
7201
538k
      if (value <= 0xFF) {
7202
502k
    out[0] = value;
7203
502k
    out[1] = 0;
7204
502k
    if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7205
502k
        (!ctxt->disableSAX))
7206
449k
        ctxt->sax->characters(ctxt->userData, out, 1);
7207
502k
      } else {
7208
36.9k
    if ((hex == 'x') || (hex == 'X'))
7209
13.8k
        snprintf((char *)out, sizeof(out), "#x%X", value);
7210
23.1k
    else
7211
23.1k
        snprintf((char *)out, sizeof(out), "#%d", value);
7212
36.9k
    if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7213
36.9k
        (!ctxt->disableSAX))
7214
31.3k
        ctxt->sax->reference(ctxt->userData, out);
7215
36.9k
      }
7216
538k
  } else {
7217
      /*
7218
       * Just encode the value in UTF-8
7219
       */
7220
222k
      COPY_BUF(0 ,out, i, value);
7221
222k
      out[i] = 0;
7222
222k
      if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7223
222k
    (!ctxt->disableSAX))
7224
208k
    ctxt->sax->characters(ctxt->userData, out, i);
7225
222k
  }
7226
761k
  return;
7227
1.03M
    }
7228
7229
    /*
7230
     * We are seeing an entity reference
7231
     */
7232
3.21M
    ent = xmlParseEntityRef(ctxt);
7233
3.21M
    if (ent == NULL) return;
7234
1.84M
    if (!ctxt->wellFormed)
7235
787k
  return;
7236
1.05M
    was_checked = ent->flags & XML_ENT_PARSED;
7237
7238
    /* special case of predefined entities */
7239
1.05M
    if ((ent->name == NULL) ||
7240
1.05M
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7241
50.4k
  val = ent->content;
7242
50.4k
  if (val == NULL) return;
7243
  /*
7244
   * inline the entity.
7245
   */
7246
50.4k
  if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7247
50.4k
      (!ctxt->disableSAX))
7248
50.4k
      ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7249
50.4k
  return;
7250
50.4k
    }
7251
7252
    /*
7253
     * The first reference to the entity trigger a parsing phase
7254
     * where the ent->children is filled with the result from
7255
     * the parsing.
7256
     * Note: external parsed entities will not be loaded, it is not
7257
     * required for a non-validating parser, unless the parsing option
7258
     * of validating, or substituting entities were given. Doing so is
7259
     * far more secure as the parser will only process data coming from
7260
     * the document entity by default.
7261
     */
7262
1.00M
    if (((ent->flags & XML_ENT_PARSED) == 0) &&
7263
1.00M
        ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
7264
64.3k
         (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
7265
61.8k
  unsigned long oldsizeentcopy = ctxt->sizeentcopy;
7266
7267
  /*
7268
   * This is a bit hackish but this seems the best
7269
   * way to make sure both SAX and DOM entity support
7270
   * behaves okay.
7271
   */
7272
61.8k
  void *user_data;
7273
61.8k
  if (ctxt->userData == ctxt)
7274
61.8k
      user_data = NULL;
7275
0
  else
7276
0
      user_data = ctxt->userData;
7277
7278
        /* Avoid overflow as much as possible */
7279
61.8k
        ctxt->sizeentcopy = 0;
7280
7281
61.8k
        if (ent->flags & XML_ENT_EXPANDING) {
7282
403
            xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7283
403
            xmlHaltParser(ctxt);
7284
403
            return;
7285
403
        }
7286
7287
61.4k
        ent->flags |= XML_ENT_EXPANDING;
7288
7289
  /*
7290
   * Check that this entity is well formed
7291
   * 4.3.2: An internal general parsed entity is well-formed
7292
   * if its replacement text matches the production labeled
7293
   * content.
7294
   */
7295
61.4k
  if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7296
48.1k
      ctxt->depth++;
7297
48.1k
      ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
7298
48.1k
                                                user_data, &list);
7299
48.1k
      ctxt->depth--;
7300
7301
48.1k
  } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7302
13.3k
      ctxt->depth++;
7303
13.3k
      ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
7304
13.3k
                                     user_data, ctxt->depth, ent->URI,
7305
13.3k
             ent->ExternalID, &list);
7306
13.3k
      ctxt->depth--;
7307
13.3k
  } else {
7308
0
      ret = XML_ERR_ENTITY_PE_INTERNAL;
7309
0
      xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7310
0
       "invalid entity type found\n", NULL);
7311
0
  }
7312
7313
61.4k
        ent->flags &= ~XML_ENT_EXPANDING;
7314
61.4k
        ent->flags |= XML_ENT_PARSED | XML_ENT_CHECKED;
7315
61.4k
        ent->expandedSize = ctxt->sizeentcopy;
7316
61.4k
  if (ret == XML_ERR_ENTITY_LOOP) {
7317
2.94k
            xmlHaltParser(ctxt);
7318
2.94k
      xmlFreeNodeList(list);
7319
2.94k
      return;
7320
2.94k
  }
7321
58.5k
  if (xmlParserEntityCheck(ctxt, oldsizeentcopy)) {
7322
0
      xmlFreeNodeList(list);
7323
0
      return;
7324
0
  }
7325
7326
58.5k
  if ((ret == XML_ERR_OK) && (list != NULL)) {
7327
41.0k
            ent->children = list;
7328
            /*
7329
             * Prune it directly in the generated document
7330
             * except for single text nodes.
7331
             */
7332
41.0k
            if ((ctxt->replaceEntities == 0) ||
7333
41.0k
                (ctxt->parseMode == XML_PARSE_READER) ||
7334
41.0k
                ((list->type == XML_TEXT_NODE) &&
7335
31.4k
                 (list->next == NULL))) {
7336
31.4k
                ent->owner = 1;
7337
100k
                while (list != NULL) {
7338
68.7k
                    list->parent = (xmlNodePtr) ent;
7339
68.7k
                    if (list->doc != ent->doc)
7340
0
                        xmlSetTreeDoc(list, ent->doc);
7341
68.7k
                    if (list->next == NULL)
7342
31.4k
                        ent->last = list;
7343
68.7k
                    list = list->next;
7344
68.7k
                }
7345
31.4k
                list = NULL;
7346
31.4k
            } else {
7347
9.59k
                ent->owner = 0;
7348
64.0k
                while (list != NULL) {
7349
54.4k
                    list->parent = (xmlNodePtr) ctxt->node;
7350
54.4k
                    list->doc = ctxt->myDoc;
7351
54.4k
                    if (list->next == NULL)
7352
9.59k
                        ent->last = list;
7353
54.4k
                    list = list->next;
7354
54.4k
                }
7355
9.59k
                list = ent->children;
7356
#ifdef LIBXML_LEGACY_ENABLED
7357
                if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7358
                    xmlAddEntityReference(ent, list, NULL);
7359
#endif /* LIBXML_LEGACY_ENABLED */
7360
9.59k
            }
7361
41.0k
  } else if ((ret != XML_ERR_OK) &&
7362
17.4k
       (ret != XML_WAR_UNDECLARED_ENTITY)) {
7363
10.0k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7364
10.0k
         "Entity '%s' failed to parse\n", ent->name);
7365
10.0k
            if (ent->content != NULL)
7366
5.11k
                ent->content[0] = 0;
7367
10.0k
  } else if (list != NULL) {
7368
0
      xmlFreeNodeList(list);
7369
0
      list = NULL;
7370
0
  }
7371
7372
        /* Prevent entity from being parsed and expanded twice (Bug 760367). */
7373
58.5k
        was_checked = 0;
7374
58.5k
    }
7375
7376
    /*
7377
     * Now that the entity content has been gathered
7378
     * provide it to the application, this can take different forms based
7379
     * on the parsing modes.
7380
     */
7381
1.00M
    if (ent->children == NULL) {
7382
  /*
7383
   * Probably running in SAX mode and the callbacks don't
7384
   * build the entity content. So unless we already went
7385
   * though parsing for first checking go though the entity
7386
   * content to generate callbacks associated to the entity
7387
   */
7388
58.4k
  if (was_checked != 0) {
7389
38.4k
      void *user_data;
7390
      /*
7391
       * This is a bit hackish but this seems the best
7392
       * way to make sure both SAX and DOM entity support
7393
       * behaves okay.
7394
       */
7395
38.4k
      if (ctxt->userData == ctxt)
7396
38.4k
    user_data = NULL;
7397
0
      else
7398
0
    user_data = ctxt->userData;
7399
7400
38.4k
      if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7401
6.41k
    ctxt->depth++;
7402
6.41k
    ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7403
6.41k
           ent->content, user_data, NULL);
7404
6.41k
    ctxt->depth--;
7405
31.9k
      } else if (ent->etype ==
7406
31.9k
           XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7407
31.9k
          unsigned long oldsizeentities = ctxt->sizeentities;
7408
7409
31.9k
    ctxt->depth++;
7410
31.9k
    ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7411
31.9k
         ctxt->sax, user_data, ctxt->depth,
7412
31.9k
         ent->URI, ent->ExternalID, NULL);
7413
31.9k
    ctxt->depth--;
7414
7415
                /* Undo the change to sizeentities */
7416
31.9k
                ctxt->sizeentities = oldsizeentities;
7417
31.9k
      } else {
7418
0
    ret = XML_ERR_ENTITY_PE_INTERNAL;
7419
0
    xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7420
0
           "invalid entity type found\n", NULL);
7421
0
      }
7422
38.4k
      if (ret == XML_ERR_ENTITY_LOOP) {
7423
0
    xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7424
0
    return;
7425
0
      }
7426
38.4k
            if (xmlParserEntityCheck(ctxt, 0))
7427
0
                return;
7428
38.4k
  }
7429
58.4k
  if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7430
58.4k
      (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7431
      /*
7432
       * Entity reference callback comes second, it's somewhat
7433
       * superfluous but a compatibility to historical behaviour
7434
       */
7435
15.6k
      ctxt->sax->reference(ctxt->userData, ent->name);
7436
15.6k
  }
7437
58.4k
  return;
7438
58.4k
    }
7439
7440
    /*
7441
     * We also check for amplification if entities aren't substituted.
7442
     * They might be expanded later.
7443
     */
7444
944k
    if ((was_checked != 0) &&
7445
944k
        (xmlParserEntityCheck(ctxt, ent->expandedSize)))
7446
108
        return;
7447
7448
    /*
7449
     * If we didn't get any children for the entity being built
7450
     */
7451
944k
    if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7452
944k
  (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7453
  /*
7454
   * Create a node.
7455
   */
7456
141k
  ctxt->sax->reference(ctxt->userData, ent->name);
7457
141k
  return;
7458
141k
    }
7459
7460
802k
    if (ctxt->replaceEntities)  {
7461
  /*
7462
   * There is a problem on the handling of _private for entities
7463
   * (bug 155816): Should we copy the content of the field from
7464
   * the entity (possibly overwriting some value set by the user
7465
   * when a copy is created), should we leave it alone, or should
7466
   * we try to take care of different situations?  The problem
7467
   * is exacerbated by the usage of this field by the xmlReader.
7468
   * To fix this bug, we look at _private on the created node
7469
   * and, if it's NULL, we copy in whatever was in the entity.
7470
   * If it's not NULL we leave it alone.  This is somewhat of a
7471
   * hack - maybe we should have further tests to determine
7472
   * what to do.
7473
   */
7474
802k
  if (ctxt->node != NULL) {
7475
      /*
7476
       * Seems we are generating the DOM content, do
7477
       * a simple tree copy for all references except the first
7478
       * In the first occurrence list contains the replacement.
7479
       */
7480
802k
      if (((list == NULL) && (ent->owner == 0)) ||
7481
802k
    (ctxt->parseMode == XML_PARSE_READER)) {
7482
258k
    xmlNodePtr nw = NULL, cur, firstChild = NULL;
7483
7484
    /*
7485
     * when operating on a reader, the entities definitions
7486
     * are always owning the entities subtree.
7487
    if (ctxt->parseMode == XML_PARSE_READER)
7488
        ent->owner = 1;
7489
     */
7490
7491
258k
    cur = ent->children;
7492
357k
    while (cur != NULL) {
7493
357k
        nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7494
357k
        if (nw != NULL) {
7495
357k
      if (nw->_private == NULL)
7496
357k
          nw->_private = cur->_private;
7497
357k
      if (firstChild == NULL){
7498
258k
          firstChild = nw;
7499
258k
      }
7500
357k
      nw = xmlAddChild(ctxt->node, nw);
7501
357k
        }
7502
357k
        if (cur == ent->last) {
7503
      /*
7504
       * needed to detect some strange empty
7505
       * node cases in the reader tests
7506
       */
7507
258k
      if ((ctxt->parseMode == XML_PARSE_READER) &&
7508
258k
          (nw != NULL) &&
7509
258k
          (nw->type == XML_ELEMENT_NODE) &&
7510
258k
          (nw->children == NULL))
7511
3.14k
          nw->extra = 1;
7512
7513
258k
      break;
7514
258k
        }
7515
98.6k
        cur = cur->next;
7516
98.6k
    }
7517
#ifdef LIBXML_LEGACY_ENABLED
7518
    if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7519
      xmlAddEntityReference(ent, firstChild, nw);
7520
#endif /* LIBXML_LEGACY_ENABLED */
7521
544k
      } else if ((list == NULL) || (ctxt->inputNr > 0)) {
7522
544k
    xmlNodePtr nw = NULL, cur, next, last,
7523
544k
         firstChild = NULL;
7524
7525
    /*
7526
     * Copy the entity child list and make it the new
7527
     * entity child list. The goal is to make sure any
7528
     * ID or REF referenced will be the one from the
7529
     * document content and not the entity copy.
7530
     */
7531
544k
    cur = ent->children;
7532
544k
    ent->children = NULL;
7533
544k
    last = ent->last;
7534
544k
    ent->last = NULL;
7535
771k
    while (cur != NULL) {
7536
771k
        next = cur->next;
7537
771k
        cur->next = NULL;
7538
771k
        cur->parent = NULL;
7539
771k
        nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7540
771k
        if (nw != NULL) {
7541
771k
      if (nw->_private == NULL)
7542
771k
          nw->_private = cur->_private;
7543
771k
      if (firstChild == NULL){
7544
544k
          firstChild = cur;
7545
544k
      }
7546
771k
      xmlAddChild((xmlNodePtr) ent, nw);
7547
771k
        }
7548
771k
        xmlAddChild(ctxt->node, cur);
7549
771k
        if (cur == last)
7550
544k
      break;
7551
227k
        cur = next;
7552
227k
    }
7553
544k
    if (ent->owner == 0)
7554
9.59k
        ent->owner = 1;
7555
#ifdef LIBXML_LEGACY_ENABLED
7556
    if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7557
      xmlAddEntityReference(ent, firstChild, nw);
7558
#endif /* LIBXML_LEGACY_ENABLED */
7559
544k
      } else {
7560
0
    const xmlChar *nbktext;
7561
7562
    /*
7563
     * the name change is to avoid coalescing of the
7564
     * node with a possible previous text one which
7565
     * would make ent->children a dangling pointer
7566
     */
7567
0
    nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7568
0
          -1);
7569
0
    if (ent->children->type == XML_TEXT_NODE)
7570
0
        ent->children->name = nbktext;
7571
0
    if ((ent->last != ent->children) &&
7572
0
        (ent->last->type == XML_TEXT_NODE))
7573
0
        ent->last->name = nbktext;
7574
0
    xmlAddChildList(ctxt->node, ent->children);
7575
0
      }
7576
7577
      /*
7578
       * This is to avoid a nasty side effect, see
7579
       * characters() in SAX.c
7580
       */
7581
802k
      ctxt->nodemem = 0;
7582
802k
      ctxt->nodelen = 0;
7583
802k
      return;
7584
802k
  }
7585
802k
    }
7586
802k
}
7587
7588
/**
7589
 * xmlParseEntityRef:
7590
 * @ctxt:  an XML parser context
7591
 *
7592
 * DEPRECATED: Internal function, don't use.
7593
 *
7594
 * Parse an entitiy reference. Always consumes '&'.
7595
 *
7596
 * [68] EntityRef ::= '&' Name ';'
7597
 *
7598
 * [ WFC: Entity Declared ]
7599
 * In a document without any DTD, a document with only an internal DTD
7600
 * subset which contains no parameter entity references, or a document
7601
 * with "standalone='yes'", the Name given in the entity reference
7602
 * must match that in an entity declaration, except that well-formed
7603
 * documents need not declare any of the following entities: amp, lt,
7604
 * gt, apos, quot.  The declaration of a parameter entity must precede
7605
 * any reference to it.  Similarly, the declaration of a general entity
7606
 * must precede any reference to it which appears in a default value in an
7607
 * attribute-list declaration. Note that if entities are declared in the
7608
 * external subset or in external parameter entities, a non-validating
7609
 * processor is not obligated to read and process their declarations;
7610
 * for such documents, the rule that an entity must be declared is a
7611
 * well-formedness constraint only if standalone='yes'.
7612
 *
7613
 * [ WFC: Parsed Entity ]
7614
 * An entity reference must not contain the name of an unparsed entity
7615
 *
7616
 * Returns the xmlEntityPtr if found, or NULL otherwise.
7617
 */
7618
xmlEntityPtr
7619
4.95M
xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7620
4.95M
    const xmlChar *name;
7621
4.95M
    xmlEntityPtr ent = NULL;
7622
7623
4.95M
    GROW;
7624
4.95M
    if (ctxt->instate == XML_PARSER_EOF)
7625
0
        return(NULL);
7626
7627
4.95M
    if (RAW != '&')
7628
0
        return(NULL);
7629
4.95M
    NEXT;
7630
4.95M
    name = xmlParseName(ctxt);
7631
4.95M
    if (name == NULL) {
7632
638k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7633
638k
           "xmlParseEntityRef: no name\n");
7634
638k
        return(NULL);
7635
638k
    }
7636
4.31M
    if (RAW != ';') {
7637
353k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7638
353k
  return(NULL);
7639
353k
    }
7640
3.96M
    NEXT;
7641
7642
    /*
7643
     * Predefined entities override any extra definition
7644
     */
7645
3.96M
    if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7646
3.01M
        ent = xmlGetPredefinedEntity(name);
7647
3.01M
        if (ent != NULL)
7648
293k
            return(ent);
7649
3.01M
    }
7650
7651
    /*
7652
     * Ask first SAX for entity resolution, otherwise try the
7653
     * entities which may have stored in the parser context.
7654
     */
7655
3.67M
    if (ctxt->sax != NULL) {
7656
3.67M
  if (ctxt->sax->getEntity != NULL)
7657
3.67M
      ent = ctxt->sax->getEntity(ctxt->userData, name);
7658
3.67M
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7659
3.67M
      (ctxt->options & XML_PARSE_OLDSAX))
7660
26.6k
      ent = xmlGetPredefinedEntity(name);
7661
3.67M
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7662
3.67M
      (ctxt->userData==ctxt)) {
7663
66.0k
      ent = xmlSAX2GetEntity(ctxt, name);
7664
66.0k
  }
7665
3.67M
    }
7666
3.67M
    if (ctxt->instate == XML_PARSER_EOF)
7667
0
  return(NULL);
7668
    /*
7669
     * [ WFC: Entity Declared ]
7670
     * In a document without any DTD, a document with only an
7671
     * internal DTD subset which contains no parameter entity
7672
     * references, or a document with "standalone='yes'", the
7673
     * Name given in the entity reference must match that in an
7674
     * entity declaration, except that well-formed documents
7675
     * need not declare any of the following entities: amp, lt,
7676
     * gt, apos, quot.
7677
     * The declaration of a parameter entity must precede any
7678
     * reference to it.
7679
     * Similarly, the declaration of a general entity must
7680
     * precede any reference to it which appears in a default
7681
     * value in an attribute-list declaration. Note that if
7682
     * entities are declared in the external subset or in
7683
     * external parameter entities, a non-validating processor
7684
     * is not obligated to read and process their declarations;
7685
     * for such documents, the rule that an entity must be
7686
     * declared is a well-formedness constraint only if
7687
     * standalone='yes'.
7688
     */
7689
3.67M
    if (ent == NULL) {
7690
681k
  if ((ctxt->standalone == 1) ||
7691
681k
      ((ctxt->hasExternalSubset == 0) &&
7692
658k
       (ctxt->hasPErefs == 0))) {
7693
438k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7694
438k
         "Entity '%s' not defined\n", name);
7695
438k
  } else {
7696
243k
      xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7697
243k
         "Entity '%s' not defined\n", name);
7698
243k
      if ((ctxt->inSubset == 0) &&
7699
243k
    (ctxt->sax != NULL) &&
7700
243k
    (ctxt->sax->reference != NULL)) {
7701
241k
    ctxt->sax->reference(ctxt->userData, name);
7702
241k
      }
7703
243k
  }
7704
681k
  ctxt->valid = 0;
7705
681k
    }
7706
7707
    /*
7708
     * [ WFC: Parsed Entity ]
7709
     * An entity reference must not contain the name of an
7710
     * unparsed entity
7711
     */
7712
2.99M
    else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7713
2.03k
  xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7714
2.03k
     "Entity reference to unparsed entity %s\n", name);
7715
2.03k
    }
7716
7717
    /*
7718
     * [ WFC: No External Entity References ]
7719
     * Attribute values cannot contain direct or indirect
7720
     * entity references to external entities.
7721
     */
7722
2.98M
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7723
2.98M
       (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7724
9.96k
  xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7725
9.96k
       "Attribute references external entity '%s'\n", name);
7726
9.96k
    }
7727
    /*
7728
     * [ WFC: No < in Attribute Values ]
7729
     * The replacement text of any entity referred to directly or
7730
     * indirectly in an attribute value (other than "&lt;") must
7731
     * not contain a <.
7732
     */
7733
2.97M
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7734
2.97M
       (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7735
1.29M
  if ((ent->flags & XML_ENT_CHECKED_LT) == 0) {
7736
28.7k
            if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7737
2.69k
                ent->flags |= XML_ENT_CONTAINS_LT;
7738
28.7k
            ent->flags |= XML_ENT_CHECKED_LT;
7739
28.7k
        }
7740
1.29M
        if (ent->flags & XML_ENT_CONTAINS_LT)
7741
15.6k
            xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7742
15.6k
                    "'<' in entity '%s' is not allowed in attributes "
7743
15.6k
                    "values\n", name);
7744
1.29M
    }
7745
7746
    /*
7747
     * Internal check, no parameter entities here ...
7748
     */
7749
1.68M
    else {
7750
1.68M
  switch (ent->etype) {
7751
0
      case XML_INTERNAL_PARAMETER_ENTITY:
7752
0
      case XML_EXTERNAL_PARAMETER_ENTITY:
7753
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7754
0
       "Attempt to reference the parameter entity '%s'\n",
7755
0
            name);
7756
0
      break;
7757
1.68M
      default:
7758
1.68M
      break;
7759
1.68M
  }
7760
1.68M
    }
7761
7762
    /*
7763
     * [ WFC: No Recursion ]
7764
     * A parsed entity must not contain a recursive reference
7765
     * to itself, either directly or indirectly.
7766
     * Done somewhere else
7767
     */
7768
3.67M
    return(ent);
7769
3.67M
}
7770
7771
/**
7772
 * xmlParseStringEntityRef:
7773
 * @ctxt:  an XML parser context
7774
 * @str:  a pointer to an index in the string
7775
 *
7776
 * parse ENTITY references declarations, but this version parses it from
7777
 * a string value.
7778
 *
7779
 * [68] EntityRef ::= '&' Name ';'
7780
 *
7781
 * [ WFC: Entity Declared ]
7782
 * In a document without any DTD, a document with only an internal DTD
7783
 * subset which contains no parameter entity references, or a document
7784
 * with "standalone='yes'", the Name given in the entity reference
7785
 * must match that in an entity declaration, except that well-formed
7786
 * documents need not declare any of the following entities: amp, lt,
7787
 * gt, apos, quot.  The declaration of a parameter entity must precede
7788
 * any reference to it.  Similarly, the declaration of a general entity
7789
 * must precede any reference to it which appears in a default value in an
7790
 * attribute-list declaration. Note that if entities are declared in the
7791
 * external subset or in external parameter entities, a non-validating
7792
 * processor is not obligated to read and process their declarations;
7793
 * for such documents, the rule that an entity must be declared is a
7794
 * well-formedness constraint only if standalone='yes'.
7795
 *
7796
 * [ WFC: Parsed Entity ]
7797
 * An entity reference must not contain the name of an unparsed entity
7798
 *
7799
 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7800
 * is updated to the current location in the string.
7801
 */
7802
static xmlEntityPtr
7803
25.6M
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7804
25.6M
    xmlChar *name;
7805
25.6M
    const xmlChar *ptr;
7806
25.6M
    xmlChar cur;
7807
25.6M
    xmlEntityPtr ent = NULL;
7808
7809
25.6M
    if ((str == NULL) || (*str == NULL))
7810
0
        return(NULL);
7811
25.6M
    ptr = *str;
7812
25.6M
    cur = *ptr;
7813
25.6M
    if (cur != '&')
7814
0
  return(NULL);
7815
7816
25.6M
    ptr++;
7817
25.6M
    name = xmlParseStringName(ctxt, &ptr);
7818
25.6M
    if (name == NULL) {
7819
4.50k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7820
4.50k
           "xmlParseStringEntityRef: no name\n");
7821
4.50k
  *str = ptr;
7822
4.50k
  return(NULL);
7823
4.50k
    }
7824
25.6M
    if (*ptr != ';') {
7825
6.00k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7826
6.00k
        xmlFree(name);
7827
6.00k
  *str = ptr;
7828
6.00k
  return(NULL);
7829
6.00k
    }
7830
25.6M
    ptr++;
7831
7832
7833
    /*
7834
     * Predefined entities override any extra definition
7835
     */
7836
25.6M
    if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7837
18.8M
        ent = xmlGetPredefinedEntity(name);
7838
18.8M
        if (ent != NULL) {
7839
14.8k
            xmlFree(name);
7840
14.8k
            *str = ptr;
7841
14.8k
            return(ent);
7842
14.8k
        }
7843
18.8M
    }
7844
7845
    /*
7846
     * Ask first SAX for entity resolution, otherwise try the
7847
     * entities which may have stored in the parser context.
7848
     */
7849
25.6M
    if (ctxt->sax != NULL) {
7850
25.6M
  if (ctxt->sax->getEntity != NULL)
7851
25.6M
      ent = ctxt->sax->getEntity(ctxt->userData, name);
7852
25.6M
  if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7853
2.36M
      ent = xmlGetPredefinedEntity(name);
7854
25.6M
  if ((ent == NULL) && (ctxt->userData==ctxt)) {
7855
4.28M
      ent = xmlSAX2GetEntity(ctxt, name);
7856
4.28M
  }
7857
25.6M
    }
7858
25.6M
    if (ctxt->instate == XML_PARSER_EOF) {
7859
0
  xmlFree(name);
7860
0
  return(NULL);
7861
0
    }
7862
7863
    /*
7864
     * [ WFC: Entity Declared ]
7865
     * In a document without any DTD, a document with only an
7866
     * internal DTD subset which contains no parameter entity
7867
     * references, or a document with "standalone='yes'", the
7868
     * Name given in the entity reference must match that in an
7869
     * entity declaration, except that well-formed documents
7870
     * need not declare any of the following entities: amp, lt,
7871
     * gt, apos, quot.
7872
     * The declaration of a parameter entity must precede any
7873
     * reference to it.
7874
     * Similarly, the declaration of a general entity must
7875
     * precede any reference to it which appears in a default
7876
     * value in an attribute-list declaration. Note that if
7877
     * entities are declared in the external subset or in
7878
     * external parameter entities, a non-validating processor
7879
     * is not obligated to read and process their declarations;
7880
     * for such documents, the rule that an entity must be
7881
     * declared is a well-formedness constraint only if
7882
     * standalone='yes'.
7883
     */
7884
25.6M
    if (ent == NULL) {
7885
4.28M
  if ((ctxt->standalone == 1) ||
7886
4.28M
      ((ctxt->hasExternalSubset == 0) &&
7887
4.28M
       (ctxt->hasPErefs == 0))) {
7888
4.27M
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7889
4.27M
         "Entity '%s' not defined\n", name);
7890
4.27M
  } else {
7891
8.06k
      xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7892
8.06k
        "Entity '%s' not defined\n",
7893
8.06k
        name);
7894
8.06k
  }
7895
  /* TODO ? check regressions ctxt->valid = 0; */
7896
4.28M
    }
7897
7898
    /*
7899
     * [ WFC: Parsed Entity ]
7900
     * An entity reference must not contain the name of an
7901
     * unparsed entity
7902
     */
7903
21.3M
    else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7904
714
  xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7905
714
     "Entity reference to unparsed entity %s\n", name);
7906
714
    }
7907
7908
    /*
7909
     * [ WFC: No External Entity References ]
7910
     * Attribute values cannot contain direct or indirect
7911
     * entity references to external entities.
7912
     */
7913
21.3M
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7914
21.3M
       (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7915
825
  xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7916
825
   "Attribute references external entity '%s'\n", name);
7917
825
    }
7918
    /*
7919
     * [ WFC: No < in Attribute Values ]
7920
     * The replacement text of any entity referred to directly or
7921
     * indirectly in an attribute value (other than "&lt;") must
7922
     * not contain a <.
7923
     */
7924
21.3M
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7925
21.3M
       (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7926
21.1M
  if ((ent->flags & XML_ENT_CHECKED_LT) == 0) {
7927
14.1k
            if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7928
868
                ent->flags |= XML_ENT_CONTAINS_LT;
7929
14.1k
            ent->flags |= XML_ENT_CHECKED_LT;
7930
14.1k
        }
7931
21.1M
        if (ent->flags & XML_ENT_CONTAINS_LT)
7932
66.4k
            xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7933
66.4k
                    "'<' in entity '%s' is not allowed in attributes "
7934
66.4k
                    "values\n", name);
7935
21.1M
    }
7936
7937
    /*
7938
     * Internal check, no parameter entities here ...
7939
     */
7940
158k
    else {
7941
158k
  switch (ent->etype) {
7942
0
      case XML_INTERNAL_PARAMETER_ENTITY:
7943
0
      case XML_EXTERNAL_PARAMETER_ENTITY:
7944
0
    xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7945
0
       "Attempt to reference the parameter entity '%s'\n",
7946
0
          name);
7947
0
      break;
7948
158k
      default:
7949
158k
      break;
7950
158k
  }
7951
158k
    }
7952
7953
    /*
7954
     * [ WFC: No Recursion ]
7955
     * A parsed entity must not contain a recursive reference
7956
     * to itself, either directly or indirectly.
7957
     * Done somewhere else
7958
     */
7959
7960
25.6M
    xmlFree(name);
7961
25.6M
    *str = ptr;
7962
25.6M
    return(ent);
7963
25.6M
}
7964
7965
/**
7966
 * xmlParsePEReference:
7967
 * @ctxt:  an XML parser context
7968
 *
7969
 * DEPRECATED: Internal function, don't use.
7970
 *
7971
 * Parse a parameter entity reference. Always consumes '%'.
7972
 *
7973
 * The entity content is handled directly by pushing it's content as
7974
 * a new input stream.
7975
 *
7976
 * [69] PEReference ::= '%' Name ';'
7977
 *
7978
 * [ WFC: No Recursion ]
7979
 * A parsed entity must not contain a recursive
7980
 * reference to itself, either directly or indirectly.
7981
 *
7982
 * [ WFC: Entity Declared ]
7983
 * In a document without any DTD, a document with only an internal DTD
7984
 * subset which contains no parameter entity references, or a document
7985
 * with "standalone='yes'", ...  ... The declaration of a parameter
7986
 * entity must precede any reference to it...
7987
 *
7988
 * [ VC: Entity Declared ]
7989
 * In a document with an external subset or external parameter entities
7990
 * with "standalone='no'", ...  ... The declaration of a parameter entity
7991
 * must precede any reference to it...
7992
 *
7993
 * [ WFC: In DTD ]
7994
 * Parameter-entity references may only appear in the DTD.
7995
 * NOTE: misleading but this is handled.
7996
 */
7997
void
7998
xmlParsePEReference(xmlParserCtxtPtr ctxt)
7999
9.34M
{
8000
9.34M
    const xmlChar *name;
8001
9.34M
    xmlEntityPtr entity = NULL;
8002
9.34M
    xmlParserInputPtr input;
8003
8004
9.34M
    if (RAW != '%')
8005
0
        return;
8006
9.34M
    NEXT;
8007
9.34M
    name = xmlParseName(ctxt);
8008
9.34M
    if (name == NULL) {
8009
152k
  xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
8010
152k
  return;
8011
152k
    }
8012
9.19M
    if (xmlParserDebugEntities)
8013
0
  xmlGenericError(xmlGenericErrorContext,
8014
0
    "PEReference: %s\n", name);
8015
9.19M
    if (RAW != ';') {
8016
15.2k
  xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
8017
15.2k
        return;
8018
15.2k
    }
8019
8020
9.17M
    NEXT;
8021
8022
    /*
8023
     * Request the entity from SAX
8024
     */
8025
9.17M
    if ((ctxt->sax != NULL) &&
8026
9.17M
  (ctxt->sax->getParameterEntity != NULL))
8027
9.17M
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8028
9.17M
    if (ctxt->instate == XML_PARSER_EOF)
8029
0
  return;
8030
9.17M
    if (entity == NULL) {
8031
  /*
8032
   * [ WFC: Entity Declared ]
8033
   * In a document without any DTD, a document with only an
8034
   * internal DTD subset which contains no parameter entity
8035
   * references, or a document with "standalone='yes'", ...
8036
   * ... The declaration of a parameter entity must precede
8037
   * any reference to it...
8038
   */
8039
58.8k
  if ((ctxt->standalone == 1) ||
8040
58.8k
      ((ctxt->hasExternalSubset == 0) &&
8041
58.4k
       (ctxt->hasPErefs == 0))) {
8042
4.75k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8043
4.75k
            "PEReference: %%%s; not found\n",
8044
4.75k
            name);
8045
54.1k
  } else {
8046
      /*
8047
       * [ VC: Entity Declared ]
8048
       * In a document with an external subset or external
8049
       * parameter entities with "standalone='no'", ...
8050
       * ... The declaration of a parameter entity must
8051
       * precede any reference to it...
8052
       */
8053
54.1k
            if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
8054
12.7k
                xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
8055
12.7k
                                 "PEReference: %%%s; not found\n",
8056
12.7k
                                 name, NULL);
8057
12.7k
            } else
8058
41.3k
                xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8059
41.3k
                              "PEReference: %%%s; not found\n",
8060
41.3k
                              name, NULL);
8061
54.1k
            ctxt->valid = 0;
8062
54.1k
  }
8063
9.12M
    } else {
8064
  /*
8065
   * Internal checking in case the entity quest barfed
8066
   */
8067
9.12M
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8068
9.12M
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8069
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8070
0
      "Internal: %%%s; is not a parameter entity\n",
8071
0
        name, NULL);
8072
9.12M
  } else {
8073
9.12M
            xmlChar start[4];
8074
9.12M
            xmlCharEncoding enc;
8075
9.12M
            unsigned long parentConsumed;
8076
9.12M
            xmlEntityPtr oldEnt;
8077
8078
9.12M
      if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
8079
9.12M
          ((ctxt->options & XML_PARSE_NOENT) == 0) &&
8080
9.12M
    ((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
8081
9.12M
    ((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
8082
9.12M
    ((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
8083
9.12M
    (ctxt->replaceEntities == 0) &&
8084
9.12M
    (ctxt->validate == 0))
8085
901
    return;
8086
8087
9.11M
            if (entity->flags & XML_ENT_EXPANDING) {
8088
336
                xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
8089
336
                xmlHaltParser(ctxt);
8090
336
                return;
8091
336
            }
8092
8093
            /* Must be computed from old input before pushing new input. */
8094
9.11M
            parentConsumed = ctxt->input->parentConsumed;
8095
9.11M
            oldEnt = ctxt->input->entity;
8096
9.11M
            if ((oldEnt == NULL) ||
8097
9.11M
                ((oldEnt->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
8098
8.74M
                 ((oldEnt->flags & XML_ENT_PARSED) == 0))) {
8099
531k
                xmlSaturatedAdd(&parentConsumed, ctxt->input->consumed);
8100
531k
                xmlSaturatedAddSizeT(&parentConsumed,
8101
531k
                                     ctxt->input->cur - ctxt->input->base);
8102
531k
            }
8103
8104
9.11M
      input = xmlNewEntityInputStream(ctxt, entity);
8105
9.11M
      if (xmlPushInput(ctxt, input) < 0) {
8106
10.6k
                xmlFreeInputStream(input);
8107
10.6k
    return;
8108
10.6k
            }
8109
8110
9.10M
            entity->flags |= XML_ENT_EXPANDING;
8111
8112
9.10M
            input->parentConsumed = parentConsumed;
8113
8114
9.10M
      if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
8115
                /*
8116
                 * Get the 4 first bytes and decode the charset
8117
                 * if enc != XML_CHAR_ENCODING_NONE
8118
                 * plug some encoding conversion routines.
8119
                 * Note that, since we may have some non-UTF8
8120
                 * encoding (like UTF16, bug 135229), the 'length'
8121
                 * is not known, but we can calculate based upon
8122
                 * the amount of data in the buffer.
8123
                 */
8124
6.98k
                GROW
8125
6.98k
                if (ctxt->instate == XML_PARSER_EOF)
8126
0
                    return;
8127
6.98k
                if ((ctxt->input->end - ctxt->input->cur)>=4) {
8128
6.89k
                    start[0] = RAW;
8129
6.89k
                    start[1] = NXT(1);
8130
6.89k
                    start[2] = NXT(2);
8131
6.89k
                    start[3] = NXT(3);
8132
6.89k
                    enc = xmlDetectCharEncoding(start, 4);
8133
6.89k
                    if (enc != XML_CHAR_ENCODING_NONE) {
8134
1.94k
                        xmlSwitchEncoding(ctxt, enc);
8135
1.94k
                    }
8136
6.89k
                }
8137
8138
6.98k
                if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
8139
6.98k
                    (IS_BLANK_CH(NXT(5)))) {
8140
1.56k
                    xmlParseTextDecl(ctxt);
8141
1.56k
                }
8142
6.98k
            }
8143
9.10M
  }
8144
9.12M
    }
8145
9.16M
    ctxt->hasPErefs = 1;
8146
9.16M
}
8147
8148
/**
8149
 * xmlLoadEntityContent:
8150
 * @ctxt:  an XML parser context
8151
 * @entity: an unloaded system entity
8152
 *
8153
 * Load the original content of the given system entity from the
8154
 * ExternalID/SystemID given. This is to be used for Included in Literal
8155
 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
8156
 *
8157
 * Returns 0 in case of success and -1 in case of failure
8158
 */
8159
static int
8160
2.99k
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
8161
2.99k
    xmlParserInputPtr input;
8162
2.99k
    xmlBufferPtr buf;
8163
2.99k
    int l, c;
8164
2.99k
    int count = 0;
8165
8166
2.99k
    if ((ctxt == NULL) || (entity == NULL) ||
8167
2.99k
        ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
8168
2.99k
   (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
8169
2.99k
  (entity->content != NULL)) {
8170
0
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8171
0
              "xmlLoadEntityContent parameter error");
8172
0
        return(-1);
8173
0
    }
8174
8175
2.99k
    if (xmlParserDebugEntities)
8176
0
  xmlGenericError(xmlGenericErrorContext,
8177
0
    "Reading %s entity content input\n", entity->name);
8178
8179
2.99k
    buf = xmlBufferCreate();
8180
2.99k
    if (buf == NULL) {
8181
0
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8182
0
              "xmlLoadEntityContent parameter error");
8183
0
        return(-1);
8184
0
    }
8185
2.99k
    xmlBufferSetAllocationScheme(buf, XML_BUFFER_ALLOC_DOUBLEIT);
8186
8187
2.99k
    input = xmlNewEntityInputStream(ctxt, entity);
8188
2.99k
    if (input == NULL) {
8189
669
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8190
669
              "xmlLoadEntityContent input error");
8191
669
  xmlBufferFree(buf);
8192
669
        return(-1);
8193
669
    }
8194
8195
    /*
8196
     * Push the entity as the current input, read char by char
8197
     * saving to the buffer until the end of the entity or an error
8198
     */
8199
2.32k
    if (xmlPushInput(ctxt, input) < 0) {
8200
0
        xmlBufferFree(buf);
8201
0
  xmlFreeInputStream(input);
8202
0
  return(-1);
8203
0
    }
8204
8205
2.32k
    GROW;
8206
2.32k
    c = CUR_CHAR(l);
8207
3.03M
    while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
8208
3.03M
           (IS_CHAR(c))) {
8209
3.03M
        xmlBufferAdd(buf, ctxt->input->cur, l);
8210
3.03M
  if (count++ > XML_PARSER_CHUNK_SIZE) {
8211
28.6k
      count = 0;
8212
28.6k
      GROW;
8213
28.6k
            if (ctxt->instate == XML_PARSER_EOF) {
8214
0
                xmlBufferFree(buf);
8215
0
                return(-1);
8216
0
            }
8217
28.6k
  }
8218
3.03M
  NEXTL(l);
8219
3.03M
  c = CUR_CHAR(l);
8220
3.03M
  if (c == 0) {
8221
1.89k
      count = 0;
8222
1.89k
      GROW;
8223
1.89k
            if (ctxt->instate == XML_PARSER_EOF) {
8224
0
                xmlBufferFree(buf);
8225
0
                return(-1);
8226
0
            }
8227
1.89k
      c = CUR_CHAR(l);
8228
1.89k
  }
8229
3.03M
    }
8230
8231
2.32k
    if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
8232
1.28k
        xmlSaturatedAdd(&ctxt->sizeentities, ctxt->input->consumed);
8233
1.28k
        xmlPopInput(ctxt);
8234
1.28k
    } else if (!IS_CHAR(c)) {
8235
1.04k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8236
1.04k
                          "xmlLoadEntityContent: invalid char value %d\n",
8237
1.04k
                    c);
8238
1.04k
  xmlBufferFree(buf);
8239
1.04k
  return(-1);
8240
1.04k
    }
8241
1.28k
    entity->content = buf->content;
8242
1.28k
    entity->length = buf->use;
8243
1.28k
    buf->content = NULL;
8244
1.28k
    xmlBufferFree(buf);
8245
8246
1.28k
    return(0);
8247
2.32k
}
8248
8249
/**
8250
 * xmlParseStringPEReference:
8251
 * @ctxt:  an XML parser context
8252
 * @str:  a pointer to an index in the string
8253
 *
8254
 * parse PEReference declarations
8255
 *
8256
 * [69] PEReference ::= '%' Name ';'
8257
 *
8258
 * [ WFC: No Recursion ]
8259
 * A parsed entity must not contain a recursive
8260
 * reference to itself, either directly or indirectly.
8261
 *
8262
 * [ WFC: Entity Declared ]
8263
 * In a document without any DTD, a document with only an internal DTD
8264
 * subset which contains no parameter entity references, or a document
8265
 * with "standalone='yes'", ...  ... The declaration of a parameter
8266
 * entity must precede any reference to it...
8267
 *
8268
 * [ VC: Entity Declared ]
8269
 * In a document with an external subset or external parameter entities
8270
 * with "standalone='no'", ...  ... The declaration of a parameter entity
8271
 * must precede any reference to it...
8272
 *
8273
 * [ WFC: In DTD ]
8274
 * Parameter-entity references may only appear in the DTD.
8275
 * NOTE: misleading but this is handled.
8276
 *
8277
 * Returns the string of the entity content.
8278
 *         str is updated to the current value of the index
8279
 */
8280
static xmlEntityPtr
8281
370k
xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8282
370k
    const xmlChar *ptr;
8283
370k
    xmlChar cur;
8284
370k
    xmlChar *name;
8285
370k
    xmlEntityPtr entity = NULL;
8286
8287
370k
    if ((str == NULL) || (*str == NULL)) return(NULL);
8288
370k
    ptr = *str;
8289
370k
    cur = *ptr;
8290
370k
    if (cur != '%')
8291
0
        return(NULL);
8292
370k
    ptr++;
8293
370k
    name = xmlParseStringName(ctxt, &ptr);
8294
370k
    if (name == NULL) {
8295
11.1k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8296
11.1k
           "xmlParseStringPEReference: no name\n");
8297
11.1k
  *str = ptr;
8298
11.1k
  return(NULL);
8299
11.1k
    }
8300
359k
    cur = *ptr;
8301
359k
    if (cur != ';') {
8302
1.07k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8303
1.07k
  xmlFree(name);
8304
1.07k
  *str = ptr;
8305
1.07k
  return(NULL);
8306
1.07k
    }
8307
358k
    ptr++;
8308
8309
    /*
8310
     * Request the entity from SAX
8311
     */
8312
358k
    if ((ctxt->sax != NULL) &&
8313
358k
  (ctxt->sax->getParameterEntity != NULL))
8314
358k
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8315
358k
    if (ctxt->instate == XML_PARSER_EOF) {
8316
0
  xmlFree(name);
8317
0
  *str = ptr;
8318
0
  return(NULL);
8319
0
    }
8320
358k
    if (entity == NULL) {
8321
  /*
8322
   * [ WFC: Entity Declared ]
8323
   * In a document without any DTD, a document with only an
8324
   * internal DTD subset which contains no parameter entity
8325
   * references, or a document with "standalone='yes'", ...
8326
   * ... The declaration of a parameter entity must precede
8327
   * any reference to it...
8328
   */
8329
35.6k
  if ((ctxt->standalone == 1) ||
8330
35.6k
      ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8331
2
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8332
2
     "PEReference: %%%s; not found\n", name);
8333
35.6k
  } else {
8334
      /*
8335
       * [ VC: Entity Declared ]
8336
       * In a document with an external subset or external
8337
       * parameter entities with "standalone='no'", ...
8338
       * ... The declaration of a parameter entity must
8339
       * precede any reference to it...
8340
       */
8341
35.6k
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8342
35.6k
        "PEReference: %%%s; not found\n",
8343
35.6k
        name, NULL);
8344
35.6k
      ctxt->valid = 0;
8345
35.6k
  }
8346
322k
    } else {
8347
  /*
8348
   * Internal checking in case the entity quest barfed
8349
   */
8350
322k
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8351
322k
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8352
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8353
0
        "%%%s; is not a parameter entity\n",
8354
0
        name, NULL);
8355
0
  }
8356
322k
    }
8357
358k
    ctxt->hasPErefs = 1;
8358
358k
    xmlFree(name);
8359
358k
    *str = ptr;
8360
358k
    return(entity);
8361
358k
}
8362
8363
/**
8364
 * xmlParseDocTypeDecl:
8365
 * @ctxt:  an XML parser context
8366
 *
8367
 * DEPRECATED: Internal function, don't use.
8368
 *
8369
 * parse a DOCTYPE declaration
8370
 *
8371
 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8372
 *                      ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8373
 *
8374
 * [ VC: Root Element Type ]
8375
 * The Name in the document type declaration must match the element
8376
 * type of the root element.
8377
 */
8378
8379
void
8380
437k
xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
8381
437k
    const xmlChar *name = NULL;
8382
437k
    xmlChar *ExternalID = NULL;
8383
437k
    xmlChar *URI = NULL;
8384
8385
    /*
8386
     * We know that '<!DOCTYPE' has been detected.
8387
     */
8388
437k
    SKIP(9);
8389
8390
437k
    SKIP_BLANKS;
8391
8392
    /*
8393
     * Parse the DOCTYPE name.
8394
     */
8395
437k
    name = xmlParseName(ctxt);
8396
437k
    if (name == NULL) {
8397
3.38k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8398
3.38k
           "xmlParseDocTypeDecl : no DOCTYPE name !\n");
8399
3.38k
    }
8400
437k
    ctxt->intSubName = name;
8401
8402
437k
    SKIP_BLANKS;
8403
8404
    /*
8405
     * Check for SystemID and ExternalID
8406
     */
8407
437k
    URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8408
8409
437k
    if ((URI != NULL) || (ExternalID != NULL)) {
8410
175k
        ctxt->hasExternalSubset = 1;
8411
175k
    }
8412
437k
    ctxt->extSubURI = URI;
8413
437k
    ctxt->extSubSystem = ExternalID;
8414
8415
437k
    SKIP_BLANKS;
8416
8417
    /*
8418
     * Create and update the internal subset.
8419
     */
8420
437k
    if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8421
437k
  (!ctxt->disableSAX))
8422
414k
  ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8423
437k
    if (ctxt->instate == XML_PARSER_EOF)
8424
0
  return;
8425
8426
    /*
8427
     * Is there any internal subset declarations ?
8428
     * they are handled separately in xmlParseInternalSubset()
8429
     */
8430
437k
    if (RAW == '[')
8431
267k
  return;
8432
8433
    /*
8434
     * We should be at the end of the DOCTYPE declaration.
8435
     */
8436
169k
    if (RAW != '>') {
8437
44.3k
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8438
44.3k
    }
8439
169k
    NEXT;
8440
169k
}
8441
8442
/**
8443
 * xmlParseInternalSubset:
8444
 * @ctxt:  an XML parser context
8445
 *
8446
 * parse the internal subset declaration
8447
 *
8448
 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8449
 */
8450
8451
static void
8452
276k
xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8453
    /*
8454
     * Is there any DTD definition ?
8455
     */
8456
276k
    if (RAW == '[') {
8457
276k
        int baseInputNr = ctxt->inputNr;
8458
276k
        ctxt->instate = XML_PARSER_DTD;
8459
276k
        NEXT;
8460
  /*
8461
   * Parse the succession of Markup declarations and
8462
   * PEReferences.
8463
   * Subsequence (markupdecl | PEReference | S)*
8464
   */
8465
276k
  SKIP_BLANKS;
8466
10.9M
  while (((RAW != ']') || (ctxt->inputNr > baseInputNr)) &&
8467
10.9M
               (ctxt->instate != XML_PARSER_EOF)) {
8468
8469
            /*
8470
             * Conditional sections are allowed from external entities included
8471
             * by PE References in the internal subset.
8472
             */
8473
10.7M
            if ((ctxt->inputNr > 1) && (ctxt->input->filename != NULL) &&
8474
10.7M
                (RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
8475
0
                xmlParseConditionalSections(ctxt);
8476
10.7M
            } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
8477
10.5M
          xmlParseMarkupDecl(ctxt);
8478
10.5M
            } else if (RAW == '%') {
8479
161k
          xmlParsePEReference(ctxt);
8480
161k
            } else {
8481
100k
    xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8482
100k
                        "xmlParseInternalSubset: error detected in"
8483
100k
                        " Markup declaration\n");
8484
100k
                xmlHaltParser(ctxt);
8485
100k
                return;
8486
100k
            }
8487
10.6M
      SKIP_BLANKS;
8488
10.6M
  }
8489
176k
  if (RAW == ']') {
8490
148k
      NEXT;
8491
148k
      SKIP_BLANKS;
8492
148k
  }
8493
176k
    }
8494
8495
    /*
8496
     * We should be at the end of the DOCTYPE declaration.
8497
     */
8498
176k
    if (RAW != '>') {
8499
29.9k
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8500
29.9k
  return;
8501
29.9k
    }
8502
146k
    NEXT;
8503
146k
}
8504
8505
#ifdef LIBXML_SAX1_ENABLED
8506
/**
8507
 * xmlParseAttribute:
8508
 * @ctxt:  an XML parser context
8509
 * @value:  a xmlChar ** used to store the value of the attribute
8510
 *
8511
 * DEPRECATED: Internal function, don't use.
8512
 *
8513
 * parse an attribute
8514
 *
8515
 * [41] Attribute ::= Name Eq AttValue
8516
 *
8517
 * [ WFC: No External Entity References ]
8518
 * Attribute values cannot contain direct or indirect entity references
8519
 * to external entities.
8520
 *
8521
 * [ WFC: No < in Attribute Values ]
8522
 * The replacement text of any entity referred to directly or indirectly in
8523
 * an attribute value (other than "&lt;") must not contain a <.
8524
 *
8525
 * [ VC: Attribute Value Type ]
8526
 * The attribute must have been declared; the value must be of the type
8527
 * declared for it.
8528
 *
8529
 * [25] Eq ::= S? '=' S?
8530
 *
8531
 * With namespace:
8532
 *
8533
 * [NS 11] Attribute ::= QName Eq AttValue
8534
 *
8535
 * Also the case QName == xmlns:??? is handled independently as a namespace
8536
 * definition.
8537
 *
8538
 * Returns the attribute name, and the value in *value.
8539
 */
8540
8541
const xmlChar *
8542
4.10M
xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8543
4.10M
    const xmlChar *name;
8544
4.10M
    xmlChar *val;
8545
8546
4.10M
    *value = NULL;
8547
4.10M
    GROW;
8548
4.10M
    name = xmlParseName(ctxt);
8549
4.10M
    if (name == NULL) {
8550
1.37M
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8551
1.37M
                 "error parsing attribute name\n");
8552
1.37M
        return(NULL);
8553
1.37M
    }
8554
8555
    /*
8556
     * read the value
8557
     */
8558
2.73M
    SKIP_BLANKS;
8559
2.73M
    if (RAW == '=') {
8560
2.17M
        NEXT;
8561
2.17M
  SKIP_BLANKS;
8562
2.17M
  val = xmlParseAttValue(ctxt);
8563
2.17M
  ctxt->instate = XML_PARSER_CONTENT;
8564
2.17M
    } else {
8565
559k
  xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8566
559k
         "Specification mandates value for attribute %s\n", name);
8567
559k
  return(name);
8568
559k
    }
8569
8570
    /*
8571
     * Check that xml:lang conforms to the specification
8572
     * No more registered as an error, just generate a warning now
8573
     * since this was deprecated in XML second edition
8574
     */
8575
2.17M
    if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8576
78.9k
  if (!xmlCheckLanguageID(val)) {
8577
60.5k
      xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8578
60.5k
              "Malformed value for xml:lang : %s\n",
8579
60.5k
        val, NULL);
8580
60.5k
  }
8581
78.9k
    }
8582
8583
    /*
8584
     * Check that xml:space conforms to the specification
8585
     */
8586
2.17M
    if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8587
2.91k
  if (xmlStrEqual(val, BAD_CAST "default"))
8588
24
      *(ctxt->space) = 0;
8589
2.88k
  else if (xmlStrEqual(val, BAD_CAST "preserve"))
8590
198
      *(ctxt->space) = 1;
8591
2.68k
  else {
8592
2.68k
    xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8593
2.68k
"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8594
2.68k
                                 val, NULL);
8595
2.68k
  }
8596
2.91k
    }
8597
8598
2.17M
    *value = val;
8599
2.17M
    return(name);
8600
2.73M
}
8601
8602
/**
8603
 * xmlParseStartTag:
8604
 * @ctxt:  an XML parser context
8605
 *
8606
 * DEPRECATED: Internal function, don't use.
8607
 *
8608
 * Parse a start tag. Always consumes '<'.
8609
 *
8610
 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8611
 *
8612
 * [ WFC: Unique Att Spec ]
8613
 * No attribute name may appear more than once in the same start-tag or
8614
 * empty-element tag.
8615
 *
8616
 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8617
 *
8618
 * [ WFC: Unique Att Spec ]
8619
 * No attribute name may appear more than once in the same start-tag or
8620
 * empty-element tag.
8621
 *
8622
 * With namespace:
8623
 *
8624
 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8625
 *
8626
 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8627
 *
8628
 * Returns the element name parsed
8629
 */
8630
8631
const xmlChar *
8632
4.63M
xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8633
4.63M
    const xmlChar *name;
8634
4.63M
    const xmlChar *attname;
8635
4.63M
    xmlChar *attvalue;
8636
4.63M
    const xmlChar **atts = ctxt->atts;
8637
4.63M
    int nbatts = 0;
8638
4.63M
    int maxatts = ctxt->maxatts;
8639
4.63M
    int i;
8640
8641
4.63M
    if (RAW != '<') return(NULL);
8642
4.63M
    NEXT1;
8643
8644
4.63M
    name = xmlParseName(ctxt);
8645
4.63M
    if (name == NULL) {
8646
383k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8647
383k
       "xmlParseStartTag: invalid element name\n");
8648
383k
        return(NULL);
8649
383k
    }
8650
8651
    /*
8652
     * Now parse the attributes, it ends up with the ending
8653
     *
8654
     * (S Attribute)* S?
8655
     */
8656
4.25M
    SKIP_BLANKS;
8657
4.25M
    GROW;
8658
8659
6.07M
    while (((RAW != '>') &&
8660
6.07M
     ((RAW != '/') || (NXT(1) != '>')) &&
8661
6.07M
     (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
8662
4.10M
  attname = xmlParseAttribute(ctxt, &attvalue);
8663
4.10M
        if (attname == NULL) {
8664
1.37M
      xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8665
1.37M
         "xmlParseStartTag: problem parsing attributes\n");
8666
1.37M
      break;
8667
1.37M
  }
8668
2.73M
        if (attvalue != NULL) {
8669
      /*
8670
       * [ WFC: Unique Att Spec ]
8671
       * No attribute name may appear more than once in the same
8672
       * start-tag or empty-element tag.
8673
       */
8674
3.08M
      for (i = 0; i < nbatts;i += 2) {
8675
1.00M
          if (xmlStrEqual(atts[i], attname)) {
8676
40.6k
        xmlErrAttributeDup(ctxt, NULL, attname);
8677
40.6k
        xmlFree(attvalue);
8678
40.6k
        goto failed;
8679
40.6k
    }
8680
1.00M
      }
8681
      /*
8682
       * Add the pair to atts
8683
       */
8684
2.07M
      if (atts == NULL) {
8685
116k
          maxatts = 22; /* allow for 10 attrs by default */
8686
116k
          atts = (const xmlChar **)
8687
116k
           xmlMalloc(maxatts * sizeof(xmlChar *));
8688
116k
    if (atts == NULL) {
8689
0
        xmlErrMemory(ctxt, NULL);
8690
0
        if (attvalue != NULL)
8691
0
      xmlFree(attvalue);
8692
0
        goto failed;
8693
0
    }
8694
116k
    ctxt->atts = atts;
8695
116k
    ctxt->maxatts = maxatts;
8696
1.95M
      } else if (nbatts + 4 > maxatts) {
8697
133
          const xmlChar **n;
8698
8699
133
          maxatts *= 2;
8700
133
          n = (const xmlChar **) xmlRealloc((void *) atts,
8701
133
               maxatts * sizeof(const xmlChar *));
8702
133
    if (n == NULL) {
8703
0
        xmlErrMemory(ctxt, NULL);
8704
0
        if (attvalue != NULL)
8705
0
      xmlFree(attvalue);
8706
0
        goto failed;
8707
0
    }
8708
133
    atts = n;
8709
133
    ctxt->atts = atts;
8710
133
    ctxt->maxatts = maxatts;
8711
133
      }
8712
2.07M
      atts[nbatts++] = attname;
8713
2.07M
      atts[nbatts++] = attvalue;
8714
2.07M
      atts[nbatts] = NULL;
8715
2.07M
      atts[nbatts + 1] = NULL;
8716
2.07M
  } else {
8717
616k
      if (attvalue != NULL)
8718
0
    xmlFree(attvalue);
8719
616k
  }
8720
8721
2.73M
failed:
8722
8723
2.73M
  GROW
8724
2.73M
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8725
905k
      break;
8726
1.82M
  if (SKIP_BLANKS == 0) {
8727
1.05M
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8728
1.05M
         "attributes construct error\n");
8729
1.05M
  }
8730
1.82M
  SHRINK;
8731
1.82M
        GROW;
8732
1.82M
    }
8733
8734
    /*
8735
     * SAX: Start of Element !
8736
     */
8737
4.25M
    if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8738
4.25M
  (!ctxt->disableSAX)) {
8739
3.99M
  if (nbatts > 0)
8740
1.36M
      ctxt->sax->startElement(ctxt->userData, name, atts);
8741
2.63M
  else
8742
2.63M
      ctxt->sax->startElement(ctxt->userData, name, NULL);
8743
3.99M
    }
8744
8745
4.25M
    if (atts != NULL) {
8746
        /* Free only the content strings */
8747
5.34M
        for (i = 1;i < nbatts;i+=2)
8748
2.07M
      if (atts[i] != NULL)
8749
2.07M
         xmlFree((xmlChar *) atts[i]);
8750
3.27M
    }
8751
4.25M
    return(name);
8752
4.25M
}
8753
8754
/**
8755
 * xmlParseEndTag1:
8756
 * @ctxt:  an XML parser context
8757
 * @line:  line of the start tag
8758
 * @nsNr:  number of namespaces on the start tag
8759
 *
8760
 * Parse an end tag. Always consumes '</'.
8761
 *
8762
 * [42] ETag ::= '</' Name S? '>'
8763
 *
8764
 * With namespace
8765
 *
8766
 * [NS 9] ETag ::= '</' QName S? '>'
8767
 */
8768
8769
static void
8770
1.21M
xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8771
1.21M
    const xmlChar *name;
8772
8773
1.21M
    GROW;
8774
1.21M
    if ((RAW != '<') || (NXT(1) != '/')) {
8775
0
  xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8776
0
           "xmlParseEndTag: '</' not found\n");
8777
0
  return;
8778
0
    }
8779
1.21M
    SKIP(2);
8780
8781
1.21M
    name = xmlParseNameAndCompare(ctxt,ctxt->name);
8782
8783
    /*
8784
     * We should definitely be at the ending "S? '>'" part
8785
     */
8786
1.21M
    GROW;
8787
1.21M
    SKIP_BLANKS;
8788
1.21M
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8789
203k
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8790
203k
    } else
8791
1.01M
  NEXT1;
8792
8793
    /*
8794
     * [ WFC: Element Type Match ]
8795
     * The Name in an element's end-tag must match the element type in the
8796
     * start-tag.
8797
     *
8798
     */
8799
1.21M
    if (name != (xmlChar*)1) {
8800
407k
        if (name == NULL) name = BAD_CAST "unparsable";
8801
407k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8802
407k
         "Opening and ending tag mismatch: %s line %d and %s\n",
8803
407k
                    ctxt->name, line, name);
8804
407k
    }
8805
8806
    /*
8807
     * SAX: End of Tag
8808
     */
8809
1.21M
    if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8810
1.21M
  (!ctxt->disableSAX))
8811
1.12M
        ctxt->sax->endElement(ctxt->userData, ctxt->name);
8812
8813
1.21M
    namePop(ctxt);
8814
1.21M
    spacePop(ctxt);
8815
1.21M
    return;
8816
1.21M
}
8817
8818
/**
8819
 * xmlParseEndTag:
8820
 * @ctxt:  an XML parser context
8821
 *
8822
 * DEPRECATED: Internal function, don't use.
8823
 *
8824
 * parse an end of tag
8825
 *
8826
 * [42] ETag ::= '</' Name S? '>'
8827
 *
8828
 * With namespace
8829
 *
8830
 * [NS 9] ETag ::= '</' QName S? '>'
8831
 */
8832
8833
void
8834
0
xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8835
0
    xmlParseEndTag1(ctxt, 0);
8836
0
}
8837
#endif /* LIBXML_SAX1_ENABLED */
8838
8839
/************************************************************************
8840
 *                  *
8841
 *          SAX 2 specific operations       *
8842
 *                  *
8843
 ************************************************************************/
8844
8845
/*
8846
 * xmlGetNamespace:
8847
 * @ctxt:  an XML parser context
8848
 * @prefix:  the prefix to lookup
8849
 *
8850
 * Lookup the namespace name for the @prefix (which ca be NULL)
8851
 * The prefix must come from the @ctxt->dict dictionary
8852
 *
8853
 * Returns the namespace name or NULL if not bound
8854
 */
8855
static const xmlChar *
8856
8.58M
xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8857
8.58M
    int i;
8858
8859
8.58M
    if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
8860
17.7M
    for (i = ctxt->nsNr - 2;i >= 0;i-=2)
8861
11.0M
        if (ctxt->nsTab[i] == prefix) {
8862
1.56M
      if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8863
35.6k
          return(NULL);
8864
1.53M
      return(ctxt->nsTab[i + 1]);
8865
1.56M
  }
8866
6.66M
    return(NULL);
8867
8.23M
}
8868
8869
/**
8870
 * xmlParseQName:
8871
 * @ctxt:  an XML parser context
8872
 * @prefix:  pointer to store the prefix part
8873
 *
8874
 * parse an XML Namespace QName
8875
 *
8876
 * [6]  QName  ::= (Prefix ':')? LocalPart
8877
 * [7]  Prefix  ::= NCName
8878
 * [8]  LocalPart  ::= NCName
8879
 *
8880
 * Returns the Name parsed or NULL
8881
 */
8882
8883
static const xmlChar *
8884
15.7M
xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8885
15.7M
    const xmlChar *l, *p;
8886
8887
15.7M
    GROW;
8888
8889
15.7M
    l = xmlParseNCName(ctxt);
8890
15.7M
    if (l == NULL) {
8891
1.76M
        if (CUR == ':') {
8892
46.4k
      l = xmlParseName(ctxt);
8893
46.4k
      if (l != NULL) {
8894
46.4k
          xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8895
46.4k
             "Failed to parse QName '%s'\n", l, NULL, NULL);
8896
46.4k
    *prefix = NULL;
8897
46.4k
    return(l);
8898
46.4k
      }
8899
46.4k
  }
8900
1.71M
        return(NULL);
8901
1.76M
    }
8902
13.9M
    if (CUR == ':') {
8903
3.40M
        NEXT;
8904
3.40M
  p = l;
8905
3.40M
  l = xmlParseNCName(ctxt);
8906
3.40M
  if (l == NULL) {
8907
169k
      xmlChar *tmp;
8908
8909
169k
            if (ctxt->instate == XML_PARSER_EOF)
8910
0
                return(NULL);
8911
169k
            xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8912
169k
               "Failed to parse QName '%s:'\n", p, NULL, NULL);
8913
169k
      l = xmlParseNmtoken(ctxt);
8914
169k
      if (l == NULL) {
8915
116k
                if (ctxt->instate == XML_PARSER_EOF)
8916
0
                    return(NULL);
8917
116k
    tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8918
116k
            } else {
8919
52.4k
    tmp = xmlBuildQName(l, p, NULL, 0);
8920
52.4k
    xmlFree((char *)l);
8921
52.4k
      }
8922
169k
      p = xmlDictLookup(ctxt->dict, tmp, -1);
8923
169k
      if (tmp != NULL) xmlFree(tmp);
8924
169k
      *prefix = NULL;
8925
169k
      return(p);
8926
169k
  }
8927
3.23M
  if (CUR == ':') {
8928
87.0k
      xmlChar *tmp;
8929
8930
87.0k
            xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8931
87.0k
               "Failed to parse QName '%s:%s:'\n", p, l, NULL);
8932
87.0k
      NEXT;
8933
87.0k
      tmp = (xmlChar *) xmlParseName(ctxt);
8934
87.0k
      if (tmp != NULL) {
8935
72.6k
          tmp = xmlBuildQName(tmp, l, NULL, 0);
8936
72.6k
    l = xmlDictLookup(ctxt->dict, tmp, -1);
8937
72.6k
    if (tmp != NULL) xmlFree(tmp);
8938
72.6k
    *prefix = p;
8939
72.6k
    return(l);
8940
72.6k
      }
8941
14.4k
            if (ctxt->instate == XML_PARSER_EOF)
8942
0
                return(NULL);
8943
14.4k
      tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8944
14.4k
      l = xmlDictLookup(ctxt->dict, tmp, -1);
8945
14.4k
      if (tmp != NULL) xmlFree(tmp);
8946
14.4k
      *prefix = p;
8947
14.4k
      return(l);
8948
14.4k
  }
8949
3.14M
  *prefix = p;
8950
3.14M
    } else
8951
10.5M
        *prefix = NULL;
8952
13.6M
    return(l);
8953
13.9M
}
8954
8955
/**
8956
 * xmlParseQNameAndCompare:
8957
 * @ctxt:  an XML parser context
8958
 * @name:  the localname
8959
 * @prefix:  the prefix, if any.
8960
 *
8961
 * parse an XML name and compares for match
8962
 * (specialized for endtag parsing)
8963
 *
8964
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8965
 * and the name for mismatch
8966
 */
8967
8968
static const xmlChar *
8969
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8970
616k
                        xmlChar const *prefix) {
8971
616k
    const xmlChar *cmp;
8972
616k
    const xmlChar *in;
8973
616k
    const xmlChar *ret;
8974
616k
    const xmlChar *prefix2;
8975
8976
616k
    if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8977
8978
616k
    GROW;
8979
616k
    in = ctxt->input->cur;
8980
8981
616k
    cmp = prefix;
8982
1.37M
    while (*in != 0 && *in == *cmp) {
8983
755k
  ++in;
8984
755k
  ++cmp;
8985
755k
    }
8986
616k
    if ((*cmp == 0) && (*in == ':')) {
8987
481k
        in++;
8988
481k
  cmp = name;
8989
2.53M
  while (*in != 0 && *in == *cmp) {
8990
2.05M
      ++in;
8991
2.05M
      ++cmp;
8992
2.05M
  }
8993
481k
  if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8994
      /* success */
8995
296k
            ctxt->input->col += in - ctxt->input->cur;
8996
296k
      ctxt->input->cur = in;
8997
296k
      return((const xmlChar*) 1);
8998
296k
  }
8999
481k
    }
9000
    /*
9001
     * all strings coms from the dictionary, equality can be done directly
9002
     */
9003
320k
    ret = xmlParseQName (ctxt, &prefix2);
9004
320k
    if ((ret == name) && (prefix == prefix2))
9005
16.6k
  return((const xmlChar*) 1);
9006
304k
    return ret;
9007
320k
}
9008
9009
/**
9010
 * xmlParseAttValueInternal:
9011
 * @ctxt:  an XML parser context
9012
 * @len:  attribute len result
9013
 * @alloc:  whether the attribute was reallocated as a new string
9014
 * @normalize:  if 1 then further non-CDATA normalization must be done
9015
 *
9016
 * parse a value for an attribute.
9017
 * NOTE: if no normalization is needed, the routine will return pointers
9018
 *       directly from the data buffer.
9019
 *
9020
 * 3.3.3 Attribute-Value Normalization:
9021
 * Before the value of an attribute is passed to the application or
9022
 * checked for validity, the XML processor must normalize it as follows:
9023
 * - a character reference is processed by appending the referenced
9024
 *   character to the attribute value
9025
 * - an entity reference is processed by recursively processing the
9026
 *   replacement text of the entity
9027
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
9028
 *   appending #x20 to the normalized value, except that only a single
9029
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
9030
 *   parsed entity or the literal entity value of an internal parsed entity
9031
 * - other characters are processed by appending them to the normalized value
9032
 * If the declared value is not CDATA, then the XML processor must further
9033
 * process the normalized attribute value by discarding any leading and
9034
 * trailing space (#x20) characters, and by replacing sequences of space
9035
 * (#x20) characters by a single space (#x20) character.
9036
 * All attributes for which no declaration has been read should be treated
9037
 * by a non-validating parser as if declared CDATA.
9038
 *
9039
 * Returns the AttValue parsed or NULL. The value has to be freed by the
9040
 *     caller if it was copied, this can be detected by val[*len] == 0.
9041
 */
9042
9043
#define GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end) \
9044
4.39k
    const xmlChar *oldbase = ctxt->input->base;\
9045
4.39k
    GROW;\
9046
4.39k
    if (ctxt->instate == XML_PARSER_EOF)\
9047
4.39k
        return(NULL);\
9048
4.39k
    if (oldbase != ctxt->input->base) {\
9049
0
        ptrdiff_t delta = ctxt->input->base - oldbase;\
9050
0
        start = start + delta;\
9051
0
        in = in + delta;\
9052
0
    }\
9053
4.39k
    end = ctxt->input->end;
9054
9055
static xmlChar *
9056
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
9057
                         int normalize)
9058
7.76M
{
9059
7.76M
    xmlChar limit = 0;
9060
7.76M
    const xmlChar *in = NULL, *start, *end, *last;
9061
7.76M
    xmlChar *ret = NULL;
9062
7.76M
    int line, col;
9063
7.76M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9064
1.96M
                    XML_MAX_HUGE_LENGTH :
9065
7.76M
                    XML_MAX_TEXT_LENGTH;
9066
9067
7.76M
    GROW;
9068
7.76M
    in = (xmlChar *) CUR_PTR;
9069
7.76M
    line = ctxt->input->line;
9070
7.76M
    col = ctxt->input->col;
9071
7.76M
    if (*in != '"' && *in != '\'') {
9072
139k
        xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
9073
139k
        return (NULL);
9074
139k
    }
9075
7.62M
    ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
9076
9077
    /*
9078
     * try to handle in this routine the most common case where no
9079
     * allocation of a new string is required and where content is
9080
     * pure ASCII.
9081
     */
9082
7.62M
    limit = *in++;
9083
7.62M
    col++;
9084
7.62M
    end = ctxt->input->end;
9085
7.62M
    start = in;
9086
7.62M
    if (in >= end) {
9087
383
        GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9088
383
    }
9089
7.62M
    if (normalize) {
9090
        /*
9091
   * Skip any leading spaces
9092
   */
9093
429k
  while ((in < end) && (*in != limit) &&
9094
429k
         ((*in == 0x20) || (*in == 0x9) ||
9095
425k
          (*in == 0xA) || (*in == 0xD))) {
9096
190k
      if (*in == 0xA) {
9097
31.2k
          line++; col = 1;
9098
159k
      } else {
9099
159k
          col++;
9100
159k
      }
9101
190k
      in++;
9102
190k
      start = in;
9103
190k
      if (in >= end) {
9104
105
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9105
105
                if ((in - start) > maxLength) {
9106
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9107
0
                                   "AttValue length too long\n");
9108
0
                    return(NULL);
9109
0
                }
9110
105
      }
9111
190k
  }
9112
2.01M
  while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9113
2.01M
         (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9114
1.78M
      col++;
9115
1.78M
      if ((*in++ == 0x20) && (*in == 0x20)) break;
9116
1.77M
      if (in >= end) {
9117
234
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9118
234
                if ((in - start) > maxLength) {
9119
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9120
0
                                   "AttValue length too long\n");
9121
0
                    return(NULL);
9122
0
                }
9123
234
      }
9124
1.77M
  }
9125
238k
  last = in;
9126
  /*
9127
   * skip the trailing blanks
9128
   */
9129
250k
  while ((last[-1] == 0x20) && (last > start)) last--;
9130
353k
  while ((in < end) && (*in != limit) &&
9131
353k
         ((*in == 0x20) || (*in == 0x9) ||
9132
194k
          (*in == 0xA) || (*in == 0xD))) {
9133
115k
      if (*in == 0xA) {
9134
35.2k
          line++, col = 1;
9135
80.1k
      } else {
9136
80.1k
          col++;
9137
80.1k
      }
9138
115k
      in++;
9139
115k
      if (in >= end) {
9140
214
    const xmlChar *oldbase = ctxt->input->base;
9141
214
    GROW;
9142
214
                if (ctxt->instate == XML_PARSER_EOF)
9143
0
                    return(NULL);
9144
214
    if (oldbase != ctxt->input->base) {
9145
0
        ptrdiff_t delta = ctxt->input->base - oldbase;
9146
0
        start = start + delta;
9147
0
        in = in + delta;
9148
0
        last = last + delta;
9149
0
    }
9150
214
    end = ctxt->input->end;
9151
214
                if ((in - start) > maxLength) {
9152
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9153
0
                                   "AttValue length too long\n");
9154
0
                    return(NULL);
9155
0
                }
9156
214
      }
9157
115k
  }
9158
238k
        if ((in - start) > maxLength) {
9159
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9160
0
                           "AttValue length too long\n");
9161
0
            return(NULL);
9162
0
        }
9163
238k
  if (*in != limit) goto need_complex;
9164
7.38M
    } else {
9165
95.7M
  while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9166
95.7M
         (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9167
88.3M
      in++;
9168
88.3M
      col++;
9169
88.3M
      if (in >= end) {
9170
3.67k
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9171
3.67k
                if ((in - start) > maxLength) {
9172
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9173
0
                                   "AttValue length too long\n");
9174
0
                    return(NULL);
9175
0
                }
9176
3.67k
      }
9177
88.3M
  }
9178
7.38M
  last = in;
9179
7.38M
        if ((in - start) > maxLength) {
9180
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9181
0
                           "AttValue length too long\n");
9182
0
            return(NULL);
9183
0
        }
9184
7.38M
  if (*in != limit) goto need_complex;
9185
7.38M
    }
9186
6.13M
    in++;
9187
6.13M
    col++;
9188
6.13M
    if (len != NULL) {
9189
4.41M
        if (alloc) *alloc = 0;
9190
4.41M
        *len = last - start;
9191
4.41M
        ret = (xmlChar *) start;
9192
4.41M
    } else {
9193
1.72M
        if (alloc) *alloc = 1;
9194
1.72M
        ret = xmlStrndup(start, last - start);
9195
1.72M
    }
9196
6.13M
    CUR_PTR = in;
9197
6.13M
    ctxt->input->line = line;
9198
6.13M
    ctxt->input->col = col;
9199
6.13M
    return ret;
9200
1.48M
need_complex:
9201
1.48M
    if (alloc) *alloc = 1;
9202
1.48M
    return xmlParseAttValueComplex(ctxt, len, normalize);
9203
7.62M
}
9204
9205
/**
9206
 * xmlParseAttribute2:
9207
 * @ctxt:  an XML parser context
9208
 * @pref:  the element prefix
9209
 * @elem:  the element name
9210
 * @prefix:  a xmlChar ** used to store the value of the attribute prefix
9211
 * @value:  a xmlChar ** used to store the value of the attribute
9212
 * @len:  an int * to save the length of the attribute
9213
 * @alloc:  an int * to indicate if the attribute was allocated
9214
 *
9215
 * parse an attribute in the new SAX2 framework.
9216
 *
9217
 * Returns the attribute name, and the value in *value, .
9218
 */
9219
9220
static const xmlChar *
9221
xmlParseAttribute2(xmlParserCtxtPtr ctxt,
9222
                   const xmlChar * pref, const xmlChar * elem,
9223
                   const xmlChar ** prefix, xmlChar ** value,
9224
                   int *len, int *alloc)
9225
6.83M
{
9226
6.83M
    const xmlChar *name;
9227
6.83M
    xmlChar *val, *internal_val = NULL;
9228
6.83M
    int normalize = 0;
9229
9230
6.83M
    *value = NULL;
9231
6.83M
    GROW;
9232
6.83M
    name = xmlParseQName(ctxt, prefix);
9233
6.83M
    if (name == NULL) {
9234
1.08M
        xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9235
1.08M
                       "error parsing attribute name\n");
9236
1.08M
        return (NULL);
9237
1.08M
    }
9238
9239
    /*
9240
     * get the type if needed
9241
     */
9242
5.75M
    if (ctxt->attsSpecial != NULL) {
9243
590k
        int type;
9244
9245
590k
        type = (int) (ptrdiff_t) xmlHashQLookup2(ctxt->attsSpecial,
9246
590k
                                                 pref, elem, *prefix, name);
9247
590k
        if (type != 0)
9248
246k
            normalize = 1;
9249
590k
    }
9250
9251
    /*
9252
     * read the value
9253
     */
9254
5.75M
    SKIP_BLANKS;
9255
5.75M
    if (RAW == '=') {
9256
5.31M
        NEXT;
9257
5.31M
        SKIP_BLANKS;
9258
5.31M
        val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9259
5.31M
        if (val == NULL)
9260
72.9k
            return (NULL);
9261
5.23M
  if (normalize) {
9262
      /*
9263
       * Sometimes a second normalisation pass for spaces is needed
9264
       * but that only happens if charrefs or entity references
9265
       * have been used in the attribute value, i.e. the attribute
9266
       * value has been extracted in an allocated string already.
9267
       */
9268
238k
      if (*alloc) {
9269
79.4k
          const xmlChar *val2;
9270
9271
79.4k
          val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
9272
79.4k
    if ((val2 != NULL) && (val2 != val)) {
9273
14.2k
        xmlFree(val);
9274
14.2k
        val = (xmlChar *) val2;
9275
14.2k
    }
9276
79.4k
      }
9277
238k
  }
9278
5.23M
        ctxt->instate = XML_PARSER_CONTENT;
9279
5.23M
    } else {
9280
440k
        xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
9281
440k
                          "Specification mandates value for attribute %s\n",
9282
440k
                          name);
9283
440k
        return (name);
9284
440k
    }
9285
9286
5.23M
    if (*prefix == ctxt->str_xml) {
9287
        /*
9288
         * Check that xml:lang conforms to the specification
9289
         * No more registered as an error, just generate a warning now
9290
         * since this was deprecated in XML second edition
9291
         */
9292
307k
        if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
9293
89.8k
            internal_val = xmlStrndup(val, *len);
9294
89.8k
            if (!xmlCheckLanguageID(internal_val)) {
9295
71.5k
                xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9296
71.5k
                              "Malformed value for xml:lang : %s\n",
9297
71.5k
                              internal_val, NULL);
9298
71.5k
            }
9299
89.8k
        }
9300
9301
        /*
9302
         * Check that xml:space conforms to the specification
9303
         */
9304
307k
        if (xmlStrEqual(name, BAD_CAST "space")) {
9305
4.21k
            internal_val = xmlStrndup(val, *len);
9306
4.21k
            if (xmlStrEqual(internal_val, BAD_CAST "default"))
9307
52
                *(ctxt->space) = 0;
9308
4.16k
            else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9309
340
                *(ctxt->space) = 1;
9310
3.82k
            else {
9311
3.82k
                xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9312
3.82k
                              "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9313
3.82k
                              internal_val, NULL);
9314
3.82k
            }
9315
4.21k
        }
9316
307k
        if (internal_val) {
9317
94.1k
            xmlFree(internal_val);
9318
94.1k
        }
9319
307k
    }
9320
9321
5.23M
    *value = val;
9322
5.23M
    return (name);
9323
5.75M
}
9324
/**
9325
 * xmlParseStartTag2:
9326
 * @ctxt:  an XML parser context
9327
 *
9328
 * Parse a start tag. Always consumes '<'.
9329
 *
9330
 * This routine is called when running SAX2 parsing
9331
 *
9332
 * [40] STag ::= '<' Name (S Attribute)* S? '>'
9333
 *
9334
 * [ WFC: Unique Att Spec ]
9335
 * No attribute name may appear more than once in the same start-tag or
9336
 * empty-element tag.
9337
 *
9338
 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9339
 *
9340
 * [ WFC: Unique Att Spec ]
9341
 * No attribute name may appear more than once in the same start-tag or
9342
 * empty-element tag.
9343
 *
9344
 * With namespace:
9345
 *
9346
 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9347
 *
9348
 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9349
 *
9350
 * Returns the element name parsed
9351
 */
9352
9353
static const xmlChar *
9354
xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
9355
8.54M
                  const xmlChar **URI, int *tlen) {
9356
8.54M
    const xmlChar *localname;
9357
8.54M
    const xmlChar *prefix;
9358
8.54M
    const xmlChar *attname;
9359
8.54M
    const xmlChar *aprefix;
9360
8.54M
    const xmlChar *nsname;
9361
8.54M
    xmlChar *attvalue;
9362
8.54M
    const xmlChar **atts = ctxt->atts;
9363
8.54M
    int maxatts = ctxt->maxatts;
9364
8.54M
    int nratts, nbatts, nbdef, inputid;
9365
8.54M
    int i, j, nbNs, attval;
9366
8.54M
    unsigned long cur;
9367
8.54M
    int nsNr = ctxt->nsNr;
9368
9369
8.54M
    if (RAW != '<') return(NULL);
9370
8.54M
    NEXT1;
9371
9372
    /*
9373
     * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
9374
     *       point since the attribute values may be stored as pointers to
9375
     *       the buffer and calling SHRINK would destroy them !
9376
     *       The Shrinking is only possible once the full set of attribute
9377
     *       callbacks have been done.
9378
     */
9379
8.54M
    SHRINK;
9380
8.54M
    cur = ctxt->input->cur - ctxt->input->base;
9381
8.54M
    inputid = ctxt->input->id;
9382
8.54M
    nbatts = 0;
9383
8.54M
    nratts = 0;
9384
8.54M
    nbdef = 0;
9385
8.54M
    nbNs = 0;
9386
8.54M
    attval = 0;
9387
    /* Forget any namespaces added during an earlier parse of this element. */
9388
8.54M
    ctxt->nsNr = nsNr;
9389
9390
8.54M
    localname = xmlParseQName(ctxt, &prefix);
9391
8.54M
    if (localname == NULL) {
9392
614k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9393
614k
           "StartTag: invalid element name\n");
9394
614k
        return(NULL);
9395
614k
    }
9396
7.93M
    *tlen = ctxt->input->cur - ctxt->input->base - cur;
9397
9398
    /*
9399
     * Now parse the attributes, it ends up with the ending
9400
     *
9401
     * (S Attribute)* S?
9402
     */
9403
7.93M
    SKIP_BLANKS;
9404
7.93M
    GROW;
9405
9406
10.0M
    while (((RAW != '>') &&
9407
10.0M
     ((RAW != '/') || (NXT(1) != '>')) &&
9408
10.0M
     (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
9409
6.83M
  int len = -1, alloc = 0;
9410
9411
6.83M
  attname = xmlParseAttribute2(ctxt, prefix, localname,
9412
6.83M
                               &aprefix, &attvalue, &len, &alloc);
9413
6.83M
        if (attname == NULL) {
9414
1.15M
      xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9415
1.15M
           "xmlParseStartTag: problem parsing attributes\n");
9416
1.15M
      break;
9417
1.15M
  }
9418
5.67M
        if (attvalue == NULL)
9419
440k
            goto next_attr;
9420
5.23M
  if (len < 0) len = xmlStrlen(attvalue);
9421
9422
5.23M
        if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9423
182k
            const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9424
182k
            xmlURIPtr uri;
9425
9426
182k
            if (URL == NULL) {
9427
0
                xmlErrMemory(ctxt, "dictionary allocation failure");
9428
0
                if ((attvalue != NULL) && (alloc != 0))
9429
0
                    xmlFree(attvalue);
9430
0
                localname = NULL;
9431
0
                goto done;
9432
0
            }
9433
182k
            if (*URL != 0) {
9434
169k
                uri = xmlParseURI((const char *) URL);
9435
169k
                if (uri == NULL) {
9436
70.3k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
9437
70.3k
                             "xmlns: '%s' is not a valid URI\n",
9438
70.3k
                                       URL, NULL, NULL);
9439
99.0k
                } else {
9440
99.0k
                    if (uri->scheme == NULL) {
9441
45.8k
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9442
45.8k
                                  "xmlns: URI %s is not absolute\n",
9443
45.8k
                                  URL, NULL, NULL);
9444
45.8k
                    }
9445
99.0k
                    xmlFreeURI(uri);
9446
99.0k
                }
9447
169k
                if (URL == ctxt->str_xml_ns) {
9448
2
                    if (attname != ctxt->str_xml) {
9449
2
                        xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9450
2
                     "xml namespace URI cannot be the default namespace\n",
9451
2
                                 NULL, NULL, NULL);
9452
2
                    }
9453
2
                    goto next_attr;
9454
2
                }
9455
169k
                if ((len == 29) &&
9456
169k
                    (xmlStrEqual(URL,
9457
4.72k
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9458
357
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9459
357
                         "reuse of the xmlns namespace name is forbidden\n",
9460
357
                             NULL, NULL, NULL);
9461
357
                    goto next_attr;
9462
357
                }
9463
169k
            }
9464
            /*
9465
             * check that it's not a defined namespace
9466
             */
9467
204k
            for (j = 1;j <= nbNs;j++)
9468
34.4k
                if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9469
12.5k
                    break;
9470
182k
            if (j <= nbNs)
9471
12.5k
                xmlErrAttributeDup(ctxt, NULL, attname);
9472
169k
            else
9473
169k
                if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
9474
9475
5.05M
        } else if (aprefix == ctxt->str_xmlns) {
9476
454k
            const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9477
454k
            xmlURIPtr uri;
9478
9479
454k
            if (attname == ctxt->str_xml) {
9480
6.79k
                if (URL != ctxt->str_xml_ns) {
9481
6.65k
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9482
6.65k
                             "xml namespace prefix mapped to wrong URI\n",
9483
6.65k
                             NULL, NULL, NULL);
9484
6.65k
                }
9485
                /*
9486
                 * Do not keep a namespace definition node
9487
                 */
9488
6.79k
                goto next_attr;
9489
6.79k
            }
9490
447k
            if (URL == ctxt->str_xml_ns) {
9491
103
                if (attname != ctxt->str_xml) {
9492
103
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9493
103
                             "xml namespace URI mapped to wrong prefix\n",
9494
103
                             NULL, NULL, NULL);
9495
103
                }
9496
103
                goto next_attr;
9497
103
            }
9498
447k
            if (attname == ctxt->str_xmlns) {
9499
2.03k
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9500
2.03k
                         "redefinition of the xmlns prefix is forbidden\n",
9501
2.03k
                         NULL, NULL, NULL);
9502
2.03k
                goto next_attr;
9503
2.03k
            }
9504
445k
            if ((len == 29) &&
9505
445k
                (xmlStrEqual(URL,
9506
16.3k
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9507
1.66k
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9508
1.66k
                         "reuse of the xmlns namespace name is forbidden\n",
9509
1.66k
                         NULL, NULL, NULL);
9510
1.66k
                goto next_attr;
9511
1.66k
            }
9512
443k
            if ((URL == NULL) || (URL[0] == 0)) {
9513
10.9k
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9514
10.9k
                         "xmlns:%s: Empty XML namespace is not allowed\n",
9515
10.9k
                              attname, NULL, NULL);
9516
10.9k
                goto next_attr;
9517
432k
            } else {
9518
432k
                uri = xmlParseURI((const char *) URL);
9519
432k
                if (uri == NULL) {
9520
152k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
9521
152k
                         "xmlns:%s: '%s' is not a valid URI\n",
9522
152k
                                       attname, URL, NULL);
9523
280k
                } else {
9524
280k
                    if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9525
47.1k
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9526
47.1k
                                  "xmlns:%s: URI %s is not absolute\n",
9527
47.1k
                                  attname, URL, NULL);
9528
47.1k
                    }
9529
280k
                    xmlFreeURI(uri);
9530
280k
                }
9531
432k
            }
9532
9533
            /*
9534
             * check that it's not a defined namespace
9535
             */
9536
524k
            for (j = 1;j <= nbNs;j++)
9537
103k
                if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9538
12.0k
                    break;
9539
432k
            if (j <= nbNs)
9540
12.0k
                xmlErrAttributeDup(ctxt, aprefix, attname);
9541
420k
            else
9542
420k
                if (nsPush(ctxt, attname, URL) > 0) nbNs++;
9543
9544
4.60M
        } else {
9545
            /*
9546
             * Add the pair to atts
9547
             */
9548
4.60M
            if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9549
157k
                if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9550
0
                    goto next_attr;
9551
0
                }
9552
157k
                maxatts = ctxt->maxatts;
9553
157k
                atts = ctxt->atts;
9554
157k
            }
9555
4.60M
            ctxt->attallocs[nratts++] = alloc;
9556
4.60M
            atts[nbatts++] = attname;
9557
4.60M
            atts[nbatts++] = aprefix;
9558
            /*
9559
             * The namespace URI field is used temporarily to point at the
9560
             * base of the current input buffer for non-alloced attributes.
9561
             * When the input buffer is reallocated, all the pointers become
9562
             * invalid, but they can be reconstructed later.
9563
             */
9564
4.60M
            if (alloc)
9565
617k
                atts[nbatts++] = NULL;
9566
3.98M
            else
9567
3.98M
                atts[nbatts++] = ctxt->input->base;
9568
4.60M
            atts[nbatts++] = attvalue;
9569
4.60M
            attvalue += len;
9570
4.60M
            atts[nbatts++] = attvalue;
9571
            /*
9572
             * tag if some deallocation is needed
9573
             */
9574
4.60M
            if (alloc != 0) attval = 1;
9575
4.60M
            attvalue = NULL; /* moved into atts */
9576
4.60M
        }
9577
9578
5.67M
next_attr:
9579
5.67M
        if ((attvalue != NULL) && (alloc != 0)) {
9580
208k
            xmlFree(attvalue);
9581
208k
            attvalue = NULL;
9582
208k
        }
9583
9584
5.67M
  GROW
9585
5.67M
        if (ctxt->instate == XML_PARSER_EOF)
9586
0
            break;
9587
5.67M
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9588
2.50M
      break;
9589
3.17M
  if (SKIP_BLANKS == 0) {
9590
1.07M
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9591
1.07M
         "attributes construct error\n");
9592
1.07M
      break;
9593
1.07M
  }
9594
2.09M
        GROW;
9595
2.09M
    }
9596
9597
7.93M
    if (ctxt->input->id != inputid) {
9598
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9599
0
                    "Unexpected change of input\n");
9600
0
        localname = NULL;
9601
0
        goto done;
9602
0
    }
9603
9604
    /* Reconstruct attribute value pointers. */
9605
12.5M
    for (i = 0, j = 0; j < nratts; i += 5, j++) {
9606
4.60M
        if (atts[i+2] != NULL) {
9607
            /*
9608
             * Arithmetic on dangling pointers is technically undefined
9609
             * behavior, but well...
9610
             */
9611
3.98M
            const xmlChar *old = atts[i+2];
9612
3.98M
            atts[i+2]  = NULL;    /* Reset repurposed namespace URI */
9613
3.98M
            atts[i+3] = ctxt->input->base + (atts[i+3] - old);  /* value */
9614
3.98M
            atts[i+4] = ctxt->input->base + (atts[i+4] - old);  /* valuend */
9615
3.98M
        }
9616
4.60M
    }
9617
9618
    /*
9619
     * Attribute defaulting
9620
     */
9621
7.93M
    if (ctxt->attsDefault != NULL) {
9622
597k
        xmlDefAttrsPtr defaults;
9623
9624
597k
  defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9625
597k
  if (defaults != NULL) {
9626
250k
      for (i = 0;i < defaults->nbAttrs;i++) {
9627
164k
          attname = defaults->values[5 * i];
9628
164k
    aprefix = defaults->values[5 * i + 1];
9629
9630
                /*
9631
     * special work for namespaces defaulted defs
9632
     */
9633
164k
    if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9634
        /*
9635
         * check that it's not a defined namespace
9636
         */
9637
11.4k
        for (j = 1;j <= nbNs;j++)
9638
5.73k
            if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9639
4.13k
          break;
9640
9.80k
              if (j <= nbNs) continue;
9641
9642
5.66k
        nsname = xmlGetNamespace(ctxt, NULL);
9643
5.66k
        if (nsname != defaults->values[5 * i + 2]) {
9644
2.56k
      if (nsPush(ctxt, NULL,
9645
2.56k
                 defaults->values[5 * i + 2]) > 0)
9646
2.54k
          nbNs++;
9647
2.56k
        }
9648
154k
    } else if (aprefix == ctxt->str_xmlns) {
9649
        /*
9650
         * check that it's not a defined namespace
9651
         */
9652
29.4k
        for (j = 1;j <= nbNs;j++)
9653
12.9k
            if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9654
2.01k
          break;
9655
18.5k
              if (j <= nbNs) continue;
9656
9657
16.5k
        nsname = xmlGetNamespace(ctxt, attname);
9658
16.5k
        if (nsname != defaults->values[5 * i + 2]) {
9659
6.52k
      if (nsPush(ctxt, attname,
9660
6.52k
                 defaults->values[5 * i + 2]) > 0)
9661
6.16k
          nbNs++;
9662
6.52k
        }
9663
136k
    } else {
9664
        /*
9665
         * check that it's not a defined attribute
9666
         */
9667
334k
        for (j = 0;j < nbatts;j+=5) {
9668
202k
      if ((attname == atts[j]) && (aprefix == atts[j+1]))
9669
4.36k
          break;
9670
202k
        }
9671
136k
        if (j < nbatts) continue;
9672
9673
132k
        if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9674
4.88k
      if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9675
0
                            localname = NULL;
9676
0
                            goto done;
9677
0
      }
9678
4.88k
      maxatts = ctxt->maxatts;
9679
4.88k
      atts = ctxt->atts;
9680
4.88k
        }
9681
132k
        atts[nbatts++] = attname;
9682
132k
        atts[nbatts++] = aprefix;
9683
132k
        if (aprefix == NULL)
9684
96.9k
      atts[nbatts++] = NULL;
9685
35.0k
        else
9686
35.0k
            atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
9687
132k
        atts[nbatts++] = defaults->values[5 * i + 2];
9688
132k
        atts[nbatts++] = defaults->values[5 * i + 3];
9689
132k
        if ((ctxt->standalone == 1) &&
9690
132k
            (defaults->values[5 * i + 4] != NULL)) {
9691
0
      xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9692
0
    "standalone: attribute %s on %s defaulted from external subset\n",
9693
0
                                   attname, localname);
9694
0
        }
9695
132k
        nbdef++;
9696
132k
    }
9697
164k
      }
9698
86.0k
  }
9699
597k
    }
9700
9701
    /*
9702
     * Attribute checks
9703
     */
9704
12.6M
    for (i = 0; i < nbatts;i += 5) {
9705
        /*
9706
  * The default namespace does not apply to attribute names.
9707
  */
9708
4.73M
  if (atts[i + 1] != NULL) {
9709
598k
      nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9710
598k
      if (nsname == NULL) {
9711
193k
    xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9712
193k
        "Namespace prefix %s for %s on %s is not defined\n",
9713
193k
        atts[i + 1], atts[i], localname);
9714
193k
      }
9715
598k
      atts[i + 2] = nsname;
9716
598k
  } else
9717
4.13M
      nsname = NULL;
9718
  /*
9719
   * [ WFC: Unique Att Spec ]
9720
   * No attribute name may appear more than once in the same
9721
   * start-tag or empty-element tag.
9722
   * As extended by the Namespace in XML REC.
9723
   */
9724
6.93M
        for (j = 0; j < i;j += 5) {
9725
2.24M
      if (atts[i] == atts[j]) {
9726
76.9k
          if (atts[i+1] == atts[j+1]) {
9727
42.5k
        xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9728
42.5k
        break;
9729
42.5k
    }
9730
34.4k
    if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9731
78
        xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9732
78
           "Namespaced Attribute %s in '%s' redefined\n",
9733
78
           atts[i], nsname, NULL);
9734
78
        break;
9735
78
    }
9736
34.4k
      }
9737
2.24M
  }
9738
4.73M
    }
9739
9740
7.93M
    nsname = xmlGetNamespace(ctxt, prefix);
9741
7.93M
    if ((prefix != NULL) && (nsname == NULL)) {
9742
951k
  xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9743
951k
           "Namespace prefix %s on %s is not defined\n",
9744
951k
     prefix, localname, NULL);
9745
951k
    }
9746
7.93M
    *pref = prefix;
9747
7.93M
    *URI = nsname;
9748
9749
    /*
9750
     * SAX: Start of Element !
9751
     */
9752
7.93M
    if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9753
7.93M
  (!ctxt->disableSAX)) {
9754
7.13M
  if (nbNs > 0)
9755
391k
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9756
391k
        nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9757
391k
        nbatts / 5, nbdef, atts);
9758
6.74M
  else
9759
6.74M
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9760
6.74M
                    nsname, 0, NULL, nbatts / 5, nbdef, atts);
9761
7.13M
    }
9762
9763
7.93M
done:
9764
    /*
9765
     * Free up attribute allocated strings if needed
9766
     */
9767
7.93M
    if (attval != 0) {
9768
1.37M
  for (i = 3,j = 0; j < nratts;i += 5,j++)
9769
803k
      if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9770
617k
          xmlFree((xmlChar *) atts[i]);
9771
572k
    }
9772
9773
7.93M
    return(localname);
9774
7.93M
}
9775
9776
/**
9777
 * xmlParseEndTag2:
9778
 * @ctxt:  an XML parser context
9779
 * @tag:  the start tag (its prefix, URI, line and nsNr fields are used
9780
 *        to compare names, report mismatches and pop namespaces)
9781
 *
9782
 * Parse an end tag. Always consumes '</'.
9783
 *
9784
 * [42] ETag ::= '</' Name S? '>'
9785
 *
9786
 * With namespace
9787
 *
9788
 * [NS 9] ETag ::= '</' QName S? '>'
9789
 */
9790
9791
static void
9792
2.38M
xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlStartTag *tag) {
9793
2.38M
    const xmlChar *name;
9794
9795
2.38M
    GROW;
9796
2.38M
    if ((RAW != '<') || (NXT(1) != '/')) {
9797
0
  xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9798
0
  return;
9799
0
    }
9800
2.38M
    SKIP(2);
9801
9802
2.38M
    if (tag->prefix == NULL)
9803
1.77M
        name = xmlParseNameAndCompare(ctxt, ctxt->name);
9804
616k
    else
9805
616k
        name = xmlParseQNameAndCompare(ctxt, ctxt->name, tag->prefix);
9806
9807
    /*
9808
     * We should definitely be at the ending "S? '>'" part
9809
     */
9810
2.38M
    GROW;
9811
2.38M
    if (ctxt->instate == XML_PARSER_EOF)
9812
0
        return;
9813
2.38M
    SKIP_BLANKS;
9814
2.38M
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9815
307k
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9816
307k
    } else
9817
2.08M
  NEXT1;
9818
9819
    /*
9820
     * [ WFC: Element Type Match ]
9821
     * The Name in an element's end-tag must match the element type in the
9822
     * start-tag.
9823
     *
9824
     */
9825
2.38M
    if (name != (xmlChar*)1) {
9826
636k
        if (name == NULL) name = BAD_CAST "unparsable";
9827
636k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9828
636k
         "Opening and ending tag mismatch: %s line %d and %s\n",
9829
636k
                    ctxt->name, tag->line, name);
9830
636k
    }
9831
9832
    /*
9833
     * SAX: End of Tag
9834
     */
9835
2.38M
    if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9836
2.38M
  (!ctxt->disableSAX))
9837
2.10M
  ctxt->sax->endElementNs(ctxt->userData, ctxt->name, tag->prefix,
9838
2.10M
                                tag->URI);
9839
9840
2.38M
    spacePop(ctxt);
9841
2.38M
    if (tag->nsNr != 0)
9842
68.9k
  nsPop(ctxt, tag->nsNr);
9843
2.38M
}
9844
9845
/**
9846
 * xmlParseCDSect:
9847
 * @ctxt:  an XML parser context
9848
 *
9849
 * DEPRECATED: Internal function, don't use.
9850
 *
9851
 * Parse escaped pure raw content. Always consumes '<!['.
9852
 *
9853
 * [18] CDSect ::= CDStart CData CDEnd
9854
 *
9855
 * [19] CDStart ::= '<![CDATA['
9856
 *
9857
 * [20] CData ::= (Char* - (Char* ']]>' Char*))
9858
 *
9859
 * [21] CDEnd ::= ']]>'
9860
 */
9861
void
9862
113k
xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9863
113k
    xmlChar *buf = NULL;
9864
113k
    int len = 0;
9865
113k
    int size = XML_PARSER_BUFFER_SIZE;
9866
113k
    int r, rl;
9867
113k
    int s, sl;
9868
113k
    int cur, l;
9869
113k
    int count = 0;
9870
113k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9871
31.7k
                    XML_MAX_HUGE_LENGTH :
9872
113k
                    XML_MAX_TEXT_LENGTH;
9873
9874
113k
    if ((CUR != '<') || (NXT(1) != '!') || (NXT(2) != '['))
9875
0
        return;
9876
113k
    SKIP(3);
9877
9878
113k
    if (!CMP6(CUR_PTR, 'C', 'D', 'A', 'T', 'A', '['))
9879
0
        return;
9880
113k
    SKIP(6);
9881
9882
113k
    ctxt->instate = XML_PARSER_CDATA_SECTION;
9883
113k
    r = CUR_CHAR(rl);
9884
113k
    if (!IS_CHAR(r)) {
9885
6.70k
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9886
6.70k
        goto out;
9887
6.70k
    }
9888
106k
    NEXTL(rl);
9889
106k
    s = CUR_CHAR(sl);
9890
106k
    if (!IS_CHAR(s)) {
9891
8.73k
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9892
8.73k
        goto out;
9893
8.73k
    }
9894
98.0k
    NEXTL(sl);
9895
98.0k
    cur = CUR_CHAR(l);
9896
98.0k
    buf = (xmlChar *) xmlMallocAtomic(size);
9897
98.0k
    if (buf == NULL) {
9898
0
  xmlErrMemory(ctxt, NULL);
9899
0
        goto out;
9900
0
    }
9901
16.2M
    while (IS_CHAR(cur) &&
9902
16.2M
           ((r != ']') || (s != ']') || (cur != '>'))) {
9903
16.1M
  if (len + 5 >= size) {
9904
53.8k
      xmlChar *tmp;
9905
9906
53.8k
      tmp = (xmlChar *) xmlRealloc(buf, size * 2);
9907
53.8k
      if (tmp == NULL) {
9908
0
    xmlErrMemory(ctxt, NULL);
9909
0
                goto out;
9910
0
      }
9911
53.8k
      buf = tmp;
9912
53.8k
      size *= 2;
9913
53.8k
  }
9914
16.1M
  COPY_BUF(rl,buf,len,r);
9915
16.1M
  r = s;
9916
16.1M
  rl = sl;
9917
16.1M
  s = cur;
9918
16.1M
  sl = l;
9919
16.1M
  count++;
9920
16.1M
  if (count > 50) {
9921
294k
      SHRINK;
9922
294k
      GROW;
9923
294k
            if (ctxt->instate == XML_PARSER_EOF) {
9924
0
                goto out;
9925
0
            }
9926
294k
      count = 0;
9927
294k
  }
9928
16.1M
  NEXTL(l);
9929
16.1M
  cur = CUR_CHAR(l);
9930
16.1M
        if (len > maxLength) {
9931
0
            xmlFatalErrMsg(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9932
0
                           "CData section too big found\n");
9933
0
            goto out;
9934
0
        }
9935
16.1M
    }
9936
98.0k
    buf[len] = 0;
9937
98.0k
    if (cur != '>') {
9938
22.2k
  xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9939
22.2k
                       "CData section not finished\n%.50s\n", buf);
9940
22.2k
        goto out;
9941
22.2k
    }
9942
75.7k
    NEXTL(l);
9943
9944
    /*
9945
     * OK the buffer is to be consumed as cdata.
9946
     */
9947
75.7k
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9948
61.7k
  if (ctxt->sax->cdataBlock != NULL)
9949
41.8k
      ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9950
19.8k
  else if (ctxt->sax->characters != NULL)
9951
19.8k
      ctxt->sax->characters(ctxt->userData, buf, len);
9952
61.7k
    }
9953
9954
113k
out:
9955
113k
    if (ctxt->instate != XML_PARSER_EOF)
9956
113k
        ctxt->instate = XML_PARSER_CONTENT;
9957
113k
    xmlFree(buf);
9958
113k
}
9959
9960
/**
9961
 * xmlParseContentInternal:
9962
 * @ctxt:  an XML parser context
9963
 *
9964
 * Parse a content sequence. Stops at EOF or '</'. Leaves checking of
9965
 * unexpected EOF to the caller.
9966
 */
9967
9968
static void
9969
188k
xmlParseContentInternal(xmlParserCtxtPtr ctxt) {
9970
188k
    int nameNr = ctxt->nameNr;
9971
9972
188k
    GROW;
9973
17.5M
    while ((RAW != 0) &&
9974
17.5M
     (ctxt->instate != XML_PARSER_EOF)) {
9975
17.3M
  const xmlChar *cur = ctxt->input->cur;
9976
9977
  /*
9978
   * First case : a Processing Instruction.
9979
   */
9980
17.3M
  if ((*cur == '<') && (cur[1] == '?')) {
9981
184k
      xmlParsePI(ctxt);
9982
184k
  }
9983
9984
  /*
9985
   * Second case : a CDSection
9986
   */
9987
  /* 2.6.0 test was *cur not RAW */
9988
17.1M
  else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9989
113k
      xmlParseCDSect(ctxt);
9990
113k
  }
9991
9992
  /*
9993
   * Third case :  a comment
9994
   */
9995
17.0M
  else if ((*cur == '<') && (NXT(1) == '!') &&
9996
17.0M
     (NXT(2) == '-') && (NXT(3) == '-')) {
9997
192k
      xmlParseComment(ctxt);
9998
192k
      ctxt->instate = XML_PARSER_CONTENT;
9999
192k
  }
10000
10001
  /*
10002
   * Fourth case :  a sub-element.
10003
   */
10004
16.8M
  else if (*cur == '<') {
10005
6.76M
            if (NXT(1) == '/') {
10006
1.42M
                if (ctxt->nameNr <= nameNr)
10007
30.5k
                    break;
10008
1.39M
          xmlParseElementEnd(ctxt);
10009
5.34M
            } else {
10010
5.34M
          xmlParseElementStart(ctxt);
10011
5.34M
            }
10012
6.76M
  }
10013
10014
  /*
10015
   * Fifth case : a reference. If it has not been resolved,
10016
   *    parsing returns its Name, create the node
10017
   */
10018
10019
10.1M
  else if (*cur == '&') {
10020
1.70M
      xmlParseReference(ctxt);
10021
1.70M
  }
10022
10023
  /*
10024
   * Last case, text. Note that References are handled directly.
10025
   */
10026
8.39M
  else {
10027
8.39M
      xmlParseCharData(ctxt, 0);
10028
8.39M
  }
10029
10030
17.3M
  GROW;
10031
17.3M
  SHRINK;
10032
17.3M
    }
10033
188k
}
10034
10035
/**
10036
 * xmlParseContent:
10037
 * @ctxt:  an XML parser context
10038
 *
10039
 * Parse a content sequence. Stops at EOF or '</'.
10040
 *
10041
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10042
 */
10043
10044
void
10045
59.1k
xmlParseContent(xmlParserCtxtPtr ctxt) {
10046
59.1k
    int nameNr = ctxt->nameNr;
10047
10048
59.1k
    xmlParseContentInternal(ctxt);
10049
10050
59.1k
    if ((ctxt->instate != XML_PARSER_EOF) && (ctxt->nameNr > nameNr)) {
10051
2.10k
        const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
10052
2.10k
        int line = ctxt->pushTab[ctxt->nameNr - 1].line;
10053
2.10k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
10054
2.10k
                "Premature end of data in tag %s line %d\n",
10055
2.10k
    name, line, NULL);
10056
2.10k
    }
10057
59.1k
}
10058
10059
/**
10060
 * xmlParseElement:
10061
 * @ctxt:  an XML parser context
10062
 *
10063
 * DEPRECATED: Internal function, don't use.
10064
 *
10065
 * parse an XML element
10066
 *
10067
 * [39] element ::= EmptyElemTag | STag content ETag
10068
 *
10069
 * [ WFC: Element Type Match ]
10070
 * The Name in an element's end-tag must match the element type in the
10071
 * start-tag.
10072
 *
10073
 */
10074
10075
void
10076
212k
xmlParseElement(xmlParserCtxtPtr ctxt) {
10077
212k
    if (xmlParseElementStart(ctxt) != 0)
10078
83.1k
        return;
10079
10080
129k
    xmlParseContentInternal(ctxt);
10081
129k
    if (ctxt->instate == XML_PARSER_EOF)
10082
345
  return;
10083
10084
128k
    if (CUR == 0) {
10085
98.9k
        const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
10086
98.9k
        int line = ctxt->pushTab[ctxt->nameNr - 1].line;
10087
98.9k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
10088
98.9k
                "Premature end of data in tag %s line %d\n",
10089
98.9k
    name, line, NULL);
10090
98.9k
        return;
10091
98.9k
    }
10092
10093
29.9k
    xmlParseElementEnd(ctxt);
10094
29.9k
}
10095
10096
/**
10097
 * xmlParseElementStart:
10098
 * @ctxt:  an XML parser context
10099
 *
10100
 * Parse the start of an XML element. Returns -1 in case of error, 0 if an
10101
 * opening tag was parsed, 1 if an empty element was parsed.
10102
 *
10103
 * Always consumes '<'.
10104
 */
10105
static int
10106
5.55M
xmlParseElementStart(xmlParserCtxtPtr ctxt) {
10107
5.55M
    const xmlChar *name;
10108
5.55M
    const xmlChar *prefix = NULL;
10109
5.55M
    const xmlChar *URI = NULL;
10110
5.55M
    xmlParserNodeInfo node_info;
10111
5.55M
    int line, tlen = 0;
10112
5.55M
    xmlNodePtr ret;
10113
5.55M
    int nsNr = ctxt->nsNr;
10114
10115
5.55M
    if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
10116
5.55M
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
10117
120
  xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
10118
120
     "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
10119
120
        xmlParserMaxDepth);
10120
120
  xmlHaltParser(ctxt);
10121
120
  return(-1);
10122
120
    }
10123
10124
    /* Capture start position */
10125
5.55M
    if (ctxt->record_info) {
10126
0
        node_info.begin_pos = ctxt->input->consumed +
10127
0
                          (CUR_PTR - ctxt->input->base);
10128
0
  node_info.begin_line = ctxt->input->line;
10129
0
    }
10130
10131
5.55M
    if (ctxt->spaceNr == 0)
10132
0
  spacePush(ctxt, -1);
10133
5.55M
    else if (*ctxt->space == -2)
10134
1.32M
  spacePush(ctxt, -1);
10135
4.22M
    else
10136
4.22M
  spacePush(ctxt, *ctxt->space);
10137
10138
5.55M
    line = ctxt->input->line;
10139
5.55M
#ifdef LIBXML_SAX1_ENABLED
10140
5.55M
    if (ctxt->sax2)
10141
3.63M
#endif /* LIBXML_SAX1_ENABLED */
10142
3.63M
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
10143
1.91M
#ifdef LIBXML_SAX1_ENABLED
10144
1.91M
    else
10145
1.91M
  name = xmlParseStartTag(ctxt);
10146
5.55M
#endif /* LIBXML_SAX1_ENABLED */
10147
5.55M
    if (ctxt->instate == XML_PARSER_EOF)
10148
239
  return(-1);
10149
5.55M
    if (name == NULL) {
10150
961k
  spacePop(ctxt);
10151
961k
        return(-1);
10152
961k
    }
10153
4.59M
    nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
10154
4.59M
    ret = ctxt->node;
10155
10156
4.59M
#ifdef LIBXML_VALID_ENABLED
10157
    /*
10158
     * [ VC: Root Element Type ]
10159
     * The Name in the document type declaration must match the element
10160
     * type of the root element.
10161
     */
10162
4.59M
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10163
4.59M
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
10164
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
10165
4.59M
#endif /* LIBXML_VALID_ENABLED */
10166
10167
    /*
10168
     * Check for an Empty Element.
10169
     */
10170
4.59M
    if ((RAW == '/') && (NXT(1) == '>')) {
10171
922k
        SKIP(2);
10172
922k
  if (ctxt->sax2) {
10173
719k
      if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
10174
719k
    (!ctxt->disableSAX))
10175
570k
    ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
10176
719k
#ifdef LIBXML_SAX1_ENABLED
10177
719k
  } else {
10178
202k
      if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
10179
202k
    (!ctxt->disableSAX))
10180
171k
    ctxt->sax->endElement(ctxt->userData, name);
10181
202k
#endif /* LIBXML_SAX1_ENABLED */
10182
202k
  }
10183
922k
  namePop(ctxt);
10184
922k
  spacePop(ctxt);
10185
922k
  if (nsNr != ctxt->nsNr)
10186
21.1k
      nsPop(ctxt, ctxt->nsNr - nsNr);
10187
922k
  if ( ret != NULL && ctxt->record_info ) {
10188
0
     node_info.end_pos = ctxt->input->consumed +
10189
0
            (CUR_PTR - ctxt->input->base);
10190
0
     node_info.end_line = ctxt->input->line;
10191
0
     node_info.node = ret;
10192
0
     xmlParserAddNodeInfo(ctxt, &node_info);
10193
0
  }
10194
922k
  return(1);
10195
922k
    }
10196
3.66M
    if (RAW == '>') {
10197
2.46M
        NEXT1;
10198
2.46M
    } else {
10199
1.20M
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
10200
1.20M
         "Couldn't find end of Start Tag %s line %d\n",
10201
1.20M
                    name, line, NULL);
10202
10203
  /*
10204
   * end of parsing of this node.
10205
   */
10206
1.20M
  nodePop(ctxt);
10207
1.20M
  namePop(ctxt);
10208
1.20M
  spacePop(ctxt);
10209
1.20M
  if (nsNr != ctxt->nsNr)
10210
85.6k
      nsPop(ctxt, ctxt->nsNr - nsNr);
10211
10212
  /*
10213
   * Capture end position and add node
10214
   */
10215
1.20M
  if ( ret != NULL && ctxt->record_info ) {
10216
0
     node_info.end_pos = ctxt->input->consumed +
10217
0
            (CUR_PTR - ctxt->input->base);
10218
0
     node_info.end_line = ctxt->input->line;
10219
0
     node_info.node = ret;
10220
0
     xmlParserAddNodeInfo(ctxt, &node_info);
10221
0
  }
10222
1.20M
  return(-1);
10223
1.20M
    }
10224
10225
2.46M
    return(0);
10226
3.66M
}
10227
10228
/**
10229
 * xmlParseElementEnd:
10230
 * @ctxt:  an XML parser context
10231
 *
10232
 * Parse the end of an XML element. Always consumes '</'.
10233
 */
10234
static void
10235
1.42M
xmlParseElementEnd(xmlParserCtxtPtr ctxt) {
10236
1.42M
    xmlParserNodeInfo node_info;
10237
1.42M
    xmlNodePtr ret = ctxt->node;
10238
10239
1.42M
    if (ctxt->nameNr <= 0) {
10240
0
        if ((RAW == '<') && (NXT(1) == '/'))
10241
0
            SKIP(2);
10242
0
        return;
10243
0
    }
10244
10245
    /*
10246
     * parse the end of tag: '</' should be here.
10247
     */
10248
1.42M
    if (ctxt->sax2) {
10249
964k
  xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
10250
964k
  namePop(ctxt);
10251
964k
    }
10252
463k
#ifdef LIBXML_SAX1_ENABLED
10253
463k
    else
10254
463k
  xmlParseEndTag1(ctxt, 0);
10255
1.42M
#endif /* LIBXML_SAX1_ENABLED */
10256
10257
    /*
10258
     * Capture end position and add node
10259
     */
10260
1.42M
    if ( ret != NULL && ctxt->record_info ) {
10261
0
       node_info.end_pos = ctxt->input->consumed +
10262
0
                          (CUR_PTR - ctxt->input->base);
10263
0
       node_info.end_line = ctxt->input->line;
10264
0
       node_info.node = ret;
10265
0
       xmlParserAddNodeInfo(ctxt, &node_info);
10266
0
    }
10267
1.42M
}
10268
10269
/**
10270
 * xmlParseVersionNum:
10271
 * @ctxt:  an XML parser context
10272
 *
10273
 * DEPRECATED: Internal function, don't use.
10274
 *
10275
 * parse the XML version value.
10276
 *
10277
 * [26] VersionNum ::= '1.' [0-9]+
10278
 *
10279
 * In practice allow [0-9].[0-9]+ at that level
10280
 *
10281
 * Returns the string giving the XML version number, or NULL
10282
 */
10283
xmlChar *
10284
318k
xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10285
318k
    xmlChar *buf = NULL;
10286
318k
    int len = 0;
10287
318k
    int size = 10;
10288
318k
    xmlChar cur;
10289
10290
318k
    buf = (xmlChar *) xmlMallocAtomic(size);
10291
318k
    if (buf == NULL) {
10292
0
  xmlErrMemory(ctxt, NULL);
10293
0
  return(NULL);
10294
0
    }
10295
318k
    cur = CUR;
10296
318k
    if (!((cur >= '0') && (cur <= '9'))) {
10297
5.26k
  xmlFree(buf);
10298
5.26k
  return(NULL);
10299
5.26k
    }
10300
313k
    buf[len++] = cur;
10301
313k
    NEXT;
10302
313k
    cur=CUR;
10303
313k
    if (cur != '.') {
10304
5.34k
  xmlFree(buf);
10305
5.34k
  return(NULL);
10306
5.34k
    }
10307
307k
    buf[len++] = cur;
10308
307k
    NEXT;
10309
307k
    cur=CUR;
10310
697k
    while ((cur >= '0') && (cur <= '9')) {
10311
389k
  if (len + 1 >= size) {
10312
2.10k
      xmlChar *tmp;
10313
10314
2.10k
      size *= 2;
10315
2.10k
      tmp = (xmlChar *) xmlRealloc(buf, size);
10316
2.10k
      if (tmp == NULL) {
10317
0
          xmlFree(buf);
10318
0
    xmlErrMemory(ctxt, NULL);
10319
0
    return(NULL);
10320
0
      }
10321
2.10k
      buf = tmp;
10322
2.10k
  }
10323
389k
  buf[len++] = cur;
10324
389k
  NEXT;
10325
389k
  cur=CUR;
10326
389k
    }
10327
307k
    buf[len] = 0;
10328
307k
    return(buf);
10329
307k
}
10330
10331
/**
10332
 * xmlParseVersionInfo:
10333
 * @ctxt:  an XML parser context
10334
 *
10335
 * DEPRECATED: Internal function, don't use.
10336
 *
10337
 * parse the XML version.
10338
 *
10339
 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
10340
 *
10341
 * [25] Eq ::= S? '=' S?
10342
 *
10343
 * Returns the version string, e.g. "1.0"
10344
 */
10345
10346
xmlChar *
10347
410k
xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10348
410k
    xmlChar *version = NULL;
10349
10350
410k
    if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
10351
336k
  SKIP(7);
10352
336k
  SKIP_BLANKS;
10353
336k
  if (RAW != '=') {
10354
10.9k
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10355
10.9k
      return(NULL);
10356
10.9k
        }
10357
325k
  NEXT;
10358
325k
  SKIP_BLANKS;
10359
325k
  if (RAW == '"') {
10360
267k
      NEXT;
10361
267k
      version = xmlParseVersionNum(ctxt);
10362
267k
      if (RAW != '"') {
10363
18.3k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10364
18.3k
      } else
10365
248k
          NEXT;
10366
267k
  } else if (RAW == '\''){
10367
51.5k
      NEXT;
10368
51.5k
      version = xmlParseVersionNum(ctxt);
10369
51.5k
      if (RAW != '\'') {
10370
3.77k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10371
3.77k
      } else
10372
47.7k
          NEXT;
10373
51.5k
  } else {
10374
6.90k
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10375
6.90k
  }
10376
325k
    }
10377
399k
    return(version);
10378
410k
}
10379
10380
/**
10381
 * xmlParseEncName:
10382
 * @ctxt:  an XML parser context
10383
 *
10384
 * DEPRECATED: Internal function, don't use.
10385
 *
10386
 * parse the XML encoding name
10387
 *
10388
 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10389
 *
10390
 * Returns the encoding name value or NULL
10391
 */
10392
xmlChar *
10393
100k
xmlParseEncName(xmlParserCtxtPtr ctxt) {
10394
100k
    xmlChar *buf = NULL;
10395
100k
    int len = 0;
10396
100k
    int size = 10;
10397
100k
    xmlChar cur;
10398
10399
100k
    cur = CUR;
10400
100k
    if (((cur >= 'a') && (cur <= 'z')) ||
10401
100k
        ((cur >= 'A') && (cur <= 'Z'))) {
10402
99.8k
  buf = (xmlChar *) xmlMallocAtomic(size);
10403
99.8k
  if (buf == NULL) {
10404
0
      xmlErrMemory(ctxt, NULL);
10405
0
      return(NULL);
10406
0
  }
10407
10408
99.8k
  buf[len++] = cur;
10409
99.8k
  NEXT;
10410
99.8k
  cur = CUR;
10411
1.08M
  while (((cur >= 'a') && (cur <= 'z')) ||
10412
1.08M
         ((cur >= 'A') && (cur <= 'Z')) ||
10413
1.08M
         ((cur >= '0') && (cur <= '9')) ||
10414
1.08M
         (cur == '.') || (cur == '_') ||
10415
1.08M
         (cur == '-')) {
10416
988k
      if (len + 1 >= size) {
10417
45.5k
          xmlChar *tmp;
10418
10419
45.5k
    size *= 2;
10420
45.5k
    tmp = (xmlChar *) xmlRealloc(buf, size);
10421
45.5k
    if (tmp == NULL) {
10422
0
        xmlErrMemory(ctxt, NULL);
10423
0
        xmlFree(buf);
10424
0
        return(NULL);
10425
0
    }
10426
45.5k
    buf = tmp;
10427
45.5k
      }
10428
988k
      buf[len++] = cur;
10429
988k
      NEXT;
10430
988k
      cur = CUR;
10431
988k
      if (cur == 0) {
10432
687
          SHRINK;
10433
687
    GROW;
10434
687
    cur = CUR;
10435
687
      }
10436
988k
        }
10437
99.8k
  buf[len] = 0;
10438
99.8k
    } else {
10439
1.03k
  xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10440
1.03k
    }
10441
100k
    return(buf);
10442
100k
}
10443
10444
/**
10445
 * xmlParseEncodingDecl:
10446
 * @ctxt:  an XML parser context
10447
 *
10448
 * DEPRECATED: Internal function, don't use.
10449
 *
10450
 * parse the XML encoding declaration
10451
 *
10452
 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' |  "'" EncName "'")
10453
 *
10454
 * this setups the conversion filters.
10455
 *
10456
 * Returns the encoding value or NULL
10457
 */
10458
10459
const xmlChar *
10460
294k
xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10461
294k
    xmlChar *encoding = NULL;
10462
10463
294k
    SKIP_BLANKS;
10464
294k
    if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
10465
102k
  SKIP(8);
10466
102k
  SKIP_BLANKS;
10467
102k
  if (RAW != '=') {
10468
500
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10469
500
      return(NULL);
10470
500
        }
10471
101k
  NEXT;
10472
101k
  SKIP_BLANKS;
10473
101k
  if (RAW == '"') {
10474
77.4k
      NEXT;
10475
77.4k
      encoding = xmlParseEncName(ctxt);
10476
77.4k
      if (RAW != '"') {
10477
4.40k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10478
4.40k
    xmlFree((xmlChar *) encoding);
10479
4.40k
    return(NULL);
10480
4.40k
      } else
10481
73.0k
          NEXT;
10482
77.4k
  } else if (RAW == '\''){
10483
23.4k
      NEXT;
10484
23.4k
      encoding = xmlParseEncName(ctxt);
10485
23.4k
      if (RAW != '\'') {
10486
1.97k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10487
1.97k
    xmlFree((xmlChar *) encoding);
10488
1.97k
    return(NULL);
10489
1.97k
      } else
10490
21.4k
          NEXT;
10491
23.4k
  } else {
10492
1.01k
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10493
1.01k
  }
10494
10495
        /*
10496
         * Non standard parsing, allowing the user to ignore encoding
10497
         */
10498
95.4k
        if (ctxt->options & XML_PARSE_IGNORE_ENC) {
10499
30.8k
      xmlFree((xmlChar *) encoding);
10500
30.8k
            return(NULL);
10501
30.8k
  }
10502
10503
  /*
10504
   * UTF-16 encoding switch has already taken place at this stage,
10505
   * more over the little-endian/big-endian selection is already done
10506
   */
10507
64.6k
        if ((encoding != NULL) &&
10508
64.6k
      ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10509
64.0k
       (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
10510
      /*
10511
       * If no encoding was passed to the parser, that we are
10512
       * using UTF-16 and no decoder is present i.e. the
10513
       * document is apparently UTF-8 compatible, then raise an
10514
       * encoding mismatch fatal error
10515
       */
10516
3.29k
      if ((ctxt->encoding == NULL) &&
10517
3.29k
          (ctxt->input->buf != NULL) &&
10518
3.29k
          (ctxt->input->buf->encoder == NULL)) {
10519
3.28k
    xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10520
3.28k
      "Document labelled UTF-16 but has UTF-8 content\n");
10521
3.28k
      }
10522
3.29k
      if (ctxt->encoding != NULL)
10523
13
    xmlFree((xmlChar *) ctxt->encoding);
10524
3.29k
      ctxt->encoding = encoding;
10525
3.29k
  }
10526
  /*
10527
   * UTF-8 encoding is handled natively
10528
   */
10529
61.3k
        else if ((encoding != NULL) &&
10530
61.3k
      ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10531
60.7k
       (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
10532
22.0k
      if (ctxt->encoding != NULL)
10533
12
    xmlFree((xmlChar *) ctxt->encoding);
10534
22.0k
      ctxt->encoding = encoding;
10535
22.0k
  }
10536
39.3k
  else if (encoding != NULL) {
10537
38.7k
      xmlCharEncodingHandlerPtr handler;
10538
10539
38.7k
      if (ctxt->input->encoding != NULL)
10540
0
    xmlFree((xmlChar *) ctxt->input->encoding);
10541
38.7k
      ctxt->input->encoding = encoding;
10542
10543
38.7k
            handler = xmlFindCharEncodingHandler((const char *) encoding);
10544
38.7k
      if (handler != NULL) {
10545
37.8k
    if (xmlSwitchToEncoding(ctxt, handler) < 0) {
10546
        /* failed to convert */
10547
224
        ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
10548
224
        return(NULL);
10549
224
    }
10550
37.8k
      } else {
10551
945
    xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
10552
945
      "Unsupported encoding %s\n", encoding);
10553
945
    return(NULL);
10554
945
      }
10555
38.7k
  }
10556
64.6k
    }
10557
255k
    return(encoding);
10558
294k
}
10559
10560
/**
10561
 * xmlParseSDDecl:
10562
 * @ctxt:  an XML parser context
10563
 *
10564
 * DEPRECATED: Internal function, don't use.
10565
 *
10566
 * parse the XML standalone declaration
10567
 *
10568
 * [32] SDDecl ::= S 'standalone' Eq
10569
 *                 (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10570
 *
10571
 * [ VC: Standalone Document Declaration ]
10572
 * TODO The standalone document declaration must have the value "no"
10573
 * if any external markup declarations contain declarations of:
10574
 *  - attributes with default values, if elements to which these
10575
 *    attributes apply appear in the document without specifications
10576
 *    of values for these attributes, or
10577
 *  - entities (other than amp, lt, gt, apos, quot), if references
10578
 *    to those entities appear in the document, or
10579
 *  - attributes with values subject to normalization, where the
10580
 *    attribute appears in the document with a value which will change
10581
 *    as a result of normalization, or
10582
 *  - element types with element content, if white space occurs directly
10583
 *    within any instance of those types.
10584
 *
10585
 * Returns:
10586
 *   1 if standalone="yes"
10587
 *   0 if standalone="no"
10588
 *  -2 if standalone attribute is missing or invalid
10589
 *    (A standalone value of -2 means that the XML declaration was found,
10590
 *     but no value was specified for the standalone attribute).
10591
 */
10592
10593
int
10594
257k
xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10595
257k
    int standalone = -2;
10596
10597
257k
    SKIP_BLANKS;
10598
257k
    if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10599
51.0k
  SKIP(10);
10600
51.0k
        SKIP_BLANKS;
10601
51.0k
  if (RAW != '=') {
10602
504
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10603
504
      return(standalone);
10604
504
        }
10605
50.4k
  NEXT;
10606
50.4k
  SKIP_BLANKS;
10607
50.4k
        if (RAW == '\''){
10608
8.30k
      NEXT;
10609
8.30k
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10610
5.25k
          standalone = 0;
10611
5.25k
                SKIP(2);
10612
5.25k
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10613
3.05k
                 (NXT(2) == 's')) {
10614
2.60k
          standalone = 1;
10615
2.60k
    SKIP(3);
10616
2.60k
            } else {
10617
450
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10618
450
      }
10619
8.30k
      if (RAW != '\'') {
10620
645
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10621
645
      } else
10622
7.66k
          NEXT;
10623
42.1k
  } else if (RAW == '"'){
10624
41.5k
      NEXT;
10625
41.5k
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10626
22.8k
          standalone = 0;
10627
22.8k
    SKIP(2);
10628
22.8k
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10629
18.6k
                 (NXT(2) == 's')) {
10630
13.4k
          standalone = 1;
10631
13.4k
                SKIP(3);
10632
13.4k
            } else {
10633
5.16k
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10634
5.16k
      }
10635
41.5k
      if (RAW != '"') {
10636
5.55k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10637
5.55k
      } else
10638
35.9k
          NEXT;
10639
41.5k
  } else {
10640
657
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10641
657
        }
10642
50.4k
    }
10643
257k
    return(standalone);
10644
257k
}
10645
10646
/**
10647
 * xmlParseXMLDecl:
10648
 * @ctxt:  an XML parser context
10649
 *
10650
 * DEPRECATED: Internal function, don't use.
10651
 *
10652
 * parse an XML declaration header
10653
 *
10654
 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10655
 */
10656
10657
void
10658
397k
xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10659
397k
    xmlChar *version;
10660
10661
    /*
10662
     * This value for standalone indicates that the document has an
10663
     * XML declaration but it does not have a standalone attribute.
10664
     * It will be overwritten later if a standalone attribute is found.
10665
     */
10666
397k
    ctxt->input->standalone = -2;
10667
10668
    /*
10669
     * We know that '<?xml' is here.
10670
     */
10671
397k
    SKIP(5);
10672
10673
397k
    if (!IS_BLANK_CH(RAW)) {
10674
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10675
0
                 "Blank needed after '<?xml'\n");
10676
0
    }
10677
397k
    SKIP_BLANKS;
10678
10679
    /*
10680
     * We must have the VersionInfo here.
10681
     */
10682
397k
    version = xmlParseVersionInfo(ctxt);
10683
397k
    if (version == NULL) {
10684
98.1k
  xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10685
299k
    } else {
10686
299k
  if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10687
      /*
10688
       * Changed here for XML-1.0 5th edition
10689
       */
10690
33.2k
      if (ctxt->options & XML_PARSE_OLD10) {
10691
9.92k
    xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10692
9.92k
                "Unsupported version '%s'\n",
10693
9.92k
                version);
10694
23.3k
      } else {
10695
23.3k
          if ((version[0] == '1') && ((version[1] == '.'))) {
10696
5.74k
        xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10697
5.74k
                      "Unsupported version '%s'\n",
10698
5.74k
          version, NULL);
10699
17.5k
    } else {
10700
17.5k
        xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10701
17.5k
              "Unsupported version '%s'\n",
10702
17.5k
              version);
10703
17.5k
    }
10704
23.3k
      }
10705
33.2k
  }
10706
299k
  if (ctxt->version != NULL)
10707
0
      xmlFree((void *) ctxt->version);
10708
299k
  ctxt->version = version;
10709
299k
    }
10710
10711
    /*
10712
     * We may have the encoding declaration
10713
     */
10714
397k
    if (!IS_BLANK_CH(RAW)) {
10715
231k
        if ((RAW == '?') && (NXT(1) == '>')) {
10716
116k
      SKIP(2);
10717
116k
      return;
10718
116k
  }
10719
114k
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10720
114k
    }
10721
280k
    xmlParseEncodingDecl(ctxt);
10722
280k
    if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10723
280k
         (ctxt->instate == XML_PARSER_EOF)) {
10724
  /*
10725
   * The XML REC instructs us to stop parsing right here
10726
   */
10727
889
        return;
10728
889
    }
10729
10730
    /*
10731
     * We may have the standalone status.
10732
     */
10733
280k
    if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10734
28.9k
        if ((RAW == '?') && (NXT(1) == '>')) {
10735
22.1k
      SKIP(2);
10736
22.1k
      return;
10737
22.1k
  }
10738
6.82k
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10739
6.82k
    }
10740
10741
    /*
10742
     * We can grow the input buffer freely at that point
10743
     */
10744
257k
    GROW;
10745
10746
257k
    SKIP_BLANKS;
10747
257k
    ctxt->input->standalone = xmlParseSDDecl(ctxt);
10748
10749
257k
    SKIP_BLANKS;
10750
257k
    if ((RAW == '?') && (NXT(1) == '>')) {
10751
95.5k
        SKIP(2);
10752
162k
    } else if (RAW == '>') {
10753
        /* Deprecated old WD ... */
10754
1.29k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10755
1.29k
  NEXT;
10756
161k
    } else {
10757
161k
        int c;
10758
10759
161k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10760
5.50M
        while ((c = CUR) != 0) {
10761
5.49M
            NEXT;
10762
5.49M
            if (c == '>')
10763
152k
                break;
10764
5.49M
        }
10765
161k
    }
10766
257k
}
10767
10768
/**
10769
 * xmlParseMisc:
10770
 * @ctxt:  an XML parser context
10771
 *
10772
 * DEPRECATED: Internal function, don't use.
10773
 *
10774
 * parse an XML Misc* optional field.
10775
 *
10776
 * [27] Misc ::= Comment | PI |  S
10777
 */
10778
10779
void
10780
614k
xmlParseMisc(xmlParserCtxtPtr ctxt) {
10781
714k
    while (ctxt->instate != XML_PARSER_EOF) {
10782
714k
        SKIP_BLANKS;
10783
714k
        GROW;
10784
714k
        if ((RAW == '<') && (NXT(1) == '?')) {
10785
74.2k
      xmlParsePI(ctxt);
10786
639k
        } else if (CMP4(CUR_PTR, '<', '!', '-', '-')) {
10787
25.2k
      xmlParseComment(ctxt);
10788
614k
        } else {
10789
614k
            break;
10790
614k
        }
10791
714k
    }
10792
614k
}
10793
10794
/**
10795
 * xmlParseDocument:
10796
 * @ctxt:  an XML parser context
10797
 *
10798
 * parse an XML document (and build a tree if using the standard SAX
10799
 * interface).
10800
 *
10801
 * [1] document ::= prolog element Misc*
10802
 *
10803
 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10804
 *
10805
 * Returns 0, -1 in case of error. the parser context is augmented
10806
 *                as a result of the parsing.
10807
 */
10808
10809
int
10810
313k
xmlParseDocument(xmlParserCtxtPtr ctxt) {
10811
313k
    xmlChar start[4];
10812
313k
    xmlCharEncoding enc;
10813
10814
313k
    xmlInitParser();
10815
10816
313k
    if ((ctxt == NULL) || (ctxt->input == NULL))
10817
0
        return(-1);
10818
10819
313k
    GROW;
10820
10821
    /*
10822
     * SAX: detecting the level.
10823
     */
10824
313k
    xmlDetectSAX2(ctxt);
10825
10826
    /*
10827
     * SAX: beginning of the document processing.
10828
     */
10829
313k
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10830
313k
        ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10831
313k
    if (ctxt->instate == XML_PARSER_EOF)
10832
0
  return(-1);
10833
10834
313k
    if ((ctxt->encoding == NULL) &&
10835
313k
        ((ctxt->input->end - ctxt->input->cur) >= 4)) {
10836
  /*
10837
   * Get the 4 first bytes and decode the charset
10838
   * if enc != XML_CHAR_ENCODING_NONE
10839
   * plug some encoding conversion routines.
10840
   */
10841
308k
  start[0] = RAW;
10842
308k
  start[1] = NXT(1);
10843
308k
  start[2] = NXT(2);
10844
308k
  start[3] = NXT(3);
10845
308k
  enc = xmlDetectCharEncoding(&start[0], 4);
10846
308k
  if (enc != XML_CHAR_ENCODING_NONE) {
10847
160k
      xmlSwitchEncoding(ctxt, enc);
10848
160k
  }
10849
308k
    }
10850
10851
10852
313k
    if (CUR == 0) {
10853
1.46k
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10854
1.46k
  return(-1);
10855
1.46k
    }
10856
10857
    /*
10858
     * Check for the XMLDecl in the Prolog.
10859
     * do not GROW here to avoid the detected encoder to decode more
10860
     * than just the first line, unless the amount of data is really
10861
     * too small to hold "<?xml version="1.0" encoding="foo"
10862
     */
10863
311k
    if ((ctxt->input->end - ctxt->input->cur) < 35) {
10864
19.9k
       GROW;
10865
19.9k
    }
10866
311k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10867
10868
  /*
10869
   * Note that we will switch encoding on the fly.
10870
   */
10871
132k
  xmlParseXMLDecl(ctxt);
10872
132k
  if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10873
132k
      (ctxt->instate == XML_PARSER_EOF)) {
10874
      /*
10875
       * The XML REC instructs us to stop parsing right here
10876
       */
10877
298
      return(-1);
10878
298
  }
10879
132k
  ctxt->standalone = ctxt->input->standalone;
10880
132k
  SKIP_BLANKS;
10881
179k
    } else {
10882
179k
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10883
179k
    }
10884
311k
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10885
291k
        ctxt->sax->startDocument(ctxt->userData);
10886
311k
    if (ctxt->instate == XML_PARSER_EOF)
10887
0
  return(-1);
10888
311k
    if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10889
311k
        (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10890
0
  ctxt->myDoc->compression = ctxt->input->buf->compressed;
10891
0
    }
10892
10893
    /*
10894
     * The Misc part of the Prolog
10895
     */
10896
311k
    xmlParseMisc(ctxt);
10897
10898
    /*
10899
     * Then possibly doc type declaration(s) and more Misc
10900
     * (doctypedecl Misc*)?
10901
     */
10902
311k
    GROW;
10903
311k
    if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10904
10905
153k
  ctxt->inSubset = 1;
10906
153k
  xmlParseDocTypeDecl(ctxt);
10907
153k
  if (RAW == '[') {
10908
104k
      ctxt->instate = XML_PARSER_DTD;
10909
104k
      xmlParseInternalSubset(ctxt);
10910
104k
      if (ctxt->instate == XML_PARSER_EOF)
10911
53.8k
    return(-1);
10912
104k
  }
10913
10914
  /*
10915
   * Create and update the external subset.
10916
   */
10917
100k
  ctxt->inSubset = 2;
10918
100k
  if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10919
100k
      (!ctxt->disableSAX))
10920
90.4k
      ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10921
90.4k
                                ctxt->extSubSystem, ctxt->extSubURI);
10922
100k
  if (ctxt->instate == XML_PARSER_EOF)
10923
9.37k
      return(-1);
10924
90.6k
  ctxt->inSubset = 0;
10925
10926
90.6k
        xmlCleanSpecialAttr(ctxt);
10927
10928
90.6k
  ctxt->instate = XML_PARSER_PROLOG;
10929
90.6k
  xmlParseMisc(ctxt);
10930
90.6k
    }
10931
10932
    /*
10933
     * Time to start parsing the tree itself
10934
     */
10935
248k
    GROW;
10936
248k
    if (RAW != '<') {
10937
36.0k
  xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10938
36.0k
           "Start tag expected, '<' not found\n");
10939
212k
    } else {
10940
212k
  ctxt->instate = XML_PARSER_CONTENT;
10941
212k
  xmlParseElement(ctxt);
10942
212k
  ctxt->instate = XML_PARSER_EPILOG;
10943
10944
10945
  /*
10946
   * The Misc part at the end
10947
   */
10948
212k
  xmlParseMisc(ctxt);
10949
10950
212k
  if (RAW != 0) {
10951
80.8k
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10952
80.8k
  }
10953
212k
  ctxt->instate = XML_PARSER_EOF;
10954
212k
    }
10955
10956
    /*
10957
     * SAX: end of the document processing.
10958
     */
10959
248k
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10960
248k
        ctxt->sax->endDocument(ctxt->userData);
10961
10962
    /*
10963
     * Remove locally kept entity definitions if the tree was not built
10964
     */
10965
248k
    if ((ctxt->myDoc != NULL) &&
10966
248k
  (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10967
863
  xmlFreeDoc(ctxt->myDoc);
10968
863
  ctxt->myDoc = NULL;
10969
863
    }
10970
10971
248k
    if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10972
9.67k
        ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10973
9.67k
  if (ctxt->valid)
10974
6.54k
      ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10975
9.67k
  if (ctxt->nsWellFormed)
10976
8.92k
      ctxt->myDoc->properties |= XML_DOC_NSVALID;
10977
9.67k
  if (ctxt->options & XML_PARSE_OLD10)
10978
1.26k
      ctxt->myDoc->properties |= XML_DOC_OLD10;
10979
9.67k
    }
10980
248k
    if (! ctxt->wellFormed) {
10981
238k
  ctxt->valid = 0;
10982
238k
  return(-1);
10983
238k
    }
10984
9.67k
    return(0);
10985
248k
}
10986
10987
/**
10988
 * xmlParseExtParsedEnt:
10989
 * @ctxt:  an XML parser context
10990
 *
10991
 * parse a general parsed entity
10992
 * An external general parsed entity is well-formed if it matches the
10993
 * production labeled extParsedEnt.
10994
 *
10995
 * [78] extParsedEnt ::= TextDecl? content
10996
 *
10997
 * Returns 0, -1 in case of error. the parser context is augmented
10998
 *                as a result of the parsing.
10999
 */
11000
11001
int
11002
0
xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
11003
0
    xmlChar start[4];
11004
0
    xmlCharEncoding enc;
11005
11006
0
    if ((ctxt == NULL) || (ctxt->input == NULL))
11007
0
        return(-1);
11008
11009
0
    xmlDetectSAX2(ctxt);
11010
11011
0
    GROW;
11012
11013
    /*
11014
     * SAX: beginning of the document processing.
11015
     */
11016
0
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11017
0
        ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
11018
11019
    /*
11020
     * Get the 4 first bytes and decode the charset
11021
     * if enc != XML_CHAR_ENCODING_NONE
11022
     * plug some encoding conversion routines.
11023
     */
11024
0
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11025
0
  start[0] = RAW;
11026
0
  start[1] = NXT(1);
11027
0
  start[2] = NXT(2);
11028
0
  start[3] = NXT(3);
11029
0
  enc = xmlDetectCharEncoding(start, 4);
11030
0
  if (enc != XML_CHAR_ENCODING_NONE) {
11031
0
      xmlSwitchEncoding(ctxt, enc);
11032
0
  }
11033
0
    }
11034
11035
11036
0
    if (CUR == 0) {
11037
0
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11038
0
    }
11039
11040
    /*
11041
     * Check for the XMLDecl in the Prolog.
11042
     */
11043
0
    GROW;
11044
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
11045
11046
  /*
11047
   * Note that we will switch encoding on the fly.
11048
   */
11049
0
  xmlParseXMLDecl(ctxt);
11050
0
  if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11051
      /*
11052
       * The XML REC instructs us to stop parsing right here
11053
       */
11054
0
      return(-1);
11055
0
  }
11056
0
  SKIP_BLANKS;
11057
0
    } else {
11058
0
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11059
0
    }
11060
0
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
11061
0
        ctxt->sax->startDocument(ctxt->userData);
11062
0
    if (ctxt->instate == XML_PARSER_EOF)
11063
0
  return(-1);
11064
11065
    /*
11066
     * Doing validity checking on chunk doesn't make sense
11067
     */
11068
0
    ctxt->instate = XML_PARSER_CONTENT;
11069
0
    ctxt->validate = 0;
11070
0
    ctxt->loadsubset = 0;
11071
0
    ctxt->depth = 0;
11072
11073
0
    xmlParseContent(ctxt);
11074
0
    if (ctxt->instate == XML_PARSER_EOF)
11075
0
  return(-1);
11076
11077
0
    if ((RAW == '<') && (NXT(1) == '/')) {
11078
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11079
0
    } else if (RAW != 0) {
11080
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
11081
0
    }
11082
11083
    /*
11084
     * SAX: end of the document processing.
11085
     */
11086
0
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11087
0
        ctxt->sax->endDocument(ctxt->userData);
11088
11089
0
    if (! ctxt->wellFormed) return(-1);
11090
0
    return(0);
11091
0
}
11092
11093
#ifdef LIBXML_PUSH_ENABLED
11094
/************************************************************************
11095
 *                  *
11096
 *    Progressive parsing interfaces        *
11097
 *                  *
11098
 ************************************************************************/
11099
11100
/**
11101
 * xmlParseLookupChar:
11102
 * @ctxt:  an XML parser context
11103
 * @c:  character
11104
 *
11105
 * Check whether the input buffer contains a character.
11106
 */
11107
static int
11108
3.80M
xmlParseLookupChar(xmlParserCtxtPtr ctxt, int c) {
11109
3.80M
    const xmlChar *cur;
11110
11111
3.80M
    if (ctxt->checkIndex == 0) {
11112
3.53M
        cur = ctxt->input->cur + 1;
11113
3.53M
    } else {
11114
270k
        cur = ctxt->input->cur + ctxt->checkIndex;
11115
270k
    }
11116
11117
3.80M
    if (memchr(cur, c, ctxt->input->end - cur) == NULL) {
11118
294k
        ctxt->checkIndex = ctxt->input->end - ctxt->input->cur;
11119
294k
        return(0);
11120
3.50M
    } else {
11121
3.50M
        ctxt->checkIndex = 0;
11122
3.50M
        return(1);
11123
3.50M
    }
11124
3.80M
}
11125
11126
/**
11127
 * xmlParseLookupString:
11128
 * @ctxt:  an XML parser context
11129
 * @startDelta: delta to apply at the start
11130
 * @str:  string
11131
 * @strLen:  length of string
11132
 *
11133
 * Check whether the input buffer contains a string.
11134
 */
11135
static const xmlChar *
11136
xmlParseLookupString(xmlParserCtxtPtr ctxt, size_t startDelta,
11137
2.09M
                     const char *str, size_t strLen) {
11138
2.09M
    const xmlChar *cur, *term;
11139
11140
2.09M
    if (ctxt->checkIndex == 0) {
11141
1.09M
        cur = ctxt->input->cur + startDelta;
11142
1.09M
    } else {
11143
1.00M
        cur = ctxt->input->cur + ctxt->checkIndex;
11144
1.00M
    }
11145
11146
2.09M
    term = BAD_CAST strstr((const char *) cur, str);
11147
2.09M
    if (term == NULL) {
11148
1.19M
        const xmlChar *end = ctxt->input->end;
11149
11150
        /* Rescan (strLen - 1) characters. */
11151
1.19M
        if ((size_t) (end - cur) < strLen)
11152
47.6k
            end = cur;
11153
1.15M
        else
11154
1.15M
            end -= strLen - 1;
11155
1.19M
        ctxt->checkIndex = end - ctxt->input->cur;
11156
1.19M
    } else {
11157
897k
        ctxt->checkIndex = 0;
11158
897k
    }
11159
11160
2.09M
    return(term);
11161
2.09M
}
11162
11163
/**
11164
 * xmlParseLookupCharData:
11165
 * @ctxt:  an XML parser context
11166
 *
11167
 * Check whether the input buffer contains terminated char data.
11168
 */
11169
static int
11170
8.82M
xmlParseLookupCharData(xmlParserCtxtPtr ctxt) {
11171
8.82M
    const xmlChar *cur = ctxt->input->cur + ctxt->checkIndex;
11172
8.82M
    const xmlChar *end = ctxt->input->end;
11173
11174
230M
    while (cur < end) {
11175
230M
        if ((*cur == '<') || (*cur == '&')) {
11176
8.23M
            ctxt->checkIndex = 0;
11177
8.23M
            return(1);
11178
8.23M
        }
11179
221M
        cur++;
11180
221M
    }
11181
11182
592k
    ctxt->checkIndex = cur - ctxt->input->cur;
11183
592k
    return(0);
11184
8.82M
}
11185
11186
/**
11187
 * xmlParseLookupGt:
11188
 * @ctxt:  an XML parser context
11189
 *
11190
 * Check whether there's enough data in the input buffer to finish parsing
11191
 * a start tag. This has to take quotes into account.
11192
 */
11193
static int
11194
7.65M
xmlParseLookupGt(xmlParserCtxtPtr ctxt) {
11195
7.65M
    const xmlChar *cur;
11196
7.65M
    const xmlChar *end = ctxt->input->end;
11197
7.65M
    int state = ctxt->endCheckState;
11198
11199
7.65M
    if (ctxt->checkIndex == 0)
11200
5.79M
        cur = ctxt->input->cur + 1;
11201
1.86M
    else
11202
1.86M
        cur = ctxt->input->cur + ctxt->checkIndex;
11203
11204
515M
    while (cur < end) {
11205
513M
        if (state) {
11206
281M
            if (*cur == state)
11207
7.79M
                state = 0;
11208
281M
        } else if (*cur == '\'' || *cur == '"') {
11209
7.89M
            state = *cur;
11210
224M
        } else if (*cur == '>') {
11211
5.63M
            ctxt->checkIndex = 0;
11212
5.63M
            ctxt->endCheckState = 0;
11213
5.63M
            return(1);
11214
5.63M
        }
11215
508M
        cur++;
11216
508M
    }
11217
11218
2.02M
    ctxt->checkIndex = cur - ctxt->input->cur;
11219
2.02M
    ctxt->endCheckState = state;
11220
2.02M
    return(0);
11221
7.65M
}
11222
11223
/**
11224
 * xmlParseLookupInternalSubset:
11225
 * @ctxt:  an XML parser context
11226
 *
11227
 * Check whether there's enough data in the input buffer to finish parsing
11228
 * the internal subset.
11229
 */
11230
static int
11231
594k
xmlParseLookupInternalSubset(xmlParserCtxtPtr ctxt) {
11232
    /*
11233
     * Sorry, but progressive parsing of the internal subset is not
11234
     * supported. We first check that the full content of the internal
11235
     * subset is available and parsing is launched only at that point.
11236
     * Internal subset ends with "']' S? '>'" in an unescaped section and
11237
     * not in a ']]>' sequence which are conditional sections.
11238
     */
11239
594k
    const xmlChar *cur, *start;
11240
594k
    const xmlChar *end = ctxt->input->end;
11241
594k
    int state = ctxt->endCheckState;
11242
11243
594k
    if (ctxt->checkIndex == 0) {
11244
171k
        cur = ctxt->input->cur + 1;
11245
422k
    } else {
11246
422k
        cur = ctxt->input->cur + ctxt->checkIndex;
11247
422k
    }
11248
594k
    start = cur;
11249
11250
101M
    while (cur < end) {
11251
101M
        if (state == '-') {
11252
22.3M
            if ((*cur == '-') &&
11253
22.3M
                (cur[1] == '-') &&
11254
22.3M
                (cur[2] == '>')) {
11255
147k
                state = 0;
11256
147k
                cur += 3;
11257
147k
                start = cur;
11258
147k
                continue;
11259
147k
            }
11260
22.3M
        }
11261
78.6M
        else if (state == ']') {
11262
345k
            if (*cur == '>') {
11263
110k
                ctxt->checkIndex = 0;
11264
110k
                ctxt->endCheckState = 0;
11265
110k
                return(1);
11266
110k
            }
11267
235k
            if (IS_BLANK_CH(*cur)) {
11268
19.0k
                state = ' ';
11269
216k
            } else if (*cur != ']') {
11270
19.8k
                state = 0;
11271
19.8k
                start = cur;
11272
19.8k
                continue;
11273
19.8k
            }
11274
235k
        }
11275
78.3M
        else if (state == ' ') {
11276
130k
            if (*cur == '>') {
11277
1.10k
                ctxt->checkIndex = 0;
11278
1.10k
                ctxt->endCheckState = 0;
11279
1.10k
                return(1);
11280
1.10k
            }
11281
129k
            if (!IS_BLANK_CH(*cur)) {
11282
17.8k
                state = 0;
11283
17.8k
                start = cur;
11284
17.8k
                continue;
11285
17.8k
            }
11286
129k
        }
11287
78.1M
        else if (state != 0) {
11288
35.0M
            if (*cur == state) {
11289
710k
                state = 0;
11290
710k
                start = cur + 1;
11291
710k
            }
11292
35.0M
        }
11293
43.1M
        else if (*cur == '<') {
11294
1.29M
            if ((cur[1] == '!') &&
11295
1.29M
                (cur[2] == '-') &&
11296
1.29M
                (cur[3] == '-')) {
11297
151k
                state = '-';
11298
151k
                cur += 4;
11299
                /* Don't treat <!--> as comment */
11300
151k
                start = cur;
11301
151k
                continue;
11302
151k
            }
11303
1.29M
        }
11304
41.9M
        else if ((*cur == '"') || (*cur == '\'') || (*cur == ']')) {
11305
881k
            state = *cur;
11306
881k
        }
11307
11308
100M
        cur++;
11309
100M
    }
11310
11311
    /*
11312
     * Rescan the three last characters to detect "<!--" and "-->"
11313
     * split across chunks.
11314
     */
11315
482k
    if ((state == 0) || (state == '-')) {
11316
307k
        if (cur - start < 3)
11317
17.7k
            cur = start;
11318
289k
        else
11319
289k
            cur -= 3;
11320
307k
    }
11321
482k
    ctxt->checkIndex = cur - ctxt->input->cur;
11322
482k
    ctxt->endCheckState = state;
11323
482k
    return(0);
11324
594k
}
11325
11326
/**
11327
 * xmlCheckCdataPush:
11328
 * @cur: pointer to the block of characters
11329
 * @len: length of the block in bytes
11330
 * @complete: 1 if complete CDATA block is passed in, 0 if partial block
11331
 *
11332
 * Check that the block of characters is okay as SCdata content [20]
11333
 *
11334
 * Returns the number of bytes to pass if okay, a negative index where an
11335
 *         UTF-8 error occurred otherwise
11336
 */
11337
static int
11338
298k
xmlCheckCdataPush(const xmlChar *utf, int len, int complete) {
11339
298k
    int ix;
11340
298k
    unsigned char c;
11341
298k
    int codepoint;
11342
11343
298k
    if ((utf == NULL) || (len <= 0))
11344
19.8k
        return(0);
11345
11346
12.1M
    for (ix = 0; ix < len;) {      /* string is 0-terminated */
11347
12.0M
        c = utf[ix];
11348
12.0M
        if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
11349
9.79M
      if (c >= 0x20)
11350
9.35M
    ix++;
11351
439k
      else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11352
411k
          ix++;
11353
28.7k
      else
11354
28.7k
          return(-ix);
11355
9.79M
  } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
11356
1.59M
      if (ix + 2 > len) return(complete ? -ix : ix);
11357
1.59M
      if ((utf[ix+1] & 0xc0 ) != 0x80)
11358
26.8k
          return(-ix);
11359
1.56M
      codepoint = (utf[ix] & 0x1f) << 6;
11360
1.56M
      codepoint |= utf[ix+1] & 0x3f;
11361
1.56M
      if (!xmlIsCharQ(codepoint))
11362
9.13k
          return(-ix);
11363
1.55M
      ix += 2;
11364
1.55M
  } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
11365
307k
      if (ix + 3 > len) return(complete ? -ix : ix);
11366
302k
      if (((utf[ix+1] & 0xc0) != 0x80) ||
11367
302k
          ((utf[ix+2] & 0xc0) != 0x80))
11368
11.5k
        return(-ix);
11369
290k
      codepoint = (utf[ix] & 0xf) << 12;
11370
290k
      codepoint |= (utf[ix+1] & 0x3f) << 6;
11371
290k
      codepoint |= utf[ix+2] & 0x3f;
11372
290k
      if (!xmlIsCharQ(codepoint))
11373
9.81k
          return(-ix);
11374
280k
      ix += 3;
11375
305k
  } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
11376
262k
      if (ix + 4 > len) return(complete ? -ix : ix);
11377
257k
      if (((utf[ix+1] & 0xc0) != 0x80) ||
11378
257k
          ((utf[ix+2] & 0xc0) != 0x80) ||
11379
257k
    ((utf[ix+3] & 0xc0) != 0x80))
11380
17.5k
        return(-ix);
11381
240k
      codepoint = (utf[ix] & 0x7) << 18;
11382
240k
      codepoint |= (utf[ix+1] & 0x3f) << 12;
11383
240k
      codepoint |= (utf[ix+2] & 0x3f) << 6;
11384
240k
      codepoint |= utf[ix+3] & 0x3f;
11385
240k
      if (!xmlIsCharQ(codepoint))
11386
11.3k
          return(-ix);
11387
228k
      ix += 4;
11388
228k
  } else       /* unknown encoding */
11389
42.9k
      return(-ix);
11390
12.0M
      }
11391
103k
      return(ix);
11392
278k
}
11393
11394
/**
11395
 * xmlParseTryOrFinish:
11396
 * @ctxt:  an XML parser context
11397
 * @terminate:  last chunk indicator
11398
 *
11399
 * Try to progress on parsing
11400
 *
11401
 * Returns zero if no parsing was possible
11402
 */
11403
static int
11404
5.41M
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11405
5.41M
    int ret = 0;
11406
5.41M
    int avail, tlen;
11407
5.41M
    xmlChar cur, next;
11408
11409
5.41M
    if (ctxt->input == NULL)
11410
0
        return(0);
11411
11412
#ifdef DEBUG_PUSH
11413
    switch (ctxt->instate) {
11414
  case XML_PARSER_EOF:
11415
      xmlGenericError(xmlGenericErrorContext,
11416
        "PP: try EOF\n"); break;
11417
  case XML_PARSER_START:
11418
      xmlGenericError(xmlGenericErrorContext,
11419
        "PP: try START\n"); break;
11420
  case XML_PARSER_MISC:
11421
      xmlGenericError(xmlGenericErrorContext,
11422
        "PP: try MISC\n");break;
11423
  case XML_PARSER_COMMENT:
11424
      xmlGenericError(xmlGenericErrorContext,
11425
        "PP: try COMMENT\n");break;
11426
  case XML_PARSER_PROLOG:
11427
      xmlGenericError(xmlGenericErrorContext,
11428
        "PP: try PROLOG\n");break;
11429
  case XML_PARSER_START_TAG:
11430
      xmlGenericError(xmlGenericErrorContext,
11431
        "PP: try START_TAG\n");break;
11432
  case XML_PARSER_CONTENT:
11433
      xmlGenericError(xmlGenericErrorContext,
11434
        "PP: try CONTENT\n");break;
11435
  case XML_PARSER_CDATA_SECTION:
11436
      xmlGenericError(xmlGenericErrorContext,
11437
        "PP: try CDATA_SECTION\n");break;
11438
  case XML_PARSER_END_TAG:
11439
      xmlGenericError(xmlGenericErrorContext,
11440
        "PP: try END_TAG\n");break;
11441
  case XML_PARSER_ENTITY_DECL:
11442
      xmlGenericError(xmlGenericErrorContext,
11443
        "PP: try ENTITY_DECL\n");break;
11444
  case XML_PARSER_ENTITY_VALUE:
11445
      xmlGenericError(xmlGenericErrorContext,
11446
        "PP: try ENTITY_VALUE\n");break;
11447
  case XML_PARSER_ATTRIBUTE_VALUE:
11448
      xmlGenericError(xmlGenericErrorContext,
11449
        "PP: try ATTRIBUTE_VALUE\n");break;
11450
  case XML_PARSER_DTD:
11451
      xmlGenericError(xmlGenericErrorContext,
11452
        "PP: try DTD\n");break;
11453
  case XML_PARSER_EPILOG:
11454
      xmlGenericError(xmlGenericErrorContext,
11455
        "PP: try EPILOG\n");break;
11456
  case XML_PARSER_PI:
11457
      xmlGenericError(xmlGenericErrorContext,
11458
        "PP: try PI\n");break;
11459
        case XML_PARSER_IGNORE:
11460
            xmlGenericError(xmlGenericErrorContext,
11461
        "PP: try IGNORE\n");break;
11462
    }
11463
#endif
11464
11465
5.41M
    if ((ctxt->input != NULL) &&
11466
5.41M
        (ctxt->input->cur - ctxt->input->base > 4096)) {
11467
53.9k
        xmlParserInputShrink(ctxt->input);
11468
53.9k
    }
11469
11470
53.6M
    while (ctxt->instate != XML_PARSER_EOF) {
11471
53.6M
  if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11472
177k
      return(0);
11473
11474
53.4M
  if (ctxt->input == NULL) break;
11475
53.4M
  if (ctxt->input->buf == NULL)
11476
0
      avail = ctxt->input->length -
11477
0
              (ctxt->input->cur - ctxt->input->base);
11478
53.4M
  else {
11479
      /*
11480
       * If we are operating on converted input, try to flush
11481
       * remaining chars to avoid them stalling in the non-converted
11482
       * buffer. But do not do this in document start where
11483
       * encoding="..." may not have been read and we work on a
11484
       * guessed encoding.
11485
       */
11486
53.4M
      if ((ctxt->instate != XML_PARSER_START) &&
11487
53.4M
          (ctxt->input->buf->raw != NULL) &&
11488
53.4M
    (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
11489
967k
                size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11490
967k
                                                 ctxt->input);
11491
967k
    size_t current = ctxt->input->cur - ctxt->input->base;
11492
11493
967k
    xmlParserInputBufferPush(ctxt->input->buf, 0, "");
11494
967k
                xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11495
967k
                                      base, current);
11496
967k
      }
11497
53.4M
      avail = xmlBufUse(ctxt->input->buf->buffer) -
11498
53.4M
        (ctxt->input->cur - ctxt->input->base);
11499
53.4M
  }
11500
53.4M
        if (avail < 1)
11501
224k
      goto done;
11502
53.2M
        switch (ctxt->instate) {
11503
0
            case XML_PARSER_EOF:
11504
          /*
11505
     * Document parsing is done !
11506
     */
11507
0
          goto done;
11508
1.79M
            case XML_PARSER_START:
11509
1.79M
    if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11510
482k
        xmlChar start[4];
11511
482k
        xmlCharEncoding enc;
11512
11513
        /*
11514
         * Very first chars read from the document flow.
11515
         */
11516
482k
        if (avail < 4)
11517
26.7k
      goto done;
11518
11519
        /*
11520
         * Get the 4 first bytes and decode the charset
11521
         * if enc != XML_CHAR_ENCODING_NONE
11522
         * plug some encoding conversion routines,
11523
         * else xmlSwitchEncoding will set to (default)
11524
         * UTF8.
11525
         */
11526
455k
        start[0] = RAW;
11527
455k
        start[1] = NXT(1);
11528
455k
        start[2] = NXT(2);
11529
455k
        start[3] = NXT(3);
11530
455k
        enc = xmlDetectCharEncoding(start, 4);
11531
455k
        xmlSwitchEncoding(ctxt, enc);
11532
455k
        break;
11533
482k
    }
11534
11535
1.31M
    if (avail < 2)
11536
201
        goto done;
11537
1.31M
    cur = ctxt->input->cur[0];
11538
1.31M
    next = ctxt->input->cur[1];
11539
1.31M
    if (cur == 0) {
11540
2.30k
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11541
2.30k
      ctxt->sax->setDocumentLocator(ctxt->userData,
11542
2.30k
                  &xmlDefaultSAXLocator);
11543
2.30k
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11544
2.30k
        xmlHaltParser(ctxt);
11545
#ifdef DEBUG_PUSH
11546
        xmlGenericError(xmlGenericErrorContext,
11547
          "PP: entering EOF\n");
11548
#endif
11549
2.30k
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11550
2.30k
      ctxt->sax->endDocument(ctxt->userData);
11551
2.30k
        goto done;
11552
2.30k
    }
11553
1.31M
          if ((cur == '<') && (next == '?')) {
11554
        /* PI or XML decl */
11555
1.03M
        if (avail < 5) goto done;
11556
1.03M
        if ((!terminate) &&
11557
1.03M
                        (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11558
697k
      goto done;
11559
336k
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11560
336k
      ctxt->sax->setDocumentLocator(ctxt->userData,
11561
336k
                  &xmlDefaultSAXLocator);
11562
336k
        if ((ctxt->input->cur[2] == 'x') &&
11563
336k
      (ctxt->input->cur[3] == 'm') &&
11564
336k
      (ctxt->input->cur[4] == 'l') &&
11565
336k
      (IS_BLANK_CH(ctxt->input->cur[5]))) {
11566
264k
      ret += 5;
11567
#ifdef DEBUG_PUSH
11568
      xmlGenericError(xmlGenericErrorContext,
11569
        "PP: Parsing XML Decl\n");
11570
#endif
11571
264k
      xmlParseXMLDecl(ctxt);
11572
264k
      if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11573
          /*
11574
           * The XML REC instructs us to stop parsing right
11575
           * here
11576
           */
11577
591
          xmlHaltParser(ctxt);
11578
591
          return(0);
11579
591
      }
11580
264k
      ctxt->standalone = ctxt->input->standalone;
11581
264k
      if ((ctxt->encoding == NULL) &&
11582
264k
          (ctxt->input->encoding != NULL))
11583
24.5k
          ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11584
264k
      if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11585
264k
          (!ctxt->disableSAX))
11586
223k
          ctxt->sax->startDocument(ctxt->userData);
11587
264k
      ctxt->instate = XML_PARSER_MISC;
11588
#ifdef DEBUG_PUSH
11589
      xmlGenericError(xmlGenericErrorContext,
11590
        "PP: entering MISC\n");
11591
#endif
11592
264k
        } else {
11593
71.3k
      ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11594
71.3k
      if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11595
71.3k
          (!ctxt->disableSAX))
11596
71.3k
          ctxt->sax->startDocument(ctxt->userData);
11597
71.3k
      ctxt->instate = XML_PARSER_MISC;
11598
#ifdef DEBUG_PUSH
11599
      xmlGenericError(xmlGenericErrorContext,
11600
        "PP: entering MISC\n");
11601
#endif
11602
71.3k
        }
11603
336k
    } else {
11604
274k
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11605
274k
      ctxt->sax->setDocumentLocator(ctxt->userData,
11606
274k
                  &xmlDefaultSAXLocator);
11607
274k
        ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11608
274k
        if (ctxt->version == NULL) {
11609
0
            xmlErrMemory(ctxt, NULL);
11610
0
      break;
11611
0
        }
11612
274k
        if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11613
274k
            (!ctxt->disableSAX))
11614
274k
      ctxt->sax->startDocument(ctxt->userData);
11615
274k
        ctxt->instate = XML_PARSER_MISC;
11616
#ifdef DEBUG_PUSH
11617
        xmlGenericError(xmlGenericErrorContext,
11618
          "PP: entering MISC\n");
11619
#endif
11620
274k
    }
11621
609k
    break;
11622
9.52M
            case XML_PARSER_START_TAG: {
11623
9.52M
          const xmlChar *name;
11624
9.52M
    const xmlChar *prefix = NULL;
11625
9.52M
    const xmlChar *URI = NULL;
11626
9.52M
                int line = ctxt->input->line;
11627
9.52M
    int nsNr = ctxt->nsNr;
11628
11629
9.52M
    if ((avail < 2) && (ctxt->inputNr == 1))
11630
0
        goto done;
11631
9.52M
    cur = ctxt->input->cur[0];
11632
9.52M
          if (cur != '<') {
11633
26.5k
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11634
26.5k
        xmlHaltParser(ctxt);
11635
26.5k
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11636
26.5k
      ctxt->sax->endDocument(ctxt->userData);
11637
26.5k
        goto done;
11638
26.5k
    }
11639
9.50M
    if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11640
1.86M
                    goto done;
11641
7.63M
    if (ctxt->spaceNr == 0)
11642
384k
        spacePush(ctxt, -1);
11643
7.24M
    else if (*ctxt->space == -2)
11644
1.55M
        spacePush(ctxt, -1);
11645
5.69M
    else
11646
5.69M
        spacePush(ctxt, *ctxt->space);
11647
7.63M
#ifdef LIBXML_SAX1_ENABLED
11648
7.63M
    if (ctxt->sax2)
11649
4.91M
#endif /* LIBXML_SAX1_ENABLED */
11650
4.91M
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
11651
2.72M
#ifdef LIBXML_SAX1_ENABLED
11652
2.72M
    else
11653
2.72M
        name = xmlParseStartTag(ctxt);
11654
7.63M
#endif /* LIBXML_SAX1_ENABLED */
11655
7.63M
    if (ctxt->instate == XML_PARSER_EOF)
11656
611
        goto done;
11657
7.63M
    if (name == NULL) {
11658
37.4k
        spacePop(ctxt);
11659
37.4k
        xmlHaltParser(ctxt);
11660
37.4k
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11661
37.4k
      ctxt->sax->endDocument(ctxt->userData);
11662
37.4k
        goto done;
11663
37.4k
    }
11664
7.59M
#ifdef LIBXML_VALID_ENABLED
11665
    /*
11666
     * [ VC: Root Element Type ]
11667
     * The Name in the document type declaration must match
11668
     * the element type of the root element.
11669
     */
11670
7.59M
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11671
7.59M
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
11672
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11673
7.59M
#endif /* LIBXML_VALID_ENABLED */
11674
11675
    /*
11676
     * Check for an Empty Element.
11677
     */
11678
7.59M
    if ((RAW == '/') && (NXT(1) == '>')) {
11679
1.36M
        SKIP(2);
11680
11681
1.36M
        if (ctxt->sax2) {
11682
1.07M
      if ((ctxt->sax != NULL) &&
11683
1.07M
          (ctxt->sax->endElementNs != NULL) &&
11684
1.07M
          (!ctxt->disableSAX))
11685
1.07M
          ctxt->sax->endElementNs(ctxt->userData, name,
11686
1.07M
                                  prefix, URI);
11687
1.07M
      if (ctxt->nsNr - nsNr > 0)
11688
26.0k
          nsPop(ctxt, ctxt->nsNr - nsNr);
11689
1.07M
#ifdef LIBXML_SAX1_ENABLED
11690
1.07M
        } else {
11691
295k
      if ((ctxt->sax != NULL) &&
11692
295k
          (ctxt->sax->endElement != NULL) &&
11693
295k
          (!ctxt->disableSAX))
11694
295k
          ctxt->sax->endElement(ctxt->userData, name);
11695
295k
#endif /* LIBXML_SAX1_ENABLED */
11696
295k
        }
11697
1.36M
        if (ctxt->instate == XML_PARSER_EOF)
11698
0
      goto done;
11699
1.36M
        spacePop(ctxt);
11700
1.36M
        if (ctxt->nameNr == 0) {
11701
4.76k
      ctxt->instate = XML_PARSER_EPILOG;
11702
1.36M
        } else {
11703
1.36M
      ctxt->instate = XML_PARSER_CONTENT;
11704
1.36M
        }
11705
1.36M
        break;
11706
1.36M
    }
11707
6.22M
    if (RAW == '>') {
11708
3.46M
        NEXT;
11709
3.46M
    } else {
11710
2.75M
        xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11711
2.75M
           "Couldn't find end of Start Tag %s\n",
11712
2.75M
           name);
11713
2.75M
        nodePop(ctxt);
11714
2.75M
        spacePop(ctxt);
11715
2.75M
    }
11716
6.22M
                nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
11717
11718
6.22M
    ctxt->instate = XML_PARSER_CONTENT;
11719
6.22M
                break;
11720
7.59M
      }
11721
37.4M
            case XML_PARSER_CONTENT: {
11722
37.4M
    if ((avail < 2) && (ctxt->inputNr == 1))
11723
82.0k
        goto done;
11724
37.4M
    cur = ctxt->input->cur[0];
11725
37.4M
    next = ctxt->input->cur[1];
11726
11727
37.4M
    if ((cur == '<') && (next == '/')) {
11728
2.17M
        ctxt->instate = XML_PARSER_END_TAG;
11729
2.17M
        break;
11730
35.2M
          } else if ((cur == '<') && (next == '?')) {
11731
362k
        if ((!terminate) &&
11732
362k
            (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11733
104k
      goto done;
11734
257k
        xmlParsePI(ctxt);
11735
257k
        ctxt->instate = XML_PARSER_CONTENT;
11736
34.8M
    } else if ((cur == '<') && (next != '!')) {
11737
7.29M
        ctxt->instate = XML_PARSER_START_TAG;
11738
7.29M
        break;
11739
27.5M
    } else if ((cur == '<') && (next == '!') &&
11740
27.5M
               (ctxt->input->cur[2] == '-') &&
11741
27.5M
         (ctxt->input->cur[3] == '-')) {
11742
433k
        if ((!terminate) &&
11743
433k
            (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11744
136k
      goto done;
11745
297k
        xmlParseComment(ctxt);
11746
297k
        ctxt->instate = XML_PARSER_CONTENT;
11747
27.1M
    } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11748
27.1M
        (ctxt->input->cur[2] == '[') &&
11749
27.1M
        (ctxt->input->cur[3] == 'C') &&
11750
27.1M
        (ctxt->input->cur[4] == 'D') &&
11751
27.1M
        (ctxt->input->cur[5] == 'A') &&
11752
27.1M
        (ctxt->input->cur[6] == 'T') &&
11753
27.1M
        (ctxt->input->cur[7] == 'A') &&
11754
27.1M
        (ctxt->input->cur[8] == '[')) {
11755
111k
        SKIP(9);
11756
111k
        ctxt->instate = XML_PARSER_CDATA_SECTION;
11757
111k
        break;
11758
27.0M
    } else if ((cur == '<') && (next == '!') &&
11759
27.0M
               (avail < 9)) {
11760
23.2k
        goto done;
11761
27.0M
    } else if (cur == '<') {
11762
1.15M
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11763
1.15M
                    "detected an error in element content\n");
11764
1.15M
                    SKIP(1);
11765
25.8M
    } else if (cur == '&') {
11766
2.76M
        if ((!terminate) && (!xmlParseLookupChar(ctxt, ';')))
11767
224k
      goto done;
11768
2.54M
        xmlParseReference(ctxt);
11769
23.0M
    } else {
11770
        /* TODO Avoid the extra copy, handle directly !!! */
11771
        /*
11772
         * Goal of the following test is:
11773
         *  - minimize calls to the SAX 'character' callback
11774
         *    when they are mergeable
11775
         *  - handle an problem for isBlank when we only parse
11776
         *    a sequence of blank chars and the next one is
11777
         *    not available to check against '<' presence.
11778
         *  - tries to homogenize the differences in SAX
11779
         *    callbacks between the push and pull versions
11780
         *    of the parser.
11781
         */
11782
23.0M
        if ((ctxt->inputNr == 1) &&
11783
23.0M
            (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11784
10.4M
      if ((!terminate) && (!xmlParseLookupCharData(ctxt)))
11785
592k
          goto done;
11786
10.4M
                    }
11787
22.4M
                    ctxt->checkIndex = 0;
11788
22.4M
        xmlParseCharData(ctxt, 0);
11789
22.4M
    }
11790
26.7M
    break;
11791
37.4M
      }
11792
26.7M
            case XML_PARSER_END_TAG:
11793
2.24M
    if (avail < 2)
11794
0
        goto done;
11795
2.24M
    if ((!terminate) && (!xmlParseLookupChar(ctxt, '>')))
11796
69.5k
        goto done;
11797
2.17M
    if (ctxt->sax2) {
11798
1.42M
              xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
11799
1.42M
        nameNsPop(ctxt);
11800
1.42M
    }
11801
751k
#ifdef LIBXML_SAX1_ENABLED
11802
751k
      else
11803
751k
        xmlParseEndTag1(ctxt, 0);
11804
2.17M
#endif /* LIBXML_SAX1_ENABLED */
11805
2.17M
    if (ctxt->instate == XML_PARSER_EOF) {
11806
        /* Nothing */
11807
2.17M
    } else if (ctxt->nameNr == 0) {
11808
32.7k
        ctxt->instate = XML_PARSER_EPILOG;
11809
2.14M
    } else {
11810
2.14M
        ctxt->instate = XML_PARSER_CONTENT;
11811
2.14M
    }
11812
2.17M
    break;
11813
384k
            case XML_PARSER_CDATA_SECTION: {
11814
          /*
11815
     * The Push mode need to have the SAX callback for
11816
     * cdataBlock merge back contiguous callbacks.
11817
     */
11818
384k
    const xmlChar *term;
11819
11820
384k
                if (terminate) {
11821
                    /*
11822
                     * Don't call xmlParseLookupString. If 'terminate'
11823
                     * is set, checkIndex is invalid.
11824
                     */
11825
36.8k
                    term = BAD_CAST strstr((const char *) ctxt->input->cur,
11826
36.8k
                                           "]]>");
11827
348k
                } else {
11828
348k
        term = xmlParseLookupString(ctxt, 0, "]]>", 3);
11829
348k
                }
11830
11831
384k
    if (term == NULL) {
11832
209k
        int tmp, size;
11833
11834
209k
                    if (terminate) {
11835
                        /* Unfinished CDATA section */
11836
8.06k
                        size = ctxt->input->end - ctxt->input->cur;
11837
201k
                    } else {
11838
201k
                        if (avail < XML_PARSER_BIG_BUFFER_SIZE + 2)
11839
86.1k
                            goto done;
11840
115k
                        ctxt->checkIndex = 0;
11841
                        /* XXX: Why don't we pass the full buffer? */
11842
115k
                        size = XML_PARSER_BIG_BUFFER_SIZE;
11843
115k
                    }
11844
123k
                    tmp = xmlCheckCdataPush(ctxt->input->cur, size, 0);
11845
123k
                    if (tmp <= 0) {
11846
89.1k
                        tmp = -tmp;
11847
89.1k
                        ctxt->input->cur += tmp;
11848
89.1k
                        goto encoding_error;
11849
89.1k
                    }
11850
34.2k
                    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11851
34.2k
                        if (ctxt->sax->cdataBlock != NULL)
11852
21.8k
                            ctxt->sax->cdataBlock(ctxt->userData,
11853
21.8k
                                                  ctxt->input->cur, tmp);
11854
12.4k
                        else if (ctxt->sax->characters != NULL)
11855
12.4k
                            ctxt->sax->characters(ctxt->userData,
11856
12.4k
                                                  ctxt->input->cur, tmp);
11857
34.2k
                    }
11858
34.2k
                    if (ctxt->instate == XML_PARSER_EOF)
11859
0
                        goto done;
11860
34.2k
                    SKIPL(tmp);
11861
175k
    } else {
11862
175k
                    int base = term - CUR_PTR;
11863
175k
        int tmp;
11864
11865
175k
        tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1);
11866
175k
        if ((tmp < 0) || (tmp != base)) {
11867
81.5k
      tmp = -tmp;
11868
81.5k
      ctxt->input->cur += tmp;
11869
81.5k
      goto encoding_error;
11870
81.5k
        }
11871
93.8k
        if ((ctxt->sax != NULL) && (base == 0) &&
11872
93.8k
            (ctxt->sax->cdataBlock != NULL) &&
11873
93.8k
            (!ctxt->disableSAX)) {
11874
      /*
11875
       * Special case to provide identical behaviour
11876
       * between pull and push parsers on enpty CDATA
11877
       * sections
11878
       */
11879
12.6k
       if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11880
12.6k
           (!strncmp((const char *)&ctxt->input->cur[-9],
11881
12.6k
                     "<![CDATA[", 9)))
11882
12.6k
           ctxt->sax->cdataBlock(ctxt->userData,
11883
12.6k
                                 BAD_CAST "", 0);
11884
81.2k
        } else if ((ctxt->sax != NULL) && (base > 0) &&
11885
81.2k
      (!ctxt->disableSAX)) {
11886
73.9k
      if (ctxt->sax->cdataBlock != NULL)
11887
50.0k
          ctxt->sax->cdataBlock(ctxt->userData,
11888
50.0k
              ctxt->input->cur, base);
11889
23.9k
      else if (ctxt->sax->characters != NULL)
11890
23.9k
          ctxt->sax->characters(ctxt->userData,
11891
23.9k
              ctxt->input->cur, base);
11892
73.9k
        }
11893
93.8k
        if (ctxt->instate == XML_PARSER_EOF)
11894
0
      goto done;
11895
93.8k
        SKIPL(base + 3);
11896
93.8k
        ctxt->instate = XML_PARSER_CONTENT;
11897
#ifdef DEBUG_PUSH
11898
        xmlGenericError(xmlGenericErrorContext,
11899
          "PP: entering CONTENT\n");
11900
#endif
11901
93.8k
    }
11902
128k
    break;
11903
384k
      }
11904
905k
            case XML_PARSER_MISC:
11905
1.09M
            case XML_PARSER_PROLOG:
11906
1.13M
            case XML_PARSER_EPILOG:
11907
1.13M
    SKIP_BLANKS;
11908
1.13M
    if (ctxt->input->buf == NULL)
11909
0
        avail = ctxt->input->length -
11910
0
                (ctxt->input->cur - ctxt->input->base);
11911
1.13M
    else
11912
1.13M
        avail = xmlBufUse(ctxt->input->buf->buffer) -
11913
1.13M
                (ctxt->input->cur - ctxt->input->base);
11914
1.13M
    if (avail < 2)
11915
30.0k
        goto done;
11916
1.10M
    cur = ctxt->input->cur[0];
11917
1.10M
    next = ctxt->input->cur[1];
11918
1.10M
          if ((cur == '<') && (next == '?')) {
11919
142k
        if ((!terminate) &&
11920
142k
                        (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11921
28.8k
      goto done;
11922
#ifdef DEBUG_PUSH
11923
        xmlGenericError(xmlGenericErrorContext,
11924
          "PP: Parsing PI\n");
11925
#endif
11926
113k
        xmlParsePI(ctxt);
11927
113k
        if (ctxt->instate == XML_PARSER_EOF)
11928
0
      goto done;
11929
962k
    } else if ((cur == '<') && (next == '!') &&
11930
962k
        (ctxt->input->cur[2] == '-') &&
11931
962k
        (ctxt->input->cur[3] == '-')) {
11932
64.9k
        if ((!terminate) &&
11933
64.9k
                        (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11934
30.6k
      goto done;
11935
#ifdef DEBUG_PUSH
11936
        xmlGenericError(xmlGenericErrorContext,
11937
          "PP: Parsing Comment\n");
11938
#endif
11939
34.3k
        xmlParseComment(ctxt);
11940
34.3k
        if (ctxt->instate == XML_PARSER_EOF)
11941
0
      goto done;
11942
897k
    } else if ((ctxt->instate == XML_PARSER_MISC) &&
11943
897k
                    (cur == '<') && (next == '!') &&
11944
897k
        (ctxt->input->cur[2] == 'D') &&
11945
897k
        (ctxt->input->cur[3] == 'O') &&
11946
897k
        (ctxt->input->cur[4] == 'C') &&
11947
897k
        (ctxt->input->cur[5] == 'T') &&
11948
897k
        (ctxt->input->cur[6] == 'Y') &&
11949
897k
        (ctxt->input->cur[7] == 'P') &&
11950
897k
        (ctxt->input->cur[8] == 'E')) {
11951
442k
        if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11952
158k
                        goto done;
11953
#ifdef DEBUG_PUSH
11954
        xmlGenericError(xmlGenericErrorContext,
11955
          "PP: Parsing internal subset\n");
11956
#endif
11957
283k
        ctxt->inSubset = 1;
11958
283k
        xmlParseDocTypeDecl(ctxt);
11959
283k
        if (ctxt->instate == XML_PARSER_EOF)
11960
0
      goto done;
11961
283k
        if (RAW == '[') {
11962
193k
      ctxt->instate = XML_PARSER_DTD;
11963
#ifdef DEBUG_PUSH
11964
      xmlGenericError(xmlGenericErrorContext,
11965
        "PP: entering DTD\n");
11966
#endif
11967
193k
        } else {
11968
      /*
11969
       * Create and update the external subset.
11970
       */
11971
89.4k
      ctxt->inSubset = 2;
11972
89.4k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11973
89.4k
          (ctxt->sax->externalSubset != NULL))
11974
84.0k
          ctxt->sax->externalSubset(ctxt->userData,
11975
84.0k
            ctxt->intSubName, ctxt->extSubSystem,
11976
84.0k
            ctxt->extSubURI);
11977
89.4k
      ctxt->inSubset = 0;
11978
89.4k
      xmlCleanSpecialAttr(ctxt);
11979
89.4k
      ctxt->instate = XML_PARSER_PROLOG;
11980
#ifdef DEBUG_PUSH
11981
      xmlGenericError(xmlGenericErrorContext,
11982
        "PP: entering PROLOG\n");
11983
#endif
11984
89.4k
        }
11985
455k
    } else if ((cur == '<') && (next == '!') &&
11986
455k
               (avail <
11987
38.0k
                            (ctxt->instate == XML_PARSER_MISC ? 9 : 4))) {
11988
34.6k
        goto done;
11989
420k
    } else if (ctxt->instate == XML_PARSER_EPILOG) {
11990
10.2k
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11991
10.2k
        xmlHaltParser(ctxt);
11992
#ifdef DEBUG_PUSH
11993
        xmlGenericError(xmlGenericErrorContext,
11994
          "PP: entering EOF\n");
11995
#endif
11996
10.2k
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11997
10.2k
      ctxt->sax->endDocument(ctxt->userData);
11998
10.2k
        goto done;
11999
410k
                } else {
12000
410k
        ctxt->instate = XML_PARSER_START_TAG;
12001
#ifdef DEBUG_PUSH
12002
        xmlGenericError(xmlGenericErrorContext,
12003
          "PP: entering START_TAG\n");
12004
#endif
12005
410k
    }
12006
841k
    break;
12007
841k
            case XML_PARSER_DTD: {
12008
654k
                if ((!terminate) && (!xmlParseLookupInternalSubset(ctxt)))
12009
482k
                    goto done;
12010
171k
    xmlParseInternalSubset(ctxt);
12011
171k
    if (ctxt->instate == XML_PARSER_EOF)
12012
74.4k
        goto done;
12013
97.4k
    ctxt->inSubset = 2;
12014
97.4k
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
12015
97.4k
        (ctxt->sax->externalSubset != NULL))
12016
94.2k
        ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
12017
94.2k
          ctxt->extSubSystem, ctxt->extSubURI);
12018
97.4k
    ctxt->inSubset = 0;
12019
97.4k
    xmlCleanSpecialAttr(ctxt);
12020
97.4k
    if (ctxt->instate == XML_PARSER_EOF)
12021
4.44k
        goto done;
12022
93.0k
    ctxt->instate = XML_PARSER_PROLOG;
12023
#ifdef DEBUG_PUSH
12024
    xmlGenericError(xmlGenericErrorContext,
12025
      "PP: entering PROLOG\n");
12026
#endif
12027
93.0k
                break;
12028
97.4k
      }
12029
0
            case XML_PARSER_COMMENT:
12030
0
    xmlGenericError(xmlGenericErrorContext,
12031
0
      "PP: internal error, state == COMMENT\n");
12032
0
    ctxt->instate = XML_PARSER_CONTENT;
12033
#ifdef DEBUG_PUSH
12034
    xmlGenericError(xmlGenericErrorContext,
12035
      "PP: entering CONTENT\n");
12036
#endif
12037
0
    break;
12038
0
            case XML_PARSER_IGNORE:
12039
0
    xmlGenericError(xmlGenericErrorContext,
12040
0
      "PP: internal error, state == IGNORE");
12041
0
          ctxt->instate = XML_PARSER_DTD;
12042
#ifdef DEBUG_PUSH
12043
    xmlGenericError(xmlGenericErrorContext,
12044
      "PP: entering DTD\n");
12045
#endif
12046
0
          break;
12047
0
            case XML_PARSER_PI:
12048
0
    xmlGenericError(xmlGenericErrorContext,
12049
0
      "PP: internal error, state == PI\n");
12050
0
    ctxt->instate = XML_PARSER_CONTENT;
12051
#ifdef DEBUG_PUSH
12052
    xmlGenericError(xmlGenericErrorContext,
12053
      "PP: entering CONTENT\n");
12054
#endif
12055
0
    break;
12056
0
            case XML_PARSER_ENTITY_DECL:
12057
0
    xmlGenericError(xmlGenericErrorContext,
12058
0
      "PP: internal error, state == ENTITY_DECL\n");
12059
0
    ctxt->instate = XML_PARSER_DTD;
12060
#ifdef DEBUG_PUSH
12061
    xmlGenericError(xmlGenericErrorContext,
12062
      "PP: entering DTD\n");
12063
#endif
12064
0
    break;
12065
0
            case XML_PARSER_ENTITY_VALUE:
12066
0
    xmlGenericError(xmlGenericErrorContext,
12067
0
      "PP: internal error, state == ENTITY_VALUE\n");
12068
0
    ctxt->instate = XML_PARSER_CONTENT;
12069
#ifdef DEBUG_PUSH
12070
    xmlGenericError(xmlGenericErrorContext,
12071
      "PP: entering DTD\n");
12072
#endif
12073
0
    break;
12074
0
            case XML_PARSER_ATTRIBUTE_VALUE:
12075
0
    xmlGenericError(xmlGenericErrorContext,
12076
0
      "PP: internal error, state == ATTRIBUTE_VALUE\n");
12077
0
    ctxt->instate = XML_PARSER_START_TAG;
12078
#ifdef DEBUG_PUSH
12079
    xmlGenericError(xmlGenericErrorContext,
12080
      "PP: entering START_TAG\n");
12081
#endif
12082
0
    break;
12083
0
            case XML_PARSER_SYSTEM_LITERAL:
12084
0
    xmlGenericError(xmlGenericErrorContext,
12085
0
      "PP: internal error, state == SYSTEM_LITERAL\n");
12086
0
    ctxt->instate = XML_PARSER_START_TAG;
12087
#ifdef DEBUG_PUSH
12088
    xmlGenericError(xmlGenericErrorContext,
12089
      "PP: entering START_TAG\n");
12090
#endif
12091
0
    break;
12092
0
            case XML_PARSER_PUBLIC_LITERAL:
12093
0
    xmlGenericError(xmlGenericErrorContext,
12094
0
      "PP: internal error, state == PUBLIC_LITERAL\n");
12095
0
    ctxt->instate = XML_PARSER_START_TAG;
12096
#ifdef DEBUG_PUSH
12097
    xmlGenericError(xmlGenericErrorContext,
12098
      "PP: entering START_TAG\n");
12099
#endif
12100
0
    break;
12101
53.2M
  }
12102
53.2M
    }
12103
5.06M
done:
12104
#ifdef DEBUG_PUSH
12105
    xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
12106
#endif
12107
5.06M
    return(ret);
12108
170k
encoding_error:
12109
170k
    {
12110
170k
        char buffer[150];
12111
12112
170k
  snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12113
170k
      ctxt->input->cur[0], ctxt->input->cur[1],
12114
170k
      ctxt->input->cur[2], ctxt->input->cur[3]);
12115
170k
  __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12116
170k
         "Input is not proper UTF-8, indicate encoding !\n%s",
12117
170k
         BAD_CAST buffer, NULL);
12118
170k
    }
12119
170k
    return(0);
12120
5.41M
}
12121
12122
/**
12123
 * xmlParseChunk:
12124
 * @ctxt:  an XML parser context
12125
 * @chunk:  an char array
12126
 * @size:  the size in byte of the chunk
12127
 * @terminate:  last chunk indicator
12128
 *
12129
 * Parse a Chunk of memory
12130
 *
12131
 * Returns zero if no error, the xmlParserErrors otherwise.
12132
 */
12133
int
12134
xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
12135
6.45M
              int terminate) {
12136
6.45M
    int end_in_lf = 0;
12137
6.45M
    int remain = 0;
12138
12139
6.45M
    if (ctxt == NULL)
12140
0
        return(XML_ERR_INTERNAL_ERROR);
12141
6.45M
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12142
1.15M
        return(ctxt->errNo);
12143
5.30M
    if (ctxt->instate == XML_PARSER_EOF)
12144
1.56k
        return(-1);
12145
5.29M
    if (ctxt->input == NULL)
12146
0
        return(-1);
12147
12148
5.29M
    ctxt->progressive = 1;
12149
5.29M
    if (ctxt->instate == XML_PARSER_START)
12150
1.23M
        xmlDetectSAX2(ctxt);
12151
5.29M
    if ((size > 0) && (chunk != NULL) && (!terminate) &&
12152
5.29M
        (chunk[size - 1] == '\r')) {
12153
33.3k
  end_in_lf = 1;
12154
33.3k
  size--;
12155
33.3k
    }
12156
12157
5.41M
xmldecl_done:
12158
12159
5.41M
    if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12160
5.41M
        (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF))  {
12161
4.99M
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12162
4.99M
  size_t cur = ctxt->input->cur - ctxt->input->base;
12163
4.99M
  int res;
12164
12165
        /*
12166
         * Specific handling if we autodetected an encoding, we should not
12167
         * push more than the first line ... which depend on the encoding
12168
         * And only push the rest once the final encoding was detected
12169
         */
12170
4.99M
        if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
12171
4.99M
            (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
12172
184k
            unsigned int len = 45;
12173
12174
184k
            if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12175
184k
                               BAD_CAST "UTF-16")) ||
12176
184k
                (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12177
2.42k
                               BAD_CAST "UTF16")))
12178
182k
                len = 90;
12179
2.42k
            else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12180
2.42k
                                    BAD_CAST "UCS-4")) ||
12181
2.42k
                     (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12182
566
                                    BAD_CAST "UCS4")))
12183
1.85k
                len = 180;
12184
12185
184k
            if (ctxt->input->buf->rawconsumed < len)
12186
12.5k
                len -= ctxt->input->buf->rawconsumed;
12187
12188
            /*
12189
             * Change size for reading the initial declaration only
12190
             * if size is greater than len. Otherwise, memmove in xmlBufferAdd
12191
             * will blindly copy extra bytes from memory.
12192
             */
12193
184k
            if ((unsigned int) size > len) {
12194
116k
                remain = size - len;
12195
116k
                size = len;
12196
116k
            } else {
12197
68.0k
                remain = 0;
12198
68.0k
            }
12199
184k
        }
12200
4.99M
  res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12201
4.99M
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12202
4.99M
  if (res < 0) {
12203
1.77k
      ctxt->errNo = XML_PARSER_EOF;
12204
1.77k
      xmlHaltParser(ctxt);
12205
1.77k
      return (XML_PARSER_EOF);
12206
1.77k
  }
12207
#ifdef DEBUG_PUSH
12208
  xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12209
#endif
12210
12211
4.99M
    } else if (ctxt->instate != XML_PARSER_EOF) {
12212
422k
  if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
12213
422k
      xmlParserInputBufferPtr in = ctxt->input->buf;
12214
422k
      if ((in->encoder != NULL) && (in->buffer != NULL) &&
12215
422k
        (in->raw != NULL)) {
12216
40.4k
    int nbchars;
12217
40.4k
    size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
12218
40.4k
    size_t current = ctxt->input->cur - ctxt->input->base;
12219
12220
40.4k
    nbchars = xmlCharEncInput(in, terminate);
12221
40.4k
    xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
12222
40.4k
    if (nbchars < 0) {
12223
        /* TODO 2.6.0 */
12224
2.80k
        xmlGenericError(xmlGenericErrorContext,
12225
2.80k
            "xmlParseChunk: encoder error\n");
12226
2.80k
                    xmlHaltParser(ctxt);
12227
2.80k
        return(XML_ERR_INVALID_ENCODING);
12228
2.80k
    }
12229
40.4k
      }
12230
422k
  }
12231
422k
    }
12232
12233
5.41M
    if (remain != 0) {
12234
116k
        xmlParseTryOrFinish(ctxt, 0);
12235
5.29M
    } else {
12236
5.29M
        xmlParseTryOrFinish(ctxt, terminate);
12237
5.29M
    }
12238
5.41M
    if (ctxt->instate == XML_PARSER_EOF)
12239
157k
        return(ctxt->errNo);
12240
12241
5.25M
    if ((ctxt->input != NULL) &&
12242
5.25M
         (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
12243
5.25M
         ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
12244
5.25M
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
12245
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
12246
0
        xmlHaltParser(ctxt);
12247
0
    }
12248
5.25M
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12249
180k
        return(ctxt->errNo);
12250
12251
5.07M
    if (remain != 0) {
12252
115k
        chunk += size;
12253
115k
        size = remain;
12254
115k
        remain = 0;
12255
115k
        goto xmldecl_done;
12256
115k
    }
12257
4.95M
    if ((end_in_lf == 1) && (ctxt->input != NULL) &&
12258
4.95M
        (ctxt->input->buf != NULL)) {
12259
32.2k
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
12260
32.2k
           ctxt->input);
12261
32.2k
  size_t current = ctxt->input->cur - ctxt->input->base;
12262
12263
32.2k
  xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
12264
12265
32.2k
  xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
12266
32.2k
            base, current);
12267
32.2k
    }
12268
4.95M
    if (terminate) {
12269
  /*
12270
   * Check for termination
12271
   */
12272
177k
  int cur_avail = 0;
12273
12274
177k
  if (ctxt->input != NULL) {
12275
177k
      if (ctxt->input->buf == NULL)
12276
0
    cur_avail = ctxt->input->length -
12277
0
          (ctxt->input->cur - ctxt->input->base);
12278
177k
      else
12279
177k
    cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
12280
177k
                    (ctxt->input->cur - ctxt->input->base);
12281
177k
  }
12282
12283
177k
  if ((ctxt->instate != XML_PARSER_EOF) &&
12284
177k
      (ctxt->instate != XML_PARSER_EPILOG)) {
12285
155k
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12286
155k
  }
12287
177k
  if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) {
12288
439
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12289
439
  }
12290
177k
  if (ctxt->instate != XML_PARSER_EOF) {
12291
177k
      if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
12292
177k
    ctxt->sax->endDocument(ctxt->userData);
12293
177k
  }
12294
177k
  ctxt->instate = XML_PARSER_EOF;
12295
177k
    }
12296
4.95M
    if (ctxt->wellFormed == 0)
12297
2.46M
  return((xmlParserErrors) ctxt->errNo);
12298
2.49M
    else
12299
2.49M
        return(0);
12300
4.95M
}
12301
12302
/************************************************************************
12303
 *                  *
12304
 *    I/O front end functions to the parser     *
12305
 *                  *
12306
 ************************************************************************/
12307
12308
/**
12309
 * xmlCreatePushParserCtxt:
12310
 * @sax:  a SAX handler
12311
 * @user_data:  The user data returned on SAX callbacks
12312
 * @chunk:  a pointer to an array of chars
12313
 * @size:  number of chars in the array
12314
 * @filename:  an optional file name or URI
12315
 *
12316
 * Create a parser context for using the XML parser in push mode.
12317
 * If @buffer and @size are non-NULL, the data is used to detect
12318
 * the encoding.  The remaining characters will be parsed so they
12319
 * don't need to be fed in again through xmlParseChunk.
12320
 * To allow content encoding detection, @size should be >= 4
12321
 * The value of @filename is used for fetching external entities
12322
 * and error/warning reports.
12323
 *
12324
 * Returns the new parser context or NULL
12325
 */
12326
12327
xmlParserCtxtPtr
12328
xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12329
627k
                        const char *chunk, int size, const char *filename) {
12330
627k
    xmlParserCtxtPtr ctxt;
12331
627k
    xmlParserInputPtr inputStream;
12332
627k
    xmlParserInputBufferPtr buf;
12333
627k
    xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12334
12335
    /*
12336
     * plug some encoding conversion routines
12337
     */
12338
627k
    if ((chunk != NULL) && (size >= 4))
12339
308k
  enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12340
12341
627k
    buf = xmlAllocParserInputBuffer(enc);
12342
627k
    if (buf == NULL) return(NULL);
12343
12344
627k
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
12345
627k
    if (ctxt == NULL) {
12346
0
        xmlErrMemory(NULL, "creating parser: out of memory\n");
12347
0
  xmlFreeParserInputBuffer(buf);
12348
0
  return(NULL);
12349
0
    }
12350
627k
    ctxt->dictNames = 1;
12351
627k
    if (filename == NULL) {
12352
313k
  ctxt->directory = NULL;
12353
313k
    } else {
12354
313k
        ctxt->directory = xmlParserGetDirectory(filename);
12355
313k
    }
12356
12357
627k
    inputStream = xmlNewInputStream(ctxt);
12358
627k
    if (inputStream == NULL) {
12359
0
  xmlFreeParserCtxt(ctxt);
12360
0
  xmlFreeParserInputBuffer(buf);
12361
0
  return(NULL);
12362
0
    }
12363
12364
627k
    if (filename == NULL)
12365
313k
  inputStream->filename = NULL;
12366
313k
    else {
12367
313k
  inputStream->filename = (char *)
12368
313k
      xmlCanonicPath((const xmlChar *) filename);
12369
313k
  if (inputStream->filename == NULL) {
12370
0
            xmlFreeInputStream(inputStream);
12371
0
      xmlFreeParserCtxt(ctxt);
12372
0
      xmlFreeParserInputBuffer(buf);
12373
0
      return(NULL);
12374
0
  }
12375
313k
    }
12376
627k
    inputStream->buf = buf;
12377
627k
    xmlBufResetInput(inputStream->buf->buffer, inputStream);
12378
627k
    inputPush(ctxt, inputStream);
12379
12380
    /*
12381
     * If the caller didn't provide an initial 'chunk' for determining
12382
     * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12383
     * that it can be automatically determined later
12384
     */
12385
627k
    ctxt->charset = XML_CHAR_ENCODING_NONE;
12386
12387
627k
    if ((size != 0) && (chunk != NULL) &&
12388
627k
        (ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
12389
308k
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12390
308k
  size_t cur = ctxt->input->cur - ctxt->input->base;
12391
12392
308k
  xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12393
12394
308k
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12395
#ifdef DEBUG_PUSH
12396
  xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12397
#endif
12398
308k
    }
12399
12400
627k
    if (enc != XML_CHAR_ENCODING_NONE) {
12401
160k
        xmlSwitchEncoding(ctxt, enc);
12402
160k
    }
12403
12404
627k
    return(ctxt);
12405
627k
}
12406
#endif /* LIBXML_PUSH_ENABLED */
12407
12408
/**
12409
 * xmlHaltParser:
12410
 * @ctxt:  an XML parser context
12411
 *
12412
 * Blocks further parser processing don't override error
12413
 * for internal use
12414
 */
12415
static void
12416
557k
xmlHaltParser(xmlParserCtxtPtr ctxt) {
12417
557k
    if (ctxt == NULL)
12418
0
        return;
12419
557k
    ctxt->instate = XML_PARSER_EOF;
12420
557k
    ctxt->disableSAX = 1;
12421
565k
    while (ctxt->inputNr > 1)
12422
8.06k
        xmlFreeInputStream(inputPop(ctxt));
12423
557k
    if (ctxt->input != NULL) {
12424
        /*
12425
   * in case there was a specific allocation deallocate before
12426
   * overriding base
12427
   */
12428
557k
        if (ctxt->input->free != NULL) {
12429
0
      ctxt->input->free((xmlChar *) ctxt->input->base);
12430
0
      ctxt->input->free = NULL;
12431
0
  }
12432
557k
        if (ctxt->input->buf != NULL) {
12433
494k
            xmlFreeParserInputBuffer(ctxt->input->buf);
12434
494k
            ctxt->input->buf = NULL;
12435
494k
        }
12436
557k
  ctxt->input->cur = BAD_CAST"";
12437
557k
        ctxt->input->length = 0;
12438
557k
  ctxt->input->base = ctxt->input->cur;
12439
557k
        ctxt->input->end = ctxt->input->cur;
12440
557k
    }
12441
557k
}
12442
12443
/**
12444
 * xmlStopParser:
12445
 * @ctxt:  an XML parser context
12446
 *
12447
 * Blocks further parser processing
12448
 */
12449
void
12450
314k
xmlStopParser(xmlParserCtxtPtr ctxt) {
12451
314k
    if (ctxt == NULL)
12452
0
        return;
12453
314k
    xmlHaltParser(ctxt);
12454
314k
    ctxt->errNo = XML_ERR_USER_STOP;
12455
314k
}
12456
12457
/**
12458
 * xmlCreateIOParserCtxt:
12459
 * @sax:  a SAX handler
12460
 * @user_data:  The user data returned on SAX callbacks
12461
 * @ioread:  an I/O read function
12462
 * @ioclose:  an I/O close function
12463
 * @ioctx:  an I/O handler
12464
 * @enc:  the charset encoding if known
12465
 *
12466
 * Create a parser context for using the XML parser with an existing
12467
 * I/O stream
12468
 *
12469
 * Returns the new parser context or NULL
12470
 */
12471
xmlParserCtxtPtr
12472
xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12473
  xmlInputReadCallback   ioread, xmlInputCloseCallback  ioclose,
12474
0
  void *ioctx, xmlCharEncoding enc) {
12475
0
    xmlParserCtxtPtr ctxt;
12476
0
    xmlParserInputPtr inputStream;
12477
0
    xmlParserInputBufferPtr buf;
12478
12479
0
    if (ioread == NULL) return(NULL);
12480
12481
0
    buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
12482
0
    if (buf == NULL) {
12483
0
        if (ioclose != NULL)
12484
0
            ioclose(ioctx);
12485
0
        return (NULL);
12486
0
    }
12487
12488
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
12489
0
    if (ctxt == NULL) {
12490
0
  xmlFreeParserInputBuffer(buf);
12491
0
  return(NULL);
12492
0
    }
12493
12494
0
    inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12495
0
    if (inputStream == NULL) {
12496
0
  xmlFreeParserCtxt(ctxt);
12497
0
  return(NULL);
12498
0
    }
12499
0
    inputPush(ctxt, inputStream);
12500
12501
0
    return(ctxt);
12502
0
}
12503
12504
#ifdef LIBXML_VALID_ENABLED
12505
/************************************************************************
12506
 *                  *
12507
 *    Front ends when parsing a DTD       *
12508
 *                  *
12509
 ************************************************************************/
12510
12511
/**
12512
 * xmlIOParseDTD:
12513
 * @sax:  the SAX handler block or NULL
12514
 * @input:  an Input Buffer
12515
 * @enc:  the charset encoding if known
12516
 *
12517
 * Load and parse a DTD
12518
 *
12519
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12520
 * @input will be freed by the function in any case.
12521
 */
12522
12523
xmlDtdPtr
12524
xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12525
0
        xmlCharEncoding enc) {
12526
0
    xmlDtdPtr ret = NULL;
12527
0
    xmlParserCtxtPtr ctxt;
12528
0
    xmlParserInputPtr pinput = NULL;
12529
0
    xmlChar start[4];
12530
12531
0
    if (input == NULL)
12532
0
  return(NULL);
12533
12534
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12535
0
    if (ctxt == NULL) {
12536
0
        xmlFreeParserInputBuffer(input);
12537
0
  return(NULL);
12538
0
    }
12539
12540
    /* We are loading a DTD */
12541
0
    ctxt->options |= XML_PARSE_DTDLOAD;
12542
12543
0
    xmlDetectSAX2(ctxt);
12544
12545
    /*
12546
     * generate a parser input from the I/O handler
12547
     */
12548
12549
0
    pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12550
0
    if (pinput == NULL) {
12551
0
        xmlFreeParserInputBuffer(input);
12552
0
  xmlFreeParserCtxt(ctxt);
12553
0
  return(NULL);
12554
0
    }
12555
12556
    /*
12557
     * plug some encoding conversion routines here.
12558
     */
12559
0
    if (xmlPushInput(ctxt, pinput) < 0) {
12560
0
  xmlFreeParserCtxt(ctxt);
12561
0
  return(NULL);
12562
0
    }
12563
0
    if (enc != XML_CHAR_ENCODING_NONE) {
12564
0
        xmlSwitchEncoding(ctxt, enc);
12565
0
    }
12566
12567
0
    pinput->filename = NULL;
12568
0
    pinput->line = 1;
12569
0
    pinput->col = 1;
12570
0
    pinput->base = ctxt->input->cur;
12571
0
    pinput->cur = ctxt->input->cur;
12572
0
    pinput->free = NULL;
12573
12574
    /*
12575
     * let's parse that entity knowing it's an external subset.
12576
     */
12577
0
    ctxt->inSubset = 2;
12578
0
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12579
0
    if (ctxt->myDoc == NULL) {
12580
0
  xmlErrMemory(ctxt, "New Doc failed");
12581
0
  return(NULL);
12582
0
    }
12583
0
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
12584
0
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12585
0
                                 BAD_CAST "none", BAD_CAST "none");
12586
12587
0
    if ((enc == XML_CHAR_ENCODING_NONE) &&
12588
0
        ((ctxt->input->end - ctxt->input->cur) >= 4)) {
12589
  /*
12590
   * Get the 4 first bytes and decode the charset
12591
   * if enc != XML_CHAR_ENCODING_NONE
12592
   * plug some encoding conversion routines.
12593
   */
12594
0
  start[0] = RAW;
12595
0
  start[1] = NXT(1);
12596
0
  start[2] = NXT(2);
12597
0
  start[3] = NXT(3);
12598
0
  enc = xmlDetectCharEncoding(start, 4);
12599
0
  if (enc != XML_CHAR_ENCODING_NONE) {
12600
0
      xmlSwitchEncoding(ctxt, enc);
12601
0
  }
12602
0
    }
12603
12604
0
    xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12605
12606
0
    if (ctxt->myDoc != NULL) {
12607
0
  if (ctxt->wellFormed) {
12608
0
      ret = ctxt->myDoc->extSubset;
12609
0
      ctxt->myDoc->extSubset = NULL;
12610
0
      if (ret != NULL) {
12611
0
    xmlNodePtr tmp;
12612
12613
0
    ret->doc = NULL;
12614
0
    tmp = ret->children;
12615
0
    while (tmp != NULL) {
12616
0
        tmp->doc = NULL;
12617
0
        tmp = tmp->next;
12618
0
    }
12619
0
      }
12620
0
  } else {
12621
0
      ret = NULL;
12622
0
  }
12623
0
        xmlFreeDoc(ctxt->myDoc);
12624
0
        ctxt->myDoc = NULL;
12625
0
    }
12626
0
    xmlFreeParserCtxt(ctxt);
12627
12628
0
    return(ret);
12629
0
}
12630
12631
/**
12632
 * xmlSAXParseDTD:
12633
 * @sax:  the SAX handler block
12634
 * @ExternalID:  a NAME* containing the External ID of the DTD
12635
 * @SystemID:  a NAME* containing the URL to the DTD
12636
 *
12637
 * DEPRECATED: Don't use.
12638
 *
12639
 * Load and parse an external subset.
12640
 *
12641
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12642
 */
12643
12644
xmlDtdPtr
12645
xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12646
0
                          const xmlChar *SystemID) {
12647
0
    xmlDtdPtr ret = NULL;
12648
0
    xmlParserCtxtPtr ctxt;
12649
0
    xmlParserInputPtr input = NULL;
12650
0
    xmlCharEncoding enc;
12651
0
    xmlChar* systemIdCanonic;
12652
12653
0
    if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12654
12655
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12656
0
    if (ctxt == NULL) {
12657
0
  return(NULL);
12658
0
    }
12659
12660
    /* We are loading a DTD */
12661
0
    ctxt->options |= XML_PARSE_DTDLOAD;
12662
12663
    /*
12664
     * Canonicalise the system ID
12665
     */
12666
0
    systemIdCanonic = xmlCanonicPath(SystemID);
12667
0
    if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
12668
0
  xmlFreeParserCtxt(ctxt);
12669
0
  return(NULL);
12670
0
    }
12671
12672
    /*
12673
     * Ask the Entity resolver to load the damn thing
12674
     */
12675
12676
0
    if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
12677
0
  input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12678
0
                                   systemIdCanonic);
12679
0
    if (input == NULL) {
12680
0
  xmlFreeParserCtxt(ctxt);
12681
0
  if (systemIdCanonic != NULL)
12682
0
      xmlFree(systemIdCanonic);
12683
0
  return(NULL);
12684
0
    }
12685
12686
    /*
12687
     * plug some encoding conversion routines here.
12688
     */
12689
0
    if (xmlPushInput(ctxt, input) < 0) {
12690
0
  xmlFreeParserCtxt(ctxt);
12691
0
  if (systemIdCanonic != NULL)
12692
0
      xmlFree(systemIdCanonic);
12693
0
  return(NULL);
12694
0
    }
12695
0
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12696
0
  enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12697
0
  xmlSwitchEncoding(ctxt, enc);
12698
0
    }
12699
12700
0
    if (input->filename == NULL)
12701
0
  input->filename = (char *) systemIdCanonic;
12702
0
    else
12703
0
  xmlFree(systemIdCanonic);
12704
0
    input->line = 1;
12705
0
    input->col = 1;
12706
0
    input->base = ctxt->input->cur;
12707
0
    input->cur = ctxt->input->cur;
12708
0
    input->free = NULL;
12709
12710
    /*
12711
     * let's parse that entity knowing it's an external subset.
12712
     */
12713
0
    ctxt->inSubset = 2;
12714
0
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12715
0
    if (ctxt->myDoc == NULL) {
12716
0
  xmlErrMemory(ctxt, "New Doc failed");
12717
0
  xmlFreeParserCtxt(ctxt);
12718
0
  return(NULL);
12719
0
    }
12720
0
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
12721
0
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12722
0
                                 ExternalID, SystemID);
12723
0
    xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12724
12725
0
    if (ctxt->myDoc != NULL) {
12726
0
  if (ctxt->wellFormed) {
12727
0
      ret = ctxt->myDoc->extSubset;
12728
0
      ctxt->myDoc->extSubset = NULL;
12729
0
      if (ret != NULL) {
12730
0
    xmlNodePtr tmp;
12731
12732
0
    ret->doc = NULL;
12733
0
    tmp = ret->children;
12734
0
    while (tmp != NULL) {
12735
0
        tmp->doc = NULL;
12736
0
        tmp = tmp->next;
12737
0
    }
12738
0
      }
12739
0
  } else {
12740
0
      ret = NULL;
12741
0
  }
12742
0
        xmlFreeDoc(ctxt->myDoc);
12743
0
        ctxt->myDoc = NULL;
12744
0
    }
12745
0
    xmlFreeParserCtxt(ctxt);
12746
12747
0
    return(ret);
12748
0
}
12749
12750
12751
/**
12752
 * xmlParseDTD:
12753
 * @ExternalID:  a NAME* containing the External ID of the DTD
12754
 * @SystemID:  a NAME* containing the URL to the DTD
12755
 *
12756
 * Load and parse an external subset.
12757
 *
12758
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12759
 */
12760
12761
xmlDtdPtr
12762
0
xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12763
0
    return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12764
0
}
12765
#endif /* LIBXML_VALID_ENABLED */
12766
12767
/************************************************************************
12768
 *                  *
12769
 *    Front ends when parsing an Entity     *
12770
 *                  *
12771
 ************************************************************************/
12772
12773
/**
12774
 * xmlParseCtxtExternalEntity:
12775
 * @ctx:  the existing parsing context
12776
 * @URL:  the URL for the entity to load
12777
 * @ID:  the System ID for the entity to load
12778
 * @lst:  the return value for the set of parsed nodes
12779
 *
12780
 * Parse an external general entity within an existing parsing context
12781
 * An external general parsed entity is well-formed if it matches the
12782
 * production labeled extParsedEnt.
12783
 *
12784
 * [78] extParsedEnt ::= TextDecl? content
12785
 *
12786
 * Returns 0 if the entity is well formed, -1 in case of args problem and
12787
 *    the parser error code otherwise
12788
 */
12789
12790
int
12791
xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
12792
0
                 const xmlChar *ID, xmlNodePtr *lst) {
12793
0
    void *userData;
12794
12795
0
    if (ctx == NULL) return(-1);
12796
    /*
12797
     * If the user provided their own SAX callbacks, then reuse the
12798
     * userData callback field, otherwise the expected setup in a
12799
     * DOM builder is to have userData == ctxt
12800
     */
12801
0
    if (ctx->userData == ctx)
12802
0
        userData = NULL;
12803
0
    else
12804
0
        userData = ctx->userData;
12805
0
    return xmlParseExternalEntityPrivate(ctx->myDoc, ctx, ctx->sax,
12806
0
                                         userData, ctx->depth + 1,
12807
0
                                         URL, ID, lst);
12808
0
}
12809
12810
/**
12811
 * xmlParseExternalEntityPrivate:
12812
 * @doc:  the document the chunk pertains to
12813
 * @oldctxt:  the previous parser context if available
12814
 * @sax:  the SAX handler block (possibly NULL)
12815
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12816
 * @depth:  Used for loop detection, use 0
12817
 * @URL:  the URL for the entity to load
12818
 * @ID:  the System ID for the entity to load
12819
 * @list:  the return value for the set of parsed nodes
12820
 *
12821
 * Private version of xmlParseExternalEntity()
12822
 *
12823
 * Returns 0 if the entity is well formed, -1 in case of args problem and
12824
 *    the parser error code otherwise
12825
 */
12826
12827
static xmlParserErrors
12828
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
12829
                xmlSAXHandlerPtr sax,
12830
          void *user_data, int depth, const xmlChar *URL,
12831
45.3k
          const xmlChar *ID, xmlNodePtr *list) {
12832
45.3k
    xmlParserCtxtPtr ctxt;
12833
45.3k
    xmlDocPtr newDoc;
12834
45.3k
    xmlNodePtr newRoot;
12835
45.3k
    xmlParserErrors ret = XML_ERR_OK;
12836
45.3k
    xmlChar start[4];
12837
45.3k
    xmlCharEncoding enc;
12838
12839
45.3k
    if (((depth > 40) &&
12840
45.3k
  ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12841
45.3k
  (depth > 100)) {
12842
0
  xmlFatalErrMsg(oldctxt, XML_ERR_ENTITY_LOOP,
12843
0
                       "Maximum entity nesting depth exceeded");
12844
0
        return(XML_ERR_ENTITY_LOOP);
12845
0
    }
12846
12847
45.3k
    if (list != NULL)
12848
13.3k
        *list = NULL;
12849
45.3k
    if ((URL == NULL) && (ID == NULL))
12850
284
  return(XML_ERR_INTERNAL_ERROR);
12851
45.0k
    if (doc == NULL)
12852
0
  return(XML_ERR_INTERNAL_ERROR);
12853
12854
45.0k
    ctxt = xmlCreateEntityParserCtxtInternal(sax, user_data, URL, ID, NULL,
12855
45.0k
                                             oldctxt);
12856
45.0k
    if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
12857
9.91k
    if (oldctxt != NULL) {
12858
9.91k
        ctxt->nbErrors = oldctxt->nbErrors;
12859
9.91k
        ctxt->nbWarnings = oldctxt->nbWarnings;
12860
9.91k
    }
12861
9.91k
    xmlDetectSAX2(ctxt);
12862
12863
9.91k
    newDoc = xmlNewDoc(BAD_CAST "1.0");
12864
9.91k
    if (newDoc == NULL) {
12865
0
  xmlFreeParserCtxt(ctxt);
12866
0
  return(XML_ERR_INTERNAL_ERROR);
12867
0
    }
12868
9.91k
    newDoc->properties = XML_DOC_INTERNAL;
12869
9.91k
    if (doc) {
12870
9.91k
        newDoc->intSubset = doc->intSubset;
12871
9.91k
        newDoc->extSubset = doc->extSubset;
12872
9.91k
        if (doc->dict) {
12873
5.99k
            newDoc->dict = doc->dict;
12874
5.99k
            xmlDictReference(newDoc->dict);
12875
5.99k
        }
12876
9.91k
        if (doc->URL != NULL) {
12877
5.90k
            newDoc->URL = xmlStrdup(doc->URL);
12878
5.90k
        }
12879
9.91k
    }
12880
9.91k
    newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12881
9.91k
    if (newRoot == NULL) {
12882
0
  if (sax != NULL)
12883
0
  xmlFreeParserCtxt(ctxt);
12884
0
  newDoc->intSubset = NULL;
12885
0
  newDoc->extSubset = NULL;
12886
0
        xmlFreeDoc(newDoc);
12887
0
  return(XML_ERR_INTERNAL_ERROR);
12888
0
    }
12889
9.91k
    xmlAddChild((xmlNodePtr) newDoc, newRoot);
12890
9.91k
    nodePush(ctxt, newDoc->children);
12891
9.91k
    if (doc == NULL) {
12892
0
        ctxt->myDoc = newDoc;
12893
9.91k
    } else {
12894
9.91k
        ctxt->myDoc = doc;
12895
9.91k
        newRoot->doc = doc;
12896
9.91k
    }
12897
12898
    /*
12899
     * Get the 4 first bytes and decode the charset
12900
     * if enc != XML_CHAR_ENCODING_NONE
12901
     * plug some encoding conversion routines.
12902
     */
12903
9.91k
    GROW;
12904
9.91k
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12905
9.45k
  start[0] = RAW;
12906
9.45k
  start[1] = NXT(1);
12907
9.45k
  start[2] = NXT(2);
12908
9.45k
  start[3] = NXT(3);
12909
9.45k
  enc = xmlDetectCharEncoding(start, 4);
12910
9.45k
  if (enc != XML_CHAR_ENCODING_NONE) {
12911
1.51k
      xmlSwitchEncoding(ctxt, enc);
12912
1.51k
  }
12913
9.45k
    }
12914
12915
    /*
12916
     * Parse a possible text declaration first
12917
     */
12918
9.91k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
12919
923
  xmlParseTextDecl(ctxt);
12920
        /*
12921
         * An XML-1.0 document can't reference an entity not XML-1.0
12922
         */
12923
923
        if ((xmlStrEqual(oldctxt->version, BAD_CAST "1.0")) &&
12924
923
            (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
12925
248
            xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
12926
248
                           "Version mismatch between document and entity\n");
12927
248
        }
12928
923
    }
12929
12930
9.91k
    ctxt->instate = XML_PARSER_CONTENT;
12931
9.91k
    ctxt->depth = depth;
12932
9.91k
    if (oldctxt != NULL) {
12933
9.91k
  ctxt->_private = oldctxt->_private;
12934
9.91k
  ctxt->loadsubset = oldctxt->loadsubset;
12935
9.91k
  ctxt->validate = oldctxt->validate;
12936
9.91k
  ctxt->valid = oldctxt->valid;
12937
9.91k
  ctxt->replaceEntities = oldctxt->replaceEntities;
12938
9.91k
        if (oldctxt->validate) {
12939
3.84k
            ctxt->vctxt.error = oldctxt->vctxt.error;
12940
3.84k
            ctxt->vctxt.warning = oldctxt->vctxt.warning;
12941
3.84k
            ctxt->vctxt.userData = oldctxt->vctxt.userData;
12942
3.84k
            ctxt->vctxt.flags = oldctxt->vctxt.flags;
12943
3.84k
        }
12944
9.91k
  ctxt->external = oldctxt->external;
12945
9.91k
        if (ctxt->dict) xmlDictFree(ctxt->dict);
12946
9.91k
        ctxt->dict = oldctxt->dict;
12947
9.91k
        ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12948
9.91k
        ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12949
9.91k
        ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12950
9.91k
        ctxt->dictNames = oldctxt->dictNames;
12951
9.91k
        ctxt->attsDefault = oldctxt->attsDefault;
12952
9.91k
        ctxt->attsSpecial = oldctxt->attsSpecial;
12953
9.91k
        ctxt->linenumbers = oldctxt->linenumbers;
12954
9.91k
  ctxt->record_info = oldctxt->record_info;
12955
9.91k
  ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
12956
9.91k
  ctxt->node_seq.length = oldctxt->node_seq.length;
12957
9.91k
  ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
12958
9.91k
    } else {
12959
  /*
12960
   * Doing validity checking on chunk without context
12961
   * doesn't make sense
12962
   */
12963
0
  ctxt->_private = NULL;
12964
0
  ctxt->validate = 0;
12965
0
  ctxt->external = 2;
12966
0
  ctxt->loadsubset = 0;
12967
0
    }
12968
12969
9.91k
    xmlParseContent(ctxt);
12970
12971
9.91k
    if ((RAW == '<') && (NXT(1) == '/')) {
12972
294
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12973
9.61k
    } else if (RAW != 0) {
12974
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12975
0
    }
12976
9.91k
    if (ctxt->node != newDoc->children) {
12977
2.36k
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12978
2.36k
    }
12979
12980
9.91k
    if (!ctxt->wellFormed) {
12981
4.91k
  ret = (xmlParserErrors)ctxt->errNo;
12982
4.91k
        if (oldctxt != NULL) {
12983
4.91k
            oldctxt->errNo = ctxt->errNo;
12984
4.91k
            oldctxt->wellFormed = 0;
12985
4.91k
            xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12986
4.91k
        }
12987
4.99k
    } else {
12988
4.99k
  if (list != NULL) {
12989
1.74k
      xmlNodePtr cur;
12990
12991
      /*
12992
       * Return the newly created nodeset after unlinking it from
12993
       * they pseudo parent.
12994
       */
12995
1.74k
      cur = newDoc->children->children;
12996
1.74k
      *list = cur;
12997
6.69k
      while (cur != NULL) {
12998
4.95k
    cur->parent = NULL;
12999
4.95k
    cur = cur->next;
13000
4.95k
      }
13001
1.74k
            newDoc->children->children = NULL;
13002
1.74k
  }
13003
4.99k
  ret = XML_ERR_OK;
13004
4.99k
    }
13005
13006
    /*
13007
     * Also record the size of the entity parsed
13008
     */
13009
9.91k
    if (ctxt->input != NULL && oldctxt != NULL) {
13010
9.91k
        unsigned long consumed = ctxt->input->consumed;
13011
13012
9.91k
        xmlSaturatedAddSizeT(&consumed, ctxt->input->cur - ctxt->input->base);
13013
13014
9.91k
        xmlSaturatedAdd(&oldctxt->sizeentities, consumed);
13015
9.91k
        xmlSaturatedAdd(&oldctxt->sizeentities, ctxt->sizeentities);
13016
13017
9.91k
        xmlSaturatedAdd(&oldctxt->sizeentcopy, consumed);
13018
9.91k
        xmlSaturatedAdd(&oldctxt->sizeentcopy, ctxt->sizeentcopy);
13019
9.91k
    }
13020
13021
9.91k
    if (oldctxt != NULL) {
13022
9.91k
        ctxt->dict = NULL;
13023
9.91k
        ctxt->attsDefault = NULL;
13024
9.91k
        ctxt->attsSpecial = NULL;
13025
9.91k
        oldctxt->nbErrors = ctxt->nbErrors;
13026
9.91k
        oldctxt->nbWarnings = ctxt->nbWarnings;
13027
9.91k
        oldctxt->validate = ctxt->validate;
13028
9.91k
        oldctxt->valid = ctxt->valid;
13029
9.91k
        oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
13030
9.91k
        oldctxt->node_seq.length = ctxt->node_seq.length;
13031
9.91k
        oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
13032
9.91k
    }
13033
9.91k
    ctxt->node_seq.maximum = 0;
13034
9.91k
    ctxt->node_seq.length = 0;
13035
9.91k
    ctxt->node_seq.buffer = NULL;
13036
9.91k
    xmlFreeParserCtxt(ctxt);
13037
9.91k
    newDoc->intSubset = NULL;
13038
9.91k
    newDoc->extSubset = NULL;
13039
9.91k
    xmlFreeDoc(newDoc);
13040
13041
9.91k
    return(ret);
13042
9.91k
}
13043
13044
#ifdef LIBXML_SAX1_ENABLED
13045
/**
13046
 * xmlParseExternalEntity:
13047
 * @doc:  the document the chunk pertains to
13048
 * @sax:  the SAX handler block (possibly NULL)
13049
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13050
 * @depth:  Used for loop detection, use 0
13051
 * @URL:  the URL for the entity to load
13052
 * @ID:  the System ID for the entity to load
13053
 * @lst:  the return value for the set of parsed nodes
13054
 *
13055
 * Parse an external general entity
13056
 * An external general parsed entity is well-formed if it matches the
13057
 * production labeled extParsedEnt.
13058
 *
13059
 * [78] extParsedEnt ::= TextDecl? content
13060
 *
13061
 * Returns 0 if the entity is well formed, -1 in case of args problem and
13062
 *    the parser error code otherwise
13063
 */
13064
13065
int
13066
xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
13067
0
    int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
13068
0
    return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
13069
0
                           ID, lst));
13070
0
}
13071
13072
/**
13073
 * xmlParseBalancedChunkMemory:
13074
 * @doc:  the document the chunk pertains to (must not be NULL)
13075
 * @sax:  the SAX handler block (possibly NULL)
13076
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13077
 * @depth:  Used for loop detection, use 0
13078
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13079
 * @lst:  the return value for the set of parsed nodes
13080
 *
13081
 * Parse a well-balanced chunk of an XML document
13082
 * called by the parser
13083
 * The allowed sequence for the Well Balanced Chunk is the one defined by
13084
 * the content production in the XML grammar:
13085
 *
13086
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13087
 *
13088
 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13089
 *    the parser error code otherwise
13090
 */
13091
13092
int
13093
xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13094
0
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
13095
0
    return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
13096
0
                                                depth, string, lst, 0 );
13097
0
}
13098
#endif /* LIBXML_SAX1_ENABLED */
13099
13100
/**
13101
 * xmlParseBalancedChunkMemoryInternal:
13102
 * @oldctxt:  the existing parsing context
13103
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13104
 * @user_data:  the user data field for the parser context
13105
 * @lst:  the return value for the set of parsed nodes
13106
 *
13107
 *
13108
 * Parse a well-balanced chunk of an XML document
13109
 * called by the parser
13110
 * The allowed sequence for the Well Balanced Chunk is the one defined by
13111
 * the content production in the XML grammar:
13112
 *
13113
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13114
 *
13115
 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13116
 * error code otherwise
13117
 *
13118
 * In case recover is set to 1, the nodelist will not be empty even if
13119
 * the parsed chunk is not well balanced.
13120
 */
13121
static xmlParserErrors
13122
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
13123
54.5k
  const xmlChar *string, void *user_data, xmlNodePtr *lst) {
13124
54.5k
    xmlParserCtxtPtr ctxt;
13125
54.5k
    xmlDocPtr newDoc = NULL;
13126
54.5k
    xmlNodePtr newRoot;
13127
54.5k
    xmlSAXHandlerPtr oldsax = NULL;
13128
54.5k
    xmlNodePtr content = NULL;
13129
54.5k
    xmlNodePtr last = NULL;
13130
54.5k
    int size;
13131
54.5k
    xmlParserErrors ret = XML_ERR_OK;
13132
54.5k
#ifdef SAX2
13133
54.5k
    int i;
13134
54.5k
#endif
13135
13136
54.5k
    if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13137
54.5k
        (oldctxt->depth >  100)) {
13138
51
  xmlFatalErrMsg(oldctxt, XML_ERR_ENTITY_LOOP,
13139
51
                       "Maximum entity nesting depth exceeded");
13140
51
  return(XML_ERR_ENTITY_LOOP);
13141
51
    }
13142
13143
13144
54.4k
    if (lst != NULL)
13145
48.0k
        *lst = NULL;
13146
54.4k
    if (string == NULL)
13147
38
        return(XML_ERR_INTERNAL_ERROR);
13148
13149
54.4k
    size = xmlStrlen(string);
13150
13151
54.4k
    ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13152
54.4k
    if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13153
49.2k
    ctxt->nbErrors = oldctxt->nbErrors;
13154
49.2k
    ctxt->nbWarnings = oldctxt->nbWarnings;
13155
49.2k
    if (user_data != NULL)
13156
0
  ctxt->userData = user_data;
13157
49.2k
    else
13158
49.2k
  ctxt->userData = ctxt;
13159
49.2k
    if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13160
49.2k
    ctxt->dict = oldctxt->dict;
13161
49.2k
    ctxt->input_id = oldctxt->input_id;
13162
49.2k
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13163
49.2k
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13164
49.2k
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13165
13166
49.2k
#ifdef SAX2
13167
    /* propagate namespaces down the entity */
13168
49.6k
    for (i = 0;i < oldctxt->nsNr;i += 2) {
13169
438
        nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
13170
438
    }
13171
49.2k
#endif
13172
13173
49.2k
    oldsax = ctxt->sax;
13174
49.2k
    ctxt->sax = oldctxt->sax;
13175
49.2k
    xmlDetectSAX2(ctxt);
13176
49.2k
    ctxt->replaceEntities = oldctxt->replaceEntities;
13177
49.2k
    ctxt->options = oldctxt->options;
13178
13179
49.2k
    ctxt->_private = oldctxt->_private;
13180
49.2k
    if (oldctxt->myDoc == NULL) {
13181
0
  newDoc = xmlNewDoc(BAD_CAST "1.0");
13182
0
  if (newDoc == NULL) {
13183
0
      ctxt->sax = oldsax;
13184
0
      ctxt->dict = NULL;
13185
0
      xmlFreeParserCtxt(ctxt);
13186
0
      return(XML_ERR_INTERNAL_ERROR);
13187
0
  }
13188
0
  newDoc->properties = XML_DOC_INTERNAL;
13189
0
  newDoc->dict = ctxt->dict;
13190
0
  xmlDictReference(newDoc->dict);
13191
0
  ctxt->myDoc = newDoc;
13192
49.2k
    } else {
13193
49.2k
  ctxt->myDoc = oldctxt->myDoc;
13194
49.2k
        content = ctxt->myDoc->children;
13195
49.2k
  last = ctxt->myDoc->last;
13196
49.2k
    }
13197
49.2k
    newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
13198
49.2k
    if (newRoot == NULL) {
13199
0
  ctxt->sax = oldsax;
13200
0
  ctxt->dict = NULL;
13201
0
  xmlFreeParserCtxt(ctxt);
13202
0
  if (newDoc != NULL) {
13203
0
      xmlFreeDoc(newDoc);
13204
0
  }
13205
0
  return(XML_ERR_INTERNAL_ERROR);
13206
0
    }
13207
49.2k
    ctxt->myDoc->children = NULL;
13208
49.2k
    ctxt->myDoc->last = NULL;
13209
49.2k
    xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
13210
49.2k
    nodePush(ctxt, ctxt->myDoc->children);
13211
49.2k
    ctxt->instate = XML_PARSER_CONTENT;
13212
49.2k
    ctxt->depth = oldctxt->depth;
13213
13214
49.2k
    ctxt->validate = 0;
13215
49.2k
    ctxt->loadsubset = oldctxt->loadsubset;
13216
49.2k
    if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
13217
  /*
13218
   * ID/IDREF registration will be done in xmlValidateElement below
13219
   */
13220
42.4k
  ctxt->loadsubset |= XML_SKIP_IDS;
13221
42.4k
    }
13222
49.2k
    ctxt->dictNames = oldctxt->dictNames;
13223
49.2k
    ctxt->attsDefault = oldctxt->attsDefault;
13224
49.2k
    ctxt->attsSpecial = oldctxt->attsSpecial;
13225
13226
49.2k
    xmlParseContent(ctxt);
13227
49.2k
    if ((RAW == '<') && (NXT(1) == '/')) {
13228
392
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13229
48.8k
    } else if (RAW != 0) {
13230
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13231
0
    }
13232
49.2k
    if (ctxt->node != ctxt->myDoc->children) {
13233
1.64k
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13234
1.64k
    }
13235
13236
49.2k
    if (!ctxt->wellFormed) {
13237
7.74k
  ret = (xmlParserErrors)ctxt->errNo;
13238
7.74k
        oldctxt->errNo = ctxt->errNo;
13239
7.74k
        oldctxt->wellFormed = 0;
13240
7.74k
        xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13241
41.4k
    } else {
13242
41.4k
        ret = XML_ERR_OK;
13243
41.4k
    }
13244
13245
49.2k
    if ((lst != NULL) && (ret == XML_ERR_OK)) {
13246
39.8k
  xmlNodePtr cur;
13247
13248
  /*
13249
   * Return the newly created nodeset after unlinking it from
13250
   * they pseudo parent.
13251
   */
13252
39.8k
  cur = ctxt->myDoc->children->children;
13253
39.8k
  *lst = cur;
13254
158k
  while (cur != NULL) {
13255
118k
#ifdef LIBXML_VALID_ENABLED
13256
118k
      if ((oldctxt->validate) && (oldctxt->wellFormed) &&
13257
118k
    (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
13258
118k
    (cur->type == XML_ELEMENT_NODE)) {
13259
18.0k
    oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
13260
18.0k
      oldctxt->myDoc, cur);
13261
18.0k
      }
13262
118k
#endif /* LIBXML_VALID_ENABLED */
13263
118k
      cur->parent = NULL;
13264
118k
      cur = cur->next;
13265
118k
  }
13266
39.8k
  ctxt->myDoc->children->children = NULL;
13267
39.8k
    }
13268
49.2k
    if (ctxt->myDoc != NULL) {
13269
49.2k
  xmlFreeNode(ctxt->myDoc->children);
13270
49.2k
        ctxt->myDoc->children = content;
13271
49.2k
        ctxt->myDoc->last = last;
13272
49.2k
    }
13273
13274
    /*
13275
     * Also record the size of the entity parsed
13276
     */
13277
49.2k
    if (ctxt->input != NULL && oldctxt != NULL) {
13278
49.2k
        unsigned long consumed = ctxt->input->consumed;
13279
13280
49.2k
        xmlSaturatedAddSizeT(&consumed, ctxt->input->cur - ctxt->input->base);
13281
13282
49.2k
        xmlSaturatedAdd(&oldctxt->sizeentcopy, consumed);
13283
49.2k
        xmlSaturatedAdd(&oldctxt->sizeentcopy, ctxt->sizeentcopy);
13284
49.2k
    }
13285
13286
49.2k
    oldctxt->nbErrors = ctxt->nbErrors;
13287
49.2k
    oldctxt->nbWarnings = ctxt->nbWarnings;
13288
49.2k
    ctxt->sax = oldsax;
13289
49.2k
    ctxt->dict = NULL;
13290
49.2k
    ctxt->attsDefault = NULL;
13291
49.2k
    ctxt->attsSpecial = NULL;
13292
49.2k
    xmlFreeParserCtxt(ctxt);
13293
49.2k
    if (newDoc != NULL) {
13294
0
  xmlFreeDoc(newDoc);
13295
0
    }
13296
13297
49.2k
    return(ret);
13298
49.2k
}
13299
13300
/**
13301
 * xmlParseInNodeContext:
13302
 * @node:  the context node
13303
 * @data:  the input string
13304
 * @datalen:  the input string length in bytes
13305
 * @options:  a combination of xmlParserOption
13306
 * @lst:  the return value for the set of parsed nodes
13307
 *
13308
 * Parse a well-balanced chunk of an XML document
13309
 * within the context (DTD, namespaces, etc ...) of the given node.
13310
 *
13311
 * The allowed sequence for the data is a Well Balanced Chunk defined by
13312
 * the content production in the XML grammar:
13313
 *
13314
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13315
 *
13316
 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13317
 * error code otherwise
13318
 */
13319
xmlParserErrors
13320
xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13321
0
                      int options, xmlNodePtr *lst) {
13322
0
#ifdef SAX2
13323
0
    xmlParserCtxtPtr ctxt;
13324
0
    xmlDocPtr doc = NULL;
13325
0
    xmlNodePtr fake, cur;
13326
0
    int nsnr = 0;
13327
13328
0
    xmlParserErrors ret = XML_ERR_OK;
13329
13330
    /*
13331
     * check all input parameters, grab the document
13332
     */
13333
0
    if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13334
0
        return(XML_ERR_INTERNAL_ERROR);
13335
0
    switch (node->type) {
13336
0
        case XML_ELEMENT_NODE:
13337
0
        case XML_ATTRIBUTE_NODE:
13338
0
        case XML_TEXT_NODE:
13339
0
        case XML_CDATA_SECTION_NODE:
13340
0
        case XML_ENTITY_REF_NODE:
13341
0
        case XML_PI_NODE:
13342
0
        case XML_COMMENT_NODE:
13343
0
        case XML_DOCUMENT_NODE:
13344
0
        case XML_HTML_DOCUMENT_NODE:
13345
0
      break;
13346
0
  default:
13347
0
      return(XML_ERR_INTERNAL_ERROR);
13348
13349
0
    }
13350
0
    while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13351
0
           (node->type != XML_DOCUMENT_NODE) &&
13352
0
     (node->type != XML_HTML_DOCUMENT_NODE))
13353
0
  node = node->parent;
13354
0
    if (node == NULL)
13355
0
  return(XML_ERR_INTERNAL_ERROR);
13356
0
    if (node->type == XML_ELEMENT_NODE)
13357
0
  doc = node->doc;
13358
0
    else
13359
0
        doc = (xmlDocPtr) node;
13360
0
    if (doc == NULL)
13361
0
  return(XML_ERR_INTERNAL_ERROR);
13362
13363
    /*
13364
     * allocate a context and set-up everything not related to the
13365
     * node position in the tree
13366
     */
13367
0
    if (doc->type == XML_DOCUMENT_NODE)
13368
0
  ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13369
0
#ifdef LIBXML_HTML_ENABLED
13370
0
    else if (doc->type == XML_HTML_DOCUMENT_NODE) {
13371
0
  ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
13372
        /*
13373
         * When parsing in context, it makes no sense to add implied
13374
         * elements like html/body/etc...
13375
         */
13376
0
        options |= HTML_PARSE_NOIMPLIED;
13377
0
    }
13378
0
#endif
13379
0
    else
13380
0
        return(XML_ERR_INTERNAL_ERROR);
13381
13382
0
    if (ctxt == NULL)
13383
0
        return(XML_ERR_NO_MEMORY);
13384
13385
    /*
13386
     * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13387
     * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13388
     * we must wait until the last moment to free the original one.
13389
     */
13390
0
    if (doc->dict != NULL) {
13391
0
        if (ctxt->dict != NULL)
13392
0
      xmlDictFree(ctxt->dict);
13393
0
  ctxt->dict = doc->dict;
13394
0
    } else
13395
0
        options |= XML_PARSE_NODICT;
13396
13397
0
    if (doc->encoding != NULL) {
13398
0
        xmlCharEncodingHandlerPtr hdlr;
13399
13400
0
        if (ctxt->encoding != NULL)
13401
0
      xmlFree((xmlChar *) ctxt->encoding);
13402
0
        ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13403
13404
0
        hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding);
13405
0
        if (hdlr != NULL) {
13406
0
            xmlSwitchToEncoding(ctxt, hdlr);
13407
0
  } else {
13408
0
            return(XML_ERR_UNSUPPORTED_ENCODING);
13409
0
        }
13410
0
    }
13411
13412
0
    xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13413
0
    xmlDetectSAX2(ctxt);
13414
0
    ctxt->myDoc = doc;
13415
    /* parsing in context, i.e. as within existing content */
13416
0
    ctxt->input_id = 2;
13417
0
    ctxt->instate = XML_PARSER_CONTENT;
13418
13419
0
    fake = xmlNewDocComment(node->doc, NULL);
13420
0
    if (fake == NULL) {
13421
0
        xmlFreeParserCtxt(ctxt);
13422
0
  return(XML_ERR_NO_MEMORY);
13423
0
    }
13424
0
    xmlAddChild(node, fake);
13425
13426
0
    if (node->type == XML_ELEMENT_NODE) {
13427
0
  nodePush(ctxt, node);
13428
  /*
13429
   * initialize the SAX2 namespaces stack
13430
   */
13431
0
  cur = node;
13432
0
  while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13433
0
      xmlNsPtr ns = cur->nsDef;
13434
0
      const xmlChar *iprefix, *ihref;
13435
13436
0
      while (ns != NULL) {
13437
0
    if (ctxt->dict) {
13438
0
        iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13439
0
        ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13440
0
    } else {
13441
0
        iprefix = ns->prefix;
13442
0
        ihref = ns->href;
13443
0
    }
13444
13445
0
          if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13446
0
        nsPush(ctxt, iprefix, ihref);
13447
0
        nsnr++;
13448
0
    }
13449
0
    ns = ns->next;
13450
0
      }
13451
0
      cur = cur->parent;
13452
0
  }
13453
0
    }
13454
13455
0
    if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13456
  /*
13457
   * ID/IDREF registration will be done in xmlValidateElement below
13458
   */
13459
0
  ctxt->loadsubset |= XML_SKIP_IDS;
13460
0
    }
13461
13462
0
#ifdef LIBXML_HTML_ENABLED
13463
0
    if (doc->type == XML_HTML_DOCUMENT_NODE)
13464
0
        __htmlParseContent(ctxt);
13465
0
    else
13466
0
#endif
13467
0
  xmlParseContent(ctxt);
13468
13469
0
    nsPop(ctxt, nsnr);
13470
0
    if ((RAW == '<') && (NXT(1) == '/')) {
13471
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13472
0
    } else if (RAW != 0) {
13473
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13474
0
    }
13475
0
    if ((ctxt->node != NULL) && (ctxt->node != node)) {
13476
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13477
0
  ctxt->wellFormed = 0;
13478
0
    }
13479
13480
0
    if (!ctxt->wellFormed) {
13481
0
        if (ctxt->errNo == 0)
13482
0
      ret = XML_ERR_INTERNAL_ERROR;
13483
0
  else
13484
0
      ret = (xmlParserErrors)ctxt->errNo;
13485
0
    } else {
13486
0
        ret = XML_ERR_OK;
13487
0
    }
13488
13489
    /*
13490
     * Return the newly created nodeset after unlinking it from
13491
     * the pseudo sibling.
13492
     */
13493
13494
0
    cur = fake->next;
13495
0
    fake->next = NULL;
13496
0
    node->last = fake;
13497
13498
0
    if (cur != NULL) {
13499
0
  cur->prev = NULL;
13500
0
    }
13501
13502
0
    *lst = cur;
13503
13504
0
    while (cur != NULL) {
13505
0
  cur->parent = NULL;
13506
0
  cur = cur->next;
13507
0
    }
13508
13509
0
    xmlUnlinkNode(fake);
13510
0
    xmlFreeNode(fake);
13511
13512
13513
0
    if (ret != XML_ERR_OK) {
13514
0
        xmlFreeNodeList(*lst);
13515
0
  *lst = NULL;
13516
0
    }
13517
13518
0
    if (doc->dict != NULL)
13519
0
        ctxt->dict = NULL;
13520
0
    xmlFreeParserCtxt(ctxt);
13521
13522
0
    return(ret);
13523
#else /* !SAX2 */
13524
    return(XML_ERR_INTERNAL_ERROR);
13525
#endif
13526
0
}
13527
13528
#ifdef LIBXML_SAX1_ENABLED
13529
/**
13530
 * xmlParseBalancedChunkMemoryRecover:
13531
 * @doc:  the document the chunk pertains to (must not be NULL)
13532
 * @sax:  the SAX handler block (possibly NULL)
13533
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13534
 * @depth:  Used for loop detection, use 0
13535
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13536
 * @lst:  the return value for the set of parsed nodes
13537
 * @recover: return nodes even if the data is broken (use 0)
13538
 *
13539
 *
13540
 * Parse a well-balanced chunk of an XML document
13541
 * called by the parser
13542
 * The allowed sequence for the Well Balanced Chunk is the one defined by
13543
 * the content production in the XML grammar:
13544
 *
13545
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13546
 *
13547
 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13548
 *    the parser error code otherwise
13549
 *
13550
 * In case recover is set to 1, the nodelist will not be empty even if
13551
 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13552
 * some extent.
13553
 */
13554
int
13555
xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13556
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
13557
0
     int recover) {
13558
0
    xmlParserCtxtPtr ctxt;
13559
0
    xmlDocPtr newDoc;
13560
0
    xmlSAXHandlerPtr oldsax = NULL;
13561
0
    xmlNodePtr content, newRoot;
13562
0
    int size;
13563
0
    int ret = 0;
13564
13565
0
    if (depth > 40) {
13566
0
  return(XML_ERR_ENTITY_LOOP);
13567
0
    }
13568
13569
13570
0
    if (lst != NULL)
13571
0
        *lst = NULL;
13572
0
    if (string == NULL)
13573
0
        return(-1);
13574
13575
0
    size = xmlStrlen(string);
13576
13577
0
    ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13578
0
    if (ctxt == NULL) return(-1);
13579
0
    ctxt->userData = ctxt;
13580
0
    if (sax != NULL) {
13581
0
  oldsax = ctxt->sax;
13582
0
        ctxt->sax = sax;
13583
0
  if (user_data != NULL)
13584
0
      ctxt->userData = user_data;
13585
0
    }
13586
0
    newDoc = xmlNewDoc(BAD_CAST "1.0");
13587
0
    if (newDoc == NULL) {
13588
0
  xmlFreeParserCtxt(ctxt);
13589
0
  return(-1);
13590
0
    }
13591
0
    newDoc->properties = XML_DOC_INTERNAL;
13592
0
    if ((doc != NULL) && (doc->dict != NULL)) {
13593
0
        xmlDictFree(ctxt->dict);
13594
0
  ctxt->dict = doc->dict;
13595
0
  xmlDictReference(ctxt->dict);
13596
0
  ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13597
0
  ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13598
0
  ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13599
0
  ctxt->dictNames = 1;
13600
0
    } else {
13601
0
  xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
13602
0
    }
13603
    /* doc == NULL is only supported for historic reasons */
13604
0
    if (doc != NULL) {
13605
0
  newDoc->intSubset = doc->intSubset;
13606
0
  newDoc->extSubset = doc->extSubset;
13607
0
    }
13608
0
    newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13609
0
    if (newRoot == NULL) {
13610
0
  if (sax != NULL)
13611
0
      ctxt->sax = oldsax;
13612
0
  xmlFreeParserCtxt(ctxt);
13613
0
  newDoc->intSubset = NULL;
13614
0
  newDoc->extSubset = NULL;
13615
0
        xmlFreeDoc(newDoc);
13616
0
  return(-1);
13617
0
    }
13618
0
    xmlAddChild((xmlNodePtr) newDoc, newRoot);
13619
0
    nodePush(ctxt, newRoot);
13620
    /* doc == NULL is only supported for historic reasons */
13621
0
    if (doc == NULL) {
13622
0
  ctxt->myDoc = newDoc;
13623
0
    } else {
13624
0
  ctxt->myDoc = newDoc;
13625
0
  newDoc->children->doc = doc;
13626
  /* Ensure that doc has XML spec namespace */
13627
0
  xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13628
0
  newDoc->oldNs = doc->oldNs;
13629
0
    }
13630
0
    ctxt->instate = XML_PARSER_CONTENT;
13631
0
    ctxt->input_id = 2;
13632
0
    ctxt->depth = depth;
13633
13634
    /*
13635
     * Doing validity checking on chunk doesn't make sense
13636
     */
13637
0
    ctxt->validate = 0;
13638
0
    ctxt->loadsubset = 0;
13639
0
    xmlDetectSAX2(ctxt);
13640
13641
0
    if ( doc != NULL ){
13642
0
        content = doc->children;
13643
0
        doc->children = NULL;
13644
0
        xmlParseContent(ctxt);
13645
0
        doc->children = content;
13646
0
    }
13647
0
    else {
13648
0
        xmlParseContent(ctxt);
13649
0
    }
13650
0
    if ((RAW == '<') && (NXT(1) == '/')) {
13651
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13652
0
    } else if (RAW != 0) {
13653
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13654
0
    }
13655
0
    if (ctxt->node != newDoc->children) {
13656
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13657
0
    }
13658
13659
0
    if (!ctxt->wellFormed) {
13660
0
        if (ctxt->errNo == 0)
13661
0
      ret = 1;
13662
0
  else
13663
0
      ret = ctxt->errNo;
13664
0
    } else {
13665
0
      ret = 0;
13666
0
    }
13667
13668
0
    if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13669
0
  xmlNodePtr cur;
13670
13671
  /*
13672
   * Return the newly created nodeset after unlinking it from
13673
   * they pseudo parent.
13674
   */
13675
0
  cur = newDoc->children->children;
13676
0
  *lst = cur;
13677
0
  while (cur != NULL) {
13678
0
      xmlSetTreeDoc(cur, doc);
13679
0
      cur->parent = NULL;
13680
0
      cur = cur->next;
13681
0
  }
13682
0
  newDoc->children->children = NULL;
13683
0
    }
13684
13685
0
    if (sax != NULL)
13686
0
  ctxt->sax = oldsax;
13687
0
    xmlFreeParserCtxt(ctxt);
13688
0
    newDoc->intSubset = NULL;
13689
0
    newDoc->extSubset = NULL;
13690
    /* This leaks the namespace list if doc == NULL */
13691
0
    newDoc->oldNs = NULL;
13692
0
    xmlFreeDoc(newDoc);
13693
13694
0
    return(ret);
13695
0
}
13696
13697
/**
13698
 * xmlSAXParseEntity:
13699
 * @sax:  the SAX handler block
13700
 * @filename:  the filename
13701
 *
13702
 * DEPRECATED: Don't use.
13703
 *
13704
 * parse an XML external entity out of context and build a tree.
13705
 * It use the given SAX function block to handle the parsing callback.
13706
 * If sax is NULL, fallback to the default DOM tree building routines.
13707
 *
13708
 * [78] extParsedEnt ::= TextDecl? content
13709
 *
13710
 * This correspond to a "Well Balanced" chunk
13711
 *
13712
 * Returns the resulting document tree
13713
 */
13714
13715
xmlDocPtr
13716
0
xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13717
0
    xmlDocPtr ret;
13718
0
    xmlParserCtxtPtr ctxt;
13719
13720
0
    ctxt = xmlCreateFileParserCtxt(filename);
13721
0
    if (ctxt == NULL) {
13722
0
  return(NULL);
13723
0
    }
13724
0
    if (sax != NULL) {
13725
0
  if (ctxt->sax != NULL)
13726
0
      xmlFree(ctxt->sax);
13727
0
        ctxt->sax = sax;
13728
0
        ctxt->userData = NULL;
13729
0
    }
13730
13731
0
    xmlParseExtParsedEnt(ctxt);
13732
13733
0
    if (ctxt->wellFormed)
13734
0
  ret = ctxt->myDoc;
13735
0
    else {
13736
0
        ret = NULL;
13737
0
        xmlFreeDoc(ctxt->myDoc);
13738
0
        ctxt->myDoc = NULL;
13739
0
    }
13740
0
    if (sax != NULL)
13741
0
        ctxt->sax = NULL;
13742
0
    xmlFreeParserCtxt(ctxt);
13743
13744
0
    return(ret);
13745
0
}
13746
13747
/**
13748
 * xmlParseEntity:
13749
 * @filename:  the filename
13750
 *
13751
 * parse an XML external entity out of context and build a tree.
13752
 *
13753
 * [78] extParsedEnt ::= TextDecl? content
13754
 *
13755
 * This correspond to a "Well Balanced" chunk
13756
 *
13757
 * Returns the resulting document tree
13758
 */
13759
13760
xmlDocPtr
13761
0
xmlParseEntity(const char *filename) {
13762
0
    return(xmlSAXParseEntity(NULL, filename));
13763
0
}
13764
#endif /* LIBXML_SAX1_ENABLED */
13765
13766
/**
13767
 * xmlCreateEntityParserCtxtInternal:
13768
 * @URL:  the entity URL
13769
 * @ID:  the entity PUBLIC ID
13770
 * @base:  a possible base for the target URI
13771
 * @pctx:  parser context used to set options on new context
13772
 *
13773
 * Create a parser context for an external entity
13774
 * Automatic support for ZLIB/Compress compressed document is provided
13775
 * by default if found at compile-time.
13776
 *
13777
 * Returns the new parser context or NULL
13778
 */
13779
static xmlParserCtxtPtr
13780
xmlCreateEntityParserCtxtInternal(xmlSAXHandlerPtr sax, void *userData,
13781
        const xmlChar *URL, const xmlChar *ID, const xmlChar *base,
13782
45.0k
        xmlParserCtxtPtr pctx) {
13783
45.0k
    xmlParserCtxtPtr ctxt;
13784
45.0k
    xmlParserInputPtr inputStream;
13785
45.0k
    char *directory = NULL;
13786
45.0k
    xmlChar *uri;
13787
13788
45.0k
    ctxt = xmlNewSAXParserCtxt(sax, userData);
13789
45.0k
    if (ctxt == NULL) {
13790
0
  return(NULL);
13791
0
    }
13792
13793
45.0k
    if (pctx != NULL) {
13794
45.0k
        ctxt->options = pctx->options;
13795
45.0k
        ctxt->_private = pctx->_private;
13796
45.0k
  ctxt->input_id = pctx->input_id;
13797
45.0k
    }
13798
13799
    /* Don't read from stdin. */
13800
45.0k
    if (xmlStrcmp(URL, BAD_CAST "-") == 0)
13801
2
        URL = BAD_CAST "./-";
13802
13803
45.0k
    uri = xmlBuildURI(URL, base);
13804
13805
45.0k
    if (uri == NULL) {
13806
1.97k
  inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
13807
1.97k
  if (inputStream == NULL) {
13808
1.95k
      xmlFreeParserCtxt(ctxt);
13809
1.95k
      return(NULL);
13810
1.95k
  }
13811
13812
13
  inputPush(ctxt, inputStream);
13813
13814
13
  if ((ctxt->directory == NULL) && (directory == NULL))
13815
13
      directory = xmlParserGetDirectory((char *)URL);
13816
13
  if ((ctxt->directory == NULL) && (directory != NULL))
13817
13
      ctxt->directory = directory;
13818
43.0k
    } else {
13819
43.0k
  inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
13820
43.0k
  if (inputStream == NULL) {
13821
33.1k
      xmlFree(uri);
13822
33.1k
      xmlFreeParserCtxt(ctxt);
13823
33.1k
      return(NULL);
13824
33.1k
  }
13825
13826
9.89k
  inputPush(ctxt, inputStream);
13827
13828
9.89k
  if ((ctxt->directory == NULL) && (directory == NULL))
13829
9.89k
      directory = xmlParserGetDirectory((char *)uri);
13830
9.89k
  if ((ctxt->directory == NULL) && (directory != NULL))
13831
9.89k
      ctxt->directory = directory;
13832
9.89k
  xmlFree(uri);
13833
9.89k
    }
13834
9.91k
    return(ctxt);
13835
45.0k
}
13836
13837
/**
13838
 * xmlCreateEntityParserCtxt:
13839
 * @URL:  the entity URL
13840
 * @ID:  the entity PUBLIC ID
13841
 * @base:  a possible base for the target URI
13842
 *
13843
 * Create a parser context for an external entity
13844
 * Automatic support for ZLIB/Compress compressed document is provided
13845
 * by default if found at compile-time.
13846
 *
13847
 * Returns the new parser context or NULL
13848
 */
13849
xmlParserCtxtPtr
13850
xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
13851
0
                    const xmlChar *base) {
13852
0
    return xmlCreateEntityParserCtxtInternal(NULL, NULL, URL, ID, base, NULL);
13853
13854
0
}
13855
13856
/************************************************************************
13857
 *                  *
13858
 *    Front ends when parsing from a file     *
13859
 *                  *
13860
 ************************************************************************/
13861
13862
/**
13863
 * xmlCreateURLParserCtxt:
13864
 * @filename:  the filename or URL
13865
 * @options:  a combination of xmlParserOption
13866
 *
13867
 * Create a parser context for a file or URL content.
13868
 * Automatic support for ZLIB/Compress compressed document is provided
13869
 * by default if found at compile-time and for file accesses
13870
 *
13871
 * Returns the new parser context or NULL
13872
 */
13873
xmlParserCtxtPtr
13874
xmlCreateURLParserCtxt(const char *filename, int options)
13875
0
{
13876
0
    xmlParserCtxtPtr ctxt;
13877
0
    xmlParserInputPtr inputStream;
13878
0
    char *directory = NULL;
13879
13880
0
    ctxt = xmlNewParserCtxt();
13881
0
    if (ctxt == NULL) {
13882
0
  xmlErrMemory(NULL, "cannot allocate parser context");
13883
0
  return(NULL);
13884
0
    }
13885
13886
0
    if (options)
13887
0
  xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13888
0
    ctxt->linenumbers = 1;
13889
13890
0
    inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
13891
0
    if (inputStream == NULL) {
13892
0
  xmlFreeParserCtxt(ctxt);
13893
0
  return(NULL);
13894
0
    }
13895
13896
0
    inputPush(ctxt, inputStream);
13897
0
    if ((ctxt->directory == NULL) && (directory == NULL))
13898
0
        directory = xmlParserGetDirectory(filename);
13899
0
    if ((ctxt->directory == NULL) && (directory != NULL))
13900
0
        ctxt->directory = directory;
13901
13902
0
    return(ctxt);
13903
0
}
13904
13905
/**
13906
 * xmlCreateFileParserCtxt:
13907
 * @filename:  the filename
13908
 *
13909
 * Create a parser context for a file content.
13910
 * Automatic support for ZLIB/Compress compressed document is provided
13911
 * by default if found at compile-time.
13912
 *
13913
 * Returns the new parser context or NULL
13914
 */
13915
xmlParserCtxtPtr
13916
xmlCreateFileParserCtxt(const char *filename)
13917
0
{
13918
0
    return(xmlCreateURLParserCtxt(filename, 0));
13919
0
}
13920
13921
#ifdef LIBXML_SAX1_ENABLED
13922
/**
13923
 * xmlSAXParseFileWithData:
13924
 * @sax:  the SAX handler block
13925
 * @filename:  the filename
13926
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
13927
 *             documents
13928
 * @data:  the userdata
13929
 *
13930
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
13931
 *
13932
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13933
 * compressed document is provided by default if found at compile-time.
13934
 * It use the given SAX function block to handle the parsing callback.
13935
 * If sax is NULL, fallback to the default DOM tree building routines.
13936
 *
13937
 * User data (void *) is stored within the parser context in the
13938
 * context's _private member, so it is available nearly everywhere in libxml
13939
 *
13940
 * Returns the resulting document tree
13941
 */
13942
13943
xmlDocPtr
13944
xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
13945
0
                        int recovery, void *data) {
13946
0
    xmlDocPtr ret;
13947
0
    xmlParserCtxtPtr ctxt;
13948
13949
0
    xmlInitParser();
13950
13951
0
    ctxt = xmlCreateFileParserCtxt(filename);
13952
0
    if (ctxt == NULL) {
13953
0
  return(NULL);
13954
0
    }
13955
0
    if (sax != NULL) {
13956
0
  if (ctxt->sax != NULL)
13957
0
      xmlFree(ctxt->sax);
13958
0
        ctxt->sax = sax;
13959
0
    }
13960
0
    xmlDetectSAX2(ctxt);
13961
0
    if (data!=NULL) {
13962
0
  ctxt->_private = data;
13963
0
    }
13964
13965
0
    if (ctxt->directory == NULL)
13966
0
        ctxt->directory = xmlParserGetDirectory(filename);
13967
13968
0
    ctxt->recovery = recovery;
13969
13970
0
    xmlParseDocument(ctxt);
13971
13972
0
    if ((ctxt->wellFormed) || recovery) {
13973
0
        ret = ctxt->myDoc;
13974
0
  if ((ret != NULL) && (ctxt->input->buf != NULL)) {
13975
0
      if (ctxt->input->buf->compressed > 0)
13976
0
    ret->compression = 9;
13977
0
      else
13978
0
    ret->compression = ctxt->input->buf->compressed;
13979
0
  }
13980
0
    }
13981
0
    else {
13982
0
       ret = NULL;
13983
0
       xmlFreeDoc(ctxt->myDoc);
13984
0
       ctxt->myDoc = NULL;
13985
0
    }
13986
0
    if (sax != NULL)
13987
0
        ctxt->sax = NULL;
13988
0
    xmlFreeParserCtxt(ctxt);
13989
13990
0
    return(ret);
13991
0
}
13992
13993
/**
13994
 * xmlSAXParseFile:
13995
 * @sax:  the SAX handler block
13996
 * @filename:  the filename
13997
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
13998
 *             documents
13999
 *
14000
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
14001
 *
14002
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14003
 * compressed document is provided by default if found at compile-time.
14004
 * It use the given SAX function block to handle the parsing callback.
14005
 * If sax is NULL, fallback to the default DOM tree building routines.
14006
 *
14007
 * Returns the resulting document tree
14008
 */
14009
14010
xmlDocPtr
14011
xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
14012
0
                          int recovery) {
14013
0
    return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
14014
0
}
14015
14016
/**
14017
 * xmlRecoverDoc:
14018
 * @cur:  a pointer to an array of xmlChar
14019
 *
14020
 * DEPRECATED: Use xmlReadDoc with XML_PARSE_RECOVER.
14021
 *
14022
 * parse an XML in-memory document and build a tree.
14023
 * In the case the document is not Well Formed, a attempt to build a
14024
 * tree is tried anyway
14025
 *
14026
 * Returns the resulting document tree or NULL in case of failure
14027
 */
14028
14029
xmlDocPtr
14030
0
xmlRecoverDoc(const xmlChar *cur) {
14031
0
    return(xmlSAXParseDoc(NULL, cur, 1));
14032
0
}
14033
14034
/**
14035
 * xmlParseFile:
14036
 * @filename:  the filename
14037
 *
14038
 * DEPRECATED: Use xmlReadFile.
14039
 *
14040
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14041
 * compressed document is provided by default if found at compile-time.
14042
 *
14043
 * Returns the resulting document tree if the file was wellformed,
14044
 * NULL otherwise.
14045
 */
14046
14047
xmlDocPtr
14048
0
xmlParseFile(const char *filename) {
14049
0
    return(xmlSAXParseFile(NULL, filename, 0));
14050
0
}
14051
14052
/**
14053
 * xmlRecoverFile:
14054
 * @filename:  the filename
14055
 *
14056
 * DEPRECATED: Use xmlReadFile with XML_PARSE_RECOVER.
14057
 *
14058
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14059
 * compressed document is provided by default if found at compile-time.
14060
 * In the case the document is not Well Formed, it attempts to build
14061
 * a tree anyway
14062
 *
14063
 * Returns the resulting document tree or NULL in case of failure
14064
 */
14065
14066
xmlDocPtr
14067
0
xmlRecoverFile(const char *filename) {
14068
0
    return(xmlSAXParseFile(NULL, filename, 1));
14069
0
}
14070
14071
14072
/**
14073
 * xmlSetupParserForBuffer:
14074
 * @ctxt:  an XML parser context
14075
 * @buffer:  a xmlChar * buffer
14076
 * @filename:  a file name
14077
 *
14078
 * DEPRECATED: Don't use.
14079
 *
14080
 * Setup the parser context to parse a new buffer; Clears any prior
14081
 * contents from the parser context. The buffer parameter must not be
14082
 * NULL, but the filename parameter can be
14083
 */
14084
void
14085
xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
14086
                             const char* filename)
14087
0
{
14088
0
    xmlParserInputPtr input;
14089
14090
0
    if ((ctxt == NULL) || (buffer == NULL))
14091
0
        return;
14092
14093
0
    input = xmlNewInputStream(ctxt);
14094
0
    if (input == NULL) {
14095
0
        xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
14096
0
        xmlClearParserCtxt(ctxt);
14097
0
        return;
14098
0
    }
14099
14100
0
    xmlClearParserCtxt(ctxt);
14101
0
    if (filename != NULL)
14102
0
        input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
14103
0
    input->base = buffer;
14104
0
    input->cur = buffer;
14105
0
    input->end = &buffer[xmlStrlen(buffer)];
14106
0
    inputPush(ctxt, input);
14107
0
}
14108
14109
/**
14110
 * xmlSAXUserParseFile:
14111
 * @sax:  a SAX handler
14112
 * @user_data:  The user data returned on SAX callbacks
14113
 * @filename:  a file name
14114
 *
14115
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
14116
 *
14117
 * parse an XML file and call the given SAX handler routines.
14118
 * Automatic support for ZLIB/Compress compressed document is provided
14119
 *
14120
 * Returns 0 in case of success or a error number otherwise
14121
 */
14122
int
14123
xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
14124
0
                    const char *filename) {
14125
0
    int ret = 0;
14126
0
    xmlParserCtxtPtr ctxt;
14127
14128
0
    ctxt = xmlCreateFileParserCtxt(filename);
14129
0
    if (ctxt == NULL) return -1;
14130
0
    if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14131
0
  xmlFree(ctxt->sax);
14132
0
    ctxt->sax = sax;
14133
0
    xmlDetectSAX2(ctxt);
14134
14135
0
    if (user_data != NULL)
14136
0
  ctxt->userData = user_data;
14137
14138
0
    xmlParseDocument(ctxt);
14139
14140
0
    if (ctxt->wellFormed)
14141
0
  ret = 0;
14142
0
    else {
14143
0
        if (ctxt->errNo != 0)
14144
0
      ret = ctxt->errNo;
14145
0
  else
14146
0
      ret = -1;
14147
0
    }
14148
0
    if (sax != NULL)
14149
0
  ctxt->sax = NULL;
14150
0
    if (ctxt->myDoc != NULL) {
14151
0
        xmlFreeDoc(ctxt->myDoc);
14152
0
  ctxt->myDoc = NULL;
14153
0
    }
14154
0
    xmlFreeParserCtxt(ctxt);
14155
14156
0
    return ret;
14157
0
}
14158
#endif /* LIBXML_SAX1_ENABLED */
14159
14160
/************************************************************************
14161
 *                  *
14162
 *    Front ends when parsing from memory     *
14163
 *                  *
14164
 ************************************************************************/
14165
14166
/**
14167
 * xmlCreateMemoryParserCtxt:
14168
 * @buffer:  a pointer to a char array
14169
 * @size:  the size of the array
14170
 *
14171
 * Create a parser context for an XML in-memory document.
14172
 *
14173
 * Returns the new parser context or NULL
14174
 */
14175
xmlParserCtxtPtr
14176
368k
xmlCreateMemoryParserCtxt(const char *buffer, int size) {
14177
368k
    xmlParserCtxtPtr ctxt;
14178
368k
    xmlParserInputPtr input;
14179
368k
    xmlParserInputBufferPtr buf;
14180
14181
368k
    if (buffer == NULL)
14182
0
  return(NULL);
14183
368k
    if (size <= 0)
14184
5.86k
  return(NULL);
14185
14186
362k
    ctxt = xmlNewParserCtxt();
14187
362k
    if (ctxt == NULL)
14188
0
  return(NULL);
14189
14190
362k
    buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14191
362k
    if (buf == NULL) {
14192
0
  xmlFreeParserCtxt(ctxt);
14193
0
  return(NULL);
14194
0
    }
14195
14196
362k
    input = xmlNewInputStream(ctxt);
14197
362k
    if (input == NULL) {
14198
0
  xmlFreeParserInputBuffer(buf);
14199
0
  xmlFreeParserCtxt(ctxt);
14200
0
  return(NULL);
14201
0
    }
14202
14203
362k
    input->filename = NULL;
14204
362k
    input->buf = buf;
14205
362k
    xmlBufResetInput(input->buf->buffer, input);
14206
14207
362k
    inputPush(ctxt, input);
14208
362k
    return(ctxt);
14209
362k
}
14210
14211
#ifdef LIBXML_SAX1_ENABLED
14212
/**
14213
 * xmlSAXParseMemoryWithData:
14214
 * @sax:  the SAX handler block
14215
 * @buffer:  an pointer to a char array
14216
 * @size:  the size of the array
14217
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14218
 *             documents
14219
 * @data:  the userdata
14220
 *
14221
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14222
 *
14223
 * parse an XML in-memory block and use the given SAX function block
14224
 * to handle the parsing callback. If sax is NULL, fallback to the default
14225
 * DOM tree building routines.
14226
 *
14227
 * User data (void *) is stored within the parser context in the
14228
 * context's _private member, so it is available nearly everywhere in libxml
14229
 *
14230
 * Returns the resulting document tree
14231
 */
14232
14233
xmlDocPtr
14234
xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14235
0
            int size, int recovery, void *data) {
14236
0
    xmlDocPtr ret;
14237
0
    xmlParserCtxtPtr ctxt;
14238
14239
0
    xmlInitParser();
14240
14241
0
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14242
0
    if (ctxt == NULL) return(NULL);
14243
0
    if (sax != NULL) {
14244
0
  if (ctxt->sax != NULL)
14245
0
      xmlFree(ctxt->sax);
14246
0
        ctxt->sax = sax;
14247
0
    }
14248
0
    xmlDetectSAX2(ctxt);
14249
0
    if (data!=NULL) {
14250
0
  ctxt->_private=data;
14251
0
    }
14252
14253
0
    ctxt->recovery = recovery;
14254
14255
0
    xmlParseDocument(ctxt);
14256
14257
0
    if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14258
0
    else {
14259
0
       ret = NULL;
14260
0
       xmlFreeDoc(ctxt->myDoc);
14261
0
       ctxt->myDoc = NULL;
14262
0
    }
14263
0
    if (sax != NULL)
14264
0
  ctxt->sax = NULL;
14265
0
    xmlFreeParserCtxt(ctxt);
14266
14267
0
    return(ret);
14268
0
}
14269
14270
/**
14271
 * xmlSAXParseMemory:
14272
 * @sax:  the SAX handler block
14273
 * @buffer:  an pointer to a char array
14274
 * @size:  the size of the array
14275
 * @recovery:  work in recovery mode, i.e. tries to read not Well Formed
14276
 *             documents
14277
 *
14278
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14279
 *
14280
 * parse an XML in-memory block and use the given SAX function block
14281
 * to handle the parsing callback. If sax is NULL, fallback to the default
14282
 * DOM tree building routines.
14283
 *
14284
 * Returns the resulting document tree
14285
 */
14286
xmlDocPtr
14287
xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14288
0
            int size, int recovery) {
14289
0
    return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
14290
0
}
14291
14292
/**
14293
 * xmlParseMemory:
14294
 * @buffer:  an pointer to a char array
14295
 * @size:  the size of the array
14296
 *
14297
 * DEPRECATED: Use xmlReadMemory.
14298
 *
14299
 * parse an XML in-memory block and build a tree.
14300
 *
14301
 * Returns the resulting document tree
14302
 */
14303
14304
0
xmlDocPtr xmlParseMemory(const char *buffer, int size) {
14305
0
   return(xmlSAXParseMemory(NULL, buffer, size, 0));
14306
0
}
14307
14308
/**
14309
 * xmlRecoverMemory:
14310
 * @buffer:  an pointer to a char array
14311
 * @size:  the size of the array
14312
 *
14313
 * DEPRECATED: Use xmlReadMemory with XML_PARSE_RECOVER.
14314
 *
14315
 * parse an XML in-memory block and build a tree.
14316
 * In the case the document is not Well Formed, an attempt to
14317
 * build a tree is tried anyway
14318
 *
14319
 * Returns the resulting document tree or NULL in case of error
14320
 */
14321
14322
0
xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
14323
0
   return(xmlSAXParseMemory(NULL, buffer, size, 1));
14324
0
}
14325
14326
/**
14327
 * xmlSAXUserParseMemory:
14328
 * @sax:  a SAX handler
14329
 * @user_data:  The user data returned on SAX callbacks
14330
 * @buffer:  an in-memory XML document input
14331
 * @size:  the length of the XML document in bytes
14332
 *
14333
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14334
 *
14335
 * parse an XML in-memory buffer and call the given SAX handler routines.
14336
 *
14337
 * Returns 0 in case of success or a error number otherwise
14338
 */
14339
int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
14340
0
        const char *buffer, int size) {
14341
0
    int ret = 0;
14342
0
    xmlParserCtxtPtr ctxt;
14343
14344
0
    xmlInitParser();
14345
14346
0
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14347
0
    if (ctxt == NULL) return -1;
14348
0
    if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14349
0
        xmlFree(ctxt->sax);
14350
0
    ctxt->sax = sax;
14351
0
    xmlDetectSAX2(ctxt);
14352
14353
0
    if (user_data != NULL)
14354
0
  ctxt->userData = user_data;
14355
14356
0
    xmlParseDocument(ctxt);
14357
14358
0
    if (ctxt->wellFormed)
14359
0
  ret = 0;
14360
0
    else {
14361
0
        if (ctxt->errNo != 0)
14362
0
      ret = ctxt->errNo;
14363
0
  else
14364
0
      ret = -1;
14365
0
    }
14366
0
    if (sax != NULL)
14367
0
        ctxt->sax = NULL;
14368
0
    if (ctxt->myDoc != NULL) {
14369
0
        xmlFreeDoc(ctxt->myDoc);
14370
0
  ctxt->myDoc = NULL;
14371
0
    }
14372
0
    xmlFreeParserCtxt(ctxt);
14373
14374
0
    return ret;
14375
0
}
14376
#endif /* LIBXML_SAX1_ENABLED */
14377
14378
/**
14379
 * xmlCreateDocParserCtxt:
14380
 * @cur:  a pointer to an array of xmlChar
14381
 *
14382
 * Creates a parser context for an XML in-memory document.
14383
 *
14384
 * Returns the new parser context or NULL
14385
 */
14386
xmlParserCtxtPtr
14387
0
xmlCreateDocParserCtxt(const xmlChar *cur) {
14388
0
    int len;
14389
14390
0
    if (cur == NULL)
14391
0
  return(NULL);
14392
0
    len = xmlStrlen(cur);
14393
0
    return(xmlCreateMemoryParserCtxt((const char *)cur, len));
14394
0
}
14395
14396
#ifdef LIBXML_SAX1_ENABLED
14397
/**
14398
 * xmlSAXParseDoc:
14399
 * @sax:  the SAX handler block
14400
 * @cur:  a pointer to an array of xmlChar
14401
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14402
 *             documents
14403
 *
14404
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadDoc.
14405
 *
14406
 * parse an XML in-memory document and build a tree.
14407
 * It use the given SAX function block to handle the parsing callback.
14408
 * If sax is NULL, fallback to the default DOM tree building routines.
14409
 *
14410
 * Returns the resulting document tree
14411
 */
14412
14413
xmlDocPtr
14414
0
xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
14415
0
    xmlDocPtr ret;
14416
0
    xmlParserCtxtPtr ctxt;
14417
0
    xmlSAXHandlerPtr oldsax = NULL;
14418
14419
0
    if (cur == NULL) return(NULL);
14420
14421
14422
0
    ctxt = xmlCreateDocParserCtxt(cur);
14423
0
    if (ctxt == NULL) return(NULL);
14424
0
    if (sax != NULL) {
14425
0
        oldsax = ctxt->sax;
14426
0
        ctxt->sax = sax;
14427
0
        ctxt->userData = NULL;
14428
0
    }
14429
0
    xmlDetectSAX2(ctxt);
14430
14431
0
    xmlParseDocument(ctxt);
14432
0
    if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14433
0
    else {
14434
0
       ret = NULL;
14435
0
       xmlFreeDoc(ctxt->myDoc);
14436
0
       ctxt->myDoc = NULL;
14437
0
    }
14438
0
    if (sax != NULL)
14439
0
  ctxt->sax = oldsax;
14440
0
    xmlFreeParserCtxt(ctxt);
14441
14442
0
    return(ret);
14443
0
}
14444
14445
/**
14446
 * xmlParseDoc:
14447
 * @cur:  a pointer to an array of xmlChar
14448
 *
14449
 * DEPRECATED: Use xmlReadDoc.
14450
 *
14451
 * parse an XML in-memory document and build a tree.
14452
 *
14453
 * Returns the resulting document tree
14454
 */
14455
14456
xmlDocPtr
14457
0
xmlParseDoc(const xmlChar *cur) {
14458
0
    return(xmlSAXParseDoc(NULL, cur, 0));
14459
0
}
14460
#endif /* LIBXML_SAX1_ENABLED */
14461
14462
#ifdef LIBXML_LEGACY_ENABLED
14463
/************************************************************************
14464
 *                  *
14465
 *  Specific function to keep track of entities references    *
14466
 *  and used by the XSLT debugger         *
14467
 *                  *
14468
 ************************************************************************/
14469
14470
static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14471
14472
/**
14473
 * xmlAddEntityReference:
14474
 * @ent : A valid entity
14475
 * @firstNode : A valid first node for children of entity
14476
 * @lastNode : A valid last node of children entity
14477
 *
14478
 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14479
 */
14480
static void
14481
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14482
                      xmlNodePtr lastNode)
14483
{
14484
    if (xmlEntityRefFunc != NULL) {
14485
        (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14486
    }
14487
}
14488
14489
14490
/**
14491
 * xmlSetEntityReferenceFunc:
14492
 * @func: A valid function
14493
 *
14494
 * Set the function to call call back when a xml reference has been made
14495
 */
14496
void
14497
xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14498
{
14499
    xmlEntityRefFunc = func;
14500
}
14501
#endif /* LIBXML_LEGACY_ENABLED */
14502
14503
/************************************************************************
14504
 *                  *
14505
 *        Miscellaneous       *
14506
 *                  *
14507
 ************************************************************************/
14508
14509
static int xmlParserInitialized = 0;

/**
 * xmlInitParser:
 *
 * Initialization function for the XML parser.
 * This is not reentrant. Call once before processing in case of
 * use in multithreaded programs.
 *
 * Calls made after the library has been initialized return immediately.
 */

void
xmlInitParser(void) {
    /*
     * Note that the initialization code must not make memory allocations.
     */
    if (xmlParserInitialized)
        return;

#ifdef LIBXML_THREAD_ENABLED
    /* Take the global init mutex and test the flag again under it. */
    __xmlGlobalInitMutexLock();
    if (!xmlParserInitialized) {
#endif
#if defined(_WIN32) && (!defined(LIBXML_STATIC) || defined(LIBXML_STATIC_FOR_DLL))
        /* Only register the exit-time cleanup with the default allocator. */
        if (xmlFree == free)
            atexit(xmlCleanupParser);
#endif

        xmlInitThreadsInternal();
        xmlInitGlobalsInternal();
        xmlInitMemoryInternal();
        __xmlInitializeDict();
        xmlInitEncodingInternal();
        xmlRegisterDefaultInputCallbacks();
#ifdef LIBXML_OUTPUT_ENABLED
        xmlRegisterDefaultOutputCallbacks();
#endif /* LIBXML_OUTPUT_ENABLED */
#if defined(LIBXML_XPATH_ENABLED) || defined(LIBXML_SCHEMAS_ENABLED)
        xmlInitXPathInternal();
#endif
        xmlParserInitialized = 1;
#ifdef LIBXML_THREAD_ENABLED
    }
    __xmlGlobalInitMutexUnlock();
#endif
}
14554
14555
/**
14556
 * xmlCleanupParser:
14557
 *
14558
 * This function name is somewhat misleading. It does not clean up
14559
 * parser state, it cleans up memory allocated by the library itself.
14560
 * It is a cleanup function for the XML library. It tries to reclaim all
14561
 * related global memory allocated for the library processing.
14562
 * It doesn't deallocate any document related memory. One should
14563
 * call xmlCleanupParser() only when the process has finished using
14564
 * the library and all XML/HTML documents built with it.
14565
 * See also xmlInitParser() which has the opposite function of preparing
14566
 * the library for operations.
14567
 *
14568
 * WARNING: if your application is multithreaded or has plugin support
14569
 *          calling this may crash the application if another thread or
14570
 *          a plugin is still using libxml2. It's sometimes very hard to
14571
 *          guess if libxml2 is in use in the application, some libraries
14572
 *          or plugins may use it without notice. In case of doubt abstain
14573
 *          from calling this function or do it just before calling exit()
14574
 *          to avoid leak reports from valgrind !
14575
 */
14576
14577
void
14578
0
xmlCleanupParser(void) {
14579
0
    if (!xmlParserInitialized)
14580
0
  return;
14581
14582
0
    xmlCleanupCharEncodingHandlers();
14583
0
#ifdef LIBXML_CATALOG_ENABLED
14584
0
    xmlCatalogCleanup();
14585
0
#endif
14586
0
    xmlCleanupDictInternal();
14587
0
    xmlCleanupInputCallbacks();
14588
0
#ifdef LIBXML_OUTPUT_ENABLED
14589
0
    xmlCleanupOutputCallbacks();
14590
0
#endif
14591
0
#ifdef LIBXML_SCHEMAS_ENABLED
14592
0
    xmlSchemaCleanupTypes();
14593
0
    xmlRelaxNGCleanupTypes();
14594
0
#endif
14595
0
    xmlCleanupGlobalsInternal();
14596
0
    xmlCleanupThreadsInternal();
14597
0
    xmlCleanupMemoryInternal();
14598
0
    xmlParserInitialized = 0;
14599
0
}
14600
14601
#if defined(HAVE_ATTRIBUTE_DESTRUCTOR) && !defined(LIBXML_STATIC) && \
    !defined(_WIN32)
/*
 * xmlDestructor:
 *
 * Function tagged with ATTRIBUTE_DESTRUCTOR which runs the library
 * cleanup, but only while xmlFree is still the C library free().
 */
static void
ATTRIBUTE_DESTRUCTOR
xmlDestructor(void) {
    /*
     * Calling custom deallocation functions in a destructor can cause
     * problems, for example with Nokogiri.
     */
    if (xmlFree != free)
        return;

    xmlCleanupParser();
}
#endif
14614
14615
/************************************************************************
14616
 *                  *
14617
 *  New set (2.6.0) of simpler and more flexible APIs   *
14618
 *                  *
14619
 ************************************************************************/
14620
14621
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named "dict" must be visible where the
 * macro is used; when it is NULL every non-NULL string is freed.
 *
 * The expansion is wrapped in do { } while (0) so the macro behaves as
 * one statement: it needs a terminating semicolon and cannot capture a
 * following "else".
 */
#define DICT_FREE(str)                                          \
    do {                                                        \
        if ((str) && ((!dict) ||                                \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))  \
            xmlFree((char *)(str));                             \
    } while (0)
14632
14633
/**
14634
 * xmlCtxtReset:
14635
 * @ctxt: an XML parser context
14636
 *
14637
 * Reset a parser context
14638
 */
14639
void
14640
xmlCtxtReset(xmlParserCtxtPtr ctxt)
14641
0
{
14642
0
    xmlParserInputPtr input;
14643
0
    xmlDictPtr dict;
14644
14645
0
    if (ctxt == NULL)
14646
0
        return;
14647
14648
0
    dict = ctxt->dict;
14649
14650
0
    while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14651
0
        xmlFreeInputStream(input);
14652
0
    }
14653
0
    ctxt->inputNr = 0;
14654
0
    ctxt->input = NULL;
14655
14656
0
    ctxt->spaceNr = 0;
14657
0
    if (ctxt->spaceTab != NULL) {
14658
0
  ctxt->spaceTab[0] = -1;
14659
0
  ctxt->space = &ctxt->spaceTab[0];
14660
0
    } else {
14661
0
        ctxt->space = NULL;
14662
0
    }
14663
14664
14665
0
    ctxt->nodeNr = 0;
14666
0
    ctxt->node = NULL;
14667
14668
0
    ctxt->nameNr = 0;
14669
0
    ctxt->name = NULL;
14670
14671
0
    ctxt->nsNr = 0;
14672
14673
0
    DICT_FREE(ctxt->version);
14674
0
    ctxt->version = NULL;
14675
0
    DICT_FREE(ctxt->encoding);
14676
0
    ctxt->encoding = NULL;
14677
0
    DICT_FREE(ctxt->directory);
14678
0
    ctxt->directory = NULL;
14679
0
    DICT_FREE(ctxt->extSubURI);
14680
0
    ctxt->extSubURI = NULL;
14681
0
    DICT_FREE(ctxt->extSubSystem);
14682
0
    ctxt->extSubSystem = NULL;
14683
0
    if (ctxt->myDoc != NULL)
14684
0
        xmlFreeDoc(ctxt->myDoc);
14685
0
    ctxt->myDoc = NULL;
14686
14687
0
    ctxt->standalone = -1;
14688
0
    ctxt->hasExternalSubset = 0;
14689
0
    ctxt->hasPErefs = 0;
14690
0
    ctxt->html = 0;
14691
0
    ctxt->external = 0;
14692
0
    ctxt->instate = XML_PARSER_START;
14693
0
    ctxt->token = 0;
14694
14695
0
    ctxt->wellFormed = 1;
14696
0
    ctxt->nsWellFormed = 1;
14697
0
    ctxt->disableSAX = 0;
14698
0
    ctxt->valid = 1;
14699
#if 0
14700
    ctxt->vctxt.userData = ctxt;
14701
    ctxt->vctxt.error = xmlParserValidityError;
14702
    ctxt->vctxt.warning = xmlParserValidityWarning;
14703
#endif
14704
0
    ctxt->record_info = 0;
14705
0
    ctxt->checkIndex = 0;
14706
0
    ctxt->endCheckState = 0;
14707
0
    ctxt->inSubset = 0;
14708
0
    ctxt->errNo = XML_ERR_OK;
14709
0
    ctxt->depth = 0;
14710
0
    ctxt->charset = XML_CHAR_ENCODING_UTF8;
14711
0
    ctxt->catalogs = NULL;
14712
0
    ctxt->sizeentities = 0;
14713
0
    ctxt->sizeentcopy = 0;
14714
0
    xmlInitNodeInfoSeq(&ctxt->node_seq);
14715
14716
0
    if (ctxt->attsDefault != NULL) {
14717
0
        xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
14718
0
        ctxt->attsDefault = NULL;
14719
0
    }
14720
0
    if (ctxt->attsSpecial != NULL) {
14721
0
        xmlHashFree(ctxt->attsSpecial, NULL);
14722
0
        ctxt->attsSpecial = NULL;
14723
0
    }
14724
14725
0
#ifdef LIBXML_CATALOG_ENABLED
14726
0
    if (ctxt->catalogs != NULL)
14727
0
  xmlCatalogFreeLocal(ctxt->catalogs);
14728
0
#endif
14729
0
    ctxt->nbErrors = 0;
14730
0
    ctxt->nbWarnings = 0;
14731
0
    if (ctxt->lastError.code != XML_ERR_OK)
14732
0
        xmlResetError(&ctxt->lastError);
14733
0
}
14734
14735
/**
14736
 * xmlCtxtResetPush:
14737
 * @ctxt: an XML parser context
14738
 * @chunk:  a pointer to an array of chars
14739
 * @size:  number of chars in the array
14740
 * @filename:  an optional file name or URI
14741
 * @encoding:  the document encoding, or NULL
14742
 *
14743
 * Reset a push parser context
14744
 *
14745
 * Returns 0 in case of success and 1 in case of error
14746
 */
14747
int
14748
xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14749
                 int size, const char *filename, const char *encoding)
14750
0
{
14751
0
    xmlParserInputPtr inputStream;
14752
0
    xmlParserInputBufferPtr buf;
14753
0
    xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14754
14755
0
    if (ctxt == NULL)
14756
0
        return(1);
14757
14758
0
    if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14759
0
        enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14760
14761
0
    buf = xmlAllocParserInputBuffer(enc);
14762
0
    if (buf == NULL)
14763
0
        return(1);
14764
14765
0
    if (ctxt == NULL) {
14766
0
        xmlFreeParserInputBuffer(buf);
14767
0
        return(1);
14768
0
    }
14769
14770
0
    xmlCtxtReset(ctxt);
14771
14772
0
    if (filename == NULL) {
14773
0
        ctxt->directory = NULL;
14774
0
    } else {
14775
0
        ctxt->directory = xmlParserGetDirectory(filename);
14776
0
    }
14777
14778
0
    inputStream = xmlNewInputStream(ctxt);
14779
0
    if (inputStream == NULL) {
14780
0
        xmlFreeParserInputBuffer(buf);
14781
0
        return(1);
14782
0
    }
14783
14784
0
    if (filename == NULL)
14785
0
        inputStream->filename = NULL;
14786
0
    else
14787
0
        inputStream->filename = (char *)
14788
0
            xmlCanonicPath((const xmlChar *) filename);
14789
0
    inputStream->buf = buf;
14790
0
    xmlBufResetInput(buf->buffer, inputStream);
14791
14792
0
    inputPush(ctxt, inputStream);
14793
14794
0
    if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14795
0
        (ctxt->input->buf != NULL)) {
14796
0
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
14797
0
        size_t cur = ctxt->input->cur - ctxt->input->base;
14798
14799
0
        xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14800
14801
0
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
14802
#ifdef DEBUG_PUSH
14803
        xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14804
#endif
14805
0
    }
14806
14807
0
    if (encoding != NULL) {
14808
0
        xmlCharEncodingHandlerPtr hdlr;
14809
14810
0
        if (ctxt->encoding != NULL)
14811
0
      xmlFree((xmlChar *) ctxt->encoding);
14812
0
        ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14813
14814
0
        hdlr = xmlFindCharEncodingHandler(encoding);
14815
0
        if (hdlr != NULL) {
14816
0
            xmlSwitchToEncoding(ctxt, hdlr);
14817
0
  } else {
14818
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14819
0
            "Unsupported encoding %s\n", BAD_CAST encoding);
14820
0
        }
14821
0
    } else if (enc != XML_CHAR_ENCODING_NONE) {
14822
0
        xmlSwitchEncoding(ctxt, enc);
14823
0
    }
14824
14825
0
    return(0);
14826
0
}
14827
14828
14829
/**
14830
 * xmlCtxtUseOptionsInternal:
14831
 * @ctxt: an XML parser context
14832
 * @options:  a combination of xmlParserOption
14833
 * @encoding:  the user provided encoding to use
14834
 *
14835
 * Applies the options to the parser context
14836
 *
14837
 * Returns 0 in case of success, the set of unknown or unimplemented options
14838
 *         in case of error.
14839
 */
14840
static int
14841
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
14842
941k
{
14843
941k
    if (ctxt == NULL)
14844
0
        return(-1);
14845
941k
    if (encoding != NULL) {
14846
0
        if (ctxt->encoding != NULL)
14847
0
      xmlFree((xmlChar *) ctxt->encoding);
14848
0
        ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14849
0
    }
14850
941k
    if (options & XML_PARSE_RECOVER) {
14851
547k
        ctxt->recovery = 1;
14852
547k
        options -= XML_PARSE_RECOVER;
14853
547k
  ctxt->options |= XML_PARSE_RECOVER;
14854
547k
    } else
14855
394k
        ctxt->recovery = 0;
14856
941k
    if (options & XML_PARSE_DTDLOAD) {
14857
650k
        ctxt->loadsubset = XML_DETECT_IDS;
14858
650k
        options -= XML_PARSE_DTDLOAD;
14859
650k
  ctxt->options |= XML_PARSE_DTDLOAD;
14860
650k
    } else
14861
291k
        ctxt->loadsubset = 0;
14862
941k
    if (options & XML_PARSE_DTDATTR) {
14863
369k
        ctxt->loadsubset |= XML_COMPLETE_ATTRS;
14864
369k
        options -= XML_PARSE_DTDATTR;
14865
369k
  ctxt->options |= XML_PARSE_DTDATTR;
14866
369k
    }
14867
941k
    if (options & XML_PARSE_NOENT) {
14868
601k
        ctxt->replaceEntities = 1;
14869
        /* ctxt->loadsubset |= XML_DETECT_IDS; */
14870
601k
        options -= XML_PARSE_NOENT;
14871
601k
  ctxt->options |= XML_PARSE_NOENT;
14872
601k
    } else
14873
339k
        ctxt->replaceEntities = 0;
14874
941k
    if (options & XML_PARSE_PEDANTIC) {
14875
201k
        ctxt->pedantic = 1;
14876
201k
        options -= XML_PARSE_PEDANTIC;
14877
201k
  ctxt->options |= XML_PARSE_PEDANTIC;
14878
201k
    } else
14879
739k
        ctxt->pedantic = 0;
14880
941k
    if (options & XML_PARSE_NOBLANKS) {
14881
372k
        ctxt->keepBlanks = 0;
14882
372k
        ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
14883
372k
        options -= XML_PARSE_NOBLANKS;
14884
372k
  ctxt->options |= XML_PARSE_NOBLANKS;
14885
372k
    } else
14886
569k
        ctxt->keepBlanks = 1;
14887
941k
    if (options & XML_PARSE_DTDVALID) {
14888
386k
        ctxt->validate = 1;
14889
386k
        if (options & XML_PARSE_NOWARNING)
14890
224k
            ctxt->vctxt.warning = NULL;
14891
386k
        if (options & XML_PARSE_NOERROR)
14892
309k
            ctxt->vctxt.error = NULL;
14893
386k
        options -= XML_PARSE_DTDVALID;
14894
386k
  ctxt->options |= XML_PARSE_DTDVALID;
14895
386k
    } else
14896
554k
        ctxt->validate = 0;
14897
941k
    if (options & XML_PARSE_NOWARNING) {
14898
319k
        ctxt->sax->warning = NULL;
14899
319k
        options -= XML_PARSE_NOWARNING;
14900
319k
    }
14901
941k
    if (options & XML_PARSE_NOERROR) {
14902
472k
        ctxt->sax->error = NULL;
14903
472k
        ctxt->sax->fatalError = NULL;
14904
472k
        options -= XML_PARSE_NOERROR;
14905
472k
    }
14906
941k
#ifdef LIBXML_SAX1_ENABLED
14907
941k
    if (options & XML_PARSE_SAX1) {
14908
313k
        ctxt->sax->startElement = xmlSAX2StartElement;
14909
313k
        ctxt->sax->endElement = xmlSAX2EndElement;
14910
313k
        ctxt->sax->startElementNs = NULL;
14911
313k
        ctxt->sax->endElementNs = NULL;
14912
313k
        ctxt->sax->initialized = 1;
14913
313k
        options -= XML_PARSE_SAX1;
14914
313k
  ctxt->options |= XML_PARSE_SAX1;
14915
313k
    }
14916
941k
#endif /* LIBXML_SAX1_ENABLED */
14917
941k
    if (options & XML_PARSE_NODICT) {
14918
341k
        ctxt->dictNames = 0;
14919
341k
        options -= XML_PARSE_NODICT;
14920
341k
  ctxt->options |= XML_PARSE_NODICT;
14921
599k
    } else {
14922
599k
        ctxt->dictNames = 1;
14923
599k
    }
14924
941k
    if (options & XML_PARSE_NOCDATA) {
14925
365k
        ctxt->sax->cdataBlock = NULL;
14926
365k
        options -= XML_PARSE_NOCDATA;
14927
365k
  ctxt->options |= XML_PARSE_NOCDATA;
14928
365k
    }
14929
941k
    if (options & XML_PARSE_NSCLEAN) {
14930
494k
  ctxt->options |= XML_PARSE_NSCLEAN;
14931
494k
        options -= XML_PARSE_NSCLEAN;
14932
494k
    }
14933
941k
    if (options & XML_PARSE_NONET) {
14934
411k
  ctxt->options |= XML_PARSE_NONET;
14935
411k
        options -= XML_PARSE_NONET;
14936
411k
    }
14937
941k
    if (options & XML_PARSE_COMPACT) {
14938
577k
  ctxt->options |= XML_PARSE_COMPACT;
14939
577k
        options -= XML_PARSE_COMPACT;
14940
577k
    }
14941
941k
    if (options & XML_PARSE_OLD10) {
14942
299k
  ctxt->options |= XML_PARSE_OLD10;
14943
299k
        options -= XML_PARSE_OLD10;
14944
299k
    }
14945
941k
    if (options & XML_PARSE_NOBASEFIX) {
14946
358k
  ctxt->options |= XML_PARSE_NOBASEFIX;
14947
358k
        options -= XML_PARSE_NOBASEFIX;
14948
358k
    }
14949
941k
    if (options & XML_PARSE_HUGE) {
14950
298k
  ctxt->options |= XML_PARSE_HUGE;
14951
298k
        options -= XML_PARSE_HUGE;
14952
298k
        if (ctxt->dict != NULL)
14953
298k
            xmlDictSetLimit(ctxt->dict, 0);
14954
298k
    }
14955
941k
    if (options & XML_PARSE_OLDSAX) {
14956
305k
  ctxt->options |= XML_PARSE_OLDSAX;
14957
305k
        options -= XML_PARSE_OLDSAX;
14958
305k
    }
14959
941k
    if (options & XML_PARSE_IGNORE_ENC) {
14960
461k
  ctxt->options |= XML_PARSE_IGNORE_ENC;
14961
461k
        options -= XML_PARSE_IGNORE_ENC;
14962
461k
    }
14963
941k
    if (options & XML_PARSE_BIG_LINES) {
14964
382k
  ctxt->options |= XML_PARSE_BIG_LINES;
14965
382k
        options -= XML_PARSE_BIG_LINES;
14966
382k
    }
14967
941k
    ctxt->linenumbers = 1;
14968
941k
    return (options);
14969
941k
}
14970
14971
/**
14972
 * xmlCtxtUseOptions:
14973
 * @ctxt: an XML parser context
14974
 * @options:  a combination of xmlParserOption
14975
 *
14976
 * Applies the options to the parser context
14977
 *
14978
 * Returns 0 in case of success, the set of unknown or unimplemented options
14979
 *         in case of error.
14980
 */
14981
int
14982
xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
14983
627k
{
14984
627k
   return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
14985
627k
}
14986
14987
/**
14988
 * xmlDoRead:
14989
 * @ctxt:  an XML parser context
14990
 * @URL:  the base URL to use for the document
14991
 * @encoding:  the document encoding, or NULL
14992
 * @options:  a combination of xmlParserOption
14993
 * @reuse:  keep the context for reuse
14994
 *
14995
 * Common front-end for the xmlRead functions
14996
 *
14997
 * Returns the resulting document tree or NULL
14998
 */
14999
static xmlDocPtr
15000
xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
15001
          int options, int reuse)
15002
313k
{
15003
313k
    xmlDocPtr ret;
15004
15005
313k
    xmlCtxtUseOptionsInternal(ctxt, options, encoding);
15006
313k
    if (encoding != NULL) {
15007
0
        xmlCharEncodingHandlerPtr hdlr;
15008
15009
0
  hdlr = xmlFindCharEncodingHandler(encoding);
15010
0
  if (hdlr != NULL)
15011
0
      xmlSwitchToEncoding(ctxt, hdlr);
15012
0
    }
15013
313k
    if ((URL != NULL) && (ctxt->input != NULL) &&
15014
313k
        (ctxt->input->filename == NULL))
15015
313k
        ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
15016
313k
    xmlParseDocument(ctxt);
15017
313k
    if ((ctxt->wellFormed) || ctxt->recovery)
15018
190k
        ret = ctxt->myDoc;
15019
123k
    else {
15020
123k
        ret = NULL;
15021
123k
  if (ctxt->myDoc != NULL) {
15022
103k
      xmlFreeDoc(ctxt->myDoc);
15023
103k
  }
15024
123k
    }
15025
313k
    ctxt->myDoc = NULL;
15026
313k
    if (!reuse) {
15027
313k
  xmlFreeParserCtxt(ctxt);
15028
313k
    }
15029
15030
313k
    return (ret);
15031
313k
}
15032
15033
/**
15034
 * xmlReadDoc:
15035
 * @cur:  a pointer to a zero terminated string
15036
 * @URL:  the base URL to use for the document
15037
 * @encoding:  the document encoding, or NULL
15038
 * @options:  a combination of xmlParserOption
15039
 *
15040
 * parse an XML in-memory document and build a tree.
15041
 *
15042
 * Returns the resulting document tree
15043
 */
15044
xmlDocPtr
15045
xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
15046
0
{
15047
0
    xmlParserCtxtPtr ctxt;
15048
15049
0
    if (cur == NULL)
15050
0
        return (NULL);
15051
0
    xmlInitParser();
15052
15053
0
    ctxt = xmlCreateDocParserCtxt(cur);
15054
0
    if (ctxt == NULL)
15055
0
        return (NULL);
15056
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15057
0
}
15058
15059
/**
15060
 * xmlReadFile:
15061
 * @filename:  a file or URL
15062
 * @encoding:  the document encoding, or NULL
15063
 * @options:  a combination of xmlParserOption
15064
 *
15065
 * parse an XML file from the filesystem or the network.
15066
 *
15067
 * Returns the resulting document tree
15068
 */
15069
xmlDocPtr
15070
xmlReadFile(const char *filename, const char *encoding, int options)
15071
0
{
15072
0
    xmlParserCtxtPtr ctxt;
15073
15074
0
    xmlInitParser();
15075
0
    ctxt = xmlCreateURLParserCtxt(filename, options);
15076
0
    if (ctxt == NULL)
15077
0
        return (NULL);
15078
0
    return (xmlDoRead(ctxt, NULL, encoding, options, 0));
15079
0
}
15080
15081
/**
15082
 * xmlReadMemory:
15083
 * @buffer:  a pointer to a char array
15084
 * @size:  the size of the array
15085
 * @URL:  the base URL to use for the document
15086
 * @encoding:  the document encoding, or NULL
15087
 * @options:  a combination of xmlParserOption
15088
 *
15089
 * parse an XML in-memory document and build a tree.
15090
 *
15091
 * Returns the resulting document tree
15092
 */
15093
xmlDocPtr
15094
xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
15095
313k
{
15096
313k
    xmlParserCtxtPtr ctxt;
15097
15098
313k
    xmlInitParser();
15099
313k
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
15100
313k
    if (ctxt == NULL)
15101
639
        return (NULL);
15102
313k
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15103
313k
}
15104
15105
/**
15106
 * xmlReadFd:
15107
 * @fd:  an open file descriptor
15108
 * @URL:  the base URL to use for the document
15109
 * @encoding:  the document encoding, or NULL
15110
 * @options:  a combination of xmlParserOption
15111
 *
15112
 * parse an XML from a file descriptor and build a tree.
15113
 * NOTE that the file descriptor will not be closed when the
15114
 *      reader is closed or reset.
15115
 *
15116
 * Returns the resulting document tree
15117
 */
15118
xmlDocPtr
15119
xmlReadFd(int fd, const char *URL, const char *encoding, int options)
15120
0
{
15121
0
    xmlParserCtxtPtr ctxt;
15122
0
    xmlParserInputBufferPtr input;
15123
0
    xmlParserInputPtr stream;
15124
15125
0
    if (fd < 0)
15126
0
        return (NULL);
15127
0
    xmlInitParser();
15128
15129
0
    input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15130
0
    if (input == NULL)
15131
0
        return (NULL);
15132
0
    input->closecallback = NULL;
15133
0
    ctxt = xmlNewParserCtxt();
15134
0
    if (ctxt == NULL) {
15135
0
        xmlFreeParserInputBuffer(input);
15136
0
        return (NULL);
15137
0
    }
15138
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15139
0
    if (stream == NULL) {
15140
0
        xmlFreeParserInputBuffer(input);
15141
0
  xmlFreeParserCtxt(ctxt);
15142
0
        return (NULL);
15143
0
    }
15144
0
    inputPush(ctxt, stream);
15145
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15146
0
}
15147
15148
/**
15149
 * xmlReadIO:
15150
 * @ioread:  an I/O read function
15151
 * @ioclose:  an I/O close function
15152
 * @ioctx:  an I/O handler
15153
 * @URL:  the base URL to use for the document
15154
 * @encoding:  the document encoding, or NULL
15155
 * @options:  a combination of xmlParserOption
15156
 *
15157
 * parse an XML document from I/O functions and source and build a tree.
15158
 *
15159
 * Returns the resulting document tree
15160
 */
15161
xmlDocPtr
15162
xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
15163
          void *ioctx, const char *URL, const char *encoding, int options)
15164
0
{
15165
0
    xmlParserCtxtPtr ctxt;
15166
0
    xmlParserInputBufferPtr input;
15167
0
    xmlParserInputPtr stream;
15168
15169
0
    if (ioread == NULL)
15170
0
        return (NULL);
15171
0
    xmlInitParser();
15172
15173
0
    input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15174
0
                                         XML_CHAR_ENCODING_NONE);
15175
0
    if (input == NULL) {
15176
0
        if (ioclose != NULL)
15177
0
            ioclose(ioctx);
15178
0
        return (NULL);
15179
0
    }
15180
0
    ctxt = xmlNewParserCtxt();
15181
0
    if (ctxt == NULL) {
15182
0
        xmlFreeParserInputBuffer(input);
15183
0
        return (NULL);
15184
0
    }
15185
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15186
0
    if (stream == NULL) {
15187
0
        xmlFreeParserInputBuffer(input);
15188
0
  xmlFreeParserCtxt(ctxt);
15189
0
        return (NULL);
15190
0
    }
15191
0
    inputPush(ctxt, stream);
15192
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15193
0
}
15194
15195
/**
15196
 * xmlCtxtReadDoc:
15197
 * @ctxt:  an XML parser context
15198
 * @cur:  a pointer to a zero terminated string
15199
 * @URL:  the base URL to use for the document
15200
 * @encoding:  the document encoding, or NULL
15201
 * @options:  a combination of xmlParserOption
15202
 *
15203
 * parse an XML in-memory document and build a tree.
15204
 * This reuses the existing @ctxt parser context
15205
 *
15206
 * Returns the resulting document tree
15207
 */
15208
xmlDocPtr
15209
xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
15210
               const char *URL, const char *encoding, int options)
15211
0
{
15212
0
    if (cur == NULL)
15213
0
        return (NULL);
15214
0
    return (xmlCtxtReadMemory(ctxt, (const char *) cur, xmlStrlen(cur), URL,
15215
0
                              encoding, options));
15216
0
}
15217
15218
/**
15219
 * xmlCtxtReadFile:
15220
 * @ctxt:  an XML parser context
15221
 * @filename:  a file or URL
15222
 * @encoding:  the document encoding, or NULL
15223
 * @options:  a combination of xmlParserOption
15224
 *
15225
 * parse an XML file from the filesystem or the network.
15226
 * This reuses the existing @ctxt parser context
15227
 *
15228
 * Returns the resulting document tree
15229
 */
15230
xmlDocPtr
15231
xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15232
                const char *encoding, int options)
15233
0
{
15234
0
    xmlParserInputPtr stream;
15235
15236
0
    if (filename == NULL)
15237
0
        return (NULL);
15238
0
    if (ctxt == NULL)
15239
0
        return (NULL);
15240
0
    xmlInitParser();
15241
15242
0
    xmlCtxtReset(ctxt);
15243
15244
0
    stream = xmlLoadExternalEntity(filename, NULL, ctxt);
15245
0
    if (stream == NULL) {
15246
0
        return (NULL);
15247
0
    }
15248
0
    inputPush(ctxt, stream);
15249
0
    return (xmlDoRead(ctxt, NULL, encoding, options, 1));
15250
0
}
15251
15252
/**
15253
 * xmlCtxtReadMemory:
15254
 * @ctxt:  an XML parser context
15255
 * @buffer:  a pointer to a char array
15256
 * @size:  the size of the array
15257
 * @URL:  the base URL to use for the document
15258
 * @encoding:  the document encoding, or NULL
15259
 * @options:  a combination of xmlParserOption
15260
 *
15261
 * parse an XML in-memory document and build a tree.
15262
 * This reuses the existing @ctxt parser context
15263
 *
15264
 * Returns the resulting document tree
15265
 */
15266
xmlDocPtr
15267
xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
15268
                  const char *URL, const char *encoding, int options)
15269
0
{
15270
0
    xmlParserInputBufferPtr input;
15271
0
    xmlParserInputPtr stream;
15272
15273
0
    if (ctxt == NULL)
15274
0
        return (NULL);
15275
0
    if (buffer == NULL)
15276
0
        return (NULL);
15277
0
    xmlInitParser();
15278
15279
0
    xmlCtxtReset(ctxt);
15280
15281
0
    input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
15282
0
    if (input == NULL) {
15283
0
  return(NULL);
15284
0
    }
15285
15286
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15287
0
    if (stream == NULL) {
15288
0
  xmlFreeParserInputBuffer(input);
15289
0
  return(NULL);
15290
0
    }
15291
15292
0
    inputPush(ctxt, stream);
15293
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15294
0
}
15295
15296
/**
15297
 * xmlCtxtReadFd:
15298
 * @ctxt:  an XML parser context
15299
 * @fd:  an open file descriptor
15300
 * @URL:  the base URL to use for the document
15301
 * @encoding:  the document encoding, or NULL
15302
 * @options:  a combination of xmlParserOption
15303
 *
15304
 * parse an XML from a file descriptor and build a tree.
15305
 * This reuses the existing @ctxt parser context
15306
 * NOTE that the file descriptor will not be closed when the
15307
 *      reader is closed or reset.
15308
 *
15309
 * Returns the resulting document tree
15310
 */
15311
xmlDocPtr
15312
xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15313
              const char *URL, const char *encoding, int options)
15314
0
{
15315
0
    xmlParserInputBufferPtr input;
15316
0
    xmlParserInputPtr stream;
15317
15318
0
    if (fd < 0)
15319
0
        return (NULL);
15320
0
    if (ctxt == NULL)
15321
0
        return (NULL);
15322
0
    xmlInitParser();
15323
15324
0
    xmlCtxtReset(ctxt);
15325
15326
15327
0
    input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15328
0
    if (input == NULL)
15329
0
        return (NULL);
15330
0
    input->closecallback = NULL;
15331
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15332
0
    if (stream == NULL) {
15333
0
        xmlFreeParserInputBuffer(input);
15334
0
        return (NULL);
15335
0
    }
15336
0
    inputPush(ctxt, stream);
15337
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15338
0
}
15339
15340
/**
15341
 * xmlCtxtReadIO:
15342
 * @ctxt:  an XML parser context
15343
 * @ioread:  an I/O read function
15344
 * @ioclose:  an I/O close function
15345
 * @ioctx:  an I/O handler
15346
 * @URL:  the base URL to use for the document
15347
 * @encoding:  the document encoding, or NULL
15348
 * @options:  a combination of xmlParserOption
15349
 *
15350
 * parse an XML document from I/O functions and source and build a tree.
15351
 * This reuses the existing @ctxt parser context
15352
 *
15353
 * Returns the resulting document tree
15354
 */
15355
xmlDocPtr
15356
xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15357
              xmlInputCloseCallback ioclose, void *ioctx,
15358
        const char *URL,
15359
              const char *encoding, int options)
15360
0
{
15361
0
    xmlParserInputBufferPtr input;
15362
0
    xmlParserInputPtr stream;
15363
15364
0
    if (ioread == NULL)
15365
0
        return (NULL);
15366
0
    if (ctxt == NULL)
15367
0
        return (NULL);
15368
0
    xmlInitParser();
15369
15370
0
    xmlCtxtReset(ctxt);
15371
15372
0
    input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15373
0
                                         XML_CHAR_ENCODING_NONE);
15374
0
    if (input == NULL) {
15375
0
        if (ioclose != NULL)
15376
0
            ioclose(ioctx);
15377
0
        return (NULL);
15378
0
    }
15379
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15380
0
    if (stream == NULL) {
15381
0
        xmlFreeParserInputBuffer(input);
15382
0
        return (NULL);
15383
0
    }
15384
0
    inputPush(ctxt, stream);
15385
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15386
0
}
15387