Coverage Report

Created: 2023-10-15 08:16

/src/libxml2/parser.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3
 *            implemented on top of the SAX interfaces
4
 *
5
 * References:
6
 *   The XML specification:
7
 *     http://www.w3.org/TR/REC-xml
8
 *   Original 1.0 version:
9
 *     http://www.w3.org/TR/1998/REC-xml-19980210
10
 *   XML second edition working draft
11
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
12
 *
13
 * Okay this is a big file, the parser core is around 7000 lines, then it
14
 * is followed by the progressive parser top routines, then the various
15
 * high level APIs to call the parser and a few miscellaneous functions.
16
 * A number of helper functions and deprecated ones have been moved to
17
 * parserInternals.c to reduce this file size.
18
 * As much as possible the functions are associated with their relative
19
 * production in the XML specification. A few productions defining the
20
 * different ranges of character are actually implemented either in
21
 * parserInternals.h or parserInternals.c
22
 * The DOM tree build is realized from the default SAX callbacks in
23
 * the module SAX.c.
24
 * The routines doing the validation checks are in valid.c and called either
25
 * from the SAX callbacks or as standalone functions using a preparsed
26
 * document.
27
 *
28
 * See Copyright for the status of this software.
29
 *
30
 * daniel@veillard.com
31
 */
32
33
/* To avoid EBCDIC trouble when parsing on zOS */
34
#if defined(__MVS__)
35
#pragma convert("ISO8859-1")
36
#endif
37
38
#define IN_LIBXML
39
#include "libxml.h"
40
41
#if defined(_WIN32)
42
#define XML_DIR_SEP '\\'
43
#else
44
#define XML_DIR_SEP '/'
45
#endif
46
47
#include <stdlib.h>
48
#include <limits.h>
49
#include <string.h>
50
#include <stdarg.h>
51
#include <stddef.h>
52
#include <ctype.h>
53
#include <stdlib.h>
54
#include <libxml/xmlmemory.h>
55
#include <libxml/threads.h>
56
#include <libxml/globals.h>
57
#include <libxml/tree.h>
58
#include <libxml/parser.h>
59
#include <libxml/parserInternals.h>
60
#include <libxml/HTMLparser.h>
61
#include <libxml/valid.h>
62
#include <libxml/entities.h>
63
#include <libxml/xmlerror.h>
64
#include <libxml/encoding.h>
65
#include <libxml/xmlIO.h>
66
#include <libxml/uri.h>
67
#ifdef LIBXML_CATALOG_ENABLED
68
#include <libxml/catalog.h>
69
#endif
70
#ifdef LIBXML_SCHEMAS_ENABLED
71
#include <libxml/xmlschemastypes.h>
72
#include <libxml/relaxng.h>
73
#endif
74
#if defined(LIBXML_XPATH_ENABLED) || defined(LIBXML_SCHEMAS_ENABLED)
75
#include <libxml/xpath.h>
76
#endif
77
78
#include "private/buf.h"
79
#include "private/dict.h"
80
#include "private/enc.h"
81
#include "private/entities.h"
82
#include "private/error.h"
83
#include "private/globals.h"
84
#include "private/html.h"
85
#include "private/io.h"
86
#include "private/memory.h"
87
#include "private/parser.h"
88
#include "private/threads.h"
89
#include "private/xpath.h"
90
91
/*
 * Record describing one start tag.
 * NOTE(review): the code that fills and reads this record is outside the
 * visible part of the file; the field notes below are inferred from the
 * names only and should be confirmed against the users of the struct.
 */
struct _xmlStartTag {
    const xmlChar *prefix;  /* presumably the element's namespace prefix — TODO confirm */
    const xmlChar *URI;     /* presumably the element's namespace URI — TODO confirm */
    int line;               /* presumably the input line of the start tag — TODO confirm */
    int nsNr;               /* presumably a namespace count for this element — TODO confirm */
};
97
98
static xmlParserCtxtPtr
99
xmlCreateEntityParserCtxtInternal(xmlSAXHandlerPtr sax, void *userData,
100
        const xmlChar *URL, const xmlChar *ID, const xmlChar *base,
101
        xmlParserCtxtPtr pctx);
102
103
static void xmlHaltParser(xmlParserCtxtPtr ctxt);
104
105
static int
106
xmlParseElementStart(xmlParserCtxtPtr ctxt);
107
108
static void
109
xmlParseElementEnd(xmlParserCtxtPtr ctxt);
110
111
/************************************************************************
112
 *                  *
113
 *  Arbitrary limits set in the parser. See XML_PARSE_HUGE    *
114
 *                  *
115
 ************************************************************************/
116
117
193M
#define XML_MAX_HUGE_LENGTH 1000000000
118
119
#define XML_PARSER_BIG_ENTITY 1000
120
#define XML_PARSER_LOT_ENTITY 5000
121
122
/*
123
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
124
 *    replacement over the size in byte of the input indicates that you have
125
 *    an exponential behaviour. A value of 10 corresponds to at least 3 entity
126
 *    replacement per byte of input.
127
 */
128
9.14k
#define XML_PARSER_NON_LINEAR 10
129
130
728M
#define XML_ENT_FIXED_COST 50
131
132
/**
133
 * xmlParserMaxDepth:
134
 *
135
 * arbitrary depth limit for the XML documents that we allow to
136
 * process. This is not a limitation of the parser but a safety
137
 * boundary feature. It can be disabled with the XML_PARSE_HUGE
138
 * parser option.
139
 */
140
unsigned int xmlParserMaxDepth = 256;
141
142
143
144
#define SAX2 1
145
430M
#define XML_PARSER_BIG_BUFFER_SIZE 300
146
130G
#define XML_PARSER_BUFFER_SIZE 100
147
2.20M
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
148
149
/**
150
 * XML_PARSER_CHUNK_SIZE
151
 *
152
 * When calling GROW that's the minimal amount of data
153
 * the parser expected to have received. It is not a hard
154
 * limit but an optimization when reading strings like Names
155
 * It is not strictly needed as long as inputs available characters
156
 * are followed by 0, which should be provided by the I/O level
157
 */
158
138M
#define XML_PARSER_CHUNK_SIZE 100
159
160
/*
 * List of XML prefixed PI allowed by W3C specs.
 * The array is terminated by a NULL entry.
 */

static const char* const xmlW3CPIs[] = {
    "xml-stylesheet",
    "xml-model",
    NULL
};
169
170
171
/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
172
static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
173
                                              const xmlChar **str);
174
175
static xmlParserErrors
176
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
177
                xmlSAXHandlerPtr sax,
178
          void *user_data, int depth, const xmlChar *URL,
179
          const xmlChar *ID, xmlNodePtr *list);
180
181
static int
182
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
183
                          const char *encoding);
184
#ifdef LIBXML_LEGACY_ENABLED
185
static void
186
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
187
                      xmlNodePtr lastNode);
188
#endif /* LIBXML_LEGACY_ENABLED */
189
190
static xmlParserErrors
191
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
192
          const xmlChar *string, void *user_data, xmlNodePtr *lst);
193
194
static int
195
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
196
197
/************************************************************************
198
 *                  *
199
 *    Some factorized error routines        *
200
 *                  *
201
 ************************************************************************/
202
203
/**
204
 * xmlErrAttributeDup:
205
 * @ctxt:  an XML parser context
206
 * @prefix:  the attribute prefix
207
 * @localname:  the attribute localname
208
 *
209
 * Handle a redefinition of attribute error
210
 */
211
static void
212
xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
213
                   const xmlChar * localname)
214
32.6k
{
215
32.6k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
216
32.6k
        (ctxt->instate == XML_PARSER_EOF))
217
0
  return;
218
32.6k
    if (ctxt != NULL)
219
32.6k
  ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
220
221
32.6k
    if (prefix == NULL)
222
22.2k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
223
22.2k
                        XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
224
22.2k
                        (const char *) localname, NULL, NULL, 0, 0,
225
22.2k
                        "Attribute %s redefined\n", localname);
226
10.3k
    else
227
10.3k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
228
10.3k
                        XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
229
10.3k
                        (const char *) prefix, (const char *) localname,
230
10.3k
                        NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
231
10.3k
                        localname);
232
32.6k
    if (ctxt != NULL) {
233
32.6k
  ctxt->wellFormed = 0;
234
32.6k
  if (ctxt->recovery == 0)
235
15.5k
      ctxt->disableSAX = 1;
236
32.6k
    }
237
32.6k
}
238
239
/**
240
 * xmlFatalErr:
241
 * @ctxt:  an XML parser context
242
 * @error:  the error number
243
 * @extra:  extra information string
244
 *
245
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
246
 */
247
static void
248
xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
249
17.8M
{
250
17.8M
    const char *errmsg;
251
252
17.8M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
253
17.8M
        (ctxt->instate == XML_PARSER_EOF))
254
27.6k
  return;
255
17.7M
    switch (error) {
256
45.0k
        case XML_ERR_INVALID_HEX_CHARREF:
257
45.0k
            errmsg = "CharRef: invalid hexadecimal value";
258
45.0k
            break;
259
98.4k
        case XML_ERR_INVALID_DEC_CHARREF:
260
98.4k
            errmsg = "CharRef: invalid decimal value";
261
98.4k
            break;
262
0
        case XML_ERR_INVALID_CHARREF:
263
0
            errmsg = "CharRef: invalid value";
264
0
            break;
265
938k
        case XML_ERR_INTERNAL_ERROR:
266
938k
            errmsg = "internal error";
267
938k
            break;
268
0
        case XML_ERR_PEREF_AT_EOF:
269
0
            errmsg = "PEReference at end of document";
270
0
            break;
271
0
        case XML_ERR_PEREF_IN_PROLOG:
272
0
            errmsg = "PEReference in prolog";
273
0
            break;
274
0
        case XML_ERR_PEREF_IN_EPILOG:
275
0
            errmsg = "PEReference in epilog";
276
0
            break;
277
0
        case XML_ERR_PEREF_NO_NAME:
278
0
            errmsg = "PEReference: no name";
279
0
            break;
280
4.90M
        case XML_ERR_PEREF_SEMICOL_MISSING:
281
4.90M
            errmsg = "PEReference: expecting ';'";
282
4.90M
            break;
283
3.11k
        case XML_ERR_ENTITY_LOOP:
284
3.11k
            errmsg = "Detected an entity reference loop";
285
3.11k
            break;
286
0
        case XML_ERR_ENTITY_NOT_STARTED:
287
0
            errmsg = "EntityValue: \" or ' expected";
288
0
            break;
289
3.65k
        case XML_ERR_ENTITY_PE_INTERNAL:
290
3.65k
            errmsg = "PEReferences forbidden in internal subset";
291
3.65k
            break;
292
5.34k
        case XML_ERR_ENTITY_NOT_FINISHED:
293
5.34k
            errmsg = "EntityValue: \" or ' expected";
294
5.34k
            break;
295
87.4k
        case XML_ERR_ATTRIBUTE_NOT_STARTED:
296
87.4k
            errmsg = "AttValue: \" or ' expected";
297
87.4k
            break;
298
260k
        case XML_ERR_LT_IN_ATTRIBUTE:
299
260k
            errmsg = "Unescaped '<' not allowed in attributes values";
300
260k
            break;
301
41.5k
        case XML_ERR_LITERAL_NOT_STARTED:
302
41.5k
            errmsg = "SystemLiteral \" or ' expected";
303
41.5k
            break;
304
52.4k
        case XML_ERR_LITERAL_NOT_FINISHED:
305
52.4k
            errmsg = "Unfinished System or Public ID \" or ' expected";
306
52.4k
            break;
307
41.8k
        case XML_ERR_MISPLACED_CDATA_END:
308
41.8k
            errmsg = "Sequence ']]>' not allowed in content";
309
41.8k
            break;
310
30.1k
        case XML_ERR_URI_REQUIRED:
311
30.1k
            errmsg = "SYSTEM or PUBLIC, the URI is missing";
312
30.1k
            break;
313
11.5k
        case XML_ERR_PUBID_REQUIRED:
314
11.5k
            errmsg = "PUBLIC, the Public Identifier is missing";
315
11.5k
            break;
316
9.97M
        case XML_ERR_HYPHEN_IN_COMMENT:
317
9.97M
            errmsg = "Comment must not contain '--' (double-hyphen)";
318
9.97M
            break;
319
62.3k
        case XML_ERR_PI_NOT_STARTED:
320
62.3k
            errmsg = "xmlParsePI : no target name";
321
62.3k
            break;
322
10.8k
        case XML_ERR_RESERVED_XML_NAME:
323
10.8k
            errmsg = "Invalid PI name";
324
10.8k
            break;
325
3.58k
        case XML_ERR_NOTATION_NOT_STARTED:
326
3.58k
            errmsg = "NOTATION: Name expected here";
327
3.58k
            break;
328
48.8k
        case XML_ERR_NOTATION_NOT_FINISHED:
329
48.8k
            errmsg = "'>' required to close NOTATION declaration";
330
48.8k
            break;
331
29.9k
        case XML_ERR_VALUE_REQUIRED:
332
29.9k
            errmsg = "Entity value required";
333
29.9k
            break;
334
4.16k
        case XML_ERR_URI_FRAGMENT:
335
4.16k
            errmsg = "Fragment not allowed";
336
4.16k
            break;
337
23.8k
        case XML_ERR_ATTLIST_NOT_STARTED:
338
23.8k
            errmsg = "'(' required to start ATTLIST enumeration";
339
23.8k
            break;
340
6.16k
        case XML_ERR_NMTOKEN_REQUIRED:
341
6.16k
            errmsg = "NmToken expected in ATTLIST enumeration";
342
6.16k
            break;
343
15.5k
        case XML_ERR_ATTLIST_NOT_FINISHED:
344
15.5k
            errmsg = "')' required to finish ATTLIST enumeration";
345
15.5k
            break;
346
4.25k
        case XML_ERR_MIXED_NOT_STARTED:
347
4.25k
            errmsg = "MixedContentDecl : '|' or ')*' expected";
348
4.25k
            break;
349
0
        case XML_ERR_PCDATA_REQUIRED:
350
0
            errmsg = "MixedContentDecl : '#PCDATA' expected";
351
0
            break;
352
18.2k
        case XML_ERR_ELEMCONTENT_NOT_STARTED:
353
18.2k
            errmsg = "ContentDecl : Name or '(' expected";
354
18.2k
            break;
355
18.9k
        case XML_ERR_ELEMCONTENT_NOT_FINISHED:
356
18.9k
            errmsg = "ContentDecl : ',' '|' or ')' expected";
357
18.9k
            break;
358
0
        case XML_ERR_PEREF_IN_INT_SUBSET:
359
0
            errmsg =
360
0
                "PEReference: forbidden within markup decl in internal subset";
361
0
            break;
362
254k
        case XML_ERR_GT_REQUIRED:
363
254k
            errmsg = "expected '>'";
364
254k
            break;
365
452
        case XML_ERR_CONDSEC_INVALID:
366
452
            errmsg = "XML conditional section '[' expected";
367
452
            break;
368
45.9k
        case XML_ERR_EXT_SUBSET_NOT_FINISHED:
369
45.9k
            errmsg = "Content error in the external subset";
370
45.9k
            break;
371
2.37k
        case XML_ERR_CONDSEC_INVALID_KEYWORD:
372
2.37k
            errmsg =
373
2.37k
                "conditional section INCLUDE or IGNORE keyword expected";
374
2.37k
            break;
375
2.99k
        case XML_ERR_CONDSEC_NOT_FINISHED:
376
2.99k
            errmsg = "XML conditional section not closed";
377
2.99k
            break;
378
430
        case XML_ERR_XMLDECL_NOT_STARTED:
379
430
            errmsg = "Text declaration '<?xml' required";
380
430
            break;
381
124k
        case XML_ERR_XMLDECL_NOT_FINISHED:
382
124k
            errmsg = "parsing XML declaration: '?>' expected";
383
124k
            break;
384
0
        case XML_ERR_EXT_ENTITY_STANDALONE:
385
0
            errmsg = "external parsed entities cannot be standalone";
386
0
            break;
387
180k
        case XML_ERR_ENTITYREF_SEMICOL_MISSING:
388
180k
            errmsg = "EntityRef: expecting ';'";
389
180k
            break;
390
33.6k
        case XML_ERR_DOCTYPE_NOT_FINISHED:
391
33.6k
            errmsg = "DOCTYPE improperly terminated";
392
33.6k
            break;
393
0
        case XML_ERR_LTSLASH_REQUIRED:
394
0
            errmsg = "EndTag: '</' not found";
395
0
            break;
396
7.14k
        case XML_ERR_EQUAL_REQUIRED:
397
7.14k
            errmsg = "expected '='";
398
7.14k
            break;
399
27.8k
        case XML_ERR_STRING_NOT_CLOSED:
400
27.8k
            errmsg = "String not closed expecting \" or '";
401
27.8k
            break;
402
5.76k
        case XML_ERR_STRING_NOT_STARTED:
403
5.76k
            errmsg = "String not started expecting ' or \"";
404
5.76k
            break;
405
836
        case XML_ERR_ENCODING_NAME:
406
836
            errmsg = "Invalid XML encoding name";
407
836
            break;
408
1.35k
        case XML_ERR_STANDALONE_VALUE:
409
1.35k
            errmsg = "standalone accepts only 'yes' or 'no'";
410
1.35k
            break;
411
37.8k
        case XML_ERR_DOCUMENT_EMPTY:
412
37.8k
            errmsg = "Document is empty";
413
37.8k
            break;
414
226k
        case XML_ERR_DOCUMENT_END:
415
226k
            errmsg = "Extra content at the end of the document";
416
226k
            break;
417
10.1k
        case XML_ERR_NOT_WELL_BALANCED:
418
10.1k
            errmsg = "chunk is not well balanced";
419
10.1k
            break;
420
0
        case XML_ERR_EXTRA_CONTENT:
421
0
            errmsg = "extra content at the end of well balanced chunk";
422
0
            break;
423
69.6k
        case XML_ERR_VERSION_MISSING:
424
69.6k
            errmsg = "Malformed declaration expecting version";
425
69.6k
            break;
426
498
        case XML_ERR_NAME_TOO_LONG:
427
498
            errmsg = "Name too long";
428
498
            break;
429
#if 0
430
        case:
431
            errmsg = "";
432
            break;
433
#endif
434
2.44k
        default:
435
2.44k
            errmsg = "Unregistered error message";
436
17.7M
    }
437
17.7M
    if (ctxt != NULL)
438
17.7M
  ctxt->errNo = error;
439
17.7M
    if (info == NULL) {
440
16.8M
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
441
16.8M
                        XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s\n",
442
16.8M
                        errmsg);
443
16.8M
    } else {
444
938k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
445
938k
                        XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s: %s\n",
446
938k
                        errmsg, info);
447
938k
    }
448
17.7M
    if (ctxt != NULL) {
449
17.7M
  ctxt->wellFormed = 0;
450
17.7M
  if (ctxt->recovery == 0)
451
2.37M
      ctxt->disableSAX = 1;
452
17.7M
    }
453
17.7M
}
454
455
/**
456
 * xmlFatalErrMsg:
457
 * @ctxt:  an XML parser context
458
 * @error:  the error number
459
 * @msg:  the error message
460
 *
461
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
462
 */
463
static void LIBXML_ATTR_FORMAT(3,0)
464
xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
465
               const char *msg)
466
5.09M
{
467
5.09M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
468
5.09M
        (ctxt->instate == XML_PARSER_EOF))
469
1.34k
  return;
470
5.09M
    if (ctxt != NULL)
471
5.09M
  ctxt->errNo = error;
472
5.09M
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
473
5.09M
                    XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
474
5.09M
    if (ctxt != NULL) {
475
5.09M
  ctxt->wellFormed = 0;
476
5.09M
  if (ctxt->recovery == 0)
477
1.29M
      ctxt->disableSAX = 1;
478
5.09M
    }
479
5.09M
}
480
481
/**
482
 * xmlWarningMsg:
483
 * @ctxt:  an XML parser context
484
 * @error:  the error number
485
 * @msg:  the error message
486
 * @str1:  extra data
487
 * @str2:  extra data
488
 *
489
 * Handle a warning.
490
 */
491
static void LIBXML_ATTR_FORMAT(3,0)
492
xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
493
              const char *msg, const xmlChar *str1, const xmlChar *str2)
494
68.5M
{
495
68.5M
    xmlStructuredErrorFunc schannel = NULL;
496
497
68.5M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
498
68.5M
        (ctxt->instate == XML_PARSER_EOF))
499
0
  return;
500
68.5M
    if ((ctxt != NULL) && (ctxt->sax != NULL) &&
501
68.5M
        (ctxt->sax->initialized == XML_SAX2_MAGIC))
502
30.4M
        schannel = ctxt->sax->serror;
503
68.5M
    if (ctxt != NULL) {
504
68.5M
        __xmlRaiseError(schannel,
505
68.5M
                    (ctxt->sax) ? ctxt->sax->warning : NULL,
506
68.5M
                    ctxt->userData,
507
68.5M
                    ctxt, NULL, XML_FROM_PARSER, error,
508
68.5M
                    XML_ERR_WARNING, NULL, 0,
509
68.5M
        (const char *) str1, (const char *) str2, NULL, 0, 0,
510
68.5M
        msg, (const char *) str1, (const char *) str2);
511
68.5M
    } else {
512
0
        __xmlRaiseError(schannel, NULL, NULL,
513
0
                    ctxt, NULL, XML_FROM_PARSER, error,
514
0
                    XML_ERR_WARNING, NULL, 0,
515
0
        (const char *) str1, (const char *) str2, NULL, 0, 0,
516
0
        msg, (const char *) str1, (const char *) str2);
517
0
    }
518
68.5M
}
519
520
/**
521
 * xmlValidityError:
522
 * @ctxt:  an XML parser context
523
 * @error:  the error number
524
 * @msg:  the error message
525
 * @str1:  extra data
526
 *
527
 * Handle a validity error.
528
 */
529
static void LIBXML_ATTR_FORMAT(3,0)
530
xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
531
              const char *msg, const xmlChar *str1, const xmlChar *str2)
532
12.3M
{
533
12.3M
    xmlStructuredErrorFunc schannel = NULL;
534
535
12.3M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
536
12.3M
        (ctxt->instate == XML_PARSER_EOF))
537
0
  return;
538
12.3M
    if (ctxt != NULL) {
539
12.3M
  ctxt->errNo = error;
540
12.3M
  if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
541
7.01M
      schannel = ctxt->sax->serror;
542
12.3M
    }
543
12.3M
    if (ctxt != NULL) {
544
12.3M
        __xmlRaiseError(schannel,
545
12.3M
                    ctxt->vctxt.error, ctxt->vctxt.userData,
546
12.3M
                    ctxt, NULL, XML_FROM_DTD, error,
547
12.3M
                    XML_ERR_ERROR, NULL, 0, (const char *) str1,
548
12.3M
        (const char *) str2, NULL, 0, 0,
549
12.3M
        msg, (const char *) str1, (const char *) str2);
550
12.3M
  ctxt->valid = 0;
551
12.3M
    } else {
552
0
        __xmlRaiseError(schannel, NULL, NULL,
553
0
                    ctxt, NULL, XML_FROM_DTD, error,
554
0
                    XML_ERR_ERROR, NULL, 0, (const char *) str1,
555
0
        (const char *) str2, NULL, 0, 0,
556
0
        msg, (const char *) str1, (const char *) str2);
557
0
    }
558
12.3M
}
559
560
/**
561
 * xmlFatalErrMsgInt:
562
 * @ctxt:  an XML parser context
563
 * @error:  the error number
564
 * @msg:  the error message
565
 * @val:  an integer value
566
 *
567
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
568
 */
569
static void LIBXML_ATTR_FORMAT(3,0)
570
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
571
                  const char *msg, int val)
572
2.60M
{
573
2.60M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
574
2.60M
        (ctxt->instate == XML_PARSER_EOF))
575
0
  return;
576
2.60M
    if (ctxt != NULL)
577
2.60M
  ctxt->errNo = error;
578
2.60M
    __xmlRaiseError(NULL, NULL, NULL,
579
2.60M
                    ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
580
2.60M
                    NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
581
2.60M
    if (ctxt != NULL) {
582
2.60M
  ctxt->wellFormed = 0;
583
2.60M
  if (ctxt->recovery == 0)
584
348k
      ctxt->disableSAX = 1;
585
2.60M
    }
586
2.60M
}
587
588
/**
589
 * xmlFatalErrMsgStrIntStr:
590
 * @ctxt:  an XML parser context
591
 * @error:  the error number
592
 * @msg:  the error message
593
 * @str1:  an string info
594
 * @val:  an integer value
595
 * @str2:  an string info
596
 *
597
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
598
 */
599
static void LIBXML_ATTR_FORMAT(3,0)
600
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
601
                  const char *msg, const xmlChar *str1, int val,
602
      const xmlChar *str2)
603
1.66M
{
604
1.66M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
605
1.66M
        (ctxt->instate == XML_PARSER_EOF))
606
0
  return;
607
1.66M
    if (ctxt != NULL)
608
1.66M
  ctxt->errNo = error;
609
1.66M
    __xmlRaiseError(NULL, NULL, NULL,
610
1.66M
                    ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
611
1.66M
                    NULL, 0, (const char *) str1, (const char *) str2,
612
1.66M
        NULL, val, 0, msg, str1, val, str2);
613
1.66M
    if (ctxt != NULL) {
614
1.66M
  ctxt->wellFormed = 0;
615
1.66M
  if (ctxt->recovery == 0)
616
456k
      ctxt->disableSAX = 1;
617
1.66M
    }
618
1.66M
}
619
620
/**
621
 * xmlFatalErrMsgStr:
622
 * @ctxt:  an XML parser context
623
 * @error:  the error number
624
 * @msg:  the error message
625
 * @val:  a string value
626
 *
627
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
628
 */
629
static void LIBXML_ATTR_FORMAT(3,0)
630
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
631
                  const char *msg, const xmlChar * val)
632
166M
{
633
166M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
634
166M
        (ctxt->instate == XML_PARSER_EOF))
635
31
  return;
636
166M
    if (ctxt != NULL)
637
166M
  ctxt->errNo = error;
638
166M
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
639
166M
                    XML_FROM_PARSER, error, XML_ERR_FATAL,
640
166M
                    NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
641
166M
                    val);
642
166M
    if (ctxt != NULL) {
643
166M
  ctxt->wellFormed = 0;
644
166M
  if (ctxt->recovery == 0)
645
57.1M
      ctxt->disableSAX = 1;
646
166M
    }
647
166M
}
648
649
/**
650
 * xmlErrMsgStr:
651
 * @ctxt:  an XML parser context
652
 * @error:  the error number
653
 * @msg:  the error message
654
 * @val:  a string value
655
 *
656
 * Handle a non fatal parser error
657
 */
658
static void LIBXML_ATTR_FORMAT(3,0)
659
xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
660
                  const char *msg, const xmlChar * val)
661
736k
{
662
736k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
663
736k
        (ctxt->instate == XML_PARSER_EOF))
664
0
  return;
665
736k
    if (ctxt != NULL)
666
736k
  ctxt->errNo = error;
667
736k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
668
736k
                    XML_FROM_PARSER, error, XML_ERR_ERROR,
669
736k
                    NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
670
736k
                    val);
671
736k
}
672
673
/**
674
 * xmlNsErr:
675
 * @ctxt:  an XML parser context
676
 * @error:  the error number
677
 * @msg:  the message
678
 * @info1:  extra information string
679
 * @info2:  extra information string
680
 *
681
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
682
 */
683
static void LIBXML_ATTR_FORMAT(3,0)
684
xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
685
         const char *msg,
686
         const xmlChar * info1, const xmlChar * info2,
687
         const xmlChar * info3)
688
1.10M
{
689
1.10M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
690
1.10M
        (ctxt->instate == XML_PARSER_EOF))
691
96
  return;
692
1.10M
    if (ctxt != NULL)
693
1.10M
  ctxt->errNo = error;
694
1.10M
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
695
1.10M
                    XML_ERR_ERROR, NULL, 0, (const char *) info1,
696
1.10M
                    (const char *) info2, (const char *) info3, 0, 0, msg,
697
1.10M
                    info1, info2, info3);
698
1.10M
    if (ctxt != NULL)
699
1.10M
  ctxt->nsWellFormed = 0;
700
1.10M
}
701
702
/**
703
 * xmlNsWarn
704
 * @ctxt:  an XML parser context
705
 * @error:  the error number
706
 * @msg:  the message
707
 * @info1:  extra information string
708
 * @info2:  extra information string
709
 *
710
 * Handle a namespace warning error
711
 */
712
static void LIBXML_ATTR_FORMAT(3,0)
713
xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
714
         const char *msg,
715
         const xmlChar * info1, const xmlChar * info2,
716
         const xmlChar * info3)
717
87.2k
{
718
87.2k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
719
87.2k
        (ctxt->instate == XML_PARSER_EOF))
720
0
  return;
721
87.2k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
722
87.2k
                    XML_ERR_WARNING, NULL, 0, (const char *) info1,
723
87.2k
                    (const char *) info2, (const char *) info3, 0, 0, msg,
724
87.2k
                    info1, info2, info3);
725
87.2k
}
726
727
/*
 * Add val to *dst, clamping the result to ULONG_MAX instead of letting
 * the unsigned addition wrap around.
 */
static void
xmlSaturatedAdd(unsigned long *dst, unsigned long val) {
    /* Largest amount that can still be added without wrapping. */
    unsigned long headroom = ULONG_MAX - *dst;

    *dst = (val > headroom) ? ULONG_MAX : (*dst + val);
}
734
735
/*
 * Add a size_t quantity to *dst, clamping the result to ULONG_MAX.
 *
 * val is taken as size_t so that the saturation test sees the full value:
 * where size_t is wider than unsigned long, an unsigned long parameter
 * would silently truncate a large value before it could be compared.
 * The comparison is carried out in the wider of the two types, so any
 * val that does not fit in the remaining headroom saturates.
 */
static void
xmlSaturatedAddSizeT(unsigned long *dst, size_t val) {
    if (val > ULONG_MAX - *dst)
        *dst = ULONG_MAX;
    else
        *dst += (unsigned long) val; /* fits: val <= ULONG_MAX - *dst */
}
742
743
/**
744
 * xmlParserEntityCheck:
745
 * @ctxt:  parser context
746
 * @extra:  sum of unexpanded entity sizes
747
 *
748
 * Check for non-linear entity expansion behaviour.
749
 *
750
 * In some cases like xmlStringDecodeEntities, this function is called
751
 * for each, possibly nested entity and its unexpanded content length.
752
 *
753
 * In other cases like xmlParseReference, it's only called for each
754
 * top-level entity with its unexpanded content length plus the sum of
755
 * the unexpanded content lengths (plus fixed cost) of all nested
756
 * entities.
757
 *
758
 * Summing the unexpanded lengths also adds the length of the reference.
759
 * This is by design. Taking the length of the entity name into account
760
 * discourages attacks that try to waste CPU time with abusively long
761
 * entity names. See test/recurse/lol6.xml for example. Each call also
762
 * adds some fixed cost XML_ENT_FIXED_COST to discourage attacks with
763
 * short entities.
764
 *
765
 * Returns 1 on error, 0 on success.
766
 */
767
static int
768
xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long extra)
769
728M
{
770
728M
    unsigned long consumed;
771
728M
    xmlParserInputPtr input = ctxt->input;
772
728M
    xmlEntityPtr entity = input->entity;
773
774
    /*
775
     * Compute total consumed bytes so far, including input streams of
776
     * external entities.
777
     */
778
728M
    consumed = input->parentConsumed;
779
728M
    if ((entity == NULL) ||
780
728M
        ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
781
416M
         ((entity->flags & XML_ENT_PARSED) == 0))) {
782
312M
        xmlSaturatedAdd(&consumed, input->consumed);
783
312M
        xmlSaturatedAddSizeT(&consumed, input->cur - input->base);
784
312M
    }
785
728M
    xmlSaturatedAdd(&consumed, ctxt->sizeentities);
786
787
    /*
788
     * Add extra cost and some fixed cost.
789
     */
790
728M
    xmlSaturatedAdd(&ctxt->sizeentcopy, extra);
791
728M
    xmlSaturatedAdd(&ctxt->sizeentcopy, XML_ENT_FIXED_COST);
792
793
    /*
794
     * It's important to always use saturation arithmetic when tracking
795
     * entity sizes to make the size checks reliable. If "sizeentcopy"
796
     * overflows, we have to abort.
797
     */
798
728M
    if ((ctxt->sizeentcopy > XML_MAX_TEXT_LENGTH) &&
799
728M
        ((ctxt->sizeentcopy >= ULONG_MAX) ||
800
9.14k
         (ctxt->sizeentcopy / XML_PARSER_NON_LINEAR > consumed))) {
801
9.14k
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_LOOP,
802
9.14k
                       "Maximum entity amplification factor exceeded");
803
9.14k
        xmlHaltParser(ctxt);
804
9.14k
        return(1);
805
9.14k
    }
806
807
728M
    return(0);
808
728M
}
809
810
/************************************************************************
811
 *                  *
812
 *    Library wide options          *
813
 *                  *
814
 ************************************************************************/
815
816
/**
817
  * xmlHasFeature:
818
  * @feature: the feature to be examined
819
  *
820
  * Examines if the library has been compiled with a given feature.
821
  *
822
  * Returns a non-zero value if the feature exists, otherwise zero.
823
  * Returns zero (0) if the feature does not exist or an unknown
824
  * feature is requested, non-zero otherwise.
825
  */
826
int
827
xmlHasFeature(xmlFeature feature)
828
0
{
829
0
    switch (feature) {
830
0
  case XML_WITH_THREAD:
831
0
#ifdef LIBXML_THREAD_ENABLED
832
0
      return(1);
833
#else
834
      return(0);
835
#endif
836
0
        case XML_WITH_TREE:
837
0
#ifdef LIBXML_TREE_ENABLED
838
0
            return(1);
839
#else
840
            return(0);
841
#endif
842
0
        case XML_WITH_OUTPUT:
843
0
#ifdef LIBXML_OUTPUT_ENABLED
844
0
            return(1);
845
#else
846
            return(0);
847
#endif
848
0
        case XML_WITH_PUSH:
849
0
#ifdef LIBXML_PUSH_ENABLED
850
0
            return(1);
851
#else
852
            return(0);
853
#endif
854
0
        case XML_WITH_READER:
855
0
#ifdef LIBXML_READER_ENABLED
856
0
            return(1);
857
#else
858
            return(0);
859
#endif
860
0
        case XML_WITH_PATTERN:
861
0
#ifdef LIBXML_PATTERN_ENABLED
862
0
            return(1);
863
#else
864
            return(0);
865
#endif
866
0
        case XML_WITH_WRITER:
867
0
#ifdef LIBXML_WRITER_ENABLED
868
0
            return(1);
869
#else
870
            return(0);
871
#endif
872
0
        case XML_WITH_SAX1:
873
0
#ifdef LIBXML_SAX1_ENABLED
874
0
            return(1);
875
#else
876
            return(0);
877
#endif
878
0
        case XML_WITH_FTP:
879
#ifdef LIBXML_FTP_ENABLED
880
            return(1);
881
#else
882
0
            return(0);
883
0
#endif
884
0
        case XML_WITH_HTTP:
885
#ifdef LIBXML_HTTP_ENABLED
886
            return(1);
887
#else
888
0
            return(0);
889
0
#endif
890
0
        case XML_WITH_VALID:
891
0
#ifdef LIBXML_VALID_ENABLED
892
0
            return(1);
893
#else
894
            return(0);
895
#endif
896
0
        case XML_WITH_HTML:
897
0
#ifdef LIBXML_HTML_ENABLED
898
0
            return(1);
899
#else
900
            return(0);
901
#endif
902
0
        case XML_WITH_LEGACY:
903
#ifdef LIBXML_LEGACY_ENABLED
904
            return(1);
905
#else
906
0
            return(0);
907
0
#endif
908
0
        case XML_WITH_C14N:
909
0
#ifdef LIBXML_C14N_ENABLED
910
0
            return(1);
911
#else
912
            return(0);
913
#endif
914
0
        case XML_WITH_CATALOG:
915
0
#ifdef LIBXML_CATALOG_ENABLED
916
0
            return(1);
917
#else
918
            return(0);
919
#endif
920
0
        case XML_WITH_XPATH:
921
0
#ifdef LIBXML_XPATH_ENABLED
922
0
            return(1);
923
#else
924
            return(0);
925
#endif
926
0
        case XML_WITH_XPTR:
927
0
#ifdef LIBXML_XPTR_ENABLED
928
0
            return(1);
929
#else
930
            return(0);
931
#endif
932
0
        case XML_WITH_XINCLUDE:
933
0
#ifdef LIBXML_XINCLUDE_ENABLED
934
0
            return(1);
935
#else
936
            return(0);
937
#endif
938
0
        case XML_WITH_ICONV:
939
0
#ifdef LIBXML_ICONV_ENABLED
940
0
            return(1);
941
#else
942
            return(0);
943
#endif
944
0
        case XML_WITH_ISO8859X:
945
0
#ifdef LIBXML_ISO8859X_ENABLED
946
0
            return(1);
947
#else
948
            return(0);
949
#endif
950
0
        case XML_WITH_UNICODE:
951
0
#ifdef LIBXML_UNICODE_ENABLED
952
0
            return(1);
953
#else
954
            return(0);
955
#endif
956
0
        case XML_WITH_REGEXP:
957
0
#ifdef LIBXML_REGEXP_ENABLED
958
0
            return(1);
959
#else
960
            return(0);
961
#endif
962
0
        case XML_WITH_AUTOMATA:
963
0
#ifdef LIBXML_AUTOMATA_ENABLED
964
0
            return(1);
965
#else
966
            return(0);
967
#endif
968
0
        case XML_WITH_EXPR:
969
#ifdef LIBXML_EXPR_ENABLED
970
            return(1);
971
#else
972
0
            return(0);
973
0
#endif
974
0
        case XML_WITH_SCHEMAS:
975
0
#ifdef LIBXML_SCHEMAS_ENABLED
976
0
            return(1);
977
#else
978
            return(0);
979
#endif
980
0
        case XML_WITH_SCHEMATRON:
981
0
#ifdef LIBXML_SCHEMATRON_ENABLED
982
0
            return(1);
983
#else
984
            return(0);
985
#endif
986
0
        case XML_WITH_MODULES:
987
0
#ifdef LIBXML_MODULES_ENABLED
988
0
            return(1);
989
#else
990
            return(0);
991
#endif
992
0
        case XML_WITH_DEBUG:
993
#ifdef LIBXML_DEBUG_ENABLED
994
            return(1);
995
#else
996
0
            return(0);
997
0
#endif
998
0
        case XML_WITH_DEBUG_MEM:
999
#ifdef DEBUG_MEMORY_LOCATION
1000
            return(1);
1001
#else
1002
0
            return(0);
1003
0
#endif
1004
0
        case XML_WITH_DEBUG_RUN:
1005
0
            return(0);
1006
0
        case XML_WITH_ZLIB:
1007
0
#ifdef LIBXML_ZLIB_ENABLED
1008
0
            return(1);
1009
#else
1010
            return(0);
1011
#endif
1012
0
        case XML_WITH_LZMA:
1013
0
#ifdef LIBXML_LZMA_ENABLED
1014
0
            return(1);
1015
#else
1016
            return(0);
1017
#endif
1018
0
        case XML_WITH_ICU:
1019
#ifdef LIBXML_ICU_ENABLED
1020
            return(1);
1021
#else
1022
0
            return(0);
1023
0
#endif
1024
0
        default:
1025
0
      break;
1026
0
     }
1027
0
     return(0);
1028
0
}
1029
1030
/************************************************************************
1031
 *                  *
1032
 *    SAX2 defaulted attributes handling      *
1033
 *                  *
1034
 ************************************************************************/
1035
1036
/**
1037
 * xmlDetectSAX2:
1038
 * @ctxt:  an XML parser context
1039
 *
1040
 * Do the SAX2 detection and specific initialization
1041
 */
1042
static void
1043
1.99M
xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1044
1.99M
    xmlSAXHandlerPtr sax;
1045
1046
    /* Avoid unused variable warning if features are disabled. */
1047
1.99M
    (void) sax;
1048
1049
1.99M
    if (ctxt == NULL) return;
1050
1.99M
    sax = ctxt->sax;
1051
1.99M
#ifdef LIBXML_SAX1_ENABLED
1052
1.99M
    if ((sax) &&  (sax->initialized == XML_SAX2_MAGIC) &&
1053
1.99M
        ((sax->startElementNs != NULL) ||
1054
1.22M
         (sax->endElementNs != NULL) ||
1055
1.22M
         ((sax->startElement == NULL) && (sax->endElement == NULL))))
1056
1.22M
        ctxt->sax2 = 1;
1057
#else
1058
    ctxt->sax2 = 1;
1059
#endif /* LIBXML_SAX1_ENABLED */
1060
1061
1.99M
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1062
1.99M
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1063
1.99M
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
1064
1.99M
    if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1065
1.99M
    (ctxt->str_xml_ns == NULL)) {
1066
0
        xmlErrMemory(ctxt, NULL);
1067
0
    }
1068
1.99M
}
1069
1070
typedef struct _xmlDefAttrs xmlDefAttrs;
1071
typedef xmlDefAttrs *xmlDefAttrsPtr;
1072
struct _xmlDefAttrs {
1073
    int nbAttrs;  /* number of defaulted attributes on that element */
1074
    int maxAttrs;       /* the size of the array */
1075
#if __STDC_VERSION__ >= 199901L
1076
    /* Using a C99 flexible array member avoids UBSan errors. */
1077
    const xmlChar *values[]; /* array of localname/prefix/values/external */
1078
#else
1079
    const xmlChar *values[5];
1080
#endif
1081
};
1082
1083
/**
1084
 * xmlAttrNormalizeSpace:
1085
 * @src: the source string
1086
 * @dst: the target string
1087
 *
1088
 * Normalize the space in non CDATA attribute values:
1089
 * If the attribute type is not CDATA, then the XML processor MUST further
1090
 * process the normalized attribute value by discarding any leading and
1091
 * trailing space (#x20) characters, and by replacing sequences of space
1092
 * (#x20) characters by a single space (#x20) character.
1093
 * Note that the size of dst need to be at least src, and if one doesn't need
1094
 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1095
 * passing src as dst is just fine.
1096
 *
1097
 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1098
 *         is needed.
1099
 */
1100
static xmlChar *
1101
xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1102
434k
{
1103
434k
    if ((src == NULL) || (dst == NULL))
1104
0
        return(NULL);
1105
1106
755k
    while (*src == 0x20) src++;
1107
14.2M
    while (*src != 0) {
1108
13.8M
  if (*src == 0x20) {
1109
2.54M
      while (*src == 0x20) src++;
1110
332k
      if (*src != 0)
1111
303k
    *dst++ = 0x20;
1112
13.4M
  } else {
1113
13.4M
      *dst++ = *src++;
1114
13.4M
  }
1115
13.8M
    }
1116
434k
    *dst = 0;
1117
434k
    if (dst == src)
1118
338k
       return(NULL);
1119
96.6k
    return(dst);
1120
434k
}
1121
1122
/**
1123
 * xmlAttrNormalizeSpace2:
1124
 * @src: the source string
1125
 *
1126
 * Normalize the space in non CDATA attribute values, a slightly more complex
1127
 * front end to avoid allocation problems when running on attribute values
1128
 * coming from the input.
1129
 *
1130
 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1131
 *         is needed.
1132
 */
1133
static const xmlChar *
1134
xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
1135
100k
{
1136
100k
    int i;
1137
100k
    int remove_head = 0;
1138
100k
    int need_realloc = 0;
1139
100k
    const xmlChar *cur;
1140
1141
100k
    if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1142
0
        return(NULL);
1143
100k
    i = *len;
1144
100k
    if (i <= 0)
1145
3.01k
        return(NULL);
1146
1147
97.3k
    cur = src;
1148
120k
    while (*cur == 0x20) {
1149
22.9k
        cur++;
1150
22.9k
  remove_head++;
1151
22.9k
    }
1152
1.73M
    while (*cur != 0) {
1153
1.65M
  if (*cur == 0x20) {
1154
85.8k
      cur++;
1155
85.8k
      if ((*cur == 0x20) || (*cur == 0)) {
1156
12.3k
          need_realloc = 1;
1157
12.3k
    break;
1158
12.3k
      }
1159
85.8k
  } else
1160
1.56M
      cur++;
1161
1.65M
    }
1162
97.3k
    if (need_realloc) {
1163
12.3k
        xmlChar *ret;
1164
1165
12.3k
  ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1166
12.3k
  if (ret == NULL) {
1167
0
      xmlErrMemory(ctxt, NULL);
1168
0
      return(NULL);
1169
0
  }
1170
12.3k
  xmlAttrNormalizeSpace(ret, ret);
1171
12.3k
  *len = strlen((const char *)ret);
1172
12.3k
        return(ret);
1173
84.9k
    } else if (remove_head) {
1174
1.32k
        *len -= remove_head;
1175
1.32k
        memmove(src, src + remove_head, 1 + *len);
1176
1.32k
  return(src);
1177
1.32k
    }
1178
83.6k
    return(NULL);
1179
97.3k
}
1180
1181
/**
1182
 * xmlAddDefAttrs:
1183
 * @ctxt:  an XML parser context
1184
 * @fullname:  the element fullname
1185
 * @fullattr:  the attribute fullname
1186
 * @value:  the attribute value
1187
 *
1188
 * Add a defaulted attribute for an element
1189
 */
1190
static void
1191
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1192
               const xmlChar *fullname,
1193
               const xmlChar *fullattr,
1194
671k
               const xmlChar *value) {
1195
671k
    xmlDefAttrsPtr defaults;
1196
671k
    int len;
1197
671k
    const xmlChar *name;
1198
671k
    const xmlChar *prefix;
1199
1200
    /*
1201
     * Allows to detect attribute redefinitions
1202
     */
1203
671k
    if (ctxt->attsSpecial != NULL) {
1204
639k
        if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1205
27.3k
      return;
1206
639k
    }
1207
1208
644k
    if (ctxt->attsDefault == NULL) {
1209
63.9k
        ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1210
63.9k
  if (ctxt->attsDefault == NULL)
1211
0
      goto mem_error;
1212
63.9k
    }
1213
1214
    /*
1215
     * split the element name into prefix:localname , the string found
1216
     * are within the DTD and then not associated to namespace names.
1217
     */
1218
644k
    name = xmlSplitQName3(fullname, &len);
1219
644k
    if (name == NULL) {
1220
626k
        name = xmlDictLookup(ctxt->dict, fullname, -1);
1221
626k
  prefix = NULL;
1222
626k
    } else {
1223
18.2k
        name = xmlDictLookup(ctxt->dict, name, -1);
1224
18.2k
  prefix = xmlDictLookup(ctxt->dict, fullname, len);
1225
18.2k
    }
1226
1227
    /*
1228
     * make sure there is some storage
1229
     */
1230
644k
    defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1231
644k
    if (defaults == NULL) {
1232
339k
        defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
1233
339k
                     (4 * 5) * sizeof(const xmlChar *));
1234
339k
  if (defaults == NULL)
1235
0
      goto mem_error;
1236
339k
  defaults->nbAttrs = 0;
1237
339k
  defaults->maxAttrs = 4;
1238
339k
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1239
339k
                          defaults, NULL) < 0) {
1240
0
      xmlFree(defaults);
1241
0
      goto mem_error;
1242
0
  }
1243
339k
    } else if (defaults->nbAttrs >= defaults->maxAttrs) {
1244
3.69k
        xmlDefAttrsPtr temp;
1245
1246
3.69k
        temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
1247
3.69k
           (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1248
3.69k
  if (temp == NULL)
1249
0
      goto mem_error;
1250
3.69k
  defaults = temp;
1251
3.69k
  defaults->maxAttrs *= 2;
1252
3.69k
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1253
3.69k
                          defaults, NULL) < 0) {
1254
0
      xmlFree(defaults);
1255
0
      goto mem_error;
1256
0
  }
1257
3.69k
    }
1258
1259
    /*
1260
     * Split the element name into prefix:localname , the string found
1261
     * are within the DTD and hen not associated to namespace names.
1262
     */
1263
644k
    name = xmlSplitQName3(fullattr, &len);
1264
644k
    if (name == NULL) {
1265
529k
        name = xmlDictLookup(ctxt->dict, fullattr, -1);
1266
529k
  prefix = NULL;
1267
529k
    } else {
1268
114k
        name = xmlDictLookup(ctxt->dict, name, -1);
1269
114k
  prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1270
114k
    }
1271
1272
644k
    defaults->values[5 * defaults->nbAttrs] = name;
1273
644k
    defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1274
    /* intern the string and precompute the end */
1275
644k
    len = xmlStrlen(value);
1276
644k
    value = xmlDictLookup(ctxt->dict, value, len);
1277
644k
    if (value == NULL)
1278
0
        goto mem_error;
1279
644k
    defaults->values[5 * defaults->nbAttrs + 2] = value;
1280
644k
    defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1281
644k
    if (ctxt->external)
1282
447k
        defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1283
197k
    else
1284
197k
        defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1285
644k
    defaults->nbAttrs++;
1286
1287
644k
    return;
1288
1289
0
mem_error:
1290
0
    xmlErrMemory(ctxt, NULL);
1291
0
    return;
1292
644k
}
1293
1294
/**
1295
 * xmlAddSpecialAttr:
1296
 * @ctxt:  an XML parser context
1297
 * @fullname:  the element fullname
1298
 * @fullattr:  the attribute fullname
1299
 * @type:  the attribute type
1300
 *
1301
 * Register this attribute type
1302
 */
1303
static void
1304
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1305
      const xmlChar *fullname,
1306
      const xmlChar *fullattr,
1307
      int type)
1308
9.74M
{
1309
9.74M
    if (ctxt->attsSpecial == NULL) {
1310
110k
        ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1311
110k
  if (ctxt->attsSpecial == NULL)
1312
0
      goto mem_error;
1313
110k
    }
1314
1315
9.74M
    if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1316
1.42M
        return;
1317
1318
8.32M
    xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1319
8.32M
                     (void *) (ptrdiff_t) type);
1320
8.32M
    return;
1321
1322
0
mem_error:
1323
0
    xmlErrMemory(ctxt, NULL);
1324
0
    return;
1325
9.74M
}
1326
1327
/**
1328
 * xmlCleanSpecialAttrCallback:
1329
 *
1330
 * Removes CDATA attributes from the special attribute table
1331
 */
1332
static void
1333
xmlCleanSpecialAttrCallback(void *payload, void *data,
1334
                            const xmlChar *fullname, const xmlChar *fullattr,
1335
6.73M
                            const xmlChar *unused ATTRIBUTE_UNUSED) {
1336
6.73M
    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1337
1338
6.73M
    if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) {
1339
2.13M
        xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1340
2.13M
    }
1341
6.73M
}
1342
1343
/**
1344
 * xmlCleanSpecialAttr:
1345
 * @ctxt:  an XML parser context
1346
 *
1347
 * Trim the list of attributes defined to remove all those of type
1348
 * CDATA as they are not special. This call should be done when finishing
1349
 * to parse the DTD and before starting to parse the document root.
1350
 */
1351
static void
1352
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1353
422k
{
1354
422k
    if (ctxt->attsSpecial == NULL)
1355
334k
        return;
1356
1357
88.0k
    xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1358
1359
88.0k
    if (xmlHashSize(ctxt->attsSpecial) == 0) {
1360
8.76k
        xmlHashFree(ctxt->attsSpecial, NULL);
1361
8.76k
        ctxt->attsSpecial = NULL;
1362
8.76k
    }
1363
88.0k
    return;
1364
422k
}
1365
1366
/**
1367
 * xmlCheckLanguageID:
1368
 * @lang:  pointer to the string value
1369
 *
1370
 * Checks that the value conforms to the LanguageID production:
1371
 *
1372
 * NOTE: this is somewhat deprecated, those productions were removed from
1373
 *       the XML Second edition.
1374
 *
1375
 * [33] LanguageID ::= Langcode ('-' Subcode)*
1376
 * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
1377
 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1378
 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1379
 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1380
 * [38] Subcode ::= ([a-z] | [A-Z])+
1381
 *
1382
 * The current REC reference the successors of RFC 1766, currently 5646
1383
 *
1384
 * http://www.rfc-editor.org/rfc/rfc5646.txt
1385
 * langtag       = language
1386
 *                 ["-" script]
1387
 *                 ["-" region]
1388
 *                 *("-" variant)
1389
 *                 *("-" extension)
1390
 *                 ["-" privateuse]
1391
 * language      = 2*3ALPHA            ; shortest ISO 639 code
1392
 *                 ["-" extlang]       ; sometimes followed by
1393
 *                                     ; extended language subtags
1394
 *               / 4ALPHA              ; or reserved for future use
1395
 *               / 5*8ALPHA            ; or registered language subtag
1396
 *
1397
 * extlang       = 3ALPHA              ; selected ISO 639 codes
1398
 *                 *2("-" 3ALPHA)      ; permanently reserved
1399
 *
1400
 * script        = 4ALPHA              ; ISO 15924 code
1401
 *
1402
 * region        = 2ALPHA              ; ISO 3166-1 code
1403
 *               / 3DIGIT              ; UN M.49 code
1404
 *
1405
 * variant       = 5*8alphanum         ; registered variants
1406
 *               / (DIGIT 3alphanum)
1407
 *
1408
 * extension     = singleton 1*("-" (2*8alphanum))
1409
 *
1410
 *                                     ; Single alphanumerics
1411
 *                                     ; "x" reserved for private use
1412
 * singleton     = DIGIT               ; 0 - 9
1413
 *               / %x41-57             ; A - W
1414
 *               / %x59-5A             ; Y - Z
1415
 *               / %x61-77             ; a - w
1416
 *               / %x79-7A             ; y - z
1417
 *
1418
 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1419
 * The parser below doesn't try to cope with extension or privateuse
1420
 * that could be added but that's not interoperable anyway
1421
 *
1422
 * Returns 1 if correct 0 otherwise
1423
 **/
1424
int
1425
xmlCheckLanguageID(const xmlChar * lang)
1426
39.2k
{
1427
39.2k
    const xmlChar *cur = lang, *nxt;
1428
1429
39.2k
    if (cur == NULL)
1430
633
        return (0);
1431
38.6k
    if (((cur[0] == 'i') && (cur[1] == '-')) ||
1432
38.6k
        ((cur[0] == 'I') && (cur[1] == '-')) ||
1433
38.6k
        ((cur[0] == 'x') && (cur[1] == '-')) ||
1434
38.6k
        ((cur[0] == 'X') && (cur[1] == '-'))) {
1435
        /*
1436
         * Still allow IANA code and user code which were coming
1437
         * from the previous version of the XML-1.0 specification
1438
         * it's deprecated but we should not fail
1439
         */
1440
1.48k
        cur += 2;
1441
10.4k
        while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1442
10.4k
               ((cur[0] >= 'a') && (cur[0] <= 'z')))
1443
8.98k
            cur++;
1444
1.48k
        return(cur[0] == 0);
1445
1.48k
    }
1446
37.1k
    nxt = cur;
1447
142k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1448
142k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1449
105k
           nxt++;
1450
37.1k
    if (nxt - cur >= 4) {
1451
        /*
1452
         * Reserved
1453
         */
1454
2.41k
        if ((nxt - cur > 8) || (nxt[0] != 0))
1455
1.93k
            return(0);
1456
488
        return(1);
1457
2.41k
    }
1458
34.7k
    if (nxt - cur < 2)
1459
1.62k
        return(0);
1460
    /* we got an ISO 639 code */
1461
33.1k
    if (nxt[0] == 0)
1462
15.5k
        return(1);
1463
17.5k
    if (nxt[0] != '-')
1464
2.75k
        return(0);
1465
1466
14.8k
    nxt++;
1467
14.8k
    cur = nxt;
1468
    /* now we can have extlang or script or region or variant */
1469
14.8k
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1470
1.45k
        goto region_m49;
1471
1472
58.8k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1473
58.8k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1474
45.4k
           nxt++;
1475
13.3k
    if (nxt - cur == 4)
1476
2.48k
        goto script;
1477
10.8k
    if (nxt - cur == 2)
1478
1.57k
        goto region;
1479
9.31k
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1480
523
        goto variant;
1481
8.79k
    if (nxt - cur != 3)
1482
1.13k
        return(0);
1483
    /* we parsed an extlang */
1484
7.65k
    if (nxt[0] == 0)
1485
571
        return(1);
1486
7.08k
    if (nxt[0] != '-')
1487
524
        return(0);
1488
1489
6.56k
    nxt++;
1490
6.56k
    cur = nxt;
1491
    /* now we can have script or region or variant */
1492
6.56k
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1493
697
        goto region_m49;
1494
1495
34.8k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1496
34.8k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1497
29.0k
           nxt++;
1498
5.86k
    if (nxt - cur == 2)
1499
411
        goto region;
1500
5.45k
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1501
2.88k
        goto variant;
1502
2.56k
    if (nxt - cur != 4)
1503
2.07k
        return(0);
1504
    /* we parsed a script */
1505
2.97k
script:
1506
2.97k
    if (nxt[0] == 0)
1507
497
        return(1);
1508
2.47k
    if (nxt[0] != '-')
1509
720
        return(0);
1510
1511
1.75k
    nxt++;
1512
1.75k
    cur = nxt;
1513
    /* now we can have region or variant */
1514
1.75k
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1515
364
        goto region_m49;
1516
1517
15.3k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1518
15.3k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1519
13.9k
           nxt++;
1520
1521
1.39k
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1522
340
        goto variant;
1523
1.05k
    if (nxt - cur != 2)
1524
699
        return(0);
1525
    /* we parsed a region */
1526
3.40k
region:
1527
3.40k
    if (nxt[0] == 0)
1528
587
        return(1);
1529
2.81k
    if (nxt[0] != '-')
1530
1.35k
        return(0);
1531
1532
1.46k
    nxt++;
1533
1.46k
    cur = nxt;
1534
    /* now we can just have a variant */
1535
31.5k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1536
31.5k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1537
30.1k
           nxt++;
1538
1539
1.46k
    if ((nxt - cur < 5) || (nxt - cur > 8))
1540
800
        return(0);
1541
1542
    /* we parsed a variant */
1543
4.41k
variant:
1544
4.41k
    if (nxt[0] == 0)
1545
540
        return(1);
1546
3.87k
    if (nxt[0] != '-')
1547
3.64k
        return(0);
1548
    /* extensions and private use subtags not checked */
1549
232
    return (1);
1550
1551
2.51k
region_m49:
1552
2.51k
    if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1553
2.51k
        ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1554
1.06k
        nxt += 3;
1555
1.06k
        goto region;
1556
1.06k
    }
1557
1.44k
    return(0);
1558
2.51k
}
1559
1560
/************************************************************************
1561
 *                  *
1562
 *    Parser stacks related functions and macros    *
1563
 *                  *
1564
 ************************************************************************/
1565
1566
static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1567
                                            const xmlChar ** str);
1568
1569
#ifdef SAX2
1570
/**
1571
 * nsPush:
1572
 * @ctxt:  an XML parser context
1573
 * @prefix:  the namespace prefix or NULL
1574
 * @URL:  the namespace name
1575
 *
1576
 * Pushes a new parser namespace on top of the ns stack
1577
 *
1578
 * Returns -1 in case of error, -2 if the namespace should be discarded
1579
 *     and the index in the stack otherwise.
1580
 */
1581
static int
1582
nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1583
321k
{
1584
321k
    if (ctxt->options & XML_PARSE_NSCLEAN) {
1585
96.3k
        int i;
1586
254k
  for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
1587
207k
      if (ctxt->nsTab[i] == prefix) {
1588
    /* in scope */
1589
49.2k
          if (ctxt->nsTab[i + 1] == URL)
1590
17.9k
        return(-2);
1591
    /* out of scope keep it */
1592
31.2k
    break;
1593
49.2k
      }
1594
207k
  }
1595
96.3k
    }
1596
303k
    if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1597
76.8k
  ctxt->nsMax = 10;
1598
76.8k
  ctxt->nsNr = 0;
1599
76.8k
  ctxt->nsTab = (const xmlChar **)
1600
76.8k
                xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1601
76.8k
  if (ctxt->nsTab == NULL) {
1602
0
      xmlErrMemory(ctxt, NULL);
1603
0
      ctxt->nsMax = 0;
1604
0
            return (-1);
1605
0
  }
1606
226k
    } else if (ctxt->nsNr >= ctxt->nsMax) {
1607
5.62k
        const xmlChar ** tmp;
1608
5.62k
        ctxt->nsMax *= 2;
1609
5.62k
        tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1610
5.62k
            ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1611
5.62k
        if (tmp == NULL) {
1612
0
            xmlErrMemory(ctxt, NULL);
1613
0
      ctxt->nsMax /= 2;
1614
0
            return (-1);
1615
0
        }
1616
5.62k
  ctxt->nsTab = tmp;
1617
5.62k
    }
1618
303k
    ctxt->nsTab[ctxt->nsNr++] = prefix;
1619
303k
    ctxt->nsTab[ctxt->nsNr++] = URL;
1620
303k
    return (ctxt->nsNr);
1621
303k
}
1622
/**
1623
 * nsPop:
1624
 * @ctxt: an XML parser context
1625
 * @nr:  the number to pop
1626
 *
1627
 * Pops the top @nr parser prefix/namespace from the ns stack
1628
 *
1629
 * Returns the number of namespaces removed
1630
 */
1631
static int
1632
nsPop(xmlParserCtxtPtr ctxt, int nr)
1633
137k
{
1634
137k
    int i;
1635
1636
137k
    if (ctxt->nsTab == NULL) return(0);
1637
137k
    if (ctxt->nsNr < nr) {
1638
0
        xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1639
0
        nr = ctxt->nsNr;
1640
0
    }
1641
137k
    if (ctxt->nsNr <= 0)
1642
0
        return (0);
1643
1644
454k
    for (i = 0;i < nr;i++) {
1645
316k
         ctxt->nsNr--;
1646
316k
   ctxt->nsTab[ctxt->nsNr] = NULL;
1647
316k
    }
1648
137k
    return(nr);
1649
137k
}
1650
#endif
1651
1652
static int
1653
170k
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1654
170k
    const xmlChar **atts;
1655
170k
    int *attallocs;
1656
170k
    int maxatts;
1657
1658
170k
    if (nr + 5 > ctxt->maxatts) {
1659
170k
  maxatts = ctxt->maxatts == 0 ? 55 : (nr + 5) * 2;
1660
170k
  atts = (const xmlChar **) xmlMalloc(
1661
170k
             maxatts * sizeof(const xmlChar *));
1662
170k
  if (atts == NULL) goto mem_error;
1663
170k
  attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1664
170k
                               (maxatts / 5) * sizeof(int));
1665
170k
  if (attallocs == NULL) {
1666
0
            xmlFree(atts);
1667
0
            goto mem_error;
1668
0
        }
1669
170k
        if (ctxt->maxatts > 0)
1670
759
            memcpy(atts, ctxt->atts, ctxt->maxatts * sizeof(const xmlChar *));
1671
170k
        xmlFree(ctxt->atts);
1672
170k
  ctxt->atts = atts;
1673
170k
  ctxt->attallocs = attallocs;
1674
170k
  ctxt->maxatts = maxatts;
1675
170k
    }
1676
170k
    return(ctxt->maxatts);
1677
0
mem_error:
1678
0
    xmlErrMemory(ctxt, NULL);
1679
0
    return(-1);
1680
170k
}
1681
1682
/**
1683
 * inputPush:
1684
 * @ctxt:  an XML parser context
1685
 * @value:  the parser input
1686
 *
1687
 * Pushes a new parser input on top of the input stack
1688
 *
1689
 * Returns -1 in case of error, the index in the stack otherwise
1690
 */
1691
int
1692
inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1693
417M
{
1694
417M
    if ((ctxt == NULL) || (value == NULL))
1695
0
        return(-1);
1696
417M
    if (ctxt->inputNr >= ctxt->inputMax) {
1697
11.8k
        size_t newSize = ctxt->inputMax * 2;
1698
11.8k
        xmlParserInputPtr *tmp;
1699
1700
11.8k
        tmp = (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1701
11.8k
                                               newSize * sizeof(*tmp));
1702
11.8k
        if (tmp == NULL) {
1703
0
            xmlErrMemory(ctxt, NULL);
1704
0
            return (-1);
1705
0
        }
1706
11.8k
        ctxt->inputTab = tmp;
1707
11.8k
        ctxt->inputMax = newSize;
1708
11.8k
    }
1709
417M
    ctxt->inputTab[ctxt->inputNr] = value;
1710
417M
    ctxt->input = value;
1711
417M
    return (ctxt->inputNr++);
1712
417M
}
1713
/**
1714
 * inputPop:
1715
 * @ctxt: an XML parser context
1716
 *
1717
 * Pops the top parser input from the input stack
1718
 *
1719
 * Returns the input just removed
1720
 */
1721
xmlParserInputPtr
1722
inputPop(xmlParserCtxtPtr ctxt)
1723
421M
{
1724
421M
    xmlParserInputPtr ret;
1725
1726
421M
    if (ctxt == NULL)
1727
0
        return(NULL);
1728
421M
    if (ctxt->inputNr <= 0)
1729
3.91M
        return (NULL);
1730
417M
    ctxt->inputNr--;
1731
417M
    if (ctxt->inputNr > 0)
1732
416M
        ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1733
1.33M
    else
1734
1.33M
        ctxt->input = NULL;
1735
417M
    ret = ctxt->inputTab[ctxt->inputNr];
1736
417M
    ctxt->inputTab[ctxt->inputNr] = NULL;
1737
417M
    return (ret);
1738
421M
}
1739
/**
1740
 * nodePush:
1741
 * @ctxt:  an XML parser context
1742
 * @value:  the element node
1743
 *
1744
 * Pushes a new element node on top of the node stack
1745
 *
1746
 * Returns -1 in case of error, the index in the stack otherwise
1747
 */
1748
int
1749
nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1750
46.9M
{
1751
46.9M
    if (ctxt == NULL) return(0);
1752
46.9M
    if (ctxt->nodeNr >= ctxt->nodeMax) {
1753
30.0k
        xmlNodePtr *tmp;
1754
1755
30.0k
  tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1756
30.0k
                                      ctxt->nodeMax * 2 *
1757
30.0k
                                      sizeof(ctxt->nodeTab[0]));
1758
30.0k
        if (tmp == NULL) {
1759
0
            xmlErrMemory(ctxt, NULL);
1760
0
            return (-1);
1761
0
        }
1762
30.0k
        ctxt->nodeTab = tmp;
1763
30.0k
  ctxt->nodeMax *= 2;
1764
30.0k
    }
1765
46.9M
    if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1766
46.9M
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
1767
69
  xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1768
69
     "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1769
69
        xmlParserMaxDepth);
1770
69
  xmlHaltParser(ctxt);
1771
69
  return(-1);
1772
69
    }
1773
46.9M
    ctxt->nodeTab[ctxt->nodeNr] = value;
1774
46.9M
    ctxt->node = value;
1775
46.9M
    return (ctxt->nodeNr++);
1776
46.9M
}
1777
1778
/**
1779
 * nodePop:
1780
 * @ctxt: an XML parser context
1781
 *
1782
 * Pops the top element node from the node stack
1783
 *
1784
 * Returns the node just removed
1785
 */
1786
xmlNodePtr
1787
nodePop(xmlParserCtxtPtr ctxt)
1788
45.9M
{
1789
45.9M
    xmlNodePtr ret;
1790
1791
45.9M
    if (ctxt == NULL) return(NULL);
1792
45.9M
    if (ctxt->nodeNr <= 0)
1793
377k
        return (NULL);
1794
45.6M
    ctxt->nodeNr--;
1795
45.6M
    if (ctxt->nodeNr > 0)
1796
44.9M
        ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1797
698k
    else
1798
698k
        ctxt->node = NULL;
1799
45.6M
    ret = ctxt->nodeTab[ctxt->nodeNr];
1800
45.6M
    ctxt->nodeTab[ctxt->nodeNr] = NULL;
1801
45.6M
    return (ret);
1802
45.9M
}
1803
1804
/**
1805
 * nameNsPush:
1806
 * @ctxt:  an XML parser context
1807
 * @value:  the element name
1808
 * @prefix:  the element prefix
1809
 * @URI:  the element namespace name
1810
 * @line:  the current line number for error messages
1811
 * @nsNr:  the number of namespaces pushed on the namespace table
1812
 *
1813
 * Pushes a new element name/prefix/URL on top of the name stack
1814
 *
1815
 * Returns -1 in case of error, the index in the stack otherwise
1816
 */
1817
static int
1818
nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1819
           const xmlChar *prefix, const xmlChar *URI, int line, int nsNr)
1820
41.9M
{
1821
41.9M
    xmlStartTag *tag;
1822
1823
41.9M
    if (ctxt->nameNr >= ctxt->nameMax) {
1824
54.6k
        const xmlChar * *tmp;
1825
54.6k
        xmlStartTag *tmp2;
1826
54.6k
        ctxt->nameMax *= 2;
1827
54.6k
        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1828
54.6k
                                    ctxt->nameMax *
1829
54.6k
                                    sizeof(ctxt->nameTab[0]));
1830
54.6k
        if (tmp == NULL) {
1831
0
      ctxt->nameMax /= 2;
1832
0
      goto mem_error;
1833
0
        }
1834
54.6k
  ctxt->nameTab = tmp;
1835
54.6k
        tmp2 = (xmlStartTag *) xmlRealloc((void * *)ctxt->pushTab,
1836
54.6k
                                    ctxt->nameMax *
1837
54.6k
                                    sizeof(ctxt->pushTab[0]));
1838
54.6k
        if (tmp2 == NULL) {
1839
0
      ctxt->nameMax /= 2;
1840
0
      goto mem_error;
1841
0
        }
1842
54.6k
  ctxt->pushTab = tmp2;
1843
41.9M
    } else if (ctxt->pushTab == NULL) {
1844
617k
        ctxt->pushTab = (xmlStartTag *) xmlMalloc(ctxt->nameMax *
1845
617k
                                            sizeof(ctxt->pushTab[0]));
1846
617k
        if (ctxt->pushTab == NULL)
1847
0
            goto mem_error;
1848
617k
    }
1849
41.9M
    ctxt->nameTab[ctxt->nameNr] = value;
1850
41.9M
    ctxt->name = value;
1851
41.9M
    tag = &ctxt->pushTab[ctxt->nameNr];
1852
41.9M
    tag->prefix = prefix;
1853
41.9M
    tag->URI = URI;
1854
41.9M
    tag->line = line;
1855
41.9M
    tag->nsNr = nsNr;
1856
41.9M
    return (ctxt->nameNr++);
1857
0
mem_error:
1858
0
    xmlErrMemory(ctxt, NULL);
1859
0
    return (-1);
1860
41.9M
}
1861
#ifdef LIBXML_PUSH_ENABLED
1862
/**
1863
 * nameNsPop:
1864
 * @ctxt: an XML parser context
1865
 *
1866
 * Pops the top element/prefix/URI name from the name stack
1867
 *
1868
 * Returns the name just removed
1869
 */
1870
static const xmlChar *
1871
nameNsPop(xmlParserCtxtPtr ctxt)
1872
10.8M
{
1873
10.8M
    const xmlChar *ret;
1874
1875
10.8M
    if (ctxt->nameNr <= 0)
1876
0
        return (NULL);
1877
10.8M
    ctxt->nameNr--;
1878
10.8M
    if (ctxt->nameNr > 0)
1879
10.7M
        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1880
45.8k
    else
1881
45.8k
        ctxt->name = NULL;
1882
10.8M
    ret = ctxt->nameTab[ctxt->nameNr];
1883
10.8M
    ctxt->nameTab[ctxt->nameNr] = NULL;
1884
10.8M
    return (ret);
1885
10.8M
}
1886
#endif /* LIBXML_PUSH_ENABLED */
1887
1888
/**
1889
 * namePush:
1890
 * @ctxt:  an XML parser context
1891
 * @value:  the element name
1892
 *
1893
 * Pushes a new element name on top of the name stack
1894
 *
1895
 * Returns -1 in case of error, the index in the stack otherwise
1896
 */
1897
int
1898
namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1899
0
{
1900
0
    if (ctxt == NULL) return (-1);
1901
1902
0
    if (ctxt->nameNr >= ctxt->nameMax) {
1903
0
        const xmlChar * *tmp;
1904
0
        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1905
0
                                    ctxt->nameMax * 2 *
1906
0
                                    sizeof(ctxt->nameTab[0]));
1907
0
        if (tmp == NULL) {
1908
0
      goto mem_error;
1909
0
        }
1910
0
  ctxt->nameTab = tmp;
1911
0
        ctxt->nameMax *= 2;
1912
0
    }
1913
0
    ctxt->nameTab[ctxt->nameNr] = value;
1914
0
    ctxt->name = value;
1915
0
    return (ctxt->nameNr++);
1916
0
mem_error:
1917
0
    xmlErrMemory(ctxt, NULL);
1918
0
    return (-1);
1919
0
}
1920
/**
1921
 * namePop:
1922
 * @ctxt: an XML parser context
1923
 *
1924
 * Pops the top element name from the name stack
1925
 *
1926
 * Returns the name just removed
1927
 */
1928
const xmlChar *
1929
namePop(xmlParserCtxtPtr ctxt)
1930
29.1M
{
1931
29.1M
    const xmlChar *ret;
1932
1933
29.1M
    if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1934
0
        return (NULL);
1935
29.1M
    ctxt->nameNr--;
1936
29.1M
    if (ctxt->nameNr > 0)
1937
28.9M
        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1938
245k
    else
1939
245k
        ctxt->name = NULL;
1940
29.1M
    ret = ctxt->nameTab[ctxt->nameNr];
1941
29.1M
    ctxt->nameTab[ctxt->nameNr] = NULL;
1942
29.1M
    return (ret);
1943
29.1M
}
1944
1945
52.4M
/*
 * spacePush:
 * Pushes @val on top of the space stack, doubling ctxt->spaceTab when
 * it is full, and makes ctxt->space point at the new top entry.
 *
 * Returns -1 on allocation failure, the index in the stack otherwise.
 */
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
    int slot;

    if (ctxt->spaceNr >= ctxt->spaceMax) {
        int *grown;

        ctxt->spaceMax *= 2;
        grown = (int *) xmlRealloc(ctxt->spaceTab,
                                   ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
        if (grown == NULL) {
            xmlErrMemory(ctxt, NULL);
            /* Undo the capacity change, the old table is still in use. */
            ctxt->spaceMax /= 2;
            return(-1);
        }
        ctxt->spaceTab = grown;
    }

    slot = ctxt->spaceNr++;
    ctxt->spaceTab[slot] = val;
    ctxt->space = &ctxt->spaceTab[slot];
    return(slot);
}
1963
1964
51.5M
/*
 * spacePop:
 * Removes the top entry of the space stack. ctxt->space is moved to
 * the new top entry, or to slot 0 when the stack became empty, and the
 * vacated slot is overwritten with -1.
 *
 * Returns the value removed, or 0 if the stack is empty.
 */
static int spacePop(xmlParserCtxtPtr ctxt) {
    int popped;
    int top;

    if (ctxt->spaceNr <= 0)
        return(0);

    top = --ctxt->spaceNr;
    popped = ctxt->spaceTab[top];
    ctxt->spaceTab[top] = -1;
    ctxt->space = &ctxt->spaceTab[(top > 0) ? (top - 1) : 0];
    return(popped);
}
1976
1977
/*
1978
 * Macros for accessing the content. Those should be used only by the parser,
1979
 * and not exported.
1980
 *
1981
 * Dirty macros, i.e. one often needs to make assumptions about the context to
1982
 * use them
1983
 *
1984
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
1985
 *           To be used with extreme caution since operations consuming
1986
 *           characters may move the input buffer to a different location !
1987
 *   CUR     returns the current xmlChar value, i.e. a 8 bit value if compiled
1988
 *           This should be used internally by the parser
1989
 *           only to compare to ASCII values otherwise it would break when
1990
 *           running with UTF-8 encoding.
1991
 *   RAW     same as CUR but in the input buffer, bypass any token
1992
 *           extraction that may have been done
1993
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR it should be used only
1994
 *           to compare on ASCII based substring.
1995
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
1996
 *           strings without newlines within the parser.
1997
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
1998
 *           defined char within the parser.
1999
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
2000
 *
2001
 *   NEXT    Skip to the next character, this does the proper decoding
2002
 *           in UTF-8 mode. It also pops up unfinished entities on the fly.
2003
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
2004
 *   CUR_CHAR(l) returns the current unicode character (int), set l
2005
 *           to the number of xmlChars used for the encoding [0-5].
2006
 *   CUR_SCHAR  same but operate on a string instead of the context
2007
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
2008
 *            the index
2009
 *   GROW, SHRINK  handling of input buffers
2010
 */
2011
2012
4.16G
/*
 * Accessors for the current input of the parser context. They all
 * expect a variable named ctxt to be in scope at the point of use.
 * Every expansion is fully parenthesized so that it behaves as a
 * single operand in any surrounding expression; CUR_PTR and BASE_PTR
 * remain usable as assignment targets.
 */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) (ctxt->input->cur[(val)])
#define CUR_PTR (ctxt->input->cur)
#define BASE_PTR (ctxt->input->base)
2017
2018
/*
 * CMPn(s, c1, ..., cn): true when the first n bytes at s equal
 * c1 ... cn, compared as unsigned char. Comparison stops at the first
 * mismatch, so bytes past it are not read. The arguments are
 * parenthesized so that expressions such as "ptr + 2" can be passed
 * safely; note that s is still evaluated once per compared byte.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((const unsigned char *) (s))[ 0 ] == (c1) && \
    ((const unsigned char *) (s))[ 1 ] == (c2) && \
    ((const unsigned char *) (s))[ 2 ] == (c3) && \
    ((const unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && \
    ((const unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && \
    ((const unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && \
    ((const unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((const unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((const unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((const unsigned char *) (s))[ 9 ] == (c10) )
2035
2036
1.64G
/*
 * SKIP(val): advance the current input by val bytes and the column by
 * the same amount, then ask for more input if the cursor now sits on a
 * 0 byte. No line accounting is done.
 */
#define SKIP(val) do {                                                  \
    ctxt->input->cur += (val);                                          \
    ctxt->input->col += (val);                                          \
    if (*ctxt->input->cur == 0)                                         \
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);                   \
  } while (0)

/*
 * SKIPL(val): advance the current input by val bytes one at a time,
 * bumping the line counter and resetting the column on every '\n',
 * then ask for more input if the cursor now sits on a 0 byte.
 * val is parenthesized so that any expression can be passed.
 */
#define SKIPL(val) do {                                                 \
    int skipl;                                                          \
    for (skipl = 0; skipl < (val); skipl++) {                           \
        if (*(ctxt->input->cur) == '\n') {                              \
            ctxt->input->line++; ctxt->input->col = 1;                  \
        } else ctxt->input->col++;                                      \
        ctxt->input->cur++;                                             \
    }                                                                   \
    if (*ctxt->input->cur == 0)                                         \
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);                   \
  } while (0)
2053
2054
1.33G
#define SHRINK if ((ctxt->progressive == 0) &&       \
2055
1.33G
       (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
2056
1.33G
       (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
2057
1.33G
  xmlSHRINK (ctxt);
2058
2059
5.25M
static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
2060
    /* Don't shrink memory buffers. */
2061
5.25M
    if ((ctxt->input->buf) &&
2062
5.25M
        ((ctxt->input->buf->encoder) || (ctxt->input->buf->readcallback)))
2063
12.0k
        xmlParserInputShrink(ctxt->input);
2064
5.25M
    if (*ctxt->input->cur == 0)
2065
92.0k
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2066
5.25M
}
2067
2068
4.10G
#define GROW if ((ctxt->progressive == 0) &&       \
2069
4.10G
     (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
2070
4.10G
  xmlGROW (ctxt);
2071
2072
929M
static void xmlGROW (xmlParserCtxtPtr ctxt) {
2073
929M
    ptrdiff_t curEnd = ctxt->input->end - ctxt->input->cur;
2074
929M
    ptrdiff_t curBase = ctxt->input->cur - ctxt->input->base;
2075
2076
929M
    if (((curEnd > XML_MAX_LOOKUP_LIMIT) ||
2077
929M
         (curBase > XML_MAX_LOOKUP_LIMIT)) &&
2078
929M
         ((ctxt->input->buf) &&
2079
0
          (ctxt->input->buf->readcallback != NULL)) &&
2080
929M
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
2081
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
2082
0
        xmlHaltParser(ctxt);
2083
0
  return;
2084
0
    }
2085
929M
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2086
929M
    if ((ctxt->input->cur > ctxt->input->end) ||
2087
929M
        (ctxt->input->cur < ctxt->input->base)) {
2088
0
        xmlHaltParser(ctxt);
2089
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "cur index out of bound");
2090
0
  return;
2091
0
    }
2092
929M
    if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0))
2093
13.4M
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2094
929M
}
2095
2096
917M
/* SKIP_BLANKS: skip blanks at the current position, see xmlSkipBlankChars(). */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* NEXT: advance to the next character through xmlNextChar(). */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: advance the cursor and the column by one, then ask for more
 * input if the cursor now sits on a 0 byte. No line accounting is
 * done. Expands to a brace block.
 */
#define NEXT1 {                                                         \
        ctxt->input->col++;                                             \
        ctxt->input->cur++;                                             \
        if (*ctxt->input->cur == 0)                                     \
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);               \
    }

/*
 * NEXTL(l): skip the current character, which is l bytes long,
 * updating the line and column counters. Does not request more input.
 */
#define NEXTL(l) do {                                                   \
    if (*(ctxt->input->cur) == '\n') {                                  \
        ctxt->input->line++; ctxt->input->col = 1;                      \
    } else ctxt->input->col++;                                          \
    ctxt->input->cur += (l);                                            \
  } while (0)

/* CUR_CHAR(l): current character of the input; l receives its length. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
/* CUR_SCHAR(s, l): same, but reading from the string s. */
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l, b, i, v): append character v to buffer b at index i and
 * advance i. A length l of 1 stores v directly as one byte, any other
 * length goes through xmlCopyCharMultiByte().
 *
 * NOTE: the expansion is a bare if/else statement; use it only as a
 * standalone statement.
 */
#define COPY_BUF(l,b,i,v)                                               \
    if ((l) == 1) (b)[(i)++] = (v);                                     \
    else (i) += xmlCopyCharMultiByte(&(b)[(i)], (v))
2120
2121
/**
2122
 * xmlSkipBlankChars:
2123
 * @ctxt:  the XML parser context
2124
 *
2125
 * skip all blanks character found at that point in the input streams.
2126
 * It pops up finished entities in the process if allowable at that point.
2127
 *
2128
 * Returns the number of space chars skipped
2129
 */
2130
2131
int
2132
917M
xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2133
917M
    int res = 0;
2134
2135
    /*
2136
     * It's Okay to use CUR/NEXT here since all the blanks are on
2137
     * the ASCII range.
2138
     */
2139
917M
    if (((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) ||
2140
917M
        (ctxt->instate == XML_PARSER_START)) {
2141
211M
  const xmlChar *cur;
2142
  /*
2143
   * if we are in the document content, go really fast
2144
   */
2145
211M
  cur = ctxt->input->cur;
2146
211M
  while (IS_BLANK_CH(*cur)) {
2147
60.1M
      if (*cur == '\n') {
2148
3.06M
    ctxt->input->line++; ctxt->input->col = 1;
2149
57.0M
      } else {
2150
57.0M
    ctxt->input->col++;
2151
57.0M
      }
2152
60.1M
      cur++;
2153
60.1M
      if (res < INT_MAX)
2154
60.1M
    res++;
2155
60.1M
      if (*cur == 0) {
2156
110k
    ctxt->input->cur = cur;
2157
110k
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2158
110k
    cur = ctxt->input->cur;
2159
110k
      }
2160
60.1M
  }
2161
211M
  ctxt->input->cur = cur;
2162
705M
    } else {
2163
705M
        int expandPE = ((ctxt->external != 0) || (ctxt->inputNr != 1));
2164
2165
2.31G
  while (ctxt->instate != XML_PARSER_EOF) {
2166
2.31G
            if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2167
686M
    NEXT;
2168
1.62G
      } else if (CUR == '%') {
2169
                /*
2170
                 * Need to handle support of entities branching here
2171
                 */
2172
505M
          if ((expandPE == 0) || (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2173
3.61M
                    break;
2174
501M
          xmlParsePEReference(ctxt);
2175
1.11G
            } else if (CUR == 0) {
2176
416M
                unsigned long consumed;
2177
416M
                xmlEntityPtr ent;
2178
2179
416M
                if (ctxt->inputNr <= 1)
2180
92.1k
                    break;
2181
2182
416M
                consumed = ctxt->input->consumed;
2183
416M
                xmlSaturatedAddSizeT(&consumed,
2184
416M
                                     ctxt->input->cur - ctxt->input->base);
2185
2186
                /*
2187
                 * Add to sizeentities when parsing an external entity
2188
                 * for the first time.
2189
                 */
2190
416M
                ent = ctxt->input->entity;
2191
416M
                if ((ent->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
2192
416M
                    ((ent->flags & XML_ENT_PARSED) == 0)) {
2193
12.2k
                    ent->flags |= XML_ENT_PARSED;
2194
2195
12.2k
                    xmlSaturatedAdd(&ctxt->sizeentities, consumed);
2196
12.2k
                }
2197
2198
416M
                xmlParserEntityCheck(ctxt, consumed);
2199
2200
416M
                xmlPopInput(ctxt);
2201
702M
            } else {
2202
702M
                break;
2203
702M
            }
2204
2205
            /*
2206
             * Also increase the counter when entering or exiting a PERef.
2207
             * The spec says: "When a parameter-entity reference is recognized
2208
             * in the DTD and included, its replacement text MUST be enlarged
2209
             * by the attachment of one leading and one following space (#x20)
2210
             * character."
2211
             */
2212
1.60G
      if (res < INT_MAX)
2213
1.60G
    res++;
2214
1.60G
        }
2215
705M
    }
2216
917M
    return(res);
2217
917M
}
2218
2219
/************************************************************************
2220
 *                  *
2221
 *    Commodity functions to handle entities      *
2222
 *                  *
2223
 ************************************************************************/
2224
2225
/**
2226
 * xmlPopInput:
2227
 * @ctxt:  an XML parser context
2228
 *
2229
 * xmlPopInput: the current input pointed by ctxt->input came to an end
2230
 *          pop it and return the next char.
2231
 *
2232
 * Returns the current xmlChar in the parser context
2233
 */
2234
xmlChar
2235
416M
xmlPopInput(xmlParserCtxtPtr ctxt) {
2236
416M
    xmlParserInputPtr input;
2237
2238
416M
    if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2239
416M
    if (xmlParserDebugEntities)
2240
0
  xmlGenericError(xmlGenericErrorContext,
2241
0
    "Popping input %d\n", ctxt->inputNr);
2242
416M
    if ((ctxt->inputNr > 1) && (ctxt->inSubset == 0) &&
2243
416M
        (ctxt->instate != XML_PARSER_EOF))
2244
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2245
0
                    "Unfinished entity outside the DTD");
2246
416M
    input = inputPop(ctxt);
2247
416M
    if (input->entity != NULL)
2248
416M
        input->entity->flags &= ~XML_ENT_EXPANDING;
2249
416M
    xmlFreeInputStream(input);
2250
416M
    if (*ctxt->input->cur == 0)
2251
198M
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2252
416M
    return(CUR);
2253
416M
}
2254
2255
/**
2256
 * xmlPushInput:
2257
 * @ctxt:  an XML parser context
2258
 * @input:  an XML parser input fragment (entity, XML fragment ...).
2259
 *
2260
 * xmlPushInput: switch to a new input stream which is stacked on top
2261
 *               of the previous one(s).
2262
 * Returns -1 in case of error or the index in the input stack
2263
 */
2264
int
2265
416M
xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2266
416M
    int ret;
2267
416M
    if (input == NULL) return(-1);
2268
2269
416M
    if (xmlParserDebugEntities) {
2270
0
  if ((ctxt->input != NULL) && (ctxt->input->filename))
2271
0
      xmlGenericError(xmlGenericErrorContext,
2272
0
        "%s(%d): ", ctxt->input->filename,
2273
0
        ctxt->input->line);
2274
0
  xmlGenericError(xmlGenericErrorContext,
2275
0
    "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2276
0
    }
2277
416M
    if (((ctxt->inputNr > 40) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2278
416M
        (ctxt->inputNr > 100)) {
2279
0
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2280
0
        while (ctxt->inputNr > 1)
2281
0
            xmlFreeInputStream(inputPop(ctxt));
2282
0
  return(-1);
2283
0
    }
2284
416M
    ret = inputPush(ctxt, input);
2285
416M
    if (ctxt->instate == XML_PARSER_EOF)
2286
0
        return(-1);
2287
416M
    GROW;
2288
416M
    return(ret);
2289
416M
}
2290
2291
/**
2292
 * xmlParseCharRef:
2293
 * @ctxt:  an XML parser context
2294
 *
2295
 * DEPRECATED: Internal function, don't use.
2296
 *
2297
 * Parse a numeric character reference. Always consumes '&'.
2298
 *
2299
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2300
 *                  '&#x' [0-9a-fA-F]+ ';'
2301
 *
2302
 * [ WFC: Legal Character ]
2303
 * Characters referred to using character references must match the
2304
 * production for Char.
2305
 *
2306
 * Returns the value parsed (as an int), 0 in case of error
2307
 */
2308
int
2309
1.01M
xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2310
1.01M
    int val = 0;
2311
1.01M
    int count = 0;
2312
2313
    /*
2314
     * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2315
     */
2316
1.01M
    if ((RAW == '&') && (NXT(1) == '#') &&
2317
1.01M
        (NXT(2) == 'x')) {
2318
431k
  SKIP(3);
2319
431k
  GROW;
2320
1.28M
  while (RAW != ';') { /* loop blocked by count */
2321
899k
      if (count++ > 20) {
2322
31.7k
    count = 0;
2323
31.7k
    GROW;
2324
31.7k
                if (ctxt->instate == XML_PARSER_EOF)
2325
0
                    return(0);
2326
31.7k
      }
2327
899k
      if ((RAW >= '0') && (RAW <= '9'))
2328
502k
          val = val * 16 + (CUR - '0');
2329
397k
      else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2330
305k
          val = val * 16 + (CUR - 'a') + 10;
2331
92.1k
      else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2332
49.7k
          val = val * 16 + (CUR - 'A') + 10;
2333
42.3k
      else {
2334
42.3k
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2335
42.3k
    val = 0;
2336
42.3k
    break;
2337
42.3k
      }
2338
857k
      if (val > 0x110000)
2339
348k
          val = 0x110000;
2340
2341
857k
      NEXT;
2342
857k
      count++;
2343
857k
  }
2344
431k
  if (RAW == ';') {
2345
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2346
388k
      ctxt->input->col++;
2347
388k
      ctxt->input->cur++;
2348
388k
  }
2349
585k
    } else if  ((RAW == '&') && (NXT(1) == '#')) {
2350
585k
  SKIP(2);
2351
585k
  GROW;
2352
2.47M
  while (RAW != ';') { /* loop blocked by count */
2353
1.97M
      if (count++ > 20) {
2354
42.4k
    count = 0;
2355
42.4k
    GROW;
2356
42.4k
                if (ctxt->instate == XML_PARSER_EOF)
2357
0
                    return(0);
2358
42.4k
      }
2359
1.97M
      if ((RAW >= '0') && (RAW <= '9'))
2360
1.88M
          val = val * 10 + (CUR - '0');
2361
92.9k
      else {
2362
92.9k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2363
92.9k
    val = 0;
2364
92.9k
    break;
2365
92.9k
      }
2366
1.88M
      if (val > 0x110000)
2367
452k
          val = 0x110000;
2368
2369
1.88M
      NEXT;
2370
1.88M
      count++;
2371
1.88M
  }
2372
585k
  if (RAW == ';') {
2373
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2374
492k
      ctxt->input->col++;
2375
492k
      ctxt->input->cur++;
2376
492k
  }
2377
585k
    } else {
2378
0
        if (RAW == '&')
2379
0
            SKIP(1);
2380
0
        xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2381
0
    }
2382
2383
    /*
2384
     * [ WFC: Legal Character ]
2385
     * Characters referred to using character references must match the
2386
     * production for Char.
2387
     */
2388
1.01M
    if (val >= 0x110000) {
2389
2.24k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2390
2.24k
                "xmlParseCharRef: character reference out of bounds\n",
2391
2.24k
          val);
2392
1.01M
    } else if (IS_CHAR(val)) {
2393
854k
        return(val);
2394
854k
    } else {
2395
159k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2396
159k
                          "xmlParseCharRef: invalid xmlChar value %d\n",
2397
159k
                    val);
2398
159k
    }
2399
161k
    return(0);
2400
1.01M
}
2401
2402
/**
2403
 * xmlParseStringCharRef:
2404
 * @ctxt:  an XML parser context
2405
 * @str:  a pointer to an index in the string
2406
 *
2407
 * parse Reference declarations, variant parsing from a string rather
2408
 * than an an input flow.
2409
 *
2410
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2411
 *                  '&#x' [0-9a-fA-F]+ ';'
2412
 *
2413
 * [ WFC: Legal Character ]
2414
 * Characters referred to using character references must match the
2415
 * production for Char.
2416
 *
2417
 * Returns the value parsed (as an int), 0 in case of error, str will be
2418
 *         updated to the current value of the index
2419
 */
2420
static int
2421
977k
xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2422
977k
    const xmlChar *ptr;
2423
977k
    xmlChar cur;
2424
977k
    int val = 0;
2425
2426
977k
    if ((str == NULL) || (*str == NULL)) return(0);
2427
977k
    ptr = *str;
2428
977k
    cur = *ptr;
2429
977k
    if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2430
58.1k
  ptr += 3;
2431
58.1k
  cur = *ptr;
2432
150k
  while (cur != ';') { /* Non input consuming loop */
2433
95.0k
      if ((cur >= '0') && (cur <= '9'))
2434
24.5k
          val = val * 16 + (cur - '0');
2435
70.5k
      else if ((cur >= 'a') && (cur <= 'f'))
2436
11.7k
          val = val * 16 + (cur - 'a') + 10;
2437
58.7k
      else if ((cur >= 'A') && (cur <= 'F'))
2438
56.0k
          val = val * 16 + (cur - 'A') + 10;
2439
2.71k
      else {
2440
2.71k
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2441
2.71k
    val = 0;
2442
2.71k
    break;
2443
2.71k
      }
2444
92.3k
      if (val > 0x110000)
2445
21.8k
          val = 0x110000;
2446
2447
92.3k
      ptr++;
2448
92.3k
      cur = *ptr;
2449
92.3k
  }
2450
58.1k
  if (cur == ';')
2451
55.4k
      ptr++;
2452
919k
    } else if  ((cur == '&') && (ptr[1] == '#')){
2453
919k
  ptr += 2;
2454
919k
  cur = *ptr;
2455
2.92M
  while (cur != ';') { /* Non input consuming loops */
2456
2.00M
      if ((cur >= '0') && (cur <= '9'))
2457
2.00M
          val = val * 10 + (cur - '0');
2458
5.52k
      else {
2459
5.52k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2460
5.52k
    val = 0;
2461
5.52k
    break;
2462
5.52k
      }
2463
2.00M
      if (val > 0x110000)
2464
8.12k
          val = 0x110000;
2465
2466
2.00M
      ptr++;
2467
2.00M
      cur = *ptr;
2468
2.00M
  }
2469
919k
  if (cur == ';')
2470
913k
      ptr++;
2471
919k
    } else {
2472
0
  xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2473
0
  return(0);
2474
0
    }
2475
977k
    *str = ptr;
2476
2477
    /*
2478
     * [ WFC: Legal Character ]
2479
     * Characters referred to using character references must match the
2480
     * production for Char.
2481
     */
2482
977k
    if (val >= 0x110000) {
2483
722
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2484
722
                "xmlParseStringCharRef: character reference out of bounds\n",
2485
722
                val);
2486
976k
    } else if (IS_CHAR(val)) {
2487
967k
        return(val);
2488
967k
    } else {
2489
9.45k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2490
9.45k
        "xmlParseStringCharRef: invalid xmlChar value %d\n",
2491
9.45k
        val);
2492
9.45k
    }
2493
10.1k
    return(0);
2494
977k
}
2495
2496
/**
2497
 * xmlParserHandlePEReference:
2498
 * @ctxt:  the parser context
2499
 *
2500
 * [69] PEReference ::= '%' Name ';'
2501
 *
2502
 * [ WFC: No Recursion ]
2503
 * A parsed entity must not contain a recursive
2504
 * reference to itself, either directly or indirectly.
2505
 *
2506
 * [ WFC: Entity Declared ]
2507
 * In a document without any DTD, a document with only an internal DTD
2508
 * subset which contains no parameter entity references, or a document
2509
 * with "standalone='yes'", ...  ... The declaration of a parameter
2510
 * entity must precede any reference to it...
2511
 *
2512
 * [ VC: Entity Declared ]
2513
 * In a document with an external subset or external parameter entities
2514
 * with "standalone='no'", ...  ... The declaration of a parameter entity
2515
 * must precede any reference to it...
2516
 *
2517
 * [ WFC: In DTD ]
2518
 * Parameter-entity references may only appear in the DTD.
2519
 * NOTE: misleading but this is handled.
2520
 *
2521
 * A PEReference may have been detected in the current input stream
2522
 * the handling is done accordingly to
2523
 *      http://www.w3.org/TR/REC-xml#entproc
2524
 * i.e.
2525
 *   - Included in literal in entity values
2526
 *   - Included as Parameter Entity reference within DTDs
2527
 */
2528
void
2529
0
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2530
0
    switch(ctxt->instate) {
2531
0
  case XML_PARSER_CDATA_SECTION:
2532
0
      return;
2533
0
        case XML_PARSER_COMMENT:
2534
0
      return;
2535
0
  case XML_PARSER_START_TAG:
2536
0
      return;
2537
0
  case XML_PARSER_END_TAG:
2538
0
      return;
2539
0
        case XML_PARSER_EOF:
2540
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2541
0
      return;
2542
0
        case XML_PARSER_PROLOG:
2543
0
  case XML_PARSER_START:
2544
0
  case XML_PARSER_MISC:
2545
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2546
0
      return;
2547
0
  case XML_PARSER_ENTITY_DECL:
2548
0
        case XML_PARSER_CONTENT:
2549
0
        case XML_PARSER_ATTRIBUTE_VALUE:
2550
0
        case XML_PARSER_PI:
2551
0
  case XML_PARSER_SYSTEM_LITERAL:
2552
0
  case XML_PARSER_PUBLIC_LITERAL:
2553
      /* we just ignore it there */
2554
0
      return;
2555
0
        case XML_PARSER_EPILOG:
2556
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2557
0
      return;
2558
0
  case XML_PARSER_ENTITY_VALUE:
2559
      /*
2560
       * NOTE: in the case of entity values, we don't do the
2561
       *       substitution here since we need the literal
2562
       *       entity value to be able to save the internal
2563
       *       subset of the document.
2564
       *       This will be handled by xmlStringDecodeEntities
2565
       */
2566
0
      return;
2567
0
        case XML_PARSER_DTD:
2568
      /*
2569
       * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2570
       * In the internal DTD subset, parameter-entity references
2571
       * can occur only where markup declarations can occur, not
2572
       * within markup declarations.
2573
       * In that case this is handled in xmlParseMarkupDecl
2574
       */
2575
0
      if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2576
0
    return;
2577
0
      if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2578
0
    return;
2579
0
            break;
2580
0
        case XML_PARSER_IGNORE:
2581
0
            return;
2582
0
    }
2583
2584
0
    xmlParsePEReference(ctxt);
2585
0
}
2586
2587
/*
 * Macro used to grow the current buffer to twice its size plus n bytes.
 * buffer##_size is expected to be a size_t
 * mem_error: is expected to handle memory allocation failures; it is
 * also the target when the new size wraps around. On failure buffer
 * and buffer##_size are left unchanged.
 * n is parenthesized so that any expression can be passed.
 */
#define growBuffer(buffer, n) {                                         \
    xmlChar *tmp;                                                       \
    size_t new_size = buffer##_size * 2 + (n);                          \
    if (new_size < buffer##_size) goto mem_error;                       \
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);                     \
    if (tmp == NULL) goto mem_error;                                    \
    buffer = tmp;                                                       \
    buffer##_size = new_size;                                           \
}
2601
2602
/**
2603
 * xmlStringDecodeEntitiesInt:
2604
 * @ctxt:  the parser context
2605
 * @str:  the input string
2606
 * @len: the string length
2607
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2608
 * @end:  an end marker xmlChar, 0 if none
2609
 * @end2:  an end marker xmlChar, 0 if none
2610
 * @end3:  an end marker xmlChar, 0 if none
2611
 * @check:  whether to perform entity checks
2612
 */
2613
static xmlChar *
2614
xmlStringDecodeEntitiesInt(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2615
               int what, xmlChar end, xmlChar  end2, xmlChar end3,
2616
297M
                           int check) {
2617
297M
    xmlChar *buffer = NULL;
2618
297M
    size_t buffer_size = 0;
2619
297M
    size_t nbchars = 0;
2620
2621
297M
    xmlChar *current = NULL;
2622
297M
    xmlChar *rep = NULL;
2623
297M
    const xmlChar *last;
2624
297M
    xmlEntityPtr ent;
2625
297M
    int c,l;
2626
2627
297M
    if (str == NULL)
2628
31.8k
        return(NULL);
2629
297M
    last = str + len;
2630
2631
297M
    if (((ctxt->depth > 40) &&
2632
297M
         ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2633
297M
  (ctxt->depth > 100)) {
2634
0
  xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_LOOP,
2635
0
                       "Maximum entity nesting depth exceeded");
2636
0
  return(NULL);
2637
0
    }
2638
2639
    /*
2640
     * allocate a translation buffer.
2641
     */
2642
297M
    buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2643
297M
    buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
2644
297M
    if (buffer == NULL) goto mem_error;
2645
2646
    /*
2647
     * OK loop until we reach one of the ending char or a size limit.
2648
     * we are operating on already parsed values.
2649
     */
2650
297M
    if (str < last)
2651
296M
  c = CUR_SCHAR(str, l);
2652
1.17M
    else
2653
1.17M
        c = 0;
2654
27.2G
    while ((c != 0) && (c != end) && /* non input consuming loop */
2655
27.2G
           (c != end2) && (c != end3) &&
2656
27.2G
           (ctxt->instate != XML_PARSER_EOF)) {
2657
2658
26.9G
  if (c == 0) break;
2659
26.9G
        if ((c == '&') && (str[1] == '#')) {
2660
977k
      int val = xmlParseStringCharRef(ctxt, &str);
2661
977k
      if (val == 0)
2662
10.1k
                goto int_error;
2663
967k
      COPY_BUF(0,buffer,nbchars,val);
2664
967k
      if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2665
490
          growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2666
490
      }
2667
26.9G
  } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2668
1.40G
      if (xmlParserDebugEntities)
2669
0
    xmlGenericError(xmlGenericErrorContext,
2670
0
      "String decoding Entity Reference: %.30s\n",
2671
0
      str);
2672
1.40G
      ent = xmlParseStringEntityRef(ctxt, &str);
2673
1.40G
      if ((ent != NULL) &&
2674
1.40G
    (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2675
1.77M
    if (ent->content != NULL) {
2676
1.77M
        COPY_BUF(0,buffer,nbchars,ent->content[0]);
2677
1.77M
        if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2678
304
      growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2679
304
        }
2680
1.77M
    } else {
2681
0
        xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2682
0
          "predefined entity has no content\n");
2683
0
                    goto int_error;
2684
0
    }
2685
1.39G
      } else if ((ent != NULL) && (ent->content != NULL)) {
2686
284M
          if ((check) && (xmlParserEntityCheck(ctxt, ent->length)))
2687
3.60k
                    goto int_error;
2688
2689
284M
                if (ent->flags & XML_ENT_EXPANDING) {
2690
821
              xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2691
821
                    xmlHaltParser(ctxt);
2692
821
                    ent->content[0] = 0;
2693
821
                    goto int_error;
2694
821
                }
2695
2696
284M
                ent->flags |= XML_ENT_EXPANDING;
2697
284M
    ctxt->depth++;
2698
284M
    rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
2699
284M
                        ent->length, what, 0, 0, 0, check);
2700
284M
    ctxt->depth--;
2701
284M
                ent->flags &= ~XML_ENT_EXPANDING;
2702
2703
284M
    if (rep == NULL) {
2704
49.2k
                    ent->content[0] = 0;
2705
49.2k
                    goto int_error;
2706
49.2k
                }
2707
2708
284M
                current = rep;
2709
89.5G
                while (*current != 0) { /* non input consuming loop */
2710
89.2G
                    buffer[nbchars++] = *current++;
2711
89.2G
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2712
37.3M
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2713
37.3M
                    }
2714
89.2G
                }
2715
284M
                xmlFree(rep);
2716
284M
                rep = NULL;
2717
1.11G
      } else if (ent != NULL) {
2718
101M
    int i = xmlStrlen(ent->name);
2719
101M
    const xmlChar *cur = ent->name;
2720
2721
101M
    buffer[nbchars++] = '&';
2722
101M
    if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
2723
6.18M
        growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
2724
6.18M
    }
2725
3.00G
    for (;i > 0;i--)
2726
2.90G
        buffer[nbchars++] = *cur++;
2727
101M
    buffer[nbchars++] = ';';
2728
101M
      }
2729
25.5G
  } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2730
3.74M
      if (xmlParserDebugEntities)
2731
0
    xmlGenericError(xmlGenericErrorContext,
2732
0
      "String decoding PE Reference: %.30s\n", str);
2733
3.74M
      ent = xmlParseStringPEReference(ctxt, &str);
2734
3.74M
      if (ent != NULL) {
2735
3.57M
                if (ent->content == NULL) {
2736
        /*
2737
         * Note: external parsed entities will not be loaded,
2738
         * it is not required for a non-validating parser to
2739
         * complete external PEReferences coming from the
2740
         * internal subset
2741
         */
2742
5.64k
        if (((ctxt->options & XML_PARSE_NOENT) != 0) ||
2743
5.64k
      ((ctxt->options & XML_PARSE_DTDVALID) != 0) ||
2744
5.64k
      (ctxt->validate != 0)) {
2745
5.35k
      xmlLoadEntityContent(ctxt, ent);
2746
5.35k
        } else {
2747
297
      xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
2748
297
      "not validating will not read content for PE entity %s\n",
2749
297
                          ent->name, NULL);
2750
297
        }
2751
5.64k
    }
2752
2753
3.57M
          if ((check) && (xmlParserEntityCheck(ctxt, ent->length)))
2754
1.29k
                    goto int_error;
2755
2756
3.57M
                if (ent->flags & XML_ENT_EXPANDING) {
2757
624
              xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2758
624
                    xmlHaltParser(ctxt);
2759
624
                    if (ent->content != NULL)
2760
356
                        ent->content[0] = 0;
2761
624
                    goto int_error;
2762
624
                }
2763
2764
3.57M
                ent->flags |= XML_ENT_EXPANDING;
2765
3.57M
    ctxt->depth++;
2766
3.57M
    rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
2767
3.57M
                        ent->length, what, 0, 0, 0, check);
2768
3.57M
    ctxt->depth--;
2769
3.57M
                ent->flags &= ~XML_ENT_EXPANDING;
2770
2771
3.57M
    if (rep == NULL) {
2772
3.49k
                    if (ent->content != NULL)
2773
416
                        ent->content[0] = 0;
2774
3.49k
                    goto int_error;
2775
3.49k
                }
2776
3.56M
                current = rep;
2777
14.5G
                while (*current != 0) { /* non input consuming loop */
2778
14.5G
                    buffer[nbchars++] = *current++;
2779
14.5G
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2780
954k
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2781
954k
                    }
2782
14.5G
                }
2783
3.56M
                xmlFree(rep);
2784
3.56M
                rep = NULL;
2785
3.56M
      }
2786
25.5G
  } else {
2787
25.5G
      COPY_BUF(l,buffer,nbchars,c);
2788
25.5G
      str += l;
2789
25.5G
      if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2790
9.12M
          growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2791
9.12M
      }
2792
25.5G
  }
2793
26.9G
  if (str < last)
2794
26.6G
      c = CUR_SCHAR(str, l);
2795
296M
  else
2796
296M
      c = 0;
2797
26.9G
    }
2798
297M
    buffer[nbchars] = 0;
2799
297M
    return(buffer);
2800
2801
0
mem_error:
2802
0
    xmlErrMemory(ctxt, NULL);
2803
69.2k
int_error:
2804
69.2k
    if (rep != NULL)
2805
0
        xmlFree(rep);
2806
69.2k
    if (buffer != NULL)
2807
69.2k
        xmlFree(buffer);
2808
69.2k
    return(NULL);
2809
0
}
2810
2811
/**
2812
 * xmlStringLenDecodeEntities:
2813
 * @ctxt:  the parser context
2814
 * @str:  the input string
2815
 * @len: the string length
2816
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2817
 * @end:  an end marker xmlChar, 0 if none
2818
 * @end2:  an end marker xmlChar, 0 if none
2819
 * @end3:  an end marker xmlChar, 0 if none
2820
 *
2821
 * DEPRECATED: Internal function, don't use.
2822
 *
2823
 * Takes a entity string content and process to do the adequate substitutions.
2824
 *
2825
 * [67] Reference ::= EntityRef | CharRef
2826
 *
2827
 * [69] PEReference ::= '%' Name ';'
2828
 *
2829
 * Returns A newly allocated string with the substitution done. The caller
2830
 *      must deallocate it !
2831
 */
2832
xmlChar *
2833
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2834
                           int what, xmlChar end, xmlChar  end2,
2835
32.7k
                           xmlChar end3) {
2836
32.7k
    if ((ctxt == NULL) || (str == NULL) || (len < 0))
2837
0
        return(NULL);
2838
32.7k
    return(xmlStringDecodeEntitiesInt(ctxt, str, len, what,
2839
32.7k
                                      end, end2, end3, 0));
2840
32.7k
}
2841
2842
/**
2843
 * xmlStringDecodeEntities:
2844
 * @ctxt:  the parser context
2845
 * @str:  the input string
2846
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2847
 * @end:  an end marker xmlChar, 0 if none
2848
 * @end2:  an end marker xmlChar, 0 if none
2849
 * @end3:  an end marker xmlChar, 0 if none
2850
 *
2851
 * DEPRECATED: Internal function, don't use.
2852
 *
2853
 * Takes a entity string content and process to do the adequate substitutions.
2854
 *
2855
 * [67] Reference ::= EntityRef | CharRef
2856
 *
2857
 * [69] PEReference ::= '%' Name ';'
2858
 *
2859
 * Returns A newly allocated string with the substitution done. The caller
2860
 *      must deallocate it !
2861
 */
2862
xmlChar *
2863
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2864
578k
            xmlChar end, xmlChar  end2, xmlChar end3) {
2865
578k
    if ((ctxt == NULL) || (str == NULL)) return(NULL);
2866
578k
    return(xmlStringDecodeEntitiesInt(ctxt, str, xmlStrlen(str), what,
2867
578k
                                      end, end2, end3, 0));
2868
578k
}
2869
2870
/************************************************************************
2871
 *                  *
2872
 *    Commodity functions, cleanup needed ?     *
2873
 *                  *
2874
 ************************************************************************/
2875
2876
/**
2877
 * areBlanks:
2878
 * @ctxt:  an XML parser context
2879
 * @str:  a xmlChar *
2880
 * @len:  the size of @str
2881
 * @blank_chars: we know the chars are blanks
2882
 *
2883
 * Is this a sequence of blank chars that one can ignore ?
2884
 *
2885
 * Returns 1 if ignorable 0 otherwise.
2886
 */
2887
2888
static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2889
18.5M
                     int blank_chars) {
2890
18.5M
    int i, ret;
2891
18.5M
    xmlNodePtr lastChild;
2892
2893
    /*
2894
     * Don't spend time trying to differentiate them, the same callback is
2895
     * used !
2896
     */
2897
18.5M
    if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2898
718k
  return(0);
2899
2900
    /*
2901
     * Check for xml:space value.
2902
     */
2903
17.8M
    if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2904
17.8M
        (*(ctxt->space) == -2))
2905
4.49M
  return(0);
2906
2907
    /*
2908
     * Check that the string is made of blanks
2909
     */
2910
13.3M
    if (blank_chars == 0) {
2911
33.0M
  for (i = 0;i < len;i++)
2912
28.3M
      if (!(IS_BLANK_CH(str[i]))) return(0);
2913
6.36M
    }
2914
2915
    /*
2916
     * Look if the element is mixed content in the DTD if available
2917
     */
2918
11.7M
    if (ctxt->node == NULL) return(0);
2919
11.5M
    if (ctxt->myDoc != NULL) {
2920
11.5M
  ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2921
11.5M
        if (ret == 0) return(1);
2922
8.26M
        if (ret == 1) return(0);
2923
8.26M
    }
2924
2925
    /*
2926
     * Otherwise, heuristic :-\
2927
     */
2928
8.12M
    if ((RAW != '<') && (RAW != 0xD)) return(0);
2929
8.07M
    if ((ctxt->node->children == NULL) &&
2930
8.07M
  (RAW == '<') && (NXT(1) == '/')) return(0);
2931
2932
8.06M
    lastChild = xmlGetLastChild(ctxt->node);
2933
8.06M
    if (lastChild == NULL) {
2934
1.22M
        if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2935
1.22M
            (ctxt->node->content != NULL)) return(0);
2936
6.83M
    } else if (xmlNodeIsText(lastChild))
2937
84.0k
        return(0);
2938
6.75M
    else if ((ctxt->node->children != NULL) &&
2939
6.75M
             (xmlNodeIsText(ctxt->node->children)))
2940
79.0k
        return(0);
2941
7.90M
    return(1);
2942
8.06M
}
2943
2944
/************************************************************************
2945
 *                  *
2946
 *    Extra stuff for namespace support     *
2947
 *  Relates to http://www.w3.org/TR/WD-xml-names      *
2948
 *                  *
2949
 ************************************************************************/
2950
2951
/**
2952
 * xmlSplitQName:
2953
 * @ctxt:  an XML parser context
2954
 * @name:  an XML parser context
2955
 * @prefix:  a xmlChar **
2956
 *
2957
 * parse an UTF8 encoded XML qualified name string
2958
 *
2959
 * [NS 5] QName ::= (Prefix ':')? LocalPart
2960
 *
2961
 * [NS 6] Prefix ::= NCName
2962
 *
2963
 * [NS 7] LocalPart ::= NCName
2964
 *
2965
 * Returns the local part, and prefix is updated
2966
 *   to get the Prefix if any.
2967
 */
2968
2969
xmlChar *
2970
52.9M
xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2971
52.9M
    xmlChar buf[XML_MAX_NAMELEN + 5];
2972
52.9M
    xmlChar *buffer = NULL;
2973
52.9M
    int len = 0;
2974
52.9M
    int max = XML_MAX_NAMELEN;
2975
52.9M
    xmlChar *ret = NULL;
2976
52.9M
    const xmlChar *cur = name;
2977
52.9M
    int c;
2978
2979
52.9M
    if (prefix == NULL) return(NULL);
2980
52.9M
    *prefix = NULL;
2981
2982
52.9M
    if (cur == NULL) return(NULL);
2983
2984
#ifndef XML_XML_NAMESPACE
2985
    /* xml: prefix is not really a namespace */
2986
    if ((cur[0] == 'x') && (cur[1] == 'm') &&
2987
        (cur[2] == 'l') && (cur[3] == ':'))
2988
  return(xmlStrdup(name));
2989
#endif
2990
2991
    /* nasty but well=formed */
2992
52.9M
    if (cur[0] == ':')
2993
13.7k
  return(xmlStrdup(name));
2994
2995
52.9M
    c = *cur++;
2996
221M
    while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2997
168M
  buf[len++] = c;
2998
168M
  c = *cur++;
2999
168M
    }
3000
52.9M
    if (len >= max) {
3001
  /*
3002
   * Okay someone managed to make a huge name, so he's ready to pay
3003
   * for the processing speed.
3004
   */
3005
11.5k
  max = len * 2;
3006
3007
11.5k
  buffer = (xmlChar *) xmlMallocAtomic(max);
3008
11.5k
  if (buffer == NULL) {
3009
0
      xmlErrMemory(ctxt, NULL);
3010
0
      return(NULL);
3011
0
  }
3012
11.5k
  memcpy(buffer, buf, len);
3013
24.7M
  while ((c != 0) && (c != ':')) { /* tested bigname.xml */
3014
24.7M
      if (len + 10 > max) {
3015
21.9k
          xmlChar *tmp;
3016
3017
21.9k
    max *= 2;
3018
21.9k
    tmp = (xmlChar *) xmlRealloc(buffer, max);
3019
21.9k
    if (tmp == NULL) {
3020
0
        xmlFree(buffer);
3021
0
        xmlErrMemory(ctxt, NULL);
3022
0
        return(NULL);
3023
0
    }
3024
21.9k
    buffer = tmp;
3025
21.9k
      }
3026
24.7M
      buffer[len++] = c;
3027
24.7M
      c = *cur++;
3028
24.7M
  }
3029
11.5k
  buffer[len] = 0;
3030
11.5k
    }
3031
3032
52.9M
    if ((c == ':') && (*cur == 0)) {
3033
22.7k
        if (buffer != NULL)
3034
206
      xmlFree(buffer);
3035
22.7k
  *prefix = NULL;
3036
22.7k
  return(xmlStrdup(name));
3037
22.7k
    }
3038
3039
52.9M
    if (buffer == NULL)
3040
52.9M
  ret = xmlStrndup(buf, len);
3041
11.3k
    else {
3042
11.3k
  ret = buffer;
3043
11.3k
  buffer = NULL;
3044
11.3k
  max = XML_MAX_NAMELEN;
3045
11.3k
    }
3046
3047
3048
52.9M
    if (c == ':') {
3049
2.07M
  c = *cur;
3050
2.07M
        *prefix = ret;
3051
2.07M
  if (c == 0) {
3052
0
      return(xmlStrndup(BAD_CAST "", 0));
3053
0
  }
3054
2.07M
  len = 0;
3055
3056
  /*
3057
   * Check that the first character is proper to start
3058
   * a new name
3059
   */
3060
2.07M
  if (!(((c >= 0x61) && (c <= 0x7A)) ||
3061
2.07M
        ((c >= 0x41) && (c <= 0x5A)) ||
3062
2.07M
        (c == '_') || (c == ':'))) {
3063
18.7k
      int l;
3064
18.7k
      int first = CUR_SCHAR(cur, l);
3065
3066
18.7k
      if (!IS_LETTER(first) && (first != '_')) {
3067
4.37k
    xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3068
4.37k
          "Name %s is not XML Namespace compliant\n",
3069
4.37k
          name);
3070
4.37k
      }
3071
18.7k
  }
3072
2.07M
  cur++;
3073
3074
12.7M
  while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3075
10.6M
      buf[len++] = c;
3076
10.6M
      c = *cur++;
3077
10.6M
  }
3078
2.07M
  if (len >= max) {
3079
      /*
3080
       * Okay someone managed to make a huge name, so he's ready to pay
3081
       * for the processing speed.
3082
       */
3083
3.26k
      max = len * 2;
3084
3085
3.26k
      buffer = (xmlChar *) xmlMallocAtomic(max);
3086
3.26k
      if (buffer == NULL) {
3087
0
          xmlErrMemory(ctxt, NULL);
3088
0
    return(NULL);
3089
0
      }
3090
3.26k
      memcpy(buffer, buf, len);
3091
6.25M
      while (c != 0) { /* tested bigname2.xml */
3092
6.24M
    if (len + 10 > max) {
3093
5.50k
        xmlChar *tmp;
3094
3095
5.50k
        max *= 2;
3096
5.50k
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3097
5.50k
        if (tmp == NULL) {
3098
0
      xmlErrMemory(ctxt, NULL);
3099
0
      xmlFree(buffer);
3100
0
      return(NULL);
3101
0
        }
3102
5.50k
        buffer = tmp;
3103
5.50k
    }
3104
6.24M
    buffer[len++] = c;
3105
6.24M
    c = *cur++;
3106
6.24M
      }
3107
3.26k
      buffer[len] = 0;
3108
3.26k
  }
3109
3110
2.07M
  if (buffer == NULL)
3111
2.07M
      ret = xmlStrndup(buf, len);
3112
3.26k
  else {
3113
3.26k
      ret = buffer;
3114
3.26k
  }
3115
2.07M
    }
3116
3117
52.9M
    return(ret);
3118
52.9M
}
3119
3120
/************************************************************************
3121
 *                  *
3122
 *      The parser itself       *
3123
 *  Relates to http://www.w3.org/TR/REC-xml       *
3124
 *                  *
3125
 ************************************************************************/
3126
3127
/************************************************************************
3128
 *                  *
3129
 *  Routines to parse Name, NCName and NmToken      *
3130
 *                  *
3131
 ************************************************************************/
3132
#ifdef DEBUG
/*
 * Call counters, only compiled in DEBUG builds. Objects with static
 * storage duration start at zero without an explicit initializer.
 */
static unsigned long nbParseName;
static unsigned long nbParseNmToken;
static unsigned long nbParseNCName;
static unsigned long nbParseNCNameComplex;
static unsigned long nbParseNameComplex;
static unsigned long nbParseStringName;
#endif
3140
3141
/*
3142
 * The two following functions are related to the change of accepted
3143
 * characters for Name and NmToken in the Revision 5 of XML-1.0
3144
 * They correspond to the modified production [4] and the new production [4a]
3145
 * changes in that revision. Also note that the macros used for the
3146
 * productions Letter, Digit, CombiningChar and Extender are not needed
3147
 * anymore.
3148
 * We still keep compatibility to pre-revision5 parsing semantic if the
3149
 * new XML_PARSE_OLD10 option is given to the parser.
3150
 */
3151
static int
3152
512M
xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3153
512M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3154
        /*
3155
   * Use the new checks of production [4] [4a] amd [5] of the
3156
   * Update 5 of XML-1.0
3157
   */
3158
284M
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3159
284M
      (((c >= 'a') && (c <= 'z')) ||
3160
284M
       ((c >= 'A') && (c <= 'Z')) ||
3161
284M
       (c == '_') || (c == ':') ||
3162
284M
       ((c >= 0xC0) && (c <= 0xD6)) ||
3163
284M
       ((c >= 0xD8) && (c <= 0xF6)) ||
3164
284M
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3165
284M
       ((c >= 0x370) && (c <= 0x37D)) ||
3166
284M
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3167
284M
       ((c >= 0x200C) && (c <= 0x200D)) ||
3168
284M
       ((c >= 0x2070) && (c <= 0x218F)) ||
3169
284M
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3170
284M
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3171
284M
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3172
284M
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3173
284M
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3174
283M
      return(1);
3175
284M
    } else {
3176
228M
        if (IS_LETTER(c) || (c == '_') || (c == ':'))
3177
227M
      return(1);
3178
228M
    }
3179
826k
    return(0);
3180
512M
}
3181
3182
static int
3183
15.5G
xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3184
15.5G
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3185
        /*
3186
   * Use the new checks of production [4] [4a] amd [5] of the
3187
   * Update 5 of XML-1.0
3188
   */
3189
10.2G
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3190
10.2G
      (((c >= 'a') && (c <= 'z')) ||
3191
10.2G
       ((c >= 'A') && (c <= 'Z')) ||
3192
10.2G
       ((c >= '0') && (c <= '9')) || /* !start */
3193
10.2G
       (c == '_') || (c == ':') ||
3194
10.2G
       (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3195
10.2G
       ((c >= 0xC0) && (c <= 0xD6)) ||
3196
10.2G
       ((c >= 0xD8) && (c <= 0xF6)) ||
3197
10.2G
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3198
10.2G
       ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3199
10.2G
       ((c >= 0x370) && (c <= 0x37D)) ||
3200
10.2G
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3201
10.2G
       ((c >= 0x200C) && (c <= 0x200D)) ||
3202
10.2G
       ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3203
10.2G
       ((c >= 0x2070) && (c <= 0x218F)) ||
3204
10.2G
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3205
10.2G
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3206
10.2G
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3207
10.2G
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3208
10.2G
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3209
9.92G
       return(1);
3210
10.2G
    } else {
3211
5.36G
        if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3212
5.36G
            (c == '.') || (c == '-') ||
3213
5.36G
      (c == '_') || (c == ':') ||
3214
5.36G
      (IS_COMBINING(c)) ||
3215
5.36G
      (IS_EXTENDER(c)))
3216
5.13G
      return(1);
3217
5.36G
    }
3218
515M
    return(0);
3219
15.5G
}
3220
3221
static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
3222
                                          int *len, int *alloc, int normalize);
3223
3224
static const xmlChar *
3225
5.89M
xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3226
5.89M
    int len = 0, l;
3227
5.89M
    int c;
3228
5.89M
    int count = 0;
3229
5.89M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3230
2.98M
                    XML_MAX_TEXT_LENGTH :
3231
5.89M
                    XML_MAX_NAME_LENGTH;
3232
3233
#ifdef DEBUG
3234
    nbParseNameComplex++;
3235
#endif
3236
3237
    /*
3238
     * Handler for more complex cases
3239
     */
3240
5.89M
    GROW;
3241
5.89M
    if (ctxt->instate == XML_PARSER_EOF)
3242
104
        return(NULL);
3243
5.89M
    c = CUR_CHAR(l);
3244
5.89M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3245
        /*
3246
   * Use the new checks of production [4] [4a] amd [5] of the
3247
   * Update 5 of XML-1.0
3248
   */
3249
3.32M
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3250
3.32M
      (!(((c >= 'a') && (c <= 'z')) ||
3251
3.25M
         ((c >= 'A') && (c <= 'Z')) ||
3252
3.25M
         (c == '_') || (c == ':') ||
3253
3.25M
         ((c >= 0xC0) && (c <= 0xD6)) ||
3254
3.25M
         ((c >= 0xD8) && (c <= 0xF6)) ||
3255
3.25M
         ((c >= 0xF8) && (c <= 0x2FF)) ||
3256
3.25M
         ((c >= 0x370) && (c <= 0x37D)) ||
3257
3.25M
         ((c >= 0x37F) && (c <= 0x1FFF)) ||
3258
3.25M
         ((c >= 0x200C) && (c <= 0x200D)) ||
3259
3.25M
         ((c >= 0x2070) && (c <= 0x218F)) ||
3260
3.25M
         ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3261
3.25M
         ((c >= 0x3001) && (c <= 0xD7FF)) ||
3262
3.25M
         ((c >= 0xF900) && (c <= 0xFDCF)) ||
3263
3.25M
         ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3264
3.25M
         ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3265
1.12M
      return(NULL);
3266
1.12M
  }
3267
2.19M
  len += l;
3268
2.19M
  NEXTL(l);
3269
2.19M
  c = CUR_CHAR(l);
3270
70.2M
  while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3271
70.2M
         (((c >= 'a') && (c <= 'z')) ||
3272
70.2M
          ((c >= 'A') && (c <= 'Z')) ||
3273
70.2M
          ((c >= '0') && (c <= '9')) || /* !start */
3274
70.2M
          (c == '_') || (c == ':') ||
3275
70.2M
          (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3276
70.2M
          ((c >= 0xC0) && (c <= 0xD6)) ||
3277
70.2M
          ((c >= 0xD8) && (c <= 0xF6)) ||
3278
70.2M
          ((c >= 0xF8) && (c <= 0x2FF)) ||
3279
70.2M
          ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3280
70.2M
          ((c >= 0x370) && (c <= 0x37D)) ||
3281
70.2M
          ((c >= 0x37F) && (c <= 0x1FFF)) ||
3282
70.2M
          ((c >= 0x200C) && (c <= 0x200D)) ||
3283
70.2M
          ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3284
70.2M
          ((c >= 0x2070) && (c <= 0x218F)) ||
3285
70.2M
          ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3286
70.2M
          ((c >= 0x3001) && (c <= 0xD7FF)) ||
3287
70.2M
          ((c >= 0xF900) && (c <= 0xFDCF)) ||
3288
70.2M
          ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3289
70.2M
          ((c >= 0x10000) && (c <= 0xEFFFF))
3290
70.2M
    )) {
3291
68.0M
      if (count++ > XML_PARSER_CHUNK_SIZE) {
3292
562k
    count = 0;
3293
562k
    GROW;
3294
562k
                if (ctxt->instate == XML_PARSER_EOF)
3295
0
                    return(NULL);
3296
562k
      }
3297
68.0M
            if (len <= INT_MAX - l)
3298
68.0M
          len += l;
3299
68.0M
      NEXTL(l);
3300
68.0M
      c = CUR_CHAR(l);
3301
68.0M
  }
3302
2.57M
    } else {
3303
2.57M
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3304
2.57M
      (!IS_LETTER(c) && (c != '_') &&
3305
2.50M
       (c != ':'))) {
3306
540k
      return(NULL);
3307
540k
  }
3308
2.02M
  len += l;
3309
2.02M
  NEXTL(l);
3310
2.02M
  c = CUR_CHAR(l);
3311
3312
19.6M
  while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3313
19.6M
         ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3314
19.6M
    (c == '.') || (c == '-') ||
3315
19.6M
    (c == '_') || (c == ':') ||
3316
19.6M
    (IS_COMBINING(c)) ||
3317
19.6M
    (IS_EXTENDER(c)))) {
3318
17.6M
      if (count++ > XML_PARSER_CHUNK_SIZE) {
3319
93.4k
    count = 0;
3320
93.4k
    GROW;
3321
93.4k
                if (ctxt->instate == XML_PARSER_EOF)
3322
0
                    return(NULL);
3323
93.4k
      }
3324
17.6M
            if (len <= INT_MAX - l)
3325
17.6M
          len += l;
3326
17.6M
      NEXTL(l);
3327
17.6M
      c = CUR_CHAR(l);
3328
17.6M
  }
3329
2.02M
    }
3330
4.22M
    if (len > maxLength) {
3331
188
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3332
188
        return(NULL);
3333
188
    }
3334
4.22M
    if (ctxt->input->cur - ctxt->input->base < len) {
3335
        /*
3336
         * There were a couple of bugs where PERefs lead to to a change
3337
         * of the buffer. Check the buffer size to avoid passing an invalid
3338
         * pointer to xmlDictLookup.
3339
         */
3340
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3341
0
                    "unexpected change of input buffer");
3342
0
        return (NULL);
3343
0
    }
3344
4.22M
    if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3345
2.35k
        return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3346
4.22M
    return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3347
4.22M
}
3348
3349
/**
3350
 * xmlParseName:
3351
 * @ctxt:  an XML parser context
3352
 *
3353
 * DEPRECATED: Internal function, don't use.
3354
 *
3355
 * parse an XML name.
3356
 *
3357
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3358
 *                  CombiningChar | Extender
3359
 *
3360
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3361
 *
3362
 * [6] Names ::= Name (#x20 Name)*
3363
 *
3364
 * Returns the Name parsed or NULL
3365
 */
3366
3367
const xmlChar *
3368
659M
xmlParseName(xmlParserCtxtPtr ctxt) {
3369
659M
    const xmlChar *in;
3370
659M
    const xmlChar *ret;
3371
659M
    size_t count = 0;
3372
659M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3373
254M
                       XML_MAX_TEXT_LENGTH :
3374
659M
                       XML_MAX_NAME_LENGTH;
3375
3376
659M
    GROW;
3377
3378
#ifdef DEBUG
3379
    nbParseName++;
3380
#endif
3381
3382
    /*
3383
     * Accelerator for simple ASCII names
3384
     */
3385
659M
    in = ctxt->input->cur;
3386
659M
    if (((*in >= 0x61) && (*in <= 0x7A)) ||
3387
659M
  ((*in >= 0x41) && (*in <= 0x5A)) ||
3388
659M
  (*in == '_') || (*in == ':')) {
3389
657M
  in++;
3390
2.65G
  while (((*in >= 0x61) && (*in <= 0x7A)) ||
3391
2.65G
         ((*in >= 0x41) && (*in <= 0x5A)) ||
3392
2.65G
         ((*in >= 0x30) && (*in <= 0x39)) ||
3393
2.65G
         (*in == '_') || (*in == '-') ||
3394
2.65G
         (*in == ':') || (*in == '.'))
3395
2.00G
      in++;
3396
657M
  if ((*in > 0) && (*in < 0x80)) {
3397
653M
      count = in - ctxt->input->cur;
3398
653M
            if (count > maxLength) {
3399
54
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3400
54
                return(NULL);
3401
54
            }
3402
653M
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3403
653M
      ctxt->input->cur = in;
3404
653M
      ctxt->input->col += count;
3405
653M
      if (ret == NULL)
3406
0
          xmlErrMemory(ctxt, NULL);
3407
653M
      return(ret);
3408
653M
  }
3409
657M
    }
3410
    /* accelerator for special cases */
3411
5.89M
    return(xmlParseNameComplex(ctxt));
3412
659M
}
3413
3414
static const xmlChar *
3415
1.15M
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3416
1.15M
    int len = 0, l;
3417
1.15M
    int c;
3418
1.15M
    int count = 0;
3419
1.15M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3420
482k
                    XML_MAX_TEXT_LENGTH :
3421
1.15M
                    XML_MAX_NAME_LENGTH;
3422
1.15M
    size_t startPosition = 0;
3423
3424
#ifdef DEBUG
3425
    nbParseNCNameComplex++;
3426
#endif
3427
3428
    /*
3429
     * Handler for more complex cases
3430
     */
3431
1.15M
    GROW;
3432
1.15M
    startPosition = CUR_PTR - BASE_PTR;
3433
1.15M
    c = CUR_CHAR(l);
3434
1.15M
    if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3435
1.15M
  (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3436
854k
  return(NULL);
3437
854k
    }
3438
3439
13.5M
    while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3440
13.5M
     (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3441
13.2M
  if (count++ > XML_PARSER_CHUNK_SIZE) {
3442
113k
      count = 0;
3443
113k
      GROW;
3444
113k
            if (ctxt->instate == XML_PARSER_EOF)
3445
0
                return(NULL);
3446
113k
  }
3447
13.2M
        if (len <= INT_MAX - l)
3448
13.2M
      len += l;
3449
13.2M
  NEXTL(l);
3450
13.2M
  c = CUR_CHAR(l);
3451
13.2M
  if (c == 0) {
3452
38.5k
      count = 0;
3453
      /*
3454
       * when shrinking to extend the buffer we really need to preserve
3455
       * the part of the name we already parsed. Hence rolling back
3456
       * by current length.
3457
       */
3458
38.5k
      ctxt->input->cur -= l;
3459
38.5k
      GROW;
3460
38.5k
            if (ctxt->instate == XML_PARSER_EOF)
3461
0
                return(NULL);
3462
38.5k
      ctxt->input->cur += l;
3463
38.5k
      c = CUR_CHAR(l);
3464
38.5k
  }
3465
13.2M
    }
3466
302k
    if (len > maxLength) {
3467
139
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3468
139
        return(NULL);
3469
139
    }
3470
302k
    return(xmlDictLookup(ctxt->dict, (BASE_PTR + startPosition), len));
3471
302k
}
3472
3473
/**
3474
 * xmlParseNCName:
3475
 * @ctxt:  an XML parser context
3476
 * @len:  length of the string parsed
3477
 *
3478
 * parse an XML name.
3479
 *
3480
 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3481
 *                      CombiningChar | Extender
3482
 *
3483
 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3484
 *
3485
 * Returns the Name parsed or NULL
3486
 */
3487
3488
static const xmlChar *
3489
66.7M
xmlParseNCName(xmlParserCtxtPtr ctxt) {
3490
66.7M
    const xmlChar *in, *e;
3491
66.7M
    const xmlChar *ret;
3492
66.7M
    size_t count = 0;
3493
66.7M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3494
19.7M
                       XML_MAX_TEXT_LENGTH :
3495
66.7M
                       XML_MAX_NAME_LENGTH;
3496
3497
#ifdef DEBUG
3498
    nbParseNCName++;
3499
#endif
3500
3501
    /*
3502
     * Accelerator for simple ASCII names
3503
     */
3504
66.7M
    in = ctxt->input->cur;
3505
66.7M
    e = ctxt->input->end;
3506
66.7M
    if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3507
66.7M
   ((*in >= 0x41) && (*in <= 0x5A)) ||
3508
66.7M
   (*in == '_')) && (in < e)) {
3509
65.8M
  in++;
3510
210M
  while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3511
210M
          ((*in >= 0x41) && (*in <= 0x5A)) ||
3512
210M
          ((*in >= 0x30) && (*in <= 0x39)) ||
3513
210M
          (*in == '_') || (*in == '-') ||
3514
210M
          (*in == '.')) && (in < e))
3515
145M
      in++;
3516
65.8M
  if (in >= e)
3517
12.4k
      goto complex;
3518
65.8M
  if ((*in > 0) && (*in < 0x80)) {
3519
65.6M
      count = in - ctxt->input->cur;
3520
65.6M
            if (count > maxLength) {
3521
12
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3522
12
                return(NULL);
3523
12
            }
3524
65.6M
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3525
65.6M
      ctxt->input->cur = in;
3526
65.6M
      ctxt->input->col += count;
3527
65.6M
      if (ret == NULL) {
3528
0
          xmlErrMemory(ctxt, NULL);
3529
0
      }
3530
65.6M
      return(ret);
3531
65.6M
  }
3532
65.8M
    }
3533
1.15M
complex:
3534
1.15M
    return(xmlParseNCNameComplex(ctxt));
3535
66.7M
}
3536
3537
/**
3538
 * xmlParseNameAndCompare:
3539
 * @ctxt:  an XML parser context
3540
 *
3541
 * parse an XML name and compares for match
3542
 * (specialized for endtag parsing)
3543
 *
3544
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3545
 * and the name for mismatch
3546
 */
3547
3548
static const xmlChar *
3549
31.1M
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3550
31.1M
    register const xmlChar *cmp = other;
3551
31.1M
    register const xmlChar *in;
3552
31.1M
    const xmlChar *ret;
3553
3554
31.1M
    GROW;
3555
31.1M
    if (ctxt->instate == XML_PARSER_EOF)
3556
0
        return(NULL);
3557
3558
31.1M
    in = ctxt->input->cur;
3559
154M
    while (*in != 0 && *in == *cmp) {
3560
123M
  ++in;
3561
123M
  ++cmp;
3562
123M
    }
3563
31.1M
    if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3564
  /* success */
3565
30.4M
  ctxt->input->col += in - ctxt->input->cur;
3566
30.4M
  ctxt->input->cur = in;
3567
30.4M
  return (const xmlChar*) 1;
3568
30.4M
    }
3569
    /* failure (or end of input buffer), check with full function */
3570
644k
    ret = xmlParseName (ctxt);
3571
    /* strings coming from the dictionary direct compare possible */
3572
644k
    if (ret == other) {
3573
27.1k
  return (const xmlChar*) 1;
3574
27.1k
    }
3575
617k
    return ret;
3576
644k
}
3577
3578
/**
3579
 * xmlParseStringName:
3580
 * @ctxt:  an XML parser context
3581
 * @str:  a pointer to the string pointer (IN/OUT)
3582
 *
3583
 * parse an XML name.
3584
 *
3585
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3586
 *                  CombiningChar | Extender
3587
 *
3588
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3589
 *
3590
 * [6] Names ::= Name (#x20 Name)*
3591
 *
3592
 * Returns the Name parsed or NULL. The @str pointer
3593
 * is updated to the current location in the string.
3594
 */
3595
3596
static xmlChar *
3597
511M
xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3598
511M
    xmlChar buf[XML_MAX_NAMELEN + 5];
3599
511M
    const xmlChar *cur = *str;
3600
511M
    int len = 0, l;
3601
511M
    int c;
3602
511M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3603
187M
                    XML_MAX_TEXT_LENGTH :
3604
511M
                    XML_MAX_NAME_LENGTH;
3605
3606
#ifdef DEBUG
3607
    nbParseStringName++;
3608
#endif
3609
3610
511M
    c = CUR_SCHAR(cur, l);
3611
511M
    if (!xmlIsNameStartChar(ctxt, c)) {
3612
68.8k
  return(NULL);
3613
68.8k
    }
3614
3615
511M
    COPY_BUF(l,buf,len,c);
3616
511M
    cur += l;
3617
511M
    c = CUR_SCHAR(cur, l);
3618
6.65G
    while (xmlIsNameChar(ctxt, c)) {
3619
6.18G
  COPY_BUF(l,buf,len,c);
3620
6.18G
  cur += l;
3621
6.18G
  c = CUR_SCHAR(cur, l);
3622
6.18G
  if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3623
      /*
3624
       * Okay someone managed to make a huge name, so he's ready to pay
3625
       * for the processing speed.
3626
       */
3627
38.8M
      xmlChar *buffer;
3628
38.8M
      int max = len * 2;
3629
3630
38.8M
      buffer = (xmlChar *) xmlMallocAtomic(max);
3631
38.8M
      if (buffer == NULL) {
3632
0
          xmlErrMemory(ctxt, NULL);
3633
0
    return(NULL);
3634
0
      }
3635
38.8M
      memcpy(buffer, buf, len);
3636
8.87G
      while (xmlIsNameChar(ctxt, c)) {
3637
8.83G
    if (len + 10 > max) {
3638
38.9M
        xmlChar *tmp;
3639
3640
38.9M
        max *= 2;
3641
38.9M
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3642
38.9M
        if (tmp == NULL) {
3643
0
      xmlErrMemory(ctxt, NULL);
3644
0
      xmlFree(buffer);
3645
0
      return(NULL);
3646
0
        }
3647
38.9M
        buffer = tmp;
3648
38.9M
    }
3649
8.83G
    COPY_BUF(l,buffer,len,c);
3650
8.83G
    cur += l;
3651
8.83G
    c = CUR_SCHAR(cur, l);
3652
8.83G
                if (len > maxLength) {
3653
6
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3654
6
                    xmlFree(buffer);
3655
6
                    return(NULL);
3656
6
                }
3657
8.83G
      }
3658
38.8M
      buffer[len] = 0;
3659
38.8M
      *str = cur;
3660
38.8M
      return(buffer);
3661
38.8M
  }
3662
6.18G
    }
3663
472M
    if (len > maxLength) {
3664
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3665
0
        return(NULL);
3666
0
    }
3667
472M
    *str = cur;
3668
472M
    return(xmlStrndup(buf, len));
3669
472M
}
3670
3671
/**
3672
 * xmlParseNmtoken:
3673
 * @ctxt:  an XML parser context
3674
 *
3675
 * DEPRECATED: Internal function, don't use.
3676
 *
3677
 * parse an XML Nmtoken.
3678
 *
3679
 * [7] Nmtoken ::= (NameChar)+
3680
 *
3681
 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3682
 *
3683
 * Returns the Nmtoken parsed or NULL
3684
 */
3685
3686
xmlChar *
3687
4.05M
xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3688
4.05M
    xmlChar buf[XML_MAX_NAMELEN + 5];
3689
4.05M
    int len = 0, l;
3690
4.05M
    int c;
3691
4.05M
    int count = 0;
3692
4.05M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3693
1.38M
                    XML_MAX_TEXT_LENGTH :
3694
4.05M
                    XML_MAX_NAME_LENGTH;
3695
3696
#ifdef DEBUG
3697
    nbParseNmToken++;
3698
#endif
3699
3700
4.05M
    GROW;
3701
4.05M
    if (ctxt->instate == XML_PARSER_EOF)
3702
0
        return(NULL);
3703
4.05M
    c = CUR_CHAR(l);
3704
3705
24.1M
    while (xmlIsNameChar(ctxt, c)) {
3706
20.0M
  if (count++ > XML_PARSER_CHUNK_SIZE) {
3707
0
      count = 0;
3708
0
      GROW;
3709
0
  }
3710
20.0M
  COPY_BUF(l,buf,len,c);
3711
20.0M
  NEXTL(l);
3712
20.0M
  c = CUR_CHAR(l);
3713
20.0M
  if (c == 0) {
3714
2.63k
      count = 0;
3715
2.63k
      GROW;
3716
2.63k
      if (ctxt->instate == XML_PARSER_EOF)
3717
0
    return(NULL);
3718
2.63k
            c = CUR_CHAR(l);
3719
2.63k
  }
3720
20.0M
  if (len >= XML_MAX_NAMELEN) {
3721
      /*
3722
       * Okay someone managed to make a huge token, so he's ready to pay
3723
       * for the processing speed.
3724
       */
3725
3.87k
      xmlChar *buffer;
3726
3.87k
      int max = len * 2;
3727
3728
3.87k
      buffer = (xmlChar *) xmlMallocAtomic(max);
3729
3.87k
      if (buffer == NULL) {
3730
0
          xmlErrMemory(ctxt, NULL);
3731
0
    return(NULL);
3732
0
      }
3733
3.87k
      memcpy(buffer, buf, len);
3734
7.66M
      while (xmlIsNameChar(ctxt, c)) {
3735
7.65M
    if (count++ > XML_PARSER_CHUNK_SIZE) {
3736
77.0k
        count = 0;
3737
77.0k
        GROW;
3738
77.0k
                    if (ctxt->instate == XML_PARSER_EOF) {
3739
0
                        xmlFree(buffer);
3740
0
                        return(NULL);
3741
0
                    }
3742
77.0k
    }
3743
7.65M
    if (len + 10 > max) {
3744
7.75k
        xmlChar *tmp;
3745
3746
7.75k
        max *= 2;
3747
7.75k
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3748
7.75k
        if (tmp == NULL) {
3749
0
      xmlErrMemory(ctxt, NULL);
3750
0
      xmlFree(buffer);
3751
0
      return(NULL);
3752
0
        }
3753
7.75k
        buffer = tmp;
3754
7.75k
    }
3755
7.65M
    COPY_BUF(l,buffer,len,c);
3756
7.65M
    NEXTL(l);
3757
7.65M
    c = CUR_CHAR(l);
3758
7.65M
                if (len > maxLength) {
3759
13
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3760
13
                    xmlFree(buffer);
3761
13
                    return(NULL);
3762
13
                }
3763
7.65M
      }
3764
3.86k
      buffer[len] = 0;
3765
3.86k
      return(buffer);
3766
3.87k
  }
3767
20.0M
    }
3768
4.04M
    if (len == 0)
3769
42.2k
        return(NULL);
3770
4.00M
    if (len > maxLength) {
3771
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3772
0
        return(NULL);
3773
0
    }
3774
4.00M
    return(xmlStrndup(buf, len));
3775
4.00M
}
3776
3777
/**
 * xmlParseEntityValue:
 * @ctxt:  an XML parser context
 * @orig:  if non-NULL store a copy of the original entity value
 *
 * DEPRECATED: Internal function, don't use.
 *
 * parse a value for ENTITY declarations
 *
 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
 *	               "'" ([^%&'] | PEReference | Reference)* "'"
 *
 * The function works in three steps: the quoted literal is copied into
 * a local buffer, the buffer is scanned to reject '%' and '&' that do
 * not introduce a well-formed reference, and finally the buffer is run
 * through xmlStringDecodeEntitiesInt() with XML_SUBSTITUTE_PEREF.
 *
 * @orig is only written on the path that reaches the decoding step; it
 * then receives the raw buffer, which is no longer freed here. On every
 * error path @orig is left untouched and the buffer is released.
 *
 * Returns the EntityValue parsed with reference substituted or NULL
 */

xmlChar *
xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
    xmlChar *buf = NULL;
    int len = 0;
    int size = XML_PARSER_BUFFER_SIZE;
    int c, l;
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
                    XML_MAX_HUGE_LENGTH :
                    XML_MAX_TEXT_LENGTH;
    xmlChar stop;
    xmlChar *ret = NULL;
    const xmlChar *cur = NULL;
    xmlParserInputPtr input;

    /* the opening quote decides which character closes the literal */
    if (RAW == '"') stop = '"';
    else if (RAW == '\'') stop = '\'';
    else {
        xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
        return(NULL);
    }
    buf = (xmlChar *) xmlMallocAtomic(size);
    if (buf == NULL) {
        xmlErrMemory(ctxt, NULL);
        return(NULL);
    }

    /*
     * The content of the entity definition is copied in a buffer.
     */

    ctxt->instate = XML_PARSER_ENTITY_VALUE;
    /* remember the starting input: see the loop condition below */
    input = ctxt->input;
    GROW;
    if (ctxt->instate == XML_PARSER_EOF)
        goto error;
    /* skip the opening quote */
    NEXT;
    c = CUR_CHAR(l);
    /*
     * NOTE: 4.4.5 Included in Literal
     * When a parameter entity reference appears in a literal entity
     * value, ... a single or double quote character in the replacement
     * text is always treated as a normal data character and will not
     * terminate the literal.
     * In practice it means we stop the loop only when back at parsing
     * the initial entity and the quote is found
     */
    while (((IS_CHAR(c)) && ((c != stop) || /* checked */
            (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
        /* keep at least 5 spare bytes before appending a character */
        if (len + 5 >= size) {
            xmlChar *tmp;

            size *= 2;
            tmp = (xmlChar *) xmlRealloc(buf, size);
            if (tmp == NULL) {
                xmlErrMemory(ctxt, NULL);
                goto error;
            }
            buf = tmp;
        }
        COPY_BUF(l,buf,len,c);
        NEXTL(l);

        GROW;
        c = CUR_CHAR(l);
        if (c == 0) {
            /* a zero character: try to get more input and read again */
            GROW;
            c = CUR_CHAR(l);
        }

        if (len > maxLength) {
            xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
                           "entity value too long\n");
            goto error;
        }
    }
    /* terminate the copy so it can be scanned as a C string below */
    buf[len] = 0;
    if (ctxt->instate == XML_PARSER_EOF)
        goto error;
    /* the loop ended on something other than the closing quote */
    if (c != stop) {
        xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
        goto error;
    }
    /* skip the closing quote */
    NEXT;

    /*
     * Raise problem w.r.t. '&' and '%' being used in non-entities
     * reference constructs. Note Charref will be handled in
     * xmlStringDecodeEntities()
     */
    cur = buf;
    while (*cur != 0) { /* non input consuming */
        if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
            xmlChar *name;
            xmlChar tmp = *cur;
            int nameOk = 0;

            cur++;
            /* only the validity of the name matters, not its value */
            name = xmlParseStringName(ctxt, &cur);
            if (name != NULL) {
                nameOk = 1;
                xmlFree(name);
            }
            /* a reference is a name immediately followed by ';' */
            if ((nameOk == 0) || (*cur != ';')) {
                xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
            "EntityValue: '%c' forbidden except for entities references\n",
                                  tmp);
                goto error;
            }
            /*
             * a '%' reference is rejected when inSubset is 1 and only
             * one input is on the stack
             */
            if ((tmp == '%') && (ctxt->inSubset == 1) &&
                (ctxt->inputNr == 1)) {
                xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
                goto error;
            }
            /*
             * *cur is ';' at this point (checked above), so this break
             * is not expected to trigger.
             */
            if (*cur == 0)
                break;
        }
        cur++;
    }

    /*
     * Then PEReference entities are substituted.
     *
     * NOTE: 4.4.7 Bypassed
     * When a general entity reference appears in the EntityValue in
     * an entity declaration, it is bypassed and left as is.
     * so XML_SUBSTITUTE_REF is not set here.
     */
    ++ctxt->depth;
    ret = xmlStringDecodeEntitiesInt(ctxt, buf, len, XML_SUBSTITUTE_PEREF,
                                     0, 0, 0, /* check */ 1);
    --ctxt->depth;

    if (orig != NULL) {
        /* hand the raw buffer to the caller instead of freeing it */
        *orig = buf;
        buf = NULL;
    }

error:
    /* shared exit: ret is still NULL when reached through a goto */
    if (buf != NULL)
        xmlFree(buf);
    return(ret);
}
3934
3935
/**
 * xmlParseAttValueComplex:
 * @ctxt:  an XML parser context
 * @attlen:  if non-NULL, receives the length of the returned value
 * @normalize:  whether to apply the inner normalization
 *
 * parse a value for an attribute, this is the fallback function
 * of xmlParseAttValue() when the attribute parsing requires handling
 * of non-ASCII characters, or normalization compaction.
 *
 * The value is built in a growable buffer: literal characters are
 * copied (white space becomes #x20), character references are decoded,
 * and entity references are either expanded or written back as
 * "&name;" depending on ctxt->replaceEntities.
 *
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
 */
static xmlChar *
xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
    xmlChar limit = 0;
    xmlChar *buf = NULL;
    xmlChar *rep = NULL;
    size_t len = 0;
    size_t buf_size = 0;
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
                       XML_MAX_HUGE_LENGTH :
                       XML_MAX_TEXT_LENGTH;
    int c, l, in_space = 0;
    xmlChar *current = NULL;
    xmlEntityPtr ent;

    /* the opening quote decides which character closes the value */
    if (NXT(0) == '"') {
        ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
        limit = '"';
        NEXT;
    } else if (NXT(0) == '\'') {
        limit = '\'';
        ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
        NEXT;
    } else {
        xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
        return(NULL);
    }

    /*
     * allocate a translation buffer.
     */
    buf_size = XML_PARSER_BUFFER_SIZE;
    buf = (xmlChar *) xmlMallocAtomic(buf_size);
    if (buf == NULL) goto mem_error;

    /*
     * OK loop until we reach one of the ending char or a size limit.
     *
     * NOTE(review): growBuffer is a macro defined outside this block;
     * presumably it enlarges buf / buf_size and jumps to mem_error on
     * failure - confirm against its definition.
     */
    c = CUR_CHAR(l);
    while (((NXT(0) != limit) && /* checked */
            (IS_CHAR(c)) && (c != '<')) &&
            (ctxt->instate != XML_PARSER_EOF)) {
        if (c == '&') {
            in_space = 0;
            if (NXT(1) == '#') {
                /* character reference */
                int val = xmlParseCharRef(ctxt);

                if (val == '&') {
                    if (ctxt->replaceEntities) {
                        if (len + 10 > buf_size) {
                            growBuffer(buf, 10);
                        }
                        buf[len++] = '&';
                    } else {
                        /*
                         * The reparsing will be done in xmlStringGetNodeList()
                         * called by the attribute() function in SAX.c
                         */
                        if (len + 10 > buf_size) {
                            growBuffer(buf, 10);
                        }
                        /* keep the ampersand escaped, as "&#38;" */
                        buf[len++] = '&';
                        buf[len++] = '#';
                        buf[len++] = '3';
                        buf[len++] = '8';
                        buf[len++] = ';';
                    }
                } else if (val != 0) {
                    if (len + 10 > buf_size) {
                        growBuffer(buf, 10);
                    }
                    len += xmlCopyChar(0, &buf[len], val);
                }
            } else {
                /* general entity reference */
                ent = xmlParseEntityRef(ctxt);
                if ((ent != NULL) &&
                    (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
                    /* predefined entity: emit its single character */
                    if (len + 10 > buf_size) {
                        growBuffer(buf, 10);
                    }
                    if ((ctxt->replaceEntities == 0) &&
                        (ent->content[0] == '&')) {
                        buf[len++] = '&';
                        buf[len++] = '#';
                        buf[len++] = '3';
                        buf[len++] = '8';
                        buf[len++] = ';';
                    } else {
                        buf[len++] = ent->content[0];
                    }
                } else if ((ent != NULL) &&
                           (ctxt->replaceEntities != 0)) {
                    /* substitution requested: inline the expanded content */
                    if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
                        if (xmlParserEntityCheck(ctxt, ent->length))
                            goto error;

                        ++ctxt->depth;
                        rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
                                ent->length, XML_SUBSTITUTE_REF, 0, 0, 0,
                                /* check */ 1);
                        --ctxt->depth;
                        if (rep != NULL) {
                            current = rep;
                            while (*current != 0) { /* non input consuming */
                                /* white space in the replacement becomes #x20 */
                                if ((*current == 0xD) || (*current == 0xA) ||
                                    (*current == 0x9)) {
                                    buf[len++] = 0x20;
                                    current++;
                                } else
                                    buf[len++] = *current++;
                                if (len + 10 > buf_size) {
                                    growBuffer(buf, 10);
                                }
                            }
                            xmlFree(rep);
                            rep = NULL;
                        }
                    } else {
                        /*
                         * NOTE(review): predefined entities are already
                         * taken by the first branch of this if/else chain,
                         * so this branch looks unreachable.
                         */
                        if (len + 10 > buf_size) {
                            growBuffer(buf, 10);
                        }
                        if (ent->content != NULL)
                            buf[len++] = ent->content[0];
                    }
                } else if (ent != NULL) {
                    /* no substitution: the reference is written back as is */
                    int i = xmlStrlen(ent->name);
                    const xmlChar *cur = ent->name;

                    /*
                     * We also check for recursion and amplification
                     * when entities are not substituted. They're
                     * often expanded later.
                     */
                    if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
                        (ent->content != NULL)) {
                        if ((ent->flags & XML_ENT_CHECKED) == 0) {
                            /*
                             * first time this entity is seen: expand it
                             * once, only to run the checks; the result is
                             * discarded below
                             */
                            unsigned long oldCopy = ctxt->sizeentcopy;

                            ctxt->sizeentcopy = ent->length;

                            ++ctxt->depth;
                            rep = xmlStringDecodeEntitiesInt(ctxt,
                                    ent->content, ent->length,
                                    XML_SUBSTITUTE_REF, 0, 0, 0,
                                    /* check */ 1);
                            --ctxt->depth;

                            /*
                             * If we're parsing DTD content, the entity
                             * might reference other entities which
                             * weren't defined yet, so the check isn't
                             * reliable.
                             */
                            if (ctxt->inSubset == 0) {
                                ent->flags |= XML_ENT_CHECKED;
                                ent->expandedSize = ctxt->sizeentcopy;
                            }

                            if (rep != NULL) {
                                xmlFree(rep);
                                rep = NULL;
                            } else {
                                /* expansion failed: empty the entity content */
                                ent->content[0] = 0;
                            }

                            if (xmlParserEntityCheck(ctxt, oldCopy))
                                goto error;
                        } else {
                            /* already checked: reuse the recorded size */
                            if (xmlParserEntityCheck(ctxt, ent->expandedSize))
                                goto error;
                        }
                    }

                    /*
                     * Just output the reference
                     */
                    buf[len++] = '&';
                    while (len + i + 10 > buf_size) {
                        growBuffer(buf, i + 10);
                    }
                    for (;i > 0;i--)
                        buf[len++] = *cur++;
                    buf[len++] = ';';
                }
            }
        } else {
            if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
                /*
                 * White space is stored as #x20. When normalizing,
                 * leading spaces are dropped and runs of spaces are
                 * collapsed to one.
                 */
                if ((len != 0) || (!normalize)) {
                    if ((!normalize) || (!in_space)) {
                        COPY_BUF(l,buf,len,0x20);
                        while (len + 10 > buf_size) {
                            growBuffer(buf, 10);
                        }
                    }
                    in_space = 1;
                }
            } else {
                in_space = 0;
                COPY_BUF(l,buf,len,c);
                if (len + 10 > buf_size) {
                    growBuffer(buf, 10);
                }
            }
            NEXTL(l);
        }
        GROW;
        c = CUR_CHAR(l);
        if (len > maxLength) {
            /*
             * NOTE(review): this leaves through mem_error, so
             * xmlErrMemory() is also called although no allocation
             * failed - confirm this is intended.
             */
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
                           "AttValue length too long\n");
            goto mem_error;
        }
    }
    if (ctxt->instate == XML_PARSER_EOF)
        goto error;

    /* when normalizing, strip the trailing spaces */
    if ((in_space) && (normalize)) {
        while ((len > 0) && (buf[len - 1] == 0x20)) len--;
    }
    buf[len] = 0;
    /*
     * Work out why the loop stopped. The errors below are reported but
     * the buffer built so far is still returned.
     */
    if (RAW == '<') {
        xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
    } else if (RAW != limit) {
        if ((c != 0) && (!IS_CHAR(c))) {
            xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
                           "invalid character in attribute value\n");
        } else {
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
                           "AttValue: ' expected\n");
        }
    } else
        NEXT;

    /* len is a size_t and is converted to int here */
    if (attlen != NULL) *attlen = len;
    return(buf);

mem_error:
    xmlErrMemory(ctxt, NULL);
error:
    /* shared cleanup for every failure path */
    if (buf != NULL)
        xmlFree(buf);
    if (rep != NULL)
        xmlFree(rep);
    return(NULL);
}
4191
4192
/**
4193
 * xmlParseAttValue:
4194
 * @ctxt:  an XML parser context
4195
 *
4196
 * DEPRECATED: Internal function, don't use.
4197
 *
4198
 * parse a value for an attribute
4199
 * Note: the parser won't do substitution of entities here, this
4200
 * will be handled later in xmlStringGetNodeList
4201
 *
4202
 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4203
 *                   "'" ([^<&'] | Reference)* "'"
4204
 *
4205
 * 3.3.3 Attribute-Value Normalization:
4206
 * Before the value of an attribute is passed to the application or
4207
 * checked for validity, the XML processor must normalize it as follows:
4208
 * - a character reference is processed by appending the referenced
4209
 *   character to the attribute value
4210
 * - an entity reference is processed by recursively processing the
4211
 *   replacement text of the entity
4212
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4213
 *   appending #x20 to the normalized value, except that only a single
4214
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
4215
 *   parsed entity or the literal entity value of an internal parsed entity
4216
 * - other characters are processed by appending them to the normalized value
4217
 * If the declared value is not CDATA, then the XML processor must further
4218
 * process the normalized attribute value by discarding any leading and
4219
 * trailing space (#x20) characters, and by replacing sequences of space
4220
 * (#x20) characters by a single space (#x20) character.
4221
 * All attributes for which no declaration has been read should be treated
4222
 * by a non-validating parser as if declared CDATA.
4223
 *
4224
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4225
 */
4226
4227
4228
xmlChar *
4229
18.4M
xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4230
18.4M
    if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4231
18.4M
    return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
4232
18.4M
}
4233
4234
/**
4235
 * xmlParseSystemLiteral:
4236
 * @ctxt:  an XML parser context
4237
 *
4238
 * DEPRECATED: Internal function, don't use.
4239
 *
4240
 * parse an XML Literal
4241
 *
4242
 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4243
 *
4244
 * Returns the SystemLiteral parsed or NULL
4245
 */
4246
4247
xmlChar *
4248
1.19M
xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4249
1.19M
    xmlChar *buf = NULL;
4250
1.19M
    int len = 0;
4251
1.19M
    int size = XML_PARSER_BUFFER_SIZE;
4252
1.19M
    int cur, l;
4253
1.19M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4254
507k
                    XML_MAX_TEXT_LENGTH :
4255
1.19M
                    XML_MAX_NAME_LENGTH;
4256
1.19M
    xmlChar stop;
4257
1.19M
    int state = ctxt->instate;
4258
1.19M
    int count = 0;
4259
4260
1.19M
    SHRINK;
4261
1.19M
    if (RAW == '"') {
4262
487k
        NEXT;
4263
487k
  stop = '"';
4264
711k
    } else if (RAW == '\'') {
4265
681k
        NEXT;
4266
681k
  stop = '\'';
4267
681k
    } else {
4268
30.0k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4269
30.0k
  return(NULL);
4270
30.0k
    }
4271
4272
1.16M
    buf = (xmlChar *) xmlMallocAtomic(size);
4273
1.16M
    if (buf == NULL) {
4274
0
        xmlErrMemory(ctxt, NULL);
4275
0
  return(NULL);
4276
0
    }
4277
1.16M
    ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4278
1.16M
    cur = CUR_CHAR(l);
4279
26.6M
    while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4280
25.4M
  if (len + 5 >= size) {
4281
34.2k
      xmlChar *tmp;
4282
4283
34.2k
      size *= 2;
4284
34.2k
      tmp = (xmlChar *) xmlRealloc(buf, size);
4285
34.2k
      if (tmp == NULL) {
4286
0
          xmlFree(buf);
4287
0
    xmlErrMemory(ctxt, NULL);
4288
0
    ctxt->instate = (xmlParserInputState) state;
4289
0
    return(NULL);
4290
0
      }
4291
34.2k
      buf = tmp;
4292
34.2k
  }
4293
25.4M
  count++;
4294
25.4M
  if (count > 50) {
4295
330k
      SHRINK;
4296
330k
      GROW;
4297
330k
      count = 0;
4298
330k
            if (ctxt->instate == XML_PARSER_EOF) {
4299
0
          xmlFree(buf);
4300
0
    return(NULL);
4301
0
            }
4302
330k
  }
4303
25.4M
  COPY_BUF(l,buf,len,cur);
4304
25.4M
  NEXTL(l);
4305
25.4M
  cur = CUR_CHAR(l);
4306
25.4M
  if (cur == 0) {
4307
7.50k
      GROW;
4308
7.50k
      SHRINK;
4309
7.50k
      cur = CUR_CHAR(l);
4310
7.50k
  }
4311
25.4M
        if (len > maxLength) {
4312
83
            xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4313
83
            xmlFree(buf);
4314
83
            ctxt->instate = (xmlParserInputState) state;
4315
83
            return(NULL);
4316
83
        }
4317
25.4M
    }
4318
1.16M
    buf[len] = 0;
4319
1.16M
    ctxt->instate = (xmlParserInputState) state;
4320
1.16M
    if (!IS_CHAR(cur)) {
4321
10.6k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4322
1.15M
    } else {
4323
1.15M
  NEXT;
4324
1.15M
    }
4325
1.16M
    return(buf);
4326
1.16M
}
4327
4328
/**
4329
 * xmlParsePubidLiteral:
4330
 * @ctxt:  an XML parser context
4331
 *
4332
 * DEPRECATED: Internal function, don't use.
4333
 *
4334
 * parse an XML public literal
4335
 *
4336
 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4337
 *
4338
 * Returns the PubidLiteral parsed or NULL.
4339
 */
4340
4341
xmlChar *
4342
477k
xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4343
477k
    xmlChar *buf = NULL;
4344
477k
    int len = 0;
4345
477k
    int size = XML_PARSER_BUFFER_SIZE;
4346
477k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4347
205k
                    XML_MAX_TEXT_LENGTH :
4348
477k
                    XML_MAX_NAME_LENGTH;
4349
477k
    xmlChar cur;
4350
477k
    xmlChar stop;
4351
477k
    int count = 0;
4352
477k
    xmlParserInputState oldstate = ctxt->instate;
4353
4354
477k
    SHRINK;
4355
477k
    if (RAW == '"') {
4356
101k
        NEXT;
4357
101k
  stop = '"';
4358
375k
    } else if (RAW == '\'') {
4359
364k
        NEXT;
4360
364k
  stop = '\'';
4361
364k
    } else {
4362
11.5k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4363
11.5k
  return(NULL);
4364
11.5k
    }
4365
465k
    buf = (xmlChar *) xmlMallocAtomic(size);
4366
465k
    if (buf == NULL) {
4367
0
  xmlErrMemory(ctxt, NULL);
4368
0
  return(NULL);
4369
0
    }
4370
465k
    ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
4371
465k
    cur = CUR;
4372
7.94M
    while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
4373
7.48M
  if (len + 1 >= size) {
4374
9.46k
      xmlChar *tmp;
4375
4376
9.46k
      size *= 2;
4377
9.46k
      tmp = (xmlChar *) xmlRealloc(buf, size);
4378
9.46k
      if (tmp == NULL) {
4379
0
    xmlErrMemory(ctxt, NULL);
4380
0
    xmlFree(buf);
4381
0
    return(NULL);
4382
0
      }
4383
9.46k
      buf = tmp;
4384
9.46k
  }
4385
7.48M
  buf[len++] = cur;
4386
7.48M
  count++;
4387
7.48M
  if (count > 50) {
4388
72.0k
      SHRINK;
4389
72.0k
      GROW;
4390
72.0k
      count = 0;
4391
72.0k
            if (ctxt->instate == XML_PARSER_EOF) {
4392
0
    xmlFree(buf);
4393
0
    return(NULL);
4394
0
            }
4395
72.0k
  }
4396
7.48M
  NEXT;
4397
7.48M
  cur = CUR;
4398
7.48M
  if (cur == 0) {
4399
2.56k
      GROW;
4400
2.56k
      SHRINK;
4401
2.56k
      cur = CUR;
4402
2.56k
  }
4403
7.48M
        if (len > maxLength) {
4404
3
            xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4405
3
            xmlFree(buf);
4406
3
            return(NULL);
4407
3
        }
4408
7.48M
    }
4409
465k
    buf[len] = 0;
4410
465k
    if (cur != stop) {
4411
41.7k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4412
424k
    } else {
4413
424k
  NEXT;
4414
424k
    }
4415
465k
    ctxt->instate = oldstate;
4416
465k
    return(buf);
4417
465k
}
4418
4419
/* Forward declaration: slow-path fallback called by xmlParseCharData(), defined further below. */
static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt);
4420
4421
/*
 * Lookup table used by the inner loop of the character data fast path.
 * A non-zero entry means the byte can be skipped over as plain text.
 * Zero entries stop the scan: control characters other than TAB (LF and
 * CR are handled separately by the caller), '&', '<', ']' and every byte
 * >= 0x80.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 - 0x0F: only TAB (0x09) is accepted */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x10 - 0x1F: control characters, all rejected */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20 - 0x2F: '&' (0x26) rejected */
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30 - 0x3F: '<' (0x3C) rejected */
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F,
    /* 0x40 - 0x4F */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50 - 0x5F: ']' (0x5D) rejected */
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F,
    /* 0x60 - 0x6F */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 - 0x7F */
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F
    /* 0x80 - 0xFF: non-ascii, left to implicit zero-initialisation */
};
4458
4459
/**
4460
 * xmlParseCharData:
4461
 * @ctxt:  an XML parser context
4462
 * @cdata:  unused
4463
 *
4464
 * DEPRECATED: Internal function, don't use.
4465
 *
4466
 * Parse character data. Always makes progress if the first char isn't
4467
 * '<' or '&'.
4468
 *
4469
 * if we are within a CDATA section ']]>' marks an end of section.
4470
 *
4471
 * The right angle bracket (>) may be represented using the string "&gt;",
4472
 * and must, for compatibility, be escaped using "&gt;" or a character
4473
 * reference when it appears in the string "]]>" in content, when that
4474
 * string is not marking the end of a CDATA section.
4475
 *
4476
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4477
 */
4478
4479
void
4480
84.5M
xmlParseCharData(xmlParserCtxtPtr ctxt, ATTRIBUTE_UNUSED int cdata) {
4481
84.5M
    const xmlChar *in;
4482
84.5M
    int nbchar = 0;
4483
84.5M
    int line = ctxt->input->line;
4484
84.5M
    int col = ctxt->input->col;
4485
84.5M
    int ccol;
4486
4487
84.5M
    SHRINK;
4488
84.5M
    GROW;
4489
    /*
4490
     * Accelerated common case where input don't need to be
4491
     * modified before passing it to the handler.
4492
     */
4493
84.5M
    in = ctxt->input->cur;
4494
99.9M
    do {
4495
130M
get_more_space:
4496
163M
        while (*in == 0x20) { in++; ctxt->input->col++; }
4497
130M
        if (*in == 0xA) {
4498
32.0M
            do {
4499
32.0M
                ctxt->input->line++; ctxt->input->col = 1;
4500
32.0M
                in++;
4501
32.0M
            } while (*in == 0xA);
4502
30.4M
            goto get_more_space;
4503
30.4M
        }
4504
99.9M
        if (*in == '<') {
4505
21.3M
            nbchar = in - ctxt->input->cur;
4506
21.3M
            if (nbchar > 0) {
4507
21.3M
                const xmlChar *tmp = ctxt->input->cur;
4508
21.3M
                ctxt->input->cur = in;
4509
4510
21.3M
                if ((ctxt->sax != NULL) &&
4511
21.3M
                    (ctxt->sax->ignorableWhitespace !=
4512
21.3M
                     ctxt->sax->characters)) {
4513
7.96M
                    if (areBlanks(ctxt, tmp, nbchar, 1)) {
4514
6.64M
                        if (ctxt->sax->ignorableWhitespace != NULL)
4515
6.64M
                            ctxt->sax->ignorableWhitespace(ctxt->userData,
4516
6.64M
                                                   tmp, nbchar);
4517
6.64M
                    } else {
4518
1.31M
                        if (ctxt->sax->characters != NULL)
4519
1.31M
                            ctxt->sax->characters(ctxt->userData,
4520
1.31M
                                                  tmp, nbchar);
4521
1.31M
                        if (*ctxt->space == -1)
4522
374k
                            *ctxt->space = -2;
4523
1.31M
                    }
4524
13.4M
                } else if ((ctxt->sax != NULL) &&
4525
13.4M
                           (ctxt->sax->characters != NULL)) {
4526
13.4M
                    ctxt->sax->characters(ctxt->userData,
4527
13.4M
                                          tmp, nbchar);
4528
13.4M
                }
4529
21.3M
            }
4530
21.3M
            return;
4531
21.3M
        }
4532
4533
105M
get_more:
4534
105M
        ccol = ctxt->input->col;
4535
1.67G
        while (test_char_data[*in]) {
4536
1.57G
            in++;
4537
1.57G
            ccol++;
4538
1.57G
        }
4539
105M
        ctxt->input->col = ccol;
4540
105M
        if (*in == 0xA) {
4541
25.0M
            do {
4542
25.0M
                ctxt->input->line++; ctxt->input->col = 1;
4543
25.0M
                in++;
4544
25.0M
            } while (*in == 0xA);
4545
24.4M
            goto get_more;
4546
24.4M
        }
4547
80.8M
        if (*in == ']') {
4548
2.28M
            if ((in[1] == ']') && (in[2] == '>')) {
4549
30.8k
                xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4550
30.8k
                ctxt->input->cur = in + 1;
4551
30.8k
                return;
4552
30.8k
            }
4553
2.25M
            in++;
4554
2.25M
            ctxt->input->col++;
4555
2.25M
            goto get_more;
4556
2.28M
        }
4557
78.5M
        nbchar = in - ctxt->input->cur;
4558
78.5M
        if (nbchar > 0) {
4559
61.2M
            if ((ctxt->sax != NULL) &&
4560
61.2M
                (ctxt->sax->ignorableWhitespace !=
4561
61.2M
                 ctxt->sax->characters) &&
4562
61.2M
                (IS_BLANK_CH(*ctxt->input->cur))) {
4563
9.43M
                const xmlChar *tmp = ctxt->input->cur;
4564
9.43M
                ctxt->input->cur = in;
4565
4566
9.43M
                if (areBlanks(ctxt, tmp, nbchar, 0)) {
4567
4.57M
                    if (ctxt->sax->ignorableWhitespace != NULL)
4568
4.57M
                        ctxt->sax->ignorableWhitespace(ctxt->userData,
4569
4.57M
                                                       tmp, nbchar);
4570
4.86M
                } else {
4571
4.86M
                    if (ctxt->sax->characters != NULL)
4572
4.86M
                        ctxt->sax->characters(ctxt->userData,
4573
4.86M
                                              tmp, nbchar);
4574
4.86M
                    if (*ctxt->space == -1)
4575
1.61M
                        *ctxt->space = -2;
4576
4.86M
                }
4577
9.43M
                line = ctxt->input->line;
4578
9.43M
                col = ctxt->input->col;
4579
51.8M
            } else if (ctxt->sax != NULL) {
4580
51.8M
                if (ctxt->sax->characters != NULL)
4581
51.8M
                    ctxt->sax->characters(ctxt->userData,
4582
51.8M
                                          ctxt->input->cur, nbchar);
4583
51.8M
                line = ctxt->input->line;
4584
51.8M
                col = ctxt->input->col;
4585
51.8M
            }
4586
61.2M
        }
4587
78.5M
        ctxt->input->cur = in;
4588
78.5M
        if (*in == 0xD) {
4589
15.5M
            in++;
4590
15.5M
            if (*in == 0xA) {
4591
15.4M
                ctxt->input->cur = in;
4592
15.4M
                in++;
4593
15.4M
                ctxt->input->line++; ctxt->input->col = 1;
4594
15.4M
                continue; /* while */
4595
15.4M
            }
4596
25.6k
            in--;
4597
25.6k
        }
4598
63.0M
        if (*in == '<') {
4599
53.6M
            return;
4600
53.6M
        }
4601
9.42M
        if (*in == '&') {
4602
5.70M
            return;
4603
5.70M
        }
4604
3.72M
        SHRINK;
4605
3.72M
        GROW;
4606
3.72M
        if (ctxt->instate == XML_PARSER_EOF)
4607
0
            return;
4608
3.72M
        in = ctxt->input->cur;
4609
19.2M
    } while (((*in >= 0x20) && (*in <= 0x7F)) ||
4610
19.2M
             (*in == 0x09) || (*in == 0x0a));
4611
3.74M
    ctxt->input->line = line;
4612
3.74M
    ctxt->input->col = col;
4613
3.74M
    xmlParseCharDataComplex(ctxt);
4614
3.74M
}
4615
4616
/**
4617
 * xmlParseCharDataComplex:
4618
 * @ctxt:  an XML parser context
4619
 * @cdata:  int indicating whether we are within a CDATA section
4620
 *
4621
 * Always makes progress if the first char isn't '<' or '&'.
4622
 *
4623
 * parse a CharData section.this is the fallback function
4624
 * of xmlParseCharData() when the parsing requires handling
4625
 * of non-ASCII characters.
4626
 */
4627
static void
4628
3.74M
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt) {
4629
3.74M
    xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4630
3.74M
    int nbchar = 0;
4631
3.74M
    int cur, l;
4632
3.74M
    int count = 0;
4633
4634
3.74M
    SHRINK;
4635
3.74M
    GROW;
4636
3.74M
    cur = CUR_CHAR(l);
4637
82.0M
    while ((cur != '<') && /* checked */
4638
82.0M
           (cur != '&') &&
4639
82.0M
     (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
4640
78.3M
  if ((cur == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4641
10.9k
      xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4642
10.9k
  }
4643
78.3M
  COPY_BUF(l,buf,nbchar,cur);
4644
  /* move current position before possible calling of ctxt->sax->characters */
4645
78.3M
  NEXTL(l);
4646
78.3M
  cur = CUR_CHAR(l);
4647
78.3M
  if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4648
209k
      buf[nbchar] = 0;
4649
4650
      /*
4651
       * OK the segment is to be consumed as chars.
4652
       */
4653
209k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4654
184k
    if (areBlanks(ctxt, buf, nbchar, 0)) {
4655
2.52k
        if (ctxt->sax->ignorableWhitespace != NULL)
4656
2.52k
      ctxt->sax->ignorableWhitespace(ctxt->userData,
4657
2.52k
                                     buf, nbchar);
4658
181k
    } else {
4659
181k
        if (ctxt->sax->characters != NULL)
4660
181k
      ctxt->sax->characters(ctxt->userData, buf, nbchar);
4661
181k
        if ((ctxt->sax->characters !=
4662
181k
             ctxt->sax->ignorableWhitespace) &&
4663
181k
      (*ctxt->space == -1))
4664
4.21k
      *ctxt->space = -2;
4665
181k
    }
4666
184k
      }
4667
209k
      nbchar = 0;
4668
            /* something really bad happened in the SAX callback */
4669
209k
            if (ctxt->instate != XML_PARSER_CONTENT)
4670
0
                return;
4671
209k
  }
4672
78.3M
  count++;
4673
78.3M
  if (count > 50) {
4674
1.24M
      SHRINK;
4675
1.24M
      GROW;
4676
1.24M
      count = 0;
4677
1.24M
            if (ctxt->instate == XML_PARSER_EOF)
4678
0
    return;
4679
1.24M
  }
4680
78.3M
    }
4681
3.74M
    if (nbchar != 0) {
4682
1.22M
        buf[nbchar] = 0;
4683
  /*
4684
   * OK the segment is to be consumed as chars.
4685
   */
4686
1.22M
  if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4687
1.01M
      if (areBlanks(ctxt, buf, nbchar, 0)) {
4688
3.77k
    if (ctxt->sax->ignorableWhitespace != NULL)
4689
3.77k
        ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4690
1.01M
      } else {
4691
1.01M
    if (ctxt->sax->characters != NULL)
4692
1.01M
        ctxt->sax->characters(ctxt->userData, buf, nbchar);
4693
1.01M
    if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4694
1.01M
        (*ctxt->space == -1))
4695
164k
        *ctxt->space = -2;
4696
1.01M
      }
4697
1.01M
  }
4698
1.22M
    }
4699
3.74M
    if ((ctxt->input->cur < ctxt->input->end) && (!IS_CHAR(cur))) {
4700
  /* Generate the error and skip the offending character */
4701
2.38M
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4702
2.38M
                          "PCDATA invalid Char value %d\n",
4703
2.38M
                    cur ? cur : CUR);
4704
2.38M
  NEXT;
4705
2.38M
    }
4706
3.74M
}
4707
4708
/**
4709
 * xmlParseExternalID:
4710
 * @ctxt:  an XML parser context
4711
 * @publicID:  a xmlChar** receiving PubidLiteral
4712
 * @strict: indicate whether we should restrict parsing to only
4713
 *          production [75], see NOTE below
4714
 *
4715
 * DEPRECATED: Internal function, don't use.
4716
 *
4717
 * Parse an External ID or a Public ID
4718
 *
4719
 * NOTE: Productions [75] and [83] interact badly since [75] can generate
4720
 *       'PUBLIC' S PubidLiteral S SystemLiteral
4721
 *
4722
 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4723
 *                   | 'PUBLIC' S PubidLiteral S SystemLiteral
4724
 *
4725
 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4726
 *
4727
 * Returns the function returns SystemLiteral and in the second
4728
 *                case publicID receives PubidLiteral, is strict is off
4729
 *                it is possible to return NULL and have publicID set.
4730
 */
4731
4732
xmlChar *
4733
1.65M
xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4734
1.65M
    xmlChar *URI = NULL;
4735
4736
1.65M
    SHRINK;
4737
4738
1.65M
    *publicID = NULL;
4739
1.65M
    if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4740
868k
        SKIP(6);
4741
868k
  if (SKIP_BLANKS == 0) {
4742
4.89k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4743
4.89k
                     "Space required after 'SYSTEM'\n");
4744
4.89k
  }
4745
868k
  URI = xmlParseSystemLiteral(ctxt);
4746
868k
  if (URI == NULL) {
4747
9.02k
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4748
9.02k
        }
4749
868k
    } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4750
477k
        SKIP(6);
4751
477k
  if (SKIP_BLANKS == 0) {
4752
3.11k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4753
3.11k
        "Space required after 'PUBLIC'\n");
4754
3.11k
  }
4755
477k
  *publicID = xmlParsePubidLiteral(ctxt);
4756
477k
  if (*publicID == NULL) {
4757
11.5k
      xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4758
11.5k
  }
4759
477k
  if (strict) {
4760
      /*
4761
       * We don't handle [83] so "S SystemLiteral" is required.
4762
       */
4763
329k
      if (SKIP_BLANKS == 0) {
4764
20.3k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4765
20.3k
      "Space required after the Public Identifier\n");
4766
20.3k
      }
4767
329k
  } else {
4768
      /*
4769
       * We handle [83] so we return immediately, if
4770
       * "S SystemLiteral" is not detected. We skip blanks if no
4771
             * system literal was found, but this is harmless since we must
4772
             * be at the end of a NotationDecl.
4773
       */
4774
147k
      if (SKIP_BLANKS == 0) return(NULL);
4775
2.94k
      if ((CUR != '\'') && (CUR != '"')) return(NULL);
4776
2.94k
  }
4777
330k
  URI = xmlParseSystemLiteral(ctxt);
4778
330k
  if (URI == NULL) {
4779
21.1k
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4780
21.1k
        }
4781
330k
    }
4782
1.50M
    return(URI);
4783
1.65M
}
4784
4785
/**
4786
 * xmlParseCommentComplex:
4787
 * @ctxt:  an XML parser context
4788
 * @buf:  the already parsed part of the buffer
4789
 * @len:  number of bytes in the buffer
4790
 * @size:  allocated size of the buffer
4791
 *
4792
 * Skip an XML (SGML) comment <!-- .... -->
4793
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4794
 *  must not occur within comments. "
4795
 * This is the slow routine in case the accelerator for ascii didn't work
4796
 *
4797
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4798
 */
4799
static void
4800
xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4801
15.9M
                       size_t len, size_t size) {
4802
15.9M
    int q, ql;
4803
15.9M
    int r, rl;
4804
15.9M
    int cur, l;
4805
15.9M
    size_t count = 0;
4806
15.9M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4807
5.90M
                       XML_MAX_HUGE_LENGTH :
4808
15.9M
                       XML_MAX_TEXT_LENGTH;
4809
15.9M
    int inputid;
4810
4811
15.9M
    inputid = ctxt->input->id;
4812
4813
15.9M
    if (buf == NULL) {
4814
2.00M
        len = 0;
4815
2.00M
  size = XML_PARSER_BUFFER_SIZE;
4816
2.00M
  buf = (xmlChar *) xmlMallocAtomic(size);
4817
2.00M
  if (buf == NULL) {
4818
0
      xmlErrMemory(ctxt, NULL);
4819
0
      return;
4820
0
  }
4821
2.00M
    }
4822
15.9M
    GROW; /* Assure there's enough input data */
4823
15.9M
    q = CUR_CHAR(ql);
4824
15.9M
    if (q == 0)
4825
9.46M
        goto not_terminated;
4826
6.48M
    if (!IS_CHAR(q)) {
4827
16.6k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4828
16.6k
                          "xmlParseComment: invalid xmlChar value %d\n",
4829
16.6k
                    q);
4830
16.6k
  xmlFree (buf);
4831
16.6k
  return;
4832
16.6k
    }
4833
6.46M
    NEXTL(ql);
4834
6.46M
    r = CUR_CHAR(rl);
4835
6.46M
    if (r == 0)
4836
32.4k
        goto not_terminated;
4837
6.43M
    if (!IS_CHAR(r)) {
4838
1.59k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4839
1.59k
                          "xmlParseComment: invalid xmlChar value %d\n",
4840
1.59k
                    r);
4841
1.59k
  xmlFree (buf);
4842
1.59k
  return;
4843
1.59k
    }
4844
6.43M
    NEXTL(rl);
4845
6.43M
    cur = CUR_CHAR(l);
4846
6.43M
    if (cur == 0)
4847
2.85M
        goto not_terminated;
4848
150M
    while (IS_CHAR(cur) && /* checked */
4849
150M
           ((cur != '>') ||
4850
146M
      (r != '-') || (q != '-'))) {
4851
146M
  if ((r == '-') && (q == '-')) {
4852
9.97M
      xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4853
9.97M
  }
4854
146M
  if (len + 5 >= size) {
4855
190k
      xmlChar *new_buf;
4856
190k
            size_t new_size;
4857
4858
190k
      new_size = size * 2;
4859
190k
      new_buf = (xmlChar *) xmlRealloc(buf, new_size);
4860
190k
      if (new_buf == NULL) {
4861
0
    xmlFree (buf);
4862
0
    xmlErrMemory(ctxt, NULL);
4863
0
    return;
4864
0
      }
4865
190k
      buf = new_buf;
4866
190k
            size = new_size;
4867
190k
  }
4868
146M
  COPY_BUF(ql,buf,len,q);
4869
146M
  q = r;
4870
146M
  ql = rl;
4871
146M
  r = cur;
4872
146M
  rl = l;
4873
4874
146M
  count++;
4875
146M
  if (count > 50) {
4876
2.39M
      SHRINK;
4877
2.39M
      GROW;
4878
2.39M
      count = 0;
4879
2.39M
            if (ctxt->instate == XML_PARSER_EOF) {
4880
0
    xmlFree(buf);
4881
0
    return;
4882
0
            }
4883
2.39M
  }
4884
146M
  NEXTL(l);
4885
146M
  cur = CUR_CHAR(l);
4886
146M
  if (cur == 0) {
4887
3.39M
      SHRINK;
4888
3.39M
      GROW;
4889
3.39M
      cur = CUR_CHAR(l);
4890
3.39M
  }
4891
4892
146M
        if (len > maxLength) {
4893
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4894
0
                         "Comment too big found", NULL);
4895
0
            xmlFree (buf);
4896
0
            return;
4897
0
        }
4898
146M
    }
4899
3.57M
    buf[len] = 0;
4900
3.57M
    if (cur == 0) {
4901
3.39M
  xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4902
3.39M
                       "Comment not terminated \n<!--%.50s\n", buf);
4903
3.39M
    } else if (!IS_CHAR(cur)) {
4904
6.54k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4905
6.54k
                          "xmlParseComment: invalid xmlChar value %d\n",
4906
6.54k
                    cur);
4907
175k
    } else {
4908
175k
  if (inputid != ctxt->input->id) {
4909
0
      xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4910
0
               "Comment doesn't start and stop in the same"
4911
0
                           " entity\n");
4912
0
  }
4913
175k
        NEXT;
4914
175k
  if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4915
175k
      (!ctxt->disableSAX))
4916
163k
      ctxt->sax->comment(ctxt->userData, buf);
4917
175k
    }
4918
3.57M
    xmlFree(buf);
4919
3.57M
    return;
4920
12.3M
not_terminated:
4921
12.3M
    xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4922
12.3M
       "Comment not terminated\n", NULL);
4923
12.3M
    xmlFree(buf);
4924
12.3M
    return;
4925
3.57M
}
4926
4927
/**
4928
 * xmlParseComment:
4929
 * @ctxt:  an XML parser context
4930
 *
4931
 * DEPRECATED: Internal function, don't use.
4932
 *
4933
 * Parse an XML (SGML) comment. Always consumes '<!'.
4934
 *
4935
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4936
 *  must not occur within comments. "
4937
 *
4938
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4939
 */
4940
void
4941
505M
xmlParseComment(xmlParserCtxtPtr ctxt) {
4942
505M
    xmlChar *buf = NULL;
4943
505M
    size_t size = XML_PARSER_BUFFER_SIZE;
4944
505M
    size_t len = 0;
4945
505M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4946
168M
                       XML_MAX_HUGE_LENGTH :
4947
505M
                       XML_MAX_TEXT_LENGTH;
4948
505M
    xmlParserInputState state;
4949
505M
    const xmlChar *in;
4950
505M
    size_t nbchar = 0;
4951
505M
    int ccol;
4952
505M
    int inputid;
4953
4954
    /*
4955
     * Check that there is a comment right here.
4956
     */
4957
505M
    if ((RAW != '<') || (NXT(1) != '!'))
4958
0
        return;
4959
505M
    SKIP(2);
4960
505M
    if ((RAW != '-') || (NXT(1) != '-'))
4961
505
        return;
4962
505M
    state = ctxt->instate;
4963
505M
    ctxt->instate = XML_PARSER_COMMENT;
4964
505M
    inputid = ctxt->input->id;
4965
505M
    SKIP(2);
4966
505M
    SHRINK;
4967
505M
    GROW;
4968
4969
    /*
4970
     * Accelerated common case where input don't need to be
4971
     * modified before passing it to the handler.
4972
     */
4973
505M
    in = ctxt->input->cur;
4974
505M
    do {
4975
505M
  if (*in == 0xA) {
4976
1.57M
      do {
4977
1.57M
    ctxt->input->line++; ctxt->input->col = 1;
4978
1.57M
    in++;
4979
1.57M
      } while (*in == 0xA);
4980
1.49M
  }
4981
575M
get_more:
4982
575M
        ccol = ctxt->input->col;
4983
1.83G
  while (((*in > '-') && (*in <= 0x7F)) ||
4984
1.83G
         ((*in >= 0x20) && (*in < '-')) ||
4985
1.83G
         (*in == 0x09)) {
4986
1.26G
        in++;
4987
1.26G
        ccol++;
4988
1.26G
  }
4989
575M
  ctxt->input->col = ccol;
4990
575M
  if (*in == 0xA) {
4991
10.0M
      do {
4992
10.0M
    ctxt->input->line++; ctxt->input->col = 1;
4993
10.0M
    in++;
4994
10.0M
      } while (*in == 0xA);
4995
9.59M
      goto get_more;
4996
9.59M
  }
4997
566M
  nbchar = in - ctxt->input->cur;
4998
  /*
4999
   * save current set of data
5000
   */
5001
566M
  if (nbchar > 0) {
5002
100M
      if ((ctxt->sax != NULL) &&
5003
100M
    (ctxt->sax->comment != NULL)) {
5004
100M
    if (buf == NULL) {
5005
50.1M
        if ((*in == '-') && (in[1] == '-'))
5006
33.6M
            size = nbchar + 1;
5007
16.4M
        else
5008
16.4M
            size = XML_PARSER_BUFFER_SIZE + nbchar;
5009
50.1M
        buf = (xmlChar *) xmlMallocAtomic(size);
5010
50.1M
        if (buf == NULL) {
5011
0
            xmlErrMemory(ctxt, NULL);
5012
0
      ctxt->instate = state;
5013
0
      return;
5014
0
        }
5015
50.1M
        len = 0;
5016
50.1M
    } else if (len + nbchar + 1 >= size) {
5017
2.12M
        xmlChar *new_buf;
5018
2.12M
        size  += len + nbchar + XML_PARSER_BUFFER_SIZE;
5019
2.12M
        new_buf = (xmlChar *) xmlRealloc(buf, size);
5020
2.12M
        if (new_buf == NULL) {
5021
0
            xmlFree (buf);
5022
0
      xmlErrMemory(ctxt, NULL);
5023
0
      ctxt->instate = state;
5024
0
      return;
5025
0
        }
5026
2.12M
        buf = new_buf;
5027
2.12M
    }
5028
100M
    memcpy(&buf[len], ctxt->input->cur, nbchar);
5029
100M
    len += nbchar;
5030
100M
    buf[len] = 0;
5031
100M
      }
5032
100M
  }
5033
566M
        if (len > maxLength) {
5034
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5035
0
                         "Comment too big found", NULL);
5036
0
            xmlFree (buf);
5037
0
            return;
5038
0
        }
5039
566M
  ctxt->input->cur = in;
5040
566M
  if (*in == 0xA) {
5041
0
      in++;
5042
0
      ctxt->input->line++; ctxt->input->col = 1;
5043
0
  }
5044
566M
  if (*in == 0xD) {
5045
5.61M
      in++;
5046
5.61M
      if (*in == 0xA) {
5047
5.60M
    ctxt->input->cur = in;
5048
5.60M
    in++;
5049
5.60M
    ctxt->input->line++; ctxt->input->col = 1;
5050
5.60M
    goto get_more;
5051
5.60M
      }
5052
3.44k
      in--;
5053
3.44k
  }
5054
560M
  SHRINK;
5055
560M
  GROW;
5056
560M
        if (ctxt->instate == XML_PARSER_EOF) {
5057
0
            xmlFree(buf);
5058
0
            return;
5059
0
        }
5060
560M
  in = ctxt->input->cur;
5061
560M
  if (*in == '-') {
5062
544M
      if (in[1] == '-') {
5063
519M
          if (in[2] == '>') {
5064
489M
        if (ctxt->input->id != inputid) {
5065
0
      xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5066
0
                     "comment doesn't start and stop in the"
5067
0
                                       " same entity\n");
5068
0
        }
5069
489M
        SKIP(3);
5070
489M
        if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5071
489M
            (!ctxt->disableSAX)) {
5072
395M
      if (buf != NULL)
5073
29.5M
          ctxt->sax->comment(ctxt->userData, buf);
5074
365M
      else
5075
365M
          ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5076
395M
        }
5077
489M
        if (buf != NULL)
5078
36.1M
            xmlFree(buf);
5079
489M
        if (ctxt->instate != XML_PARSER_EOF)
5080
489M
      ctxt->instate = state;
5081
489M
        return;
5082
489M
    }
5083
30.2M
    if (buf != NULL) {
5084
27.3M
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5085
27.3M
                          "Double hyphen within comment: "
5086
27.3M
                                      "<!--%.50s\n",
5087
27.3M
              buf);
5088
27.3M
    } else
5089
2.98M
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5090
2.98M
                          "Double hyphen within comment\n", NULL);
5091
30.2M
                if (ctxt->instate == XML_PARSER_EOF) {
5092
0
                    xmlFree(buf);
5093
0
                    return;
5094
0
                }
5095
30.2M
    in++;
5096
30.2M
    ctxt->input->col++;
5097
30.2M
      }
5098
54.9M
      in++;
5099
54.9M
      ctxt->input->col++;
5100
54.9M
      goto get_more;
5101
544M
  }
5102
560M
    } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
5103
15.9M
    xmlParseCommentComplex(ctxt, buf, len, size);
5104
15.9M
    ctxt->instate = state;
5105
15.9M
    return;
5106
505M
}
5107
5108
5109
/**
5110
 * xmlParsePITarget:
5111
 * @ctxt:  an XML parser context
5112
 *
5113
 * DEPRECATED: Internal function, don't use.
5114
 *
5115
 * parse the name of a PI
5116
 *
5117
 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5118
 *
5119
 * Returns the PITarget name or NULL
5120
 */
5121
5122
const xmlChar *
5123
1.17M
xmlParsePITarget(xmlParserCtxtPtr ctxt) {
5124
1.17M
    const xmlChar *name;
5125
5126
1.17M
    name = xmlParseName(ctxt);
5127
1.17M
    if ((name != NULL) &&
5128
1.17M
        ((name[0] == 'x') || (name[0] == 'X')) &&
5129
1.17M
        ((name[1] == 'm') || (name[1] == 'M')) &&
5130
1.17M
        ((name[2] == 'l') || (name[2] == 'L'))) {
5131
294k
  int i;
5132
294k
  if ((name[0] == 'x') && (name[1] == 'm') &&
5133
294k
      (name[2] == 'l') && (name[3] == 0)) {
5134
225k
      xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5135
225k
     "XML declaration allowed only at the start of the document\n");
5136
225k
      return(name);
5137
225k
  } else if (name[3] == 0) {
5138
10.8k
      xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5139
10.8k
      return(name);
5140
10.8k
  }
5141
115k
  for (i = 0;;i++) {
5142
115k
      if (xmlW3CPIs[i] == NULL) break;
5143
87.0k
      if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5144
29.2k
          return(name);
5145
87.0k
  }
5146
28.9k
  xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5147
28.9k
          "xmlParsePITarget: invalid name prefix 'xml'\n",
5148
28.9k
          NULL, NULL);
5149
28.9k
    }
5150
904k
    if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5151
8.56k
  xmlNsErr(ctxt, XML_NS_ERR_COLON,
5152
8.56k
     "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5153
8.56k
    }
5154
904k
    return(name);
5155
1.17M
}
5156
5157
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * Parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar *URL = NULL;
    xmlChar quote;

    /* Expect: S* "catalog" S* '=' S* quoted-value S* */
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7))
        goto error;
    p += 7;
    for (; IS_BLANK_CH(*p); p++)
        ;
    /* NOTE(review): a missing '=' returns without the syntax warning
     * issued for the other malformed cases - confirm this is intended. */
    if (*p != '=')
        return;
    p++;
    for (; IS_BLANK_CH(*p); p++)
        ;

    /* The value may use either quote character. */
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto error;
    p++;
    start = p;
    for (; (*p != 0) && (*p != quote); p++)
        ;
    if (*p == 0)
        goto error;
    URL = xmlStrndup(start, p - start);
    p++;

    /* Nothing but blanks may follow the closing quote. */
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (*p != 0)
        goto error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
5218
5219
/**
5220
 * xmlParsePI:
5221
 * @ctxt:  an XML parser context
5222
 *
5223
 * DEPRECATED: Internal function, don't use.
5224
 *
5225
 * parse an XML Processing Instruction.
5226
 *
5227
 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5228
 *
5229
 * The processing is transferred to SAX once parsed.
5230
 */
5231
5232
void
5233
1.17M
xmlParsePI(xmlParserCtxtPtr ctxt) {
5234
1.17M
    xmlChar *buf = NULL;
5235
1.17M
    size_t len = 0;
5236
1.17M
    size_t size = XML_PARSER_BUFFER_SIZE;
5237
1.17M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5238
440k
                       XML_MAX_HUGE_LENGTH :
5239
1.17M
                       XML_MAX_TEXT_LENGTH;
5240
1.17M
    int cur, l;
5241
1.17M
    const xmlChar *target;
5242
1.17M
    xmlParserInputState state;
5243
1.17M
    int count = 0;
5244
5245
1.17M
    if ((RAW == '<') && (NXT(1) == '?')) {
5246
1.17M
  int inputid = ctxt->input->id;
5247
1.17M
  state = ctxt->instate;
5248
1.17M
        ctxt->instate = XML_PARSER_PI;
5249
  /*
5250
   * this is a Processing Instruction.
5251
   */
5252
1.17M
  SKIP(2);
5253
1.17M
  SHRINK;
5254
5255
  /*
5256
   * Parse the target name and check for special support like
5257
   * namespace.
5258
   */
5259
1.17M
        target = xmlParsePITarget(ctxt);
5260
1.17M
  if (target != NULL) {
5261
1.10M
      if ((RAW == '?') && (NXT(1) == '>')) {
5262
529k
    if (inputid != ctxt->input->id) {
5263
0
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5264
0
                             "PI declaration doesn't start and stop in"
5265
0
                                   " the same entity\n");
5266
0
    }
5267
529k
    SKIP(2);
5268
5269
    /*
5270
     * SAX: PI detected.
5271
     */
5272
529k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5273
529k
        (ctxt->sax->processingInstruction != NULL))
5274
341k
        ctxt->sax->processingInstruction(ctxt->userData,
5275
341k
                                         target, NULL);
5276
529k
    if (ctxt->instate != XML_PARSER_EOF)
5277
529k
        ctxt->instate = state;
5278
529k
    return;
5279
529k
      }
5280
578k
      buf = (xmlChar *) xmlMallocAtomic(size);
5281
578k
      if (buf == NULL) {
5282
0
    xmlErrMemory(ctxt, NULL);
5283
0
    ctxt->instate = state;
5284
0
    return;
5285
0
      }
5286
578k
      if (SKIP_BLANKS == 0) {
5287
106k
    xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5288
106k
        "ParsePI: PI %s space expected\n", target);
5289
106k
      }
5290
578k
      cur = CUR_CHAR(l);
5291
61.6M
      while (IS_CHAR(cur) && /* checked */
5292
61.6M
       ((cur != '?') || (NXT(1) != '>'))) {
5293
61.1M
    if (len + 5 >= size) {
5294
48.3k
        xmlChar *tmp;
5295
48.3k
                    size_t new_size = size * 2;
5296
48.3k
        tmp = (xmlChar *) xmlRealloc(buf, new_size);
5297
48.3k
        if (tmp == NULL) {
5298
0
      xmlErrMemory(ctxt, NULL);
5299
0
      xmlFree(buf);
5300
0
      ctxt->instate = state;
5301
0
      return;
5302
0
        }
5303
48.3k
        buf = tmp;
5304
48.3k
                    size = new_size;
5305
48.3k
    }
5306
61.1M
    count++;
5307
61.1M
    if (count > 50) {
5308
996k
        SHRINK;
5309
996k
        GROW;
5310
996k
                    if (ctxt->instate == XML_PARSER_EOF) {
5311
0
                        xmlFree(buf);
5312
0
                        return;
5313
0
                    }
5314
996k
        count = 0;
5315
996k
    }
5316
61.1M
    COPY_BUF(l,buf,len,cur);
5317
61.1M
    NEXTL(l);
5318
61.1M
    cur = CUR_CHAR(l);
5319
61.1M
    if (cur == 0) {
5320
20.0k
        SHRINK;
5321
20.0k
        GROW;
5322
20.0k
        cur = CUR_CHAR(l);
5323
20.0k
    }
5324
61.1M
                if (len > maxLength) {
5325
0
                    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5326
0
                                      "PI %s too big found", target);
5327
0
                    xmlFree(buf);
5328
0
                    ctxt->instate = state;
5329
0
                    return;
5330
0
                }
5331
61.1M
      }
5332
578k
      buf[len] = 0;
5333
578k
      if (cur != '?') {
5334
39.1k
    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5335
39.1k
          "ParsePI: PI %s never end ...\n", target);
5336
539k
      } else {
5337
539k
    if (inputid != ctxt->input->id) {
5338
70
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5339
70
                             "PI declaration doesn't start and stop in"
5340
70
                                   " the same entity\n");
5341
70
    }
5342
539k
    SKIP(2);
5343
5344
539k
#ifdef LIBXML_CATALOG_ENABLED
5345
539k
    if (((state == XML_PARSER_MISC) ||
5346
539k
               (state == XML_PARSER_START)) &&
5347
539k
        (xmlStrEqual(target, XML_CATALOG_PI))) {
5348
0
        xmlCatalogAllow allow = xmlCatalogGetDefaults();
5349
0
        if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5350
0
      (allow == XML_CATA_ALLOW_ALL))
5351
0
      xmlParseCatalogPI(ctxt, buf);
5352
0
    }
5353
539k
#endif
5354
5355
5356
    /*
5357
     * SAX: PI detected.
5358
     */
5359
539k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5360
539k
        (ctxt->sax->processingInstruction != NULL))
5361
404k
        ctxt->sax->processingInstruction(ctxt->userData,
5362
404k
                                         target, buf);
5363
539k
      }
5364
578k
      xmlFree(buf);
5365
578k
  } else {
5366
62.3k
      xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5367
62.3k
  }
5368
640k
  if (ctxt->instate != XML_PARSER_EOF)
5369
640k
      ctxt->instate = state;
5370
640k
    }
5371
1.17M
}
5372
5373
/**
5374
 * xmlParseNotationDecl:
5375
 * @ctxt:  an XML parser context
5376
 *
5377
 * DEPRECATED: Internal function, don't use.
5378
 *
5379
 * Parse a notation declaration. Always consumes '<!'.
5380
 *
5381
 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID |  PublicID) S? '>'
5382
 *
5383
 * Hence there is actually 3 choices:
5384
 *     'PUBLIC' S PubidLiteral
5385
 *     'PUBLIC' S PubidLiteral S SystemLiteral
5386
 * and 'SYSTEM' S SystemLiteral
5387
 *
5388
 * See the NOTE on xmlParseExternalID().
5389
 */
5390
5391
void
5392
375k
xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5393
375k
    const xmlChar *name;
5394
375k
    xmlChar *Pubid;
5395
375k
    xmlChar *Systemid;
5396
5397
375k
    if ((CUR != '<') || (NXT(1) != '!'))
5398
0
        return;
5399
375k
    SKIP(2);
5400
5401
375k
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5402
374k
  int inputid = ctxt->input->id;
5403
374k
  SHRINK;
5404
374k
  SKIP(8);
5405
374k
  if (SKIP_BLANKS == 0) {
5406
9.54k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5407
9.54k
         "Space required after '<!NOTATION'\n");
5408
9.54k
      return;
5409
9.54k
  }
5410
5411
364k
        name = xmlParseName(ctxt);
5412
364k
  if (name == NULL) {
5413
1.55k
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5414
1.55k
      return;
5415
1.55k
  }
5416
363k
  if (xmlStrchr(name, ':') != NULL) {
5417
455
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5418
455
         "colons are forbidden from notation names '%s'\n",
5419
455
         name, NULL, NULL);
5420
455
  }
5421
363k
  if (SKIP_BLANKS == 0) {
5422
6.16k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5423
6.16k
         "Space required after the NOTATION name'\n");
5424
6.16k
      return;
5425
6.16k
  }
5426
5427
  /*
5428
   * Parse the IDs.
5429
   */
5430
357k
  Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5431
357k
  SKIP_BLANKS;
5432
5433
357k
  if (RAW == '>') {
5434
316k
      if (inputid != ctxt->input->id) {
5435
138
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5436
138
                         "Notation declaration doesn't start and stop"
5437
138
                               " in the same entity\n");
5438
138
      }
5439
316k
      NEXT;
5440
316k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5441
316k
    (ctxt->sax->notationDecl != NULL))
5442
242k
    ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5443
316k
  } else {
5444
41.1k
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5445
41.1k
  }
5446
357k
  if (Systemid != NULL) xmlFree(Systemid);
5447
357k
  if (Pubid != NULL) xmlFree(Pubid);
5448
357k
    }
5449
375k
}
5450
5451
/**
5452
 * xmlParseEntityDecl:
5453
 * @ctxt:  an XML parser context
5454
 *
5455
 * DEPRECATED: Internal function, don't use.
5456
 *
5457
 * Parse an entity declaration. Always consumes '<!'.
5458
 *
5459
 * [70] EntityDecl ::= GEDecl | PEDecl
5460
 *
5461
 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5462
 *
5463
 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5464
 *
5465
 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5466
 *
5467
 * [74] PEDef ::= EntityValue | ExternalID
5468
 *
5469
 * [76] NDataDecl ::= S 'NDATA' S Name
5470
 *
5471
 * [ VC: Notation Declared ]
5472
 * The Name must match the declared name of a notation.
5473
 */
5474
5475
void
5476
5.58M
xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5477
5.58M
    const xmlChar *name = NULL;
5478
5.58M
    xmlChar *value = NULL;
5479
5.58M
    xmlChar *URI = NULL, *literal = NULL;
5480
5.58M
    const xmlChar *ndata = NULL;
5481
5.58M
    int isParameter = 0;
5482
5.58M
    xmlChar *orig = NULL;
5483
5484
5.58M
    if ((CUR != '<') || (NXT(1) != '!'))
5485
0
        return;
5486
5.58M
    SKIP(2);
5487
5488
    /* GROW; done in the caller */
5489
5.58M
    if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5490
5.58M
  int inputid = ctxt->input->id;
5491
5.58M
  SHRINK;
5492
5.58M
  SKIP(6);
5493
5.58M
  if (SKIP_BLANKS == 0) {
5494
19.7k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5495
19.7k
         "Space required after '<!ENTITY'\n");
5496
19.7k
  }
5497
5498
5.58M
  if (RAW == '%') {
5499
3.17M
      NEXT;
5500
3.17M
      if (SKIP_BLANKS == 0) {
5501
1.14k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5502
1.14k
             "Space required after '%%'\n");
5503
1.14k
      }
5504
3.17M
      isParameter = 1;
5505
3.17M
  }
5506
5507
5.58M
        name = xmlParseName(ctxt);
5508
5.58M
  if (name == NULL) {
5509
25.2k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5510
25.2k
                     "xmlParseEntityDecl: no name\n");
5511
25.2k
            return;
5512
25.2k
  }
5513
5.55M
  if (xmlStrchr(name, ':') != NULL) {
5514
6.38k
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5515
6.38k
         "colons are forbidden from entities names '%s'\n",
5516
6.38k
         name, NULL, NULL);
5517
6.38k
  }
5518
5.55M
  if (SKIP_BLANKS == 0) {
5519
23.6k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5520
23.6k
         "Space required after the entity name\n");
5521
23.6k
  }
5522
5523
5.55M
  ctxt->instate = XML_PARSER_ENTITY_DECL;
5524
  /*
5525
   * handle the various case of definitions...
5526
   */
5527
5.55M
  if (isParameter) {
5528
3.16M
      if ((RAW == '"') || (RAW == '\'')) {
5529
3.12M
          value = xmlParseEntityValue(ctxt, &orig);
5530
3.12M
    if (value) {
5531
3.10M
        if ((ctxt->sax != NULL) &&
5532
3.10M
      (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5533
3.03M
      ctxt->sax->entityDecl(ctxt->userData, name,
5534
3.03M
                        XML_INTERNAL_PARAMETER_ENTITY,
5535
3.03M
            NULL, NULL, value);
5536
3.10M
    }
5537
3.12M
      } else {
5538
39.9k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5539
39.9k
    if ((URI == NULL) && (literal == NULL)) {
5540
2.60k
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5541
2.60k
    }
5542
39.9k
    if (URI) {
5543
36.9k
        xmlURIPtr uri;
5544
5545
36.9k
        uri = xmlParseURI((const char *) URI);
5546
36.9k
        if (uri == NULL) {
5547
1.12k
            xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5548
1.12k
             "Invalid URI: %s\n", URI);
5549
      /*
5550
       * This really ought to be a well formedness error
5551
       * but the XML Core WG decided otherwise c.f. issue
5552
       * E26 of the XML erratas.
5553
       */
5554
35.8k
        } else {
5555
35.8k
      if (uri->fragment != NULL) {
5556
          /*
5557
           * Okay this is foolish to block those but not
5558
           * invalid URIs.
5559
           */
5560
135
          xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5561
35.7k
      } else {
5562
35.7k
          if ((ctxt->sax != NULL) &&
5563
35.7k
        (!ctxt->disableSAX) &&
5564
35.7k
        (ctxt->sax->entityDecl != NULL))
5565
34.9k
        ctxt->sax->entityDecl(ctxt->userData, name,
5566
34.9k
              XML_EXTERNAL_PARAMETER_ENTITY,
5567
34.9k
              literal, URI, NULL);
5568
35.7k
      }
5569
35.8k
      xmlFreeURI(uri);
5570
35.8k
        }
5571
36.9k
    }
5572
39.9k
      }
5573
3.16M
  } else {
5574
2.38M
      if ((RAW == '"') || (RAW == '\'')) {
5575
1.69M
          value = xmlParseEntityValue(ctxt, &orig);
5576
1.69M
    if ((ctxt->sax != NULL) &&
5577
1.69M
        (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5578
1.54M
        ctxt->sax->entityDecl(ctxt->userData, name,
5579
1.54M
        XML_INTERNAL_GENERAL_ENTITY,
5580
1.54M
        NULL, NULL, value);
5581
    /*
5582
     * For expat compatibility in SAX mode.
5583
     */
5584
1.69M
    if ((ctxt->myDoc == NULL) ||
5585
1.69M
        (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5586
16.5k
        if (ctxt->myDoc == NULL) {
5587
2.00k
      ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5588
2.00k
      if (ctxt->myDoc == NULL) {
5589
0
          xmlErrMemory(ctxt, "New Doc failed");
5590
0
          return;
5591
0
      }
5592
2.00k
      ctxt->myDoc->properties = XML_DOC_INTERNAL;
5593
2.00k
        }
5594
16.5k
        if (ctxt->myDoc->intSubset == NULL)
5595
2.00k
      ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5596
2.00k
              BAD_CAST "fake", NULL, NULL);
5597
5598
16.5k
        xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5599
16.5k
                    NULL, NULL, value);
5600
16.5k
    }
5601
1.69M
      } else {
5602
696k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5603
696k
    if ((URI == NULL) && (literal == NULL)) {
5604
27.3k
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5605
27.3k
    }
5606
696k
    if (URI) {
5607
658k
        xmlURIPtr uri;
5608
5609
658k
        uri = xmlParseURI((const char *)URI);
5610
658k
        if (uri == NULL) {
5611
21.9k
            xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5612
21.9k
             "Invalid URI: %s\n", URI);
5613
      /*
5614
       * This really ought to be a well formedness error
5615
       * but the XML Core WG decided otherwise c.f. issue
5616
       * E26 of the XML erratas.
5617
       */
5618
636k
        } else {
5619
636k
      if (uri->fragment != NULL) {
5620
          /*
5621
           * Okay this is foolish to block those but not
5622
           * invalid URIs.
5623
           */
5624
4.03k
          xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5625
4.03k
      }
5626
636k
      xmlFreeURI(uri);
5627
636k
        }
5628
658k
    }
5629
696k
    if ((RAW != '>') && (SKIP_BLANKS == 0)) {
5630
19.4k
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5631
19.4k
           "Space required before 'NDATA'\n");
5632
19.4k
    }
5633
696k
    if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5634
236k
        SKIP(5);
5635
236k
        if (SKIP_BLANKS == 0) {
5636
8.27k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5637
8.27k
               "Space required after 'NDATA'\n");
5638
8.27k
        }
5639
236k
        ndata = xmlParseName(ctxt);
5640
236k
        if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5641
236k
            (ctxt->sax->unparsedEntityDecl != NULL))
5642
147k
      ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5643
147k
            literal, URI, ndata);
5644
460k
    } else {
5645
460k
        if ((ctxt->sax != NULL) &&
5646
460k
            (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5647
341k
      ctxt->sax->entityDecl(ctxt->userData, name,
5648
341k
            XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5649
341k
            literal, URI, NULL);
5650
        /*
5651
         * For expat compatibility in SAX mode.
5652
         * assuming the entity replacement was asked for
5653
         */
5654
460k
        if ((ctxt->replaceEntities != 0) &&
5655
460k
      ((ctxt->myDoc == NULL) ||
5656
271k
      (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5657
2.65k
      if (ctxt->myDoc == NULL) {
5658
1.49k
          ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5659
1.49k
          if (ctxt->myDoc == NULL) {
5660
0
              xmlErrMemory(ctxt, "New Doc failed");
5661
0
        return;
5662
0
          }
5663
1.49k
          ctxt->myDoc->properties = XML_DOC_INTERNAL;
5664
1.49k
      }
5665
5666
2.65k
      if (ctxt->myDoc->intSubset == NULL)
5667
1.49k
          ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5668
1.49k
            BAD_CAST "fake", NULL, NULL);
5669
2.65k
      xmlSAX2EntityDecl(ctxt, name,
5670
2.65k
                  XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5671
2.65k
                  literal, URI, NULL);
5672
2.65k
        }
5673
460k
    }
5674
696k
      }
5675
2.38M
  }
5676
5.55M
  if (ctxt->instate == XML_PARSER_EOF)
5677
1.92k
      goto done;
5678
5.55M
  SKIP_BLANKS;
5679
5.55M
  if (RAW != '>') {
5680
30.3k
      xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5681
30.3k
              "xmlParseEntityDecl: entity %s not terminated\n", name);
5682
30.3k
      xmlHaltParser(ctxt);
5683
5.52M
  } else {
5684
5.52M
      if (inputid != ctxt->input->id) {
5685
503
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5686
503
                         "Entity declaration doesn't start and stop in"
5687
503
                               " the same entity\n");
5688
503
      }
5689
5.52M
      NEXT;
5690
5.52M
  }
5691
5.55M
  if (orig != NULL) {
5692
      /*
5693
       * Ugly mechanism to save the raw entity value.
5694
       */
5695
4.78M
      xmlEntityPtr cur = NULL;
5696
5697
4.78M
      if (isParameter) {
5698
3.11M
          if ((ctxt->sax != NULL) &&
5699
3.11M
        (ctxt->sax->getParameterEntity != NULL))
5700
3.11M
        cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5701
3.11M
      } else {
5702
1.67M
          if ((ctxt->sax != NULL) &&
5703
1.67M
        (ctxt->sax->getEntity != NULL))
5704
1.67M
        cur = ctxt->sax->getEntity(ctxt->userData, name);
5705
1.67M
    if ((cur == NULL) && (ctxt->userData==ctxt)) {
5706
65.3k
        cur = xmlSAX2GetEntity(ctxt, name);
5707
65.3k
    }
5708
1.67M
      }
5709
4.78M
            if ((cur != NULL) && (cur->orig == NULL)) {
5710
4.39M
    cur->orig = orig;
5711
4.39M
                orig = NULL;
5712
4.39M
      }
5713
4.78M
  }
5714
5715
5.55M
done:
5716
5.55M
  if (value != NULL) xmlFree(value);
5717
5.55M
  if (URI != NULL) xmlFree(URI);
5718
5.55M
  if (literal != NULL) xmlFree(literal);
5719
5.55M
        if (orig != NULL) xmlFree(orig);
5720
5.55M
    }
5721
5.58M
}
5722
5723
/**
5724
 * xmlParseDefaultDecl:
5725
 * @ctxt:  an XML parser context
5726
 * @value:  Receive a possible fixed default value for the attribute
5727
 *
5728
 * DEPRECATED: Internal function, don't use.
5729
 *
5730
 * Parse an attribute default declaration
5731
 *
5732
 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5733
 *
5734
 * [ VC: Required Attribute ]
5735
 * if the default declaration is the keyword #REQUIRED, then the
5736
 * attribute must be specified for all elements of the type in the
5737
 * attribute-list declaration.
5738
 *
5739
 * [ VC: Attribute Default Legal ]
5740
 * The declared default value must meet the lexical constraints of
5741
 * the declared attribute type c.f. xmlValidateAttributeDecl()
5742
 *
5743
 * [ VC: Fixed Attribute Default ]
5744
 * if an attribute has a default value declared with the #FIXED
5745
 * keyword, instances of that attribute must match the default value.
5746
 *
5747
 * [ WFC: No < in Attribute Values ]
5748
 * handled in xmlParseAttValue()
5749
 *
5750
 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5751
 *          or XML_ATTRIBUTE_FIXED.
5752
 */
5753
5754
int
5755
19.2M
xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5756
19.2M
    int val;
5757
19.2M
    xmlChar *ret;
5758
5759
19.2M
    *value = NULL;
5760
19.2M
    if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5761
1.22M
  SKIP(9);
5762
1.22M
  return(XML_ATTRIBUTE_REQUIRED);
5763
1.22M
    }
5764
18.0M
    if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5765
16.7M
  SKIP(8);
5766
16.7M
  return(XML_ATTRIBUTE_IMPLIED);
5767
16.7M
    }
5768
1.25M
    val = XML_ATTRIBUTE_NONE;
5769
1.25M
    if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5770
792k
  SKIP(6);
5771
792k
  val = XML_ATTRIBUTE_FIXED;
5772
792k
  if (SKIP_BLANKS == 0) {
5773
397
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5774
397
         "Space required after '#FIXED'\n");
5775
397
  }
5776
792k
    }
5777
1.25M
    ret = xmlParseAttValue(ctxt);
5778
1.25M
    ctxt->instate = XML_PARSER_DTD;
5779
1.25M
    if (ret == NULL) {
5780
16.5k
  xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5781
16.5k
           "Attribute default value declaration error\n");
5782
16.5k
    } else
5783
1.23M
        *value = ret;
5784
1.25M
    return(val);
5785
18.0M
}
5786
5787
/**
5788
 * xmlParseNotationType:
5789
 * @ctxt:  an XML parser context
5790
 *
5791
 * DEPRECATED: Internal function, don't use.
5792
 *
5793
 * parse an Notation attribute type.
5794
 *
5795
 * Note: the leading 'NOTATION' S part has already being parsed...
5796
 *
5797
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5798
 *
5799
 * [ VC: Notation Attributes ]
5800
 * Values of this type must match one of the notation names included
5801
 * in the declaration; all notation names in the declaration must be declared.
5802
 *
5803
 * Returns: the notation attribute tree built while parsing
5804
 */
5805
5806
xmlEnumerationPtr
5807
86.9k
xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5808
86.9k
    const xmlChar *name;
5809
86.9k
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5810
5811
86.9k
    if (RAW != '(') {
5812
2.02k
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5813
2.02k
  return(NULL);
5814
2.02k
    }
5815
84.9k
    SHRINK;
5816
87.7k
    do {
5817
87.7k
        NEXT;
5818
87.7k
  SKIP_BLANKS;
5819
87.7k
        name = xmlParseName(ctxt);
5820
87.7k
  if (name == NULL) {
5821
729
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5822
729
         "Name expected in NOTATION declaration\n");
5823
729
            xmlFreeEnumeration(ret);
5824
729
      return(NULL);
5825
729
  }
5826
87.0k
  tmp = ret;
5827
91.8k
  while (tmp != NULL) {
5828
5.24k
      if (xmlStrEqual(name, tmp->name)) {
5829
470
    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5830
470
    "standalone: attribute notation value token %s duplicated\n",
5831
470
         name, NULL);
5832
470
    if (!xmlDictOwns(ctxt->dict, name))
5833
0
        xmlFree((xmlChar *) name);
5834
470
    break;
5835
470
      }
5836
4.77k
      tmp = tmp->next;
5837
4.77k
  }
5838
87.0k
  if (tmp == NULL) {
5839
86.5k
      cur = xmlCreateEnumeration(name);
5840
86.5k
      if (cur == NULL) {
5841
0
                xmlFreeEnumeration(ret);
5842
0
                return(NULL);
5843
0
            }
5844
86.5k
      if (last == NULL) ret = last = cur;
5845
2.19k
      else {
5846
2.19k
    last->next = cur;
5847
2.19k
    last = cur;
5848
2.19k
      }
5849
86.5k
  }
5850
87.0k
  SKIP_BLANKS;
5851
87.0k
    } while (RAW == '|');
5852
84.2k
    if (RAW != ')') {
5853
7.77k
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5854
7.77k
        xmlFreeEnumeration(ret);
5855
7.77k
  return(NULL);
5856
7.77k
    }
5857
76.4k
    NEXT;
5858
76.4k
    return(ret);
5859
84.2k
}
5860
5861
/**
5862
 * xmlParseEnumerationType:
5863
 * @ctxt:  an XML parser context
5864
 *
5865
 * DEPRECATED: Internal function, don't use.
5866
 *
5867
 * parse an Enumeration attribute type.
5868
 *
5869
 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5870
 *
5871
 * [ VC: Enumeration ]
5872
 * Values of this type must match one of the Nmtoken tokens in
5873
 * the declaration
5874
 *
5875
 * Returns: the enumeration attribute tree built while parsing
5876
 */
5877
5878
xmlEnumerationPtr
5879
1.36M
xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5880
1.36M
    xmlChar *name;
5881
1.36M
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5882
5883
1.36M
    if (RAW != '(') {
5884
23.8k
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5885
23.8k
  return(NULL);
5886
23.8k
    }
5887
1.33M
    SHRINK;
5888
4.00M
    do {
5889
4.00M
        NEXT;
5890
4.00M
  SKIP_BLANKS;
5891
4.00M
        name = xmlParseNmtoken(ctxt);
5892
4.00M
  if (name == NULL) {
5893
6.16k
      xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5894
6.16k
      return(ret);
5895
6.16k
  }
5896
3.99M
  tmp = ret;
5897
10.4M
  while (tmp != NULL) {
5898
6.45M
      if (xmlStrEqual(name, tmp->name)) {
5899
1.17k
    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5900
1.17k
    "standalone: attribute enumeration value token %s duplicated\n",
5901
1.17k
         name, NULL);
5902
1.17k
    if (!xmlDictOwns(ctxt->dict, name))
5903
1.17k
        xmlFree(name);
5904
1.17k
    break;
5905
1.17k
      }
5906
6.44M
      tmp = tmp->next;
5907
6.44M
  }
5908
3.99M
  if (tmp == NULL) {
5909
3.99M
      cur = xmlCreateEnumeration(name);
5910
3.99M
      if (!xmlDictOwns(ctxt->dict, name))
5911
3.99M
    xmlFree(name);
5912
3.99M
      if (cur == NULL) {
5913
0
                xmlFreeEnumeration(ret);
5914
0
                return(NULL);
5915
0
            }
5916
3.99M
      if (last == NULL) ret = last = cur;
5917
2.66M
      else {
5918
2.66M
    last->next = cur;
5919
2.66M
    last = cur;
5920
2.66M
      }
5921
3.99M
  }
5922
3.99M
  SKIP_BLANKS;
5923
3.99M
    } while (RAW == '|');
5924
1.33M
    if (RAW != ')') {
5925
15.5k
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5926
15.5k
  return(ret);
5927
15.5k
    }
5928
1.31M
    NEXT;
5929
1.31M
    return(ret);
5930
1.33M
}
5931
5932
/**
5933
 * xmlParseEnumeratedType:
5934
 * @ctxt:  an XML parser context
5935
 * @tree:  the enumeration tree built while parsing
5936
 *
5937
 * DEPRECATED: Internal function, don't use.
5938
 *
5939
 * parse an Enumerated attribute type.
5940
 *
5941
 * [57] EnumeratedType ::= NotationType | Enumeration
5942
 *
5943
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5944
 *
5945
 *
5946
 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5947
 */
5948
5949
int
5950
1.44M
xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5951
1.44M
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5952
87.3k
  SKIP(8);
5953
87.3k
  if (SKIP_BLANKS == 0) {
5954
408
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5955
408
         "Space required after 'NOTATION'\n");
5956
408
      return(0);
5957
408
  }
5958
86.9k
  *tree = xmlParseNotationType(ctxt);
5959
86.9k
  if (*tree == NULL) return(0);
5960
76.4k
  return(XML_ATTRIBUTE_NOTATION);
5961
86.9k
    }
5962
1.36M
    *tree = xmlParseEnumerationType(ctxt);
5963
1.36M
    if (*tree == NULL) return(0);
5964
1.33M
    return(XML_ATTRIBUTE_ENUMERATION);
5965
1.36M
}
5966
5967
/**
5968
 * xmlParseAttributeType:
5969
 * @ctxt:  an XML parser context
5970
 * @tree:  the enumeration tree built while parsing
5971
 *
5972
 * DEPRECATED: Internal function, don't use.
5973
 *
5974
 * parse the Attribute list def for an element
5975
 *
5976
 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5977
 *
5978
 * [55] StringType ::= 'CDATA'
5979
 *
5980
 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5981
 *                        'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5982
 *
5983
 * Validity constraints for attribute values syntax are checked in
5984
 * xmlValidateAttributeValue()
5985
 *
5986
 * [ VC: ID ]
5987
 * Values of type ID must match the Name production. A name must not
5988
 * appear more than once in an XML document as a value of this type;
5989
 * i.e., ID values must uniquely identify the elements which bear them.
5990
 *
5991
 * [ VC: One ID per Element Type ]
5992
 * No element type may have more than one ID attribute specified.
5993
 *
5994
 * [ VC: ID Attribute Default ]
5995
 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5996
 *
5997
 * [ VC: IDREF ]
5998
 * Values of type IDREF must match the Name production, and values
5999
 * of type IDREFS must match Names; each IDREF Name must match the value
6000
 * of an ID attribute on some element in the XML document; i.e. IDREF
6001
 * values must match the value of some ID attribute.
6002
 *
6003
 * [ VC: Entity Name ]
6004
 * Values of type ENTITY must match the Name production, values
6005
 * of type ENTITIES must match Names; each Entity Name must match the
6006
 * name of an unparsed entity declared in the DTD.
6007
 *
6008
 * [ VC: Name Token ]
6009
 * Values of type NMTOKEN must match the Nmtoken production; values
6010
 * of type NMTOKENS must match Nmtokens.
6011
 *
6012
 * Returns the attribute type
6013
 */
6014
int
6015
19.3M
xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
6016
19.3M
    SHRINK;
6017
19.3M
    if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
6018
5.16M
  SKIP(5);
6019
5.16M
  return(XML_ATTRIBUTE_CDATA);
6020
14.1M
     } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
6021
154k
  SKIP(6);
6022
154k
  return(XML_ATTRIBUTE_IDREFS);
6023
13.9M
     } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
6024
733k
  SKIP(5);
6025
733k
  return(XML_ATTRIBUTE_IDREF);
6026
13.2M
     } else if ((RAW == 'I') && (NXT(1) == 'D')) {
6027
4.42M
        SKIP(2);
6028
4.42M
  return(XML_ATTRIBUTE_ID);
6029
8.83M
     } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
6030
86.3k
  SKIP(6);
6031
86.3k
  return(XML_ATTRIBUTE_ENTITY);
6032
8.74M
     } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
6033
40.2k
  SKIP(8);
6034
40.2k
  return(XML_ATTRIBUTE_ENTITIES);
6035
8.70M
     } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
6036
3.88M
  SKIP(8);
6037
3.88M
  return(XML_ATTRIBUTE_NMTOKENS);
6038
4.81M
     } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
6039
3.37M
  SKIP(7);
6040
3.37M
  return(XML_ATTRIBUTE_NMTOKEN);
6041
3.37M
     }
6042
1.44M
     return(xmlParseEnumeratedType(ctxt, tree));
6043
19.3M
}
6044
6045
/**
6046
 * xmlParseAttributeListDecl:
6047
 * @ctxt:  an XML parser context
6048
 *
6049
 * DEPRECATED: Internal function, don't use.
6050
 *
6051
 * Parse an attribute list declaration for an element. Always consumes '<!'.
6052
 *
6053
 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
6054
 *
6055
 * [53] AttDef ::= S Name S AttType S DefaultDecl
6056
 *
6057
 */
6058
void
6059
9.86M
xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
6060
9.86M
    const xmlChar *elemName;
6061
9.86M
    const xmlChar *attrName;
6062
9.86M
    xmlEnumerationPtr tree;
6063
6064
9.86M
    if ((CUR != '<') || (NXT(1) != '!'))
6065
0
        return;
6066
9.86M
    SKIP(2);
6067
6068
9.86M
    if (CMP7(CUR_PTR, 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
6069
9.86M
  int inputid = ctxt->input->id;
6070
6071
9.86M
  SKIP(7);
6072
9.86M
  if (SKIP_BLANKS == 0) {
6073
29.7k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6074
29.7k
                     "Space required after '<!ATTLIST'\n");
6075
29.7k
  }
6076
9.86M
        elemName = xmlParseName(ctxt);
6077
9.86M
  if (elemName == NULL) {
6078
19.0k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6079
19.0k
         "ATTLIST: no name for Element\n");
6080
19.0k
      return;
6081
19.0k
  }
6082
9.84M
  SKIP_BLANKS;
6083
9.84M
  GROW;
6084
29.0M
  while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
6085
19.4M
      int type;
6086
19.4M
      int def;
6087
19.4M
      xmlChar *defaultValue = NULL;
6088
6089
19.4M
      GROW;
6090
19.4M
            tree = NULL;
6091
19.4M
      attrName = xmlParseName(ctxt);
6092
19.4M
      if (attrName == NULL) {
6093
22.7k
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6094
22.7k
             "ATTLIST: no name for Attribute\n");
6095
22.7k
    break;
6096
22.7k
      }
6097
19.3M
      GROW;
6098
19.3M
      if (SKIP_BLANKS == 0) {
6099
77.1k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6100
77.1k
            "Space required after the attribute name\n");
6101
77.1k
    break;
6102
77.1k
      }
6103
6104
19.3M
      type = xmlParseAttributeType(ctxt, &tree);
6105
19.3M
      if (type <= 0) {
6106
39.9k
          break;
6107
39.9k
      }
6108
6109
19.2M
      GROW;
6110
19.2M
      if (SKIP_BLANKS == 0) {
6111
25.1k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6112
25.1k
             "Space required after the attribute type\n");
6113
25.1k
          if (tree != NULL)
6114
18.5k
        xmlFreeEnumeration(tree);
6115
25.1k
    break;
6116
25.1k
      }
6117
6118
19.2M
      def = xmlParseDefaultDecl(ctxt, &defaultValue);
6119
19.2M
      if (def <= 0) {
6120
0
                if (defaultValue != NULL)
6121
0
        xmlFree(defaultValue);
6122
0
          if (tree != NULL)
6123
0
        xmlFreeEnumeration(tree);
6124
0
          break;
6125
0
      }
6126
19.2M
      if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6127
422k
          xmlAttrNormalizeSpace(defaultValue, defaultValue);
6128
6129
19.2M
      GROW;
6130
19.2M
            if (RAW != '>') {
6131
13.8M
    if (SKIP_BLANKS == 0) {
6132
34.6k
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6133
34.6k
      "Space required after the attribute default value\n");
6134
34.6k
        if (defaultValue != NULL)
6135
16.2k
      xmlFree(defaultValue);
6136
34.6k
        if (tree != NULL)
6137
8.96k
      xmlFreeEnumeration(tree);
6138
34.6k
        break;
6139
34.6k
    }
6140
13.8M
      }
6141
19.2M
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6142
19.2M
    (ctxt->sax->attributeDecl != NULL))
6143
17.8M
    ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6144
17.8M
                          type, def, defaultValue, tree);
6145
1.36M
      else if (tree != NULL)
6146
106k
    xmlFreeEnumeration(tree);
6147
6148
19.2M
      if ((ctxt->sax2) && (defaultValue != NULL) &&
6149
19.2M
          (def != XML_ATTRIBUTE_IMPLIED) &&
6150
19.2M
    (def != XML_ATTRIBUTE_REQUIRED)) {
6151
671k
    xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6152
671k
      }
6153
19.2M
      if (ctxt->sax2) {
6154
9.74M
    xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6155
9.74M
      }
6156
19.2M
      if (defaultValue != NULL)
6157
1.21M
          xmlFree(defaultValue);
6158
19.2M
      GROW;
6159
19.2M
  }
6160
9.84M
  if (RAW == '>') {
6161
9.67M
      if (inputid != ctxt->input->id) {
6162
11.6k
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6163
11.6k
                               "Attribute list declaration doesn't start and"
6164
11.6k
                               " stop in the same entity\n");
6165
11.6k
      }
6166
9.67M
      NEXT;
6167
9.67M
  }
6168
9.84M
    }
6169
9.86M
}
6170
6171
/**
6172
 * xmlParseElementMixedContentDecl:
6173
 * @ctxt:  an XML parser context
6174
 * @inputchk:  the input used for the current entity, needed for boundary checks
6175
 *
6176
 * DEPRECATED: Internal function, don't use.
6177
 *
6178
 * parse the declaration for a Mixed Element content
6179
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6180
 *
6181
 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6182
 *                '(' S? '#PCDATA' S? ')'
6183
 *
6184
 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6185
 *
6186
 * [ VC: No Duplicate Types ]
6187
 * The same name must not appear more than once in a single
6188
 * mixed-content declaration.
6189
 *
6190
 * returns: the list of the xmlElementContentPtr describing the element choices
6191
 */
6192
xmlElementContentPtr
6193
2.18M
xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6194
2.18M
    xmlElementContentPtr ret = NULL, cur = NULL, n;
6195
2.18M
    const xmlChar *elem = NULL;
6196
6197
2.18M
    GROW;
6198
2.18M
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6199
2.18M
  SKIP(7);
6200
2.18M
  SKIP_BLANKS;
6201
2.18M
  SHRINK;
6202
2.18M
  if (RAW == ')') {
6203
1.25M
      if (ctxt->input->id != inputchk) {
6204
27
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6205
27
                               "Element content declaration doesn't start and"
6206
27
                               " stop in the same entity\n");
6207
27
      }
6208
1.25M
      NEXT;
6209
1.25M
      ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6210
1.25M
      if (ret == NULL)
6211
0
          return(NULL);
6212
1.25M
      if (RAW == '*') {
6213
267
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6214
267
    NEXT;
6215
267
      }
6216
1.25M
      return(ret);
6217
1.25M
  }
6218
930k
  if ((RAW == '(') || (RAW == '|')) {
6219
928k
      ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6220
928k
      if (ret == NULL) return(NULL);
6221
928k
  }
6222
10.8M
  while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
6223
9.90M
      NEXT;
6224
9.90M
      if (elem == NULL) {
6225
928k
          ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6226
928k
    if (ret == NULL) {
6227
0
        xmlFreeDocElementContent(ctxt->myDoc, cur);
6228
0
                    return(NULL);
6229
0
                }
6230
928k
    ret->c1 = cur;
6231
928k
    if (cur != NULL)
6232
928k
        cur->parent = ret;
6233
928k
    cur = ret;
6234
8.97M
      } else {
6235
8.97M
          n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6236
8.97M
    if (n == NULL) {
6237
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6238
0
                    return(NULL);
6239
0
                }
6240
8.97M
    n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6241
8.97M
    if (n->c1 != NULL)
6242
8.97M
        n->c1->parent = n;
6243
8.97M
          cur->c2 = n;
6244
8.97M
    if (n != NULL)
6245
8.97M
        n->parent = cur;
6246
8.97M
    cur = n;
6247
8.97M
      }
6248
9.90M
      SKIP_BLANKS;
6249
9.90M
      elem = xmlParseName(ctxt);
6250
9.90M
      if (elem == NULL) {
6251
772
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6252
772
      "xmlParseElementMixedContentDecl : Name expected\n");
6253
772
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6254
772
    return(NULL);
6255
772
      }
6256
9.90M
      SKIP_BLANKS;
6257
9.90M
      GROW;
6258
9.90M
  }
6259
929k
  if ((RAW == ')') && (NXT(1) == '*')) {
6260
925k
      if (elem != NULL) {
6261
925k
    cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6262
925k
                                   XML_ELEMENT_CONTENT_ELEMENT);
6263
925k
    if (cur->c2 != NULL)
6264
925k
        cur->c2->parent = cur;
6265
925k
            }
6266
925k
            if (ret != NULL)
6267
925k
                ret->ocur = XML_ELEMENT_CONTENT_MULT;
6268
925k
      if (ctxt->input->id != inputchk) {
6269
362
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6270
362
                               "Element content declaration doesn't start and"
6271
362
                               " stop in the same entity\n");
6272
362
      }
6273
925k
      SKIP(2);
6274
925k
  } else {
6275
4.25k
      xmlFreeDocElementContent(ctxt->myDoc, ret);
6276
4.25k
      xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6277
4.25k
      return(NULL);
6278
4.25k
  }
6279
6280
929k
    } else {
6281
0
  xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6282
0
    }
6283
925k
    return(ret);
6284
2.18M
}
6285
6286
/**
6287
 * xmlParseElementChildrenContentDeclPriv:
6288
 * @ctxt:  an XML parser context
6289
 * @inputchk:  the input used for the current entity, needed for boundary checks
6290
 * @depth: the level of recursion
6291
 *
6292
 * Parse the declaration for an element's children content (choice or seq).
6293
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6294
 *
6295
 *
6296
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6297
 *
6298
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6299
 *
6300
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6301
 *
6302
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6303
 *
6304
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6305
 * TODO Parameter-entity replacement text must be properly nested
6306
 *  with parenthesized groups. That is to say, if either of the
6307
 *  opening or closing parentheses in a choice, seq, or Mixed
6308
 *  construct is contained in the replacement text for a parameter
6309
 *  entity, both must be contained in the same replacement text. For
6310
 *  interoperability, if a parameter-entity reference appears in a
6311
 *  choice, seq, or Mixed construct, its replacement text should not
6312
 *  be empty, and neither the first nor last non-blank character of
6313
 *  the replacement text should be a connector (| or ,).
6314
 *
6315
 * Returns the tree of xmlElementContentPtr describing the element
6316
 *          hierarchy.
6317
 */
6318
static xmlElementContentPtr
6319
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6320
3.13M
                                       int depth) {
6321
3.13M
    xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6322
3.13M
    const xmlChar *elem;
6323
3.13M
    xmlChar type = 0;
6324
6325
3.13M
    if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6326
3.13M
        (depth >  2048)) {
6327
127
        xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6328
127
"xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6329
127
                          depth);
6330
127
  return(NULL);
6331
127
    }
6332
3.13M
    SKIP_BLANKS;
6333
3.13M
    GROW;
6334
3.13M
    if (RAW == '(') {
6335
233k
  int inputid = ctxt->input->id;
6336
6337
        /* Recurse on first child */
6338
233k
  NEXT;
6339
233k
  SKIP_BLANKS;
6340
233k
        cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6341
233k
                                                           depth + 1);
6342
233k
        if (cur == NULL)
6343
121k
            return(NULL);
6344
112k
  SKIP_BLANKS;
6345
112k
  GROW;
6346
2.90M
    } else {
6347
2.90M
  elem = xmlParseName(ctxt);
6348
2.90M
  if (elem == NULL) {
6349
15.4k
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6350
15.4k
      return(NULL);
6351
15.4k
  }
6352
2.88M
        cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6353
2.88M
  if (cur == NULL) {
6354
0
      xmlErrMemory(ctxt, NULL);
6355
0
      return(NULL);
6356
0
  }
6357
2.88M
  GROW;
6358
2.88M
  if (RAW == '?') {
6359
213k
      cur->ocur = XML_ELEMENT_CONTENT_OPT;
6360
213k
      NEXT;
6361
2.67M
  } else if (RAW == '*') {
6362
165k
      cur->ocur = XML_ELEMENT_CONTENT_MULT;
6363
165k
      NEXT;
6364
2.50M
  } else if (RAW == '+') {
6365
635k
      cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6366
635k
      NEXT;
6367
1.87M
  } else {
6368
1.87M
      cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6369
1.87M
  }
6370
2.88M
  GROW;
6371
2.88M
    }
6372
3.00M
    SKIP_BLANKS;
6373
3.00M
    SHRINK;
6374
14.4M
    while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
6375
        /*
6376
   * Each loop we parse one separator and one element.
6377
   */
6378
11.4M
        if (RAW == ',') {
6379
2.63M
      if (type == 0) type = CUR;
6380
6381
      /*
6382
       * Detect "Name | Name , Name" error
6383
       */
6384
1.57M
      else if (type != CUR) {
6385
292
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6386
292
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6387
292
                      type);
6388
292
    if ((last != NULL) && (last != ret))
6389
292
        xmlFreeDocElementContent(ctxt->myDoc, last);
6390
292
    if (ret != NULL)
6391
292
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6392
292
    return(NULL);
6393
292
      }
6394
2.63M
      NEXT;
6395
6396
2.63M
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6397
2.63M
      if (op == NULL) {
6398
0
    if ((last != NULL) && (last != ret))
6399
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6400
0
          xmlFreeDocElementContent(ctxt->myDoc, ret);
6401
0
    return(NULL);
6402
0
      }
6403
2.63M
      if (last == NULL) {
6404
1.05M
    op->c1 = ret;
6405
1.05M
    if (ret != NULL)
6406
1.05M
        ret->parent = op;
6407
1.05M
    ret = cur = op;
6408
1.57M
      } else {
6409
1.57M
          cur->c2 = op;
6410
1.57M
    if (op != NULL)
6411
1.57M
        op->parent = cur;
6412
1.57M
    op->c1 = last;
6413
1.57M
    if (last != NULL)
6414
1.57M
        last->parent = op;
6415
1.57M
    cur =op;
6416
1.57M
    last = NULL;
6417
1.57M
      }
6418
8.82M
  } else if (RAW == '|') {
6419
8.80M
      if (type == 0) type = CUR;
6420
6421
      /*
6422
       * Detect "Name , Name | Name" error
6423
       */
6424
7.82M
      else if (type != CUR) {
6425
162
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6426
162
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6427
162
          type);
6428
162
    if ((last != NULL) && (last != ret))
6429
162
        xmlFreeDocElementContent(ctxt->myDoc, last);
6430
162
    if (ret != NULL)
6431
162
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6432
162
    return(NULL);
6433
162
      }
6434
8.80M
      NEXT;
6435
6436
8.80M
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6437
8.80M
      if (op == NULL) {
6438
0
    if ((last != NULL) && (last != ret))
6439
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6440
0
    if (ret != NULL)
6441
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6442
0
    return(NULL);
6443
0
      }
6444
8.80M
      if (last == NULL) {
6445
976k
    op->c1 = ret;
6446
976k
    if (ret != NULL)
6447
976k
        ret->parent = op;
6448
976k
    ret = cur = op;
6449
7.82M
      } else {
6450
7.82M
          cur->c2 = op;
6451
7.82M
    if (op != NULL)
6452
7.82M
        op->parent = cur;
6453
7.82M
    op->c1 = last;
6454
7.82M
    if (last != NULL)
6455
7.82M
        last->parent = op;
6456
7.82M
    cur =op;
6457
7.82M
    last = NULL;
6458
7.82M
      }
6459
8.80M
  } else {
6460
18.9k
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6461
18.9k
      if ((last != NULL) && (last != ret))
6462
4.76k
          xmlFreeDocElementContent(ctxt->myDoc, last);
6463
18.9k
      if (ret != NULL)
6464
18.9k
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6465
18.9k
      return(NULL);
6466
18.9k
  }
6467
11.4M
  GROW;
6468
11.4M
  SKIP_BLANKS;
6469
11.4M
  GROW;
6470
11.4M
  if (RAW == '(') {
6471
511k
      int inputid = ctxt->input->id;
6472
      /* Recurse on second child */
6473
511k
      NEXT;
6474
511k
      SKIP_BLANKS;
6475
511k
      last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6476
511k
                                                          depth + 1);
6477
511k
            if (last == NULL) {
6478
2.60k
    if (ret != NULL)
6479
2.60k
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6480
2.60k
    return(NULL);
6481
2.60k
            }
6482
509k
      SKIP_BLANKS;
6483
10.9M
  } else {
6484
10.9M
      elem = xmlParseName(ctxt);
6485
10.9M
      if (elem == NULL) {
6486
2.83k
    xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6487
2.83k
    if (ret != NULL)
6488
2.83k
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6489
2.83k
    return(NULL);
6490
2.83k
      }
6491
10.9M
      last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6492
10.9M
      if (last == NULL) {
6493
0
    if (ret != NULL)
6494
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6495
0
    return(NULL);
6496
0
      }
6497
10.9M
      if (RAW == '?') {
6498
874k
    last->ocur = XML_ELEMENT_CONTENT_OPT;
6499
874k
    NEXT;
6500
10.0M
      } else if (RAW == '*') {
6501
567k
    last->ocur = XML_ELEMENT_CONTENT_MULT;
6502
567k
    NEXT;
6503
9.47M
      } else if (RAW == '+') {
6504
143k
    last->ocur = XML_ELEMENT_CONTENT_PLUS;
6505
143k
    NEXT;
6506
9.33M
      } else {
6507
9.33M
    last->ocur = XML_ELEMENT_CONTENT_ONCE;
6508
9.33M
      }
6509
10.9M
  }
6510
11.4M
  SKIP_BLANKS;
6511
11.4M
  GROW;
6512
11.4M
    }
6513
2.97M
    if ((cur != NULL) && (last != NULL)) {
6514
2.01M
        cur->c2 = last;
6515
2.01M
  if (last != NULL)
6516
2.01M
      last->parent = cur;
6517
2.01M
    }
6518
2.97M
    if (ctxt->input->id != inputchk) {
6519
1.11k
  xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6520
1.11k
                       "Element content declaration doesn't start and stop in"
6521
1.11k
                       " the same entity\n");
6522
1.11k
    }
6523
2.97M
    NEXT;
6524
2.97M
    if (RAW == '?') {
6525
67.4k
  if (ret != NULL) {
6526
67.4k
      if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6527
67.4k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6528
349
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6529
67.1k
      else
6530
67.1k
          ret->ocur = XML_ELEMENT_CONTENT_OPT;
6531
67.4k
  }
6532
67.4k
  NEXT;
6533
2.90M
    } else if (RAW == '*') {
6534
613k
  if (ret != NULL) {
6535
613k
      ret->ocur = XML_ELEMENT_CONTENT_MULT;
6536
613k
      cur = ret;
6537
      /*
6538
       * Some normalization:
6539
       * (a | b* | c?)* == (a | b | c)*
6540
       */
6541
5.46M
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6542
4.85M
    if ((cur->c1 != NULL) &&
6543
4.85M
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6544
4.85M
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6545
24.8k
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6546
4.85M
    if ((cur->c2 != NULL) &&
6547
4.85M
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6548
4.85M
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6549
4.04k
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6550
4.85M
    cur = cur->c2;
6551
4.85M
      }
6552
613k
  }
6553
613k
  NEXT;
6554
2.29M
    } else if (RAW == '+') {
6555
494k
  if (ret != NULL) {
6556
494k
      int found = 0;
6557
6558
494k
      if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6559
494k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6560
252
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6561
493k
      else
6562
493k
          ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6563
      /*
6564
       * Some normalization:
6565
       * (a | b*)+ == (a | b)*
6566
       * (a | b?)+ == (a | b)*
6567
       */
6568
875k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6569
381k
    if ((cur->c1 != NULL) &&
6570
381k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6571
381k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6572
589
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6573
589
        found = 1;
6574
589
    }
6575
381k
    if ((cur->c2 != NULL) &&
6576
381k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6577
381k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6578
224
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6579
224
        found = 1;
6580
224
    }
6581
381k
    cur = cur->c2;
6582
381k
      }
6583
494k
      if (found)
6584
669
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6585
494k
  }
6586
494k
  NEXT;
6587
494k
    }
6588
2.97M
    return(ret);
6589
3.00M
}
6590
6591
/**
6592
 * xmlParseElementChildrenContentDecl:
6593
 * @ctxt:  an XML parser context
6594
 * @inputchk:  the input used for the current entity, needed for boundary checks
6595
 *
6596
 * DEPRECATED: Internal function, don't use.
6597
 *
6598
 * Parse the declaration for an element's children content (choice or seq).
6599
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6600
 *
6601
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6602
 *
6603
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6604
 *
6605
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6606
 *
6607
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6608
 *
6609
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6610
 * TODO Parameter-entity replacement text must be properly nested
6611
 *  with parenthesized groups. That is to say, if either of the
6612
 *  opening or closing parentheses in a choice, seq, or Mixed
6613
 *  construct is contained in the replacement text for a parameter
6614
 *  entity, both must be contained in the same replacement text. For
6615
 *  interoperability, if a parameter-entity reference appears in a
6616
 *  choice, seq, or Mixed construct, its replacement text should not
6617
 *  be empty, and neither the first nor last non-blank character of
6618
 *  the replacement text should be a connector (| or ,).
6619
 *
6620
 * Returns the tree of xmlElementContentPtr describing the element
6621
 *          hierarchy.
6622
 */
6623
xmlElementContentPtr
6624
0
xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6625
    /* stub left for API/ABI compat */
6626
0
    return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6627
0
}
6628
6629
/**
6630
 * xmlParseElementContentDecl:
6631
 * @ctxt:  an XML parser context
6632
 * @name:  the name of the element being defined.
6633
 * @result:  the Element Content pointer will be stored here if any
6634
 *
6635
 * DEPRECATED: Internal function, don't use.
6636
 *
6637
 * parse the declaration for an Element content either Mixed or Children,
6638
 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6639
 *
6640
 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6641
 *
6642
 * returns: the type of element content XML_ELEMENT_TYPE_xxx, or -1 in case of error
6643
 */
6644
6645
int
6646
xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6647
4.57M
                           xmlElementContentPtr *result) {
6648
6649
4.57M
    xmlElementContentPtr tree = NULL;
6650
4.57M
    int inputid = ctxt->input->id;
6651
4.57M
    int res;
6652
6653
4.57M
    *result = NULL;
6654
6655
4.57M
    if (RAW != '(') {
6656
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6657
0
    "xmlParseElementContentDecl : %s '(' expected\n", name);
6658
0
  return(-1);
6659
0
    }
6660
4.57M
    NEXT;
6661
4.57M
    GROW;
6662
4.57M
    if (ctxt->instate == XML_PARSER_EOF)
6663
0
        return(-1);
6664
4.57M
    SKIP_BLANKS;
6665
4.57M
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6666
2.18M
        tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6667
2.18M
  res = XML_ELEMENT_TYPE_MIXED;
6668
2.39M
    } else {
6669
2.39M
        tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6670
2.39M
  res = XML_ELEMENT_TYPE_ELEMENT;
6671
2.39M
    }
6672
4.57M
    SKIP_BLANKS;
6673
4.57M
    *result = tree;
6674
4.57M
    return(res);
6675
4.57M
}
6676
6677
/**
6678
 * xmlParseElementDecl:
6679
 * @ctxt:  an XML parser context
6680
 *
6681
 * DEPRECATED: Internal function, don't use.
6682
 *
6683
 * Parse an element declaration. Always consumes '<!'.
6684
 *
6685
 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6686
 *
6687
 * [ VC: Unique Element Type Declaration ]
6688
 * No element type may be declared more than once
6689
 *
6690
 * Returns the type of the element, or -1 in case of error
6691
 */
6692
int
6693
5.56M
xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6694
5.56M
    const xmlChar *name;
6695
5.56M
    int ret = -1;
6696
5.56M
    xmlElementContentPtr content  = NULL;
6697
6698
5.56M
    if ((CUR != '<') || (NXT(1) != '!'))
6699
0
        return(ret);
6700
5.56M
    SKIP(2);
6701
6702
    /* GROW; done in the caller */
6703
5.56M
    if (CMP7(CUR_PTR, 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6704
5.56M
  int inputid = ctxt->input->id;
6705
6706
5.56M
  SKIP(7);
6707
5.56M
  if (SKIP_BLANKS == 0) {
6708
2.38k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6709
2.38k
               "Space required after 'ELEMENT'\n");
6710
2.38k
      return(-1);
6711
2.38k
  }
6712
5.55M
        name = xmlParseName(ctxt);
6713
5.55M
  if (name == NULL) {
6714
3.51k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6715
3.51k
         "xmlParseElementDecl: no name for Element\n");
6716
3.51k
      return(-1);
6717
3.51k
  }
6718
5.55M
  if (SKIP_BLANKS == 0) {
6719
17.1k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6720
17.1k
         "Space required after the element name\n");
6721
17.1k
  }
6722
5.55M
  if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6723
838k
      SKIP(5);
6724
      /*
6725
       * Element must always be empty.
6726
       */
6727
838k
      ret = XML_ELEMENT_TYPE_EMPTY;
6728
4.71M
  } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6729
4.71M
             (NXT(2) == 'Y')) {
6730
128k
      SKIP(3);
6731
      /*
6732
       * Element is a generic container.
6733
       */
6734
128k
      ret = XML_ELEMENT_TYPE_ANY;
6735
4.58M
  } else if (RAW == '(') {
6736
4.57M
      ret = xmlParseElementContentDecl(ctxt, name, &content);
6737
4.57M
  } else {
6738
      /*
6739
       * [ WFC: PEs in Internal Subset ] error handling.
6740
       */
6741
12.6k
      if ((RAW == '%') && (ctxt->external == 0) &&
6742
12.6k
          (ctxt->inputNr == 1)) {
6743
364
    xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
6744
364
    "PEReference: forbidden within markup decl in internal subset\n");
6745
12.3k
      } else {
6746
12.3k
    xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6747
12.3k
          "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6748
12.3k
            }
6749
12.6k
      return(-1);
6750
12.6k
  }
6751
6752
5.54M
  SKIP_BLANKS;
6753
6754
5.54M
  if (RAW != '>') {
6755
35.7k
      xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6756
35.7k
      if (content != NULL) {
6757
4.56k
    xmlFreeDocElementContent(ctxt->myDoc, content);
6758
4.56k
      }
6759
5.50M
  } else {
6760
5.50M
      if (inputid != ctxt->input->id) {
6761
1.56k
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6762
1.56k
                               "Element declaration doesn't start and stop in"
6763
1.56k
                               " the same entity\n");
6764
1.56k
      }
6765
6766
5.50M
      NEXT;
6767
5.50M
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6768
5.50M
    (ctxt->sax->elementDecl != NULL)) {
6769
5.13M
    if (content != NULL)
6770
4.25M
        content->parent = NULL;
6771
5.13M
          ctxt->sax->elementDecl(ctxt->userData, name, ret,
6772
5.13M
                           content);
6773
5.13M
    if ((content != NULL) && (content->parent == NULL)) {
6774
        /*
6775
         * this is a trick: if xmlAddElementDecl is called,
6776
         * instead of copying the full tree it is plugged directly
6777
         * if called from the parser. Avoid duplicating the
6778
         * interfaces or change the API/ABI
6779
         */
6780
83.5k
        xmlFreeDocElementContent(ctxt->myDoc, content);
6781
83.5k
    }
6782
5.13M
      } else if (content != NULL) {
6783
268k
    xmlFreeDocElementContent(ctxt->myDoc, content);
6784
268k
      }
6785
5.50M
  }
6786
5.54M
    }
6787
5.54M
    return(ret);
6788
5.56M
}
6789
6790
/**
6791
 * xmlParseConditionalSections
6792
 * @ctxt:  an XML parser context
6793
 *
6794
 * Parse a conditional section. Always consumes '<!['.
6795
 *
6796
 * [61] conditionalSect ::= includeSect | ignoreSect
6797
 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6798
 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6799
 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6800
 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6801
 */
6802
6803
static void
6804
50.2k
xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6805
50.2k
    int *inputIds = NULL;
6806
50.2k
    size_t inputIdsSize = 0;
6807
50.2k
    size_t depth = 0;
6808
6809
105k
    while (ctxt->instate != XML_PARSER_EOF) {
6810
104k
        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6811
69.0k
            int id = ctxt->input->id;
6812
6813
69.0k
            SKIP(3);
6814
69.0k
            SKIP_BLANKS;
6815
6816
69.0k
            if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6817
30.5k
                SKIP(7);
6818
30.5k
                SKIP_BLANKS;
6819
30.5k
                if (RAW != '[') {
6820
315
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6821
315
                    xmlHaltParser(ctxt);
6822
315
                    goto error;
6823
315
                }
6824
30.1k
                if (ctxt->input->id != id) {
6825
15
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6826
15
                                   "All markup of the conditional section is"
6827
15
                                   " not in the same entity\n");
6828
15
                }
6829
30.1k
                NEXT;
6830
6831
30.1k
                if (inputIdsSize <= depth) {
6832
14.5k
                    int *tmp;
6833
6834
14.5k
                    inputIdsSize = (inputIdsSize == 0 ? 4 : inputIdsSize * 2);
6835
14.5k
                    tmp = (int *) xmlRealloc(inputIds,
6836
14.5k
                            inputIdsSize * sizeof(int));
6837
14.5k
                    if (tmp == NULL) {
6838
0
                        xmlErrMemory(ctxt, NULL);
6839
0
                        goto error;
6840
0
                    }
6841
14.5k
                    inputIds = tmp;
6842
14.5k
                }
6843
30.1k
                inputIds[depth] = id;
6844
30.1k
                depth++;
6845
38.4k
            } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6846
36.0k
                size_t ignoreDepth = 0;
6847
6848
36.0k
                SKIP(6);
6849
36.0k
                SKIP_BLANKS;
6850
36.0k
                if (RAW != '[') {
6851
137
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6852
137
                    xmlHaltParser(ctxt);
6853
137
                    goto error;
6854
137
                }
6855
35.9k
                if (ctxt->input->id != id) {
6856
6
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6857
6
                                   "All markup of the conditional section is"
6858
6
                                   " not in the same entity\n");
6859
6
                }
6860
35.9k
                NEXT;
6861
6862
4.70M
                while (RAW != 0) {
6863
4.70M
                    if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6864
38.3k
                        SKIP(3);
6865
38.3k
                        ignoreDepth++;
6866
                        /* Check for integer overflow */
6867
38.3k
                        if (ignoreDepth == 0) {
6868
0
                            xmlErrMemory(ctxt, NULL);
6869
0
                            goto error;
6870
0
                        }
6871
4.66M
                    } else if ((RAW == ']') && (NXT(1) == ']') &&
6872
4.66M
                               (NXT(2) == '>')) {
6873
46.0k
                        if (ignoreDepth == 0)
6874
32.9k
                            break;
6875
13.0k
                        SKIP(3);
6876
13.0k
                        ignoreDepth--;
6877
4.62M
                    } else {
6878
4.62M
                        NEXT;
6879
4.62M
                    }
6880
4.70M
                }
6881
6882
35.9k
    if (RAW == 0) {
6883
2.99k
        xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6884
2.99k
                    goto error;
6885
2.99k
    }
6886
32.9k
                if (ctxt->input->id != id) {
6887
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6888
0
                                   "All markup of the conditional section is"
6889
0
                                   " not in the same entity\n");
6890
0
                }
6891
32.9k
                SKIP(3);
6892
32.9k
            } else {
6893
2.39k
                xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6894
2.39k
                xmlHaltParser(ctxt);
6895
2.39k
                goto error;
6896
2.39k
            }
6897
69.0k
        } else if ((depth > 0) &&
6898
35.7k
                   (RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6899
16.5k
            depth--;
6900
16.5k
            if (ctxt->input->id != inputIds[depth]) {
6901
208
                xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6902
208
                               "All markup of the conditional section is not"
6903
208
                               " in the same entity\n");
6904
208
            }
6905
16.5k
            SKIP(3);
6906
19.2k
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
6907
16.1k
            xmlParseMarkupDecl(ctxt);
6908
16.1k
        } else {
6909
3.10k
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6910
3.10k
            xmlHaltParser(ctxt);
6911
3.10k
            goto error;
6912
3.10k
        }
6913
6914
95.8k
        if (depth == 0)
6915
40.9k
            break;
6916
6917
54.8k
        SKIP_BLANKS;
6918
54.8k
        GROW;
6919
54.8k
    }
6920
6921
50.2k
error:
6922
50.2k
    xmlFree(inputIds);
6923
50.2k
}
6924
6925
/**
6926
 * xmlParseMarkupDecl:
6927
 * @ctxt:  an XML parser context
6928
 *
6929
 * DEPRECATED: Internal function, don't use.
6930
 *
6931
 * Parse markup declarations. Always consumes '<!' or '<?'.
6932
 *
6933
 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6934
 *                     NotationDecl | PI | Comment
6935
 *
6936
 * [ VC: Proper Declaration/PE Nesting ]
6937
 * Parameter-entity replacement text must be properly nested with
6938
 * markup declarations. That is to say, if either the first character
6939
 * or the last character of a markup declaration (markupdecl above) is
6940
 * contained in the replacement text for a parameter-entity reference,
6941
 * both must be contained in the same replacement text.
6942
 *
6943
 * [ WFC: PEs in Internal Subset ]
6944
 * In the internal DTD subset, parameter-entity references can occur
6945
 * only where markup declarations can occur, not within markup declarations.
6946
 * (This does not apply to references that occur in external parameter
6947
 * entities or to the external subset.)
6948
 */
6949
void
6950
526M
xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6951
526M
    GROW;
6952
526M
    if (CUR == '<') {
6953
526M
        if (NXT(1) == '!') {
6954
525M
      switch (NXT(2)) {
6955
11.1M
          case 'E':
6956
11.1M
        if (NXT(3) == 'L')
6957
5.56M
      xmlParseElementDecl(ctxt);
6958
5.58M
        else if (NXT(3) == 'N')
6959
5.58M
      xmlParseEntityDecl(ctxt);
6960
1.72k
                    else
6961
1.72k
                        SKIP(2);
6962
11.1M
        break;
6963
9.86M
          case 'A':
6964
9.86M
        xmlParseAttributeListDecl(ctxt);
6965
9.86M
        break;
6966
375k
          case 'N':
6967
375k
        xmlParseNotationDecl(ctxt);
6968
375k
        break;
6969
504M
          case '-':
6970
504M
        xmlParseComment(ctxt);
6971
504M
        break;
6972
305k
    default:
6973
        /* there is an error but it will be detected later */
6974
305k
                    SKIP(2);
6975
305k
        break;
6976
525M
      }
6977
525M
  } else if (NXT(1) == '?') {
6978
781k
      xmlParsePI(ctxt);
6979
781k
  }
6980
526M
    }
6981
6982
    /*
6983
     * detect requirement to exit there and act accordingly
6984
     * and avoid having instate overridden later on
6985
     */
6986
526M
    if (ctxt->instate == XML_PARSER_EOF)
6987
32.4k
        return;
6988
6989
526M
    ctxt->instate = XML_PARSER_DTD;
6990
526M
}
6991
6992
/**
6993
 * xmlParseTextDecl:
6994
 * @ctxt:  an XML parser context
6995
 *
6996
 * DEPRECATED: Internal function, don't use.
6997
 *
6998
 * parse an XML declaration header for external entities
6999
 *
7000
 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
7001
 */
7002
7003
void
7004
21.9k
xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
7005
21.9k
    xmlChar *version;
7006
21.9k
    const xmlChar *encoding;
7007
21.9k
    int oldstate;
7008
7009
    /*
7010
     * We know that '<?xml' is here.
7011
     */
7012
21.9k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
7013
21.4k
  SKIP(5);
7014
21.4k
    } else {
7015
430
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
7016
430
  return;
7017
430
    }
7018
7019
    /* Avoid expansion of parameter entities when skipping blanks. */
7020
21.4k
    oldstate = ctxt->instate;
7021
21.4k
    ctxt->instate = XML_PARSER_START;
7022
7023
21.4k
    if (SKIP_BLANKS == 0) {
7024
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7025
0
           "Space needed after '<?xml'\n");
7026
0
    }
7027
7028
    /*
7029
     * We may have the VersionInfo here.
7030
     */
7031
21.4k
    version = xmlParseVersionInfo(ctxt);
7032
21.4k
    if (version == NULL)
7033
1.94k
  version = xmlCharStrdup(XML_DEFAULT_VERSION);
7034
19.5k
    else {
7035
19.5k
  if (SKIP_BLANKS == 0) {
7036
1.39k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7037
1.39k
               "Space needed here\n");
7038
1.39k
  }
7039
19.5k
    }
7040
21.4k
    ctxt->input->version = version;
7041
7042
    /*
7043
     * We must have the encoding declaration
7044
     */
7045
21.4k
    encoding = xmlParseEncodingDecl(ctxt);
7046
21.4k
    if (ctxt->instate == XML_PARSER_EOF)
7047
0
        return;
7048
21.4k
    if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7049
  /*
7050
   * The XML REC instructs us to stop parsing right here
7051
   */
7052
359
        ctxt->instate = oldstate;
7053
359
        return;
7054
359
    }
7055
21.1k
    if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
7056
5.26k
  xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
7057
5.26k
           "Missing encoding in text declaration\n");
7058
5.26k
    }
7059
7060
21.1k
    SKIP_BLANKS;
7061
21.1k
    if ((RAW == '?') && (NXT(1) == '>')) {
7062
16.1k
        SKIP(2);
7063
16.1k
    } else if (RAW == '>') {
7064
        /* Deprecated old WD ... */
7065
145
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7066
145
  NEXT;
7067
4.86k
    } else {
7068
4.86k
        int c;
7069
7070
4.86k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7071
353k
        while ((c = CUR) != 0) {
7072
351k
            NEXT;
7073
351k
            if (c == '>')
7074
3.14k
                break;
7075
351k
        }
7076
4.86k
    }
7077
7078
21.1k
    ctxt->instate = oldstate;
7079
21.1k
}
7080
7081
/**
7082
 * xmlParseExternalSubset:
7083
 * @ctxt:  an XML parser context
7084
 * @ExternalID: the external identifier
7085
 * @SystemID: the system identifier (or URL)
7086
 *
7087
 * parse Markup declarations from an external subset
7088
 *
7089
 * [30] extSubset ::= textDecl? extSubsetDecl
7090
 *
7091
 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
7092
 */
7093
void
7094
xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
7095
123k
                       const xmlChar *SystemID) {
7096
123k
    xmlDetectSAX2(ctxt);
7097
123k
    GROW;
7098
7099
123k
    if ((ctxt->encoding == NULL) &&
7100
123k
        (ctxt->input->end - ctxt->input->cur >= 4)) {
7101
122k
        xmlChar start[4];
7102
122k
  xmlCharEncoding enc;
7103
7104
122k
  start[0] = RAW;
7105
122k
  start[1] = NXT(1);
7106
122k
  start[2] = NXT(2);
7107
122k
  start[3] = NXT(3);
7108
122k
  enc = xmlDetectCharEncoding(start, 4);
7109
122k
  if (enc != XML_CHAR_ENCODING_NONE)
7110
18.3k
      xmlSwitchEncoding(ctxt, enc);
7111
122k
    }
7112
7113
123k
    if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
7114
16.6k
  xmlParseTextDecl(ctxt);
7115
16.6k
  if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7116
      /*
7117
       * The XML REC instructs us to stop parsing right here
7118
       */
7119
293
      xmlHaltParser(ctxt);
7120
293
      return;
7121
293
  }
7122
16.6k
    }
7123
122k
    if (ctxt->myDoc == NULL) {
7124
0
        ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
7125
0
  if (ctxt->myDoc == NULL) {
7126
0
      xmlErrMemory(ctxt, "New Doc failed");
7127
0
      return;
7128
0
  }
7129
0
  ctxt->myDoc->properties = XML_DOC_INTERNAL;
7130
0
    }
7131
122k
    if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
7132
0
        xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
7133
7134
122k
    ctxt->instate = XML_PARSER_DTD;
7135
122k
    ctxt->external = 1;
7136
122k
    SKIP_BLANKS;
7137
108M
    while ((ctxt->instate != XML_PARSER_EOF) && (RAW != 0)) {
7138
108M
  GROW;
7139
108M
        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7140
50.2k
            xmlParseConditionalSections(ctxt);
7141
108M
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
7142
108M
            xmlParseMarkupDecl(ctxt);
7143
108M
        } else {
7144
42.8k
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7145
42.8k
            xmlHaltParser(ctxt);
7146
42.8k
            return;
7147
42.8k
        }
7148
108M
        SKIP_BLANKS;
7149
108M
    }
7150
7151
79.9k
    if (RAW != 0) {
7152
0
  xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7153
0
    }
7154
7155
79.9k
}
7156
7157
/**
 * xmlParseReference:
 * @ctxt:  an XML parser context
 *
 * DEPRECATED: Internal function, don't use.
 *
 * Parse and handle a reference in content. Depending on the SAX
 * interface, this may end up in a call to characters() if this is a
 * CharRef or a predefined entity, or in a call to reference() when
 * entities are not substituted.
 *
 * For a general entity whose XML_ENT_PARSED flag is not yet set, the
 * entity content is parsed here (external parsed entities only when
 * XML_PARSE_NOENT or XML_PARSE_DTDVALID is set) and the resulting node
 * list, if any, is stored in ent->children. When entities are replaced
 * and a current node exists, the entity children are then added or
 * copied below ctxt->node.
 *
 * The function returns early if the current character is not '&', if
 * xmlParseCharRef() returns 0, if xmlParseEntityRef() returns NULL, or
 * if the document is no longer well-formed.
 *
 * Always consumes '&'.
 *
 * [67] Reference ::= EntityRef | CharRef
 */
7172
void
7173
45.8M
xmlParseReference(xmlParserCtxtPtr ctxt) {
7174
45.8M
    xmlEntityPtr ent;
7175
45.8M
    xmlChar *val;
7176
45.8M
    int was_checked;
7177
45.8M
    xmlNodePtr list = NULL;
7178
45.8M
    xmlParserErrors ret = XML_ERR_OK;
7179
7180
7181
45.8M
    if (RAW != '&')
7182
0
        return;
7183
7184
    /*
7185
     * Simple case of a CharRef
7186
     */
7187
45.8M
    if (NXT(1) == '#') {
7188
529k
  int i = 0;
7189
529k
  xmlChar out[16];
7190
529k
  /* Sampled before xmlParseCharRef() runs; selects the hex or decimal form below */
  int hex = NXT(2);
7191
529k
  int value = xmlParseCharRef(ctxt);
7192
7193
529k
  if (value == 0)
7194
96.2k
      return;
7195
433k
  if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
7196
      /*
7197
       * So we are using non-UTF-8 buffers
7198
       * Check that the char fit on 8bits, if not
7199
       * generate a CharRef.
7200
       */
7201
318k
      if (value <= 0xFF) {
7202
303k
    out[0] = value;
7203
303k
    out[1] = 0;
7204
303k
    if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7205
303k
        (!ctxt->disableSAX))
7206
258k
        ctxt->sax->characters(ctxt->userData, out, 1);
7207
303k
      } else {
7208
14.6k
    if ((hex == 'x') || (hex == 'X'))
7209
1.84k
        snprintf((char *)out, sizeof(out), "#x%X", value);
7210
12.7k
    else
7211
12.7k
        snprintf((char *)out, sizeof(out), "#%d", value);
7212
14.6k
    if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7213
14.6k
        (!ctxt->disableSAX))
7214
12.0k
        ctxt->sax->reference(ctxt->userData, out);
7215
14.6k
      }
7216
318k
  } else {
7217
      /*
7218
       * Just encode the value in UTF-8
7219
       */
7220
114k
      COPY_BUF(0 ,out, i, value);
7221
114k
      out[i] = 0;
7222
114k
      if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7223
114k
    (!ctxt->disableSAX))
7224
98.8k
    ctxt->sax->characters(ctxt->userData, out, i);
7225
114k
  }
7226
433k
  return;
7227
529k
    }
7228
7229
    /*
7230
     * We are seeing an entity reference
7231
     */
7232
45.3M
    ent = xmlParseEntityRef(ctxt);
7233
45.3M
    if (ent == NULL) return;
7234
44.0M
    if (!ctxt->wellFormed)
7235
22.9M
  return;
7236
21.1M
    /* Non-zero when XML_ENT_PARSED was already set before this call parses the entity */
    was_checked = ent->flags & XML_ENT_PARSED;
7237
7238
    /* special case of predefined entities */
7239
21.1M
    if ((ent->name == NULL) ||
7240
21.1M
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7241
582k
  val = ent->content;
7242
582k
  if (val == NULL) return;
7243
  /*
7244
   * inline the entity.
7245
   */
7246
582k
  if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7247
582k
      (!ctxt->disableSAX))
7248
582k
      ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7249
582k
  return;
7250
582k
    }
7251
7252
    /*
7253
     * The first reference to the entity trigger a parsing phase
7254
     * where the ent->children is filled with the result from
7255
     * the parsing.
7256
     * Note: external parsed entities will not be loaded, it is not
7257
     * required for a non-validating parser, unless the parsing option
7258
     * of validating, or substituting entities were given. Doing so is
7259
     * far more secure as the parser will only process data coming from
7260
     * the document entity by default.
7261
     */
7262
20.5M
    if (((ent->flags & XML_ENT_PARSED) == 0) &&
7263
20.5M
        ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
7264
278k
         (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
7265
268k
  unsigned long oldsizeentcopy = ctxt->sizeentcopy;
7266
7267
  /*
7268
   * This is a bit hackish but this seems the best
7269
   * way to make sure both SAX and DOM entity support
7270
   * behaves okay.
7271
   */
7272
268k
  void *user_data;
7273
268k
  if (ctxt->userData == ctxt)
7274
268k
      user_data = NULL;
7275
0
  else
7276
0
      user_data = ctxt->userData;
7277
7278
        /* Avoid overflow as much as possible */
7279
268k
        ctxt->sizeentcopy = 0;
7280
7281
268k
        if (ent->flags & XML_ENT_EXPANDING) {
7282
1.12k
            xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7283
1.12k
            xmlHaltParser(ctxt);
7284
1.12k
            return;
7285
1.12k
        }
7286
7287
267k
        ent->flags |= XML_ENT_EXPANDING;
7288
7289
  /*
7290
   * Check that this entity is well formed
7291
   * 4.3.2: An internal general parsed entity is well-formed
7292
   * if its replacement text matches the production labeled
7293
   * content.
7294
   */
7295
267k
  if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7296
172k
      ctxt->depth++;
7297
172k
      ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
7298
172k
                                                user_data, &list);
7299
172k
      ctxt->depth--;
7300
7301
172k
  } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7302
95.0k
      ctxt->depth++;
7303
95.0k
      ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
7304
95.0k
                                     user_data, ctxt->depth, ent->URI,
7305
95.0k
             ent->ExternalID, &list);
7306
95.0k
      ctxt->depth--;
7307
95.0k
  } else {
7308
0
      ret = XML_ERR_ENTITY_PE_INTERNAL;
7309
0
      xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7310
0
       "invalid entity type found\n", NULL);
7311
0
  }
7312
7313
267k
        ent->flags &= ~XML_ENT_EXPANDING;
7314
267k
        ent->flags |= XML_ENT_PARSED | XML_ENT_CHECKED;
7315
267k
        ent->expandedSize = ctxt->sizeentcopy;
7316
267k
  if (ret == XML_ERR_ENTITY_LOOP) {
7317
6.79k
            xmlHaltParser(ctxt);
7318
6.79k
      xmlFreeNodeList(list);
7319
6.79k
      return;
7320
6.79k
  }
7321
260k
  if (xmlParserEntityCheck(ctxt, oldsizeentcopy)) {
7322
0
      xmlFreeNodeList(list);
7323
0
      return;
7324
0
  }
7325
7326
260k
  if ((ret == XML_ERR_OK) && (list != NULL)) {
7327
172k
            ent->children = list;
7328
            /*
7329
             * Prune it directly in the generated document
7330
             * except for single text nodes.
7331
             */
7332
172k
            if ((ctxt->replaceEntities == 0) ||
7333
172k
                (ctxt->parseMode == XML_PARSE_READER) ||
7334
172k
                ((list->type == XML_TEXT_NODE) &&
7335
159k
                 (list->next == NULL))) {
7336
159k
                ent->owner = 1;
7337
1.53M
                while (list != NULL) {
7338
1.37M
                    list->parent = (xmlNodePtr) ent;
7339
1.37M
                    if (list->doc != ent->doc)
7340
0
                        xmlSetTreeDoc(list, ent->doc);
7341
1.37M
                    if (list->next == NULL)
7342
159k
                        ent->last = list;
7343
1.37M
                    list = list->next;
7344
1.37M
                }
7345
159k
                list = NULL;
7346
159k
            } else {
7347
12.8k
                ent->owner = 0;
7348
3.60M
                while (list != NULL) {
7349
3.59M
                    list->parent = (xmlNodePtr) ctxt->node;
7350
3.59M
                    list->doc = ctxt->myDoc;
7351
3.59M
                    if (list->next == NULL)
7352
12.8k
                        ent->last = list;
7353
3.59M
                    list = list->next;
7354
3.59M
                }
7355
12.8k
                list = ent->children;
7356
#ifdef LIBXML_LEGACY_ENABLED
7357
                if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7358
                    xmlAddEntityReference(ent, list, NULL);
7359
#endif /* LIBXML_LEGACY_ENABLED */
7360
12.8k
            }
7361
172k
  } else if ((ret != XML_ERR_OK) &&
7362
88.1k
       (ret != XML_WAR_UNDECLARED_ENTITY)) {
7363
48.5k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7364
48.5k
         "Entity '%s' failed to parse\n", ent->name);
7365
48.5k
            if (ent->content != NULL)
7366
10.6k
                ent->content[0] = 0;
7367
48.5k
  } else if (list != NULL) {
7368
0
      xmlFreeNodeList(list);
7369
0
      list = NULL;
7370
0
  }
7371
7372
        /* Prevent entity from being parsed and expanded twice (Bug 760367). */
7373
260k
        was_checked = 0;
7374
260k
    }
7375
7376
    /*
7377
     * Now that the entity content has been gathered
7378
     * provide it to the application, this can take different forms based
7379
     * on the parsing modes.
7380
     */
7381
20.5M
    if (ent->children == NULL) {
7382
  /*
7383
   * Probably running in SAX mode and the callbacks don't
7384
   * build the entity content. So unless we already went
7385
   * though parsing for first checking go though the entity
7386
   * content to generate callbacks associated to the entity
7387
   */
7388
826k
  if (was_checked != 0) {
7389
729k
      void *user_data;
7390
      /*
7391
       * This is a bit hackish but this seems the best
7392
       * way to make sure both SAX and DOM entity support
7393
       * behaves okay.
7394
       */
7395
729k
      if (ctxt->userData == ctxt)
7396
729k
    user_data = NULL;
7397
0
      else
7398
0
    user_data = ctxt->userData;
7399
7400
729k
      if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7401
1.95k
    ctxt->depth++;
7402
1.95k
    ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7403
1.95k
           ent->content, user_data, NULL);
7404
1.95k
    ctxt->depth--;
7405
727k
      } else if (ent->etype ==
7406
727k
           XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7407
727k
          unsigned long oldsizeentities = ctxt->sizeentities;
7408
7409
727k
    ctxt->depth++;
7410
727k
    ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7411
727k
         ctxt->sax, user_data, ctxt->depth,
7412
727k
         ent->URI, ent->ExternalID, NULL);
7413
727k
    ctxt->depth--;
7414
7415
                /* Undo the change to sizeentities */
7416
727k
                ctxt->sizeentities = oldsizeentities;
7417
727k
      } else {
7418
0
    ret = XML_ERR_ENTITY_PE_INTERNAL;
7419
0
    xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7420
0
           "invalid entity type found\n", NULL);
7421
0
      }
7422
729k
      if (ret == XML_ERR_ENTITY_LOOP) {
7423
0
    xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7424
0
    return;
7425
0
      }
7426
729k
            if (xmlParserEntityCheck(ctxt, 0))
7427
0
                return;
7428
729k
  }
7429
826k
  if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7430
826k
      (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7431
      /*
7432
       * Entity reference callback comes second, it's somewhat
7433
       * superfluous but a compatibility to historical behaviour
7434
       */
7435
168k
      ctxt->sax->reference(ctxt->userData, ent->name);
7436
168k
  }
7437
826k
  return;
7438
826k
    }
7439
7440
    /*
7441
     * We also check for amplification if entities aren't substituted.
7442
     * They might be expanded later.
7443
     */
7444
19.7M
    if ((was_checked != 0) &&
7445
19.7M
        (xmlParserEntityCheck(ctxt, ent->expandedSize)))
7446
482
        return;
7447
7448
    /*
7449
     * If we didn't get any children for the entity being built
7450
     */
7451
19.7M
    if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7452
19.7M
  (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7453
  /*
7454
   * Create a node.
7455
   */
7456
1.66M
  ctxt->sax->reference(ctxt->userData, ent->name);
7457
1.66M
  return;
7458
1.66M
    }
7459
7460
18.0M
    if (ctxt->replaceEntities)  {
7461
  /*
7462
   * There is a problem on the handling of _private for entities
7463
   * (bug 155816): Should we copy the content of the field from
7464
   * the entity (possibly overwriting some value set by the user
7465
   * when a copy is created), should we leave it alone, or should
7466
   * we try to take care of different situations?  The problem
7467
   * is exacerbated by the usage of this field by the xmlReader.
7468
   * To fix this bug, we look at _private on the created node
7469
   * and, if it's NULL, we copy in whatever was in the entity.
7470
   * If it's not NULL we leave it alone.  This is somewhat of a
7471
   * hack - maybe we should have further tests to determine
7472
   * what to do.
7473
   */
7474
18.0M
  if (ctxt->node != NULL) {
7475
      /*
7476
       * Seems we are generating the DOM content, do
7477
       * a simple tree copy for all references except the first
7478
       * In the first occurrence list contains the replacement.
7479
       */
7480
18.0M
      if (((list == NULL) && (ent->owner == 0)) ||
7481
18.0M
    (ctxt->parseMode == XML_PARSE_READER)) {
7482
5.94M
    xmlNodePtr nw = NULL, cur, firstChild = NULL;
7483
7484
    /*
7485
     * when operating on a reader, the entities definitions
7486
     * are always owning the entities subtree.
7487
    if (ctxt->parseMode == XML_PARSE_READER)
7488
        ent->owner = 1;
7489
     */
7490
7491
5.94M
    cur = ent->children;
7492
7.22M
    while (cur != NULL) {
7493
7.22M
        nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7494
7.22M
        if (nw != NULL) {
7495
7.22M
      if (nw->_private == NULL)
7496
7.22M
          nw->_private = cur->_private;
7497
7.22M
      if (firstChild == NULL){
7498
5.94M
          firstChild = nw;
7499
5.94M
      }
7500
7.22M
      nw = xmlAddChild(ctxt->node, nw);
7501
7.22M
        }
7502
7.22M
        if (cur == ent->last) {
7503
      /*
7504
       * needed to detect some strange empty
7505
       * node cases in the reader tests
7506
       */
7507
5.94M
      if ((ctxt->parseMode == XML_PARSE_READER) &&
7508
5.94M
          (nw != NULL) &&
7509
5.94M
          (nw->type == XML_ELEMENT_NODE) &&
7510
5.94M
          (nw->children == NULL))
7511
9.16k
          nw->extra = 1;
7512
7513
5.94M
      break;
7514
5.94M
        }
7515
1.28M
        cur = cur->next;
7516
1.28M
    }
7517
#ifdef LIBXML_LEGACY_ENABLED
7518
    if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7519
      xmlAddEntityReference(ent, firstChild, nw);
7520
#endif /* LIBXML_LEGACY_ENABLED */
7521
12.1M
      } else if ((list == NULL) || (ctxt->inputNr > 0)) {
7522
12.1M
    xmlNodePtr nw = NULL, cur, next, last,
7523
12.1M
         firstChild = NULL;
7524
7525
    /*
7526
     * Copy the entity child list and make it the new
7527
     * entity child list. The goal is to make sure any
7528
     * ID or REF referenced will be the one from the
7529
     * document content and not the entity copy.
7530
     */
7531
12.1M
    cur = ent->children;
7532
12.1M
    ent->children = NULL;
7533
12.1M
    last = ent->last;
7534
12.1M
    ent->last = NULL;
7535
28.3M
    while (cur != NULL) {
7536
28.3M
        next = cur->next;
7537
28.3M
        cur->next = NULL;
7538
28.3M
        cur->parent = NULL;
7539
28.3M
        nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7540
28.3M
        if (nw != NULL) {
7541
28.3M
      if (nw->_private == NULL)
7542
28.3M
          nw->_private = cur->_private;
7543
28.3M
      if (firstChild == NULL){
7544
12.1M
          /* NOTE(review): the branch above records the copy (nw) here, this one records the original (cur) - confirm intended */
          firstChild = cur;
7545
12.1M
      }
7546
28.3M
      xmlAddChild((xmlNodePtr) ent, nw);
7547
28.3M
        }
7548
28.3M
        xmlAddChild(ctxt->node, cur);
7549
28.3M
        if (cur == last)
7550
12.1M
      break;
7551
16.2M
        cur = next;
7552
16.2M
    }
7553
12.1M
    if (ent->owner == 0)
7554
12.8k
        ent->owner = 1;
7555
#ifdef LIBXML_LEGACY_ENABLED
7556
    if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7557
      xmlAddEntityReference(ent, firstChild, nw);
7558
#endif /* LIBXML_LEGACY_ENABLED */
7559
12.1M
      } else {
7560
0
    const xmlChar *nbktext;
7561
7562
    /*
7563
     * the name change is to avoid coalescing of the
7564
     * node with a possible previous text one which
7565
     * would make ent->children a dangling pointer
7566
     */
7567
0
    nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7568
0
          -1);
7569
0
    if (ent->children->type == XML_TEXT_NODE)
7570
0
        ent->children->name = nbktext;
7571
0
    if ((ent->last != ent->children) &&
7572
0
        (ent->last->type == XML_TEXT_NODE))
7573
0
        ent->last->name = nbktext;
7574
0
    xmlAddChildList(ctxt->node, ent->children);
7575
0
      }
7576
7577
      /*
7578
       * This is to avoid a nasty side effect, see
7579
       * characters() in SAX.c
7580
       */
7581
18.0M
      ctxt->nodemem = 0;
7582
18.0M
      ctxt->nodelen = 0;
7583
18.0M
      return;
7584
18.0M
  }
7585
18.0M
    }
7586
18.0M
}
7587
7588
/**
7589
 * xmlParseEntityRef:
7590
 * @ctxt:  an XML parser context
7591
 *
7592
 * DEPRECATED: Internal function, don't use.
7593
 *
7594
 * Parse an entitiy reference. Always consumes '&'.
7595
 *
7596
 * [68] EntityRef ::= '&' Name ';'
7597
 *
7598
 * [ WFC: Entity Declared ]
7599
 * In a document without any DTD, a document with only an internal DTD
7600
 * subset which contains no parameter entity references, or a document
7601
 * with "standalone='yes'", the Name given in the entity reference
7602
 * must match that in an entity declaration, except that well-formed
7603
 * documents need not declare any of the following entities: amp, lt,
7604
 * gt, apos, quot.  The declaration of a parameter entity must precede
7605
 * any reference to it.  Similarly, the declaration of a general entity
7606
 * must precede any reference to it which appears in a default value in an
7607
 * attribute-list declaration. Note that if entities are declared in the
7608
 * external subset or in external parameter entities, a non-validating
7609
 * processor is not obligated to read and process their declarations;
7610
 * for such documents, the rule that an entity must be declared is a
7611
 * well-formedness constraint only if standalone='yes'.
7612
 *
7613
 * [ WFC: Parsed Entity ]
7614
 * An entity reference must not contain the name of an unparsed entity
7615
 *
7616
 * Returns the xmlEntityPtr if found, or NULL otherwise.
7617
 */
7618
xmlEntityPtr
7619
51.6M
xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7620
51.6M
    const xmlChar *name;
7621
51.6M
    xmlEntityPtr ent = NULL;
7622
7623
51.6M
    GROW;
7624
51.6M
    if (ctxt->instate == XML_PARSER_EOF)
7625
0
        return(NULL);
7626
7627
51.6M
    if (RAW != '&')
7628
0
        return(NULL);
7629
51.6M
    NEXT;
7630
51.6M
    name = xmlParseName(ctxt);
7631
51.6M
    if (name == NULL) {
7632
185k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7633
185k
           "xmlParseEntityRef: no name\n");
7634
185k
        return(NULL);
7635
185k
    }
7636
51.4M
    if (RAW != ';') {
7637
156k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7638
156k
  return(NULL);
7639
156k
    }
7640
51.3M
    NEXT;
7641
7642
    /*
7643
     * Predefined entities override any extra definition
7644
     */
7645
51.3M
    if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7646
35.3M
        ent = xmlGetPredefinedEntity(name);
7647
35.3M
        if (ent != NULL)
7648
1.72M
            return(ent);
7649
35.3M
    }
7650
7651
    /*
7652
     * Ask first SAX for entity resolution, otherwise try the
7653
     * entities which may have stored in the parser context.
7654
     */
7655
49.5M
    if (ctxt->sax != NULL) {
7656
49.5M
  if (ctxt->sax->getEntity != NULL)
7657
49.5M
      ent = ctxt->sax->getEntity(ctxt->userData, name);
7658
49.5M
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7659
49.5M
      (ctxt->options & XML_PARSE_OLDSAX))
7660
29.0k
      ent = xmlGetPredefinedEntity(name);
7661
49.5M
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7662
49.5M
      (ctxt->userData==ctxt)) {
7663
84.8k
      ent = xmlSAX2GetEntity(ctxt, name);
7664
84.8k
  }
7665
49.5M
    }
7666
49.5M
    if (ctxt->instate == XML_PARSER_EOF)
7667
0
  return(NULL);
7668
    /*
7669
     * [ WFC: Entity Declared ]
7670
     * In a document without any DTD, a document with only an
7671
     * internal DTD subset which contains no parameter entity
7672
     * references, or a document with "standalone='yes'", the
7673
     * Name given in the entity reference must match that in an
7674
     * entity declaration, except that well-formed documents
7675
     * need not declare any of the following entities: amp, lt,
7676
     * gt, apos, quot.
7677
     * The declaration of a parameter entity must precede any
7678
     * reference to it.
7679
     * Similarly, the declaration of a general entity must
7680
     * precede any reference to it which appears in a default
7681
     * value in an attribute-list declaration. Note that if
7682
     * entities are declared in the external subset or in
7683
     * external parameter entities, a non-validating processor
7684
     * is not obligated to read and process their declarations;
7685
     * for such documents, the rule that an entity must be
7686
     * declared is a well-formedness constraint only if
7687
     * standalone='yes'.
7688
     */
7689
49.5M
    if (ent == NULL) {
7690
1.16M
  if ((ctxt->standalone == 1) ||
7691
1.16M
      ((ctxt->hasExternalSubset == 0) &&
7692
1.12M
       (ctxt->hasPErefs == 0))) {
7693
675k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7694
675k
         "Entity '%s' not defined\n", name);
7695
675k
  } else {
7696
485k
      xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7697
485k
         "Entity '%s' not defined\n", name);
7698
485k
      if ((ctxt->inSubset == 0) &&
7699
485k
    (ctxt->sax != NULL) &&
7700
485k
    (ctxt->sax->reference != NULL)) {
7701
478k
    ctxt->sax->reference(ctxt->userData, name);
7702
478k
      }
7703
485k
  }
7704
1.16M
  ctxt->valid = 0;
7705
1.16M
    }
7706
7707
    /*
7708
     * [ WFC: Parsed Entity ]
7709
     * An entity reference must not contain the name of an
7710
     * unparsed entity
7711
     */
7712
48.4M
    else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7713
1.48k
  xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7714
1.48k
     "Entity reference to unparsed entity %s\n", name);
7715
1.48k
    }
7716
7717
    /*
7718
     * [ WFC: No External Entity References ]
7719
     * Attribute values cannot contain direct or indirect
7720
     * entity references to external entities.
7721
     */
7722
48.4M
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7723
48.4M
       (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7724
31.5k
  xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7725
31.5k
       "Attribute references external entity '%s'\n", name);
7726
31.5k
    }
7727
    /*
7728
     * [ WFC: No < in Attribute Values ]
7729
     * The replacement text of any entity referred to directly or
7730
     * indirectly in an attribute value (other than "&lt;") must
7731
     * not contain a <.
7732
     */
7733
48.3M
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7734
48.3M
       (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7735
5.78M
  if ((ent->flags & XML_ENT_CHECKED_LT) == 0) {
7736
71.3k
            if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7737
1.70k
                ent->flags |= XML_ENT_CONTAINS_LT;
7738
71.3k
            ent->flags |= XML_ENT_CHECKED_LT;
7739
71.3k
        }
7740
5.78M
        if (ent->flags & XML_ENT_CONTAINS_LT)
7741
128k
            xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7742
128k
                    "'<' in entity '%s' is not allowed in attributes "
7743
128k
                    "values\n", name);
7744
5.78M
    }
7745
7746
    /*
7747
     * Internal check, no parameter entities here ...
7748
     */
7749
42.6M
    else {
7750
42.6M
  switch (ent->etype) {
7751
0
      case XML_INTERNAL_PARAMETER_ENTITY:
7752
0
      case XML_EXTERNAL_PARAMETER_ENTITY:
7753
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7754
0
       "Attempt to reference the parameter entity '%s'\n",
7755
0
            name);
7756
0
      break;
7757
42.6M
      default:
7758
42.6M
      break;
7759
42.6M
  }
7760
42.6M
    }
7761
7762
    /*
7763
     * [ WFC: No Recursion ]
7764
     * A parsed entity must not contain a recursive reference
7765
     * to itself, either directly or indirectly.
7766
     * Done somewhere else
7767
     */
7768
49.5M
    return(ent);
7769
49.5M
}
7770
7771
/**
7772
 * xmlParseStringEntityRef:
7773
 * @ctxt:  an XML parser context
7774
 * @str:  a pointer to an index in the string
7775
 *
7776
 * parse ENTITY references declarations, but this version parses it from
7777
 * a string value.
7778
 *
7779
 * [68] EntityRef ::= '&' Name ';'
7780
 *
7781
 * [ WFC: Entity Declared ]
7782
 * In a document without any DTD, a document with only an internal DTD
7783
 * subset which contains no parameter entity references, or a document
7784
 * with "standalone='yes'", the Name given in the entity reference
7785
 * must match that in an entity declaration, except that well-formed
7786
 * documents need not declare any of the following entities: amp, lt,
7787
 * gt, apos, quot.  The declaration of a parameter entity must precede
7788
 * any reference to it.  Similarly, the declaration of a general entity
7789
 * must precede any reference to it which appears in a default value in an
7790
 * attribute-list declaration. Note that if entities are declared in the
7791
 * external subset or in external parameter entities, a non-validating
7792
 * processor is not obligated to read and process their declarations;
7793
 * for such documents, the rule that an entity must be declared is a
7794
 * well-formedness constraint only if standalone='yes'.
7795
 *
7796
 * [ WFC: Parsed Entity ]
7797
 * An entity reference must not contain the name of an unparsed entity
7798
 *
7799
 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7800
 * is updated to the current location in the string.
7801
 */
7802
static xmlEntityPtr
7803
1.40G
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7804
1.40G
    xmlChar *name;
7805
1.40G
    const xmlChar *ptr;
7806
1.40G
    xmlChar cur;
7807
1.40G
    xmlEntityPtr ent = NULL;
7808
7809
1.40G
    if ((str == NULL) || (*str == NULL))
7810
0
        return(NULL);
7811
1.40G
    ptr = *str;
7812
1.40G
    cur = *ptr;
7813
1.40G
    if (cur != '&')
7814
897M
  return(NULL);
7815
7816
502M
    ptr++;
7817
502M
    name = xmlParseStringName(ctxt, &ptr);
7818
502M
    if (name == NULL) {
7819
33.7k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7820
33.7k
           "xmlParseStringEntityRef: no name\n");
7821
33.7k
  *str = ptr;
7822
33.7k
  return(NULL);
7823
33.7k
    }
7824
502M
    if (*ptr != ';') {
7825
21.2k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7826
21.2k
        xmlFree(name);
7827
21.2k
  *str = ptr;
7828
21.2k
  return(NULL);
7829
21.2k
    }
7830
502M
    ptr++;
7831
7832
7833
    /*
7834
     * Predefined entities override any extra definition
7835
     */
7836
502M
    if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7837
354M
        ent = xmlGetPredefinedEntity(name);
7838
354M
        if (ent != NULL) {
7839
964k
            xmlFree(name);
7840
964k
            *str = ptr;
7841
964k
            return(ent);
7842
964k
        }
7843
354M
    }
7844
7845
    /*
7846
     * Ask first SAX for entity resolution, otherwise try the
7847
     * entities which may have stored in the parser context.
7848
     */
7849
501M
    if (ctxt->sax != NULL) {
7850
501M
  if (ctxt->sax->getEntity != NULL)
7851
501M
      ent = ctxt->sax->getEntity(ctxt->userData, name);
7852
501M
  if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7853
43.9M
      ent = xmlGetPredefinedEntity(name);
7854
501M
  if ((ent == NULL) && (ctxt->userData==ctxt)) {
7855
114M
      ent = xmlSAX2GetEntity(ctxt, name);
7856
114M
  }
7857
501M
    }
7858
501M
    if (ctxt->instate == XML_PARSER_EOF) {
7859
0
  xmlFree(name);
7860
0
  return(NULL);
7861
0
    }
7862
7863
    /*
7864
     * [ WFC: Entity Declared ]
7865
     * In a document without any DTD, a document with only an
7866
     * internal DTD subset which contains no parameter entity
7867
     * references, or a document with "standalone='yes'", the
7868
     * Name given in the entity reference must match that in an
7869
     * entity declaration, except that well-formed documents
7870
     * need not declare any of the following entities: amp, lt,
7871
     * gt, apos, quot.
7872
     * The declaration of a parameter entity must precede any
7873
     * reference to it.
7874
     * Similarly, the declaration of a general entity must
7875
     * precede any reference to it which appears in a default
7876
     * value in an attribute-list declaration. Note that if
7877
     * entities are declared in the external subset or in
7878
     * external parameter entities, a non-validating processor
7879
     * is not obligated to read and process their declarations;
7880
     * for such documents, the rule that an entity must be
7881
     * declared is a well-formedness constraint only if
7882
     * standalone='yes'.
7883
     */
7884
501M
    if (ent == NULL) {
7885
114M
  if ((ctxt->standalone == 1) ||
7886
114M
      ((ctxt->hasExternalSubset == 0) &&
7887
114M
       (ctxt->hasPErefs == 0))) {
7888
114M
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7889
114M
         "Entity '%s' not defined\n", name);
7890
114M
  } else {
7891
227k
      xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7892
227k
        "Entity '%s' not defined\n",
7893
227k
        name);
7894
227k
  }
7895
  /* TODO ? check regressions ctxt->valid = 0; */
7896
114M
    }
7897
7898
    /*
7899
     * [ WFC: Parsed Entity ]
7900
     * An entity reference must not contain the name of an
7901
     * unparsed entity
7902
     */
7903
386M
    else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7904
537
  xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7905
537
     "Entity reference to unparsed entity %s\n", name);
7906
537
    }
7907
7908
    /*
7909
     * [ WFC: No External Entity References ]
7910
     * Attribute values cannot contain direct or indirect
7911
     * entity references to external entities.
7912
     */
7913
386M
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7914
386M
       (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7915
1.43M
  xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7916
1.43M
   "Attribute references external entity '%s'\n", name);
7917
1.43M
    }
7918
    /*
7919
     * [ WFC: No < in Attribute Values ]
7920
     * The replacement text of any entity referred to directly or
7921
     * indirectly in an attribute value (other than "&lt;") must
7922
     * not contain a <.
7923
     */
7924
385M
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7925
385M
       (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7926
382M
  if ((ent->flags & XML_ENT_CHECKED_LT) == 0) {
7927
75.8k
            if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7928
1.61k
                ent->flags |= XML_ENT_CONTAINS_LT;
7929
75.8k
            ent->flags |= XML_ENT_CHECKED_LT;
7930
75.8k
        }
7931
382M
        if (ent->flags & XML_ENT_CONTAINS_LT)
7932
1.43M
            xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7933
1.43M
                    "'<' in entity '%s' is not allowed in attributes "
7934
1.43M
                    "values\n", name);
7935
382M
    }
7936
7937
    /*
7938
     * Internal check, no parameter entities here ...
7939
     */
7940
2.79M
    else {
7941
2.79M
  switch (ent->etype) {
7942
0
      case XML_INTERNAL_PARAMETER_ENTITY:
7943
0
      case XML_EXTERNAL_PARAMETER_ENTITY:
7944
0
    xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7945
0
       "Attempt to reference the parameter entity '%s'\n",
7946
0
          name);
7947
0
      break;
7948
2.79M
      default:
7949
2.79M
      break;
7950
2.79M
  }
7951
2.79M
    }
7952
7953
    /*
7954
     * [ WFC: No Recursion ]
7955
     * A parsed entity must not contain a recursive reference
7956
     * to itself, either directly or indirectly.
7957
     * Done somewhere else
7958
     */
7959
7960
501M
    xmlFree(name);
7961
501M
    *str = ptr;
7962
501M
    return(ent);
7963
501M
}
7964
7965
/**
7966
 * xmlParsePEReference:
7967
 * @ctxt:  an XML parser context
7968
 *
7969
 * DEPRECATED: Internal function, don't use.
7970
 *
7971
 * Parse a parameter entity reference. Always consumes '%'.
7972
 *
7973
 * The entity content is handled directly by pushing it's content as
7974
 * a new input stream.
7975
 *
7976
 * [69] PEReference ::= '%' Name ';'
7977
 *
7978
 * [ WFC: No Recursion ]
7979
 * A parsed entity must not contain a recursive
7980
 * reference to itself, either directly or indirectly.
7981
 *
7982
 * [ WFC: Entity Declared ]
7983
 * In a document without any DTD, a document with only an internal DTD
7984
 * subset which contains no parameter entity references, or a document
7985
 * with "standalone='yes'", ...  ... The declaration of a parameter
7986
 * entity must precede any reference to it...
7987
 *
7988
 * [ VC: Entity Declared ]
7989
 * In a document with an external subset or external parameter entities
7990
 * with "standalone='no'", ...  ... The declaration of a parameter entity
7991
 * must precede any reference to it...
7992
 *
7993
 * [ WFC: In DTD ]
7994
 * Parameter-entity references may only appear in the DTD.
7995
 * NOTE: misleading but this is handled.
7996
 */
7997
void
7998
xmlParsePEReference(xmlParserCtxtPtr ctxt)
7999
502M
{
8000
502M
    const xmlChar *name;
8001
502M
    xmlEntityPtr entity = NULL;
8002
502M
    xmlParserInputPtr input;
8003
8004
502M
    if (RAW != '%')
8005
0
        return;
8006
502M
    NEXT;
8007
502M
    name = xmlParseName(ctxt);
8008
502M
    if (name == NULL) {
8009
414k
  xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
8010
414k
  return;
8011
414k
    }
8012
502M
    if (xmlParserDebugEntities)
8013
0
  xmlGenericError(xmlGenericErrorContext,
8014
0
    "PEReference: %s\n", name);
8015
502M
    if (RAW != ';') {
8016
4.90M
  xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
8017
4.90M
        return;
8018
4.90M
    }
8019
8020
497M
    NEXT;
8021
8022
    /*
8023
     * Request the entity from SAX
8024
     */
8025
497M
    if ((ctxt->sax != NULL) &&
8026
497M
  (ctxt->sax->getParameterEntity != NULL))
8027
497M
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8028
497M
    if (ctxt->instate == XML_PARSER_EOF)
8029
0
  return;
8030
497M
    if (entity == NULL) {
8031
  /*
8032
   * [ WFC: Entity Declared ]
8033
   * In a document without any DTD, a document with only an
8034
   * internal DTD subset which contains no parameter entity
8035
   * references, or a document with "standalone='yes'", ...
8036
   * ... The declaration of a parameter entity must precede
8037
   * any reference to it...
8038
   */
8039
80.6M
  if ((ctxt->standalone == 1) ||
8040
80.6M
      ((ctxt->hasExternalSubset == 0) &&
8041
80.6M
       (ctxt->hasPErefs == 0))) {
8042
4.45k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8043
4.45k
            "PEReference: %%%s; not found\n",
8044
4.45k
            name);
8045
80.6M
  } else {
8046
      /*
8047
       * [ VC: Entity Declared ]
8048
       * In a document with an external subset or external
8049
       * parameter entities with "standalone='no'", ...
8050
       * ... The declaration of a parameter entity must
8051
       * precede any reference to it...
8052
       */
8053
80.6M
            if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
8054
12.3M
                xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
8055
12.3M
                                 "PEReference: %%%s; not found\n",
8056
12.3M
                                 name, NULL);
8057
12.3M
            } else
8058
68.3M
                xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8059
68.3M
                              "PEReference: %%%s; not found\n",
8060
68.3M
                              name, NULL);
8061
80.6M
            ctxt->valid = 0;
8062
80.6M
  }
8063
416M
    } else {
8064
  /*
8065
   * Internal checking in case the entity quest barfed
8066
   */
8067
416M
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8068
416M
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8069
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8070
0
      "Internal: %%%s; is not a parameter entity\n",
8071
0
        name, NULL);
8072
416M
  } else {
8073
416M
            xmlChar start[4];
8074
416M
            xmlCharEncoding enc;
8075
416M
            unsigned long parentConsumed;
8076
416M
            xmlEntityPtr oldEnt;
8077
8078
416M
      if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
8079
416M
          ((ctxt->options & XML_PARSE_NOENT) == 0) &&
8080
416M
    ((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
8081
416M
    ((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
8082
416M
    ((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
8083
416M
    (ctxt->replaceEntities == 0) &&
8084
416M
    (ctxt->validate == 0))
8085
215
    return;
8086
8087
416M
            if (entity->flags & XML_ENT_EXPANDING) {
8088
545
                xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
8089
545
                xmlHaltParser(ctxt);
8090
545
                return;
8091
545
            }
8092
8093
            /* Must be computed from old input before pushing new input. */
8094
416M
            parentConsumed = ctxt->input->parentConsumed;
8095
416M
            oldEnt = ctxt->input->entity;
8096
416M
            if ((oldEnt == NULL) ||
8097
416M
                ((oldEnt->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
8098
411M
                 ((oldEnt->flags & XML_ENT_PARSED) == 0))) {
8099
6.90M
                xmlSaturatedAdd(&parentConsumed, ctxt->input->consumed);
8100
6.90M
                xmlSaturatedAddSizeT(&parentConsumed,
8101
6.90M
                                     ctxt->input->cur - ctxt->input->base);
8102
6.90M
            }
8103
8104
416M
      input = xmlNewEntityInputStream(ctxt, entity);
8105
416M
      if (xmlPushInput(ctxt, input) < 0) {
8106
5.24k
                xmlFreeInputStream(input);
8107
5.24k
    return;
8108
5.24k
            }
8109
8110
416M
            entity->flags |= XML_ENT_EXPANDING;
8111
8112
416M
            input->parentConsumed = parentConsumed;
8113
8114
416M
      if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
8115
                /*
8116
                 * Get the 4 first bytes and decode the charset
8117
                 * if enc != XML_CHAR_ENCODING_NONE
8118
                 * plug some encoding conversion routines.
8119
                 * Note that, since we may have some non-UTF8
8120
                 * encoding (like UTF16, bug 135229), the 'length'
8121
                 * is not known, but we can calculate based upon
8122
                 * the amount of data in the buffer.
8123
                 */
8124
20.8k
                GROW
8125
20.8k
                if (ctxt->instate == XML_PARSER_EOF)
8126
0
                    return;
8127
20.8k
                if ((ctxt->input->end - ctxt->input->cur)>=4) {
8128
20.8k
                    start[0] = RAW;
8129
20.8k
                    start[1] = NXT(1);
8130
20.8k
                    start[2] = NXT(2);
8131
20.8k
                    start[3] = NXT(3);
8132
20.8k
                    enc = xmlDetectCharEncoding(start, 4);
8133
20.8k
                    if (enc != XML_CHAR_ENCODING_NONE) {
8134
729
                        xmlSwitchEncoding(ctxt, enc);
8135
729
                    }
8136
20.8k
                }
8137
8138
20.8k
                if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
8139
20.8k
                    (IS_BLANK_CH(NXT(5)))) {
8140
429
                    xmlParseTextDecl(ctxt);
8141
429
                }
8142
20.8k
            }
8143
416M
  }
8144
416M
    }
8145
497M
    ctxt->hasPErefs = 1;
8146
497M
}
8147
8148
/**
8149
 * xmlLoadEntityContent:
8150
 * @ctxt:  an XML parser context
8151
 * @entity: an unloaded system entity
8152
 *
8153
 * Load the original content of the given system entity from the
8154
 * ExternalID/SystemID given. This is to be used for Included in Literal
8155
 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
8156
 *
8157
 * Returns 0 in case of success and -1 in case of failure
8158
 */
8159
static int
8160
5.35k
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
8161
5.35k
    xmlParserInputPtr input;
8162
5.35k
    xmlBufferPtr buf;
8163
5.35k
    int l, c;
8164
5.35k
    int count = 0;
8165
8166
5.35k
    if ((ctxt == NULL) || (entity == NULL) ||
8167
5.35k
        ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
8168
5.35k
   (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
8169
5.35k
  (entity->content != NULL)) {
8170
0
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8171
0
              "xmlLoadEntityContent parameter error");
8172
0
        return(-1);
8173
0
    }
8174
8175
5.35k
    if (xmlParserDebugEntities)
8176
0
  xmlGenericError(xmlGenericErrorContext,
8177
0
    "Reading %s entity content input\n", entity->name);
8178
8179
5.35k
    buf = xmlBufferCreate();
8180
5.35k
    if (buf == NULL) {
8181
0
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8182
0
              "xmlLoadEntityContent parameter error");
8183
0
        return(-1);
8184
0
    }
8185
5.35k
    xmlBufferSetAllocationScheme(buf, XML_BUFFER_ALLOC_DOUBLEIT);
8186
8187
5.35k
    input = xmlNewEntityInputStream(ctxt, entity);
8188
5.35k
    if (input == NULL) {
8189
923
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8190
923
              "xmlLoadEntityContent input error");
8191
923
  xmlBufferFree(buf);
8192
923
        return(-1);
8193
923
    }
8194
8195
    /*
8196
     * Push the entity as the current input, read char by char
8197
     * saving to the buffer until the end of the entity or an error
8198
     */
8199
4.42k
    if (xmlPushInput(ctxt, input) < 0) {
8200
0
        xmlBufferFree(buf);
8201
0
  xmlFreeInputStream(input);
8202
0
  return(-1);
8203
0
    }
8204
8205
4.42k
    GROW;
8206
4.42k
    c = CUR_CHAR(l);
8207
12.1M
    while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
8208
12.1M
           (IS_CHAR(c))) {
8209
12.1M
        xmlBufferAdd(buf, ctxt->input->cur, l);
8210
12.1M
  if (count++ > XML_PARSER_CHUNK_SIZE) {
8211
117k
      count = 0;
8212
117k
      GROW;
8213
117k
            if (ctxt->instate == XML_PARSER_EOF) {
8214
0
                xmlBufferFree(buf);
8215
0
                return(-1);
8216
0
            }
8217
117k
  }
8218
12.1M
  NEXTL(l);
8219
12.1M
  c = CUR_CHAR(l);
8220
12.1M
  if (c == 0) {
8221
3.60k
      count = 0;
8222
3.60k
      GROW;
8223
3.60k
            if (ctxt->instate == XML_PARSER_EOF) {
8224
0
                xmlBufferFree(buf);
8225
0
                return(-1);
8226
0
            }
8227
3.60k
      c = CUR_CHAR(l);
8228
3.60k
  }
8229
12.1M
    }
8230
8231
4.42k
    if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
8232
2.29k
        xmlSaturatedAdd(&ctxt->sizeentities, ctxt->input->consumed);
8233
2.29k
        xmlPopInput(ctxt);
8234
2.29k
    } else if (!IS_CHAR(c)) {
8235
2.12k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8236
2.12k
                          "xmlLoadEntityContent: invalid char value %d\n",
8237
2.12k
                    c);
8238
2.12k
  xmlBufferFree(buf);
8239
2.12k
  return(-1);
8240
2.12k
    }
8241
2.29k
    entity->content = buf->content;
8242
2.29k
    entity->length = buf->use;
8243
2.29k
    buf->content = NULL;
8244
2.29k
    xmlBufferFree(buf);
8245
8246
2.29k
    return(0);
8247
4.42k
}
8248
8249
/**
8250
 * xmlParseStringPEReference:
8251
 * @ctxt:  an XML parser context
8252
 * @str:  a pointer to an index in the string
8253
 *
8254
 * parse PEReference declarations
8255
 *
8256
 * [69] PEReference ::= '%' Name ';'
8257
 *
8258
 * [ WFC: No Recursion ]
8259
 * A parsed entity must not contain a recursive
8260
 * reference to itself, either directly or indirectly.
8261
 *
8262
 * [ WFC: Entity Declared ]
8263
 * In a document without any DTD, a document with only an internal DTD
8264
 * subset which contains no parameter entity references, or a document
8265
 * with "standalone='yes'", ...  ... The declaration of a parameter
8266
 * entity must precede any reference to it...
8267
 *
8268
 * [ VC: Entity Declared ]
8269
 * In a document with an external subset or external parameter entities
8270
 * with "standalone='no'", ...  ... The declaration of a parameter entity
8271
 * must precede any reference to it...
8272
 *
8273
 * [ WFC: In DTD ]
8274
 * Parameter-entity references may only appear in the DTD.
8275
 * NOTE: misleading but this is handled.
8276
 *
8277
 * Returns the xmlEntityPtr if found, or NULL otherwise.
8278
 *         str is updated to the current value of the index
8279
 */
8280
static xmlEntityPtr
8281
3.74M
xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8282
3.74M
    const xmlChar *ptr;
8283
3.74M
    xmlChar cur;
8284
3.74M
    xmlChar *name;
8285
3.74M
    xmlEntityPtr entity = NULL;
8286
8287
3.74M
    if ((str == NULL) || (*str == NULL)) return(NULL);
8288
3.74M
    ptr = *str;
8289
3.74M
    cur = *ptr;
8290
3.74M
    if (cur != '%')
8291
0
        return(NULL);
8292
3.74M
    ptr++;
8293
3.74M
    name = xmlParseStringName(ctxt, &ptr);
8294
3.74M
    if (name == NULL) {
8295
27.4k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8296
27.4k
           "xmlParseStringPEReference: no name\n");
8297
27.4k
  *str = ptr;
8298
27.4k
  return(NULL);
8299
27.4k
    }
8300
3.71M
    cur = *ptr;
8301
3.71M
    if (cur != ';') {
8302
2.55k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8303
2.55k
  xmlFree(name);
8304
2.55k
  *str = ptr;
8305
2.55k
  return(NULL);
8306
2.55k
    }
8307
3.71M
    ptr++;
8308
8309
    /*
8310
     * Request the entity from SAX
8311
     */
8312
3.71M
    if ((ctxt->sax != NULL) &&
8313
3.71M
  (ctxt->sax->getParameterEntity != NULL))
8314
3.71M
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8315
3.71M
    if (ctxt->instate == XML_PARSER_EOF) {
8316
0
  xmlFree(name);
8317
0
  *str = ptr;
8318
0
  return(NULL);
8319
0
    }
8320
3.71M
    if (entity == NULL) {
8321
  /*
8322
   * [ WFC: Entity Declared ]
8323
   * In a document without any DTD, a document with only an
8324
   * internal DTD subset which contains no parameter entity
8325
   * references, or a document with "standalone='yes'", ...
8326
   * ... The declaration of a parameter entity must precede
8327
   * any reference to it...
8328
   */
8329
137k
  if ((ctxt->standalone == 1) ||
8330
137k
      ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8331
834
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8332
834
     "PEReference: %%%s; not found\n", name);
8333
137k
  } else {
8334
      /*
8335
       * [ VC: Entity Declared ]
8336
       * In a document with an external subset or external
8337
       * parameter entities with "standalone='no'", ...
8338
       * ... The declaration of a parameter entity must
8339
       * precede any reference to it...
8340
       */
8341
137k
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8342
137k
        "PEReference: %%%s; not found\n",
8343
137k
        name, NULL);
8344
137k
      ctxt->valid = 0;
8345
137k
  }
8346
3.57M
    } else {
8347
  /*
8348
   * Internal checking in case the entity quest barfed
8349
   */
8350
3.57M
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8351
3.57M
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8352
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8353
0
        "%%%s; is not a parameter entity\n",
8354
0
        name, NULL);
8355
0
  }
8356
3.57M
    }
8357
3.71M
    ctxt->hasPErefs = 1;
8358
3.71M
    xmlFree(name);
8359
3.71M
    *str = ptr;
8360
3.71M
    return(entity);
8361
3.71M
}
8362
8363
/**
8364
 * xmlParseDocTypeDecl:
8365
 * @ctxt:  an XML parser context
8366
 *
8367
 * DEPRECATED: Internal function, don't use.
8368
 *
8369
 * parse a DOCTYPE declaration
8370
 *
8371
 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8372
 *                      ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8373
 *
8374
 * [ VC: Root Element Type ]
8375
 * The Name in the document type declaration must match the element
8376
 * type of the root element.
8377
 */
8378
8379
void
8380
560k
xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
8381
560k
    const xmlChar *name = NULL;
8382
560k
    xmlChar *ExternalID = NULL;
8383
560k
    xmlChar *URI = NULL;
8384
8385
    /*
8386
     * We know that '<!DOCTYPE' has been detected.
8387
     */
8388
560k
    SKIP(9);
8389
8390
560k
    SKIP_BLANKS;
8391
8392
    /*
8393
     * Parse the DOCTYPE name.
8394
     */
8395
560k
    name = xmlParseName(ctxt);
8396
560k
    if (name == NULL) {
8397
3.19k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8398
3.19k
           "xmlParseDocTypeDecl : no DOCTYPE name !\n");
8399
3.19k
    }
8400
560k
    ctxt->intSubName = name;
8401
8402
560k
    SKIP_BLANKS;
8403
8404
    /*
8405
     * Check for SystemID and ExternalID
8406
     */
8407
560k
    URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8408
8409
560k
    if ((URI != NULL) || (ExternalID != NULL)) {
8410
272k
        ctxt->hasExternalSubset = 1;
8411
272k
    }
8412
560k
    ctxt->extSubURI = URI;
8413
560k
    ctxt->extSubSystem = ExternalID;
8414
8415
560k
    SKIP_BLANKS;
8416
8417
    /*
8418
     * Create and update the internal subset.
8419
     */
8420
560k
    if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8421
560k
  (!ctxt->disableSAX))
8422
541k
  ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8423
560k
    if (ctxt->instate == XML_PARSER_EOF)
8424
0
  return;
8425
8426
    /*
8427
     * Are there any internal subset declarations?
8428
     * they are handled separately in xmlParseInternalSubset()
8429
     */
8430
560k
    if (RAW == '[')
8431
382k
  return;
8432
8433
    /*
8434
     * We should be at the end of the DOCTYPE declaration.
8435
     */
8436
177k
    if (RAW != '>') {
8437
31.6k
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8438
31.6k
    }
8439
177k
    NEXT;
8440
177k
}
8441
8442
/**
8443
 * xmlParseInternalSubset:
8444
 * @ctxt:  an XML parser context
8445
 *
8446
 * parse the internal subset declaration
8447
 *
8448
 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8449
 */
8450
8451
static void
8452
385k
xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8453
    /*
8454
     * Is there any DTD definition ?
8455
     */
8456
385k
    if (RAW == '[') {
8457
385k
        int baseInputNr = ctxt->inputNr;
8458
385k
        ctxt->instate = XML_PARSER_DTD;
8459
385k
        NEXT;
8460
  /*
8461
   * Parse the succession of Markup declarations and
8462
   * PEReferences.
8463
   * Subsequence (markupdecl | PEReference | S)*
8464
   */
8465
385k
  SKIP_BLANKS;
8466
419M
  while (((RAW != ']') || (ctxt->inputNr > baseInputNr)) &&
8467
419M
               (ctxt->instate != XML_PARSER_EOF)) {
8468
8469
            /*
8470
             * Conditional sections are allowed from external entities included
8471
             * by PE References in the internal subset.
8472
             */
8473
419M
            if ((ctxt->inputNr > 1) && (ctxt->input->filename != NULL) &&
8474
419M
                (RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
8475
0
                xmlParseConditionalSections(ctxt);
8476
419M
            } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
8477
418M
          xmlParseMarkupDecl(ctxt);
8478
418M
            } else if (RAW == '%') {
8479
442k
          xmlParsePEReference(ctxt);
8480
442k
            } else {
8481
75.8k
    xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8482
75.8k
                        "xmlParseInternalSubset: error detected in"
8483
75.8k
                        " Markup declaration\n");
8484
75.8k
                xmlHaltParser(ctxt);
8485
75.8k
                return;
8486
75.8k
            }
8487
419M
      SKIP_BLANKS;
8488
419M
  }
8489
309k
  if (RAW == ']') {
8490
284k
      NEXT;
8491
284k
      SKIP_BLANKS;
8492
284k
  }
8493
309k
    }
8494
8495
    /*
8496
     * We should be at the end of the DOCTYPE declaration.
8497
     */
8498
309k
    if (RAW != '>') {
8499
26.5k
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8500
26.5k
  return;
8501
26.5k
    }
8502
282k
    NEXT;
8503
282k
}
8504
8505
#ifdef LIBXML_SAX1_ENABLED
8506
/**
8507
 * xmlParseAttribute:
8508
 * @ctxt:  an XML parser context
8509
 * @value:  a xmlChar ** used to store the value of the attribute
8510
 *
8511
 * DEPRECATED: Internal function, don't use.
8512
 *
8513
 * parse an attribute
8514
 *
8515
 * [41] Attribute ::= Name Eq AttValue
8516
 *
8517
 * [ WFC: No External Entity References ]
8518
 * Attribute values cannot contain direct or indirect entity references
8519
 * to external entities.
8520
 *
8521
 * [ WFC: No < in Attribute Values ]
8522
 * The replacement text of any entity referred to directly or indirectly in
8523
 * an attribute value (other than "&lt;") must not contain a <.
8524
 *
8525
 * [ VC: Attribute Value Type ]
8526
 * The attribute must have been declared; the value must be of the type
8527
 * declared for it.
8528
 *
8529
 * [25] Eq ::= S? '=' S?
8530
 *
8531
 * With namespace:
8532
 *
8533
 * [NS 11] Attribute ::= QName Eq AttValue
8534
 *
8535
 * Also the case QName == xmlns:??? is handled independently as a namespace
8536
 * definition.
8537
 *
8538
 * Returns the attribute name, and the value in *value.
8539
 */
8540
8541
const xmlChar *
8542
18.0M
xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8543
18.0M
    const xmlChar *name;
8544
18.0M
    xmlChar *val;
8545
8546
18.0M
    *value = NULL;
8547
18.0M
    GROW;
8548
18.0M
    name = xmlParseName(ctxt);
8549
18.0M
    if (name == NULL) {
8550
608k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8551
608k
                 "error parsing attribute name\n");
8552
608k
        return(NULL);
8553
608k
    }
8554
8555
    /*
8556
     * read the value
8557
     */
8558
17.4M
    SKIP_BLANKS;
8559
17.4M
    if (RAW == '=') {
8560
17.1M
        NEXT;
8561
17.1M
  SKIP_BLANKS;
8562
17.1M
  val = xmlParseAttValue(ctxt);
8563
17.1M
  ctxt->instate = XML_PARSER_CONTENT;
8564
17.1M
    } else {
8565
271k
  xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8566
271k
         "Specification mandates value for attribute %s\n", name);
8567
271k
  return(name);
8568
271k
    }
8569
8570
    /*
8571
     * Check that xml:lang conforms to the specification
8572
     * No longer registered as an error, just generate a warning now
8573
     * since this was deprecated in XML second edition
8574
     */
8575
17.1M
    if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8576
21.6k
  if (!xmlCheckLanguageID(val)) {
8577
10.6k
      xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8578
10.6k
              "Malformed value for xml:lang : %s\n",
8579
10.6k
        val, NULL);
8580
10.6k
  }
8581
21.6k
    }
8582
8583
    /*
8584
     * Check that xml:space conforms to the specification
8585
     */
8586
17.1M
    if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8587
1.38k
  if (xmlStrEqual(val, BAD_CAST "default"))
8588
0
      *(ctxt->space) = 0;
8589
1.38k
  else if (xmlStrEqual(val, BAD_CAST "preserve"))
8590
760
      *(ctxt->space) = 1;
8591
622
  else {
8592
622
    xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8593
622
"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8594
622
                                 val, NULL);
8595
622
  }
8596
1.38k
    }
8597
8598
17.1M
    *value = val;
8599
17.1M
    return(name);
8600
17.4M
}
8601
8602
/**
8603
 * xmlParseStartTag:
8604
 * @ctxt:  an XML parser context
8605
 *
8606
 * DEPRECATED: Internal function, don't use.
8607
 *
8608
 * Parse a start tag. Always consumes '<'.
8609
 *
8610
 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8611
 *
8612
 * [ WFC: Unique Att Spec ]
8613
 * No attribute name may appear more than once in the same start-tag or
8614
 * empty-element tag.
8615
 *
8616
 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8617
 *
8618
 * [ WFC: Unique Att Spec ]
8619
 * No attribute name may appear more than once in the same start-tag or
8620
 * empty-element tag.
8621
 *
8622
 * With namespace:
8623
 *
8624
 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8625
 *
8626
 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8627
 *
8628
 * Returns the element name parsed
8629
 */
8630
8631
const xmlChar *
8632
19.8M
xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8633
19.8M
    const xmlChar *name;
8634
19.8M
    const xmlChar *attname;
8635
19.8M
    xmlChar *attvalue;
8636
19.8M
    const xmlChar **atts = ctxt->atts;
8637
19.8M
    int nbatts = 0;
8638
19.8M
    int maxatts = ctxt->maxatts;
8639
19.8M
    int i;
8640
8641
19.8M
    if (RAW != '<') return(NULL);
8642
19.8M
    NEXT1;
8643
8644
19.8M
    name = xmlParseName(ctxt);
8645
19.8M
    if (name == NULL) {
8646
261k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8647
261k
       "xmlParseStartTag: invalid element name\n");
8648
261k
        return(NULL);
8649
261k
    }
8650
8651
    /*
8652
     * Now parse the attributes, it ends up with the ending
8653
     *
8654
     * (S Attribute)* S?
8655
     */
8656
19.5M
    SKIP_BLANKS;
8657
19.5M
    GROW;
8658
8659
26.2M
    while (((RAW != '>') &&
8660
26.2M
     ((RAW != '/') || (NXT(1) != '>')) &&
8661
26.2M
     (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
8662
18.0M
  attname = xmlParseAttribute(ctxt, &attvalue);
8663
18.0M
        if (attname == NULL) {
8664
608k
      xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8665
608k
         "xmlParseStartTag: problem parsing attributes\n");
8666
608k
      break;
8667
608k
  }
8668
17.4M
        if (attvalue != NULL) {
8669
      /*
8670
       * [ WFC: Unique Att Spec ]
8671
       * No attribute name may appear more than once in the same
8672
       * start-tag or empty-element tag.
8673
       */
8674
23.3M
      for (i = 0; i < nbatts;i += 2) {
8675
6.24M
          if (xmlStrEqual(atts[i], attname)) {
8676
11.3k
        xmlErrAttributeDup(ctxt, NULL, attname);
8677
11.3k
        xmlFree(attvalue);
8678
11.3k
        goto failed;
8679
11.3k
    }
8680
6.24M
      }
8681
      /*
8682
       * Add the pair to atts
8683
       */
8684
17.1M
      if (atts == NULL) {
8685
137k
          maxatts = 22; /* allow for 10 attrs by default */
8686
137k
          atts = (const xmlChar **)
8687
137k
           xmlMalloc(maxatts * sizeof(xmlChar *));
8688
137k
    if (atts == NULL) {
8689
0
        xmlErrMemory(ctxt, NULL);
8690
0
        if (attvalue != NULL)
8691
0
      xmlFree(attvalue);
8692
0
        goto failed;
8693
0
    }
8694
137k
    ctxt->atts = atts;
8695
137k
    ctxt->maxatts = maxatts;
8696
16.9M
      } else if (nbatts + 4 > maxatts) {
8697
324
          const xmlChar **n;
8698
8699
324
          maxatts *= 2;
8700
324
          n = (const xmlChar **) xmlRealloc((void *) atts,
8701
324
               maxatts * sizeof(const xmlChar *));
8702
324
    if (n == NULL) {
8703
0
        xmlErrMemory(ctxt, NULL);
8704
0
        if (attvalue != NULL)
8705
0
      xmlFree(attvalue);
8706
0
        goto failed;
8707
0
    }
8708
324
    atts = n;
8709
324
    ctxt->atts = atts;
8710
324
    ctxt->maxatts = maxatts;
8711
324
      }
8712
17.1M
      atts[nbatts++] = attname;
8713
17.1M
      atts[nbatts++] = attvalue;
8714
17.1M
      atts[nbatts] = NULL;
8715
17.1M
      atts[nbatts + 1] = NULL;
8716
17.1M
  } else {
8717
303k
      if (attvalue != NULL)
8718
0
    xmlFree(attvalue);
8719
303k
  }
8720
8721
17.4M
failed:
8722
8723
17.4M
  GROW
8724
17.4M
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8725
10.7M
      break;
8726
6.67M
  if (SKIP_BLANKS == 0) {
8727
578k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8728
578k
         "attributes construct error\n");
8729
578k
  }
8730
6.67M
  SHRINK;
8731
6.67M
        GROW;
8732
6.67M
    }
8733
8734
    /*
8735
     * SAX: Start of Element !
8736
     */
8737
19.5M
    if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8738
19.5M
  (!ctxt->disableSAX)) {
8739
18.4M
  if (nbatts > 0)
8740
10.4M
      ctxt->sax->startElement(ctxt->userData, name, atts);
8741
7.97M
  else
8742
7.97M
      ctxt->sax->startElement(ctxt->userData, name, NULL);
8743
18.4M
    }
8744
8745
19.5M
    if (atts != NULL) {
8746
        /* Free only the content strings */
8747
36.0M
        for (i = 1;i < nbatts;i+=2)
8748
17.1M
      if (atts[i] != NULL)
8749
17.1M
         xmlFree((xmlChar *) atts[i]);
8750
18.9M
    }
8751
19.5M
    return(name);
8752
19.5M
}
8753
8754
/**
8755
 * xmlParseEndTag1:
8756
 * @ctxt:  an XML parser context
8757
 * @line:  line of the start tag
8758
 * @nsNr:  number of namespaces on the start tag
8759
 *
8760
 * Parse an end tag. Always consumes '</'.
8761
 *
8762
 * [42] ETag ::= '</' Name S? '>'
8763
 *
8764
 * With namespace
8765
 *
8766
 * [NS 9] ETag ::= '</' QName S? '>'
8767
 */
8768
8769
static void
8770
12.3M
xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8771
12.3M
    const xmlChar *name;
8772
8773
12.3M
    GROW;
8774
12.3M
    if ((RAW != '<') || (NXT(1) != '/')) {
8775
0
  xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8776
0
           "xmlParseEndTag: '</' not found\n");
8777
0
  return;
8778
0
    }
8779
12.3M
    SKIP(2);
8780
8781
12.3M
    name = xmlParseNameAndCompare(ctxt,ctxt->name);
8782
8783
    /*
8784
     * We should definitely be at the ending "S? '>'" part
8785
     */
8786
12.3M
    GROW;
8787
12.3M
    SKIP_BLANKS;
8788
12.3M
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8789
89.4k
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8790
89.4k
    } else
8791
12.3M
  NEXT1;
8792
8793
    /*
8794
     * [ WFC: Element Type Match ]
8795
     * The Name in an element's end-tag must match the element type in the
8796
     * start-tag.
8797
     *
8798
     */
8799
12.3M
    if (name != (xmlChar*)1) {
8800
330k
        if (name == NULL) name = BAD_CAST "unparsable";
8801
330k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8802
330k
         "Opening and ending tag mismatch: %s line %d and %s\n",
8803
330k
                    ctxt->name, line, name);
8804
330k
    }
8805
8806
    /*
8807
     * SAX: End of Tag
8808
     */
8809
12.3M
    if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8810
12.3M
  (!ctxt->disableSAX))
8811
11.7M
        ctxt->sax->endElement(ctxt->userData, ctxt->name);
8812
8813
12.3M
    namePop(ctxt);
8814
12.3M
    spacePop(ctxt);
8815
12.3M
    return;
8816
12.3M
}
8817
8818
/**
8819
 * xmlParseEndTag:
8820
 * @ctxt:  an XML parser context
8821
 *
8822
 * DEPRECATED: Internal function, don't use.
8823
 *
8824
 * parse an end of tag
8825
 *
8826
 * [42] ETag ::= '</' Name S? '>'
8827
 *
8828
 * With namespace
8829
 *
8830
 * [NS 9] ETag ::= '</' QName S? '>'
8831
 */
8832
8833
void
8834
0
xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8835
0
    xmlParseEndTag1(ctxt, 0);
8836
0
}
8837
#endif /* LIBXML_SAX1_ENABLED */
8838
8839
/************************************************************************
8840
 *                  *
8841
 *          SAX 2 specific operations       *
8842
 *                  *
8843
 ************************************************************************/
8844
8845
/*
8846
 * xmlGetNamespace:
8847
 * @ctxt:  an XML parser context
8848
 * @prefix:  the prefix to lookup
8849
 *
8850
 * Lookup the namespace name for the @prefix (which can be NULL)
8851
 * The prefix must come from the @ctxt->dict dictionary
8852
 *
8853
 * Returns the namespace name or NULL if not bound
8854
 */
8855
static const xmlChar *
8856
33.4M
xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8857
33.4M
    int i;
8858
8859
33.4M
    if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
8860
39.2M
    for (i = ctxt->nsNr - 2;i >= 0;i-=2)
8861
8.23M
        if (ctxt->nsTab[i] == prefix) {
8862
1.82M
      if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8863
50.5k
          return(NULL);
8864
1.77M
      return(ctxt->nsTab[i + 1]);
8865
1.82M
  }
8866
31.0M
    return(NULL);
8867
32.8M
}
8868
8869
/**
8870
 * xmlParseQName:
8871
 * @ctxt:  an XML parser context
8872
 * @prefix:  pointer to store the prefix part
8873
 *
8874
 * parse an XML Namespace QName
8875
 *
8876
 * [6]  QName  ::= (Prefix ':')? LocalPart
8877
 * [7]  Prefix  ::= NCName
8878
 * [8]  LocalPart  ::= NCName
8879
 *
8880
 * Returns the Name parsed or NULL
8881
 */
8882
8883
static const xmlChar *
8884
64.1M
xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8885
64.1M
    const xmlChar *l, *p;
8886
8887
64.1M
    GROW;
8888
8889
64.1M
    l = xmlParseNCName(ctxt);
8890
64.1M
    if (l == NULL) {
8891
805k
        if (CUR == ':') {
8892
15.1k
      l = xmlParseName(ctxt);
8893
15.1k
      if (l != NULL) {
8894
15.1k
          xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8895
15.1k
             "Failed to parse QName '%s'\n", l, NULL, NULL);
8896
15.1k
    *prefix = NULL;
8897
15.1k
    return(l);
8898
15.1k
      }
8899
15.1k
  }
8900
790k
        return(NULL);
8901
805k
    }
8902
63.3M
    if (CUR == ':') {
8903
2.58M
        NEXT;
8904
2.58M
  p = l;
8905
2.58M
  l = xmlParseNCName(ctxt);
8906
2.58M
  if (l == NULL) {
8907
49.5k
      xmlChar *tmp;
8908
8909
49.5k
            if (ctxt->instate == XML_PARSER_EOF)
8910
0
                return(NULL);
8911
49.5k
            xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8912
49.5k
               "Failed to parse QName '%s:'\n", p, NULL, NULL);
8913
49.5k
      l = xmlParseNmtoken(ctxt);
8914
49.5k
      if (l == NULL) {
8915
36.1k
                if (ctxt->instate == XML_PARSER_EOF)
8916
0
                    return(NULL);
8917
36.1k
    tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8918
36.1k
            } else {
8919
13.3k
    tmp = xmlBuildQName(l, p, NULL, 0);
8920
13.3k
    xmlFree((char *)l);
8921
13.3k
      }
8922
49.5k
      p = xmlDictLookup(ctxt->dict, tmp, -1);
8923
49.5k
      if (tmp != NULL) xmlFree(tmp);
8924
49.5k
      *prefix = NULL;
8925
49.5k
      return(p);
8926
49.5k
  }
8927
2.53M
  if (CUR == ':') {
8928
76.3k
      xmlChar *tmp;
8929
8930
76.3k
            xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8931
76.3k
               "Failed to parse QName '%s:%s:'\n", p, l, NULL);
8932
76.3k
      NEXT;
8933
76.3k
      tmp = (xmlChar *) xmlParseName(ctxt);
8934
76.3k
      if (tmp != NULL) {
8935
65.3k
          tmp = xmlBuildQName(tmp, l, NULL, 0);
8936
65.3k
    l = xmlDictLookup(ctxt->dict, tmp, -1);
8937
65.3k
    if (tmp != NULL) xmlFree(tmp);
8938
65.3k
    *prefix = p;
8939
65.3k
    return(l);
8940
65.3k
      }
8941
11.0k
            if (ctxt->instate == XML_PARSER_EOF)
8942
0
                return(NULL);
8943
11.0k
      tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8944
11.0k
      l = xmlDictLookup(ctxt->dict, tmp, -1);
8945
11.0k
      if (tmp != NULL) xmlFree(tmp);
8946
11.0k
      *prefix = p;
8947
11.0k
      return(l);
8948
11.0k
  }
8949
2.45M
  *prefix = p;
8950
2.45M
    } else
8951
60.7M
        *prefix = NULL;
8952
63.2M
    return(l);
8953
63.3M
}
8954
8955
/**
8956
 * xmlParseQNameAndCompare:
8957
 * @ctxt:  an XML parser context
8958
 * @name:  the localname
8959
 * @prefix:  the prefix, if any.
8960
 *
8961
 * parse an XML QName and compare it for a match
8962
 * (specialized for endtag parsing)
8963
 *
8964
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8965
 * and the name for mismatch
8966
 */
8967
8968
static const xmlChar *
8969
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8970
607k
                        xmlChar const *prefix) {
8971
607k
    const xmlChar *cmp;
8972
607k
    const xmlChar *in;
8973
607k
    const xmlChar *ret;
8974
607k
    const xmlChar *prefix2;
8975
8976
607k
    if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8977
8978
607k
    GROW;
8979
607k
    in = ctxt->input->cur;
8980
8981
607k
    cmp = prefix;
8982
2.15M
    while (*in != 0 && *in == *cmp) {
8983
1.54M
  ++in;
8984
1.54M
  ++cmp;
8985
1.54M
    }
8986
607k
    if ((*cmp == 0) && (*in == ':')) {
8987
550k
        in++;
8988
550k
  cmp = name;
8989
4.24M
  while (*in != 0 && *in == *cmp) {
8990
3.69M
      ++in;
8991
3.69M
      ++cmp;
8992
3.69M
  }
8993
550k
  if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8994
      /* success */
8995
417k
            ctxt->input->col += in - ctxt->input->cur;
8996
417k
      ctxt->input->cur = in;
8997
417k
      return((const xmlChar*) 1);
8998
417k
  }
8999
550k
    }
9000
    /*
9001
     * all strings coms from the dictionary, equality can be done directly
9002
     */
9003
190k
    ret = xmlParseQName (ctxt, &prefix2);
9004
190k
    if ((ret == name) && (prefix == prefix2))
9005
2.81k
  return((const xmlChar*) 1);
9006
187k
    return ret;
9007
190k
}
9008
9009
/**
9010
 * xmlParseAttValueInternal:
9011
 * @ctxt:  an XML parser context
9012
 * @len:  attribute len result
9013
 * @alloc:  whether the attribute was reallocated as a new string
9014
 * @normalize:  if 1 then further non-CDATA normalization must be done
9015
 *
9016
 * parse a value for an attribute.
9017
 * NOTE: if no normalization is needed, the routine will return pointers
9018
 *       directly from the data buffer.
9019
 *
9020
 * 3.3.3 Attribute-Value Normalization:
9021
 * Before the value of an attribute is passed to the application or
9022
 * checked for validity, the XML processor must normalize it as follows:
9023
 * - a character reference is processed by appending the referenced
9024
 *   character to the attribute value
9025
 * - an entity reference is processed by recursively processing the
9026
 *   replacement text of the entity
9027
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
9028
 *   appending #x20 to the normalized value, except that only a single
9029
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
9030
 *   parsed entity or the literal entity value of an internal parsed entity
9031
 * - other characters are processed by appending them to the normalized value
9032
 * If the declared value is not CDATA, then the XML processor must further
9033
 * process the normalized attribute value by discarding any leading and
9034
 * trailing space (#x20) characters, and by replacing sequences of space
9035
 * (#x20) characters by a single space (#x20) character.
9036
 * All attributes for which no declaration has been read should be treated
9037
 * by a non-validating parser as if declared CDATA.
9038
 *
9039
 * Returns the AttValue parsed or NULL. The value has to be freed by the
9040
 *     caller if it was copied, this can be detected by val[*len] == 0.
9041
 */
9042
9043
#define GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end) \
9044
36.0k
    const xmlChar *oldbase = ctxt->input->base;\
9045
36.0k
    GROW;\
9046
36.0k
    if (ctxt->instate == XML_PARSER_EOF)\
9047
36.0k
        return(NULL);\
9048
36.0k
    if (oldbase != ctxt->input->base) {\
9049
0
        ptrdiff_t delta = ctxt->input->base - oldbase;\
9050
0
        start = start + delta;\
9051
0
        in = in + delta;\
9052
0
    }\
9053
36.0k
    end = ctxt->input->end;
9054
9055
static xmlChar *
9056
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
9057
                         int normalize)
9058
49.2M
{
9059
49.2M
    xmlChar limit = 0;
9060
49.2M
    const xmlChar *in = NULL, *start, *end, *last;
9061
49.2M
    xmlChar *ret = NULL;
9062
49.2M
    int line, col;
9063
49.2M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9064
16.0M
                    XML_MAX_HUGE_LENGTH :
9065
49.2M
                    XML_MAX_TEXT_LENGTH;
9066
9067
49.2M
    GROW;
9068
49.2M
    in = (xmlChar *) CUR_PTR;
9069
49.2M
    line = ctxt->input->line;
9070
49.2M
    col = ctxt->input->col;
9071
49.2M
    if (*in != '"' && *in != '\'') {
9072
87.4k
        xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
9073
87.4k
        return (NULL);
9074
87.4k
    }
9075
49.1M
    ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
9076
9077
    /*
9078
     * try to handle in this routine the most common case where no
9079
     * allocation of a new string is required and where content is
9080
     * pure ASCII.
9081
     */
9082
49.1M
    limit = *in++;
9083
49.1M
    col++;
9084
49.1M
    end = ctxt->input->end;
9085
49.1M
    start = in;
9086
49.1M
    if (in >= end) {
9087
1.60k
        GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9088
1.60k
    }
9089
49.1M
    if (normalize) {
9090
        /*
9091
   * Skip any leading spaces
9092
   */
9093
3.12M
  while ((in < end) && (*in != limit) &&
9094
3.12M
         ((*in == 0x20) || (*in == 0x9) ||
9095
3.11M
          (*in == 0xA) || (*in == 0xD))) {
9096
208k
      if (*in == 0xA) {
9097
79.0k
          line++; col = 1;
9098
129k
      } else {
9099
129k
          col++;
9100
129k
      }
9101
208k
      in++;
9102
208k
      start = in;
9103
208k
      if (in >= end) {
9104
269
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9105
269
                if ((in - start) > maxLength) {
9106
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9107
0
                                   "AttValue length too long\n");
9108
0
                    return(NULL);
9109
0
                }
9110
269
      }
9111
208k
  }
9112
29.2M
  while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9113
29.2M
         (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9114
26.3M
      col++;
9115
26.3M
      if ((*in++ == 0x20) && (*in == 0x20)) break;
9116
26.3M
      if (in >= end) {
9117
439
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9118
439
                if ((in - start) > maxLength) {
9119
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9120
0
                                   "AttValue length too long\n");
9121
0
                    return(NULL);
9122
0
                }
9123
439
      }
9124
26.3M
  }
9125
2.91M
  last = in;
9126
  /*
9127
   * skip the trailing blanks
9128
   */
9129
2.92M
  while ((last[-1] == 0x20) && (last > start)) last--;
9130
3.12M
  while ((in < end) && (*in != limit) &&
9131
3.12M
         ((*in == 0x20) || (*in == 0x9) ||
9132
315k
          (*in == 0xA) || (*in == 0xD))) {
9133
216k
      if (*in == 0xA) {
9134
64.0k
          line++, col = 1;
9135
152k
      } else {
9136
152k
          col++;
9137
152k
      }
9138
216k
      in++;
9139
216k
      if (in >= end) {
9140
274
    const xmlChar *oldbase = ctxt->input->base;
9141
274
    GROW;
9142
274
                if (ctxt->instate == XML_PARSER_EOF)
9143
0
                    return(NULL);
9144
274
    if (oldbase != ctxt->input->base) {
9145
0
        ptrdiff_t delta = ctxt->input->base - oldbase;
9146
0
        start = start + delta;
9147
0
        in = in + delta;
9148
0
        last = last + delta;
9149
0
    }
9150
274
    end = ctxt->input->end;
9151
274
                if ((in - start) > maxLength) {
9152
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9153
0
                                   "AttValue length too long\n");
9154
0
                    return(NULL);
9155
0
                }
9156
274
      }
9157
216k
  }
9158
2.91M
        if ((in - start) > maxLength) {
9159
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9160
0
                           "AttValue length too long\n");
9161
0
            return(NULL);
9162
0
        }
9163
2.91M
  if (*in != limit) goto need_complex;
9164
46.2M
    } else {
9165
519M
  while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9166
519M
         (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9167
473M
      in++;
9168
473M
      col++;
9169
473M
      if (in >= end) {
9170
33.7k
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9171
33.7k
                if ((in - start) > maxLength) {
9172
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9173
0
                                   "AttValue length too long\n");
9174
0
                    return(NULL);
9175
0
                }
9176
33.7k
      }
9177
473M
  }
9178
46.2M
  last = in;
9179
46.2M
        if ((in - start) > maxLength) {
9180
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9181
0
                           "AttValue length too long\n");
9182
0
            return(NULL);
9183
0
        }
9184
46.2M
  if (*in != limit) goto need_complex;
9185
46.2M
    }
9186
47.6M
    in++;
9187
47.6M
    col++;
9188
47.6M
    if (len != NULL) {
9189
29.9M
        if (alloc) *alloc = 0;
9190
29.9M
        *len = last - start;
9191
29.9M
        ret = (xmlChar *) start;
9192
29.9M
    } else {
9193
17.7M
        if (alloc) *alloc = 1;
9194
17.7M
        ret = xmlStrndup(start, last - start);
9195
17.7M
    }
9196
47.6M
    CUR_PTR = in;
9197
47.6M
    ctxt->input->line = line;
9198
47.6M
    ctxt->input->col = col;
9199
47.6M
    return ret;
9200
1.50M
need_complex:
9201
1.50M
    if (alloc) *alloc = 1;
9202
1.50M
    return xmlParseAttValueComplex(ctxt, len, normalize);
9203
49.1M
}
9204
9205
/**
9206
 * xmlParseAttribute2:
9207
 * @ctxt:  an XML parser context
9208
 * @pref:  the element prefix
9209
 * @elem:  the element name
9210
 * @prefix:  a xmlChar ** used to store the value of the attribute prefix
9211
 * @value:  a xmlChar ** used to store the value of the attribute
9212
 * @len:  an int * to save the length of the attribute
9213
 * @alloc:  an int * to indicate if the attribute was allocated
9214
 *
9215
 * parse an attribute in the new SAX2 framework.
9216
 *
9217
 * Returns the attribute name, and the value in *value.
9218
 */
9219
9220
static const xmlChar *
9221
xmlParseAttribute2(xmlParserCtxtPtr ctxt,
9222
                   const xmlChar * pref, const xmlChar * elem,
9223
                   const xmlChar ** prefix, xmlChar ** value,
9224
                   int *len, int *alloc)
9225
31.3M
{
9226
31.3M
    const xmlChar *name;
9227
31.3M
    xmlChar *val, *internal_val = NULL;
9228
31.3M
    int normalize = 0;
9229
9230
31.3M
    *value = NULL;
9231
31.3M
    GROW;
9232
31.3M
    name = xmlParseQName(ctxt, prefix);
9233
31.3M
    if (name == NULL) {
9234
366k
        xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9235
366k
                       "error parsing attribute name\n");
9236
366k
        return (NULL);
9237
366k
    }
9238
9239
    /*
9240
     * get the type if needed
9241
     */
9242
30.9M
    if (ctxt->attsSpecial != NULL) {
9243
5.75M
        int type;
9244
9245
5.75M
        type = (int) (ptrdiff_t) xmlHashQLookup2(ctxt->attsSpecial,
9246
5.75M
                                                 pref, elem, *prefix, name);
9247
5.75M
        if (type != 0)
9248
2.92M
            normalize = 1;
9249
5.75M
    }
9250
9251
    /*
9252
     * read the value
9253
     */
9254
30.9M
    SKIP_BLANKS;
9255
30.9M
    if (RAW == '=') {
9256
30.7M
        NEXT;
9257
30.7M
        SKIP_BLANKS;
9258
30.7M
        val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9259
30.7M
        if (val == NULL)
9260
43.3k
            return (NULL);
9261
30.7M
  if (normalize) {
9262
      /*
9263
       * Sometimes a second normalisation pass for spaces is needed
9264
       * but that only happens if charrefs or entities references
9265
       * have been used in the attribute value, i.e. the attribute
9266
       * value have been extracted in an allocated string already.
9267
       */
9268
2.91M
      if (*alloc) {
9269
100k
          const xmlChar *val2;
9270
9271
100k
          val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
9272
100k
    if ((val2 != NULL) && (val2 != val)) {
9273
12.3k
        xmlFree(val);
9274
12.3k
        val = (xmlChar *) val2;
9275
12.3k
    }
9276
100k
      }
9277
2.91M
  }
9278
30.7M
        ctxt->instate = XML_PARSER_CONTENT;
9279
30.7M
    } else {
9280
216k
        xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
9281
216k
                          "Specification mandates value for attribute %s\n",
9282
216k
                          name);
9283
216k
        return (name);
9284
216k
    }
9285
9286
30.7M
    if (*prefix == ctxt->str_xml) {
9287
        /*
9288
         * Check that xml:lang conforms to the specification
9289
         * No more registered as an error, just generate a warning now
9290
         * since this was deprecated in XML second edition
9291
         */
9292
90.2k
        if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
9293
17.5k
            internal_val = xmlStrndup(val, *len);
9294
17.5k
            if (!xmlCheckLanguageID(internal_val)) {
9295
9.97k
                xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9296
9.97k
                              "Malformed value for xml:lang : %s\n",
9297
9.97k
                              internal_val, NULL);
9298
9.97k
            }
9299
17.5k
        }
9300
9301
        /*
9302
         * Check that xml:space conforms to the specification
9303
         */
9304
90.2k
        if (xmlStrEqual(name, BAD_CAST "space")) {
9305
1.11k
            internal_val = xmlStrndup(val, *len);
9306
1.11k
            if (xmlStrEqual(internal_val, BAD_CAST "default"))
9307
0
                *(ctxt->space) = 0;
9308
1.11k
            else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9309
490
                *(ctxt->space) = 1;
9310
629
            else {
9311
629
                xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9312
629
                              "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9313
629
                              internal_val, NULL);
9314
629
            }
9315
1.11k
        }
9316
90.2k
        if (internal_val) {
9317
18.7k
            xmlFree(internal_val);
9318
18.7k
        }
9319
90.2k
    }
9320
9321
30.7M
    *value = val;
9322
30.7M
    return (name);
9323
30.9M
}
9324
/**
9325
 * xmlParseStartTag2:
9326
 * @ctxt:  an XML parser context
9327
 *
9328
 * Parse a start tag. Always consumes '<'.
9329
 *
9330
 * This routine is called when running SAX2 parsing
9331
 *
9332
 * [40] STag ::= '<' Name (S Attribute)* S? '>'
9333
 *
9334
 * [ WFC: Unique Att Spec ]
9335
 * No attribute name may appear more than once in the same start-tag or
9336
 * empty-element tag.
9337
 *
9338
 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9339
 *
9340
 * [ WFC: Unique Att Spec ]
9341
 * No attribute name may appear more than once in the same start-tag or
9342
 * empty-element tag.
9343
 *
9344
 * With namespace:
9345
 *
9346
 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9347
 *
9348
 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9349
 *
9350
 * Returns the element name parsed
9351
 */
9352
9353
static const xmlChar *
9354
xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
9355
32.6M
                  const xmlChar **URI, int *tlen) {
9356
32.6M
    const xmlChar *localname;
9357
32.6M
    const xmlChar *prefix;
9358
32.6M
    const xmlChar *attname;
9359
32.6M
    const xmlChar *aprefix;
9360
32.6M
    const xmlChar *nsname;
9361
32.6M
    xmlChar *attvalue;
9362
32.6M
    const xmlChar **atts = ctxt->atts;
9363
32.6M
    int maxatts = ctxt->maxatts;
9364
32.6M
    int nratts, nbatts, nbdef, inputid;
9365
32.6M
    int i, j, nbNs, attval;
9366
32.6M
    unsigned long cur;
9367
32.6M
    int nsNr = ctxt->nsNr;
9368
9369
32.6M
    if (RAW != '<') return(NULL);
9370
32.6M
    NEXT1;
9371
9372
    /*
9373
     * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
9374
     *       point since the attribute values may be stored as pointers to
9375
     *       the buffer and calling SHRINK would destroy them !
9376
     *       The Shrinking is only possible once the full set of attribute
9377
     *       callbacks have been done.
9378
     */
9379
32.6M
    SHRINK;
9380
32.6M
    cur = ctxt->input->cur - ctxt->input->base;
9381
32.6M
    inputid = ctxt->input->id;
9382
32.6M
    nbatts = 0;
9383
32.6M
    nratts = 0;
9384
32.6M
    nbdef = 0;
9385
32.6M
    nbNs = 0;
9386
32.6M
    attval = 0;
9387
    /* Forget any namespaces added during an earlier parse of this element. */
9388
32.6M
    ctxt->nsNr = nsNr;
9389
9390
32.6M
    localname = xmlParseQName(ctxt, &prefix);
9391
32.6M
    if (localname == NULL) {
9392
420k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9393
420k
           "StartTag: invalid element name\n");
9394
420k
        return(NULL);
9395
420k
    }
9396
32.2M
    *tlen = ctxt->input->cur - ctxt->input->base - cur;
9397
9398
    /*
9399
     * Now parse the attributes, it ends up with the ending
9400
     *
9401
     * (S Attribute)* S?
9402
     */
9403
32.2M
    SKIP_BLANKS;
9404
32.2M
    GROW;
9405
9406
43.6M
    while (((RAW != '>') &&
9407
43.6M
     ((RAW != '/') || (NXT(1) != '>')) &&
9408
43.6M
     (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
9409
31.3M
  int len = -1, alloc = 0;
9410
9411
31.3M
  attname = xmlParseAttribute2(ctxt, prefix, localname,
9412
31.3M
                               &aprefix, &attvalue, &len, &alloc);
9413
31.3M
        if (attname == NULL) {
9414
409k
      xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9415
409k
           "xmlParseStartTag: problem parsing attributes\n");
9416
409k
      break;
9417
409k
  }
9418
30.9M
        if (attvalue == NULL)
9419
216k
            goto next_attr;
9420
30.7M
  if (len < 0) len = xmlStrlen(attvalue);
9421
9422
30.7M
        if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9423
160k
            const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9424
160k
            xmlURIPtr uri;
9425
9426
160k
            if (URL == NULL) {
9427
0
                xmlErrMemory(ctxt, "dictionary allocation failure");
9428
0
                if ((attvalue != NULL) && (alloc != 0))
9429
0
                    xmlFree(attvalue);
9430
0
                localname = NULL;
9431
0
                goto done;
9432
0
            }
9433
160k
            if (*URL != 0) {
9434
157k
                uri = xmlParseURI((const char *) URL);
9435
157k
                if (uri == NULL) {
9436
52.2k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
9437
52.2k
                             "xmlns: '%s' is not a valid URI\n",
9438
52.2k
                                       URL, NULL, NULL);
9439
105k
                } else {
9440
105k
                    if (uri->scheme == NULL) {
9441
75.4k
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9442
75.4k
                                  "xmlns: URI %s is not absolute\n",
9443
75.4k
                                  URL, NULL, NULL);
9444
75.4k
                    }
9445
105k
                    xmlFreeURI(uri);
9446
105k
                }
9447
157k
                if (URL == ctxt->str_xml_ns) {
9448
0
                    if (attname != ctxt->str_xml) {
9449
0
                        xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9450
0
                     "xml namespace URI cannot be the default namespace\n",
9451
0
                                 NULL, NULL, NULL);
9452
0
                    }
9453
0
                    goto next_attr;
9454
0
                }
9455
157k
                if ((len == 29) &&
9456
157k
                    (xmlStrEqual(URL,
9457
2.12k
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9458
0
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9459
0
                         "reuse of the xmlns namespace name is forbidden\n",
9460
0
                             NULL, NULL, NULL);
9461
0
                    goto next_attr;
9462
0
                }
9463
157k
            }
9464
            /*
9465
             * check that it's not a defined namespace
9466
             */
9467
175k
            for (j = 1;j <= nbNs;j++)
9468
16.5k
                if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9469
1.67k
                    break;
9470
160k
            if (j <= nbNs)
9471
1.67k
                xmlErrAttributeDup(ctxt, NULL, attname);
9472
158k
            else
9473
158k
                if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
9474
9475
30.5M
        } else if (aprefix == ctxt->str_xmlns) {
9476
182k
            const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9477
182k
            xmlURIPtr uri;
9478
9479
182k
            if (attname == ctxt->str_xml) {
9480
1.28k
                if (URL != ctxt->str_xml_ns) {
9481
1.28k
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9482
1.28k
                             "xml namespace prefix mapped to wrong URI\n",
9483
1.28k
                             NULL, NULL, NULL);
9484
1.28k
                }
9485
                /*
9486
                 * Do not keep a namespace definition node
9487
                 */
9488
1.28k
                goto next_attr;
9489
1.28k
            }
9490
181k
            if (URL == ctxt->str_xml_ns) {
9491
0
                if (attname != ctxt->str_xml) {
9492
0
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9493
0
                             "xml namespace URI mapped to wrong prefix\n",
9494
0
                             NULL, NULL, NULL);
9495
0
                }
9496
0
                goto next_attr;
9497
0
            }
9498
181k
            if (attname == ctxt->str_xmlns) {
9499
823
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9500
823
                         "redefinition of the xmlns prefix is forbidden\n",
9501
823
                         NULL, NULL, NULL);
9502
823
                goto next_attr;
9503
823
            }
9504
180k
            if ((len == 29) &&
9505
180k
                (xmlStrEqual(URL,
9506
1.35k
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9507
0
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9508
0
                         "reuse of the xmlns namespace name is forbidden\n",
9509
0
                         NULL, NULL, NULL);
9510
0
                goto next_attr;
9511
0
            }
9512
180k
            if ((URL == NULL) || (URL[0] == 0)) {
9513
19.5k
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9514
19.5k
                         "xmlns:%s: Empty XML namespace is not allowed\n",
9515
19.5k
                              attname, NULL, NULL);
9516
19.5k
                goto next_attr;
9517
161k
            } else {
9518
161k
                uri = xmlParseURI((const char *) URL);
9519
161k
                if (uri == NULL) {
9520
28.6k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
9521
28.6k
                         "xmlns:%s: '%s' is not a valid URI\n",
9522
28.6k
                                       attname, URL, NULL);
9523
132k
                } else {
9524
132k
                    if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9525
11.7k
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9526
11.7k
                                  "xmlns:%s: URI %s is not absolute\n",
9527
11.7k
                                  attname, URL, NULL);
9528
11.7k
                    }
9529
132k
                    xmlFreeURI(uri);
9530
132k
                }
9531
161k
            }
9532
9533
            /*
9534
             * check that it's not a defined namespace
9535
             */
9536
223k
            for (j = 1;j <= nbNs;j++)
9537
68.6k
                if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9538
6.05k
                    break;
9539
161k
            if (j <= nbNs)
9540
6.05k
                xmlErrAttributeDup(ctxt, aprefix, attname);
9541
155k
            else
9542
155k
                if (nsPush(ctxt, attname, URL) > 0) nbNs++;
9543
9544
30.3M
        } else {
9545
            /*
9546
             * Add the pair to atts
9547
             */
9548
30.3M
            if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9549
166k
                if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9550
0
                    goto next_attr;
9551
0
                }
9552
166k
                maxatts = ctxt->maxatts;
9553
166k
                atts = ctxt->atts;
9554
166k
            }
9555
30.3M
            ctxt->attallocs[nratts++] = alloc;
9556
30.3M
            atts[nbatts++] = attname;
9557
30.3M
            atts[nbatts++] = aprefix;
9558
            /*
9559
             * The namespace URI field is used temporarily to point at the
9560
             * base of the current input buffer for non-alloced attributes.
9561
             * When the input buffer is reallocated, all the pointers become
9562
             * invalid, but they can be reconstructed later.
9563
             */
9564
30.3M
            if (alloc)
9565
711k
                atts[nbatts++] = NULL;
9566
29.6M
            else
9567
29.6M
                atts[nbatts++] = ctxt->input->base;
9568
30.3M
            atts[nbatts++] = attvalue;
9569
30.3M
            attvalue += len;
9570
30.3M
            atts[nbatts++] = attvalue;
9571
            /*
9572
             * tag if some deallocation is needed
9573
             */
9574
30.3M
            if (alloc != 0) attval = 1;
9575
30.3M
            attvalue = NULL; /* moved into atts */
9576
30.3M
        }
9577
9578
30.9M
next_attr:
9579
30.9M
        if ((attvalue != NULL) && (alloc != 0)) {
9580
119k
            xmlFree(attvalue);
9581
119k
            attvalue = NULL;
9582
119k
        }
9583
9584
30.9M
  GROW
9585
30.9M
        if (ctxt->instate == XML_PARSER_EOF)
9586
0
            break;
9587
30.9M
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9588
18.9M
      break;
9589
11.9M
  if (SKIP_BLANKS == 0) {
9590
536k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9591
536k
         "attributes construct error\n");
9592
536k
      break;
9593
536k
  }
9594
11.4M
        GROW;
9595
11.4M
    }
9596
9597
32.2M
    if (ctxt->input->id != inputid) {
9598
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9599
0
                    "Unexpected change of input\n");
9600
0
        localname = NULL;
9601
0
        goto done;
9602
0
    }
9603
9604
    /* Reconstruct attribute value pointers. */
9605
62.6M
    for (i = 0, j = 0; j < nratts; i += 5, j++) {
9606
30.3M
        if (atts[i+2] != NULL) {
9607
            /*
9608
             * Arithmetic on dangling pointers is technically undefined
9609
             * behavior, but well...
9610
             */
9611
29.6M
            const xmlChar *old = atts[i+2];
9612
29.6M
            atts[i+2]  = NULL;    /* Reset repurposed namespace URI */
9613
29.6M
            atts[i+3] = ctxt->input->base + (atts[i+3] - old);  /* value */
9614
29.6M
            atts[i+4] = ctxt->input->base + (atts[i+4] - old);  /* valuend */
9615
29.6M
        }
9616
30.3M
    }
9617
9618
    /*
9619
     * The attributes defaulting
9620
     */
9621
32.2M
    if (ctxt->attsDefault != NULL) {
9622
10.4M
        xmlDefAttrsPtr defaults;
9623
9624
10.4M
  defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9625
10.4M
  if (defaults != NULL) {
9626
1.89M
      for (i = 0;i < defaults->nbAttrs;i++) {
9627
1.28M
          attname = defaults->values[5 * i];
9628
1.28M
    aprefix = defaults->values[5 * i + 1];
9629
9630
                /*
9631
     * special work for namespaces defaulted defs
9632
     */
9633
1.28M
    if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9634
        /*
9635
         * check that it's not a defined namespace
9636
         */
9637
9.26k
        for (j = 1;j <= nbNs;j++)
9638
3.66k
            if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9639
2.38k
          break;
9640
7.98k
              if (j <= nbNs) continue;
9641
9642
5.59k
        nsname = xmlGetNamespace(ctxt, NULL);
9643
5.59k
        if (nsname != defaults->values[5 * i + 2]) {
9644
3.45k
      if (nsPush(ctxt, NULL,
9645
3.45k
                 defaults->values[5 * i + 2]) > 0)
9646
2.99k
          nbNs++;
9647
3.45k
        }
9648
1.28M
    } else if (aprefix == ctxt->str_xmlns) {
9649
        /*
9650
         * check that it's not a defined namespace
9651
         */
9652
14.7k
        for (j = 1;j <= nbNs;j++)
9653
9.49k
            if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9654
7.93k
          break;
9655
13.1k
              if (j <= nbNs) continue;
9656
9657
5.23k
        nsname = xmlGetNamespace(ctxt, attname);
9658
5.23k
        if (nsname != defaults->values[5 * i + 2]) {
9659
3.46k
      if (nsPush(ctxt, attname,
9660
3.46k
                 defaults->values[5 * i + 2]) > 0)
9661
3.46k
          nbNs++;
9662
3.46k
        }
9663
1.26M
    } else {
9664
        /*
9665
         * check that it's not a defined attribute
9666
         */
9667
3.42M
        for (j = 0;j < nbatts;j+=5) {
9668
2.19M
      if ((attname == atts[j]) && (aprefix == atts[j+1]))
9669
30.4k
          break;
9670
2.19M
        }
9671
1.26M
        if (j < nbatts) continue;
9672
9673
1.23M
        if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9674
3.85k
      if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9675
0
                            localname = NULL;
9676
0
                            goto done;
9677
0
      }
9678
3.85k
      maxatts = ctxt->maxatts;
9679
3.85k
      atts = ctxt->atts;
9680
3.85k
        }
9681
1.23M
        atts[nbatts++] = attname;
9682
1.23M
        atts[nbatts++] = aprefix;
9683
1.23M
        if (aprefix == NULL)
9684
1.00M
      atts[nbatts++] = NULL;
9685
226k
        else
9686
226k
            atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
9687
1.23M
        atts[nbatts++] = defaults->values[5 * i + 2];
9688
1.23M
        atts[nbatts++] = defaults->values[5 * i + 3];
9689
1.23M
        if ((ctxt->standalone == 1) &&
9690
1.23M
            (defaults->values[5 * i + 4] != NULL)) {
9691
48
      xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9692
48
    "standalone: attribute %s on %s defaulted from external subset\n",
9693
48
                                   attname, localname);
9694
48
        }
9695
1.23M
        nbdef++;
9696
1.23M
    }
9697
1.28M
      }
9698
605k
  }
9699
10.4M
    }
9700
9701
    /*
9702
     * The attributes checkings
9703
     */
9704
63.8M
    for (i = 0; i < nbatts;i += 5) {
9705
        /*
9706
  * The default namespace does not apply to attribute names.
9707
  */
9708
31.6M
  if (atts[i + 1] != NULL) {
9709
960k
      nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9710
960k
      if (nsname == NULL) {
9711
192k
    xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9712
192k
        "Namespace prefix %s for %s on %s is not defined\n",
9713
192k
        atts[i + 1], atts[i], localname);
9714
192k
      }
9715
960k
      atts[i + 2] = nsname;
9716
960k
  } else
9717
30.6M
      nsname = NULL;
9718
  /*
9719
   * [ WFC: Unique Att Spec ]
9720
   * No attribute name may appear more than once in the same
9721
   * start-tag or empty-element tag.
9722
   * As extended by the Namespace in XML REC.
9723
   */
9724
45.2M
        for (j = 0; j < i;j += 5) {
9725
13.6M
      if (atts[i] == atts[j]) {
9726
35.7k
          if (atts[i+1] == atts[j+1]) {
9727
13.6k
        xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9728
13.6k
        break;
9729
13.6k
    }
9730
22.0k
    if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9731
1.37k
        xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9732
1.37k
           "Namespaced Attribute %s in '%s' redefined\n",
9733
1.37k
           atts[i], nsname, NULL);
9734
1.37k
        break;
9735
1.37k
    }
9736
22.0k
      }
9737
13.6M
  }
9738
31.6M
    }
9739
9740
32.2M
    nsname = xmlGetNamespace(ctxt, prefix);
9741
32.2M
    if ((prefix != NULL) && (nsname == NULL)) {
9742
655k
  xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9743
655k
           "Namespace prefix %s on %s is not defined\n",
9744
655k
     prefix, localname, NULL);
9745
655k
    }
9746
32.2M
    *pref = prefix;
9747
32.2M
    *URI = nsname;
9748
9749
    /*
9750
     * SAX: Start of Element !
9751
     */
9752
32.2M
    if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9753
32.2M
  (!ctxt->disableSAX)) {
9754
28.0M
  if (nbNs > 0)
9755
184k
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9756
184k
        nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9757
184k
        nbatts / 5, nbdef, atts);
9758
27.9M
  else
9759
27.9M
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9760
27.9M
                    nsname, 0, NULL, nbatts / 5, nbdef, atts);
9761
28.0M
    }
9762
9763
32.2M
done:
9764
    /*
9765
     * Free up attribute allocated strings if needed
9766
     */
9767
32.2M
    if (attval != 0) {
9768
1.51M
  for (i = 3,j = 0; j < nratts;i += 5,j++)
9769
829k
      if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9770
711k
          xmlFree((xmlChar *) atts[i]);
9771
685k
    }
9772
9773
32.2M
    return(localname);
9774
32.2M
}
9775
9776
/**
9777
 * xmlParseEndTag2:
9778
 * @ctxt:  an XML parser context
9779
 * @line:  line of the start tag
9780
 * @nsNr:  number of namespaces on the start tag
9781
 *
9782
 * Parse an end tag. Always consumes '</'.
9783
 *
9784
 * [42] ETag ::= '</' Name S? '>'
9785
 *
9786
 * With namespace
9787
 *
9788
 * [NS 9] ETag ::= '</' QName S? '>'
9789
 */
9790
9791
static void
9792
19.3M
xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlStartTag *tag) {
9793
19.3M
    const xmlChar *name;
9794
9795
19.3M
    GROW;
9796
19.3M
    if ((RAW != '<') || (NXT(1) != '/')) {
9797
0
  xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9798
0
  return;
9799
0
    }
9800
19.3M
    SKIP(2);
9801
9802
19.3M
    if (tag->prefix == NULL)
9803
18.7M
        name = xmlParseNameAndCompare(ctxt, ctxt->name);
9804
607k
    else
9805
607k
        name = xmlParseQNameAndCompare(ctxt, ctxt->name, tag->prefix);
9806
9807
    /*
9808
     * We should definitely be at the ending "S? '>'" part
9809
     */
9810
19.3M
    GROW;
9811
19.3M
    if (ctxt->instate == XML_PARSER_EOF)
9812
0
        return;
9813
19.3M
    SKIP_BLANKS;
9814
19.3M
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9815
129k
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9816
129k
    } else
9817
19.2M
  NEXT1;
9818
9819
    /*
9820
     * [ WFC: Element Type Match ]
9821
     * The Name in an element's end-tag must match the element type in the
9822
     * start-tag.
9823
     *
9824
     */
9825
19.3M
    if (name != (xmlChar*)1) {
9826
474k
        if (name == NULL) name = BAD_CAST "unparsable";
9827
474k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9828
474k
         "Opening and ending tag mismatch: %s line %d and %s\n",
9829
474k
                    ctxt->name, tag->line, name);
9830
474k
    }
9831
9832
    /*
9833
     * SAX: End of Tag
9834
     */
9835
19.3M
    if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9836
19.3M
  (!ctxt->disableSAX))
9837
17.3M
  ctxt->sax->endElementNs(ctxt->userData, ctxt->name, tag->prefix,
9838
17.3M
                                tag->URI);
9839
9840
19.3M
    spacePop(ctxt);
9841
19.3M
    if (tag->nsNr != 0)
9842
35.6k
  nsPop(ctxt, tag->nsNr);
9843
19.3M
}
9844
9845
/**
9846
 * xmlParseCDSect:
9847
 * @ctxt:  an XML parser context
9848
 *
9849
 * DEPRECATED: Internal function, don't use.
9850
 *
9851
 * Parse escaped pure raw content. Always consumes '<!['.
9852
 *
9853
 * [18] CDSect ::= CDStart CData CDEnd
9854
 *
9855
 * [19] CDStart ::= '<![CDATA['
9856
 *
9857
 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9858
 *
9859
 * [21] CDEnd ::= ']]>'
9860
 */
9861
void
9862
94.7k
xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9863
94.7k
    xmlChar *buf = NULL;
9864
94.7k
    int len = 0;
9865
94.7k
    int size = XML_PARSER_BUFFER_SIZE;
9866
94.7k
    int r, rl;
9867
94.7k
    int s, sl;
9868
94.7k
    int cur, l;
9869
94.7k
    int count = 0;
9870
94.7k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9871
27.0k
                    XML_MAX_HUGE_LENGTH :
9872
94.7k
                    XML_MAX_TEXT_LENGTH;
9873
9874
94.7k
    if ((CUR != '<') || (NXT(1) != '!') || (NXT(2) != '['))
9875
0
        return;
9876
94.7k
    SKIP(3);
9877
9878
94.7k
    if (!CMP6(CUR_PTR, 'C', 'D', 'A', 'T', 'A', '['))
9879
0
        return;
9880
94.7k
    SKIP(6);
9881
9882
94.7k
    ctxt->instate = XML_PARSER_CDATA_SECTION;
9883
94.7k
    r = CUR_CHAR(rl);
9884
94.7k
    if (!IS_CHAR(r)) {
9885
1.67k
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9886
1.67k
        goto out;
9887
1.67k
    }
9888
93.0k
    NEXTL(rl);
9889
93.0k
    s = CUR_CHAR(sl);
9890
93.0k
    if (!IS_CHAR(s)) {
9891
768
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9892
768
        goto out;
9893
768
    }
9894
92.2k
    NEXTL(sl);
9895
92.2k
    cur = CUR_CHAR(l);
9896
92.2k
    buf = (xmlChar *) xmlMallocAtomic(size);
9897
92.2k
    if (buf == NULL) {
9898
0
  xmlErrMemory(ctxt, NULL);
9899
0
        goto out;
9900
0
    }
9901
41.7M
    while (IS_CHAR(cur) &&
9902
41.7M
           ((r != ']') || (s != ']') || (cur != '>'))) {
9903
41.6M
  if (len + 5 >= size) {
9904
90.5k
      xmlChar *tmp;
9905
9906
90.5k
      tmp = (xmlChar *) xmlRealloc(buf, size * 2);
9907
90.5k
      if (tmp == NULL) {
9908
0
    xmlErrMemory(ctxt, NULL);
9909
0
                goto out;
9910
0
      }
9911
90.5k
      buf = tmp;
9912
90.5k
      size *= 2;
9913
90.5k
  }
9914
41.6M
  COPY_BUF(rl,buf,len,r);
9915
41.6M
  r = s;
9916
41.6M
  rl = sl;
9917
41.6M
  s = cur;
9918
41.6M
  sl = l;
9919
41.6M
  count++;
9920
41.6M
  if (count > 50) {
9921
778k
      SHRINK;
9922
778k
      GROW;
9923
778k
            if (ctxt->instate == XML_PARSER_EOF) {
9924
0
                goto out;
9925
0
            }
9926
778k
      count = 0;
9927
778k
  }
9928
41.6M
  NEXTL(l);
9929
41.6M
  cur = CUR_CHAR(l);
9930
41.6M
        if (len > maxLength) {
9931
0
            xmlFatalErrMsg(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9932
0
                           "CData section too big found\n");
9933
0
            goto out;
9934
0
        }
9935
41.6M
    }
9936
92.2k
    buf[len] = 0;
9937
92.2k
    if (cur != '>') {
9938
12.6k
  xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9939
12.6k
                       "CData section not finished\n%.50s\n", buf);
9940
12.6k
        goto out;
9941
12.6k
    }
9942
79.6k
    NEXTL(l);
9943
9944
    /*
9945
     * OK the buffer is to be consumed as cdata.
9946
     */
9947
79.6k
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9948
63.1k
  if (ctxt->sax->cdataBlock != NULL)
9949
38.3k
      ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9950
24.7k
  else if (ctxt->sax->characters != NULL)
9951
24.7k
      ctxt->sax->characters(ctxt->userData, buf, len);
9952
63.1k
    }
9953
9954
94.7k
out:
9955
94.7k
    if (ctxt->instate != XML_PARSER_EOF)
9956
94.7k
        ctxt->instate = XML_PARSER_CONTENT;
9957
94.7k
    xmlFree(buf);
9958
94.7k
}
9959
9960
/**
9961
 * xmlParseContentInternal:
9962
 * @ctxt:  an XML parser context
9963
 *
9964
 * Parse a content sequence. Stops at EOF or '</'. Leaves checking of
9965
 * unexpected EOF to the caller.
9966
 */
9967
9968
static void
9969
523k
xmlParseContentInternal(xmlParserCtxtPtr ctxt) {
9970
523k
    int nameNr = ctxt->nameNr;
9971
9972
523k
    GROW;
9973
92.9M
    while ((RAW != 0) &&
9974
92.9M
     (ctxt->instate != XML_PARSER_EOF)) {
9975
92.5M
  const xmlChar *cur = ctxt->input->cur;
9976
9977
  /*
9978
   * First case : a Processing Instruction.
9979
   */
9980
92.5M
  if ((*cur == '<') && (cur[1] == '?')) {
9981
95.2k
      xmlParsePI(ctxt);
9982
95.2k
  }
9983
9984
  /*
9985
   * Second case : a CDSection
9986
   */
9987
  /* 2.6.0 test was *cur not RAW */
9988
92.4M
  else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9989
94.7k
      xmlParseCDSect(ctxt);
9990
94.7k
  }
9991
9992
  /*
9993
   * Third case :  a comment
9994
   */
9995
92.3M
  else if ((*cur == '<') && (NXT(1) == '!') &&
9996
92.3M
     (NXT(2) == '-') && (NXT(3) == '-')) {
9997
504k
      xmlParseComment(ctxt);
9998
504k
      ctxt->instate = XML_PARSER_CONTENT;
9999
504k
  }
10000
10001
  /*
10002
   * Fourth case :  a sub-element.
10003
   */
10004
91.8M
  else if (*cur == '<') {
10005
36.5M
            if (NXT(1) == '/') {
10006
13.7M
                if (ctxt->nameNr <= nameNr)
10007
68.0k
                    break;
10008
13.6M
          xmlParseElementEnd(ctxt);
10009
22.8M
            } else {
10010
22.8M
          xmlParseElementStart(ctxt);
10011
22.8M
            }
10012
36.5M
  }
10013
10014
  /*
10015
   * Fifth case : a reference. If if has not been resolved,
10016
   *    parsing returns it's Name, create the node
10017
   */
10018
10019
55.2M
  else if (*cur == '&') {
10020
19.0M
      xmlParseReference(ctxt);
10021
19.0M
  }
10022
10023
  /*
10024
   * Last case, text. Note that References are handled directly.
10025
   */
10026
36.2M
  else {
10027
36.2M
      xmlParseCharData(ctxt, 0);
10028
36.2M
  }
10029
10030
92.4M
  GROW;
10031
92.4M
  SHRINK;
10032
92.4M
    }
10033
523k
}
10034
10035
/**
10036
 * xmlParseContent:
10037
 * @ctxt:  an XML parser context
10038
 *
10039
 * Parse a content sequence. Stops at EOF or '</'.
10040
 *
10041
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10042
 */
10043
10044
void
10045
373k
xmlParseContent(xmlParserCtxtPtr ctxt) {
10046
373k
    int nameNr = ctxt->nameNr;
10047
10048
373k
    xmlParseContentInternal(ctxt);
10049
10050
373k
    if ((ctxt->instate != XML_PARSER_EOF) && (ctxt->nameNr > nameNr)) {
10051
4.64k
        const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
10052
4.64k
        int line = ctxt->pushTab[ctxt->nameNr - 1].line;
10053
4.64k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
10054
4.64k
                "Premature end of data in tag %s line %d\n",
10055
4.64k
    name, line, NULL);
10056
4.64k
    }
10057
373k
}
10058
10059
/**
10060
 * xmlParseElement:
10061
 * @ctxt:  an XML parser context
10062
 *
10063
 * DEPRECATED: Internal function, don't use.
10064
 *
10065
 * parse an XML element
10066
 *
10067
 * [39] element ::= EmptyElemTag | STag content ETag
10068
 *
10069
 * [ WFC: Element Type Match ]
10070
 * The Name in an element's end-tag must match the element type in the
10071
 * start-tag.
10072
 *
10073
 */
10074
10075
void
10076
222k
xmlParseElement(xmlParserCtxtPtr ctxt) {
10077
222k
    if (xmlParseElementStart(ctxt) != 0)
10078
72.5k
        return;
10079
10080
149k
    xmlParseContentInternal(ctxt);
10081
149k
    if (ctxt->instate == XML_PARSER_EOF)
10082
630
  return;
10083
10084
149k
    if (CUR == 0) {
10085
82.8k
        const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
10086
82.8k
        int line = ctxt->pushTab[ctxt->nameNr - 1].line;
10087
82.8k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
10088
82.8k
                "Premature end of data in tag %s line %d\n",
10089
82.8k
    name, line, NULL);
10090
82.8k
        return;
10091
82.8k
    }
10092
10093
66.1k
    xmlParseElementEnd(ctxt);
10094
66.1k
}
10095
10096
/**
10097
 * xmlParseElementStart:
10098
 * @ctxt:  an XML parser context
10099
 *
10100
 * Parse the start of an XML element. Returns -1 in case of error, 0 if an
10101
 * opening tag was parsed, 1 if an empty element was parsed.
10102
 * On a 0 return the element name is left on the name stack.
10103
 * Always consumes '<'.
10104
 */
10105
static int
10106
23.0M
xmlParseElementStart(xmlParserCtxtPtr ctxt) {
10107
23.0M
    const xmlChar *name;
10108
23.0M
    const xmlChar *prefix = NULL;
10109
23.0M
    const xmlChar *URI = NULL;
10110
23.0M
    xmlParserNodeInfo node_info;
10111
23.0M
    int line, tlen = 0;
10112
23.0M
    xmlNodePtr ret;
10113
23.0M
    int nsNr = ctxt->nsNr; /* namespace stack depth on entry */
10114
10115
23.0M
    if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) && /* nesting limit */
10116
23.0M
        ((ctxt->options & XML_PARSE_HUGE) == 0)) { /* limit is lifted by XML_PARSE_HUGE */
10117
56
  xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
10118
56
     "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
10119
56
        xmlParserMaxDepth);
10120
56
  xmlHaltParser(ctxt);
10121
56
  return(-1);
10122
56
    }
10123
10124
    /* Capture start position */
10125
23.0M
    if (ctxt->record_info) {
10126
0
        node_info.begin_pos = ctxt->input->consumed +
10127
0
                          (CUR_PTR - ctxt->input->base);
10128
0
  node_info.begin_line = ctxt->input->line;
10129
0
    }
10130
10131
23.0M
    if (ctxt->spaceNr == 0) /* empty space stack: start with -1 */
10132
0
  spacePush(ctxt, -1);
10133
23.0M
    else if (*ctxt->space == -2) /* a -2 entry is not inherited, -1 is pushed instead */
10134
2.61M
  spacePush(ctxt, -1);
10135
20.4M
    else
10136
20.4M
  spacePush(ctxt, *ctxt->space); /* otherwise inherit the current value */
10137
10138
23.0M
    line = ctxt->input->line; /* line of the start tag, used in error messages below */
10139
23.0M
#ifdef LIBXML_SAX1_ENABLED
10140
23.0M
    if (ctxt->sax2)
10141
14.7M
#endif /* LIBXML_SAX1_ENABLED */
10142
14.7M
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
10143
8.31M
#ifdef LIBXML_SAX1_ENABLED
10144
8.31M
    else
10145
8.31M
  name = xmlParseStartTag(ctxt);
10146
23.0M
#endif /* LIBXML_SAX1_ENABLED */
10147
23.0M
    if (ctxt->instate == XML_PARSER_EOF)
10148
924
  return(-1);
10149
23.0M
    if (name == NULL) { /* no start tag name was returned */
10150
646k
  spacePop(ctxt); /* undo the spacePush above */
10151
646k
        return(-1);
10152
646k
    }
10153
22.4M
    nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr); /* last arg: namespace stack growth since entry */
10154
22.4M
    ret = ctxt->node; /* node current after the start tag, used for node info */
10155
10156
22.4M
#ifdef LIBXML_VALID_ENABLED
10157
    /*
10158
     * [ VC: Root Element Type ]
10159
     * The Name in the document type declaration must match the element
10160
     * type of the root element.
10161
     */
10162
22.4M
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10163
22.4M
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
10164
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
10165
22.4M
#endif /* LIBXML_VALID_ENABLED */
10166
10167
    /*
10168
     * Check for an Empty Element.
10169
     */
10170
22.4M
    if ((RAW == '/') && (NXT(1) == '>')) {
10171
7.50M
        SKIP(2); /* skip the two closing characters of the empty-element tag */
10172
7.50M
  if (ctxt->sax2) {
10173
5.13M
      if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
10174
5.13M
    (!ctxt->disableSAX))
10175
3.29M
    ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
10176
5.13M
#ifdef LIBXML_SAX1_ENABLED
10177
5.13M
  } else {
10178
2.36M
      if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
10179
2.36M
    (!ctxt->disableSAX))
10180
2.03M
    ctxt->sax->endElement(ctxt->userData, name);
10181
2.36M
#endif /* LIBXML_SAX1_ENABLED */
10182
2.36M
  }
10183
7.50M
  namePop(ctxt);
10184
7.50M
  spacePop(ctxt);
10185
7.50M
  if (nsNr != ctxt->nsNr) /* namespace stack grew since entry */
10186
14.1k
      nsPop(ctxt, ctxt->nsNr - nsNr);
10187
7.50M
  if ( ret != NULL && ctxt->record_info ) {
10188
0
     node_info.end_pos = ctxt->input->consumed +
10189
0
            (CUR_PTR - ctxt->input->base);
10190
0
     node_info.end_line = ctxt->input->line;
10191
0
     node_info.node = ret;
10192
0
     xmlParserAddNodeInfo(ctxt, &node_info);
10193
0
  }
10194
7.50M
  return(1); /* empty element fully handled */
10195
7.50M
    }
10196
14.9M
    if (RAW == '>') {
10197
14.1M
        NEXT1; /* step past '>' */
10198
14.1M
    } else {
10199
767k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
10200
767k
         "Couldn't find end of Start Tag %s line %d\n",
10201
767k
                    name, line, NULL);
10202
10203
  /*
10204
   * end of parsing of this node.
10205
   */
10206
767k
  nodePop(ctxt);
10207
767k
  namePop(ctxt);
10208
767k
  spacePop(ctxt);
10209
767k
  if (nsNr != ctxt->nsNr) /* namespace stack grew since entry */
10210
76.9k
      nsPop(ctxt, ctxt->nsNr - nsNr);
10211
10212
  /*
10213
   * Capture end position and add node
10214
   */
10215
767k
  if ( ret != NULL && ctxt->record_info ) {
10216
0
     node_info.end_pos = ctxt->input->consumed +
10217
0
            (CUR_PTR - ctxt->input->base);
10218
0
     node_info.end_line = ctxt->input->line;
10219
0
     node_info.node = ret;
10220
0
     xmlParserAddNodeInfo(ctxt, &node_info);
10221
0
  }
10222
767k
  return(-1);
10223
767k
    }
10224
10225
14.1M
    return(0); /* opening tag parsed, name and space entries stay pushed */
10226
14.9M
}
10227
10228
/**
10229
 * xmlParseElementEnd:
10230
 * @ctxt:  an XML parser context
10231
 *
10232
 * Parse the end of an XML element. Always consumes '</'.
10233
 */
10234
static void
10235
13.7M
xmlParseElementEnd(xmlParserCtxtPtr ctxt) {
10236
13.7M
    xmlParserNodeInfo node_info;
10237
13.7M
    xmlNodePtr ret = ctxt->node; /* node current before the end tag is parsed */
10238
10239
13.7M
    if (ctxt->nameNr <= 0) { /* name stack is empty: only skip the end-tag opener */
10240
0
        if ((RAW == '<') && (NXT(1) == '/'))
10241
0
            SKIP(2);
10242
0
        return;
10243
0
    }
10244
10245
    /*
10246
     * parse the end of tag: '</' should be here.
10247
     */
10248
13.7M
    if (ctxt->sax2) {
10249
8.49M
  xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]); /* top pushTab entry */
10250
8.49M
  namePop(ctxt);
10251
8.49M
    }
10252
5.20M
#ifdef LIBXML_SAX1_ENABLED
10253
5.20M
    else
10254
5.20M
  xmlParseEndTag1(ctxt, 0); /* no namePop here, unlike the SAX2 branch */
10255
13.7M
#endif /* LIBXML_SAX1_ENABLED */
10256
10257
    /*
10258
     * Capture end position and add node
10259
     */
10260
13.7M
    if ( ret != NULL && ctxt->record_info ) {
10261
0
       node_info.end_pos = ctxt->input->consumed +
10262
0
                          (CUR_PTR - ctxt->input->base);
10263
0
       node_info.end_line = ctxt->input->line;
10264
0
       node_info.node = ret;
10265
0
       xmlParserAddNodeInfo(ctxt, &node_info); /* NOTE(review): begin_pos/begin_line are not set in this function - confirm */
10266
0
    }
10267
13.7M
}
10268
10269
/**
10270
 * xmlParseVersionNum:
10271
 * @ctxt:  an XML parser context
10272
 *
10273
 * DEPRECATED: Internal function, don't use.
10274
 *
10275
 * Parse the XML version value.
10276
 *
10277
 * [26] VersionNum ::= '1.' [0-9]+
10278
 *
10279
 * In practice allow [0-9].[0-9]+ at that level
10280
 *
10281
 * Returns a newly allocated string with the XML version number, or NULL
10282
 */
10283
xmlChar *
10284
446k
xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10285
446k
    xmlChar *buf = NULL;
10286
446k
    int len = 0;
10287
446k
    int size = 10; /* initial buffer capacity, doubled on demand */
10288
446k
    xmlChar cur;
10289
10290
446k
    buf = (xmlChar *) xmlMallocAtomic(size);
10291
446k
    if (buf == NULL) {
10292
0
  xmlErrMemory(ctxt, NULL);
10293
0
  return(NULL);
10294
0
    }
10295
446k
    cur = CUR;
10296
446k
    if (!((cur >= '0') && (cur <= '9'))) { /* first character must be a digit */
10297
4.17k
  xmlFree(buf);
10298
4.17k
  return(NULL);
10299
4.17k
    }
10300
441k
    buf[len++] = cur;
10301
441k
    NEXT;
10302
441k
    cur=CUR;
10303
441k
    if (cur != '.') { /* second character must be the dot */
10304
4.79k
  xmlFree(buf);
10305
4.79k
  return(NULL);
10306
4.79k
    }
10307
437k
    buf[len++] = cur;
10308
437k
    NEXT;
10309
437k
    cur=CUR;
10310
1.74M
    while ((cur >= '0') && (cur <= '9')) { /* zero or more digits are accepted here */
10311
1.30M
  if (len + 1 >= size) { /* keep one byte free for the terminator */
10312
1.68k
      xmlChar *tmp;
10313
10314
1.68k
      size *= 2;
10315
1.68k
      tmp = (xmlChar *) xmlRealloc(buf, size);
10316
1.68k
      if (tmp == NULL) {
10317
0
          xmlFree(buf);
10318
0
    xmlErrMemory(ctxt, NULL);
10319
0
    return(NULL);
10320
0
      }
10321
1.68k
      buf = tmp;
10322
1.68k
  }
10323
1.30M
  buf[len++] = cur;
10324
1.30M
  NEXT;
10325
1.30M
  cur=CUR;
10326
1.30M
    }
10327
437k
    buf[len] = 0; /* NUL-terminate */
10328
437k
    return(buf);
10329
437k
}
10330
10331
/**
10332
 * xmlParseVersionInfo:
10333
 * @ctxt:  an XML parser context
10334
 *
10335
 * DEPRECATED: Internal function, don't use.
10336
 *
10337
 * Parse the XML version pseudo-attribute; leading blanks are not skipped here.
10338
 *
10339
 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
10340
 *
10341
 * [25] Eq ::= S? '=' S?
10342
 *
10343
 * Returns the version string, e.g. "1.0", or NULL if none was parsed
10344
 */
10345
10346
xmlChar *
10347
508k
xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10348
508k
    xmlChar *version = NULL;
10349
10350
508k
    if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
10351
455k
  SKIP(7);
10352
455k
  SKIP_BLANKS;
10353
455k
  if (RAW != '=') {
10354
5.34k
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10355
5.34k
      return(NULL);
10356
5.34k
        }
10357
450k
  NEXT;
10358
450k
  SKIP_BLANKS;
10359
450k
  if (RAW == '"') {
10360
399k
      NEXT;
10361
399k
      version = xmlParseVersionNum(ctxt);
10362
399k
      if (RAW != '"') { /* error is reported but version is still returned */
10363
17.8k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10364
17.8k
      } else
10365
381k
          NEXT;
10366
399k
  } else if (RAW == '\''){
10367
46.2k
      NEXT;
10368
46.2k
      version = xmlParseVersionNum(ctxt);
10369
46.2k
      if (RAW != '\'') { /* error is reported but version is still returned */
10370
1.62k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10371
1.62k
      } else
10372
44.6k
          NEXT;
10373
46.2k
  } else {
10374
4.43k
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10375
4.43k
  }
10376
450k
    }
10377
503k
    return(version); /* NULL when the keyword or the value was missing */
10378
508k
}
10379
10380
/**
10381
 * xmlParseEncName:
10382
 * @ctxt:  an XML parser context
10383
 *
10384
 * DEPRECATED: Internal function, don't use.
10385
 *
10386
 * Parse the XML encoding name into a newly allocated buffer.
10387
 *
10388
 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10389
 *
10390
 * Returns the encoding name value or NULL
10391
 */
10392
xmlChar *
10393
207k
xmlParseEncName(xmlParserCtxtPtr ctxt) {
10394
207k
    xmlChar *buf = NULL;
10395
207k
    int len = 0;
10396
207k
    int size = 10; /* initial buffer capacity, doubled on demand */
10397
207k
    xmlChar cur;
10398
10399
207k
    cur = CUR;
10400
207k
    if (((cur >= 'a') && (cur <= 'z')) || /* first character must be an ASCII letter */
10401
207k
        ((cur >= 'A') && (cur <= 'Z'))) {
10402
206k
  buf = (xmlChar *) xmlMallocAtomic(size);
10403
206k
  if (buf == NULL) {
10404
0
      xmlErrMemory(ctxt, NULL);
10405
0
      return(NULL);
10406
0
  }
10407
10408
206k
  buf[len++] = cur;
10409
206k
  NEXT;
10410
206k
  cur = CUR;
10411
2.79M
  while (((cur >= 'a') && (cur <= 'z')) ||
10412
2.79M
         ((cur >= 'A') && (cur <= 'Z')) ||
10413
2.79M
         ((cur >= '0') && (cur <= '9')) ||
10414
2.79M
         (cur == '.') || (cur == '_') ||
10415
2.79M
         (cur == '-')) {
10416
2.58M
      if (len + 1 >= size) { /* keep one byte free for the terminator */
10417
78.0k
          xmlChar *tmp;
10418
10419
78.0k
    size *= 2;
10420
78.0k
    tmp = (xmlChar *) xmlRealloc(buf, size);
10421
78.0k
    if (tmp == NULL) {
10422
0
        xmlErrMemory(ctxt, NULL);
10423
0
        xmlFree(buf);
10424
0
        return(NULL);
10425
0
    }
10426
78.0k
    buf = tmp;
10427
78.0k
      }
10428
2.58M
      buf[len++] = cur;
10429
2.58M
      NEXT;
10430
2.58M
      cur = CUR;
10431
2.58M
      if (cur == 0) { /* saw a zero byte: shrink/grow the input and re-read */
10432
818
          SHRINK;
10433
818
    GROW;
10434
818
    cur = CUR;
10435
818
      }
10436
2.58M
        }
10437
206k
  buf[len] = 0; /* NUL-terminate */
10438
206k
    } else {
10439
836
  xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10440
836
    }
10441
207k
    return(buf); /* NULL when the first character was not a letter */
10442
207k
}
10443
10444
/**
10445
 * xmlParseEncodingDecl:
10446
 * @ctxt:  an XML parser context
10447
 *
10448
 * DEPRECATED: Internal function, don't use.
10449
 *
10450
 * Parse the XML encoding declaration.
10451
 *
10452
 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' |  "'" EncName "'")
10453
 *
10454
 * This sets up the conversion filters.
10455
 *
10456
 * Returns the encoding value or NULL; the string is stored in the context
10457
 */
10458
10459
const xmlChar *
10460
341k
xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10461
341k
    xmlChar *encoding = NULL;
10462
10463
341k
    SKIP_BLANKS;
10464
341k
    if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
10465
209k
  SKIP(8);
10466
209k
  SKIP_BLANKS;
10467
209k
  if (RAW != '=') {
10468
1.31k
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10469
1.31k
      return(NULL);
10470
1.31k
        }
10471
208k
  NEXT;
10472
208k
  SKIP_BLANKS;
10473
208k
  if (RAW == '"') {
10474
171k
      NEXT;
10475
171k
      encoding = xmlParseEncName(ctxt);
10476
171k
      if (RAW != '"') { /* unterminated value: discard the name */
10477
5.71k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10478
5.71k
    xmlFree((xmlChar *) encoding);
10479
5.71k
    return(NULL);
10480
5.71k
      } else
10481
166k
          NEXT;
10482
171k
  } else if (RAW == '\''){
10483
35.6k
      NEXT;
10484
35.6k
      encoding = xmlParseEncName(ctxt);
10485
35.6k
      if (RAW != '\'') { /* unterminated value: discard the name */
10486
540
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10487
540
    xmlFree((xmlChar *) encoding);
10488
540
    return(NULL);
10489
540
      } else
10490
35.0k
          NEXT;
10491
35.6k
  } else {
10492
830
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10493
830
  }
10494
10495
        /*
10496
         * Non standard parsing, allowing the user to ignore encoding
10497
         */
10498
201k
        if (ctxt->options & XML_PARSE_IGNORE_ENC) {
10499
52.8k
      xmlFree((xmlChar *) encoding);
10500
52.8k
            return(NULL);
10501
52.8k
  }
10502
10503
  /*
10504
   * UTF-16 encoding switch has already taken place at this stage,
10505
   * moreover the little-endian/big-endian selection is already done
10506
   */
10507
149k
        if ((encoding != NULL) &&
10508
149k
      ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10509
148k
       (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
10510
      /*
10511
       * If no encoding was passed to the parser, that we are
10512
       * using UTF-16 and no decoder is present i.e. the
10513
       * document is apparently UTF-8 compatible, then raise an
10514
       * encoding mismatch fatal error
10515
       */
10516
4.80k
      if ((ctxt->encoding == NULL) &&
10517
4.80k
          (ctxt->input->buf != NULL) &&
10518
4.80k
          (ctxt->input->buf->encoder == NULL)) {
10519
4.80k
    xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10520
4.80k
      "Document labelled UTF-16 but has UTF-8 content\n");
10521
4.80k
      }
10522
4.80k
      if (ctxt->encoding != NULL)
10523
0
    xmlFree((xmlChar *) ctxt->encoding);
10524
4.80k
      ctxt->encoding = encoding; /* the context now holds the string */
10525
4.80k
  }
10526
  /*
10527
   * UTF-8 encoding is handled natively
10528
   */
10529
144k
        else if ((encoding != NULL) &&
10530
144k
      ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10531
143k
       (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
10532
72.4k
      if (ctxt->encoding != NULL)
10533
7
    xmlFree((xmlChar *) ctxt->encoding);
10534
72.4k
      ctxt->encoding = encoding; /* the context now holds the string */
10535
72.4k
  }
10536
71.8k
  else if (encoding != NULL) { /* any other name: look up a conversion handler */
10537
71.1k
      xmlCharEncodingHandlerPtr handler;
10538
10539
71.1k
      if (ctxt->input->encoding != NULL)
10540
0
    xmlFree((xmlChar *) ctxt->input->encoding);
10541
71.1k
      ctxt->input->encoding = encoding; /* the input now holds the string, also on the error returns below */
10542
10543
71.1k
            handler = xmlFindCharEncodingHandler((const char *) encoding);
10544
71.1k
      if (handler != NULL) {
10545
69.9k
    if (xmlSwitchToEncoding(ctxt, handler) < 0) {
10546
        /* failed to convert */
10547
197
        ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
10548
197
        return(NULL);
10549
197
    }
10550
69.9k
      } else {
10551
1.15k
    xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
10552
1.15k
      "Unsupported encoding %s\n", encoding);
10553
1.15k
    return(NULL);
10554
1.15k
      }
10555
71.1k
  }
10556
149k
    }
10557
280k
    return(encoding); /* NULL when no declaration or no name was parsed */
10558
341k
}
10559
10560
/**
10561
 * xmlParseSDDecl:
10562
 * @ctxt:  an XML parser context
10563
 *
10564
 * DEPRECATED: Internal function, don't use.
10565
 *
10566
 * Parse the XML standalone declaration; leading blanks are skipped first.
10567
 *
10568
 * [32] SDDecl ::= S 'standalone' Eq
10569
 *                 (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10570
 *
10571
 * [ VC: Standalone Document Declaration ]
10572
 * TODO The standalone document declaration must have the value "no"
10573
 * if any external markup declarations contain declarations of:
10574
 *  - attributes with default values, if elements to which these
10575
 *    attributes apply appear in the document without specifications
10576
 *    of values for these attributes, or
10577
 *  - entities (other than amp, lt, gt, apos, quot), if references
10578
 *    to those entities appear in the document, or
10579
 *  - attributes with values subject to normalization, where the
10580
 *    attribute appears in the document with a value which will change
10581
 *    as a result of normalization, or
10582
 *  - element types with element content, if white space occurs directly
10583
 *    within any instance of those types.
10584
 *
10585
 * Returns:
10586
 *   1 if standalone="yes"
10587
 *   0 if standalone="no"
10588
 *  -2 if standalone attribute is missing or invalid
10589
 *    (A standalone value of -2 means that the XML declaration was found,
10590
 *     but no value was specified for the standalone attribute).
10591
 */
10592
10593
int
10594
282k
xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10595
282k
    int standalone = -2; /* result when the attribute is missing or invalid */
10596
10597
282k
    SKIP_BLANKS;
10598
282k
    if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10599
56.1k
  SKIP(10);
10600
56.1k
        SKIP_BLANKS;
10601
56.1k
  if (RAW != '=') {
10602
489
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10603
489
      return(standalone);
10604
489
        }
10605
55.6k
  NEXT;
10606
55.6k
  SKIP_BLANKS;
10607
55.6k
        if (RAW == '\''){ /* single-quoted value */
10608
34.2k
      NEXT;
10609
34.2k
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10610
30.5k
          standalone = 0;
10611
30.5k
                SKIP(2);
10612
30.5k
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10613
3.64k
                 (NXT(2) == 's')) {
10614
3.15k
          standalone = 1;
10615
3.15k
    SKIP(3);
10616
3.15k
            } else {
10617
489
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10618
489
      }
10619
34.2k
      if (RAW != '\'') { /* the parsed value is kept even if the quote is missing */
10620
888
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10621
888
      } else
10622
33.3k
          NEXT;
10623
34.2k
  } else if (RAW == '"'){ /* double-quoted value, same handling */
10624
20.9k
      NEXT;
10625
20.9k
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10626
10.7k
          standalone = 0;
10627
10.7k
    SKIP(2);
10628
10.7k
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10629
10.2k
                 (NXT(2) == 's')) {
10630
9.39k
          standalone = 1;
10631
9.39k
                SKIP(3);
10632
9.39k
            } else {
10633
867
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10634
867
      }
10635
20.9k
      if (RAW != '"') { /* the parsed value is kept even if the quote is missing */
10636
1.21k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10637
1.21k
      } else
10638
19.7k
          NEXT;
10639
20.9k
  } else {
10640
498
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10641
498
        }
10642
55.6k
    }
10643
282k
    return(standalone);
10644
282k
}
10645
10646
/**
10647
 * xmlParseXMLDecl:
10648
 * @ctxt:  an XML parser context
10649
 *
10650
 * DEPRECATED: Internal function, don't use.
10651
 *
10652
 * Parse an XML declaration header; the input must be positioned on '<?xml'.
10653
 *
10654
 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10655
 */
10656
10657
void
10658
487k
xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10659
487k
    xmlChar *version;
10660
10661
    /*
10662
     * This value for standalone indicates that the document has an
10663
     * XML declaration but it does not have a standalone attribute.
10664
     * It will be overwritten later if a standalone attribute is found.
10665
     */
10666
487k
    ctxt->input->standalone = -2;
10667
10668
    /*
10669
     * We know that '<?xml' is here.
10670
     */
10671
487k
    SKIP(5);
10672
10673
487k
    if (!IS_BLANK_CH(RAW)) {
10674
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10675
0
                 "Blank needed after '<?xml'\n");
10676
0
    }
10677
487k
    SKIP_BLANKS;
10678
10679
    /*
10680
     * We must have the VersionInfo here.
10681
     */
10682
487k
    version = xmlParseVersionInfo(ctxt);
10683
487k
    if (version == NULL) {
10684
69.6k
  xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10685
417k
    } else {
10686
417k
  if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10687
      /*
10688
       * Changed here for XML-1.0 5th edition
10689
       */
10690
8.24k
      if (ctxt->options & XML_PARSE_OLD10) { /* old-1.0 mode: any other version is fatal */
10691
2.80k
    xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10692
2.80k
                "Unsupported version '%s'\n",
10693
2.80k
                version);
10694
5.43k
      } else {
10695
5.43k
          if ((version[0] == '1') && ((version[1] == '.'))) { /* other "1." versions only warn */
10696
4.73k
        xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10697
4.73k
                      "Unsupported version '%s'\n",
10698
4.73k
          version, NULL);
10699
4.73k
    } else {
10700
701
        xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10701
701
              "Unsupported version '%s'\n",
10702
701
              version);
10703
701
    }
10704
5.43k
      }
10705
8.24k
  }
10706
417k
  if (ctxt->version != NULL)
10707
0
      xmlFree((void *) ctxt->version);
10708
417k
  ctxt->version = version; /* the context now holds the string */
10709
417k
    }
10710
10711
    /*
10712
     * We may have the encoding declaration
10713
     */
10714
487k
    if (!IS_BLANK_CH(RAW)) {
10715
249k
        if ((RAW == '?') && (NXT(1) == '>')) { /* declaration ends right after the version */
10716
166k
      SKIP(2);
10717
166k
      return;
10718
166k
  }
10719
82.5k
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10720
82.5k
    }
10721
320k
    xmlParseEncodingDecl(ctxt); /* return value unused; results are read from ctxt below */
10722
320k
    if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10723
320k
         (ctxt->instate == XML_PARSER_EOF)) {
10724
  /*
10725
   * The XML REC instructs us to stop parsing right here
10726
   */
10727
997
        return;
10728
997
    }
10729
10730
    /*
10731
     * We may have the standalone status.
10732
     */
10733
319k
    if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) { /* NOTE(review): only input->encoding is tested, not ctxt->encoding - confirm intended */
10734
39.1k
        if ((RAW == '?') && (NXT(1) == '>')) {
10735
36.5k
      SKIP(2);
10736
36.5k
      return;
10737
36.5k
  }
10738
2.57k
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10739
2.57k
    }
10740
10741
    /*
10742
     * We can grow the input buffer freely at that point
10743
     */
10744
282k
    GROW;
10745
10746
282k
    SKIP_BLANKS;
10747
282k
    ctxt->input->standalone = xmlParseSDDecl(ctxt);
10748
10749
282k
    SKIP_BLANKS;
10750
282k
    if ((RAW == '?') && (NXT(1) == '>')) {
10751
163k
        SKIP(2);
10752
163k
    } else if (RAW == '>') {
10753
        /* Deprecated old WD ... */
10754
1.46k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10755
1.46k
  NEXT;
10756
117k
    } else {
10757
117k
        int c;
10758
10759
117k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10760
5.03M
        while ((c = CUR) != 0) { /* error recovery: skip up to and including the next '>' */
10761
5.02M
            NEXT;
10762
5.02M
            if (c == '>')
10763
108k
                break;
10764
5.02M
        }
10765
117k
    }
10766
282k
}
10767
10768
/**
10769
 * xmlParseMisc:
10770
 * @ctxt:  an XML parser context
10771
 *
10772
 * DEPRECATED: Internal function, don't use.
10773
 *
10774
 * Parse an XML Misc* optional field: any run of blanks, PIs and comments.
10775
 *
10776
 * [27] Misc ::= Comment | PI |  S
10777
 */
10778
10779
void
10780
671k
xmlParseMisc(xmlParserCtxtPtr ctxt) {
10781
785k
    while (ctxt->instate != XML_PARSER_EOF) {
10782
785k
        SKIP_BLANKS;
10783
785k
        GROW;
10784
785k
        if ((RAW == '<') && (NXT(1) == '?')) { /* processing instruction */
10785
70.4k
      xmlParsePI(ctxt);
10786
714k
        } else if (CMP4(CUR_PTR, '<', '!', '-', '-')) { /* comment */
10787
43.4k
      xmlParseComment(ctxt);
10788
671k
        } else {
10789
671k
            break; /* anything else ends the Misc sequence */
10790
671k
        }
10791
785k
    }
10792
671k
}
10793
10794
/**
10795
 * xmlParseDocument:
10796
 * @ctxt:  an XML parser context
10797
 *
10798
 * Parse an XML document (and build a tree if using the standard SAX
10799
 * interface).
10800
 *
10801
 * [1] document ::= prolog element Misc*
10802
 *
10803
 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10804
 *
10805
 * Returns 0, -1 in case of error. The parser context is augmented
10806
 *                as a result of the parsing.
10807
 */
10808
10809
int
10810
321k
xmlParseDocument(xmlParserCtxtPtr ctxt) {
10811
321k
    xmlChar start[4];
10812
321k
    xmlCharEncoding enc;
10813
10814
321k
    xmlInitParser();
10815
10816
321k
    if ((ctxt == NULL) || (ctxt->input == NULL))
10817
0
        return(-1);
10818
10819
321k
    GROW;
10820
10821
    /*
10822
     * SAX: detecting the level.
10823
     */
10824
321k
    xmlDetectSAX2(ctxt);
10825
10826
    /*
10827
     * SAX: beginning of the document processing.
10828
     */
10829
321k
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10830
321k
        ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10831
321k
    if (ctxt->instate == XML_PARSER_EOF)
10832
0
  return(-1);
10833
10834
321k
    if ((ctxt->encoding == NULL) && /* no encoding recorded in the context yet */
10835
321k
        ((ctxt->input->end - ctxt->input->cur) >= 4)) {
10836
  /*
10837
   * Get the 4 first bytes and decode the charset
10838
   * if enc != XML_CHAR_ENCODING_NONE
10839
   * plug some encoding conversion routines.
10840
   */
10841
313k
  start[0] = RAW;
10842
313k
  start[1] = NXT(1);
10843
313k
  start[2] = NXT(2);
10844
313k
  start[3] = NXT(3);
10845
313k
  enc = xmlDetectCharEncoding(&start[0], 4);
10846
313k
  if (enc != XML_CHAR_ENCODING_NONE) {
10847
176k
      xmlSwitchEncoding(ctxt, enc);
10848
176k
  }
10849
313k
    }
10850
10851
10852
321k
    if (CUR == 0) {
10853
3.00k
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10854
3.00k
  return(-1);
10855
3.00k
    }
10856
10857
    /*
10858
     * Check for the XMLDecl in the Prolog.
10859
     * do not GROW here to avoid the detected encoder to decode more
10860
     * than just the first line, unless the amount of data is really
10861
     * too small to hold "<?xml version="1.0" encoding="foo"
10862
     */
10863
318k
    if ((ctxt->input->end - ctxt->input->cur) < 35) {
10864
24.9k
       GROW;
10865
24.9k
    }
10866
318k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10867
10868
  /*
10869
   * Note that we will switch encoding on the fly.
10870
   */
10871
163k
  xmlParseXMLDecl(ctxt);
10872
163k
  if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10873
163k
      (ctxt->instate == XML_PARSER_EOF)) {
10874
      /*
10875
       * The XML REC instructs us to stop parsing right here
10876
       */
10877
373
      return(-1);
10878
373
  }
10879
163k
  ctxt->standalone = ctxt->input->standalone;
10880
163k
  SKIP_BLANKS;
10881
163k
    } else {
10882
154k
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); /* no XMLDecl: record the default version */
10883
154k
    }
10884
317k
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10885
301k
        ctxt->sax->startDocument(ctxt->userData);
10886
317k
    if (ctxt->instate == XML_PARSER_EOF)
10887
0
  return(-1);
10888
317k
    if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10889
317k
        (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10890
0
  ctxt->myDoc->compression = ctxt->input->buf->compressed;
10891
0
    }
10892
10893
    /*
10894
     * The Misc part of the Prolog
10895
     */
10896
317k
    xmlParseMisc(ctxt);
10897
10898
    /*
10899
     * Then possibly doc type declaration(s) and more Misc
10900
     * (doctypedecl Misc*)?
10901
     */
10902
317k
    GROW;
10903
317k
    if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10904
10905
193k
  ctxt->inSubset = 1; /* set to 1 while the DOCTYPE and internal subset are parsed */
10906
193k
  xmlParseDocTypeDecl(ctxt);
10907
193k
  if (RAW == '[') {
10908
139k
      ctxt->instate = XML_PARSER_DTD;
10909
139k
      xmlParseInternalSubset(ctxt);
10910
139k
      if (ctxt->instate == XML_PARSER_EOF)
10911
42.2k
    return(-1);
10912
139k
  }
10913
10914
  /*
10915
   * Create and update the external subset.
10916
   */
10917
151k
  ctxt->inSubset = 2; /* set to 2 around the externalSubset callback */
10918
151k
  if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10919
151k
      (!ctxt->disableSAX))
10920
142k
      ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10921
142k
                                ctxt->extSubSystem, ctxt->extSubURI);
10922
151k
  if (ctxt->instate == XML_PARSER_EOF)
10923
19.8k
      return(-1);
10924
131k
  ctxt->inSubset = 0; /* reset once DTD handling is done */
10925
10926
131k
        xmlCleanSpecialAttr(ctxt);
10927
10928
131k
  ctxt->instate = XML_PARSER_PROLOG;
10929
131k
  xmlParseMisc(ctxt);
10930
131k
    }
10931
10932
    /*
10933
     * Time to start parsing the tree itself
10934
     */
10935
255k
    GROW;
10936
255k
    if (RAW != '<') {
10937
33.3k
  xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10938
33.3k
           "Start tag expected, '<' not found\n");
10939
222k
    } else {
10940
222k
  ctxt->instate = XML_PARSER_CONTENT;
10941
222k
  xmlParseElement(ctxt); /* the root element */
10942
222k
  ctxt->instate = XML_PARSER_EPILOG;
10943
10944
10945
  /*
10946
   * The Misc part at the end
10947
   */
10948
222k
  xmlParseMisc(ctxt);
10949
10950
222k
  if (RAW != 0) { /* leftover input after the trailing Misc */
10951
74.0k
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10952
74.0k
  }
10953
222k
  ctxt->instate = XML_PARSER_EOF;
10954
222k
    }
10955
10956
    /*
10957
     * SAX: end of the document processing.
10958
     */
10959
255k
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10960
255k
        ctxt->sax->endDocument(ctxt->userData);
10961
10962
    /*
10963
     * Remove locally kept entity definitions if the tree was not built
10964
     */
10965
255k
    if ((ctxt->myDoc != NULL) &&
10966
255k
  (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10967
1.62k
  xmlFreeDoc(ctxt->myDoc);
10968
1.62k
  ctxt->myDoc = NULL;
10969
1.62k
    }
10970
10971
255k
    if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) { /* record parse results on the document */
10972
19.3k
        ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10973
19.3k
  if (ctxt->valid)
10974
11.4k
      ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10975
19.3k
  if (ctxt->nsWellFormed)
10976
18.3k
      ctxt->myDoc->properties |= XML_DOC_NSVALID;
10977
19.3k
  if (ctxt->options & XML_PARSE_OLD10)
10978
4.09k
      ctxt->myDoc->properties |= XML_DOC_OLD10;
10979
19.3k
    }
10980
255k
    if (! ctxt->wellFormed) {
10981
236k
  ctxt->valid = 0; /* a document that is not well-formed is also marked not valid */
10982
236k
  return(-1);
10983
236k
    }
10984
19.3k
    return(0);
10985
255k
}
10986
10987
/**
10988
 * xmlParseExtParsedEnt:
10989
 * @ctxt:  an XML parser context
10990
 *
10991
 * Parse a general parsed entity.
10992
 * An external general parsed entity is well-formed if it matches the
10993
 * production labeled extParsedEnt.
10994
 *
10995
 * [78] extParsedEnt ::= TextDecl? content
10996
 *
10997
 * Returns 0, -1 in case of error. The parser context is augmented
10998
 *                as a result of the parsing.
10999
 */
11000
11001
int
11002
0
xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
11003
0
    xmlChar start[4];
11004
0
    xmlCharEncoding enc;
11005
11006
0
    if ((ctxt == NULL) || (ctxt->input == NULL))
11007
0
        return(-1);
11008
11009
0
    xmlDetectSAX2(ctxt);
11010
11011
0
    GROW;
11012
11013
    /*
11014
     * SAX: beginning of the document processing.
11015
     */
11016
0
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11017
0
        ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
11018
11019
    /*
11020
     * Get the 4 first bytes and decode the charset
11021
     * if enc != XML_CHAR_ENCODING_NONE
11022
     * plug some encoding conversion routines.
11023
     */
11024
0
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11025
0
  start[0] = RAW;
11026
0
  start[1] = NXT(1);
11027
0
  start[2] = NXT(2);
11028
0
  start[3] = NXT(3);
11029
0
  enc = xmlDetectCharEncoding(start, 4);
11030
0
  if (enc != XML_CHAR_ENCODING_NONE) {
11031
0
      xmlSwitchEncoding(ctxt, enc);
11032
0
  }
11033
0
    }
11034
11035
11036
0
    if (CUR == 0) { /* error is reported but parsing continues (no return here) */
11037
0
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11038
0
    }
11039
11040
    /*
11041
     * Check for the XMLDecl in the Prolog.
11042
     */
11043
0
    GROW;
11044
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
11045
11046
  /*
11047
   * Note that we will switch encoding on the fly.
11048
   */
11049
0
  xmlParseXMLDecl(ctxt);
11050
0
  if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11051
      /*
11052
       * The XML REC instructs us to stop parsing right here
11053
       */
11054
0
      return(-1);
11055
0
  }
11056
0
  SKIP_BLANKS;
11057
0
    } else {
11058
0
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); /* no declaration: record the default version */
11059
0
    }
11060
0
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
11061
0
        ctxt->sax->startDocument(ctxt->userData);
11062
0
    if (ctxt->instate == XML_PARSER_EOF)
11063
0
  return(-1);
11064
11065
    /*
11066
     * Doing validity checking on chunk doesn't make sense
11067
     */
11068
0
    ctxt->instate = XML_PARSER_CONTENT;
11069
0
    ctxt->validate = 0;
11070
0
    ctxt->loadsubset = 0;
11071
0
    ctxt->depth = 0;
11072
11073
0
    xmlParseContent(ctxt);
11074
0
    if (ctxt->instate == XML_PARSER_EOF)
11075
0
  return(-1);
11076
11077
0
    if ((RAW == '<') && (NXT(1) == '/')) { /* stray end-tag after the content */
11078
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11079
0
    } else if (RAW != 0) { /* any other leftover input */
11080
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
11081
0
    }
11082
11083
    /*
11084
     * SAX: end of the document processing.
11085
     */
11086
0
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11087
0
        ctxt->sax->endDocument(ctxt->userData);
11088
11089
0
    if (! ctxt->wellFormed) return(-1);
11090
0
    return(0);
11091
0
}
11092
11093
#ifdef LIBXML_PUSH_ENABLED
11094
/************************************************************************
11095
 *                  *
11096
 *    Progressive parsing interfaces        *
11097
 *                  *
11098
 ************************************************************************/
11099
11100
/**
11101
 * xmlParseLookupChar:
11102
 * @ctxt:  an XML parser context
11103
 * @c:  character
11104
 *
11105
 * Check whether the input buffer contains a character.
11106
 */
11107
static int
11108
39.9M
xmlParseLookupChar(xmlParserCtxtPtr ctxt, int c) {
11109
39.9M
    const xmlChar *cur;
11110
11111
39.9M
    if (ctxt->checkIndex == 0) {
11112
39.0M
        cur = ctxt->input->cur + 1;
11113
39.0M
    } else {
11114
882k
        cur = ctxt->input->cur + ctxt->checkIndex;
11115
882k
    }
11116
11117
39.9M
    if (memchr(cur, c, ctxt->input->end - cur) == NULL) {
11118
909k
        ctxt->checkIndex = ctxt->input->end - ctxt->input->cur;
11119
909k
        return(0);
11120
39.0M
    } else {
11121
39.0M
        ctxt->checkIndex = 0;
11122
39.0M
        return(1);
11123
39.0M
    }
11124
39.9M
}
11125
11126
/**
11127
 * xmlParseLookupString:
11128
 * @ctxt:  an XML parser context
11129
 * @startDelta: delta to apply at the start
11130
 * @str:  string
11131
 * @strLen:  length of string
11132
 *
11133
 * Check whether the input buffer contains a string.
11134
 */
11135
static const xmlChar *
11136
xmlParseLookupString(xmlParserCtxtPtr ctxt, size_t startDelta,
11137
3.64M
                     const char *str, size_t strLen) {
11138
3.64M
    const xmlChar *cur, *term;
11139
11140
3.64M
    if (ctxt->checkIndex == 0) {
11141
2.14M
        cur = ctxt->input->cur + startDelta;
11142
2.14M
    } else {
11143
1.50M
        cur = ctxt->input->cur + ctxt->checkIndex;
11144
1.50M
    }
11145
11146
3.64M
    term = BAD_CAST strstr((const char *) cur, str);
11147
3.64M
    if (term == NULL) {
11148
1.90M
        const xmlChar *end = ctxt->input->end;
11149
11150
        /* Rescan (strLen - 1) characters. */
11151
1.90M
        if ((size_t) (end - cur) < strLen)
11152
50.4k
            end = cur;
11153
1.85M
        else
11154
1.85M
            end -= strLen - 1;
11155
1.90M
        ctxt->checkIndex = end - ctxt->input->cur;
11156
1.90M
    } else {
11157
1.74M
        ctxt->checkIndex = 0;
11158
1.74M
    }
11159
11160
3.64M
    return(term);
11161
3.64M
}
11162
11163
/**
11164
 * xmlParseLookupCharData:
11165
 * @ctxt:  an XML parser context
11166
 *
11167
 * Check whether the input buffer contains terminated char data.
11168
 */
11169
static int
11170
39.7M
xmlParseLookupCharData(xmlParserCtxtPtr ctxt) {
11171
39.7M
    const xmlChar *cur = ctxt->input->cur + ctxt->checkIndex;
11172
39.7M
    const xmlChar *end = ctxt->input->end;
11173
11174
778M
    while (cur < end) {
11175
773M
        if ((*cur == '<') || (*cur == '&')) {
11176
34.5M
            ctxt->checkIndex = 0;
11177
34.5M
            return(1);
11178
34.5M
        }
11179
738M
        cur++;
11180
738M
    }
11181
11182
5.26M
    ctxt->checkIndex = cur - ctxt->input->cur;
11183
5.26M
    return(0);
11184
39.7M
}
11185
11186
/**
11187
 * xmlParseLookupGt:
11188
 * @ctxt:  an XML parser context
11189
 *
11190
 * Check whether there's enough data in the input buffer to finish parsing
11191
 * a start tag. This has to take quotes into account.
11192
 */
11193
static int
11194
32.1M
xmlParseLookupGt(xmlParserCtxtPtr ctxt) {
11195
32.1M
    const xmlChar *cur;
11196
32.1M
    const xmlChar *end = ctxt->input->end;
11197
32.1M
    int state = ctxt->endCheckState;
11198
11199
32.1M
    if (ctxt->checkIndex == 0)
11200
26.7M
        cur = ctxt->input->cur + 1;
11201
5.38M
    else
11202
5.38M
        cur = ctxt->input->cur + ctxt->checkIndex;
11203
11204
1.09G
    while (cur < end) {
11205
1.09G
        if (state) {
11206
585M
            if (*cur == state)
11207
33.5M
                state = 0;
11208
585M
        } else if (*cur == '\'' || *cur == '"') {
11209
33.6M
            state = *cur;
11210
473M
        } else if (*cur == '>') {
11211
26.5M
            ctxt->checkIndex = 0;
11212
26.5M
            ctxt->endCheckState = 0;
11213
26.5M
            return(1);
11214
26.5M
        }
11215
1.06G
        cur++;
11216
1.06G
    }
11217
11218
5.52M
    ctxt->checkIndex = cur - ctxt->input->cur;
11219
5.52M
    ctxt->endCheckState = state;
11220
5.52M
    return(0);
11221
32.1M
}
11222
11223
/**
11224
 * xmlParseLookupInternalSubset:
11225
 * @ctxt:  an XML parser context
11226
 *
11227
 * Check whether there's enough data in the input buffer to finish parsing
11228
 * the internal subset.
11229
 */
11230
static int
11231
2.09M
xmlParseLookupInternalSubset(xmlParserCtxtPtr ctxt) {
11232
    /*
11233
     * Sorry, but progressive parsing of the internal subset is not
11234
     * supported. We first check that the full content of the internal
11235
     * subset is available and parsing is launched only at that point.
11236
     * Internal subset ends with "']' S? '>'" in an unescaped section and
11237
     * not in a ']]>' sequence which are conditional sections.
11238
     */
11239
2.09M
    const xmlChar *cur, *start;
11240
2.09M
    const xmlChar *end = ctxt->input->end;
11241
2.09M
    int state = ctxt->endCheckState;
11242
11243
2.09M
    if (ctxt->checkIndex == 0) {
11244
249k
        cur = ctxt->input->cur + 1;
11245
1.84M
    } else {
11246
1.84M
        cur = ctxt->input->cur + ctxt->checkIndex;
11247
1.84M
    }
11248
2.09M
    start = cur;
11249
11250
398M
    while (cur < end) {
11251
396M
        if (state == '-') {
11252
25.2M
            if ((*cur == '-') &&
11253
25.2M
                (cur[1] == '-') &&
11254
25.2M
                (cur[2] == '>')) {
11255
272k
                state = 0;
11256
272k
                cur += 3;
11257
272k
                start = cur;
11258
272k
                continue;
11259
272k
            }
11260
25.2M
        }
11261
370M
        else if (state == ']') {
11262
256k
            if (*cur == '>') {
11263
207k
                ctxt->checkIndex = 0;
11264
207k
                ctxt->endCheckState = 0;
11265
207k
                return(1);
11266
207k
            }
11267
49.3k
            if (IS_BLANK_CH(*cur)) {
11268
29.2k
                state = ' ';
11269
29.2k
            } else if (*cur != ']') {
11270
8.82k
                state = 0;
11271
8.82k
                start = cur;
11272
8.82k
                continue;
11273
8.82k
            }
11274
49.3k
        }
11275
370M
        else if (state == ' ') {
11276
70.2k
            if (*cur == '>') {
11277
1.88k
                ctxt->checkIndex = 0;
11278
1.88k
                ctxt->endCheckState = 0;
11279
1.88k
                return(1);
11280
1.88k
            }
11281
68.3k
            if (!IS_BLANK_CH(*cur)) {
11282
27.1k
                state = 0;
11283
27.1k
                start = cur;
11284
27.1k
                continue;
11285
27.1k
            }
11286
68.3k
        }
11287
370M
        else if (state != 0) {
11288
256M
            if (*cur == state) {
11289
2.19M
                state = 0;
11290
2.19M
                start = cur + 1;
11291
2.19M
            }
11292
256M
        }
11293
113M
        else if (*cur == '<') {
11294
2.86M
            if ((cur[1] == '!') &&
11295
2.86M
                (cur[2] == '-') &&
11296
2.86M
                (cur[3] == '-')) {
11297
275k
                state = '-';
11298
275k
                cur += 4;
11299
                /* Don't treat <!--> as comment */
11300
275k
                start = cur;
11301
275k
                continue;
11302
275k
            }
11303
2.86M
        }
11304
111M
        else if ((*cur == '"') || (*cur == '\'') || (*cur == ']')) {
11305
2.45M
            state = *cur;
11306
2.45M
        }
11307
11308
395M
        cur++;
11309
395M
    }
11310
11311
    /*
11312
     * Rescan the three last characters to detect "<!--" and "-->"
11313
     * split across chunks.
11314
     */
11315
1.88M
    if ((state == 0) || (state == '-')) {
11316
619k
        if (cur - start < 3)
11317
51.5k
            cur = start;
11318
568k
        else
11319
568k
            cur -= 3;
11320
619k
    }
11321
1.88M
    ctxt->checkIndex = cur - ctxt->input->cur;
11322
1.88M
    ctxt->endCheckState = state;
11323
1.88M
    return(0);
11324
2.09M
}
11325
11326
/**
11327
 * xmlCheckCdataPush:
11328
 * @cur: pointer to the block of characters
11329
 * @len: length of the block in bytes
11330
 * @complete: 1 if complete CDATA block is passed in, 0 if partial block
11331
 *
11332
 * Check that the block of characters is okay as SCdata content [20]
11333
 *
11334
 * Returns the number of bytes to pass if okay, a negative index where an
11335
 *         UTF-8 error occurred otherwise
11336
 */
11337
static int
11338
993k
xmlCheckCdataPush(const xmlChar *utf, int len, int complete) {
11339
993k
    int ix;
11340
993k
    unsigned char c;
11341
993k
    int codepoint;
11342
11343
993k
    if ((utf == NULL) || (len <= 0))
11344
4.81k
        return(0);
11345
11346
39.1M
    for (ix = 0; ix < len;) {      /* string is 0-terminated */
11347
38.9M
        c = utf[ix];
11348
38.9M
        if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
11349
35.6M
      if (c >= 0x20)
11350
33.6M
    ix++;
11351
1.97M
      else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11352
1.77M
          ix++;
11353
198k
      else
11354
198k
          return(-ix);
11355
35.6M
  } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
11356
1.49M
      if (ix + 2 > len) return(complete ? -ix : ix);
11357
1.48M
      if ((utf[ix+1] & 0xc0 ) != 0x80)
11358
363k
          return(-ix);
11359
1.11M
      codepoint = (utf[ix] & 0x1f) << 6;
11360
1.11M
      codepoint |= utf[ix+1] & 0x3f;
11361
1.11M
      if (!xmlIsCharQ(codepoint))
11362
6.57k
          return(-ix);
11363
1.11M
      ix += 2;
11364
1.81M
  } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
11365
614k
      if (ix + 3 > len) return(complete ? -ix : ix);
11366
603k
      if (((utf[ix+1] & 0xc0) != 0x80) ||
11367
603k
          ((utf[ix+2] & 0xc0) != 0x80))
11368
48.0k
        return(-ix);
11369
555k
      codepoint = (utf[ix] & 0xf) << 12;
11370
555k
      codepoint |= (utf[ix+1] & 0x3f) << 6;
11371
555k
      codepoint |= utf[ix+2] & 0x3f;
11372
555k
      if (!xmlIsCharQ(codepoint))
11373
9.06k
          return(-ix);
11374
546k
      ix += 3;
11375
1.19M
  } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
11376
1.11M
      if (ix + 4 > len) return(complete ? -ix : ix);
11377
1.10M
      if (((utf[ix+1] & 0xc0) != 0x80) ||
11378
1.10M
          ((utf[ix+2] & 0xc0) != 0x80) ||
11379
1.10M
    ((utf[ix+3] & 0xc0) != 0x80))
11380
43.3k
        return(-ix);
11381
1.06M
      codepoint = (utf[ix] & 0x7) << 18;
11382
1.06M
      codepoint |= (utf[ix+1] & 0x3f) << 12;
11383
1.06M
      codepoint |= (utf[ix+2] & 0x3f) << 6;
11384
1.06M
      codepoint |= utf[ix+3] & 0x3f;
11385
1.06M
      if (!xmlIsCharQ(codepoint))
11386
16.4k
          return(-ix);
11387
1.04M
      ix += 4;
11388
1.04M
  } else       /* unknown encoding */
11389
84.1k
      return(-ix);
11390
38.9M
      }
11391
184k
      return(ix);
11392
988k
}
11393
11394
/**
11395
 * xmlParseTryOrFinish:
11396
 * @ctxt:  an XML parser context
11397
 * @terminate:  last chunk indicator
11398
 *
11399
 * Try to progress on parsing
11400
 *
11401
 * Returns zero if no parsing was possible
11402
 */
11403
static int
11404
17.9M
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11405
17.9M
    int ret = 0;
11406
17.9M
    int avail, tlen;
11407
17.9M
    xmlChar cur, next;
11408
11409
17.9M
    if (ctxt->input == NULL)
11410
0
        return(0);
11411
11412
#ifdef DEBUG_PUSH
11413
    switch (ctxt->instate) {
11414
  case XML_PARSER_EOF:
11415
      xmlGenericError(xmlGenericErrorContext,
11416
        "PP: try EOF\n"); break;
11417
  case XML_PARSER_START:
11418
      xmlGenericError(xmlGenericErrorContext,
11419
        "PP: try START\n"); break;
11420
  case XML_PARSER_MISC:
11421
      xmlGenericError(xmlGenericErrorContext,
11422
        "PP: try MISC\n");break;
11423
  case XML_PARSER_COMMENT:
11424
      xmlGenericError(xmlGenericErrorContext,
11425
        "PP: try COMMENT\n");break;
11426
  case XML_PARSER_PROLOG:
11427
      xmlGenericError(xmlGenericErrorContext,
11428
        "PP: try PROLOG\n");break;
11429
  case XML_PARSER_START_TAG:
11430
      xmlGenericError(xmlGenericErrorContext,
11431
        "PP: try START_TAG\n");break;
11432
  case XML_PARSER_CONTENT:
11433
      xmlGenericError(xmlGenericErrorContext,
11434
        "PP: try CONTENT\n");break;
11435
  case XML_PARSER_CDATA_SECTION:
11436
      xmlGenericError(xmlGenericErrorContext,
11437
        "PP: try CDATA_SECTION\n");break;
11438
  case XML_PARSER_END_TAG:
11439
      xmlGenericError(xmlGenericErrorContext,
11440
        "PP: try END_TAG\n");break;
11441
  case XML_PARSER_ENTITY_DECL:
11442
      xmlGenericError(xmlGenericErrorContext,
11443
        "PP: try ENTITY_DECL\n");break;
11444
  case XML_PARSER_ENTITY_VALUE:
11445
      xmlGenericError(xmlGenericErrorContext,
11446
        "PP: try ENTITY_VALUE\n");break;
11447
  case XML_PARSER_ATTRIBUTE_VALUE:
11448
      xmlGenericError(xmlGenericErrorContext,
11449
        "PP: try ATTRIBUTE_VALUE\n");break;
11450
  case XML_PARSER_DTD:
11451
      xmlGenericError(xmlGenericErrorContext,
11452
        "PP: try DTD\n");break;
11453
  case XML_PARSER_EPILOG:
11454
      xmlGenericError(xmlGenericErrorContext,
11455
        "PP: try EPILOG\n");break;
11456
  case XML_PARSER_PI:
11457
      xmlGenericError(xmlGenericErrorContext,
11458
        "PP: try PI\n");break;
11459
        case XML_PARSER_IGNORE:
11460
            xmlGenericError(xmlGenericErrorContext,
11461
        "PP: try IGNORE\n");break;
11462
    }
11463
#endif
11464
11465
17.9M
    if ((ctxt->input != NULL) &&
11466
17.9M
        (ctxt->input->cur - ctxt->input->base > 4096)) {
11467
365k
        xmlParserInputShrink(ctxt->input);
11468
365k
    }
11469
11470
191M
    while (ctxt->instate != XML_PARSER_EOF) {
11471
191M
  if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11472
187k
      return(0);
11473
11474
191M
  if (ctxt->input == NULL) break;
11475
191M
  if (ctxt->input->buf == NULL)
11476
0
      avail = ctxt->input->length -
11477
0
              (ctxt->input->cur - ctxt->input->base);
11478
191M
  else {
11479
      /*
11480
       * If we are operating on converted input, try to flush
11481
       * remaining chars to avoid them stalling in the non-converted
11482
       * buffer. But do not do this in document start where
11483
       * encoding="..." may not have been read and we work on a
11484
       * guessed encoding.
11485
       */
11486
191M
      if ((ctxt->instate != XML_PARSER_START) &&
11487
191M
          (ctxt->input->buf->raw != NULL) &&
11488
191M
    (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
11489
285k
                size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11490
285k
                                                 ctxt->input);
11491
285k
    size_t current = ctxt->input->cur - ctxt->input->base;
11492
11493
285k
    xmlParserInputBufferPush(ctxt->input->buf, 0, "");
11494
285k
                xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11495
285k
                                      base, current);
11496
285k
      }
11497
191M
      avail = xmlBufUse(ctxt->input->buf->buffer) -
11498
191M
        (ctxt->input->cur - ctxt->input->base);
11499
191M
  }
11500
191M
        if (avail < 1)
11501
894k
      goto done;
11502
190M
        switch (ctxt->instate) {
11503
0
            case XML_PARSER_EOF:
11504
          /*
11505
     * Document parsing is done !
11506
     */
11507
0
          goto done;
11508
1.67M
            case XML_PARSER_START:
11509
1.67M
    if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11510
488k
        xmlChar start[4];
11511
488k
        xmlCharEncoding enc;
11512
11513
        /*
11514
         * Very first chars read from the document flow.
11515
         */
11516
488k
        if (avail < 4)
11517
37.0k
      goto done;
11518
11519
        /*
11520
         * Get the 4 first bytes and decode the charset
11521
         * if enc != XML_CHAR_ENCODING_NONE
11522
         * plug some encoding conversion routines,
11523
         * else xmlSwitchEncoding will set to (default)
11524
         * UTF8.
11525
         */
11526
451k
        start[0] = RAW;
11527
451k
        start[1] = NXT(1);
11528
451k
        start[2] = NXT(2);
11529
451k
        start[3] = NXT(3);
11530
451k
        enc = xmlDetectCharEncoding(start, 4);
11531
451k
        xmlSwitchEncoding(ctxt, enc);
11532
451k
        break;
11533
488k
    }
11534
11535
1.18M
    if (avail < 2)
11536
640
        goto done;
11537
1.18M
    cur = ctxt->input->cur[0];
11538
1.18M
    next = ctxt->input->cur[1];
11539
1.18M
    if (cur == 0) {
11540
4.38k
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11541
4.38k
      ctxt->sax->setDocumentLocator(ctxt->userData,
11542
4.38k
                  &xmlDefaultSAXLocator);
11543
4.38k
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11544
4.38k
        xmlHaltParser(ctxt);
11545
#ifdef DEBUG_PUSH
11546
        xmlGenericError(xmlGenericErrorContext,
11547
          "PP: entering EOF\n");
11548
#endif
11549
4.38k
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11550
4.38k
      ctxt->sax->endDocument(ctxt->userData);
11551
4.38k
        goto done;
11552
4.38k
    }
11553
1.18M
          if ((cur == '<') && (next == '?')) {
11554
        /* PI or XML decl */
11555
919k
        if (avail < 5) goto done;
11556
919k
        if ((!terminate) &&
11557
919k
                        (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11558
564k
      goto done;
11559
355k
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11560
355k
      ctxt->sax->setDocumentLocator(ctxt->userData,
11561
355k
                  &xmlDefaultSAXLocator);
11562
355k
        if ((ctxt->input->cur[2] == 'x') &&
11563
355k
      (ctxt->input->cur[3] == 'm') &&
11564
355k
      (ctxt->input->cur[4] == 'l') &&
11565
355k
      (IS_BLANK_CH(ctxt->input->cur[5]))) {
11566
323k
      ret += 5;
11567
#ifdef DEBUG_PUSH
11568
      xmlGenericError(xmlGenericErrorContext,
11569
        "PP: Parsing XML Decl\n");
11570
#endif
11571
323k
      xmlParseXMLDecl(ctxt);
11572
323k
      if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11573
          /*
11574
           * The XML REC instructs us to stop parsing right
11575
           * here
11576
           */
11577
624
          xmlHaltParser(ctxt);
11578
624
          return(0);
11579
624
      }
11580
323k
      ctxt->standalone = ctxt->input->standalone;
11581
323k
      if ((ctxt->encoding == NULL) &&
11582
323k
          (ctxt->input->encoding != NULL))
11583
45.1k
          ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11584
323k
      if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11585
323k
          (!ctxt->disableSAX))
11586
292k
          ctxt->sax->startDocument(ctxt->userData);
11587
323k
      ctxt->instate = XML_PARSER_MISC;
11588
#ifdef DEBUG_PUSH
11589
      xmlGenericError(xmlGenericErrorContext,
11590
        "PP: entering MISC\n");
11591
#endif
11592
323k
        } else {
11593
31.2k
      ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11594
31.2k
      if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11595
31.2k
          (!ctxt->disableSAX))
11596
31.2k
          ctxt->sax->startDocument(ctxt->userData);
11597
31.2k
      ctxt->instate = XML_PARSER_MISC;
11598
#ifdef DEBUG_PUSH
11599
      xmlGenericError(xmlGenericErrorContext,
11600
        "PP: entering MISC\n");
11601
#endif
11602
31.2k
        }
11603
355k
    } else {
11604
262k
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11605
262k
      ctxt->sax->setDocumentLocator(ctxt->userData,
11606
262k
                  &xmlDefaultSAXLocator);
11607
262k
        ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11608
262k
        if (ctxt->version == NULL) {
11609
0
            xmlErrMemory(ctxt, NULL);
11610
0
      break;
11611
0
        }
11612
262k
        if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11613
262k
            (!ctxt->disableSAX))
11614
262k
      ctxt->sax->startDocument(ctxt->userData);
11615
262k
        ctxt->instate = XML_PARSER_MISC;
11616
#ifdef DEBUG_PUSH
11617
        xmlGenericError(xmlGenericErrorContext,
11618
          "PP: entering MISC\n");
11619
#endif
11620
262k
    }
11621
617k
    break;
11622
34.7M
            case XML_PARSER_START_TAG: {
11623
34.7M
          const xmlChar *name;
11624
34.7M
    const xmlChar *prefix = NULL;
11625
34.7M
    const xmlChar *URI = NULL;
11626
34.7M
                int line = ctxt->input->line;
11627
34.7M
    int nsNr = ctxt->nsNr;
11628
11629
34.7M
    if ((avail < 2) && (ctxt->inputNr == 1))
11630
0
        goto done;
11631
34.7M
    cur = ctxt->input->cur[0];
11632
34.7M
          if (cur != '<') {
11633
30.7k
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11634
30.7k
        xmlHaltParser(ctxt);
11635
30.7k
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11636
30.7k
      ctxt->sax->endDocument(ctxt->userData);
11637
30.7k
        goto done;
11638
30.7k
    }
11639
34.7M
    if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11640
5.33M
                    goto done;
11641
29.4M
    if (ctxt->spaceNr == 0)
11642
168k
        spacePush(ctxt, -1);
11643
29.2M
    else if (*ctxt->space == -2)
11644
2.54M
        spacePush(ctxt, -1);
11645
26.6M
    else
11646
26.6M
        spacePush(ctxt, *ctxt->space);
11647
29.4M
#ifdef LIBXML_SAX1_ENABLED
11648
29.4M
    if (ctxt->sax2)
11649
17.8M
#endif /* LIBXML_SAX1_ENABLED */
11650
17.8M
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
11651
11.5M
#ifdef LIBXML_SAX1_ENABLED
11652
11.5M
    else
11653
11.5M
        name = xmlParseStartTag(ctxt);
11654
29.4M
#endif /* LIBXML_SAX1_ENABLED */
11655
29.4M
    if (ctxt->instate == XML_PARSER_EOF)
11656
1.49k
        goto done;
11657
29.3M
    if (name == NULL) {
11658
35.1k
        spacePop(ctxt);
11659
35.1k
        xmlHaltParser(ctxt);
11660
35.1k
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11661
35.1k
      ctxt->sax->endDocument(ctxt->userData);
11662
35.1k
        goto done;
11663
35.1k
    }
11664
29.3M
#ifdef LIBXML_VALID_ENABLED
11665
    /*
11666
     * [ VC: Root Element Type ]
11667
     * The Name in the document type declaration must match
11668
     * the element type of the root element.
11669
     */
11670
29.3M
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11671
29.3M
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
11672
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11673
29.3M
#endif /* LIBXML_VALID_ENABLED */
11674
11675
    /*
11676
     * Check for an Empty Element.
11677
     */
11678
29.3M
    if ((RAW == '/') && (NXT(1) == '>')) {
11679
9.82M
        SKIP(2);
11680
11681
9.82M
        if (ctxt->sax2) {
11682
6.17M
      if ((ctxt->sax != NULL) &&
11683
6.17M
          (ctxt->sax->endElementNs != NULL) &&
11684
6.17M
          (!ctxt->disableSAX))
11685
6.17M
          ctxt->sax->endElementNs(ctxt->userData, name,
11686
6.17M
                                  prefix, URI);
11687
6.17M
      if (ctxt->nsNr - nsNr > 0)
11688
10.9k
          nsPop(ctxt, ctxt->nsNr - nsNr);
11689
6.17M
#ifdef LIBXML_SAX1_ENABLED
11690
6.17M
        } else {
11691
3.64M
      if ((ctxt->sax != NULL) &&
11692
3.64M
          (ctxt->sax->endElement != NULL) &&
11693
3.64M
          (!ctxt->disableSAX))
11694
3.64M
          ctxt->sax->endElement(ctxt->userData, name);
11695
3.64M
#endif /* LIBXML_SAX1_ENABLED */
11696
3.64M
        }
11697
9.82M
        if (ctxt->instate == XML_PARSER_EOF)
11698
0
      goto done;
11699
9.82M
        spacePop(ctxt);
11700
9.82M
        if (ctxt->nameNr == 0) {
11701
11.2k
      ctxt->instate = XML_PARSER_EPILOG;
11702
9.81M
        } else {
11703
9.81M
      ctxt->instate = XML_PARSER_CONTENT;
11704
9.81M
        }
11705
9.82M
        break;
11706
9.82M
    }
11707
19.5M
    if (RAW == '>') {
11708
18.5M
        NEXT;
11709
18.5M
    } else {
11710
1.00M
        xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11711
1.00M
           "Couldn't find end of Start Tag %s\n",
11712
1.00M
           name);
11713
1.00M
        nodePop(ctxt);
11714
1.00M
        spacePop(ctxt);
11715
1.00M
    }
11716
19.5M
                nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
11717
11718
19.5M
    ctxt->instate = XML_PARSER_CONTENT;
11719
19.5M
                break;
11720
29.3M
      }
11721
130M
            case XML_PARSER_CONTENT: {
11722
130M
    if ((avail < 2) && (ctxt->inputNr == 1))
11723
621k
        goto done;
11724
129M
    cur = ctxt->input->cur[0];
11725
129M
    next = ctxt->input->cur[1];
11726
11727
129M
    if ((cur == '<') && (next == '/')) {
11728
18.0M
        ctxt->instate = XML_PARSER_END_TAG;
11729
18.0M
        break;
11730
111M
          } else if ((cur == '<') && (next == '?')) {
11731
250k
        if ((!terminate) &&
11732
250k
            (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11733
145k
      goto done;
11734
105k
        xmlParsePI(ctxt);
11735
105k
        ctxt->instate = XML_PARSER_CONTENT;
11736
111M
    } else if ((cur == '<') && (next != '!')) {
11737
29.0M
        ctxt->instate = XML_PARSER_START_TAG;
11738
29.0M
        break;
11739
82.2M
    } else if ((cur == '<') && (next == '!') &&
11740
82.2M
               (ctxt->input->cur[2] == '-') &&
11741
82.2M
         (ctxt->input->cur[3] == '-')) {
11742
971k
        if ((!terminate) &&
11743
971k
            (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11744
389k
      goto done;
11745
582k
        xmlParseComment(ctxt);
11746
582k
        ctxt->instate = XML_PARSER_CONTENT;
11747
81.2M
    } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11748
81.2M
        (ctxt->input->cur[2] == '[') &&
11749
81.2M
        (ctxt->input->cur[3] == 'C') &&
11750
81.2M
        (ctxt->input->cur[4] == 'D') &&
11751
81.2M
        (ctxt->input->cur[5] == 'A') &&
11752
81.2M
        (ctxt->input->cur[6] == 'T') &&
11753
81.2M
        (ctxt->input->cur[7] == 'A') &&
11754
81.2M
        (ctxt->input->cur[8] == '[')) {
11755
106k
        SKIP(9);
11756
106k
        ctxt->instate = XML_PARSER_CDATA_SECTION;
11757
106k
        break;
11758
81.1M
    } else if ((cur == '<') && (next == '!') &&
11759
81.1M
               (avail < 9)) {
11760
30.2k
        goto done;
11761
81.1M
    } else if (cur == '<') {
11762
453k
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11763
453k
                    "detected an error in element content\n");
11764
453k
                    SKIP(1);
11765
80.7M
    } else if (cur == '&') {
11766
27.1M
        if ((!terminate) && (!xmlParseLookupChar(ctxt, ';')))
11767
345k
      goto done;
11768
26.7M
        xmlParseReference(ctxt);
11769
53.5M
    } else {
11770
        /* TODO Avoid the extra copy, handle directly !!! */
11771
        /*
11772
         * Goal of the following test is:
11773
         *  - minimize calls to the SAX 'character' callback
11774
         *    when they are mergeable
11775
         *  - handle a problem for isBlank when we only parse
11776
         *    a sequence of blank chars and the next one is
11777
         *    not available to check against '<' presence.
11778
         *  - tries to homogenize the differences in SAX
11779
         *    callbacks between the push and pull versions
11780
         *    of the parser.
11781
         */
11782
53.5M
        if ((ctxt->inputNr == 1) &&
11783
53.5M
            (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11784
40.3M
      if ((!terminate) && (!xmlParseLookupCharData(ctxt)))
11785
5.26M
          goto done;
11786
40.3M
                    }
11787
48.2M
                    ctxt->checkIndex = 0;
11788
48.2M
        xmlParseCharData(ctxt, 0);
11789
48.2M
    }
11790
76.2M
    break;
11791
129M
      }
11792
76.2M
            case XML_PARSER_END_TAG:
11793
18.5M
    if (avail < 2)
11794
0
        goto done;
11795
18.5M
    if ((!terminate) && (!xmlParseLookupChar(ctxt, '>')))
11796
564k
        goto done;
11797
18.0M
    if (ctxt->sax2) {
11798
10.8M
              xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
11799
10.8M
        nameNsPop(ctxt);
11800
10.8M
    }
11801
7.18M
#ifdef LIBXML_SAX1_ENABLED
11802
7.18M
      else
11803
7.18M
        xmlParseEndTag1(ctxt, 0);
11804
18.0M
#endif /* LIBXML_SAX1_ENABLED */
11805
18.0M
    if (ctxt->instate == XML_PARSER_EOF) {
11806
        /* Nothing */
11807
18.0M
    } else if (ctxt->nameNr == 0) {
11808
73.4k
        ctxt->instate = XML_PARSER_EPILOG;
11809
17.9M
    } else {
11810
17.9M
        ctxt->instate = XML_PARSER_CONTENT;
11811
17.9M
    }
11812
18.0M
    break;
11813
1.27M
            case XML_PARSER_CDATA_SECTION: {
11814
          /*
11815
     * The Push mode need to have the SAX callback for
11816
     * cdataBlock merge back contiguous callbacks.
11817
     */
11818
1.27M
    const xmlChar *term;
11819
11820
1.27M
                if (terminate) {
11821
                    /*
11822
                     * Don't call xmlParseLookupString. If 'terminate'
11823
                     * is set, checkIndex is invalid.
11824
                     */
11825
11.5k
                    term = BAD_CAST strstr((const char *) ctxt->input->cur,
11826
11.5k
                                           "]]>");
11827
1.25M
                } else {
11828
1.25M
        term = xmlParseLookupString(ctxt, 0, "]]>", 3);
11829
1.25M
                }
11830
11831
1.27M
    if (term == NULL) {
11832
631k
        int tmp, size;
11833
11834
631k
                    if (terminate) {
11835
                        /* Unfinished CDATA section */
11836
4.99k
                        size = ctxt->input->end - ctxt->input->cur;
11837
626k
                    } else {
11838
626k
                        if (avail < XML_PARSER_BIG_BUFFER_SIZE + 2)
11839
276k
                            goto done;
11840
349k
                        ctxt->checkIndex = 0;
11841
                        /* XXX: Why don't we pass the full buffer? */
11842
349k
                        size = XML_PARSER_BIG_BUFFER_SIZE;
11843
349k
                    }
11844
354k
                    tmp = xmlCheckCdataPush(ctxt->input->cur, size, 0);
11845
354k
                    if (tmp <= 0) {
11846
253k
                        tmp = -tmp;
11847
253k
                        ctxt->input->cur += tmp;
11848
253k
                        goto encoding_error;
11849
253k
                    }
11850
100k
                    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11851
100k
                        if (ctxt->sax->cdataBlock != NULL)
11852
55.1k
                            ctxt->sax->cdataBlock(ctxt->userData,
11853
55.1k
                                                  ctxt->input->cur, tmp);
11854
45.6k
                        else if (ctxt->sax->characters != NULL)
11855
45.6k
                            ctxt->sax->characters(ctxt->userData,
11856
45.6k
                                                  ctxt->input->cur, tmp);
11857
100k
                    }
11858
100k
                    if (ctxt->instate == XML_PARSER_EOF)
11859
0
                        goto done;
11860
100k
                    SKIPL(tmp);
11861
638k
    } else {
11862
638k
                    int base = term - CUR_PTR;
11863
638k
        int tmp;
11864
11865
638k
        tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1);
11866
638k
        if ((tmp < 0) || (tmp != base)) {
11867
546k
      tmp = -tmp;
11868
546k
      ctxt->input->cur += tmp;
11869
546k
      goto encoding_error;
11870
546k
        }
11871
91.9k
        if ((ctxt->sax != NULL) && (base == 0) &&
11872
91.9k
            (ctxt->sax->cdataBlock != NULL) &&
11873
91.9k
            (!ctxt->disableSAX)) {
11874
      /*
11875
       * Special case to provide identical behaviour
11876
       * between pull and push parsers on empty CDATA
11877
       * sections
11878
       */
11879
2.98k
       if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11880
2.98k
           (!strncmp((const char *)&ctxt->input->cur[-9],
11881
2.98k
                     "<![CDATA[", 9)))
11882
2.95k
           ctxt->sax->cdataBlock(ctxt->userData,
11883
2.95k
                                 BAD_CAST "", 0);
11884
89.0k
        } else if ((ctxt->sax != NULL) && (base > 0) &&
11885
89.0k
      (!ctxt->disableSAX)) {
11886
87.1k
      if (ctxt->sax->cdataBlock != NULL)
11887
54.5k
          ctxt->sax->cdataBlock(ctxt->userData,
11888
54.5k
              ctxt->input->cur, base);
11889
32.6k
      else if (ctxt->sax->characters != NULL)
11890
32.6k
          ctxt->sax->characters(ctxt->userData,
11891
32.6k
              ctxt->input->cur, base);
11892
87.1k
        }
11893
91.9k
        if (ctxt->instate == XML_PARSER_EOF)
11894
0
      goto done;
11895
91.9k
        SKIPL(base + 3);
11896
91.9k
        ctxt->instate = XML_PARSER_CONTENT;
11897
#ifdef DEBUG_PUSH
11898
        xmlGenericError(xmlGenericErrorContext,
11899
          "PP: entering CONTENT\n");
11900
#endif
11901
91.9k
    }
11902
192k
    break;
11903
1.27M
      }
11904
993k
            case XML_PARSER_MISC:
11905
1.38M
            case XML_PARSER_PROLOG:
11906
1.48M
            case XML_PARSER_EPILOG:
11907
1.48M
    SKIP_BLANKS;
11908
1.48M
    if (ctxt->input->buf == NULL)
11909
0
        avail = ctxt->input->length -
11910
0
                (ctxt->input->cur - ctxt->input->base);
11911
1.48M
    else
11912
1.48M
        avail = xmlBufUse(ctxt->input->buf->buffer) -
11913
1.48M
                (ctxt->input->cur - ctxt->input->base);
11914
1.48M
    if (avail < 2)
11915
67.5k
        goto done;
11916
1.42M
    cur = ctxt->input->cur[0];
11917
1.42M
    next = ctxt->input->cur[1];
11918
1.42M
          if ((cur == '<') && (next == '?')) {
11919
172k
        if ((!terminate) &&
11920
172k
                        (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11921
54.7k
      goto done;
11922
#ifdef DEBUG_PUSH
11923
        xmlGenericError(xmlGenericErrorContext,
11924
          "PP: Parsing PI\n");
11925
#endif
11926
117k
        xmlParsePI(ctxt);
11927
117k
        if (ctxt->instate == XML_PARSER_EOF)
11928
0
      goto done;
11929
1.24M
    } else if ((cur == '<') && (next == '!') &&
11930
1.24M
        (ctxt->input->cur[2] == '-') &&
11931
1.24M
        (ctxt->input->cur[3] == '-')) {
11932
199k
        if ((!terminate) &&
11933
199k
                        (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11934
123k
      goto done;
11935
#ifdef DEBUG_PUSH
11936
        xmlGenericError(xmlGenericErrorContext,
11937
          "PP: Parsing Comment\n");
11938
#endif
11939
76.1k
        xmlParseComment(ctxt);
11940
76.1k
        if (ctxt->instate == XML_PARSER_EOF)
11941
0
      goto done;
11942
1.04M
    } else if ((ctxt->instate == XML_PARSER_MISC) &&
11943
1.04M
                    (cur == '<') && (next == '!') &&
11944
1.04M
        (ctxt->input->cur[2] == 'D') &&
11945
1.04M
        (ctxt->input->cur[3] == 'O') &&
11946
1.04M
        (ctxt->input->cur[4] == 'C') &&
11947
1.04M
        (ctxt->input->cur[5] == 'T') &&
11948
1.04M
        (ctxt->input->cur[6] == 'Y') &&
11949
1.04M
        (ctxt->input->cur[7] == 'P') &&
11950
1.04M
        (ctxt->input->cur[8] == 'E')) {
11951
559k
        if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11952
193k
                        goto done;
11953
#ifdef DEBUG_PUSH
11954
        xmlGenericError(xmlGenericErrorContext,
11955
          "PP: Parsing internal subset\n");
11956
#endif
11957
366k
        ctxt->inSubset = 1;
11958
366k
        xmlParseDocTypeDecl(ctxt);
11959
366k
        if (ctxt->instate == XML_PARSER_EOF)
11960
0
      goto done;
11961
366k
        if (RAW == '[') {
11962
263k
      ctxt->instate = XML_PARSER_DTD;
11963
#ifdef DEBUG_PUSH
11964
      xmlGenericError(xmlGenericErrorContext,
11965
        "PP: entering DTD\n");
11966
#endif
11967
263k
        } else {
11968
      /*
11969
       * Create and update the external subset.
11970
       */
11971
103k
      ctxt->inSubset = 2;
11972
103k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11973
103k
          (ctxt->sax->externalSubset != NULL))
11974
99.3k
          ctxt->sax->externalSubset(ctxt->userData,
11975
99.3k
            ctxt->intSubName, ctxt->extSubSystem,
11976
99.3k
            ctxt->extSubURI);
11977
103k
      ctxt->inSubset = 0;
11978
103k
      xmlCleanSpecialAttr(ctxt);
11979
103k
      ctxt->instate = XML_PARSER_PROLOG;
11980
#ifdef DEBUG_PUSH
11981
      xmlGenericError(xmlGenericErrorContext,
11982
        "PP: entering PROLOG\n");
11983
#endif
11984
103k
        }
11985
488k
    } else if ((cur == '<') && (next == '!') &&
11986
488k
               (avail <
11987
42.6k
                            (ctxt->instate == XML_PARSER_MISC ? 9 : 4))) {
11988
36.9k
        goto done;
11989
451k
    } else if (ctxt->instate == XML_PARSER_EPILOG) {
11990
16.5k
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11991
16.5k
        xmlHaltParser(ctxt);
11992
#ifdef DEBUG_PUSH
11993
        xmlGenericError(xmlGenericErrorContext,
11994
          "PP: entering EOF\n");
11995
#endif
11996
16.5k
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11997
16.5k
      ctxt->sax->endDocument(ctxt->userData);
11998
16.5k
        goto done;
11999
434k
                } else {
12000
434k
        ctxt->instate = XML_PARSER_START_TAG;
12001
#ifdef DEBUG_PUSH
12002
        xmlGenericError(xmlGenericErrorContext,
12003
          "PP: entering START_TAG\n");
12004
#endif
12005
434k
    }
12006
995k
    break;
12007
2.13M
            case XML_PARSER_DTD: {
12008
2.13M
                if ((!terminate) && (!xmlParseLookupInternalSubset(ctxt)))
12009
1.88M
                    goto done;
12010
245k
    xmlParseInternalSubset(ctxt);
12011
245k
    if (ctxt->instate == XML_PARSER_EOF)
12012
58.1k
        goto done;
12013
187k
    ctxt->inSubset = 2;
12014
187k
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
12015
187k
        (ctxt->sax->externalSubset != NULL))
12016
182k
        ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
12017
182k
          ctxt->extSubSystem, ctxt->extSubURI);
12018
187k
    ctxt->inSubset = 0;
12019
187k
    xmlCleanSpecialAttr(ctxt);
12020
187k
    if (ctxt->instate == XML_PARSER_EOF)
12021
14.3k
        goto done;
12022
173k
    ctxt->instate = XML_PARSER_PROLOG;
12023
#ifdef DEBUG_PUSH
12024
    xmlGenericError(xmlGenericErrorContext,
12025
      "PP: entering PROLOG\n");
12026
#endif
12027
173k
                break;
12028
187k
      }
12029
0
            case XML_PARSER_COMMENT:
12030
0
    xmlGenericError(xmlGenericErrorContext,
12031
0
      "PP: internal error, state == COMMENT\n");
12032
0
    ctxt->instate = XML_PARSER_CONTENT;
12033
#ifdef DEBUG_PUSH
12034
    xmlGenericError(xmlGenericErrorContext,
12035
      "PP: entering CONTENT\n");
12036
#endif
12037
0
    break;
12038
0
            case XML_PARSER_IGNORE:
12039
0
    xmlGenericError(xmlGenericErrorContext,
12040
0
      "PP: internal error, state == IGNORE");
12041
0
          ctxt->instate = XML_PARSER_DTD;
12042
#ifdef DEBUG_PUSH
12043
    xmlGenericError(xmlGenericErrorContext,
12044
      "PP: entering DTD\n");
12045
#endif
12046
0
          break;
12047
0
            case XML_PARSER_PI:
12048
0
    xmlGenericError(xmlGenericErrorContext,
12049
0
      "PP: internal error, state == PI\n");
12050
0
    ctxt->instate = XML_PARSER_CONTENT;
12051
#ifdef DEBUG_PUSH
12052
    xmlGenericError(xmlGenericErrorContext,
12053
      "PP: entering CONTENT\n");
12054
#endif
12055
0
    break;
12056
0
            case XML_PARSER_ENTITY_DECL:
12057
0
    xmlGenericError(xmlGenericErrorContext,
12058
0
      "PP: internal error, state == ENTITY_DECL\n");
12059
0
    ctxt->instate = XML_PARSER_DTD;
12060
#ifdef DEBUG_PUSH
12061
    xmlGenericError(xmlGenericErrorContext,
12062
      "PP: entering DTD\n");
12063
#endif
12064
0
    break;
12065
0
            case XML_PARSER_ENTITY_VALUE:
12066
0
    xmlGenericError(xmlGenericErrorContext,
12067
0
      "PP: internal error, state == ENTITY_VALUE\n");
12068
0
    ctxt->instate = XML_PARSER_CONTENT;
12069
#ifdef DEBUG_PUSH
12070
    xmlGenericError(xmlGenericErrorContext,
12071
      "PP: entering DTD\n");
12072
#endif
12073
0
    break;
12074
0
            case XML_PARSER_ATTRIBUTE_VALUE:
12075
0
    xmlGenericError(xmlGenericErrorContext,
12076
0
      "PP: internal error, state == ATTRIBUTE_VALUE\n");
12077
0
    ctxt->instate = XML_PARSER_START_TAG;
12078
#ifdef DEBUG_PUSH
12079
    xmlGenericError(xmlGenericErrorContext,
12080
      "PP: entering START_TAG\n");
12081
#endif
12082
0
    break;
12083
0
            case XML_PARSER_SYSTEM_LITERAL:
12084
0
    xmlGenericError(xmlGenericErrorContext,
12085
0
      "PP: internal error, state == SYSTEM_LITERAL\n");
12086
0
    ctxt->instate = XML_PARSER_START_TAG;
12087
#ifdef DEBUG_PUSH
12088
    xmlGenericError(xmlGenericErrorContext,
12089
      "PP: entering START_TAG\n");
12090
#endif
12091
0
    break;
12092
0
            case XML_PARSER_PUBLIC_LITERAL:
12093
0
    xmlGenericError(xmlGenericErrorContext,
12094
0
      "PP: internal error, state == PUBLIC_LITERAL\n");
12095
0
    ctxt->instate = XML_PARSER_START_TAG;
12096
#ifdef DEBUG_PUSH
12097
    xmlGenericError(xmlGenericErrorContext,
12098
      "PP: entering START_TAG\n");
12099
#endif
12100
0
    break;
12101
190M
  }
12102
190M
    }
12103
16.9M
done:
12104
#ifdef DEBUG_PUSH
12105
    xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
12106
#endif
12107
16.9M
    return(ret);
12108
800k
encoding_error:
12109
800k
    {
12110
800k
        char buffer[150];
12111
12112
800k
  snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12113
800k
      ctxt->input->cur[0], ctxt->input->cur[1],
12114
800k
      ctxt->input->cur[2], ctxt->input->cur[3]);
12115
800k
  __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12116
800k
         "Input is not proper UTF-8, indicate encoding !\n%s",
12117
800k
         BAD_CAST buffer, NULL);
12118
800k
    }
12119
800k
    return(0);
12120
17.9M
}
12121
12122
/**
12123
 * xmlParseChunk:
12124
 * @ctxt:  an XML parser context
12125
 * @chunk:  an char array
12126
 * @size:  the size in byte of the chunk
12127
 * @terminate:  last chunk indicator
12128
 *
12129
 * Parse a Chunk of memory
12130
 *
12131
 * Returns zero if no error, the xmlParserErrors otherwise.
12132
 */
12133
int
12134
xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
12135
26.1M
              int terminate) {
12136
26.1M
    int end_in_lf = 0;
12137
26.1M
    int remain = 0;
12138
12139
26.1M
    if (ctxt == NULL)
12140
0
        return(XML_ERR_INTERNAL_ERROR);
12141
26.1M
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12142
8.18M
        return(ctxt->errNo);
12143
17.9M
    if (ctxt->instate == XML_PARSER_EOF)
12144
1.64k
        return(-1);
12145
17.9M
    if (ctxt->input == NULL)
12146
0
        return(-1);
12147
12148
17.9M
    ctxt->progressive = 1;
12149
17.9M
    if (ctxt->instate == XML_PARSER_START)
12150
1.17M
        xmlDetectSAX2(ctxt);
12151
17.9M
    if ((size > 0) && (chunk != NULL) && (!terminate) &&
12152
17.9M
        (chunk[size - 1] == '\r')) {
12153
89.0k
  end_in_lf = 1;
12154
89.0k
  size--;
12155
89.0k
    }
12156
12157
17.9M
xmldecl_done:
12158
12159
17.9M
    if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12160
17.9M
        (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF))  {
12161
17.5M
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12162
17.5M
  size_t cur = ctxt->input->cur - ctxt->input->base;
12163
17.5M
  int res;
12164
12165
        /*
12166
         * Specific handling if we autodetected an encoding, we should not
12167
         * push more than the first line ... which depend on the encoding
12168
         * And only push the rest once the final encoding was detected
12169
         */
12170
17.5M
        if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
12171
17.5M
            (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
12172
90.2k
            unsigned int len = 45;
12173
12174
90.2k
            if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12175
90.2k
                               BAD_CAST "UTF-16")) ||
12176
90.2k
                (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12177
35.4k
                               BAD_CAST "UTF16")))
12178
54.8k
                len = 90;
12179
35.4k
            else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12180
35.4k
                                    BAD_CAST "UCS-4")) ||
12181
35.4k
                     (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12182
34.9k
                                    BAD_CAST "UCS4")))
12183
469
                len = 180;
12184
12185
90.2k
            if (ctxt->input->buf->rawconsumed < len)
12186
7.25k
                len -= ctxt->input->buf->rawconsumed;
12187
12188
            /*
12189
             * Change size for reading the initial declaration only
12190
             * if size is greater than len. Otherwise, memmove in xmlBufferAdd
12191
             * will blindly copy extra bytes from memory.
12192
             */
12193
90.2k
            if ((unsigned int) size > len) {
12194
61.1k
                remain = size - len;
12195
61.1k
                size = len;
12196
61.1k
            } else {
12197
29.1k
                remain = 0;
12198
29.1k
            }
12199
90.2k
        }
12200
17.5M
  res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12201
17.5M
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12202
17.5M
  if (res < 0) {
12203
2.13k
      ctxt->errNo = XML_PARSER_EOF;
12204
2.13k
      xmlHaltParser(ctxt);
12205
2.13k
      return (XML_PARSER_EOF);
12206
2.13k
  }
12207
#ifdef DEBUG_PUSH
12208
  xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12209
#endif
12210
12211
17.5M
    } else if (ctxt->instate != XML_PARSER_EOF) {
12212
394k
  if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
12213
394k
      xmlParserInputBufferPtr in = ctxt->input->buf;
12214
394k
      if ((in->encoder != NULL) && (in->buffer != NULL) &&
12215
394k
        (in->raw != NULL)) {
12216
29.2k
    int nbchars;
12217
29.2k
    size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
12218
29.2k
    size_t current = ctxt->input->cur - ctxt->input->base;
12219
12220
29.2k
    nbchars = xmlCharEncInput(in, terminate);
12221
29.2k
    xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
12222
29.2k
    if (nbchars < 0) {
12223
        /* TODO 2.6.0 */
12224
3.12k
        xmlGenericError(xmlGenericErrorContext,
12225
3.12k
            "xmlParseChunk: encoder error\n");
12226
3.12k
                    xmlHaltParser(ctxt);
12227
3.12k
        return(XML_ERR_INVALID_ENCODING);
12228
3.12k
    }
12229
29.2k
      }
12230
394k
  }
12231
394k
    }
12232
12233
17.9M
    if (remain != 0) {
12234
60.3k
        xmlParseTryOrFinish(ctxt, 0);
12235
17.9M
    } else {
12236
17.9M
        xmlParseTryOrFinish(ctxt, terminate);
12237
17.9M
    }
12238
17.9M
    if (ctxt->instate == XML_PARSER_EOF)
12239
162k
        return(ctxt->errNo);
12240
12241
17.8M
    if ((ctxt->input != NULL) &&
12242
17.8M
         (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
12243
17.8M
         ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
12244
17.8M
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
12245
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
12246
0
        xmlHaltParser(ctxt);
12247
0
    }
12248
17.8M
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12249
190k
        return(ctxt->errNo);
12250
12251
17.6M
    if (remain != 0) {
12252
59.4k
        chunk += size;
12253
59.4k
        size = remain;
12254
59.4k
        remain = 0;
12255
59.4k
        goto xmldecl_done;
12256
59.4k
    }
12257
17.5M
    if ((end_in_lf == 1) && (ctxt->input != NULL) &&
12258
17.5M
        (ctxt->input->buf != NULL)) {
12259
87.6k
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
12260
87.6k
           ctxt->input);
12261
87.6k
  size_t current = ctxt->input->cur - ctxt->input->base;
12262
12263
87.6k
  xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
12264
12265
87.6k
  xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
12266
87.6k
            base, current);
12267
87.6k
    }
12268
17.5M
    if (terminate) {
12269
  /*
12270
   * Check for termination
12271
   */
12272
190k
  int cur_avail = 0;
12273
12274
190k
  if (ctxt->input != NULL) {
12275
190k
      if (ctxt->input->buf == NULL)
12276
0
    cur_avail = ctxt->input->length -
12277
0
          (ctxt->input->cur - ctxt->input->base);
12278
190k
      else
12279
190k
    cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
12280
190k
                    (ctxt->input->cur - ctxt->input->base);
12281
190k
  }
12282
12283
190k
  if ((ctxt->instate != XML_PARSER_EOF) &&
12284
190k
      (ctxt->instate != XML_PARSER_EPILOG)) {
12285
134k
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12286
134k
  }
12287
190k
  if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) {
12288
1.70k
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12289
1.70k
  }
12290
190k
  if (ctxt->instate != XML_PARSER_EOF) {
12291
190k
      if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
12292
190k
    ctxt->sax->endDocument(ctxt->userData);
12293
190k
  }
12294
190k
  ctxt->instate = XML_PARSER_EOF;
12295
190k
    }
12296
17.5M
    if (ctxt->wellFormed == 0)
12297
9.39M
  return((xmlParserErrors) ctxt->errNo);
12298
8.17M
    else
12299
8.17M
        return(0);
12300
17.5M
}
12301
12302
/************************************************************************
12303
 *                  *
12304
 *    I/O front end functions to the parser     *
12305
 *                  *
12306
 ************************************************************************/
12307
12308
/**
12309
 * xmlCreatePushParserCtxt:
12310
 * @sax:  a SAX handler
12311
 * @user_data:  The user data returned on SAX callbacks
12312
 * @chunk:  a pointer to an array of chars
12313
 * @size:  number of chars in the array
12314
 * @filename:  an optional file name or URI
12315
 *
12316
 * Create a parser context for using the XML parser in push mode.
12317
 * If @buffer and @size are non-NULL, the data is used to detect
12318
 * the encoding.  The remaining characters will be parsed so they
12319
 * don't need to be fed in again through xmlParseChunk.
12320
 * To allow content encoding detection, @size should be >= 4
12321
 * The value of @filename is used for fetching external entities
12322
 * and error/warning reports.
12323
 *
12324
 * Returns the new parser context or NULL
12325
 */
12326
12327
xmlParserCtxtPtr
12328
xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12329
643k
                        const char *chunk, int size, const char *filename) {
12330
643k
    xmlParserCtxtPtr ctxt;
12331
643k
    xmlParserInputPtr inputStream;
12332
643k
    xmlParserInputBufferPtr buf;
12333
643k
    xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12334
12335
    /*
12336
     * plug some encoding conversion routines
12337
     */
12338
643k
    if ((chunk != NULL) && (size >= 4))
12339
313k
  enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12340
12341
643k
    buf = xmlAllocParserInputBuffer(enc);
12342
643k
    if (buf == NULL) return(NULL);
12343
12344
643k
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
12345
643k
    if (ctxt == NULL) {
12346
0
        xmlErrMemory(NULL, "creating parser: out of memory\n");
12347
0
  xmlFreeParserInputBuffer(buf);
12348
0
  return(NULL);
12349
0
    }
12350
643k
    ctxt->dictNames = 1;
12351
643k
    if (filename == NULL) {
12352
321k
  ctxt->directory = NULL;
12353
321k
    } else {
12354
321k
        ctxt->directory = xmlParserGetDirectory(filename);
12355
321k
    }
12356
12357
643k
    inputStream = xmlNewInputStream(ctxt);
12358
643k
    if (inputStream == NULL) {
12359
0
  xmlFreeParserCtxt(ctxt);
12360
0
  xmlFreeParserInputBuffer(buf);
12361
0
  return(NULL);
12362
0
    }
12363
12364
643k
    if (filename == NULL)
12365
321k
  inputStream->filename = NULL;
12366
321k
    else {
12367
321k
  inputStream->filename = (char *)
12368
321k
      xmlCanonicPath((const xmlChar *) filename);
12369
321k
  if (inputStream->filename == NULL) {
12370
0
            xmlFreeInputStream(inputStream);
12371
0
      xmlFreeParserCtxt(ctxt);
12372
0
      xmlFreeParserInputBuffer(buf);
12373
0
      return(NULL);
12374
0
  }
12375
321k
    }
12376
643k
    inputStream->buf = buf;
12377
643k
    xmlBufResetInput(inputStream->buf->buffer, inputStream);
12378
643k
    inputPush(ctxt, inputStream);
12379
12380
    /*
12381
     * If the caller didn't provide an initial 'chunk' for determining
12382
     * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12383
     * that it can be automatically determined later
12384
     */
12385
643k
    ctxt->charset = XML_CHAR_ENCODING_NONE;
12386
12387
643k
    if ((size != 0) && (chunk != NULL) &&
12388
643k
        (ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
12389
313k
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12390
313k
  size_t cur = ctxt->input->cur - ctxt->input->base;
12391
12392
313k
  xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12393
12394
313k
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12395
#ifdef DEBUG_PUSH
12396
  xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12397
#endif
12398
313k
    }
12399
12400
643k
    if (enc != XML_CHAR_ENCODING_NONE) {
12401
176k
        xmlSwitchEncoding(ctxt, enc);
12402
176k
    }
12403
12404
643k
    return(ctxt);
12405
643k
}
12406
#endif /* LIBXML_PUSH_ENABLED */
12407
12408
/**
12409
 * xmlHaltParser:
12410
 * @ctxt:  an XML parser context
12411
 *
12412
 * Blocks further parser processing don't override error
12413
 * for internal use
12414
 */
12415
static void
12416
589k
xmlHaltParser(xmlParserCtxtPtr ctxt) {
12417
589k
    if (ctxt == NULL)
12418
0
        return;
12419
589k
    ctxt->instate = XML_PARSER_EOF;
12420
589k
    ctxt->disableSAX = 1;
12421
679k
    while (ctxt->inputNr > 1)
12422
89.5k
        xmlFreeInputStream(inputPop(ctxt));
12423
589k
    if (ctxt->input != NULL) {
12424
        /*
12425
   * in case there was a specific allocation deallocate before
12426
   * overriding base
12427
   */
12428
589k
        if (ctxt->input->free != NULL) {
12429
0
      ctxt->input->free((xmlChar *) ctxt->input->base);
12430
0
      ctxt->input->free = NULL;
12431
0
  }
12432
589k
        if (ctxt->input->buf != NULL) {
12433
523k
            xmlFreeParserInputBuffer(ctxt->input->buf);
12434
523k
            ctxt->input->buf = NULL;
12435
523k
        }
12436
589k
  ctxt->input->cur = BAD_CAST"";
12437
589k
        ctxt->input->length = 0;
12438
589k
  ctxt->input->base = ctxt->input->cur;
12439
589k
        ctxt->input->end = ctxt->input->cur;
12440
589k
    }
12441
589k
}
12442
12443
/**
12444
 * xmlStopParser:
12445
 * @ctxt:  an XML parser context
12446
 *
12447
 * Blocks further parser processing
12448
 */
12449
void
12450
322k
xmlStopParser(xmlParserCtxtPtr ctxt) {
12451
322k
    if (ctxt == NULL)
12452
0
        return;
12453
322k
    xmlHaltParser(ctxt);
12454
322k
    ctxt->errNo = XML_ERR_USER_STOP;
12455
322k
}
12456
12457
/**
12458
 * xmlCreateIOParserCtxt:
12459
 * @sax:  a SAX handler
12460
 * @user_data:  The user data returned on SAX callbacks
12461
 * @ioread:  an I/O read function
12462
 * @ioclose:  an I/O close function
12463
 * @ioctx:  an I/O handler
12464
 * @enc:  the charset encoding if known
12465
 *
12466
 * Create a parser context for using the XML parser with an existing
12467
 * I/O stream
12468
 *
12469
 * Returns the new parser context or NULL
12470
 */
12471
xmlParserCtxtPtr
12472
xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12473
  xmlInputReadCallback   ioread, xmlInputCloseCallback  ioclose,
12474
0
  void *ioctx, xmlCharEncoding enc) {
12475
0
    xmlParserCtxtPtr ctxt;
12476
0
    xmlParserInputPtr inputStream;
12477
0
    xmlParserInputBufferPtr buf;
12478
12479
0
    if (ioread == NULL) return(NULL);
12480
12481
0
    buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
12482
0
    if (buf == NULL) {
12483
0
        if (ioclose != NULL)
12484
0
            ioclose(ioctx);
12485
0
        return (NULL);
12486
0
    }
12487
12488
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
12489
0
    if (ctxt == NULL) {
12490
0
  xmlFreeParserInputBuffer(buf);
12491
0
  return(NULL);
12492
0
    }
12493
12494
0
    inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12495
0
    if (inputStream == NULL) {
12496
0
  xmlFreeParserCtxt(ctxt);
12497
0
  return(NULL);
12498
0
    }
12499
0
    inputPush(ctxt, inputStream);
12500
12501
0
    return(ctxt);
12502
0
}
12503
12504
#ifdef LIBXML_VALID_ENABLED
12505
/************************************************************************
12506
 *                  *
12507
 *    Front ends when parsing a DTD       *
12508
 *                  *
12509
 ************************************************************************/
12510
12511
/**
12512
 * xmlIOParseDTD:
12513
 * @sax:  the SAX handler block or NULL
12514
 * @input:  an Input Buffer
12515
 * @enc:  the charset encoding if known
12516
 *
12517
 * Load and parse a DTD
12518
 *
12519
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12520
 * @input will be freed by the function in any case.
12521
 */
12522
12523
xmlDtdPtr
12524
xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12525
0
        xmlCharEncoding enc) {
12526
0
    xmlDtdPtr ret = NULL;
12527
0
    xmlParserCtxtPtr ctxt;
12528
0
    xmlParserInputPtr pinput = NULL;
12529
0
    xmlChar start[4];
12530
12531
0
    if (input == NULL)
12532
0
  return(NULL);
12533
12534
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12535
0
    if (ctxt == NULL) {
12536
0
        xmlFreeParserInputBuffer(input);
12537
0
  return(NULL);
12538
0
    }
12539
12540
    /* We are loading a DTD */
12541
0
    ctxt->options |= XML_PARSE_DTDLOAD;
12542
12543
0
    xmlDetectSAX2(ctxt);
12544
12545
    /*
12546
     * generate a parser input from the I/O handler
12547
     */
12548
12549
0
    pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12550
0
    if (pinput == NULL) {
12551
0
        xmlFreeParserInputBuffer(input);
12552
0
  xmlFreeParserCtxt(ctxt);
12553
0
  return(NULL);
12554
0
    }
12555
12556
    /*
12557
     * plug some encoding conversion routines here.
12558
     */
12559
0
    if (xmlPushInput(ctxt, pinput) < 0) {
12560
0
  xmlFreeParserCtxt(ctxt);
12561
0
  return(NULL);
12562
0
    }
12563
0
    if (enc != XML_CHAR_ENCODING_NONE) {
12564
0
        xmlSwitchEncoding(ctxt, enc);
12565
0
    }
12566
12567
0
    pinput->filename = NULL;
12568
0
    pinput->line = 1;
12569
0
    pinput->col = 1;
12570
0
    pinput->base = ctxt->input->cur;
12571
0
    pinput->cur = ctxt->input->cur;
12572
0
    pinput->free = NULL;
12573
12574
    /*
12575
     * let's parse that entity knowing it's an external subset.
12576
     */
12577
0
    ctxt->inSubset = 2;
12578
0
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12579
0
    if (ctxt->myDoc == NULL) {
12580
0
  xmlErrMemory(ctxt, "New Doc failed");
12581
0
  return(NULL);
12582
0
    }
12583
0
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
12584
0
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12585
0
                                 BAD_CAST "none", BAD_CAST "none");
12586
12587
0
    if ((enc == XML_CHAR_ENCODING_NONE) &&
12588
0
        ((ctxt->input->end - ctxt->input->cur) >= 4)) {
12589
  /*
12590
   * Get the 4 first bytes and decode the charset
12591
   * if enc != XML_CHAR_ENCODING_NONE
12592
   * plug some encoding conversion routines.
12593
   */
12594
0
  start[0] = RAW;
12595
0
  start[1] = NXT(1);
12596
0
  start[2] = NXT(2);
12597
0
  start[3] = NXT(3);
12598
0
  enc = xmlDetectCharEncoding(start, 4);
12599
0
  if (enc != XML_CHAR_ENCODING_NONE) {
12600
0
      xmlSwitchEncoding(ctxt, enc);
12601
0
  }
12602
0
    }
12603
12604
0
    xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12605
12606
0
    if (ctxt->myDoc != NULL) {
12607
0
  if (ctxt->wellFormed) {
12608
0
      ret = ctxt->myDoc->extSubset;
12609
0
      ctxt->myDoc->extSubset = NULL;
12610
0
      if (ret != NULL) {
12611
0
    xmlNodePtr tmp;
12612
12613
0
    ret->doc = NULL;
12614
0
    tmp = ret->children;
12615
0
    while (tmp != NULL) {
12616
0
        tmp->doc = NULL;
12617
0
        tmp = tmp->next;
12618
0
    }
12619
0
      }
12620
0
  } else {
12621
0
      ret = NULL;
12622
0
  }
12623
0
        xmlFreeDoc(ctxt->myDoc);
12624
0
        ctxt->myDoc = NULL;
12625
0
    }
12626
0
    xmlFreeParserCtxt(ctxt);
12627
12628
0
    return(ret);
12629
0
}
12630
12631
/**
12632
 * xmlSAXParseDTD:
12633
 * @sax:  the SAX handler block
12634
 * @ExternalID:  a NAME* containing the External ID of the DTD
12635
 * @SystemID:  a NAME* containing the URL to the DTD
12636
 *
12637
 * DEPRECATED: Don't use.
12638
 *
12639
 * Load and parse an external subset.
12640
 *
12641
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12642
 */
12643
12644
xmlDtdPtr
12645
xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12646
0
                          const xmlChar *SystemID) {
12647
0
    xmlDtdPtr ret = NULL;
12648
0
    xmlParserCtxtPtr ctxt;
12649
0
    xmlParserInputPtr input = NULL;
12650
0
    xmlCharEncoding enc;
12651
0
    xmlChar* systemIdCanonic;
12652
12653
0
    if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12654
12655
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12656
0
    if (ctxt == NULL) {
12657
0
  return(NULL);
12658
0
    }
12659
12660
    /* We are loading a DTD */
12661
0
    ctxt->options |= XML_PARSE_DTDLOAD;
12662
12663
    /*
12664
     * Canonicalise the system ID
12665
     */
12666
0
    systemIdCanonic = xmlCanonicPath(SystemID);
12667
0
    if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
12668
0
  xmlFreeParserCtxt(ctxt);
12669
0
  return(NULL);
12670
0
    }
12671
12672
    /*
12673
     * Ask the Entity resolver to load the damn thing
12674
     */
12675
12676
0
    if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
12677
0
  input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12678
0
                                   systemIdCanonic);
12679
0
    if (input == NULL) {
12680
0
  xmlFreeParserCtxt(ctxt);
12681
0
  if (systemIdCanonic != NULL)
12682
0
      xmlFree(systemIdCanonic);
12683
0
  return(NULL);
12684
0
    }
12685
12686
    /*
12687
     * plug some encoding conversion routines here.
12688
     */
12689
0
    if (xmlPushInput(ctxt, input) < 0) {
12690
0
  xmlFreeParserCtxt(ctxt);
12691
0
  if (systemIdCanonic != NULL)
12692
0
      xmlFree(systemIdCanonic);
12693
0
  return(NULL);
12694
0
    }
12695
0
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12696
0
  enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12697
0
  xmlSwitchEncoding(ctxt, enc);
12698
0
    }
12699
12700
0
    if (input->filename == NULL)
12701
0
  input->filename = (char *) systemIdCanonic;
12702
0
    else
12703
0
  xmlFree(systemIdCanonic);
12704
0
    input->line = 1;
12705
0
    input->col = 1;
12706
0
    input->base = ctxt->input->cur;
12707
0
    input->cur = ctxt->input->cur;
12708
0
    input->free = NULL;
12709
12710
    /*
12711
     * let's parse that entity knowing it's an external subset.
12712
     */
12713
0
    ctxt->inSubset = 2;
12714
0
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12715
0
    if (ctxt->myDoc == NULL) {
12716
0
  xmlErrMemory(ctxt, "New Doc failed");
12717
0
  xmlFreeParserCtxt(ctxt);
12718
0
  return(NULL);
12719
0
    }
12720
0
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
12721
0
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12722
0
                                 ExternalID, SystemID);
12723
0
    xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12724
12725
0
    if (ctxt->myDoc != NULL) {
12726
0
  if (ctxt->wellFormed) {
12727
0
      ret = ctxt->myDoc->extSubset;
12728
0
      ctxt->myDoc->extSubset = NULL;
12729
0
      if (ret != NULL) {
12730
0
    xmlNodePtr tmp;
12731
12732
0
    ret->doc = NULL;
12733
0
    tmp = ret->children;
12734
0
    while (tmp != NULL) {
12735
0
        tmp->doc = NULL;
12736
0
        tmp = tmp->next;
12737
0
    }
12738
0
      }
12739
0
  } else {
12740
0
      ret = NULL;
12741
0
  }
12742
0
        xmlFreeDoc(ctxt->myDoc);
12743
0
        ctxt->myDoc = NULL;
12744
0
    }
12745
0
    xmlFreeParserCtxt(ctxt);
12746
12747
0
    return(ret);
12748
0
}
12749
12750
12751
/**
12752
 * xmlParseDTD:
12753
 * @ExternalID:  a NAME* containing the External ID of the DTD
12754
 * @SystemID:  a NAME* containing the URL to the DTD
12755
 *
12756
 * Load and parse an external subset.
12757
 *
12758
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12759
 */
12760
12761
xmlDtdPtr
12762
0
xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12763
0
    return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12764
0
}
12765
#endif /* LIBXML_VALID_ENABLED */
12766
12767
/************************************************************************
12768
 *                  *
12769
 *    Front ends when parsing an Entity     *
12770
 *                  *
12771
 ************************************************************************/
12772
12773
/**
12774
 * xmlParseCtxtExternalEntity:
12775
 * @ctx:  the existing parsing context
12776
 * @URL:  the URL for the entity to load
12777
 * @ID:  the System ID for the entity to load
12778
 * @lst:  the return value for the set of parsed nodes
12779
 *
12780
 * Parse an external general entity within an existing parsing context
12781
 * An external general parsed entity is well-formed if it matches the
12782
 * production labeled extParsedEnt.
12783
 *
12784
 * [78] extParsedEnt ::= TextDecl? content
12785
 *
12786
 * Returns 0 if the entity is well formed, -1 in case of args problem and
12787
 *    the parser error code otherwise
12788
 */
12789
12790
int
12791
xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
12792
0
                 const xmlChar *ID, xmlNodePtr *lst) {
12793
0
    void *userData;
12794
12795
0
    if (ctx == NULL) return(-1);
12796
    /*
12797
     * If the user provided their own SAX callbacks, then reuse the
12798
     * userData callback field, otherwise the expected setup in a
12799
     * DOM builder is to have userData == ctxt
12800
     */
12801
0
    if (ctx->userData == ctx)
12802
0
        userData = NULL;
12803
0
    else
12804
0
        userData = ctx->userData;
12805
0
    return xmlParseExternalEntityPrivate(ctx->myDoc, ctx, ctx->sax,
12806
0
                                         userData, ctx->depth + 1,
12807
0
                                         URL, ID, lst);
12808
0
}
12809
12810
/**
12811
 * xmlParseExternalEntityPrivate:
12812
 * @doc:  the document the chunk pertains to
12813
 * @oldctxt:  the previous parser context if available
12814
 * @sax:  the SAX handler block (possibly NULL)
12815
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12816
 * @depth:  Used for loop detection, use 0
12817
 * @URL:  the URL for the entity to load
12818
 * @ID:  the System ID for the entity to load
12819
 * @list:  the return value for the set of parsed nodes
12820
 *
12821
 * Private version of xmlParseExternalEntity()
12822
 *
12823
 * Returns 0 if the entity is well formed, -1 in case of args problem and
12824
 *    the parser error code otherwise
12825
 */
12826
12827
static xmlParserErrors
12828
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
12829
                xmlSAXHandlerPtr sax,
12830
          void *user_data, int depth, const xmlChar *URL,
12831
822k
          const xmlChar *ID, xmlNodePtr *list) {
12832
822k
    xmlParserCtxtPtr ctxt;
12833
822k
    xmlDocPtr newDoc;
12834
822k
    xmlNodePtr newRoot;
12835
822k
    xmlParserErrors ret = XML_ERR_OK;
12836
822k
    xmlChar start[4];
12837
822k
    xmlCharEncoding enc;
12838
12839
822k
    if (((depth > 40) &&
12840
822k
  ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12841
822k
  (depth > 100)) {
12842
0
  xmlFatalErrMsg(oldctxt, XML_ERR_ENTITY_LOOP,
12843
0
                       "Maximum entity nesting depth exceeded");
12844
0
        return(XML_ERR_ENTITY_LOOP);
12845
0
    }
12846
12847
822k
    if (list != NULL)
12848
95.0k
        *list = NULL;
12849
822k
    if ((URL == NULL) && (ID == NULL))
12850
941
  return(XML_ERR_INTERNAL_ERROR);
12851
821k
    if (doc == NULL)
12852
0
  return(XML_ERR_INTERNAL_ERROR);
12853
12854
821k
    ctxt = xmlCreateEntityParserCtxtInternal(sax, user_data, URL, ID, NULL,
12855
821k
                                             oldctxt);
12856
821k
    if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
12857
201k
    if (oldctxt != NULL) {
12858
201k
        ctxt->nbErrors = oldctxt->nbErrors;
12859
201k
        ctxt->nbWarnings = oldctxt->nbWarnings;
12860
201k
    }
12861
201k
    xmlDetectSAX2(ctxt);
12862
12863
201k
    newDoc = xmlNewDoc(BAD_CAST "1.0");
12864
201k
    if (newDoc == NULL) {
12865
0
  xmlFreeParserCtxt(ctxt);
12866
0
  return(XML_ERR_INTERNAL_ERROR);
12867
0
    }
12868
201k
    newDoc->properties = XML_DOC_INTERNAL;
12869
201k
    if (doc) {
12870
201k
        newDoc->intSubset = doc->intSubset;
12871
201k
        newDoc->extSubset = doc->extSubset;
12872
201k
        if (doc->dict) {
12873
135k
            newDoc->dict = doc->dict;
12874
135k
            xmlDictReference(newDoc->dict);
12875
135k
        }
12876
201k
        if (doc->URL != NULL) {
12877
122k
            newDoc->URL = xmlStrdup(doc->URL);
12878
122k
        }
12879
201k
    }
12880
201k
    newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12881
201k
    if (newRoot == NULL) {
12882
0
  if (sax != NULL)
12883
0
  xmlFreeParserCtxt(ctxt);
12884
0
  newDoc->intSubset = NULL;
12885
0
  newDoc->extSubset = NULL;
12886
0
        xmlFreeDoc(newDoc);
12887
0
  return(XML_ERR_INTERNAL_ERROR);
12888
0
    }
12889
201k
    xmlAddChild((xmlNodePtr) newDoc, newRoot);
12890
201k
    nodePush(ctxt, newDoc->children);
12891
201k
    if (doc == NULL) {
12892
0
        ctxt->myDoc = newDoc;
12893
201k
    } else {
12894
201k
        ctxt->myDoc = doc;
12895
201k
        newRoot->doc = doc;
12896
201k
    }
12897
12898
    /*
12899
     * Get the 4 first bytes and decode the charset
12900
     * if enc != XML_CHAR_ENCODING_NONE
12901
     * plug some encoding conversion routines.
12902
     */
12903
201k
    GROW;
12904
201k
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12905
196k
  start[0] = RAW;
12906
196k
  start[1] = NXT(1);
12907
196k
  start[2] = NXT(2);
12908
196k
  start[3] = NXT(3);
12909
196k
  enc = xmlDetectCharEncoding(start, 4);
12910
196k
  if (enc != XML_CHAR_ENCODING_NONE) {
12911
7.48k
      xmlSwitchEncoding(ctxt, enc);
12912
7.48k
  }
12913
196k
    }
12914
12915
    /*
12916
     * Parse a possible text declaration first
12917
     */
12918
201k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
12919
4.88k
  xmlParseTextDecl(ctxt);
12920
        /*
12921
         * An XML-1.0 document can't reference an entity not XML-1.0
12922
         */
12923
4.88k
        if ((xmlStrEqual(oldctxt->version, BAD_CAST "1.0")) &&
12924
4.88k
            (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
12925
208
            xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
12926
208
                           "Version mismatch between document and entity\n");
12927
208
        }
12928
4.88k
    }
12929
12930
201k
    ctxt->instate = XML_PARSER_CONTENT;
12931
201k
    ctxt->depth = depth;
12932
201k
    if (oldctxt != NULL) {
12933
201k
  ctxt->_private = oldctxt->_private;
12934
201k
  ctxt->loadsubset = oldctxt->loadsubset;
12935
201k
  ctxt->validate = oldctxt->validate;
12936
201k
  ctxt->valid = oldctxt->valid;
12937
201k
  ctxt->replaceEntities = oldctxt->replaceEntities;
12938
201k
        if (oldctxt->validate) {
12939
92.7k
            ctxt->vctxt.error = oldctxt->vctxt.error;
12940
92.7k
            ctxt->vctxt.warning = oldctxt->vctxt.warning;
12941
92.7k
            ctxt->vctxt.userData = oldctxt->vctxt.userData;
12942
92.7k
            ctxt->vctxt.flags = oldctxt->vctxt.flags;
12943
92.7k
        }
12944
201k
  ctxt->external = oldctxt->external;
12945
201k
        if (ctxt->dict) xmlDictFree(ctxt->dict);
12946
201k
        ctxt->dict = oldctxt->dict;
12947
201k
        ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12948
201k
        ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12949
201k
        ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12950
201k
        ctxt->dictNames = oldctxt->dictNames;
12951
201k
        ctxt->attsDefault = oldctxt->attsDefault;
12952
201k
        ctxt->attsSpecial = oldctxt->attsSpecial;
12953
201k
        ctxt->linenumbers = oldctxt->linenumbers;
12954
201k
  ctxt->record_info = oldctxt->record_info;
12955
201k
  ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
12956
201k
  ctxt->node_seq.length = oldctxt->node_seq.length;
12957
201k
  ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
12958
201k
    } else {
12959
  /*
12960
   * Doing validity checking on chunk without context
12961
   * doesn't make sense
12962
   */
12963
0
  ctxt->_private = NULL;
12964
0
  ctxt->validate = 0;
12965
0
  ctxt->external = 2;
12966
0
  ctxt->loadsubset = 0;
12967
0
    }
12968
12969
201k
    xmlParseContent(ctxt);
12970
12971
201k
    if ((RAW == '<') && (NXT(1) == '/')) {
12972
1.44k
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12973
199k
    } else if (RAW != 0) {
12974
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12975
0
    }
12976
201k
    if (ctxt->node != newDoc->children) {
12977
7.23k
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12978
7.23k
    }
12979
12980
201k
    if (!ctxt->wellFormed) {
12981
38.7k
  ret = (xmlParserErrors)ctxt->errNo;
12982
38.7k
        if (oldctxt != NULL) {
12983
38.7k
            oldctxt->errNo = ctxt->errNo;
12984
38.7k
            oldctxt->wellFormed = 0;
12985
38.7k
            xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12986
38.7k
        }
12987
162k
    } else {
12988
162k
  if (list != NULL) {
12989
20.6k
      xmlNodePtr cur;
12990
12991
      /*
12992
       * Return the newly created nodeset after unlinking it from
12993
       * they pseudo parent.
12994
       */
12995
20.6k
      cur = newDoc->children->children;
12996
20.6k
      *list = cur;
12997
4.75M
      while (cur != NULL) {
12998
4.73M
    cur->parent = NULL;
12999
4.73M
    cur = cur->next;
13000
4.73M
      }
13001
20.6k
            newDoc->children->children = NULL;
13002
20.6k
  }
13003
162k
  ret = XML_ERR_OK;
13004
162k
    }
13005
13006
    /*
13007
     * Also record the size of the entity parsed
13008
     */
13009
201k
    if (ctxt->input != NULL && oldctxt != NULL) {
13010
201k
        unsigned long consumed = ctxt->input->consumed;
13011
13012
201k
        xmlSaturatedAddSizeT(&consumed, ctxt->input->cur - ctxt->input->base);
13013
13014
201k
        xmlSaturatedAdd(&oldctxt->sizeentities, consumed);
13015
201k
        xmlSaturatedAdd(&oldctxt->sizeentities, ctxt->sizeentities);
13016
13017
201k
        xmlSaturatedAdd(&oldctxt->sizeentcopy, consumed);
13018
201k
        xmlSaturatedAdd(&oldctxt->sizeentcopy, ctxt->sizeentcopy);
13019
201k
    }
13020
13021
201k
    if (oldctxt != NULL) {
13022
201k
        ctxt->dict = NULL;
13023
201k
        ctxt->attsDefault = NULL;
13024
201k
        ctxt->attsSpecial = NULL;
13025
201k
        oldctxt->nbErrors = ctxt->nbErrors;
13026
201k
        oldctxt->nbWarnings = ctxt->nbWarnings;
13027
201k
        oldctxt->validate = ctxt->validate;
13028
201k
        oldctxt->valid = ctxt->valid;
13029
201k
        oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
13030
201k
        oldctxt->node_seq.length = ctxt->node_seq.length;
13031
201k
        oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
13032
201k
    }
13033
201k
    ctxt->node_seq.maximum = 0;
13034
201k
    ctxt->node_seq.length = 0;
13035
201k
    ctxt->node_seq.buffer = NULL;
13036
201k
    xmlFreeParserCtxt(ctxt);
13037
201k
    newDoc->intSubset = NULL;
13038
201k
    newDoc->extSubset = NULL;
13039
201k
    xmlFreeDoc(newDoc);
13040
13041
201k
    return(ret);
13042
201k
}
13043
13044
#ifdef LIBXML_SAX1_ENABLED
13045
/**
13046
 * xmlParseExternalEntity:
13047
 * @doc:  the document the chunk pertains to
13048
 * @sax:  the SAX handler block (possibly NULL)
13049
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13050
 * @depth:  Used for loop detection, use 0
13051
 * @URL:  the URL for the entity to load
13052
 * @ID:  the System ID for the entity to load
13053
 * @lst:  the return value for the set of parsed nodes
13054
 *
13055
 * Parse an external general entity
13056
 * An external general parsed entity is well-formed if it matches the
13057
 * production labeled extParsedEnt.
13058
 *
13059
 * [78] extParsedEnt ::= TextDecl? content
13060
 *
13061
 * Returns 0 if the entity is well formed, -1 in case of args problem and
13062
 *    the parser error code otherwise
13063
 */
13064
13065
int
13066
xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
13067
0
    int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
13068
0
    return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
13069
0
                           ID, lst));
13070
0
}
13071
13072
/**
13073
 * xmlParseBalancedChunkMemory:
13074
 * @doc:  the document the chunk pertains to (must not be NULL)
13075
 * @sax:  the SAX handler block (possibly NULL)
13076
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13077
 * @depth:  Used for loop detection, use 0
13078
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13079
 * @lst:  the return value for the set of parsed nodes
13080
 *
13081
 * Parse a well-balanced chunk of an XML document
13082
 * called by the parser
13083
 * The allowed sequence for the Well Balanced Chunk is the one defined by
13084
 * the content production in the XML grammar:
13085
 *
13086
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13087
 *
13088
 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13089
 *    the parser error code otherwise
13090
 */
13091
13092
int
13093
xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13094
0
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
13095
0
    return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
13096
0
                                                depth, string, lst, 0 );
13097
0
}
13098
#endif /* LIBXML_SAX1_ENABLED */
13099
13100
/**
13101
 * xmlParseBalancedChunkMemoryInternal:
13102
 * @oldctxt:  the existing parsing context
13103
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13104
 * @user_data:  the user data field for the parser context
13105
 * @lst:  the return value for the set of parsed nodes
13106
 *
13107
 *
13108
 * Parse a well-balanced chunk of an XML document
13109
 * called by the parser
13110
 * The allowed sequence for the Well Balanced Chunk is the one defined by
13111
 * the content production in the XML grammar:
13112
 *
13113
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13114
 *
13115
 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13116
 * error code otherwise
13117
 *
13118
 * In case recover is set to 1, the nodelist will not be empty even if
13119
 * the parsed chunk is not well balanced.
13120
 */
13121
static xmlParserErrors
13122
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
13123
174k
  const xmlChar *string, void *user_data, xmlNodePtr *lst) {
13124
174k
    xmlParserCtxtPtr ctxt;
13125
174k
    xmlDocPtr newDoc = NULL;
13126
174k
    xmlNodePtr newRoot;
13127
174k
    xmlSAXHandlerPtr oldsax = NULL;
13128
174k
    xmlNodePtr content = NULL;
13129
174k
    xmlNodePtr last = NULL;
13130
174k
    int size;
13131
174k
    xmlParserErrors ret = XML_ERR_OK;
13132
174k
#ifdef SAX2
13133
174k
    int i;
13134
174k
#endif
13135
13136
174k
    if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13137
174k
        (oldctxt->depth >  100)) {
13138
63
  xmlFatalErrMsg(oldctxt, XML_ERR_ENTITY_LOOP,
13139
63
                       "Maximum entity nesting depth exceeded");
13140
63
  return(XML_ERR_ENTITY_LOOP);
13141
63
    }
13142
13143
13144
174k
    if (lst != NULL)
13145
172k
        *lst = NULL;
13146
174k
    if (string == NULL)
13147
119
        return(XML_ERR_INTERNAL_ERROR);
13148
13149
174k
    size = xmlStrlen(string);
13150
13151
174k
    ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13152
174k
    if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13153
172k
    ctxt->nbErrors = oldctxt->nbErrors;
13154
172k
    ctxt->nbWarnings = oldctxt->nbWarnings;
13155
172k
    if (user_data != NULL)
13156
0
  ctxt->userData = user_data;
13157
172k
    else
13158
172k
  ctxt->userData = ctxt;
13159
172k
    if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13160
172k
    ctxt->dict = oldctxt->dict;
13161
172k
    ctxt->input_id = oldctxt->input_id;
13162
172k
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13163
172k
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13164
172k
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13165
13166
172k
#ifdef SAX2
13167
    /* propagate namespaces down the entity */
13168
172k
    for (i = 0;i < oldctxt->nsNr;i += 2) {
13169
381
        nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
13170
381
    }
13171
172k
#endif
13172
13173
172k
    oldsax = ctxt->sax;
13174
172k
    ctxt->sax = oldctxt->sax;
13175
172k
    xmlDetectSAX2(ctxt);
13176
172k
    ctxt->replaceEntities = oldctxt->replaceEntities;
13177
172k
    ctxt->options = oldctxt->options;
13178
13179
172k
    ctxt->_private = oldctxt->_private;
13180
172k
    if (oldctxt->myDoc == NULL) {
13181
0
  newDoc = xmlNewDoc(BAD_CAST "1.0");
13182
0
  if (newDoc == NULL) {
13183
0
      ctxt->sax = oldsax;
13184
0
      ctxt->dict = NULL;
13185
0
      xmlFreeParserCtxt(ctxt);
13186
0
      return(XML_ERR_INTERNAL_ERROR);
13187
0
  }
13188
0
  newDoc->properties = XML_DOC_INTERNAL;
13189
0
  newDoc->dict = ctxt->dict;
13190
0
  xmlDictReference(newDoc->dict);
13191
0
  ctxt->myDoc = newDoc;
13192
172k
    } else {
13193
172k
  ctxt->myDoc = oldctxt->myDoc;
13194
172k
        content = ctxt->myDoc->children;
13195
172k
  last = ctxt->myDoc->last;
13196
172k
    }
13197
172k
    newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
13198
172k
    if (newRoot == NULL) {
13199
0
  ctxt->sax = oldsax;
13200
0
  ctxt->dict = NULL;
13201
0
  xmlFreeParserCtxt(ctxt);
13202
0
  if (newDoc != NULL) {
13203
0
      xmlFreeDoc(newDoc);
13204
0
  }
13205
0
  return(XML_ERR_INTERNAL_ERROR);
13206
0
    }
13207
172k
    ctxt->myDoc->children = NULL;
13208
172k
    ctxt->myDoc->last = NULL;
13209
172k
    xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
13210
172k
    nodePush(ctxt, ctxt->myDoc->children);
13211
172k
    ctxt->instate = XML_PARSER_CONTENT;
13212
172k
    ctxt->depth = oldctxt->depth;
13213
13214
172k
    ctxt->validate = 0;
13215
172k
    ctxt->loadsubset = oldctxt->loadsubset;
13216
172k
    if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
13217
  /*
13218
   * ID/IDREF registration will be done in xmlValidateElement below
13219
   */
13220
141k
  ctxt->loadsubset |= XML_SKIP_IDS;
13221
141k
    }
13222
172k
    ctxt->dictNames = oldctxt->dictNames;
13223
172k
    ctxt->attsDefault = oldctxt->attsDefault;
13224
172k
    ctxt->attsSpecial = oldctxt->attsSpecial;
13225
13226
172k
    xmlParseContent(ctxt);
13227
172k
    if ((RAW == '<') && (NXT(1) == '/')) {
13228
400
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13229
171k
    } else if (RAW != 0) {
13230
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13231
0
    }
13232
172k
    if (ctxt->node != ctxt->myDoc->children) {
13233
1.50k
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13234
1.50k
    }
13235
13236
172k
    if (!ctxt->wellFormed) {
13237
15.4k
  ret = (xmlParserErrors)ctxt->errNo;
13238
15.4k
        oldctxt->errNo = ctxt->errNo;
13239
15.4k
        oldctxt->wellFormed = 0;
13240
15.4k
        xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13241
156k
    } else {
13242
156k
        ret = XML_ERR_OK;
13243
156k
    }
13244
13245
172k
    if ((lst != NULL) && (ret == XML_ERR_OK)) {
13246
156k
  xmlNodePtr cur;
13247
13248
  /*
13249
   * Return the newly created nodeset after unlinking it from
13250
   * they pseudo parent.
13251
   */
13252
156k
  cur = ctxt->myDoc->children->children;
13253
156k
  *lst = cur;
13254
394k
  while (cur != NULL) {
13255
238k
#ifdef LIBXML_VALID_ENABLED
13256
238k
      if ((oldctxt->validate) && (oldctxt->wellFormed) &&
13257
238k
    (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
13258
238k
    (cur->type == XML_ELEMENT_NODE)) {
13259
25.1k
    oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
13260
25.1k
      oldctxt->myDoc, cur);
13261
25.1k
      }
13262
238k
#endif /* LIBXML_VALID_ENABLED */
13263
238k
      cur->parent = NULL;
13264
238k
      cur = cur->next;
13265
238k
  }
13266
156k
  ctxt->myDoc->children->children = NULL;
13267
156k
    }
13268
172k
    if (ctxt->myDoc != NULL) {
13269
172k
  xmlFreeNode(ctxt->myDoc->children);
13270
172k
        ctxt->myDoc->children = content;
13271
172k
        ctxt->myDoc->last = last;
13272
172k
    }
13273
13274
    /*
13275
     * Also record the size of the entity parsed
13276
     */
13277
172k
    if (ctxt->input != NULL && oldctxt != NULL) {
13278
172k
        unsigned long consumed = ctxt->input->consumed;
13279
13280
172k
        xmlSaturatedAddSizeT(&consumed, ctxt->input->cur - ctxt->input->base);
13281
13282
172k
        xmlSaturatedAdd(&oldctxt->sizeentcopy, consumed);
13283
172k
        xmlSaturatedAdd(&oldctxt->sizeentcopy, ctxt->sizeentcopy);
13284
172k
    }
13285
13286
172k
    oldctxt->nbErrors = ctxt->nbErrors;
13287
172k
    oldctxt->nbWarnings = ctxt->nbWarnings;
13288
172k
    ctxt->sax = oldsax;
13289
172k
    ctxt->dict = NULL;
13290
172k
    ctxt->attsDefault = NULL;
13291
172k
    ctxt->attsSpecial = NULL;
13292
172k
    xmlFreeParserCtxt(ctxt);
13293
172k
    if (newDoc != NULL) {
13294
0
  xmlFreeDoc(newDoc);
13295
0
    }
13296
13297
172k
    return(ret);
13298
172k
}
13299
13300
/**
13301
 * xmlParseInNodeContext:
13302
 * @node:  the context node
13303
 * @data:  the input string
13304
 * @datalen:  the input string length in bytes
13305
 * @options:  a combination of xmlParserOption
13306
 * @lst:  the return value for the set of parsed nodes
13307
 *
13308
 * Parse a well-balanced chunk of an XML document
13309
 * within the context (DTD, namespaces, etc ...) of the given node.
13310
 *
13311
 * The allowed sequence for the data is a Well Balanced Chunk defined by
13312
 * the content production in the XML grammar:
13313
 *
13314
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13315
 *
13316
 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13317
 * error code otherwise
13318
 */
13319
xmlParserErrors
13320
xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13321
0
                      int options, xmlNodePtr *lst) {
13322
0
#ifdef SAX2
13323
0
    xmlParserCtxtPtr ctxt;
13324
0
    xmlDocPtr doc = NULL;
13325
0
    xmlNodePtr fake, cur;
13326
0
    int nsnr = 0;
13327
13328
0
    xmlParserErrors ret = XML_ERR_OK;
13329
13330
    /*
13331
     * check all input parameters, grab the document
13332
     */
13333
0
    if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13334
0
        return(XML_ERR_INTERNAL_ERROR);
13335
0
    switch (node->type) {
13336
0
        case XML_ELEMENT_NODE:
13337
0
        case XML_ATTRIBUTE_NODE:
13338
0
        case XML_TEXT_NODE:
13339
0
        case XML_CDATA_SECTION_NODE:
13340
0
        case XML_ENTITY_REF_NODE:
13341
0
        case XML_PI_NODE:
13342
0
        case XML_COMMENT_NODE:
13343
0
        case XML_DOCUMENT_NODE:
13344
0
        case XML_HTML_DOCUMENT_NODE:
13345
0
      break;
13346
0
  default:
13347
0
      return(XML_ERR_INTERNAL_ERROR);
13348
13349
0
    }
13350
0
    while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13351
0
           (node->type != XML_DOCUMENT_NODE) &&
13352
0
     (node->type != XML_HTML_DOCUMENT_NODE))
13353
0
  node = node->parent;
13354
0
    if (node == NULL)
13355
0
  return(XML_ERR_INTERNAL_ERROR);
13356
0
    if (node->type == XML_ELEMENT_NODE)
13357
0
  doc = node->doc;
13358
0
    else
13359
0
        doc = (xmlDocPtr) node;
13360
0
    if (doc == NULL)
13361
0
  return(XML_ERR_INTERNAL_ERROR);
13362
13363
    /*
13364
     * allocate a context and set-up everything not related to the
13365
     * node position in the tree
13366
     */
13367
0
    if (doc->type == XML_DOCUMENT_NODE)
13368
0
  ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13369
0
#ifdef LIBXML_HTML_ENABLED
13370
0
    else if (doc->type == XML_HTML_DOCUMENT_NODE) {
13371
0
  ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
13372
        /*
13373
         * When parsing in context, it makes no sense to add implied
13374
         * elements like html/body/etc...
13375
         */
13376
0
        options |= HTML_PARSE_NOIMPLIED;
13377
0
    }
13378
0
#endif
13379
0
    else
13380
0
        return(XML_ERR_INTERNAL_ERROR);
13381
13382
0
    if (ctxt == NULL)
13383
0
        return(XML_ERR_NO_MEMORY);
13384
13385
    /*
13386
     * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13387
     * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13388
     * we must wait until the last moment to free the original one.
13389
     */
13390
0
    if (doc->dict != NULL) {
13391
0
        if (ctxt->dict != NULL)
13392
0
      xmlDictFree(ctxt->dict);
13393
0
  ctxt->dict = doc->dict;
13394
0
    } else
13395
0
        options |= XML_PARSE_NODICT;
13396
13397
0
    if (doc->encoding != NULL) {
13398
0
        xmlCharEncodingHandlerPtr hdlr;
13399
13400
0
        if (ctxt->encoding != NULL)
13401
0
      xmlFree((xmlChar *) ctxt->encoding);
13402
0
        ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13403
13404
0
        hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding);
13405
0
        if (hdlr != NULL) {
13406
0
            xmlSwitchToEncoding(ctxt, hdlr);
13407
0
  } else {
13408
0
            return(XML_ERR_UNSUPPORTED_ENCODING);
13409
0
        }
13410
0
    }
13411
13412
0
    xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13413
0
    xmlDetectSAX2(ctxt);
13414
0
    ctxt->myDoc = doc;
13415
    /* parsing in context, i.e. as within existing content */
13416
0
    ctxt->input_id = 2;
13417
0
    ctxt->instate = XML_PARSER_CONTENT;
13418
13419
0
    fake = xmlNewDocComment(node->doc, NULL);
13420
0
    if (fake == NULL) {
13421
0
        xmlFreeParserCtxt(ctxt);
13422
0
  return(XML_ERR_NO_MEMORY);
13423
0
    }
13424
0
    xmlAddChild(node, fake);
13425
13426
0
    if (node->type == XML_ELEMENT_NODE) {
13427
0
  nodePush(ctxt, node);
13428
  /*
13429
   * initialize the SAX2 namespaces stack
13430
   */
13431
0
  cur = node;
13432
0
  while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13433
0
      xmlNsPtr ns = cur->nsDef;
13434
0
      const xmlChar *iprefix, *ihref;
13435
13436
0
      while (ns != NULL) {
13437
0
    if (ctxt->dict) {
13438
0
        iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13439
0
        ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13440
0
    } else {
13441
0
        iprefix = ns->prefix;
13442
0
        ihref = ns->href;
13443
0
    }
13444
13445
0
          if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13446
0
        nsPush(ctxt, iprefix, ihref);
13447
0
        nsnr++;
13448
0
    }
13449
0
    ns = ns->next;
13450
0
      }
13451
0
      cur = cur->parent;
13452
0
  }
13453
0
    }
13454
13455
0
    if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13456
  /*
13457
   * ID/IDREF registration will be done in xmlValidateElement below
13458
   */
13459
0
  ctxt->loadsubset |= XML_SKIP_IDS;
13460
0
    }
13461
13462
0
#ifdef LIBXML_HTML_ENABLED
13463
0
    if (doc->type == XML_HTML_DOCUMENT_NODE)
13464
0
        __htmlParseContent(ctxt);
13465
0
    else
13466
0
#endif
13467
0
  xmlParseContent(ctxt);
13468
13469
0
    nsPop(ctxt, nsnr);
13470
0
    if ((RAW == '<') && (NXT(1) == '/')) {
13471
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13472
0
    } else if (RAW != 0) {
13473
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13474
0
    }
13475
0
    if ((ctxt->node != NULL) && (ctxt->node != node)) {
13476
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13477
0
  ctxt->wellFormed = 0;
13478
0
    }
13479
13480
0
    if (!ctxt->wellFormed) {
13481
0
        if (ctxt->errNo == 0)
13482
0
      ret = XML_ERR_INTERNAL_ERROR;
13483
0
  else
13484
0
      ret = (xmlParserErrors)ctxt->errNo;
13485
0
    } else {
13486
0
        ret = XML_ERR_OK;
13487
0
    }
13488
13489
    /*
13490
     * Return the newly created nodeset after unlinking it from
13491
     * the pseudo sibling.
13492
     */
13493
13494
0
    cur = fake->next;
13495
0
    fake->next = NULL;
13496
0
    node->last = fake;
13497
13498
0
    if (cur != NULL) {
13499
0
  cur->prev = NULL;
13500
0
    }
13501
13502
0
    *lst = cur;
13503
13504
0
    while (cur != NULL) {
13505
0
  cur->parent = NULL;
13506
0
  cur = cur->next;
13507
0
    }
13508
13509
0
    xmlUnlinkNode(fake);
13510
0
    xmlFreeNode(fake);
13511
13512
13513
0
    if (ret != XML_ERR_OK) {
13514
0
        xmlFreeNodeList(*lst);
13515
0
  *lst = NULL;
13516
0
    }
13517
13518
0
    if (doc->dict != NULL)
13519
0
        ctxt->dict = NULL;
13520
0
    xmlFreeParserCtxt(ctxt);
13521
13522
0
    return(ret);
13523
#else /* !SAX2 */
13524
    return(XML_ERR_INTERNAL_ERROR);
13525
#endif
13526
0
}
13527
13528
#ifdef LIBXML_SAX1_ENABLED
13529
/**
13530
 * xmlParseBalancedChunkMemoryRecover:
13531
 * @doc:  the document the chunk pertains to (must not be NULL)
13532
 * @sax:  the SAX handler block (possibly NULL)
13533
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13534
 * @depth:  Used for loop detection, use 0
13535
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13536
 * @lst:  the return value for the set of parsed nodes
13537
 * @recover: return nodes even if the data is broken (use 0)
13538
 *
13539
 *
13540
 * Parse a well-balanced chunk of an XML document
13541
 * called by the parser
13542
 * The allowed sequence for the Well Balanced Chunk is the one defined by
13543
 * the content production in the XML grammar:
13544
 *
13545
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13546
 *
13547
 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13548
 *    the parser error code otherwise
13549
 *
13550
 * In case recover is set to 1, the nodelist will not be empty even if
13551
 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13552
 * some extent.
13553
 */
13554
int
13555
xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13556
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
13557
0
     int recover) {
13558
0
    xmlParserCtxtPtr ctxt;
13559
0
    xmlDocPtr newDoc;
13560
0
    xmlSAXHandlerPtr oldsax = NULL;
13561
0
    xmlNodePtr content, newRoot;
13562
0
    int size;
13563
0
    int ret = 0;
13564
13565
0
    if (depth > 40) {
13566
0
  return(XML_ERR_ENTITY_LOOP);
13567
0
    }
13568
13569
13570
0
    if (lst != NULL)
13571
0
        *lst = NULL;
13572
0
    if (string == NULL)
13573
0
        return(-1);
13574
13575
0
    size = xmlStrlen(string);
13576
13577
0
    ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13578
0
    if (ctxt == NULL) return(-1);
13579
0
    ctxt->userData = ctxt;
13580
0
    if (sax != NULL) {
13581
0
  oldsax = ctxt->sax;
13582
0
        ctxt->sax = sax;
13583
0
  if (user_data != NULL)
13584
0
      ctxt->userData = user_data;
13585
0
    }
13586
0
    newDoc = xmlNewDoc(BAD_CAST "1.0");
13587
0
    if (newDoc == NULL) {
13588
0
  xmlFreeParserCtxt(ctxt);
13589
0
  return(-1);
13590
0
    }
13591
0
    newDoc->properties = XML_DOC_INTERNAL;
13592
0
    if ((doc != NULL) && (doc->dict != NULL)) {
13593
0
        xmlDictFree(ctxt->dict);
13594
0
  ctxt->dict = doc->dict;
13595
0
  xmlDictReference(ctxt->dict);
13596
0
  ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13597
0
  ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13598
0
  ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13599
0
  ctxt->dictNames = 1;
13600
0
    } else {
13601
0
  xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
13602
0
    }
13603
    /* doc == NULL is only supported for historic reasons */
13604
0
    if (doc != NULL) {
13605
0
  newDoc->intSubset = doc->intSubset;
13606
0
  newDoc->extSubset = doc->extSubset;
13607
0
    }
13608
0
    newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13609
0
    if (newRoot == NULL) {
13610
0
  if (sax != NULL)
13611
0
      ctxt->sax = oldsax;
13612
0
  xmlFreeParserCtxt(ctxt);
13613
0
  newDoc->intSubset = NULL;
13614
0
  newDoc->extSubset = NULL;
13615
0
        xmlFreeDoc(newDoc);
13616
0
  return(-1);
13617
0
    }
13618
0
    xmlAddChild((xmlNodePtr) newDoc, newRoot);
13619
0
    nodePush(ctxt, newRoot);
13620
    /* doc == NULL is only supported for historic reasons */
13621
0
    if (doc == NULL) {
13622
0
  ctxt->myDoc = newDoc;
13623
0
    } else {
13624
0
  ctxt->myDoc = newDoc;
13625
0
  newDoc->children->doc = doc;
13626
  /* Ensure that doc has XML spec namespace */
13627
0
  xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13628
0
  newDoc->oldNs = doc->oldNs;
13629
0
    }
13630
0
    ctxt->instate = XML_PARSER_CONTENT;
13631
0
    ctxt->input_id = 2;
13632
0
    ctxt->depth = depth;
13633
13634
    /*
13635
     * Doing validity checking on chunk doesn't make sense
13636
     */
13637
0
    ctxt->validate = 0;
13638
0
    ctxt->loadsubset = 0;
13639
0
    xmlDetectSAX2(ctxt);
13640
13641
0
    if ( doc != NULL ){
13642
0
        content = doc->children;
13643
0
        doc->children = NULL;
13644
0
        xmlParseContent(ctxt);
13645
0
        doc->children = content;
13646
0
    }
13647
0
    else {
13648
0
        xmlParseContent(ctxt);
13649
0
    }
13650
0
    if ((RAW == '<') && (NXT(1) == '/')) {
13651
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13652
0
    } else if (RAW != 0) {
13653
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13654
0
    }
13655
0
    if (ctxt->node != newDoc->children) {
13656
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13657
0
    }
13658
13659
0
    if (!ctxt->wellFormed) {
13660
0
        if (ctxt->errNo == 0)
13661
0
      ret = 1;
13662
0
  else
13663
0
      ret = ctxt->errNo;
13664
0
    } else {
13665
0
      ret = 0;
13666
0
    }
13667
13668
0
    if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13669
0
  xmlNodePtr cur;
13670
13671
  /*
13672
   * Return the newly created nodeset after unlinking it from
13673
   * they pseudo parent.
13674
   */
13675
0
  cur = newDoc->children->children;
13676
0
  *lst = cur;
13677
0
  while (cur != NULL) {
13678
0
      xmlSetTreeDoc(cur, doc);
13679
0
      cur->parent = NULL;
13680
0
      cur = cur->next;
13681
0
  }
13682
0
  newDoc->children->children = NULL;
13683
0
    }
13684
13685
0
    if (sax != NULL)
13686
0
  ctxt->sax = oldsax;
13687
0
    xmlFreeParserCtxt(ctxt);
13688
0
    newDoc->intSubset = NULL;
13689
0
    newDoc->extSubset = NULL;
13690
    /* This leaks the namespace list if doc == NULL */
13691
0
    newDoc->oldNs = NULL;
13692
0
    xmlFreeDoc(newDoc);
13693
13694
0
    return(ret);
13695
0
}
13696
13697
/**
13698
 * xmlSAXParseEntity:
13699
 * @sax:  the SAX handler block
13700
 * @filename:  the filename
13701
 *
13702
 * DEPRECATED: Don't use.
13703
 *
13704
 * parse an XML external entity out of context and build a tree.
13705
 * It use the given SAX function block to handle the parsing callback.
13706
 * If sax is NULL, fallback to the default DOM tree building routines.
13707
 *
13708
 * [78] extParsedEnt ::= TextDecl? content
13709
 *
13710
 * This correspond to a "Well Balanced" chunk
13711
 *
13712
 * Returns the resulting document tree
13713
 */
13714
13715
xmlDocPtr
13716
0
xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13717
0
    xmlDocPtr ret;
13718
0
    xmlParserCtxtPtr ctxt;
13719
13720
0
    ctxt = xmlCreateFileParserCtxt(filename);
13721
0
    if (ctxt == NULL) {
13722
0
  return(NULL);
13723
0
    }
13724
0
    if (sax != NULL) {
13725
0
  if (ctxt->sax != NULL)
13726
0
      xmlFree(ctxt->sax);
13727
0
        ctxt->sax = sax;
13728
0
        ctxt->userData = NULL;
13729
0
    }
13730
13731
0
    xmlParseExtParsedEnt(ctxt);
13732
13733
0
    if (ctxt->wellFormed)
13734
0
  ret = ctxt->myDoc;
13735
0
    else {
13736
0
        ret = NULL;
13737
0
        xmlFreeDoc(ctxt->myDoc);
13738
0
        ctxt->myDoc = NULL;
13739
0
    }
13740
0
    if (sax != NULL)
13741
0
        ctxt->sax = NULL;
13742
0
    xmlFreeParserCtxt(ctxt);
13743
13744
0
    return(ret);
13745
0
}
13746
13747
/**
13748
 * xmlParseEntity:
13749
 * @filename:  the filename
13750
 *
13751
 * parse an XML external entity out of context and build a tree.
13752
 *
13753
 * [78] extParsedEnt ::= TextDecl? content
13754
 *
13755
 * This correspond to a "Well Balanced" chunk
13756
 *
13757
 * Returns the resulting document tree
13758
 */
13759
13760
xmlDocPtr
13761
0
xmlParseEntity(const char *filename) {
13762
0
    return(xmlSAXParseEntity(NULL, filename));
13763
0
}
13764
#endif /* LIBXML_SAX1_ENABLED */
13765
13766
/**
13767
 * xmlCreateEntityParserCtxtInternal:
13768
 * @URL:  the entity URL
13769
 * @ID:  the entity PUBLIC ID
13770
 * @base:  a possible base for the target URI
13771
 * @pctx:  parser context used to set options on new context
13772
 *
13773
 * Create a parser context for an external entity
13774
 * Automatic support for ZLIB/Compress compressed document is provided
13775
 * by default if found at compile-time.
13776
 *
13777
 * Returns the new parser context or NULL
13778
 */
13779
static xmlParserCtxtPtr
13780
xmlCreateEntityParserCtxtInternal(xmlSAXHandlerPtr sax, void *userData,
13781
        const xmlChar *URL, const xmlChar *ID, const xmlChar *base,
13782
821k
        xmlParserCtxtPtr pctx) {
13783
821k
    xmlParserCtxtPtr ctxt;
13784
821k
    xmlParserInputPtr inputStream;
13785
821k
    char *directory = NULL;
13786
821k
    xmlChar *uri;
13787
13788
821k
    ctxt = xmlNewSAXParserCtxt(sax, userData);
13789
821k
    if (ctxt == NULL) {
13790
0
  return(NULL);
13791
0
    }
13792
13793
821k
    if (pctx != NULL) {
13794
821k
        ctxt->options = pctx->options;
13795
821k
        ctxt->_private = pctx->_private;
13796
821k
  ctxt->input_id = pctx->input_id;
13797
821k
    }
13798
13799
    /* Don't read from stdin. */
13800
821k
    if (xmlStrcmp(URL, BAD_CAST "-") == 0)
13801
122
        URL = BAD_CAST "./-";
13802
13803
821k
    uri = xmlBuildURI(URL, base);
13804
13805
821k
    if (uri == NULL) {
13806
17.6k
  inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
13807
17.6k
  if (inputStream == NULL) {
13808
17.3k
      xmlFreeParserCtxt(ctxt);
13809
17.3k
      return(NULL);
13810
17.3k
  }
13811
13812
345
  inputPush(ctxt, inputStream);
13813
13814
345
  if ((ctxt->directory == NULL) && (directory == NULL))
13815
345
      directory = xmlParserGetDirectory((char *)URL);
13816
345
  if ((ctxt->directory == NULL) && (directory != NULL))
13817
345
      ctxt->directory = directory;
13818
803k
    } else {
13819
803k
  inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
13820
803k
  if (inputStream == NULL) {
13821
602k
      xmlFree(uri);
13822
602k
      xmlFreeParserCtxt(ctxt);
13823
602k
      return(NULL);
13824
602k
  }
13825
13826
201k
  inputPush(ctxt, inputStream);
13827
13828
201k
  if ((ctxt->directory == NULL) && (directory == NULL))
13829
201k
      directory = xmlParserGetDirectory((char *)uri);
13830
201k
  if ((ctxt->directory == NULL) && (directory != NULL))
13831
201k
      ctxt->directory = directory;
13832
201k
  xmlFree(uri);
13833
201k
    }
13834
201k
    return(ctxt);
13835
821k
}
13836
13837
/**
13838
 * xmlCreateEntityParserCtxt:
13839
 * @URL:  the entity URL
13840
 * @ID:  the entity PUBLIC ID
13841
 * @base:  a possible base for the target URI
13842
 *
13843
 * Create a parser context for an external entity
13844
 * Automatic support for ZLIB/Compress compressed document is provided
13845
 * by default if found at compile-time.
13846
 *
13847
 * Returns the new parser context or NULL
13848
 */
13849
xmlParserCtxtPtr
13850
xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
13851
0
                    const xmlChar *base) {
13852
0
    return xmlCreateEntityParserCtxtInternal(NULL, NULL, URL, ID, base, NULL);
13853
13854
0
}
13855
13856
/************************************************************************
13857
 *                  *
13858
 *    Front ends when parsing from a file     *
13859
 *                  *
13860
 ************************************************************************/
13861
13862
/**
13863
 * xmlCreateURLParserCtxt:
13864
 * @filename:  the filename or URL
13865
 * @options:  a combination of xmlParserOption
13866
 *
13867
 * Create a parser context for a file or URL content.
13868
 * Automatic support for ZLIB/Compress compressed document is provided
13869
 * by default if found at compile-time and for file accesses
13870
 *
13871
 * Returns the new parser context or NULL
13872
 */
13873
xmlParserCtxtPtr
13874
xmlCreateURLParserCtxt(const char *filename, int options)
13875
0
{
13876
0
    xmlParserCtxtPtr ctxt;
13877
0
    xmlParserInputPtr inputStream;
13878
0
    char *directory = NULL;
13879
13880
0
    ctxt = xmlNewParserCtxt();
13881
0
    if (ctxt == NULL) {
13882
0
  xmlErrMemory(NULL, "cannot allocate parser context");
13883
0
  return(NULL);
13884
0
    }
13885
13886
0
    if (options)
13887
0
  xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13888
0
    ctxt->linenumbers = 1;
13889
13890
0
    inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
13891
0
    if (inputStream == NULL) {
13892
0
  xmlFreeParserCtxt(ctxt);
13893
0
  return(NULL);
13894
0
    }
13895
13896
0
    inputPush(ctxt, inputStream);
13897
0
    if ((ctxt->directory == NULL) && (directory == NULL))
13898
0
        directory = xmlParserGetDirectory(filename);
13899
0
    if ((ctxt->directory == NULL) && (directory != NULL))
13900
0
        ctxt->directory = directory;
13901
13902
0
    return(ctxt);
13903
0
}
13904
13905
/**
13906
 * xmlCreateFileParserCtxt:
13907
 * @filename:  the filename
13908
 *
13909
 * Create a parser context for a file content.
13910
 * Automatic support for ZLIB/Compress compressed document is provided
13911
 * by default if found at compile-time.
13912
 *
13913
 * Returns the new parser context or NULL
13914
 */
13915
xmlParserCtxtPtr
13916
xmlCreateFileParserCtxt(const char *filename)
13917
0
{
13918
0
    return(xmlCreateURLParserCtxt(filename, 0));
13919
0
}
13920
13921
#ifdef LIBXML_SAX1_ENABLED
13922
/**
13923
 * xmlSAXParseFileWithData:
13924
 * @sax:  the SAX handler block
13925
 * @filename:  the filename
13926
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
13927
 *             documents
13928
 * @data:  the userdata
13929
 *
13930
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
13931
 *
13932
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13933
 * compressed document is provided by default if found at compile-time.
13934
 * It use the given SAX function block to handle the parsing callback.
13935
 * If sax is NULL, fallback to the default DOM tree building routines.
13936
 *
13937
 * User data (void *) is stored within the parser context in the
13938
 * context's _private member, so it is available nearly everywhere in libxml
13939
 *
13940
 * Returns the resulting document tree
13941
 */
13942
13943
xmlDocPtr
13944
xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
13945
0
                        int recovery, void *data) {
13946
0
    xmlDocPtr ret;
13947
0
    xmlParserCtxtPtr ctxt;
13948
13949
0
    xmlInitParser();
13950
13951
0
    ctxt = xmlCreateFileParserCtxt(filename);
13952
0
    if (ctxt == NULL) {
13953
0
  return(NULL);
13954
0
    }
13955
0
    if (sax != NULL) {
13956
0
  if (ctxt->sax != NULL)
13957
0
      xmlFree(ctxt->sax);
13958
0
        ctxt->sax = sax;
13959
0
    }
13960
0
    xmlDetectSAX2(ctxt);
13961
0
    if (data!=NULL) {
13962
0
  ctxt->_private = data;
13963
0
    }
13964
13965
0
    if (ctxt->directory == NULL)
13966
0
        ctxt->directory = xmlParserGetDirectory(filename);
13967
13968
0
    ctxt->recovery = recovery;
13969
13970
0
    xmlParseDocument(ctxt);
13971
13972
0
    if ((ctxt->wellFormed) || recovery) {
13973
0
        ret = ctxt->myDoc;
13974
0
  if ((ret != NULL) && (ctxt->input->buf != NULL)) {
13975
0
      if (ctxt->input->buf->compressed > 0)
13976
0
    ret->compression = 9;
13977
0
      else
13978
0
    ret->compression = ctxt->input->buf->compressed;
13979
0
  }
13980
0
    }
13981
0
    else {
13982
0
       ret = NULL;
13983
0
       xmlFreeDoc(ctxt->myDoc);
13984
0
       ctxt->myDoc = NULL;
13985
0
    }
13986
0
    if (sax != NULL)
13987
0
        ctxt->sax = NULL;
13988
0
    xmlFreeParserCtxt(ctxt);
13989
13990
0
    return(ret);
13991
0
}
13992
13993
/**
13994
 * xmlSAXParseFile:
13995
 * @sax:  the SAX handler block
13996
 * @filename:  the filename
13997
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
13998
 *             documents
13999
 *
14000
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
14001
 *
14002
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14003
 * compressed document is provided by default if found at compile-time.
14004
 * It use the given SAX function block to handle the parsing callback.
14005
 * If sax is NULL, fallback to the default DOM tree building routines.
14006
 *
14007
 * Returns the resulting document tree
14008
 */
14009
14010
xmlDocPtr
14011
xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
14012
0
                          int recovery) {
14013
0
    return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
14014
0
}
14015
14016
/**
14017
 * xmlRecoverDoc:
14018
 * @cur:  a pointer to an array of xmlChar
14019
 *
14020
 * DEPRECATED: Use xmlReadDoc with XML_PARSE_RECOVER.
14021
 *
14022
 * parse an XML in-memory document and build a tree.
14023
 * In the case the document is not Well Formed, a attempt to build a
14024
 * tree is tried anyway
14025
 *
14026
 * Returns the resulting document tree or NULL in case of failure
14027
 */
14028
14029
xmlDocPtr
14030
0
xmlRecoverDoc(const xmlChar *cur) {
14031
0
    return(xmlSAXParseDoc(NULL, cur, 1));
14032
0
}
14033
14034
/**
14035
 * xmlParseFile:
14036
 * @filename:  the filename
14037
 *
14038
 * DEPRECATED: Use xmlReadFile.
14039
 *
14040
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14041
 * compressed document is provided by default if found at compile-time.
14042
 *
14043
 * Returns the resulting document tree if the file was wellformed,
14044
 * NULL otherwise.
14045
 */
14046
14047
xmlDocPtr
14048
0
xmlParseFile(const char *filename) {
14049
0
    return(xmlSAXParseFile(NULL, filename, 0));
14050
0
}
14051
14052
/**
14053
 * xmlRecoverFile:
14054
 * @filename:  the filename
14055
 *
14056
 * DEPRECATED: Use xmlReadFile with XML_PARSE_RECOVER.
14057
 *
14058
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14059
 * compressed document is provided by default if found at compile-time.
14060
 * In the case the document is not Well Formed, it attempts to build
14061
 * a tree anyway
14062
 *
14063
 * Returns the resulting document tree or NULL in case of failure
14064
 */
14065
14066
xmlDocPtr
14067
0
xmlRecoverFile(const char *filename) {
14068
0
    return(xmlSAXParseFile(NULL, filename, 1));
14069
0
}
14070
14071
14072
/**
14073
 * xmlSetupParserForBuffer:
14074
 * @ctxt:  an XML parser context
14075
 * @buffer:  a xmlChar * buffer
14076
 * @filename:  a file name
14077
 *
14078
 * DEPRECATED: Don't use.
14079
 *
14080
 * Setup the parser context to parse a new buffer; Clears any prior
14081
 * contents from the parser context. The buffer parameter must not be
14082
 * NULL, but the filename parameter can be
14083
 */
14084
void
14085
xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
14086
                             const char* filename)
14087
0
{
14088
0
    xmlParserInputPtr input;
14089
14090
0
    if ((ctxt == NULL) || (buffer == NULL))
14091
0
        return;
14092
14093
0
    input = xmlNewInputStream(ctxt);
14094
0
    if (input == NULL) {
14095
0
        xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
14096
0
        xmlClearParserCtxt(ctxt);
14097
0
        return;
14098
0
    }
14099
14100
0
    xmlClearParserCtxt(ctxt);
14101
0
    if (filename != NULL)
14102
0
        input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
14103
0
    input->base = buffer;
14104
0
    input->cur = buffer;
14105
0
    input->end = &buffer[xmlStrlen(buffer)];
14106
0
    inputPush(ctxt, input);
14107
0
}
14108
14109
/**
14110
 * xmlSAXUserParseFile:
14111
 * @sax:  a SAX handler
14112
 * @user_data:  The user data returned on SAX callbacks
14113
 * @filename:  a file name
14114
 *
14115
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
14116
 *
14117
 * parse an XML file and call the given SAX handler routines.
14118
 * Automatic support for ZLIB/Compress compressed document is provided
14119
 *
14120
 * Returns 0 in case of success or a error number otherwise
14121
 */
14122
int
14123
xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
14124
0
                    const char *filename) {
14125
0
    int ret = 0;
14126
0
    xmlParserCtxtPtr ctxt;
14127
14128
0
    ctxt = xmlCreateFileParserCtxt(filename);
14129
0
    if (ctxt == NULL) return -1;
14130
0
    if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14131
0
  xmlFree(ctxt->sax);
14132
0
    ctxt->sax = sax;
14133
0
    xmlDetectSAX2(ctxt);
14134
14135
0
    if (user_data != NULL)
14136
0
  ctxt->userData = user_data;
14137
14138
0
    xmlParseDocument(ctxt);
14139
14140
0
    if (ctxt->wellFormed)
14141
0
  ret = 0;
14142
0
    else {
14143
0
        if (ctxt->errNo != 0)
14144
0
      ret = ctxt->errNo;
14145
0
  else
14146
0
      ret = -1;
14147
0
    }
14148
0
    if (sax != NULL)
14149
0
  ctxt->sax = NULL;
14150
0
    if (ctxt->myDoc != NULL) {
14151
0
        xmlFreeDoc(ctxt->myDoc);
14152
0
  ctxt->myDoc = NULL;
14153
0
    }
14154
0
    xmlFreeParserCtxt(ctxt);
14155
14156
0
    return ret;
14157
0
}
14158
#endif /* LIBXML_SAX1_ENABLED */
14159
14160
/************************************************************************
14161
 *                  *
14162
 *    Front ends when parsing from memory     *
14163
 *                  *
14164
 ************************************************************************/
14165
14166
/**
14167
 * xmlCreateMemoryParserCtxt:
14168
 * @buffer:  a pointer to a char array
14169
 * @size:  the size of the array
14170
 *
14171
 * Create a parser context for an XML in-memory document.
14172
 *
14173
 * Returns the new parser context or NULL
14174
 */
14175
xmlParserCtxtPtr
14176
496k
xmlCreateMemoryParserCtxt(const char *buffer, int size) {
14177
496k
    xmlParserCtxtPtr ctxt;
14178
496k
    xmlParserInputPtr input;
14179
496k
    xmlParserInputBufferPtr buf;
14180
14181
496k
    if (buffer == NULL)
14182
0
  return(NULL);
14183
496k
    if (size <= 0)
14184
3.01k
  return(NULL);
14185
14186
493k
    ctxt = xmlNewParserCtxt();
14187
493k
    if (ctxt == NULL)
14188
0
  return(NULL);
14189
14190
493k
    buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14191
493k
    if (buf == NULL) {
14192
0
  xmlFreeParserCtxt(ctxt);
14193
0
  return(NULL);
14194
0
    }
14195
14196
493k
    input = xmlNewInputStream(ctxt);
14197
493k
    if (input == NULL) {
14198
0
  xmlFreeParserInputBuffer(buf);
14199
0
  xmlFreeParserCtxt(ctxt);
14200
0
  return(NULL);
14201
0
    }
14202
14203
493k
    input->filename = NULL;
14204
493k
    input->buf = buf;
14205
493k
    xmlBufResetInput(input->buf->buffer, input);
14206
14207
493k
    inputPush(ctxt, input);
14208
493k
    return(ctxt);
14209
493k
}
14210
14211
#ifdef LIBXML_SAX1_ENABLED
14212
/**
14213
 * xmlSAXParseMemoryWithData:
14214
 * @sax:  the SAX handler block
14215
 * @buffer:  an pointer to a char array
14216
 * @size:  the size of the array
14217
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14218
 *             documents
14219
 * @data:  the userdata
14220
 *
14221
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14222
 *
14223
 * parse an XML in-memory block and use the given SAX function block
14224
 * to handle the parsing callback. If sax is NULL, fallback to the default
14225
 * DOM tree building routines.
14226
 *
14227
 * User data (void *) is stored within the parser context in the
14228
 * context's _private member, so it is available nearly everywhere in libxml
14229
 *
14230
 * Returns the resulting document tree
14231
 */
14232
14233
xmlDocPtr
14234
xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14235
0
            int size, int recovery, void *data) {
14236
0
    xmlDocPtr ret;
14237
0
    xmlParserCtxtPtr ctxt;
14238
14239
0
    xmlInitParser();
14240
14241
0
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14242
0
    if (ctxt == NULL) return(NULL);
14243
0
    if (sax != NULL) {
14244
0
  if (ctxt->sax != NULL)
14245
0
      xmlFree(ctxt->sax);
14246
0
        ctxt->sax = sax;
14247
0
    }
14248
0
    xmlDetectSAX2(ctxt);
14249
0
    if (data!=NULL) {
14250
0
  ctxt->_private=data;
14251
0
    }
14252
14253
0
    ctxt->recovery = recovery;
14254
14255
0
    xmlParseDocument(ctxt);
14256
14257
0
    if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14258
0
    else {
14259
0
       ret = NULL;
14260
0
       xmlFreeDoc(ctxt->myDoc);
14261
0
       ctxt->myDoc = NULL;
14262
0
    }
14263
0
    if (sax != NULL)
14264
0
  ctxt->sax = NULL;
14265
0
    xmlFreeParserCtxt(ctxt);
14266
14267
0
    return(ret);
14268
0
}
14269
14270
/**
14271
 * xmlSAXParseMemory:
14272
 * @sax:  the SAX handler block
14273
 * @buffer:  an pointer to a char array
14274
 * @size:  the size of the array
14275
 * @recovery:  work in recovery mode, i.e. tries to read not Well Formed
14276
 *             documents
14277
 *
14278
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14279
 *
14280
 * parse an XML in-memory block and use the given SAX function block
14281
 * to handle the parsing callback. If sax is NULL, fallback to the default
14282
 * DOM tree building routines.
14283
 *
14284
 * Returns the resulting document tree
14285
 */
14286
xmlDocPtr
14287
xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14288
0
            int size, int recovery) {
14289
0
    return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
14290
0
}
14291
14292
/**
14293
 * xmlParseMemory:
14294
 * @buffer:  an pointer to a char array
14295
 * @size:  the size of the array
14296
 *
14297
 * DEPRECATED: Use xmlReadMemory.
14298
 *
14299
 * parse an XML in-memory block and build a tree.
14300
 *
14301
 * Returns the resulting document tree
14302
 */
14303
14304
0
xmlDocPtr xmlParseMemory(const char *buffer, int size) {
14305
0
   return(xmlSAXParseMemory(NULL, buffer, size, 0));
14306
0
}
14307
14308
/**
14309
 * xmlRecoverMemory:
14310
 * @buffer:  an pointer to a char array
14311
 * @size:  the size of the array
14312
 *
14313
 * DEPRECATED: Use xmlReadMemory with XML_PARSE_RECOVER.
14314
 *
14315
 * parse an XML in-memory block and build a tree.
14316
 * In the case the document is not Well Formed, an attempt to
14317
 * build a tree is tried anyway
14318
 *
14319
 * Returns the resulting document tree or NULL in case of error
14320
 */
14321
14322
0
xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
14323
0
   return(xmlSAXParseMemory(NULL, buffer, size, 1));
14324
0
}
14325
14326
/**
14327
 * xmlSAXUserParseMemory:
14328
 * @sax:  a SAX handler
14329
 * @user_data:  The user data returned on SAX callbacks
14330
 * @buffer:  an in-memory XML document input
14331
 * @size:  the length of the XML document in bytes
14332
 *
14333
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14334
 *
14335
 * parse an XML in-memory buffer and call the given SAX handler routines.
14336
 *
14337
 * Returns 0 in case of success or a error number otherwise
14338
 */
14339
int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
14340
0
        const char *buffer, int size) {
14341
0
    int ret = 0;
14342
0
    xmlParserCtxtPtr ctxt;
14343
14344
0
    xmlInitParser();
14345
14346
0
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14347
0
    if (ctxt == NULL) return -1;
14348
0
    if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14349
0
        xmlFree(ctxt->sax);
14350
0
    ctxt->sax = sax;
14351
0
    xmlDetectSAX2(ctxt);
14352
14353
0
    if (user_data != NULL)
14354
0
  ctxt->userData = user_data;
14355
14356
0
    xmlParseDocument(ctxt);
14357
14358
0
    if (ctxt->wellFormed)
14359
0
  ret = 0;
14360
0
    else {
14361
0
        if (ctxt->errNo != 0)
14362
0
      ret = ctxt->errNo;
14363
0
  else
14364
0
      ret = -1;
14365
0
    }
14366
0
    if (sax != NULL)
14367
0
        ctxt->sax = NULL;
14368
0
    if (ctxt->myDoc != NULL) {
14369
0
        xmlFreeDoc(ctxt->myDoc);
14370
0
  ctxt->myDoc = NULL;
14371
0
    }
14372
0
    xmlFreeParserCtxt(ctxt);
14373
14374
0
    return ret;
14375
0
}
14376
#endif /* LIBXML_SAX1_ENABLED */
14377
14378
/**
14379
 * xmlCreateDocParserCtxt:
14380
 * @cur:  a pointer to an array of xmlChar
14381
 *
14382
 * Creates a parser context for an XML in-memory document.
14383
 *
14384
 * Returns the new parser context or NULL
14385
 */
14386
xmlParserCtxtPtr
14387
0
xmlCreateDocParserCtxt(const xmlChar *cur) {
14388
0
    int len;
14389
14390
0
    if (cur == NULL)
14391
0
  return(NULL);
14392
0
    len = xmlStrlen(cur);
14393
0
    return(xmlCreateMemoryParserCtxt((const char *)cur, len));
14394
0
}
14395
14396
#ifdef LIBXML_SAX1_ENABLED
14397
/**
14398
 * xmlSAXParseDoc:
14399
 * @sax:  the SAX handler block
14400
 * @cur:  a pointer to an array of xmlChar
14401
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14402
 *             documents
14403
 *
14404
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadDoc.
14405
 *
14406
 * parse an XML in-memory document and build a tree.
14407
 * It use the given SAX function block to handle the parsing callback.
14408
 * If sax is NULL, fallback to the default DOM tree building routines.
14409
 *
14410
 * Returns the resulting document tree
14411
 */
14412
14413
xmlDocPtr
14414
0
xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
14415
0
    xmlDocPtr ret;
14416
0
    xmlParserCtxtPtr ctxt;
14417
0
    xmlSAXHandlerPtr oldsax = NULL;
14418
14419
0
    if (cur == NULL) return(NULL);
14420
14421
14422
0
    ctxt = xmlCreateDocParserCtxt(cur);
14423
0
    if (ctxt == NULL) return(NULL);
14424
0
    if (sax != NULL) {
14425
0
        oldsax = ctxt->sax;
14426
0
        ctxt->sax = sax;
14427
0
        ctxt->userData = NULL;
14428
0
    }
14429
0
    xmlDetectSAX2(ctxt);
14430
14431
0
    xmlParseDocument(ctxt);
14432
0
    if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14433
0
    else {
14434
0
       ret = NULL;
14435
0
       xmlFreeDoc(ctxt->myDoc);
14436
0
       ctxt->myDoc = NULL;
14437
0
    }
14438
0
    if (sax != NULL)
14439
0
  ctxt->sax = oldsax;
14440
0
    xmlFreeParserCtxt(ctxt);
14441
14442
0
    return(ret);
14443
0
}
14444
14445
/**
14446
 * xmlParseDoc:
14447
 * @cur:  a pointer to an array of xmlChar
14448
 *
14449
 * DEPRECATED: Use xmlReadDoc.
14450
 *
14451
 * parse an XML in-memory document and build a tree.
14452
 *
14453
 * Returns the resulting document tree
14454
 */
14455
14456
xmlDocPtr
14457
0
xmlParseDoc(const xmlChar *cur) {
14458
0
    return(xmlSAXParseDoc(NULL, cur, 0));
14459
0
}
14460
#endif /* LIBXML_SAX1_ENABLED */
14461
14462
#ifdef LIBXML_LEGACY_ENABLED
14463
/************************************************************************
14464
 *                  *
14465
 *  Specific function to keep track of entities references    *
14466
 *  and used by the XSLT debugger         *
14467
 *                  *
14468
 ************************************************************************/
14469
14470
static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14471
14472
/**
14473
 * xmlAddEntityReference:
14474
 * @ent : A valid entity
14475
 * @firstNode : A valid first node for children of entity
14476
 * @lastNode : A valid last node of children entity
14477
 *
14478
 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14479
 */
14480
static void
14481
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14482
                      xmlNodePtr lastNode)
14483
{
14484
    if (xmlEntityRefFunc != NULL) {
14485
        (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14486
    }
14487
}
14488
14489
14490
/**
14491
 * xmlSetEntityReferenceFunc:
14492
 * @func: A valid function
14493
 *
14494
 * Set the function to call call back when a xml reference has been made
14495
 */
14496
void
14497
xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14498
{
14499
    xmlEntityRefFunc = func;
14500
}
14501
#endif /* LIBXML_LEGACY_ENABLED */
14502
14503
/************************************************************************
14504
 *                  *
14505
 *        Miscellaneous       *
14506
 *                  *
14507
 ************************************************************************/
14508
14509
/* Non-zero once xmlInitParser() has completed, cleared by xmlCleanupParser(). */
static int xmlParserInitialized = 0;

/**
 * xmlInitParser:
 *
 * Initialization function for the XML parser.
 * This is not reentrant. Call once before processing in case of
 * use in multithreaded programs.
 *
 * Calls after the first successful initialization return immediately.
 */
void
xmlInitParser(void) {
    /*
     * Note that the initialization code must not make memory allocations.
     */
    if (xmlParserInitialized)
        return;

#ifdef LIBXML_THREAD_ENABLED
    /*
     * Re-test the flag under the global init mutex: the initialization
     * may have completed between the unlocked test above and the lock.
     */
    __xmlGlobalInitMutexLock();
    if (xmlParserInitialized) {
        __xmlGlobalInitMutexUnlock();
        return;
    }
#endif

#if defined(_WIN32) && (!defined(LIBXML_STATIC) || defined(LIBXML_STATIC_FOR_DLL))
    /* Only register the exit-time cleanup with the default deallocator. */
    if (xmlFree == free)
        atexit(xmlCleanupParser);
#endif

    /* The order of the calls below is preserved from the original code. */
    xmlInitThreadsInternal();
    xmlInitGlobalsInternal();
    xmlInitMemoryInternal();
    __xmlInitializeDict();
    xmlInitEncodingInternal();
    xmlRegisterDefaultInputCallbacks();
#ifdef LIBXML_OUTPUT_ENABLED
    xmlRegisterDefaultOutputCallbacks();
#endif /* LIBXML_OUTPUT_ENABLED */
#if defined(LIBXML_XPATH_ENABLED) || defined(LIBXML_SCHEMAS_ENABLED)
    xmlInitXPathInternal();
#endif
    xmlParserInitialized = 1;

#ifdef LIBXML_THREAD_ENABLED
    __xmlGlobalInitMutexUnlock();
#endif
}
14554
14555
/**
14556
 * xmlCleanupParser:
14557
 *
14558
 * This function name is somewhat misleading. It does not clean up
14559
 * parser state, it cleans up memory allocated by the library itself.
14560
 * It is a cleanup function for the XML library. It tries to reclaim all
14561
 * related global memory allocated for the library processing.
14562
 * It doesn't deallocate any document related memory. One should
14563
 * call xmlCleanupParser() only when the process has finished using
14564
 * the library and all XML/HTML documents built with it.
14565
 * See also xmlInitParser() which has the opposite function of preparing
14566
 * the library for operations.
14567
 *
14568
 * WARNING: if your application is multithreaded or has plugin support
14569
 *          calling this may crash the application if another thread or
14570
 *          a plugin is still using libxml2. It's sometimes very hard to
14571
 *          guess if libxml2 is in use in the application, some libraries
14572
 *          or plugins may use it without notice. In case of doubt abstain
14573
 *          from calling this function or do it just before calling exit()
14574
 *          to avoid leak reports from valgrind !
14575
 */
14576
14577
void
14578
0
xmlCleanupParser(void) {
14579
0
    if (!xmlParserInitialized)
14580
0
  return;
14581
14582
0
    xmlCleanupCharEncodingHandlers();
14583
0
#ifdef LIBXML_CATALOG_ENABLED
14584
0
    xmlCatalogCleanup();
14585
0
#endif
14586
0
    xmlCleanupDictInternal();
14587
0
    xmlCleanupInputCallbacks();
14588
0
#ifdef LIBXML_OUTPUT_ENABLED
14589
0
    xmlCleanupOutputCallbacks();
14590
0
#endif
14591
0
#ifdef LIBXML_SCHEMAS_ENABLED
14592
0
    xmlSchemaCleanupTypes();
14593
0
    xmlRelaxNGCleanupTypes();
14594
0
#endif
14595
0
    xmlCleanupGlobalsInternal();
14596
0
    xmlCleanupThreadsInternal();
14597
0
    xmlCleanupMemoryInternal();
14598
0
    xmlParserInitialized = 0;
14599
0
}
14600
14601
#if defined(HAVE_ATTRIBUTE_DESTRUCTOR) && !defined(LIBXML_STATIC) && \
    !defined(_WIN32)
/*
 * xmlDestructor:
 *
 * Function tagged with ATTRIBUTE_DESTRUCTOR which runs
 * xmlCleanupParser(), but only while the default deallocator is in use.
 */
static void
ATTRIBUTE_DESTRUCTOR
xmlDestructor(void) {
    /*
     * Calling custom deallocation functions in a destructor can cause
     * problems, for example with Nokogiri.
     */
    if (xmlFree != free)
        return;

    xmlCleanupParser();
}
#endif
14614
14615
/************************************************************************
14616
 *                  *
14617
 *  New set (2.6.0) of simpler and more flexible APIs   *
14618
 *                  *
14619
 ************************************************************************/
14620
14621
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A NULL @str is ignored, and when "dict" is NULL the
 * string is always freed.
 *
 * The expansion is wrapped in do { } while (0) so that it forms a single
 * statement and stays correct inside an unbraced if/else. Invoke it
 * followed by a semicolon.
 */
#define DICT_FREE(str)						\
    do {							\
	if ((str) && ((!dict) ||				\
	    (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
	    xmlFree((char *)(str));				\
    } while (0)
14632
14633
/**
14634
 * xmlCtxtReset:
14635
 * @ctxt: an XML parser context
14636
 *
14637
 * Reset a parser context
14638
 */
14639
void
14640
xmlCtxtReset(xmlParserCtxtPtr ctxt)
14641
0
{
14642
0
    xmlParserInputPtr input;
14643
0
    xmlDictPtr dict;
14644
14645
0
    if (ctxt == NULL)
14646
0
        return;
14647
14648
0
    dict = ctxt->dict;
14649
14650
0
    while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14651
0
        xmlFreeInputStream(input);
14652
0
    }
14653
0
    ctxt->inputNr = 0;
14654
0
    ctxt->input = NULL;
14655
14656
0
    ctxt->spaceNr = 0;
14657
0
    if (ctxt->spaceTab != NULL) {
14658
0
  ctxt->spaceTab[0] = -1;
14659
0
  ctxt->space = &ctxt->spaceTab[0];
14660
0
    } else {
14661
0
        ctxt->space = NULL;
14662
0
    }
14663
14664
14665
0
    ctxt->nodeNr = 0;
14666
0
    ctxt->node = NULL;
14667
14668
0
    ctxt->nameNr = 0;
14669
0
    ctxt->name = NULL;
14670
14671
0
    ctxt->nsNr = 0;
14672
14673
0
    DICT_FREE(ctxt->version);
14674
0
    ctxt->version = NULL;
14675
0
    DICT_FREE(ctxt->encoding);
14676
0
    ctxt->encoding = NULL;
14677
0
    DICT_FREE(ctxt->directory);
14678
0
    ctxt->directory = NULL;
14679
0
    DICT_FREE(ctxt->extSubURI);
14680
0
    ctxt->extSubURI = NULL;
14681
0
    DICT_FREE(ctxt->extSubSystem);
14682
0
    ctxt->extSubSystem = NULL;
14683
0
    if (ctxt->myDoc != NULL)
14684
0
        xmlFreeDoc(ctxt->myDoc);
14685
0
    ctxt->myDoc = NULL;
14686
14687
0
    ctxt->standalone = -1;
14688
0
    ctxt->hasExternalSubset = 0;
14689
0
    ctxt->hasPErefs = 0;
14690
0
    ctxt->html = 0;
14691
0
    ctxt->external = 0;
14692
0
    ctxt->instate = XML_PARSER_START;
14693
0
    ctxt->token = 0;
14694
14695
0
    ctxt->wellFormed = 1;
14696
0
    ctxt->nsWellFormed = 1;
14697
0
    ctxt->disableSAX = 0;
14698
0
    ctxt->valid = 1;
14699
#if 0
14700
    ctxt->vctxt.userData = ctxt;
14701
    ctxt->vctxt.error = xmlParserValidityError;
14702
    ctxt->vctxt.warning = xmlParserValidityWarning;
14703
#endif
14704
0
    ctxt->record_info = 0;
14705
0
    ctxt->checkIndex = 0;
14706
0
    ctxt->endCheckState = 0;
14707
0
    ctxt->inSubset = 0;
14708
0
    ctxt->errNo = XML_ERR_OK;
14709
0
    ctxt->depth = 0;
14710
0
    ctxt->charset = XML_CHAR_ENCODING_UTF8;
14711
0
    ctxt->catalogs = NULL;
14712
0
    ctxt->sizeentities = 0;
14713
0
    ctxt->sizeentcopy = 0;
14714
0
    xmlInitNodeInfoSeq(&ctxt->node_seq);
14715
14716
0
    if (ctxt->attsDefault != NULL) {
14717
0
        xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
14718
0
        ctxt->attsDefault = NULL;
14719
0
    }
14720
0
    if (ctxt->attsSpecial != NULL) {
14721
0
        xmlHashFree(ctxt->attsSpecial, NULL);
14722
0
        ctxt->attsSpecial = NULL;
14723
0
    }
14724
14725
0
#ifdef LIBXML_CATALOG_ENABLED
14726
0
    if (ctxt->catalogs != NULL)
14727
0
  xmlCatalogFreeLocal(ctxt->catalogs);
14728
0
#endif
14729
0
    ctxt->nbErrors = 0;
14730
0
    ctxt->nbWarnings = 0;
14731
0
    if (ctxt->lastError.code != XML_ERR_OK)
14732
0
        xmlResetError(&ctxt->lastError);
14733
0
}
14734
14735
/**
14736
 * xmlCtxtResetPush:
14737
 * @ctxt: an XML parser context
14738
 * @chunk:  a pointer to an array of chars
14739
 * @size:  number of chars in the array
14740
 * @filename:  an optional file name or URI
14741
 * @encoding:  the document encoding, or NULL
14742
 *
14743
 * Reset a push parser context
14744
 *
14745
 * Returns 0 in case of success and 1 in case of error
14746
 */
14747
int
14748
xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14749
                 int size, const char *filename, const char *encoding)
14750
0
{
14751
0
    xmlParserInputPtr inputStream;
14752
0
    xmlParserInputBufferPtr buf;
14753
0
    xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14754
14755
0
    if (ctxt == NULL)
14756
0
        return(1);
14757
14758
0
    if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14759
0
        enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14760
14761
0
    buf = xmlAllocParserInputBuffer(enc);
14762
0
    if (buf == NULL)
14763
0
        return(1);
14764
14765
0
    if (ctxt == NULL) {
14766
0
        xmlFreeParserInputBuffer(buf);
14767
0
        return(1);
14768
0
    }
14769
14770
0
    xmlCtxtReset(ctxt);
14771
14772
0
    if (filename == NULL) {
14773
0
        ctxt->directory = NULL;
14774
0
    } else {
14775
0
        ctxt->directory = xmlParserGetDirectory(filename);
14776
0
    }
14777
14778
0
    inputStream = xmlNewInputStream(ctxt);
14779
0
    if (inputStream == NULL) {
14780
0
        xmlFreeParserInputBuffer(buf);
14781
0
        return(1);
14782
0
    }
14783
14784
0
    if (filename == NULL)
14785
0
        inputStream->filename = NULL;
14786
0
    else
14787
0
        inputStream->filename = (char *)
14788
0
            xmlCanonicPath((const xmlChar *) filename);
14789
0
    inputStream->buf = buf;
14790
0
    xmlBufResetInput(buf->buffer, inputStream);
14791
14792
0
    inputPush(ctxt, inputStream);
14793
14794
0
    if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14795
0
        (ctxt->input->buf != NULL)) {
14796
0
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
14797
0
        size_t cur = ctxt->input->cur - ctxt->input->base;
14798
14799
0
        xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14800
14801
0
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
14802
#ifdef DEBUG_PUSH
14803
        xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14804
#endif
14805
0
    }
14806
14807
0
    if (encoding != NULL) {
14808
0
        xmlCharEncodingHandlerPtr hdlr;
14809
14810
0
        if (ctxt->encoding != NULL)
14811
0
      xmlFree((xmlChar *) ctxt->encoding);
14812
0
        ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14813
14814
0
        hdlr = xmlFindCharEncodingHandler(encoding);
14815
0
        if (hdlr != NULL) {
14816
0
            xmlSwitchToEncoding(ctxt, hdlr);
14817
0
  } else {
14818
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14819
0
            "Unsupported encoding %s\n", BAD_CAST encoding);
14820
0
        }
14821
0
    } else if (enc != XML_CHAR_ENCODING_NONE) {
14822
0
        xmlSwitchEncoding(ctxt, enc);
14823
0
    }
14824
14825
0
    return(0);
14826
0
}
14827
14828
14829
/**
14830
 * xmlCtxtUseOptionsInternal:
14831
 * @ctxt: an XML parser context
14832
 * @options:  a combination of xmlParserOption
14833
 * @encoding:  the user provided encoding to use
14834
 *
14835
 * Applies the options to the parser context
14836
 *
14837
 * Returns 0 in case of success, the set of unknown or unimplemented options
14838
 *         in case of error.
14839
 */
14840
static int
14841
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
14842
964k
{
14843
964k
    if (ctxt == NULL)
14844
0
        return(-1);
14845
964k
    if (encoding != NULL) {
14846
0
        if (ctxt->encoding != NULL)
14847
0
      xmlFree((xmlChar *) ctxt->encoding);
14848
0
        ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14849
0
    }
14850
964k
    if (options & XML_PARSE_RECOVER) {
14851
534k
        ctxt->recovery = 1;
14852
534k
        options -= XML_PARSE_RECOVER;
14853
534k
  ctxt->options |= XML_PARSE_RECOVER;
14854
534k
    } else
14855
430k
        ctxt->recovery = 0;
14856
964k
    if (options & XML_PARSE_DTDLOAD) {
14857
709k
        ctxt->loadsubset = XML_DETECT_IDS;
14858
709k
        options -= XML_PARSE_DTDLOAD;
14859
709k
  ctxt->options |= XML_PARSE_DTDLOAD;
14860
709k
    } else
14861
254k
        ctxt->loadsubset = 0;
14862
964k
    if (options & XML_PARSE_DTDATTR) {
14863
380k
        ctxt->loadsubset |= XML_COMPLETE_ATTRS;
14864
380k
        options -= XML_PARSE_DTDATTR;
14865
380k
  ctxt->options |= XML_PARSE_DTDATTR;
14866
380k
    }
14867
964k
    if (options & XML_PARSE_NOENT) {
14868
642k
        ctxt->replaceEntities = 1;
14869
        /* ctxt->loadsubset |= XML_DETECT_IDS; */
14870
642k
        options -= XML_PARSE_NOENT;
14871
642k
  ctxt->options |= XML_PARSE_NOENT;
14872
642k
    } else
14873
321k
        ctxt->replaceEntities = 0;
14874
964k
    if (options & XML_PARSE_PEDANTIC) {
14875
200k
        ctxt->pedantic = 1;
14876
200k
        options -= XML_PARSE_PEDANTIC;
14877
200k
  ctxt->options |= XML_PARSE_PEDANTIC;
14878
200k
    } else
14879
764k
        ctxt->pedantic = 0;
14880
964k
    if (options & XML_PARSE_NOBLANKS) {
14881
291k
        ctxt->keepBlanks = 0;
14882
291k
        ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
14883
291k
        options -= XML_PARSE_NOBLANKS;
14884
291k
  ctxt->options |= XML_PARSE_NOBLANKS;
14885
291k
    } else
14886
672k
        ctxt->keepBlanks = 1;
14887
964k
    if (options & XML_PARSE_DTDVALID) {
14888
421k
        ctxt->validate = 1;
14889
421k
        if (options & XML_PARSE_NOWARNING)
14890
263k
            ctxt->vctxt.warning = NULL;
14891
421k
        if (options & XML_PARSE_NOERROR)
14892
303k
            ctxt->vctxt.error = NULL;
14893
421k
        options -= XML_PARSE_DTDVALID;
14894
421k
  ctxt->options |= XML_PARSE_DTDVALID;
14895
421k
    } else
14896
543k
        ctxt->validate = 0;
14897
964k
    if (options & XML_PARSE_NOWARNING) {
14898
338k
        ctxt->sax->warning = NULL;
14899
338k
        options -= XML_PARSE_NOWARNING;
14900
338k
    }
14901
964k
    if (options & XML_PARSE_NOERROR) {
14902
440k
        ctxt->sax->error = NULL;
14903
440k
        ctxt->sax->fatalError = NULL;
14904
440k
        options -= XML_PARSE_NOERROR;
14905
440k
    }
14906
964k
#ifdef LIBXML_SAX1_ENABLED
14907
964k
    if (options & XML_PARSE_SAX1) {
14908
344k
        ctxt->sax->startElement = xmlSAX2StartElement;
14909
344k
        ctxt->sax->endElement = xmlSAX2EndElement;
14910
344k
        ctxt->sax->startElementNs = NULL;
14911
344k
        ctxt->sax->endElementNs = NULL;
14912
344k
        ctxt->sax->initialized = 1;
14913
344k
        options -= XML_PARSE_SAX1;
14914
344k
  ctxt->options |= XML_PARSE_SAX1;
14915
344k
    }
14916
964k
#endif /* LIBXML_SAX1_ENABLED */
14917
964k
    if (options & XML_PARSE_NODICT) {
14918
298k
        ctxt->dictNames = 0;
14919
298k
        options -= XML_PARSE_NODICT;
14920
298k
  ctxt->options |= XML_PARSE_NODICT;
14921
665k
    } else {
14922
665k
        ctxt->dictNames = 1;
14923
665k
    }
14924
964k
    if (options & XML_PARSE_NOCDATA) {
14925
336k
        ctxt->sax->cdataBlock = NULL;
14926
336k
        options -= XML_PARSE_NOCDATA;
14927
336k
  ctxt->options |= XML_PARSE_NOCDATA;
14928
336k
    }
14929
964k
    if (options & XML_PARSE_NSCLEAN) {
14930
381k
  ctxt->options |= XML_PARSE_NSCLEAN;
14931
381k
        options -= XML_PARSE_NSCLEAN;
14932
381k
    }
14933
964k
    if (options & XML_PARSE_NONET) {
14934
307k
  ctxt->options |= XML_PARSE_NONET;
14935
307k
        options -= XML_PARSE_NONET;
14936
307k
    }
14937
964k
    if (options & XML_PARSE_COMPACT) {
14938
490k
  ctxt->options |= XML_PARSE_COMPACT;
14939
490k
        options -= XML_PARSE_COMPACT;
14940
490k
    }
14941
964k
    if (options & XML_PARSE_OLD10) {
14942
313k
  ctxt->options |= XML_PARSE_OLD10;
14943
313k
        options -= XML_PARSE_OLD10;
14944
313k
    }
14945
964k
    if (options & XML_PARSE_NOBASEFIX) {
14946
352k
  ctxt->options |= XML_PARSE_NOBASEFIX;
14947
352k
        options -= XML_PARSE_NOBASEFIX;
14948
352k
    }
14949
964k
    if (options & XML_PARSE_HUGE) {
14950
302k
  ctxt->options |= XML_PARSE_HUGE;
14951
302k
        options -= XML_PARSE_HUGE;
14952
302k
        if (ctxt->dict != NULL)
14953
302k
            xmlDictSetLimit(ctxt->dict, 0);
14954
302k
    }
14955
964k
    if (options & XML_PARSE_OLDSAX) {
14956
275k
  ctxt->options |= XML_PARSE_OLDSAX;
14957
275k
        options -= XML_PARSE_OLDSAX;
14958
275k
    }
14959
964k
    if (options & XML_PARSE_IGNORE_ENC) {
14960
336k
  ctxt->options |= XML_PARSE_IGNORE_ENC;
14961
336k
        options -= XML_PARSE_IGNORE_ENC;
14962
336k
    }
14963
964k
    if (options & XML_PARSE_BIG_LINES) {
14964
293k
  ctxt->options |= XML_PARSE_BIG_LINES;
14965
293k
        options -= XML_PARSE_BIG_LINES;
14966
293k
    }
14967
964k
    ctxt->linenumbers = 1;
14968
964k
    return (options);
14969
964k
}
14970
14971
/**
14972
 * xmlCtxtUseOptions:
14973
 * @ctxt: an XML parser context
14974
 * @options:  a combination of xmlParserOption
14975
 *
14976
 * Applies the options to the parser context
14977
 *
14978
 * Returns 0 in case of success, the set of unknown or unimplemented options
14979
 *         in case of error.
14980
 */
14981
int
14982
xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
14983
643k
{
14984
643k
   return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
14985
643k
}
14986
14987
/**
14988
 * xmlDoRead:
14989
 * @ctxt:  an XML parser context
14990
 * @URL:  the base URL to use for the document
14991
 * @encoding:  the document encoding, or NULL
14992
 * @options:  a combination of xmlParserOption
14993
 * @reuse:  keep the context for reuse
14994
 *
14995
 * Common front-end for the xmlRead functions
14996
 *
14997
 * Returns the resulting document tree or NULL
14998
 */
14999
static xmlDocPtr
15000
xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
15001
          int options, int reuse)
15002
321k
{
15003
321k
    xmlDocPtr ret;
15004
15005
321k
    xmlCtxtUseOptionsInternal(ctxt, options, encoding);
15006
321k
    if (encoding != NULL) {
15007
0
        xmlCharEncodingHandlerPtr hdlr;
15008
15009
0
  hdlr = xmlFindCharEncodingHandler(encoding);
15010
0
  if (hdlr != NULL)
15011
0
      xmlSwitchToEncoding(ctxt, hdlr);
15012
0
    }
15013
321k
    if ((URL != NULL) && (ctxt->input != NULL) &&
15014
321k
        (ctxt->input->filename == NULL))
15015
321k
        ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
15016
321k
    xmlParseDocument(ctxt);
15017
321k
    if ((ctxt->wellFormed) || ctxt->recovery)
15018
191k
        ret = ctxt->myDoc;
15019
129k
    else {
15020
129k
        ret = NULL;
15021
129k
  if (ctxt->myDoc != NULL) {
15022
113k
      xmlFreeDoc(ctxt->myDoc);
15023
113k
  }
15024
129k
    }
15025
321k
    ctxt->myDoc = NULL;
15026
321k
    if (!reuse) {
15027
321k
  xmlFreeParserCtxt(ctxt);
15028
321k
    }
15029
15030
321k
    return (ret);
15031
321k
}
15032
15033
/**
15034
 * xmlReadDoc:
15035
 * @cur:  a pointer to a zero terminated string
15036
 * @URL:  the base URL to use for the document
15037
 * @encoding:  the document encoding, or NULL
15038
 * @options:  a combination of xmlParserOption
15039
 *
15040
 * parse an XML in-memory document and build a tree.
15041
 *
15042
 * Returns the resulting document tree
15043
 */
15044
xmlDocPtr
15045
xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
15046
0
{
15047
0
    xmlParserCtxtPtr ctxt;
15048
15049
0
    if (cur == NULL)
15050
0
        return (NULL);
15051
0
    xmlInitParser();
15052
15053
0
    ctxt = xmlCreateDocParserCtxt(cur);
15054
0
    if (ctxt == NULL)
15055
0
        return (NULL);
15056
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15057
0
}
15058
15059
/**
15060
 * xmlReadFile:
15061
 * @filename:  a file or URL
15062
 * @encoding:  the document encoding, or NULL
15063
 * @options:  a combination of xmlParserOption
15064
 *
15065
 * parse an XML file from the filesystem or the network.
15066
 *
15067
 * Returns the resulting document tree
15068
 */
15069
xmlDocPtr
15070
xmlReadFile(const char *filename, const char *encoding, int options)
15071
0
{
15072
0
    xmlParserCtxtPtr ctxt;
15073
15074
0
    xmlInitParser();
15075
0
    ctxt = xmlCreateURLParserCtxt(filename, options);
15076
0
    if (ctxt == NULL)
15077
0
        return (NULL);
15078
0
    return (xmlDoRead(ctxt, NULL, encoding, options, 0));
15079
0
}
15080
15081
/**
15082
 * xmlReadMemory:
15083
 * @buffer:  a pointer to a char array
15084
 * @size:  the size of the array
15085
 * @URL:  the base URL to use for the document
15086
 * @encoding:  the document encoding, or NULL
15087
 * @options:  a combination of xmlParserOption
15088
 *
15089
 * parse an XML in-memory document and build a tree.
15090
 *
15091
 * Returns the resulting document tree
15092
 */
15093
xmlDocPtr
15094
xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
15095
321k
{
15096
321k
    xmlParserCtxtPtr ctxt;
15097
15098
321k
    xmlInitParser();
15099
321k
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
15100
321k
    if (ctxt == NULL)
15101
703
        return (NULL);
15102
321k
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15103
321k
}
15104
15105
/**
15106
 * xmlReadFd:
15107
 * @fd:  an open file descriptor
15108
 * @URL:  the base URL to use for the document
15109
 * @encoding:  the document encoding, or NULL
15110
 * @options:  a combination of xmlParserOption
15111
 *
15112
 * parse an XML from a file descriptor and build a tree.
15113
 * NOTE that the file descriptor will not be closed when the
15114
 *      reader is closed or reset.
15115
 *
15116
 * Returns the resulting document tree
15117
 */
15118
xmlDocPtr
15119
xmlReadFd(int fd, const char *URL, const char *encoding, int options)
15120
0
{
15121
0
    xmlParserCtxtPtr ctxt;
15122
0
    xmlParserInputBufferPtr input;
15123
0
    xmlParserInputPtr stream;
15124
15125
0
    if (fd < 0)
15126
0
        return (NULL);
15127
0
    xmlInitParser();
15128
15129
0
    input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15130
0
    if (input == NULL)
15131
0
        return (NULL);
15132
0
    input->closecallback = NULL;
15133
0
    ctxt = xmlNewParserCtxt();
15134
0
    if (ctxt == NULL) {
15135
0
        xmlFreeParserInputBuffer(input);
15136
0
        return (NULL);
15137
0
    }
15138
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15139
0
    if (stream == NULL) {
15140
0
        xmlFreeParserInputBuffer(input);
15141
0
  xmlFreeParserCtxt(ctxt);
15142
0
        return (NULL);
15143
0
    }
15144
0
    inputPush(ctxt, stream);
15145
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15146
0
}
15147
15148
/**
15149
 * xmlReadIO:
15150
 * @ioread:  an I/O read function
15151
 * @ioclose:  an I/O close function
15152
 * @ioctx:  an I/O handler
15153
 * @URL:  the base URL to use for the document
15154
 * @encoding:  the document encoding, or NULL
15155
 * @options:  a combination of xmlParserOption
15156
 *
15157
 * parse an XML document from I/O functions and source and build a tree.
15158
 *
15159
 * Returns the resulting document tree
15160
 */
15161
xmlDocPtr
15162
xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
15163
          void *ioctx, const char *URL, const char *encoding, int options)
15164
0
{
15165
0
    xmlParserCtxtPtr ctxt;
15166
0
    xmlParserInputBufferPtr input;
15167
0
    xmlParserInputPtr stream;
15168
15169
0
    if (ioread == NULL)
15170
0
        return (NULL);
15171
0
    xmlInitParser();
15172
15173
0
    input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15174
0
                                         XML_CHAR_ENCODING_NONE);
15175
0
    if (input == NULL) {
15176
0
        if (ioclose != NULL)
15177
0
            ioclose(ioctx);
15178
0
        return (NULL);
15179
0
    }
15180
0
    ctxt = xmlNewParserCtxt();
15181
0
    if (ctxt == NULL) {
15182
0
        xmlFreeParserInputBuffer(input);
15183
0
        return (NULL);
15184
0
    }
15185
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15186
0
    if (stream == NULL) {
15187
0
        xmlFreeParserInputBuffer(input);
15188
0
  xmlFreeParserCtxt(ctxt);
15189
0
        return (NULL);
15190
0
    }
15191
0
    inputPush(ctxt, stream);
15192
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15193
0
}
15194
15195
/**
15196
 * xmlCtxtReadDoc:
15197
 * @ctxt:  an XML parser context
15198
 * @cur:  a pointer to a zero terminated string
15199
 * @URL:  the base URL to use for the document
15200
 * @encoding:  the document encoding, or NULL
15201
 * @options:  a combination of xmlParserOption
15202
 *
15203
 * parse an XML in-memory document and build a tree.
15204
 * This reuses the existing @ctxt parser context
15205
 *
15206
 * Returns the resulting document tree
15207
 */
15208
xmlDocPtr
15209
xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
15210
               const char *URL, const char *encoding, int options)
15211
0
{
15212
0
    if (cur == NULL)
15213
0
        return (NULL);
15214
0
    return (xmlCtxtReadMemory(ctxt, (const char *) cur, xmlStrlen(cur), URL,
15215
0
                              encoding, options));
15216
0
}
15217
15218
/**
15219
 * xmlCtxtReadFile:
15220
 * @ctxt:  an XML parser context
15221
 * @filename:  a file or URL
15222
 * @encoding:  the document encoding, or NULL
15223
 * @options:  a combination of xmlParserOption
15224
 *
15225
 * parse an XML file from the filesystem or the network.
15226
 * This reuses the existing @ctxt parser context
15227
 *
15228
 * Returns the resulting document tree
15229
 */
15230
xmlDocPtr
15231
xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15232
                const char *encoding, int options)
15233
0
{
15234
0
    xmlParserInputPtr stream;
15235
15236
0
    if (filename == NULL)
15237
0
        return (NULL);
15238
0
    if (ctxt == NULL)
15239
0
        return (NULL);
15240
0
    xmlInitParser();
15241
15242
0
    xmlCtxtReset(ctxt);
15243
15244
0
    stream = xmlLoadExternalEntity(filename, NULL, ctxt);
15245
0
    if (stream == NULL) {
15246
0
        return (NULL);
15247
0
    }
15248
0
    inputPush(ctxt, stream);
15249
0
    return (xmlDoRead(ctxt, NULL, encoding, options, 1));
15250
0
}
15251
15252
/**
15253
 * xmlCtxtReadMemory:
15254
 * @ctxt:  an XML parser context
15255
 * @buffer:  a pointer to a char array
15256
 * @size:  the size of the array
15257
 * @URL:  the base URL to use for the document
15258
 * @encoding:  the document encoding, or NULL
15259
 * @options:  a combination of xmlParserOption
15260
 *
15261
 * parse an XML in-memory document and build a tree.
15262
 * This reuses the existing @ctxt parser context
15263
 *
15264
 * Returns the resulting document tree
15265
 */
15266
xmlDocPtr
15267
xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
15268
                  const char *URL, const char *encoding, int options)
15269
0
{
15270
0
    xmlParserInputBufferPtr input;
15271
0
    xmlParserInputPtr stream;
15272
15273
0
    if (ctxt == NULL)
15274
0
        return (NULL);
15275
0
    if (buffer == NULL)
15276
0
        return (NULL);
15277
0
    xmlInitParser();
15278
15279
0
    xmlCtxtReset(ctxt);
15280
15281
0
    input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
15282
0
    if (input == NULL) {
15283
0
  return(NULL);
15284
0
    }
15285
15286
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15287
0
    if (stream == NULL) {
15288
0
  xmlFreeParserInputBuffer(input);
15289
0
  return(NULL);
15290
0
    }
15291
15292
0
    inputPush(ctxt, stream);
15293
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15294
0
}
15295
15296
/**
15297
 * xmlCtxtReadFd:
15298
 * @ctxt:  an XML parser context
15299
 * @fd:  an open file descriptor
15300
 * @URL:  the base URL to use for the document
15301
 * @encoding:  the document encoding, or NULL
15302
 * @options:  a combination of xmlParserOption
15303
 *
15304
 * parse an XML from a file descriptor and build a tree.
15305
 * This reuses the existing @ctxt parser context
15306
 * NOTE that the file descriptor will not be closed when the
15307
 *      reader is closed or reset.
15308
 *
15309
 * Returns the resulting document tree
15310
 */
15311
xmlDocPtr
15312
xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15313
              const char *URL, const char *encoding, int options)
15314
0
{
15315
0
    xmlParserInputBufferPtr input;
15316
0
    xmlParserInputPtr stream;
15317
15318
0
    if (fd < 0)
15319
0
        return (NULL);
15320
0
    if (ctxt == NULL)
15321
0
        return (NULL);
15322
0
    xmlInitParser();
15323
15324
0
    xmlCtxtReset(ctxt);
15325
15326
15327
0
    input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15328
0
    if (input == NULL)
15329
0
        return (NULL);
15330
0
    input->closecallback = NULL;
15331
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15332
0
    if (stream == NULL) {
15333
0
        xmlFreeParserInputBuffer(input);
15334
0
        return (NULL);
15335
0
    }
15336
0
    inputPush(ctxt, stream);
15337
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15338
0
}
15339
15340
/**
15341
 * xmlCtxtReadIO:
15342
 * @ctxt:  an XML parser context
15343
 * @ioread:  an I/O read function
15344
 * @ioclose:  an I/O close function
15345
 * @ioctx:  an I/O handler
15346
 * @URL:  the base URL to use for the document
15347
 * @encoding:  the document encoding, or NULL
15348
 * @options:  a combination of xmlParserOption
15349
 *
15350
 * parse an XML document from I/O functions and source and build a tree.
15351
 * This reuses the existing @ctxt parser context
15352
 *
15353
 * Returns the resulting document tree
15354
 */
15355
xmlDocPtr
15356
xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15357
              xmlInputCloseCallback ioclose, void *ioctx,
15358
        const char *URL,
15359
              const char *encoding, int options)
15360
0
{
15361
0
    xmlParserInputBufferPtr input;
15362
0
    xmlParserInputPtr stream;
15363
15364
0
    if (ioread == NULL)
15365
0
        return (NULL);
15366
0
    if (ctxt == NULL)
15367
0
        return (NULL);
15368
0
    xmlInitParser();
15369
15370
0
    xmlCtxtReset(ctxt);
15371
15372
0
    input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15373
0
                                         XML_CHAR_ENCODING_NONE);
15374
0
    if (input == NULL) {
15375
0
        if (ioclose != NULL)
15376
0
            ioclose(ioctx);
15377
0
        return (NULL);
15378
0
    }
15379
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15380
0
    if (stream == NULL) {
15381
0
        xmlFreeParserInputBuffer(input);
15382
0
        return (NULL);
15383
0
    }
15384
0
    inputPush(ctxt, stream);
15385
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15386
0
}
15387