Coverage Report

Created: 2024-05-15 07:10

/src/libxml2/parser.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3
 *            implemented on top of the SAX interfaces
4
 *
5
 * References:
6
 *   The XML specification:
7
 *     http://www.w3.org/TR/REC-xml
8
 *   Original 1.0 version:
9
 *     http://www.w3.org/TR/1998/REC-xml-19980210
10
 *   XML second edition working draft
11
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
12
 *
13
 * Okay this is a big file, the parser core is around 7000 lines, then it
14
 * is followed by the progressive parser top routines, then the various
15
 * high level APIs to call the parser and a few miscellaneous functions.
16
 * A number of helper functions and deprecated ones have been moved to
17
 * parserInternals.c to reduce this file size.
18
 * As much as possible the functions are associated with their relative
19
 * production in the XML specification. A few productions defining the
20
 * different ranges of character are actually implemented either in
21
 * parserInternals.h or parserInternals.c
22
 * The DOM tree build is realized from the default SAX callbacks in
23
 * the module SAX.c.
24
 * The routines doing the validation checks are in valid.c and called either
25
 * from the SAX callbacks or as standalone functions using a preparsed
26
 * document.
27
 *
28
 * See Copyright for the status of this software.
29
 *
30
 * daniel@veillard.com
31
 */
32
33
/* To avoid EBCDIC trouble when parsing on zOS */
34
#if defined(__MVS__)
35
#pragma convert("ISO8859-1")
36
#endif
37
38
#define IN_LIBXML
39
#include "libxml.h"
40
41
#if defined(_WIN32)
42
#define XML_DIR_SEP '\\'
43
#else
44
#define XML_DIR_SEP '/'
45
#endif
46
47
#include <stdlib.h>
48
#include <limits.h>
49
#include <string.h>
50
#include <stdarg.h>
51
#include <stddef.h>
52
#include <ctype.h>
53
#include <stdlib.h>
54
#include <libxml/xmlmemory.h>
55
#include <libxml/threads.h>
56
#include <libxml/globals.h>
57
#include <libxml/tree.h>
58
#include <libxml/parser.h>
59
#include <libxml/parserInternals.h>
60
#include <libxml/HTMLparser.h>
61
#include <libxml/valid.h>
62
#include <libxml/entities.h>
63
#include <libxml/xmlerror.h>
64
#include <libxml/encoding.h>
65
#include <libxml/xmlIO.h>
66
#include <libxml/uri.h>
67
#ifdef LIBXML_CATALOG_ENABLED
68
#include <libxml/catalog.h>
69
#endif
70
#ifdef LIBXML_SCHEMAS_ENABLED
71
#include <libxml/xmlschemastypes.h>
72
#include <libxml/relaxng.h>
73
#endif
74
#if defined(LIBXML_XPATH_ENABLED) || defined(LIBXML_SCHEMAS_ENABLED)
75
#include <libxml/xpath.h>
76
#endif
77
78
#include "private/buf.h"
79
#include "private/dict.h"
80
#include "private/enc.h"
81
#include "private/entities.h"
82
#include "private/error.h"
83
#include "private/globals.h"
84
#include "private/html.h"
85
#include "private/io.h"
86
#include "private/memory.h"
87
#include "private/parser.h"
88
#include "private/threads.h"
89
#include "private/xpath.h"
90
91
struct _xmlStartTag {
92
    const xmlChar *prefix;
93
    const xmlChar *URI;
94
    int line;
95
    int nsNr;
96
};
97
98
static xmlParserCtxtPtr
99
xmlCreateEntityParserCtxtInternal(xmlSAXHandlerPtr sax, void *userData,
100
        const xmlChar *URL, const xmlChar *ID, const xmlChar *base,
101
        xmlParserCtxtPtr pctx);
102
103
static void xmlHaltParser(xmlParserCtxtPtr ctxt);
104
105
static int
106
xmlParseElementStart(xmlParserCtxtPtr ctxt);
107
108
static void
109
xmlParseElementEnd(xmlParserCtxtPtr ctxt);
110
111
/************************************************************************
112
 *                  *
113
 *  Arbitrary limits set in the parser. See XML_PARSE_HUGE    *
114
 *                  *
115
 ************************************************************************/
116
117
177M
#define XML_MAX_HUGE_LENGTH 1000000000
118
119
#define XML_PARSER_BIG_ENTITY 1000
120
#define XML_PARSER_LOT_ENTITY 5000
121
122
/*
123
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
124
 *    replacement over the size in byte of the input indicates that you have
125
 *    an exponential behaviour. A value of 10 corresponds to at least 3 entity
126
 *    replacement per byte of input.
127
 */
128
20.5k
#define XML_PARSER_NON_LINEAR 10
129
130
1.71G
#define XML_ENT_FIXED_COST 50
131
132
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary limit on the depth of the XML documents we accept to
 * process. It is a safety boundary rather than a limitation of the
 * parser itself, and it can be disabled with the XML_PARSE_HUGE
 * parser option.
 */
unsigned int xmlParserMaxDepth = 256;
141
142
143
144
#define SAX2 1
145
2.64G
#define XML_PARSER_BIG_BUFFER_SIZE 300
146
440G
#define XML_PARSER_BUFFER_SIZE 100
147
13.9M
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
148
149
/**
150
 * XML_PARSER_CHUNK_SIZE
151
 *
152
 * When calling GROW that's the minimal amount of data
153
 * the parser expected to have received. It is not a hard
154
 * limit but an optimization when reading strings like Names
155
 * It is not strictly needed as long as the input's available characters
156
 * are followed by 0, which should be provided by the I/O level
157
 */
158
900M
#define XML_PARSER_CHUNK_SIZE 100
159
160
/*
 * NULL-terminated list of the "xml"-prefixed processing instruction
 * targets allowed by W3C specs.
 */
static const char* const xmlW3CPIs[] = {
    "xml-stylesheet", "xml-model", NULL
};
169
170
171
/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
172
static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
173
                                              const xmlChar **str);
174
175
static xmlParserErrors
176
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
177
                xmlSAXHandlerPtr sax,
178
          void *user_data, int depth, const xmlChar *URL,
179
          const xmlChar *ID, xmlNodePtr *list);
180
181
static int
182
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
183
                          const char *encoding);
184
#ifdef LIBXML_LEGACY_ENABLED
185
static void
186
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
187
                      xmlNodePtr lastNode);
188
#endif /* LIBXML_LEGACY_ENABLED */
189
190
static xmlParserErrors
191
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
192
          const xmlChar *string, void *user_data, xmlNodePtr *lst);
193
194
static int
195
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
196
197
/************************************************************************
198
 *                  *
199
 *    Some factorized error routines        *
200
 *                  *
201
 ************************************************************************/
202
203
/**
204
 * xmlErrAttributeDup:
205
 * @ctxt:  an XML parser context
206
 * @prefix:  the attribute prefix
207
 * @localname:  the attribute localname
208
 *
209
 * Handle a redefinition of attribute error
210
 */
211
static void
212
xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
213
                   const xmlChar * localname)
214
16.7k
{
215
16.7k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
216
16.7k
        (ctxt->instate == XML_PARSER_EOF))
217
0
  return;
218
16.7k
    if (ctxt != NULL)
219
16.7k
  ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
220
221
16.7k
    if (prefix == NULL)
222
14.5k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
223
14.5k
                        XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
224
14.5k
                        (const char *) localname, NULL, NULL, 0, 0,
225
14.5k
                        "Attribute %s redefined\n", localname);
226
2.15k
    else
227
2.15k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
228
2.15k
                        XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
229
2.15k
                        (const char *) prefix, (const char *) localname,
230
2.15k
                        NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
231
2.15k
                        localname);
232
16.7k
    if (ctxt != NULL) {
233
16.7k
  ctxt->wellFormed = 0;
234
16.7k
  if (ctxt->recovery == 0)
235
9.14k
      ctxt->disableSAX = 1;
236
16.7k
    }
237
16.7k
}
238
239
/**
240
 * xmlFatalErr:
241
 * @ctxt:  an XML parser context
242
 * @error:  the error number
243
 * @extra:  extra information string
244
 *
245
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
246
 */
247
static void
248
xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
249
34.1M
{
250
34.1M
    const char *errmsg;
251
252
34.1M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
253
34.1M
        (ctxt->instate == XML_PARSER_EOF))
254
84.2k
  return;
255
34.0M
    switch (error) {
256
43.7k
        case XML_ERR_INVALID_HEX_CHARREF:
257
43.7k
            errmsg = "CharRef: invalid hexadecimal value";
258
43.7k
            break;
259
63.2k
        case XML_ERR_INVALID_DEC_CHARREF:
260
63.2k
            errmsg = "CharRef: invalid decimal value";
261
63.2k
            break;
262
0
        case XML_ERR_INVALID_CHARREF:
263
0
            errmsg = "CharRef: invalid value";
264
0
            break;
265
2.37M
        case XML_ERR_INTERNAL_ERROR:
266
2.37M
            errmsg = "internal error";
267
2.37M
            break;
268
0
        case XML_ERR_PEREF_AT_EOF:
269
0
            errmsg = "PEReference at end of document";
270
0
            break;
271
0
        case XML_ERR_PEREF_IN_PROLOG:
272
0
            errmsg = "PEReference in prolog";
273
0
            break;
274
0
        case XML_ERR_PEREF_IN_EPILOG:
275
0
            errmsg = "PEReference in epilog";
276
0
            break;
277
0
        case XML_ERR_PEREF_NO_NAME:
278
0
            errmsg = "PEReference: no name";
279
0
            break;
280
4.64M
        case XML_ERR_PEREF_SEMICOL_MISSING:
281
4.64M
            errmsg = "PEReference: expecting ';'";
282
4.64M
            break;
283
3.35k
        case XML_ERR_ENTITY_LOOP:
284
3.35k
            errmsg = "Detected an entity reference loop";
285
3.35k
            break;
286
0
        case XML_ERR_ENTITY_NOT_STARTED:
287
0
            errmsg = "EntityValue: \" or ' expected";
288
0
            break;
289
11.4k
        case XML_ERR_ENTITY_PE_INTERNAL:
290
11.4k
            errmsg = "PEReferences forbidden in internal subset";
291
11.4k
            break;
292
12.0k
        case XML_ERR_ENTITY_NOT_FINISHED:
293
12.0k
            errmsg = "EntityValue: \" or ' expected";
294
12.0k
            break;
295
76.9k
        case XML_ERR_ATTRIBUTE_NOT_STARTED:
296
76.9k
            errmsg = "AttValue: \" or ' expected";
297
76.9k
            break;
298
337k
        case XML_ERR_LT_IN_ATTRIBUTE:
299
337k
            errmsg = "Unescaped '<' not allowed in attributes values";
300
337k
            break;
301
45.2k
        case XML_ERR_LITERAL_NOT_STARTED:
302
45.2k
            errmsg = "SystemLiteral \" or ' expected";
303
45.2k
            break;
304
50.3k
        case XML_ERR_LITERAL_NOT_FINISHED:
305
50.3k
            errmsg = "Unfinished System or Public ID \" or ' expected";
306
50.3k
            break;
307
44.7k
        case XML_ERR_MISPLACED_CDATA_END:
308
44.7k
            errmsg = "Sequence ']]>' not allowed in content";
309
44.7k
            break;
310
43.7k
        case XML_ERR_URI_REQUIRED:
311
43.7k
            errmsg = "SYSTEM or PUBLIC, the URI is missing";
312
43.7k
            break;
313
2.35k
        case XML_ERR_PUBID_REQUIRED:
314
2.35k
            errmsg = "PUBLIC, the Public Identifier is missing";
315
2.35k
            break;
316
24.6M
        case XML_ERR_HYPHEN_IN_COMMENT:
317
24.6M
            errmsg = "Comment must not contain '--' (double-hyphen)";
318
24.6M
            break;
319
18.0k
        case XML_ERR_PI_NOT_STARTED:
320
18.0k
            errmsg = "xmlParsePI : no target name";
321
18.0k
            break;
322
755
        case XML_ERR_RESERVED_XML_NAME:
323
755
            errmsg = "Invalid PI name";
324
755
            break;
325
1.01k
        case XML_ERR_NOTATION_NOT_STARTED:
326
1.01k
            errmsg = "NOTATION: Name expected here";
327
1.01k
            break;
328
6.62k
        case XML_ERR_NOTATION_NOT_FINISHED:
329
6.62k
            errmsg = "'>' required to close NOTATION declaration";
330
6.62k
            break;
331
30.4k
        case XML_ERR_VALUE_REQUIRED:
332
30.4k
            errmsg = "Entity value required";
333
30.4k
            break;
334
1.08k
        case XML_ERR_URI_FRAGMENT:
335
1.08k
            errmsg = "Fragment not allowed";
336
1.08k
            break;
337
46.2k
        case XML_ERR_ATTLIST_NOT_STARTED:
338
46.2k
            errmsg = "'(' required to start ATTLIST enumeration";
339
46.2k
            break;
340
621
        case XML_ERR_NMTOKEN_REQUIRED:
341
621
            errmsg = "NmToken expected in ATTLIST enumeration";
342
621
            break;
343
3.00k
        case XML_ERR_ATTLIST_NOT_FINISHED:
344
3.00k
            errmsg = "')' required to finish ATTLIST enumeration";
345
3.00k
            break;
346
3.82k
        case XML_ERR_MIXED_NOT_STARTED:
347
3.82k
            errmsg = "MixedContentDecl : '|' or ')*' expected";
348
3.82k
            break;
349
0
        case XML_ERR_PCDATA_REQUIRED:
350
0
            errmsg = "MixedContentDecl : '#PCDATA' expected";
351
0
            break;
352
27.5k
        case XML_ERR_ELEMCONTENT_NOT_STARTED:
353
27.5k
            errmsg = "ContentDecl : Name or '(' expected";
354
27.5k
            break;
355
19.4k
        case XML_ERR_ELEMCONTENT_NOT_FINISHED:
356
19.4k
            errmsg = "ContentDecl : ',' '|' or ')' expected";
357
19.4k
            break;
358
0
        case XML_ERR_PEREF_IN_INT_SUBSET:
359
0
            errmsg =
360
0
                "PEReference: forbidden within markup decl in internal subset";
361
0
            break;
362
306k
        case XML_ERR_GT_REQUIRED:
363
306k
            errmsg = "expected '>'";
364
306k
            break;
365
2.67k
        case XML_ERR_CONDSEC_INVALID:
366
2.67k
            errmsg = "XML conditional section '[' expected";
367
2.67k
            break;
368
86.0k
        case XML_ERR_EXT_SUBSET_NOT_FINISHED:
369
86.0k
            errmsg = "Content error in the external subset";
370
86.0k
            break;
371
10.9k
        case XML_ERR_CONDSEC_INVALID_KEYWORD:
372
10.9k
            errmsg =
373
10.9k
                "conditional section INCLUDE or IGNORE keyword expected";
374
10.9k
            break;
375
2.48k
        case XML_ERR_CONDSEC_NOT_FINISHED:
376
2.48k
            errmsg = "XML conditional section not closed";
377
2.48k
            break;
378
1.95k
        case XML_ERR_XMLDECL_NOT_STARTED:
379
1.95k
            errmsg = "Text declaration '<?xml' required";
380
1.95k
            break;
381
188k
        case XML_ERR_XMLDECL_NOT_FINISHED:
382
188k
            errmsg = "parsing XML declaration: '?>' expected";
383
188k
            break;
384
0
        case XML_ERR_EXT_ENTITY_STANDALONE:
385
0
            errmsg = "external parsed entities cannot be standalone";
386
0
            break;
387
365k
        case XML_ERR_ENTITYREF_SEMICOL_MISSING:
388
365k
            errmsg = "EntityRef: expecting ';'";
389
365k
            break;
390
67.9k
        case XML_ERR_DOCTYPE_NOT_FINISHED:
391
67.9k
            errmsg = "DOCTYPE improperly terminated";
392
67.9k
            break;
393
0
        case XML_ERR_LTSLASH_REQUIRED:
394
0
            errmsg = "EndTag: '</' not found";
395
0
            break;
396
12.2k
        case XML_ERR_EQUAL_REQUIRED:
397
12.2k
            errmsg = "expected '='";
398
12.2k
            break;
399
57.7k
        case XML_ERR_STRING_NOT_CLOSED:
400
57.7k
            errmsg = "String not closed expecting \" or '";
401
57.7k
            break;
402
4.59k
        case XML_ERR_STRING_NOT_STARTED:
403
4.59k
            errmsg = "String not started expecting ' or \"";
404
4.59k
            break;
405
1.87k
        case XML_ERR_ENCODING_NAME:
406
1.87k
            errmsg = "Invalid XML encoding name";
407
1.87k
            break;
408
4.46k
        case XML_ERR_STANDALONE_VALUE:
409
4.46k
            errmsg = "standalone accepts only 'yes' or 'no'";
410
4.46k
            break;
411
39.3k
        case XML_ERR_DOCUMENT_EMPTY:
412
39.3k
            errmsg = "Document is empty";
413
39.3k
            break;
414
244k
        case XML_ERR_DOCUMENT_END:
415
244k
            errmsg = "Extra content at the end of the document";
416
244k
            break;
417
15.9k
        case XML_ERR_NOT_WELL_BALANCED:
418
15.9k
            errmsg = "chunk is not well balanced";
419
15.9k
            break;
420
0
        case XML_ERR_EXTRA_CONTENT:
421
0
            errmsg = "extra content at the end of well balanced chunk";
422
0
            break;
423
77.6k
        case XML_ERR_VERSION_MISSING:
424
77.6k
            errmsg = "Malformed declaration expecting version";
425
77.6k
            break;
426
5.27k
        case XML_ERR_NAME_TOO_LONG:
427
5.27k
            errmsg = "Name too long";
428
5.27k
            break;
429
#if 0
430
        case:
431
            errmsg = "";
432
            break;
433
#endif
434
353
        default:
435
353
            errmsg = "Unregistered error message";
436
34.0M
    }
437
34.0M
    if (ctxt != NULL)
438
34.0M
  ctxt->errNo = error;
439
34.0M
    if (info == NULL) {
440
31.6M
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
441
31.6M
                        XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s\n",
442
31.6M
                        errmsg);
443
31.6M
    } else {
444
2.38M
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
445
2.38M
                        XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s: %s\n",
446
2.38M
                        errmsg, info);
447
2.38M
    }
448
34.0M
    if (ctxt != NULL) {
449
34.0M
  ctxt->wellFormed = 0;
450
34.0M
  if (ctxt->recovery == 0)
451
8.44M
      ctxt->disableSAX = 1;
452
34.0M
    }
453
34.0M
}
454
455
/**
456
 * xmlFatalErrMsg:
457
 * @ctxt:  an XML parser context
458
 * @error:  the error number
459
 * @msg:  the error message
460
 *
461
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
462
 */
463
static void LIBXML_ATTR_FORMAT(3,0)
464
xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
465
               const char *msg)
466
11.0M
{
467
11.0M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
468
11.0M
        (ctxt->instate == XML_PARSER_EOF))
469
79
  return;
470
11.0M
    if (ctxt != NULL)
471
11.0M
  ctxt->errNo = error;
472
11.0M
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
473
11.0M
                    XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
474
11.0M
    if (ctxt != NULL) {
475
11.0M
  ctxt->wellFormed = 0;
476
11.0M
  if (ctxt->recovery == 0)
477
5.83M
      ctxt->disableSAX = 1;
478
11.0M
    }
479
11.0M
}
480
481
/**
482
 * xmlWarningMsg:
483
 * @ctxt:  an XML parser context
484
 * @error:  the error number
485
 * @msg:  the error message
486
 * @str1:  extra data
487
 * @str2:  extra data
488
 *
489
 * Handle a warning.
490
 */
491
static void LIBXML_ATTR_FORMAT(3,0)
492
xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
493
              const char *msg, const xmlChar *str1, const xmlChar *str2)
494
108M
{
495
108M
    xmlStructuredErrorFunc schannel = NULL;
496
497
108M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
498
108M
        (ctxt->instate == XML_PARSER_EOF))
499
0
  return;
500
108M
    if ((ctxt != NULL) && (ctxt->sax != NULL) &&
501
108M
        (ctxt->sax->initialized == XML_SAX2_MAGIC))
502
108M
        schannel = ctxt->sax->serror;
503
108M
    if (ctxt != NULL) {
504
108M
        __xmlRaiseError(schannel,
505
108M
                    (ctxt->sax) ? ctxt->sax->warning : NULL,
506
108M
                    ctxt->userData,
507
108M
                    ctxt, NULL, XML_FROM_PARSER, error,
508
108M
                    XML_ERR_WARNING, NULL, 0,
509
108M
        (const char *) str1, (const char *) str2, NULL, 0, 0,
510
108M
        msg, (const char *) str1, (const char *) str2);
511
108M
    } else {
512
0
        __xmlRaiseError(schannel, NULL, NULL,
513
0
                    ctxt, NULL, XML_FROM_PARSER, error,
514
0
                    XML_ERR_WARNING, NULL, 0,
515
0
        (const char *) str1, (const char *) str2, NULL, 0, 0,
516
0
        msg, (const char *) str1, (const char *) str2);
517
0
    }
518
108M
}
519
520
/**
521
 * xmlValidityError:
522
 * @ctxt:  an XML parser context
523
 * @error:  the error number
524
 * @msg:  the error message
525
 * @str1:  extra data
526
 *
527
 * Handle a validity error.
528
 */
529
static void LIBXML_ATTR_FORMAT(3,0)
530
xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
531
              const char *msg, const xmlChar *str1, const xmlChar *str2)
532
465k
{
533
465k
    xmlStructuredErrorFunc schannel = NULL;
534
535
465k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
536
465k
        (ctxt->instate == XML_PARSER_EOF))
537
0
  return;
538
465k
    if (ctxt != NULL) {
539
465k
  ctxt->errNo = error;
540
465k
  if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
541
200k
      schannel = ctxt->sax->serror;
542
465k
    }
543
465k
    if (ctxt != NULL) {
544
465k
        __xmlRaiseError(schannel,
545
465k
                    ctxt->vctxt.error, ctxt->vctxt.userData,
546
465k
                    ctxt, NULL, XML_FROM_DTD, error,
547
465k
                    XML_ERR_ERROR, NULL, 0, (const char *) str1,
548
465k
        (const char *) str2, NULL, 0, 0,
549
465k
        msg, (const char *) str1, (const char *) str2);
550
465k
  ctxt->valid = 0;
551
465k
    } else {
552
0
        __xmlRaiseError(schannel, NULL, NULL,
553
0
                    ctxt, NULL, XML_FROM_DTD, error,
554
0
                    XML_ERR_ERROR, NULL, 0, (const char *) str1,
555
0
        (const char *) str2, NULL, 0, 0,
556
0
        msg, (const char *) str1, (const char *) str2);
557
0
    }
558
465k
}
559
560
/**
561
 * xmlFatalErrMsgInt:
562
 * @ctxt:  an XML parser context
563
 * @error:  the error number
564
 * @msg:  the error message
565
 * @val:  an integer value
566
 *
567
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
568
 */
569
static void LIBXML_ATTR_FORMAT(3,0)
570
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
571
                  const char *msg, int val)
572
85.2M
{
573
85.2M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
574
85.2M
        (ctxt->instate == XML_PARSER_EOF))
575
0
  return;
576
85.2M
    if (ctxt != NULL)
577
85.2M
  ctxt->errNo = error;
578
85.2M
    __xmlRaiseError(NULL, NULL, NULL,
579
85.2M
                    ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
580
85.2M
                    NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
581
85.2M
    if (ctxt != NULL) {
582
85.2M
  ctxt->wellFormed = 0;
583
85.2M
  if (ctxt->recovery == 0)
584
9.35M
      ctxt->disableSAX = 1;
585
85.2M
    }
586
85.2M
}
587
588
/**
589
 * xmlFatalErrMsgStrIntStr:
590
 * @ctxt:  an XML parser context
591
 * @error:  the error number
592
 * @msg:  the error message
593
 * @str1:  an string info
594
 * @val:  an integer value
595
 * @str2:  an string info
596
 *
597
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
598
 */
599
static void LIBXML_ATTR_FORMAT(3,0)
600
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
601
                  const char *msg, const xmlChar *str1, int val,
602
      const xmlChar *str2)
603
2.80M
{
604
2.80M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
605
2.80M
        (ctxt->instate == XML_PARSER_EOF))
606
0
  return;
607
2.80M
    if (ctxt != NULL)
608
2.80M
  ctxt->errNo = error;
609
2.80M
    __xmlRaiseError(NULL, NULL, NULL,
610
2.80M
                    ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
611
2.80M
                    NULL, 0, (const char *) str1, (const char *) str2,
612
2.80M
        NULL, val, 0, msg, str1, val, str2);
613
2.80M
    if (ctxt != NULL) {
614
2.80M
  ctxt->wellFormed = 0;
615
2.80M
  if (ctxt->recovery == 0)
616
897k
      ctxt->disableSAX = 1;
617
2.80M
    }
618
2.80M
}
619
620
/**
621
 * xmlFatalErrMsgStr:
622
 * @ctxt:  an XML parser context
623
 * @error:  the error number
624
 * @msg:  the error message
625
 * @val:  a string value
626
 *
627
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
628
 */
629
static void LIBXML_ATTR_FORMAT(3,0)
630
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
631
                  const char *msg, const xmlChar * val)
632
361M
{
633
361M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
634
361M
        (ctxt->instate == XML_PARSER_EOF))
635
3
  return;
636
361M
    if (ctxt != NULL)
637
361M
  ctxt->errNo = error;
638
361M
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
639
361M
                    XML_FROM_PARSER, error, XML_ERR_FATAL,
640
361M
                    NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
641
361M
                    val);
642
361M
    if (ctxt != NULL) {
643
361M
  ctxt->wellFormed = 0;
644
361M
  if (ctxt->recovery == 0)
645
338M
      ctxt->disableSAX = 1;
646
361M
    }
647
361M
}
648
649
/**
650
 * xmlErrMsgStr:
651
 * @ctxt:  an XML parser context
652
 * @error:  the error number
653
 * @msg:  the error message
654
 * @val:  a string value
655
 *
656
 * Handle a non fatal parser error
657
 */
658
static void LIBXML_ATTR_FORMAT(3,0)
659
xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
660
                  const char *msg, const xmlChar * val)
661
15.9M
{
662
15.9M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
663
15.9M
        (ctxt->instate == XML_PARSER_EOF))
664
0
  return;
665
15.9M
    if (ctxt != NULL)
666
15.9M
  ctxt->errNo = error;
667
15.9M
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
668
15.9M
                    XML_FROM_PARSER, error, XML_ERR_ERROR,
669
15.9M
                    NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
670
15.9M
                    val);
671
15.9M
}
672
673
/**
674
 * xmlNsErr:
675
 * @ctxt:  an XML parser context
676
 * @error:  the error number
677
 * @msg:  the message
678
 * @info1:  extra information string
679
 * @info2:  extra information string
680
 *
681
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
682
 */
683
static void LIBXML_ATTR_FORMAT(3,0)
684
xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
685
         const char *msg,
686
         const xmlChar * info1, const xmlChar * info2,
687
         const xmlChar * info3)
688
775k
{
689
775k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
690
775k
        (ctxt->instate == XML_PARSER_EOF))
691
11
  return;
692
775k
    if (ctxt != NULL)
693
775k
  ctxt->errNo = error;
694
775k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
695
775k
                    XML_ERR_ERROR, NULL, 0, (const char *) info1,
696
775k
                    (const char *) info2, (const char *) info3, 0, 0, msg,
697
775k
                    info1, info2, info3);
698
775k
    if (ctxt != NULL)
699
775k
  ctxt->nsWellFormed = 0;
700
775k
}
701
702
/**
703
 * xmlNsWarn
704
 * @ctxt:  an XML parser context
705
 * @error:  the error number
706
 * @msg:  the message
707
 * @info1:  extra information string
708
 * @info2:  extra information string
709
 *
710
 * Handle a namespace warning error
711
 */
712
static void LIBXML_ATTR_FORMAT(3,0)
713
xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
714
         const char *msg,
715
         const xmlChar * info1, const xmlChar * info2,
716
         const xmlChar * info3)
717
6.79k
{
718
6.79k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
719
6.79k
        (ctxt->instate == XML_PARSER_EOF))
720
0
  return;
721
6.79k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
722
6.79k
                    XML_ERR_WARNING, NULL, 0, (const char *) info1,
723
6.79k
                    (const char *) info2, (const char *) info3, 0, 0, msg,
724
6.79k
                    info1, info2, info3);
725
6.79k
}
726
727
/**
 * xmlSaturatedAdd:
 * @dst:  accumulator updated in place
 * @val:  amount to add
 *
 * Add @val to *@dst, clamping the result to ULONG_MAX instead of
 * wrapping around.
 */
static void
xmlSaturatedAdd(unsigned long *dst, unsigned long val) {
    /* Largest amount that still fits without overflowing. */
    unsigned long room = ULONG_MAX - *dst;

    *dst = (val > room) ? ULONG_MAX : *dst + val;
}
734
735
/**
 * xmlSaturatedAddSizeT:
 * @dst:  accumulator updated in place
 * @val:  amount to add, given as a size_t
 *
 * Add @val to *@dst, clamping the result to ULONG_MAX instead of
 * wrapping around. @val is taken as a size_t so that, on platforms
 * where size_t is wider than unsigned long, values above ULONG_MAX
 * saturate here rather than being truncated at the call.
 */
static void
xmlSaturatedAddSizeT(unsigned long *dst, size_t val) {
    if (val > ULONG_MAX - *dst)
        *dst = ULONG_MAX;
    else
        /* val <= ULONG_MAX - *dst here, so the narrowing is lossless. */
        *dst += (unsigned long) val;
}
742
743
/**
744
 * xmlParserEntityCheck:
745
 * @ctxt:  parser context
746
 * @extra:  sum of unexpanded entity sizes
747
 *
748
 * Check for non-linear entity expansion behaviour.
749
 *
750
 * In some cases like xmlStringDecodeEntities, this function is called
751
 * for each, possibly nested entity and its unexpanded content length.
752
 *
753
 * In other cases like xmlParseReference, it's only called for each
754
 * top-level entity with its unexpanded content length plus the sum of
755
 * the unexpanded content lengths (plus fixed cost) of all nested
756
 * entities.
757
 *
758
 * Summing the unexpanded lengths also adds the length of the reference.
759
 * This is by design. Taking the length of the entity name into account
760
 * discourages attacks that try to waste CPU time with abusively long
761
 * entity names. See test/recurse/lol6.xml for example. Each call also
762
 * adds some fixed cost XML_ENT_FIXED_COST to discourage attacks with
763
 * short entities.
764
 *
765
 * Returns 1 on error, 0 on success.
766
 */
767
static int
768
xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long extra)
769
1.71G
{
770
1.71G
    unsigned long consumed;
771
1.71G
    xmlParserInputPtr input = ctxt->input;
772
1.71G
    xmlEntityPtr entity = input->entity;
773
774
    /*
775
     * Compute total consumed bytes so far, including input streams of
776
     * external entities.
777
     */
778
1.71G
    consumed = input->parentConsumed;
779
1.71G
    if ((entity == NULL) ||
780
1.71G
        ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
781
876M
         ((entity->flags & XML_ENT_PARSED) == 0))) {
782
843M
        xmlSaturatedAdd(&consumed, input->consumed);
783
843M
        xmlSaturatedAddSizeT(&consumed, input->cur - input->base);
784
843M
    }
785
1.71G
    xmlSaturatedAdd(&consumed, ctxt->sizeentities);
786
787
    /*
788
     * Add extra cost and some fixed cost.
789
     */
790
1.71G
    xmlSaturatedAdd(&ctxt->sizeentcopy, extra);
791
1.71G
    xmlSaturatedAdd(&ctxt->sizeentcopy, XML_ENT_FIXED_COST);
792
793
    /*
794
     * It's important to always use saturation arithmetic when tracking
795
     * entity sizes to make the size checks reliable. If "sizeentcopy"
796
     * overflows, we have to abort.
797
     */
798
1.71G
    if ((ctxt->sizeentcopy > XML_MAX_TEXT_LENGTH) &&
799
1.71G
        ((ctxt->sizeentcopy >= ULONG_MAX) ||
800
20.5k
         (ctxt->sizeentcopy / XML_PARSER_NON_LINEAR > consumed))) {
801
20.5k
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_LOOP,
802
20.5k
                       "Maximum entity amplification factor exceeded");
803
20.5k
        xmlHaltParser(ctxt);
804
20.5k
        return(1);
805
20.5k
    }
806
807
1.71G
    return(0);
808
1.71G
}
809
810
/************************************************************************
811
 *                  *
812
 *    Library wide options          *
813
 *                  *
814
 ************************************************************************/
815
816
/**
817
  * xmlHasFeature:
818
  * @feature: the feature to be examined
819
  *
820
  * Examines if the library has been compiled with a given feature.
821
  *
822
  * Returns a non-zero value if the feature exists, otherwise zero.
823
  * Returns zero (0) if the feature does not exist or an unknown
824
  * feature is requested, non-zero otherwise.
825
  */
826
int
827
xmlHasFeature(xmlFeature feature)
828
0
{
829
0
    switch (feature) {
830
0
  case XML_WITH_THREAD:
831
0
#ifdef LIBXML_THREAD_ENABLED
832
0
      return(1);
833
#else
834
      return(0);
835
#endif
836
0
        case XML_WITH_TREE:
837
0
#ifdef LIBXML_TREE_ENABLED
838
0
            return(1);
839
#else
840
            return(0);
841
#endif
842
0
        case XML_WITH_OUTPUT:
843
0
#ifdef LIBXML_OUTPUT_ENABLED
844
0
            return(1);
845
#else
846
            return(0);
847
#endif
848
0
        case XML_WITH_PUSH:
849
0
#ifdef LIBXML_PUSH_ENABLED
850
0
            return(1);
851
#else
852
            return(0);
853
#endif
854
0
        case XML_WITH_READER:
855
0
#ifdef LIBXML_READER_ENABLED
856
0
            return(1);
857
#else
858
            return(0);
859
#endif
860
0
        case XML_WITH_PATTERN:
861
0
#ifdef LIBXML_PATTERN_ENABLED
862
0
            return(1);
863
#else
864
            return(0);
865
#endif
866
0
        case XML_WITH_WRITER:
867
0
#ifdef LIBXML_WRITER_ENABLED
868
0
            return(1);
869
#else
870
            return(0);
871
#endif
872
0
        case XML_WITH_SAX1:
873
0
#ifdef LIBXML_SAX1_ENABLED
874
0
            return(1);
875
#else
876
            return(0);
877
#endif
878
0
        case XML_WITH_FTP:
879
#ifdef LIBXML_FTP_ENABLED
880
            return(1);
881
#else
882
0
            return(0);
883
0
#endif
884
0
        case XML_WITH_HTTP:
885
#ifdef LIBXML_HTTP_ENABLED
886
            return(1);
887
#else
888
0
            return(0);
889
0
#endif
890
0
        case XML_WITH_VALID:
891
0
#ifdef LIBXML_VALID_ENABLED
892
0
            return(1);
893
#else
894
            return(0);
895
#endif
896
0
        case XML_WITH_HTML:
897
0
#ifdef LIBXML_HTML_ENABLED
898
0
            return(1);
899
#else
900
            return(0);
901
#endif
902
0
        case XML_WITH_LEGACY:
903
#ifdef LIBXML_LEGACY_ENABLED
904
            return(1);
905
#else
906
0
            return(0);
907
0
#endif
908
0
        case XML_WITH_C14N:
909
0
#ifdef LIBXML_C14N_ENABLED
910
0
            return(1);
911
#else
912
            return(0);
913
#endif
914
0
        case XML_WITH_CATALOG:
915
0
#ifdef LIBXML_CATALOG_ENABLED
916
0
            return(1);
917
#else
918
            return(0);
919
#endif
920
0
        case XML_WITH_XPATH:
921
0
#ifdef LIBXML_XPATH_ENABLED
922
0
            return(1);
923
#else
924
            return(0);
925
#endif
926
0
        case XML_WITH_XPTR:
927
0
#ifdef LIBXML_XPTR_ENABLED
928
0
            return(1);
929
#else
930
            return(0);
931
#endif
932
0
        case XML_WITH_XINCLUDE:
933
0
#ifdef LIBXML_XINCLUDE_ENABLED
934
0
            return(1);
935
#else
936
            return(0);
937
#endif
938
0
        case XML_WITH_ICONV:
939
0
#ifdef LIBXML_ICONV_ENABLED
940
0
            return(1);
941
#else
942
            return(0);
943
#endif
944
0
        case XML_WITH_ISO8859X:
945
0
#ifdef LIBXML_ISO8859X_ENABLED
946
0
            return(1);
947
#else
948
            return(0);
949
#endif
950
0
        case XML_WITH_UNICODE:
951
0
#ifdef LIBXML_UNICODE_ENABLED
952
0
            return(1);
953
#else
954
            return(0);
955
#endif
956
0
        case XML_WITH_REGEXP:
957
0
#ifdef LIBXML_REGEXP_ENABLED
958
0
            return(1);
959
#else
960
            return(0);
961
#endif
962
0
        case XML_WITH_AUTOMATA:
963
0
#ifdef LIBXML_AUTOMATA_ENABLED
964
0
            return(1);
965
#else
966
            return(0);
967
#endif
968
0
        case XML_WITH_EXPR:
969
#ifdef LIBXML_EXPR_ENABLED
970
            return(1);
971
#else
972
0
            return(0);
973
0
#endif
974
0
        case XML_WITH_SCHEMAS:
975
0
#ifdef LIBXML_SCHEMAS_ENABLED
976
0
            return(1);
977
#else
978
            return(0);
979
#endif
980
0
        case XML_WITH_SCHEMATRON:
981
0
#ifdef LIBXML_SCHEMATRON_ENABLED
982
0
            return(1);
983
#else
984
            return(0);
985
#endif
986
0
        case XML_WITH_MODULES:
987
0
#ifdef LIBXML_MODULES_ENABLED
988
0
            return(1);
989
#else
990
            return(0);
991
#endif
992
0
        case XML_WITH_DEBUG:
993
#ifdef LIBXML_DEBUG_ENABLED
994
            return(1);
995
#else
996
0
            return(0);
997
0
#endif
998
0
        case XML_WITH_DEBUG_MEM:
999
#ifdef DEBUG_MEMORY_LOCATION
1000
            return(1);
1001
#else
1002
0
            return(0);
1003
0
#endif
1004
0
        case XML_WITH_DEBUG_RUN:
1005
0
            return(0);
1006
0
        case XML_WITH_ZLIB:
1007
0
#ifdef LIBXML_ZLIB_ENABLED
1008
0
            return(1);
1009
#else
1010
            return(0);
1011
#endif
1012
0
        case XML_WITH_LZMA:
1013
0
#ifdef LIBXML_LZMA_ENABLED
1014
0
            return(1);
1015
#else
1016
            return(0);
1017
#endif
1018
0
        case XML_WITH_ICU:
1019
#ifdef LIBXML_ICU_ENABLED
1020
            return(1);
1021
#else
1022
0
            return(0);
1023
0
#endif
1024
0
        default:
1025
0
      break;
1026
0
     }
1027
0
     return(0);
1028
0
}
1029
1030
/************************************************************************
1031
 *                  *
1032
 *    SAX2 defaulted attributes handling      *
1033
 *                  *
1034
 ************************************************************************/
1035
1036
/**
1037
 * xmlDetectSAX2:
1038
 * @ctxt:  an XML parser context
1039
 *
1040
 * Do the SAX2 detection and specific initialization
1041
 */
1042
static void
1043
11.4M
xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1044
11.4M
    xmlSAXHandlerPtr sax;
1045
1046
    /* Avoid unused variable warning if features are disabled. */
1047
11.4M
    (void) sax;
1048
1049
11.4M
    if (ctxt == NULL) return;
1050
11.4M
    sax = ctxt->sax;
1051
11.4M
#ifdef LIBXML_SAX1_ENABLED
1052
11.4M
    if ((sax) &&  (sax->initialized == XML_SAX2_MAGIC) &&
1053
11.4M
        ((sax->startElementNs != NULL) ||
1054
7.39M
         (sax->endElementNs != NULL) ||
1055
7.39M
         ((sax->startElement == NULL) && (sax->endElement == NULL))))
1056
7.39M
        ctxt->sax2 = 1;
1057
#else
1058
    ctxt->sax2 = 1;
1059
#endif /* LIBXML_SAX1_ENABLED */
1060
1061
11.4M
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1062
11.4M
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1063
11.4M
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
1064
11.4M
    if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1065
11.4M
    (ctxt->str_xml_ns == NULL)) {
1066
0
        xmlErrMemory(ctxt, NULL);
1067
0
    }
1068
11.4M
}
1069
1070
typedef struct _xmlDefAttrs xmlDefAttrs;
1071
typedef xmlDefAttrs *xmlDefAttrsPtr;
1072
struct _xmlDefAttrs {
1073
    int nbAttrs;  /* number of defaulted attributes on that element */
1074
    int maxAttrs;       /* the size of the array */
1075
#if __STDC_VERSION__ >= 199901L
1076
    /* Using a C99 flexible array member avoids UBSan errors. */
1077
    const xmlChar *values[]; /* array of localname/prefix/values/external */
1078
#else
1079
    const xmlChar *values[5];
1080
#endif
1081
};
1082
1083
/**
1084
 * xmlAttrNormalizeSpace:
1085
 * @src: the source string
1086
 * @dst: the target string
1087
 *
1088
 * Normalize the space in non CDATA attribute values:
1089
 * If the attribute type is not CDATA, then the XML processor MUST further
1090
 * process the normalized attribute value by discarding any leading and
1091
 * trailing space (#x20) characters, and by replacing sequences of space
1092
 * (#x20) characters by a single space (#x20) character.
1093
 * Note that the size of dst need to be at least src, and if one doesn't need
1094
 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1095
 * passing src as dst is just fine.
1096
 *
1097
 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1098
 *         is needed.
1099
 */
1100
static xmlChar *
1101
xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1102
831k
{
1103
831k
    if ((src == NULL) || (dst == NULL))
1104
0
        return(NULL);
1105
1106
834k
    while (*src == 0x20) src++;
1107
214M
    while (*src != 0) {
1108
213M
  if (*src == 0x20) {
1109
1.93M
      while (*src == 0x20) src++;
1110
706k
      if (*src != 0)
1111
661k
    *dst++ = 0x20;
1112
212M
  } else {
1113
212M
      *dst++ = *src++;
1114
212M
  }
1115
213M
    }
1116
831k
    *dst = 0;
1117
831k
    if (dst == src)
1118
777k
       return(NULL);
1119
53.6k
    return(dst);
1120
831k
}
1121
1122
/**
1123
 * xmlAttrNormalizeSpace2:
1124
 * @src: the source string
1125
 *
1126
 * Normalize the space in non CDATA attribute values, a slightly more complex
1127
 * front end to avoid allocation problems when running on attribute values
1128
 * coming from the input.
1129
 *
1130
 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1131
 *         is needed.
1132
 */
1133
static const xmlChar *
1134
xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
1135
102k
{
1136
102k
    int i;
1137
102k
    int remove_head = 0;
1138
102k
    int need_realloc = 0;
1139
102k
    const xmlChar *cur;
1140
1141
102k
    if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1142
0
        return(NULL);
1143
102k
    i = *len;
1144
102k
    if (i <= 0)
1145
1.41k
        return(NULL);
1146
1147
101k
    cur = src;
1148
154k
    while (*cur == 0x20) {
1149
52.7k
        cur++;
1150
52.7k
  remove_head++;
1151
52.7k
    }
1152
37.5M
    while (*cur != 0) {
1153
37.4M
  if (*cur == 0x20) {
1154
702k
      cur++;
1155
702k
      if ((*cur == 0x20) || (*cur == 0)) {
1156
15.2k
          need_realloc = 1;
1157
15.2k
    break;
1158
15.2k
      }
1159
702k
  } else
1160
36.7M
      cur++;
1161
37.4M
    }
1162
101k
    if (need_realloc) {
1163
15.2k
        xmlChar *ret;
1164
1165
15.2k
  ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1166
15.2k
  if (ret == NULL) {
1167
0
      xmlErrMemory(ctxt, NULL);
1168
0
      return(NULL);
1169
0
  }
1170
15.2k
  xmlAttrNormalizeSpace(ret, ret);
1171
15.2k
  *len = strlen((const char *)ret);
1172
15.2k
        return(ret);
1173
86.1k
    } else if (remove_head) {
1174
2.24k
        *len -= remove_head;
1175
2.24k
        memmove(src, src + remove_head, 1 + *len);
1176
2.24k
  return(src);
1177
2.24k
    }
1178
83.9k
    return(NULL);
1179
101k
}
1180
1181
/**
1182
 * xmlAddDefAttrs:
1183
 * @ctxt:  an XML parser context
1184
 * @fullname:  the element fullname
1185
 * @fullattr:  the attribute fullname
1186
 * @value:  the attribute value
1187
 *
1188
 * Add a defaulted attribute for an element
1189
 */
1190
static void
1191
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1192
               const xmlChar *fullname,
1193
               const xmlChar *fullattr,
1194
4.57M
               const xmlChar *value) {
1195
4.57M
    xmlDefAttrsPtr defaults;
1196
4.57M
    int len;
1197
4.57M
    const xmlChar *name;
1198
4.57M
    const xmlChar *prefix;
1199
1200
    /*
1201
     * Allows to detect attribute redefinitions
1202
     */
1203
4.57M
    if (ctxt->attsSpecial != NULL) {
1204
4.50M
        if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1205
1.94M
      return;
1206
4.50M
    }
1207
1208
2.63M
    if (ctxt->attsDefault == NULL) {
1209
168k
        ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1210
168k
  if (ctxt->attsDefault == NULL)
1211
0
      goto mem_error;
1212
168k
    }
1213
1214
    /*
1215
     * split the element name into prefix:localname , the string found
1216
     * are within the DTD and then not associated to namespace names.
1217
     */
1218
2.63M
    name = xmlSplitQName3(fullname, &len);
1219
2.63M
    if (name == NULL) {
1220
2.60M
        name = xmlDictLookup(ctxt->dict, fullname, -1);
1221
2.60M
  prefix = NULL;
1222
2.60M
    } else {
1223
26.0k
        name = xmlDictLookup(ctxt->dict, name, -1);
1224
26.0k
  prefix = xmlDictLookup(ctxt->dict, fullname, len);
1225
26.0k
    }
1226
1227
    /*
1228
     * make sure there is some storage
1229
     */
1230
2.63M
    defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1231
2.63M
    if (defaults == NULL) {
1232
1.15M
        defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
1233
1.15M
                     (4 * 5) * sizeof(const xmlChar *));
1234
1.15M
  if (defaults == NULL)
1235
0
      goto mem_error;
1236
1.15M
  defaults->nbAttrs = 0;
1237
1.15M
  defaults->maxAttrs = 4;
1238
1.15M
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1239
1.15M
                          defaults, NULL) < 0) {
1240
0
      xmlFree(defaults);
1241
0
      goto mem_error;
1242
0
  }
1243
1.47M
    } else if (defaults->nbAttrs >= defaults->maxAttrs) {
1244
5.22k
        xmlDefAttrsPtr temp;
1245
1246
5.22k
        temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
1247
5.22k
           (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1248
5.22k
  if (temp == NULL)
1249
0
      goto mem_error;
1250
5.22k
  defaults = temp;
1251
5.22k
  defaults->maxAttrs *= 2;
1252
5.22k
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1253
5.22k
                          defaults, NULL) < 0) {
1254
0
      xmlFree(defaults);
1255
0
      goto mem_error;
1256
0
  }
1257
5.22k
    }
1258
1259
    /*
1260
     * Split the element name into prefix:localname , the string found
1261
     * are within the DTD and hen not associated to namespace names.
1262
     */
1263
2.63M
    name = xmlSplitQName3(fullattr, &len);
1264
2.63M
    if (name == NULL) {
1265
2.25M
        name = xmlDictLookup(ctxt->dict, fullattr, -1);
1266
2.25M
  prefix = NULL;
1267
2.25M
    } else {
1268
377k
        name = xmlDictLookup(ctxt->dict, name, -1);
1269
377k
  prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1270
377k
    }
1271
1272
2.63M
    defaults->values[5 * defaults->nbAttrs] = name;
1273
2.63M
    defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1274
    /* intern the string and precompute the end */
1275
2.63M
    len = xmlStrlen(value);
1276
2.63M
    value = xmlDictLookup(ctxt->dict, value, len);
1277
2.63M
    if (value == NULL)
1278
0
        goto mem_error;
1279
2.63M
    defaults->values[5 * defaults->nbAttrs + 2] = value;
1280
2.63M
    defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1281
2.63M
    if (ctxt->external)
1282
2.16M
        defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1283
461k
    else
1284
461k
        defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1285
2.63M
    defaults->nbAttrs++;
1286
1287
2.63M
    return;
1288
1289
0
mem_error:
1290
0
    xmlErrMemory(ctxt, NULL);
1291
0
    return;
1292
2.63M
}
1293
1294
/**
1295
 * xmlAddSpecialAttr:
1296
 * @ctxt:  an XML parser context
1297
 * @fullname:  the element fullname
1298
 * @fullattr:  the attribute fullname
1299
 * @type:  the attribute type
1300
 *
1301
 * Register this attribute type
1302
 */
1303
static void
1304
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1305
      const xmlChar *fullname,
1306
      const xmlChar *fullattr,
1307
      int type)
1308
53.0M
{
1309
53.0M
    if (ctxt->attsSpecial == NULL) {
1310
322k
        ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1311
322k
  if (ctxt->attsSpecial == NULL)
1312
0
      goto mem_error;
1313
322k
    }
1314
1315
53.0M
    if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1316
21.3M
        return;
1317
1318
31.7M
    xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1319
31.7M
                     (void *) (ptrdiff_t) type);
1320
31.7M
    return;
1321
1322
0
mem_error:
1323
0
    xmlErrMemory(ctxt, NULL);
1324
0
    return;
1325
53.0M
}
1326
1327
/**
1328
 * xmlCleanSpecialAttrCallback:
1329
 *
1330
 * Removes CDATA attributes from the special attribute table
1331
 */
1332
static void
1333
xmlCleanSpecialAttrCallback(void *payload, void *data,
1334
                            const xmlChar *fullname, const xmlChar *fullattr,
1335
26.9M
                            const xmlChar *unused ATTRIBUTE_UNUSED) {
1336
26.9M
    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1337
1338
26.9M
    if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) {
1339
8.71M
        xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1340
8.71M
    }
1341
26.9M
}
1342
1343
/**
1344
 * xmlCleanSpecialAttr:
1345
 * @ctxt:  an XML parser context
1346
 *
1347
 * Trim the list of attributes defined to remove all those of type
1348
 * CDATA as they are not special. This call should be done when finishing
1349
 * to parse the DTD and before starting to parse the document root.
1350
 */
1351
static void
1352
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1353
789k
{
1354
789k
    if (ctxt->attsSpecial == NULL)
1355
560k
        return;
1356
1357
228k
    xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1358
1359
228k
    if (xmlHashSize(ctxt->attsSpecial) == 0) {
1360
31.3k
        xmlHashFree(ctxt->attsSpecial, NULL);
1361
31.3k
        ctxt->attsSpecial = NULL;
1362
31.3k
    }
1363
228k
    return;
1364
789k
}
1365
1366
/**
1367
 * xmlCheckLanguageID:
1368
 * @lang:  pointer to the string value
1369
 *
1370
 * Checks that the value conforms to the LanguageID production:
1371
 *
1372
 * NOTE: this is somewhat deprecated, those productions were removed from
1373
 *       the XML Second edition.
1374
 *
1375
 * [33] LanguageID ::= Langcode ('-' Subcode)*
1376
 * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
1377
 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1378
 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1379
 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1380
 * [38] Subcode ::= ([a-z] | [A-Z])+
1381
 *
1382
 * The current REC reference the successors of RFC 1766, currently 5646
1383
 *
1384
 * http://www.rfc-editor.org/rfc/rfc5646.txt
1385
 * langtag       = language
1386
 *                 ["-" script]
1387
 *                 ["-" region]
1388
 *                 *("-" variant)
1389
 *                 *("-" extension)
1390
 *                 ["-" privateuse]
1391
 * language      = 2*3ALPHA            ; shortest ISO 639 code
1392
 *                 ["-" extlang]       ; sometimes followed by
1393
 *                                     ; extended language subtags
1394
 *               / 4ALPHA              ; or reserved for future use
1395
 *               / 5*8ALPHA            ; or registered language subtag
1396
 *
1397
 * extlang       = 3ALPHA              ; selected ISO 639 codes
1398
 *                 *2("-" 3ALPHA)      ; permanently reserved
1399
 *
1400
 * script        = 4ALPHA              ; ISO 15924 code
1401
 *
1402
 * region        = 2ALPHA              ; ISO 3166-1 code
1403
 *               / 3DIGIT              ; UN M.49 code
1404
 *
1405
 * variant       = 5*8alphanum         ; registered variants
1406
 *               / (DIGIT 3alphanum)
1407
 *
1408
 * extension     = singleton 1*("-" (2*8alphanum))
1409
 *
1410
 *                                     ; Single alphanumerics
1411
 *                                     ; "x" reserved for private use
1412
 * singleton     = DIGIT               ; 0 - 9
1413
 *               / %x41-57             ; A - W
1414
 *               / %x59-5A             ; Y - Z
1415
 *               / %x61-77             ; a - w
1416
 *               / %x79-7A             ; y - z
1417
 *
1418
 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1419
 * The parser below doesn't try to cope with extension or privateuse
1420
 * that could be added but that's not interoperable anyway
1421
 *
1422
 * Returns 1 if correct 0 otherwise
1423
 **/
1424
int
1425
xmlCheckLanguageID(const xmlChar * lang)
1426
22.0k
{
1427
22.0k
    const xmlChar *cur = lang, *nxt;
1428
1429
22.0k
    if (cur == NULL)
1430
121
        return (0);
1431
21.9k
    if (((cur[0] == 'i') && (cur[1] == '-')) ||
1432
21.9k
        ((cur[0] == 'I') && (cur[1] == '-')) ||
1433
21.9k
        ((cur[0] == 'x') && (cur[1] == '-')) ||
1434
21.9k
        ((cur[0] == 'X') && (cur[1] == '-'))) {
1435
        /*
1436
         * Still allow IANA code and user code which were coming
1437
         * from the previous version of the XML-1.0 specification
1438
         * it's deprecated but we should not fail
1439
         */
1440
0
        cur += 2;
1441
0
        while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1442
0
               ((cur[0] >= 'a') && (cur[0] <= 'z')))
1443
0
            cur++;
1444
0
        return(cur[0] == 0);
1445
0
    }
1446
21.9k
    nxt = cur;
1447
557k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1448
557k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1449
535k
           nxt++;
1450
21.9k
    if (nxt - cur >= 4) {
1451
        /*
1452
         * Reserved
1453
         */
1454
403
        if ((nxt - cur > 8) || (nxt[0] != 0))
1455
375
            return(0);
1456
28
        return(1);
1457
403
    }
1458
21.5k
    if (nxt - cur < 2)
1459
557
        return(0);
1460
    /* we got an ISO 639 code */
1461
20.9k
    if (nxt[0] == 0)
1462
20.2k
        return(1);
1463
699
    if (nxt[0] != '-')
1464
277
        return(0);
1465
1466
422
    nxt++;
1467
422
    cur = nxt;
1468
    /* now we can have extlang or script or region or variant */
1469
422
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1470
26
        goto region_m49;
1471
1472
29.9k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1473
29.9k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1474
29.5k
           nxt++;
1475
396
    if (nxt - cur == 4)
1476
3
        goto script;
1477
393
    if (nxt - cur == 2)
1478
231
        goto region;
1479
162
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1480
11
        goto variant;
1481
151
    if (nxt - cur != 3)
1482
139
        return(0);
1483
    /* we parsed an extlang */
1484
12
    if (nxt[0] == 0)
1485
0
        return(1);
1486
12
    if (nxt[0] != '-')
1487
10
        return(0);
1488
1489
2
    nxt++;
1490
2
    cur = nxt;
1491
    /* now we can have script or region or variant */
1492
2
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1493
0
        goto region_m49;
1494
1495
2
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1496
2
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1497
0
           nxt++;
1498
2
    if (nxt - cur == 2)
1499
0
        goto region;
1500
2
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1501
0
        goto variant;
1502
2
    if (nxt - cur != 4)
1503
2
        return(0);
1504
    /* we parsed a script */
1505
3
script:
1506
3
    if (nxt[0] == 0)
1507
0
        return(1);
1508
3
    if (nxt[0] != '-')
1509
3
        return(0);
1510
1511
0
    nxt++;
1512
0
    cur = nxt;
1513
    /* now we can have region or variant */
1514
0
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1515
0
        goto region_m49;
1516
1517
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1518
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1519
0
           nxt++;
1520
1521
0
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1522
0
        goto variant;
1523
0
    if (nxt - cur != 2)
1524
0
        return(0);
1525
    /* we parsed a region */
1526
231
region:
1527
231
    if (nxt[0] == 0)
1528
222
        return(1);
1529
9
    if (nxt[0] != '-')
1530
7
        return(0);
1531
1532
2
    nxt++;
1533
2
    cur = nxt;
1534
    /* now we can just have a variant */
1535
10
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1536
10
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1537
8
           nxt++;
1538
1539
2
    if ((nxt - cur < 5) || (nxt - cur > 8))
1540
2
        return(0);
1541
1542
    /* we parsed a variant */
1543
11
variant:
1544
11
    if (nxt[0] == 0)
1545
0
        return(1);
1546
11
    if (nxt[0] != '-')
1547
11
        return(0);
1548
    /* extensions and private use subtags not checked */
1549
0
    return (1);
1550
1551
26
region_m49:
1552
26
    if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1553
26
        ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1554
0
        nxt += 3;
1555
0
        goto region;
1556
0
    }
1557
26
    return(0);
1558
26
}
1559
1560
/************************************************************************
1561
 *                  *
1562
 *    Parser stacks related functions and macros    *
1563
 *                  *
1564
 ************************************************************************/
1565
1566
static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1567
                                            const xmlChar ** str);
1568
1569
#ifdef SAX2
1570
/**
1571
 * nsPush:
1572
 * @ctxt:  an XML parser context
1573
 * @prefix:  the namespace prefix or NULL
1574
 * @URL:  the namespace name
1575
 *
1576
 * Pushes a new parser namespace on top of the ns stack
1577
 *
1578
 * Returns -1 in case of error, -2 if the namespace should be discarded
1579
 *     and the index in the stack otherwise.
1580
 */
1581
static int
1582
nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1583
445k
{
1584
445k
    if (ctxt->options & XML_PARSE_NSCLEAN) {
1585
51.5k
        int i;
1586
91.2k
  for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
1587
44.5k
      if (ctxt->nsTab[i] == prefix) {
1588
    /* in scope */
1589
4.87k
          if (ctxt->nsTab[i + 1] == URL)
1590
2.30k
        return(-2);
1591
    /* out of scope keep it */
1592
2.57k
    break;
1593
4.87k
      }
1594
44.5k
  }
1595
51.5k
    }
1596
443k
    if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1597
222k
  ctxt->nsMax = 10;
1598
222k
  ctxt->nsNr = 0;
1599
222k
  ctxt->nsTab = (const xmlChar **)
1600
222k
                xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1601
222k
  if (ctxt->nsTab == NULL) {
1602
0
      xmlErrMemory(ctxt, NULL);
1603
0
      ctxt->nsMax = 0;
1604
0
            return (-1);
1605
0
  }
1606
222k
    } else if (ctxt->nsNr >= ctxt->nsMax) {
1607
4.77k
        const xmlChar ** tmp;
1608
4.77k
        ctxt->nsMax *= 2;
1609
4.77k
        tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1610
4.77k
            ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1611
4.77k
        if (tmp == NULL) {
1612
0
            xmlErrMemory(ctxt, NULL);
1613
0
      ctxt->nsMax /= 2;
1614
0
            return (-1);
1615
0
        }
1616
4.77k
  ctxt->nsTab = tmp;
1617
4.77k
    }
1618
443k
    ctxt->nsTab[ctxt->nsNr++] = prefix;
1619
443k
    ctxt->nsTab[ctxt->nsNr++] = URL;
1620
443k
    return (ctxt->nsNr);
1621
443k
}
1622
/**
1623
 * nsPop:
1624
 * @ctxt: an XML parser context
1625
 * @nr:  the number to pop
1626
 *
1627
 * Pops the top @nr parser prefix/namespace from the ns stack
1628
 *
1629
 * Returns the number of namespaces removed
1630
 */
1631
static int
1632
nsPop(xmlParserCtxtPtr ctxt, int nr)
1633
138k
{
1634
138k
    int i;
1635
1636
138k
    if (ctxt->nsTab == NULL) return(0);
1637
138k
    if (ctxt->nsNr < nr) {
1638
0
        xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1639
0
        nr = ctxt->nsNr;
1640
0
    }
1641
138k
    if (ctxt->nsNr <= 0)
1642
0
        return (0);
1643
1644
542k
    for (i = 0;i < nr;i++) {
1645
403k
         ctxt->nsNr--;
1646
403k
   ctxt->nsTab[ctxt->nsNr] = NULL;
1647
403k
    }
1648
138k
    return(nr);
1649
138k
}
1650
#endif
1651
1652
static int
1653
477k
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1654
477k
    const xmlChar **atts;
1655
477k
    int *attallocs;
1656
477k
    int maxatts;
1657
1658
477k
    if (nr + 5 > ctxt->maxatts) {
1659
477k
  maxatts = ctxt->maxatts == 0 ? 55 : (nr + 5) * 2;
1660
477k
  atts = (const xmlChar **) xmlMalloc(
1661
477k
             maxatts * sizeof(const xmlChar *));
1662
477k
  if (atts == NULL) goto mem_error;
1663
477k
  attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1664
477k
                               (maxatts / 5) * sizeof(int));
1665
477k
  if (attallocs == NULL) {
1666
0
            xmlFree(atts);
1667
0
            goto mem_error;
1668
0
        }
1669
477k
        if (ctxt->maxatts > 0)
1670
599
            memcpy(atts, ctxt->atts, ctxt->maxatts * sizeof(const xmlChar *));
1671
477k
        xmlFree(ctxt->atts);
1672
477k
  ctxt->atts = atts;
1673
477k
  ctxt->attallocs = attallocs;
1674
477k
  ctxt->maxatts = maxatts;
1675
477k
    }
1676
477k
    return(ctxt->maxatts);
1677
0
mem_error:
1678
0
    xmlErrMemory(ctxt, NULL);
1679
0
    return(-1);
1680
477k
}
1681
1682
/**
1683
 * inputPush:
1684
 * @ctxt:  an XML parser context
1685
 * @value:  the parser input
1686
 *
1687
 * Pushes a new parser input on top of the input stack
1688
 *
1689
 * Returns -1 in case of error, the index in the stack otherwise
1690
 */
1691
int
1692
inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1693
879M
{
1694
879M
    if ((ctxt == NULL) || (value == NULL))
1695
0
        return(-1);
1696
879M
    if (ctxt->inputNr >= ctxt->inputMax) {
1697
23.3k
        size_t newSize = ctxt->inputMax * 2;
1698
23.3k
        xmlParserInputPtr *tmp;
1699
1700
23.3k
        tmp = (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1701
23.3k
                                               newSize * sizeof(*tmp));
1702
23.3k
        if (tmp == NULL) {
1703
0
            xmlErrMemory(ctxt, NULL);
1704
0
            return (-1);
1705
0
        }
1706
23.3k
        ctxt->inputTab = tmp;
1707
23.3k
        ctxt->inputMax = newSize;
1708
23.3k
    }
1709
879M
    ctxt->inputTab[ctxt->inputNr] = value;
1710
879M
    ctxt->input = value;
1711
879M
    return (ctxt->inputNr++);
1712
879M
}
1713
/**
1714
 * inputPop:
1715
 * @ctxt: an XML parser context
1716
 *
1717
 * Pops the top parser input from the input stack
1718
 *
1719
 * Returns the input just removed
1720
 */
1721
xmlParserInputPtr
1722
inputPop(xmlParserCtxtPtr ctxt)
1723
885M
{
1724
885M
    xmlParserInputPtr ret;
1725
1726
885M
    if (ctxt == NULL)
1727
0
        return(NULL);
1728
885M
    if (ctxt->inputNr <= 0)
1729
6.31M
        return (NULL);
1730
878M
    ctxt->inputNr--;
1731
878M
    if (ctxt->inputNr > 0)
1732
876M
        ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1733
2.79M
    else
1734
2.79M
        ctxt->input = NULL;
1735
878M
    ret = ctxt->inputTab[ctxt->inputNr];
1736
878M
    ctxt->inputTab[ctxt->inputNr] = NULL;
1737
878M
    return (ret);
1738
885M
}
1739
/**
1740
 * nodePush:
1741
 * @ctxt:  an XML parser context
1742
 * @value:  the element node
1743
 *
1744
 * Pushes a new element node on top of the node stack
1745
 *
1746
 * Returns -1 in case of error, the index in the stack otherwise
1747
 */
1748
int
1749
nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1750
382M
{
1751
382M
    if (ctxt == NULL) return(0);
1752
382M
    if (ctxt->nodeNr >= ctxt->nodeMax) {
1753
105k
        xmlNodePtr *tmp;
1754
1755
105k
  tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1756
105k
                                      ctxt->nodeMax * 2 *
1757
105k
                                      sizeof(ctxt->nodeTab[0]));
1758
105k
        if (tmp == NULL) {
1759
0
            xmlErrMemory(ctxt, NULL);
1760
0
            return (-1);
1761
0
        }
1762
105k
        ctxt->nodeTab = tmp;
1763
105k
  ctxt->nodeMax *= 2;
1764
105k
    }
1765
382M
    if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1766
382M
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
1767
5
  xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1768
5
     "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1769
5
        xmlParserMaxDepth);
1770
5
  xmlHaltParser(ctxt);
1771
5
  return(-1);
1772
5
    }
1773
382M
    ctxt->nodeTab[ctxt->nodeNr] = value;
1774
382M
    ctxt->node = value;
1775
382M
    return (ctxt->nodeNr++);
1776
382M
}
1777
1778
/**
1779
 * nodePop:
1780
 * @ctxt: an XML parser context
1781
 *
1782
 * Pops the top element node from the node stack
1783
 *
1784
 * Returns the node just removed
1785
 */
1786
xmlNodePtr
1787
nodePop(xmlParserCtxtPtr ctxt)
1788
379M
{
1789
379M
    xmlNodePtr ret;
1790
1791
379M
    if (ctxt == NULL) return(NULL);
1792
379M
    if (ctxt->nodeNr <= 0)
1793
306k
        return (NULL);
1794
379M
    ctxt->nodeNr--;
1795
379M
    if (ctxt->nodeNr > 0)
1796
377M
        ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1797
1.32M
    else
1798
1.32M
        ctxt->node = NULL;
1799
379M
    ret = ctxt->nodeTab[ctxt->nodeNr];
1800
379M
    ctxt->nodeTab[ctxt->nodeNr] = NULL;
1801
379M
    return (ret);
1802
379M
}
1803
1804
/**
1805
 * nameNsPush:
1806
 * @ctxt:  an XML parser context
1807
 * @value:  the element name
1808
 * @prefix:  the element prefix
1809
 * @URI:  the element namespace name
1810
 * @line:  the current line number for error messages
1811
 * @nsNr:  the number of namespaces pushed on the namespace table
1812
 *
1813
 * Pushes a new element name/prefix/URL on top of the name stack
1814
 *
1815
 * Returns -1 in case of error, the index in the stack otherwise
1816
 */
1817
static int
1818
nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1819
           const xmlChar *prefix, const xmlChar *URI, int line, int nsNr)
1820
309M
{
1821
309M
    xmlStartTag *tag;
1822
1823
309M
    if (ctxt->nameNr >= ctxt->nameMax) {
1824
134k
        const xmlChar * *tmp;
1825
134k
        xmlStartTag *tmp2;
1826
134k
        ctxt->nameMax *= 2;
1827
134k
        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1828
134k
                                    ctxt->nameMax *
1829
134k
                                    sizeof(ctxt->nameTab[0]));
1830
134k
        if (tmp == NULL) {
1831
0
      ctxt->nameMax /= 2;
1832
0
      goto mem_error;
1833
0
        }
1834
134k
  ctxt->nameTab = tmp;
1835
134k
        tmp2 = (xmlStartTag *) xmlRealloc((void * *)ctxt->pushTab,
1836
134k
                                    ctxt->nameMax *
1837
134k
                                    sizeof(ctxt->pushTab[0]));
1838
134k
        if (tmp2 == NULL) {
1839
0
      ctxt->nameMax /= 2;
1840
0
      goto mem_error;
1841
0
        }
1842
134k
  ctxt->pushTab = tmp2;
1843
309M
    } else if (ctxt->pushTab == NULL) {
1844
1.11M
        ctxt->pushTab = (xmlStartTag *) xmlMalloc(ctxt->nameMax *
1845
1.11M
                                            sizeof(ctxt->pushTab[0]));
1846
1.11M
        if (ctxt->pushTab == NULL)
1847
0
            goto mem_error;
1848
1.11M
    }
1849
309M
    ctxt->nameTab[ctxt->nameNr] = value;
1850
309M
    ctxt->name = value;
1851
309M
    tag = &ctxt->pushTab[ctxt->nameNr];
1852
309M
    tag->prefix = prefix;
1853
309M
    tag->URI = URI;
1854
309M
    tag->line = line;
1855
309M
    tag->nsNr = nsNr;
1856
309M
    return (ctxt->nameNr++);
1857
0
mem_error:
1858
0
    xmlErrMemory(ctxt, NULL);
1859
0
    return (-1);
1860
309M
}
1861
#ifdef LIBXML_PUSH_ENABLED
/**
 * nameNsPop:
 * @ctxt: an XML parser context
 *
 * Removes the name on top of the name stack, clears its nameTab slot
 * and makes ctxt->name refer to the new top of the stack (NULL when
 * the stack becomes empty).
 *
 * Returns the name just removed, or NULL if the stack is empty
 */
static const xmlChar *
nameNsPop(xmlParserCtxtPtr ctxt)
{
    const xmlChar *popped;
    int top;

    if (ctxt->nameNr <= 0)
        return(NULL);

    top = --ctxt->nameNr;
    ctxt->name = (top > 0) ? ctxt->nameTab[top - 1] : NULL;

    popped = ctxt->nameTab[top];
    ctxt->nameTab[top] = NULL;
    return(popped);
}
#endif /* LIBXML_PUSH_ENABLED */
1887
1888
/**
1889
 * namePush:
1890
 * @ctxt:  an XML parser context
1891
 * @value:  the element name
1892
 *
1893
 * Pushes a new element name on top of the name stack
1894
 *
1895
 * Returns -1 in case of error, the index in the stack otherwise
1896
 */
1897
int
1898
namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1899
0
{
1900
0
    if (ctxt == NULL) return (-1);
1901
1902
0
    if (ctxt->nameNr >= ctxt->nameMax) {
1903
0
        const xmlChar * *tmp;
1904
0
        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1905
0
                                    ctxt->nameMax * 2 *
1906
0
                                    sizeof(ctxt->nameTab[0]));
1907
0
        if (tmp == NULL) {
1908
0
      goto mem_error;
1909
0
        }
1910
0
  ctxt->nameTab = tmp;
1911
0
        ctxt->nameMax *= 2;
1912
0
    }
1913
0
    ctxt->nameTab[ctxt->nameNr] = value;
1914
0
    ctxt->name = value;
1915
0
    return (ctxt->nameNr++);
1916
0
mem_error:
1917
0
    xmlErrMemory(ctxt, NULL);
1918
0
    return (-1);
1919
0
}
1920
/**
1921
 * namePop:
1922
 * @ctxt: an XML parser context
1923
 *
1924
 * Pops the top element name from the name stack
1925
 *
1926
 * Returns the name just removed
1927
 */
1928
const xmlChar *
1929
namePop(xmlParserCtxtPtr ctxt)
1930
262M
{
1931
262M
    const xmlChar *ret;
1932
1933
262M
    if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1934
0
        return (NULL);
1935
262M
    ctxt->nameNr--;
1936
262M
    if (ctxt->nameNr > 0)
1937
261M
        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1938
792k
    else
1939
792k
        ctxt->name = NULL;
1940
262M
    ret = ctxt->nameTab[ctxt->nameNr];
1941
262M
    ctxt->nameTab[ctxt->nameNr] = NULL;
1942
262M
    return (ret);
1943
262M
}
1944
1945
470M
/*
 * spacePush: push val on the space stack, doubling spaceTab when it is
 * full, and point ctxt->space at the new top entry.
 * Returns the index used, or -1 if the array could not be grown.
 */
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
    int idx;

    if (ctxt->spaceNr >= ctxt->spaceMax) {
        int *grown;

        ctxt->spaceMax *= 2;
        grown = (int *) xmlRealloc(ctxt->spaceTab,
                                   ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
        if (grown == NULL) {
            xmlErrMemory(ctxt, NULL);
            /* undo the doubling: the old array is still in place */
            ctxt->spaceMax /= 2;
            return(-1);
        }
        ctxt->spaceTab = grown;
    }

    idx = ctxt->spaceNr;
    ctxt->spaceTab[idx] = val;
    ctxt->space = &ctxt->spaceTab[idx];
    ctxt->spaceNr = idx + 1;
    return(idx);
}
1963
1964
468M
/*
 * spacePop: pop the top entry of the space stack, overwrite its slot
 * with -1 and point ctxt->space at the new top entry (slot 0 when the
 * stack becomes empty).
 * Returns the popped value, or 0 if the stack was already empty.
 */
static int spacePop(xmlParserCtxtPtr ctxt) {
    int popped;
    int top;

    if (ctxt->spaceNr <= 0)
        return(0);

    top = --ctxt->spaceNr;
    ctxt->space = &ctxt->spaceTab[(top > 0) ? top - 1 : 0];

    popped = ctxt->spaceTab[top];
    ctxt->spaceTab[top] = -1;
    return(popped);
}
1976
1977
/*
1978
 * Macros for accessing the content. Those should be used only by the parser,
1979
 * and not exported.
1980
 *
1981
 * Dirty macros, i.e. one often needs to make assumptions about the context to
1982
 * use them
1983
 *
1984
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
1985
 *           To be used with extreme caution since operations consuming
1986
 *           characters may move the input buffer to a different location !
1987
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
1988
 *           This should be used internally by the parser
1989
 *           only to compare to ASCII values otherwise it would break when
1990
 *           running with UTF-8 encoding.
1991
 *   RAW     same as CUR but in the input buffer, bypass any token
1992
 *           extraction that may have been done
1993
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR it should be used only
1994
 *           to compare on ASCII based substring.
1995
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
1996
 *           strings without newlines within the parser.
1997
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
1998
 *           defined char within the parser.
1999
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
2000
 *
2001
 *   NEXT    Skip to the next character, this does the proper decoding
2002
 *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
2003
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
2004
 *   CUR_CHAR(l) returns the current unicode character (int), set l
2005
 *           to the number of xmlChars used for the encoding [0-5].
2006
 *   CUR_SCHAR  same but operate on a string instead of the context
2007
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
2008
 *            the index
2009
 *   GROW, SHRINK  handling of input buffers
2010
 */
2011
2012
16.2G
/* Current byte of the active input; RAW and CUR expand identically. */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
/* Byte located val positions after the current one. */
#define NXT(val) ctxt->input->cur[(val)]
/* Read cursor and start of the active input buffer. */
#define CUR_PTR ctxt->input->cur
#define BASE_PTR ctxt->input->base

/*
 * CMPn(s, c1, ..., cn): true when the n bytes starting at s are equal to
 * c1 ... cn, compared left to right with short-circuit evaluation.
 * Every argument is parenthesized so that an expression such as
 * "ptr + 1" is converted as a whole before being indexed; s may still be
 * evaluated several times and must therefore be free of side effects.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((const unsigned char *) (s))[ 0 ] == (c1) && \
    ((const unsigned char *) (s))[ 1 ] == (c2) && \
    ((const unsigned char *) (s))[ 2 ] == (c3) && \
    ((const unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && \
    ((const unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && \
    ((const unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && \
    ((const unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((const unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((const unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((const unsigned char *) (s))[ 9 ] == (c10) )
2035
2036
5.22G
/*
 * SKIP(val): advance the cursor and the column counter by val bytes,
 * then ask for more input if the cursor landed on a 0 byte. The line
 * counter is not touched, and val is evaluated twice.
 */
#define SKIP(val) do {							\
    ctxt->input->cur += (val),ctxt->input->col+=(val);			\
    if (*ctxt->input->cur == 0)						\
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
  } while (0)

/*
 * SKIPL(val): advance the cursor by val bytes one at a time, keeping
 * line and col up to date across newlines, then ask for more input if
 * the cursor landed on a 0 byte. val is parenthesized so that a compound
 * argument keeps its meaning inside the loop condition.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	    ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == 0)						\
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
  } while (0)
2053
2054
5.66G
#define SHRINK if ((ctxt->progressive == 0) &&       \
2055
5.66G
       (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
2056
5.66G
       (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
2057
5.66G
  xmlSHRINK (ctxt);
2058
2059
8.46M
static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
2060
    /* Don't shrink memory buffers. */
2061
8.46M
    if ((ctxt->input->buf) &&
2062
8.46M
        ((ctxt->input->buf->encoder) || (ctxt->input->buf->readcallback)))
2063
88.9k
        xmlParserInputShrink(ctxt->input);
2064
8.46M
    if (*ctxt->input->cur == 0)
2065
331k
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2066
8.46M
}
2067
2068
16.7G
#define GROW if ((ctxt->progressive == 0) &&       \
2069
16.7G
     (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
2070
16.7G
  xmlGROW (ctxt);
2071
2072
1.78G
static void xmlGROW (xmlParserCtxtPtr ctxt) {
2073
1.78G
    ptrdiff_t curEnd = ctxt->input->end - ctxt->input->cur;
2074
1.78G
    ptrdiff_t curBase = ctxt->input->cur - ctxt->input->base;
2075
2076
1.78G
    if (((curEnd > XML_MAX_LOOKUP_LIMIT) ||
2077
1.78G
         (curBase > XML_MAX_LOOKUP_LIMIT)) &&
2078
1.78G
         ((ctxt->input->buf) &&
2079
0
          (ctxt->input->buf->readcallback != NULL)) &&
2080
1.78G
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
2081
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
2082
0
        xmlHaltParser(ctxt);
2083
0
  return;
2084
0
    }
2085
1.78G
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2086
1.78G
    if ((ctxt->input->cur > ctxt->input->end) ||
2087
1.78G
        (ctxt->input->cur < ctxt->input->base)) {
2088
0
        xmlHaltParser(ctxt);
2089
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "cur index out of bound");
2090
0
  return;
2091
0
    }
2092
1.78G
    if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0))
2093
19.9M
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2094
1.78G
}
2095
2096
4.50G
/* Skip blanks at the current position, see xmlSkipBlankChars(). */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* Skip the current character through xmlNextChar(). */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: step over exactly one byte, bumping the column counter only,
 * then ask for more input if the cursor landed on a 0 byte.
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL(l): step over the current character, which is l bytes long,
 * updating line/col when that character is a newline.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);						\
  } while (0)

/* Decode the current character; l must be an lvalue receiving its length. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l,b,i,v): append character v to buffer b at index i and
 * advance i. A length l of 1 stores a single byte, any other length
 * goes through xmlCopyCharMultiByte(). The expansion is a bare if/else,
 * so it must be used as a full statement. Arguments are parenthesized so
 * that a compound l is compared as a whole.
 */
#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (v);					\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
2120
2121
/**
2122
 * xmlSkipBlankChars:
2123
 * @ctxt:  the XML parser context
2124
 *
2125
 * skip all blanks character found at that point in the input streams.
2126
 * It pops up finished entities in the process if allowable at that point.
2127
 *
2128
 * Returns the number of space chars skipped
2129
 */
2130
2131
int
2132
4.50G
xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2133
4.50G
    int res = 0;
2134
2135
    /*
2136
     * It's Okay to use CUR/NEXT here since all the blanks are on
2137
     * the ASCII range.
2138
     */
2139
4.50G
    if (((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) ||
2140
4.50G
        (ctxt->instate == XML_PARSER_START)) {
2141
2.42G
  const xmlChar *cur;
2142
  /*
2143
   * if we are in the document content, go really fast
2144
   */
2145
2.42G
  cur = ctxt->input->cur;
2146
2.42G
  while (IS_BLANK_CH(*cur)) {
2147
747M
      if (*cur == '\n') {
2148
5.11M
    ctxt->input->line++; ctxt->input->col = 1;
2149
742M
      } else {
2150
742M
    ctxt->input->col++;
2151
742M
      }
2152
747M
      cur++;
2153
747M
      if (res < INT_MAX)
2154
747M
    res++;
2155
747M
      if (*cur == 0) {
2156
279k
    ctxt->input->cur = cur;
2157
279k
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2158
279k
    cur = ctxt->input->cur;
2159
279k
      }
2160
747M
  }
2161
2.42G
  ctxt->input->cur = cur;
2162
2.42G
    } else {
2163
2.08G
        int expandPE = ((ctxt->external != 0) || (ctxt->inputNr != 1));
2164
2165
5.68G
  while (ctxt->instate != XML_PARSER_EOF) {
2166
5.68G
            if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2167
1.74G
    NEXT;
2168
3.94G
      } else if (CUR == '%') {
2169
                /*
2170
                 * Need to handle support of entities branching here
2171
                 */
2172
996M
          if ((expandPE == 0) || (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2173
5.92M
                    break;
2174
990M
          xmlParsePEReference(ctxt);
2175
2.95G
            } else if (CUR == 0) {
2176
876M
                unsigned long consumed;
2177
876M
                xmlEntityPtr ent;
2178
2179
876M
                if (ctxt->inputNr <= 1)
2180
141k
                    break;
2181
2182
876M
                consumed = ctxt->input->consumed;
2183
876M
                xmlSaturatedAddSizeT(&consumed,
2184
876M
                                     ctxt->input->cur - ctxt->input->base);
2185
2186
                /*
2187
                 * Add to sizeentities when parsing an external entity
2188
                 * for the first time.
2189
                 */
2190
876M
                ent = ctxt->input->entity;
2191
876M
                if ((ent->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
2192
876M
                    ((ent->flags & XML_ENT_PARSED) == 0)) {
2193
17.8k
                    ent->flags |= XML_ENT_PARSED;
2194
2195
17.8k
                    xmlSaturatedAdd(&ctxt->sizeentities, consumed);
2196
17.8k
                }
2197
2198
876M
                xmlParserEntityCheck(ctxt, consumed);
2199
2200
876M
                xmlPopInput(ctxt);
2201
2.07G
            } else {
2202
2.07G
                break;
2203
2.07G
            }
2204
2205
            /*
2206
             * Also increase the counter when entering or exiting a PERef.
2207
             * The spec says: "When a parameter-entity reference is recognized
2208
             * in the DTD and included, its replacement text MUST be enlarged
2209
             * by the attachment of one leading and one following space (#x20)
2210
             * character."
2211
             */
2212
3.60G
      if (res < INT_MAX)
2213
3.60G
    res++;
2214
3.60G
        }
2215
2.08G
    }
2216
4.50G
    return(res);
2217
4.50G
}
2218
2219
/************************************************************************
2220
 *                  *
2221
 *    Commodity functions to handle entities      *
2222
 *                  *
2223
 ************************************************************************/
2224
2225
/**
2226
 * xmlPopInput:
2227
 * @ctxt:  an XML parser context
2228
 *
2229
 * xmlPopInput: the current input pointed by ctxt->input came to an end
2230
 *          pop it and return the next char.
2231
 *
2232
 * Returns the current xmlChar in the parser context
2233
 */
2234
xmlChar
2235
876M
xmlPopInput(xmlParserCtxtPtr ctxt) {
2236
876M
    xmlParserInputPtr input;
2237
2238
876M
    if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2239
876M
    if (xmlParserDebugEntities)
2240
0
  xmlGenericError(xmlGenericErrorContext,
2241
0
    "Popping input %d\n", ctxt->inputNr);
2242
876M
    if ((ctxt->inputNr > 1) && (ctxt->inSubset == 0) &&
2243
876M
        (ctxt->instate != XML_PARSER_EOF))
2244
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2245
0
                    "Unfinished entity outside the DTD");
2246
876M
    input = inputPop(ctxt);
2247
876M
    if (input->entity != NULL)
2248
876M
        input->entity->flags &= ~XML_ENT_EXPANDING;
2249
876M
    xmlFreeInputStream(input);
2250
876M
    if (*ctxt->input->cur == 0)
2251
425M
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2252
876M
    return(CUR);
2253
876M
}
2254
2255
/**
2256
 * xmlPushInput:
2257
 * @ctxt:  an XML parser context
2258
 * @input:  an XML parser input fragment (entity, XML fragment ...).
2259
 *
2260
 * xmlPushInput: switch to a new input stream which is stacked on top
2261
 *               of the previous one(s).
2262
 * Returns -1 in case of error or the index in the input stack
2263
 */
2264
int
2265
876M
xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2266
876M
    int ret;
2267
876M
    if (input == NULL) return(-1);
2268
2269
876M
    if (xmlParserDebugEntities) {
2270
0
  if ((ctxt->input != NULL) && (ctxt->input->filename))
2271
0
      xmlGenericError(xmlGenericErrorContext,
2272
0
        "%s(%d): ", ctxt->input->filename,
2273
0
        ctxt->input->line);
2274
0
  xmlGenericError(xmlGenericErrorContext,
2275
0
    "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2276
0
    }
2277
876M
    if (((ctxt->inputNr > 40) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2278
876M
        (ctxt->inputNr > 100)) {
2279
0
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2280
0
        while (ctxt->inputNr > 1)
2281
0
            xmlFreeInputStream(inputPop(ctxt));
2282
0
  return(-1);
2283
0
    }
2284
876M
    ret = inputPush(ctxt, input);
2285
876M
    if (ctxt->instate == XML_PARSER_EOF)
2286
0
        return(-1);
2287
876M
    GROW;
2288
876M
    return(ret);
2289
876M
}
2290
2291
/**
2292
 * xmlParseCharRef:
2293
 * @ctxt:  an XML parser context
2294
 *
2295
 * DEPRECATED: Internal function, don't use.
2296
 *
2297
 * Parse a numeric character reference. Always consumes '&'.
2298
 *
2299
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2300
 *                  '&#x' [0-9a-fA-F]+ ';'
2301
 *
2302
 * [ WFC: Legal Character ]
2303
 * Characters referred to using character references must match the
2304
 * production for Char.
2305
 *
2306
 * Returns the value parsed (as an int), 0 in case of error
2307
 */
2308
int
2309
1.82M
xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2310
1.82M
    int val = 0;
2311
1.82M
    int count = 0;
2312
2313
    /*
2314
     * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2315
     */
2316
1.82M
    if ((RAW == '&') && (NXT(1) == '#') &&
2317
1.82M
        (NXT(2) == 'x')) {
2318
429k
  SKIP(3);
2319
429k
  GROW;
2320
4.22M
  while (RAW != ';') { /* loop blocked by count */
2321
3.83M
      if (count++ > 20) {
2322
292k
    count = 0;
2323
292k
    GROW;
2324
292k
                if (ctxt->instate == XML_PARSER_EOF)
2325
0
                    return(0);
2326
292k
      }
2327
3.83M
      if ((RAW >= '0') && (RAW <= '9'))
2328
3.47M
          val = val * 16 + (CUR - '0');
2329
356k
      else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2330
268k
          val = val * 16 + (CUR - 'a') + 10;
2331
88.3k
      else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2332
52.6k
          val = val * 16 + (CUR - 'A') + 10;
2333
35.6k
      else {
2334
35.6k
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2335
35.6k
    val = 0;
2336
35.6k
    break;
2337
35.6k
      }
2338
3.79M
      if (val > 0x110000)
2339
3.21M
          val = 0x110000;
2340
2341
3.79M
      NEXT;
2342
3.79M
      count++;
2343
3.79M
  }
2344
429k
  if (RAW == ';') {
2345
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2346
393k
      ctxt->input->col++;
2347
393k
      ctxt->input->cur++;
2348
393k
  }
2349
1.39M
    } else if  ((RAW == '&') && (NXT(1) == '#')) {
2350
1.39M
  SKIP(2);
2351
1.39M
  GROW;
2352
6.25M
  while (RAW != ';') { /* loop blocked by count */
2353
4.91M
      if (count++ > 20) {
2354
118k
    count = 0;
2355
118k
    GROW;
2356
118k
                if (ctxt->instate == XML_PARSER_EOF)
2357
0
                    return(0);
2358
118k
      }
2359
4.91M
      if ((RAW >= '0') && (RAW <= '9'))
2360
4.85M
          val = val * 10 + (CUR - '0');
2361
56.6k
      else {
2362
56.6k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2363
56.6k
    val = 0;
2364
56.6k
    break;
2365
56.6k
      }
2366
4.85M
      if (val > 0x110000)
2367
1.27M
          val = 0x110000;
2368
2369
4.85M
      NEXT;
2370
4.85M
      count++;
2371
4.85M
  }
2372
1.39M
  if (RAW == ';') {
2373
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2374
1.34M
      ctxt->input->col++;
2375
1.34M
      ctxt->input->cur++;
2376
1.34M
  }
2377
1.39M
    } else {
2378
0
        if (RAW == '&')
2379
0
            SKIP(1);
2380
0
        xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2381
0
    }
2382
2383
    /*
2384
     * [ WFC: Legal Character ]
2385
     * Characters referred to using character references must match the
2386
     * production for Char.
2387
     */
2388
1.82M
    if (val >= 0x110000) {
2389
2.13k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2390
2.13k
                "xmlParseCharRef: character reference out of bounds\n",
2391
2.13k
          val);
2392
1.82M
    } else if (IS_CHAR(val)) {
2393
1.73M
        return(val);
2394
1.73M
    } else {
2395
95.0k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2396
95.0k
                          "xmlParseCharRef: invalid xmlChar value %d\n",
2397
95.0k
                    val);
2398
95.0k
    }
2399
97.1k
    return(0);
2400
1.82M
}
2401
2402
/**
2403
 * xmlParseStringCharRef:
2404
 * @ctxt:  an XML parser context
2405
 * @str:  a pointer to an index in the string
2406
 *
2407
 * parse Reference declarations, variant parsing from a string rather
2408
 * than an an input flow.
2409
 *
2410
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2411
 *                  '&#x' [0-9a-fA-F]+ ';'
2412
 *
2413
 * [ WFC: Legal Character ]
2414
 * Characters referred to using character references must match the
2415
 * production for Char.
2416
 *
2417
 * Returns the value parsed (as an int), 0 in case of error, str will be
2418
 *         updated to the current value of the index
2419
 */
2420
static int
2421
2.92M
xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2422
2.92M
    const xmlChar *ptr;
2423
2.92M
    xmlChar cur;
2424
2.92M
    int val = 0;
2425
2426
2.92M
    if ((str == NULL) || (*str == NULL)) return(0);
2427
2.92M
    ptr = *str;
2428
2.92M
    cur = *ptr;
2429
2.92M
    if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2430
202k
  ptr += 3;
2431
202k
  cur = *ptr;
2432
709k
  while (cur != ';') { /* Non input consuming loop */
2433
514k
      if ((cur >= '0') && (cur <= '9'))
2434
374k
          val = val * 16 + (cur - '0');
2435
139k
      else if ((cur >= 'a') && (cur <= 'f'))
2436
7.57k
          val = val * 16 + (cur - 'a') + 10;
2437
132k
      else if ((cur >= 'A') && (cur <= 'F'))
2438
124k
          val = val * 16 + (cur - 'A') + 10;
2439
8.06k
      else {
2440
8.06k
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2441
8.06k
    val = 0;
2442
8.06k
    break;
2443
8.06k
      }
2444
506k
      if (val > 0x110000)
2445
224k
          val = 0x110000;
2446
2447
506k
      ptr++;
2448
506k
      cur = *ptr;
2449
506k
  }
2450
202k
  if (cur == ';')
2451
194k
      ptr++;
2452
2.71M
    } else if  ((cur == '&') && (ptr[1] == '#')){
2453
2.71M
  ptr += 2;
2454
2.71M
  cur = *ptr;
2455
9.53M
  while (cur != ';') { /* Non input consuming loops */
2456
6.82M
      if ((cur >= '0') && (cur <= '9'))
2457
6.81M
          val = val * 10 + (cur - '0');
2458
6.61k
      else {
2459
6.61k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2460
6.61k
    val = 0;
2461
6.61k
    break;
2462
6.61k
      }
2463
6.81M
      if (val > 0x110000)
2464
361k
          val = 0x110000;
2465
2466
6.81M
      ptr++;
2467
6.81M
      cur = *ptr;
2468
6.81M
  }
2469
2.71M
  if (cur == ';')
2470
2.71M
      ptr++;
2471
2.71M
    } else {
2472
0
  xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2473
0
  return(0);
2474
0
    }
2475
2.92M
    *str = ptr;
2476
2477
    /*
2478
     * [ WFC: Legal Character ]
2479
     * Characters referred to using character references must match the
2480
     * production for Char.
2481
     */
2482
2.92M
    if (val >= 0x110000) {
2483
451
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2484
451
                "xmlParseStringCharRef: character reference out of bounds\n",
2485
451
                val);
2486
2.92M
    } else if (IS_CHAR(val)) {
2487
2.90M
        return(val);
2488
2.90M
    } else {
2489
15.8k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2490
15.8k
        "xmlParseStringCharRef: invalid xmlChar value %d\n",
2491
15.8k
        val);
2492
15.8k
    }
2493
16.3k
    return(0);
2494
2.92M
}
2495
2496
/**
2497
 * xmlParserHandlePEReference:
2498
 * @ctxt:  the parser context
2499
 *
2500
 * [69] PEReference ::= '%' Name ';'
2501
 *
2502
 * [ WFC: No Recursion ]
2503
 * A parsed entity must not contain a recursive
2504
 * reference to itself, either directly or indirectly.
2505
 *
2506
 * [ WFC: Entity Declared ]
2507
 * In a document without any DTD, a document with only an internal DTD
2508
 * subset which contains no parameter entity references, or a document
2509
 * with "standalone='yes'", ...  ... The declaration of a parameter
2510
 * entity must precede any reference to it...
2511
 *
2512
 * [ VC: Entity Declared ]
2513
 * In a document with an external subset or external parameter entities
2514
 * with "standalone='no'", ...  ... The declaration of a parameter entity
2515
 * must precede any reference to it...
2516
 *
2517
 * [ WFC: In DTD ]
2518
 * Parameter-entity references may only appear in the DTD.
2519
 * NOTE: misleading but this is handled.
2520
 *
2521
 * A PEReference may have been detected in the current input stream
2522
 * the handling is done accordingly to
2523
 *      http://www.w3.org/TR/REC-xml#entproc
2524
 * i.e.
2525
 *   - Included in literal in entity values
2526
 *   - Included as Parameter Entity reference within DTDs
2527
 */
2528
void
2529
0
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2530
0
    switch(ctxt->instate) {
2531
0
  case XML_PARSER_CDATA_SECTION:
2532
0
      return;
2533
0
        case XML_PARSER_COMMENT:
2534
0
      return;
2535
0
  case XML_PARSER_START_TAG:
2536
0
      return;
2537
0
  case XML_PARSER_END_TAG:
2538
0
      return;
2539
0
        case XML_PARSER_EOF:
2540
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2541
0
      return;
2542
0
        case XML_PARSER_PROLOG:
2543
0
  case XML_PARSER_START:
2544
0
  case XML_PARSER_MISC:
2545
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2546
0
      return;
2547
0
  case XML_PARSER_ENTITY_DECL:
2548
0
        case XML_PARSER_CONTENT:
2549
0
        case XML_PARSER_ATTRIBUTE_VALUE:
2550
0
        case XML_PARSER_PI:
2551
0
  case XML_PARSER_SYSTEM_LITERAL:
2552
0
  case XML_PARSER_PUBLIC_LITERAL:
2553
      /* we just ignore it there */
2554
0
      return;
2555
0
        case XML_PARSER_EPILOG:
2556
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2557
0
      return;
2558
0
  case XML_PARSER_ENTITY_VALUE:
2559
      /*
2560
       * NOTE: in the case of entity values, we don't do the
2561
       *       substitution here since we need the literal
2562
       *       entity value to be able to save the internal
2563
       *       subset of the document.
2564
       *       This will be handled by xmlStringDecodeEntities
2565
       */
2566
0
      return;
2567
0
        case XML_PARSER_DTD:
2568
      /*
2569
       * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2570
       * In the internal DTD subset, parameter-entity references
2571
       * can occur only where markup declarations can occur, not
2572
       * within markup declarations.
2573
       * In that case this is handled in xmlParseMarkupDecl
2574
       */
2575
0
      if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2576
0
    return;
2577
0
      if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2578
0
    return;
2579
0
            break;
2580
0
        case XML_PARSER_IGNORE:
2581
0
            return;
2582
0
    }
2583
2584
0
    xmlParsePEReference(ctxt);
2585
0
}
2586
2587
/*
 * Macro used to grow the current buffer to twice its size plus n bytes.
 * buffer##_size is expected to be a size_t
 * mem_error: is expected to handle memory allocation failures; it is
 * also the target when the new size wraps around. On failure buffer and
 * buffer##_size are left unchanged.
 * n is parenthesized so that a compound expression is added as a whole.
 */
#define growBuffer(buffer, n) {						\
    xmlChar *tmp;							\
    size_t new_size = buffer##_size * 2 + (n);                          \
    if (new_size < buffer##_size) goto mem_error;                       \
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);                     \
    if (tmp == NULL) goto mem_error;					\
    buffer = tmp;							\
    buffer##_size = new_size;                                           \
}
2601
2602
/**
2603
 * xmlStringDecodeEntitiesInt:
2604
 * @ctxt:  the parser context
2605
 * @str:  the input string
2606
 * @len: the string length
2607
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2608
 * @end:  an end marker xmlChar, 0 if none
2609
 * @end2:  an end marker xmlChar, 0 if none
2610
 * @end3:  an end marker xmlChar, 0 if none
2611
 * @check:  whether to perform entity checks
2612
 */
2613
static xmlChar *
2614
xmlStringDecodeEntitiesInt(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2615
               int what, xmlChar end, xmlChar  end2, xmlChar end3,
2616
823M
                           int check) {
2617
823M
    xmlChar *buffer = NULL;
2618
823M
    size_t buffer_size = 0;
2619
823M
    size_t nbchars = 0;
2620
2621
823M
    xmlChar *current = NULL;
2622
823M
    xmlChar *rep = NULL;
2623
823M
    const xmlChar *last;
2624
823M
    xmlEntityPtr ent;
2625
823M
    int c,l;
2626
2627
823M
    if (str == NULL)
2628
23.5k
        return(NULL);
2629
823M
    last = str + len;
2630
2631
823M
    if (((ctxt->depth > 40) &&
2632
823M
         ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2633
823M
  (ctxt->depth > 100)) {
2634
0
  xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_LOOP,
2635
0
                       "Maximum entity nesting depth exceeded");
2636
0
  return(NULL);
2637
0
    }
2638
2639
    /*
2640
     * allocate a translation buffer.
2641
     */
2642
823M
    buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2643
823M
    buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
2644
823M
    if (buffer == NULL) goto mem_error;
2645
2646
    /*
2647
     * OK loop until we reach one of the ending char or a size limit.
2648
     * we are operating on already parsed values.
2649
     */
2650
823M
    if (str < last)
2651
820M
  c = CUR_SCHAR(str, l);
2652
2.17M
    else
2653
2.17M
        c = 0;
2654
99.0G
    while ((c != 0) && (c != end) && /* non input consuming loop */
2655
99.0G
           (c != end2) && (c != end3) &&
2656
99.0G
           (ctxt->instate != XML_PARSER_EOF)) {
2657
2658
98.1G
  if (c == 0) break;
2659
98.1G
        if ((c == '&') && (str[1] == '#')) {
2660
2.92M
      int val = xmlParseStringCharRef(ctxt, &str);
2661
2.92M
      if (val == 0)
2662
16.3k
                goto int_error;
2663
2.90M
      COPY_BUF(0,buffer,nbchars,val);
2664
2.90M
      if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2665
1.75k
          growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2666
1.75k
      }
2667
98.1G
  } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2668
864M
      if (xmlParserDebugEntities)
2669
0
    xmlGenericError(xmlGenericErrorContext,
2670
0
      "String decoding Entity Reference: %.30s\n",
2671
0
      str);
2672
864M
      ent = xmlParseStringEntityRef(ctxt, &str);
2673
864M
      if ((ent != NULL) &&
2674
864M
    (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2675
1.45M
    if (ent->content != NULL) {
2676
1.45M
        COPY_BUF(0,buffer,nbchars,ent->content[0]);
2677
1.45M
        if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2678
24.8k
      growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2679
24.8k
        }
2680
1.45M
    } else {
2681
0
        xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2682
0
          "predefined entity has no content\n");
2683
0
                    goto int_error;
2684
0
    }
2685
863M
      } else if ((ent != NULL) && (ent->content != NULL)) {
2686
767M
          if ((check) && (xmlParserEntityCheck(ctxt, ent->length)))
2687
7.89k
                    goto int_error;
2688
2689
767M
                if (ent->flags & XML_ENT_EXPANDING) {
2690
1.49k
              xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2691
1.49k
                    xmlHaltParser(ctxt);
2692
1.49k
                    ent->content[0] = 0;
2693
1.49k
                    goto int_error;
2694
1.49k
                }
2695
2696
767M
                ent->flags |= XML_ENT_EXPANDING;
2697
767M
    ctxt->depth++;
2698
767M
    rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
2699
767M
                        ent->length, what, 0, 0, 0, check);
2700
767M
    ctxt->depth--;
2701
767M
                ent->flags &= ~XML_ENT_EXPANDING;
2702
2703
767M
    if (rep == NULL) {
2704
144k
                    ent->content[0] = 0;
2705
144k
                    goto int_error;
2706
144k
                }
2707
2708
767M
                current = rep;
2709
288G
                while (*current != 0) { /* non input consuming loop */
2710
287G
                    buffer[nbchars++] = *current++;
2711
287G
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2712
97.1M
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2713
97.1M
                    }
2714
287G
                }
2715
767M
                xmlFree(rep);
2716
767M
                rep = NULL;
2717
767M
      } else if (ent != NULL) {
2718
7.47M
    int i = xmlStrlen(ent->name);
2719
7.47M
    const xmlChar *cur = ent->name;
2720
2721
7.47M
    buffer[nbchars++] = '&';
2722
7.47M
    if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
2723
46.3k
        growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
2724
46.3k
    }
2725
68.5M
    for (;i > 0;i--)
2726
61.1M
        buffer[nbchars++] = *cur++;
2727
7.47M
    buffer[nbchars++] = ';';
2728
7.47M
      }
2729
97.3G
  } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2730
7.01M
      if (xmlParserDebugEntities)
2731
0
    xmlGenericError(xmlGenericErrorContext,
2732
0
      "String decoding PE Reference: %.30s\n", str);
2733
7.01M
      ent = xmlParseStringPEReference(ctxt, &str);
2734
7.01M
      if (ent != NULL) {
2735
6.73M
                if (ent->content == NULL) {
2736
        /*
2737
         * Note: external parsed entities will not be loaded,
2738
         * it is not required for a non-validating parser to
2739
         * complete external PEReferences coming from the
2740
         * internal subset
2741
         */
2742
19.1k
        if (((ctxt->options & XML_PARSE_NOENT) != 0) ||
2743
19.1k
      ((ctxt->options & XML_PARSE_DTDVALID) != 0) ||
2744
19.1k
      (ctxt->validate != 0)) {
2745
19.0k
      xmlLoadEntityContent(ctxt, ent);
2746
19.0k
        } else {
2747
170
      xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
2748
170
      "not validating will not read content for PE entity %s\n",
2749
170
                          ent->name, NULL);
2750
170
        }
2751
19.1k
    }
2752
2753
6.73M
          if ((check) && (xmlParserEntityCheck(ctxt, ent->length)))
2754
4.28k
                    goto int_error;
2755
2756
6.72M
                if (ent->flags & XML_ENT_EXPANDING) {
2757
196
              xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2758
196
                    xmlHaltParser(ctxt);
2759
196
                    if (ent->content != NULL)
2760
196
                        ent->content[0] = 0;
2761
196
                    goto int_error;
2762
196
                }
2763
2764
6.72M
                ent->flags |= XML_ENT_EXPANDING;
2765
6.72M
    ctxt->depth++;
2766
6.72M
    rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
2767
6.72M
                        ent->length, what, 0, 0, 0, check);
2768
6.72M
    ctxt->depth--;
2769
6.72M
                ent->flags &= ~XML_ENT_EXPANDING;
2770
2771
6.72M
    if (rep == NULL) {
2772
10.0k
                    if (ent->content != NULL)
2773
829
                        ent->content[0] = 0;
2774
10.0k
                    goto int_error;
2775
10.0k
                }
2776
6.71M
                current = rep;
2777
54.5G
                while (*current != 0) { /* non input consuming loop */
2778
54.5G
                    buffer[nbchars++] = *current++;
2779
54.5G
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2780
1.80M
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2781
1.80M
                    }
2782
54.5G
                }
2783
6.71M
                xmlFree(rep);
2784
6.71M
                rep = NULL;
2785
6.71M
      }
2786
97.3G
  } else {
2787
97.3G
      COPY_BUF(l,buffer,nbchars,c);
2788
97.3G
      str += l;
2789
97.3G
      if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2790
21.9M
          growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2791
21.9M
      }
2792
97.3G
  }
2793
98.1G
  if (str < last)
2794
97.3G
      c = CUR_SCHAR(str, l);
2795
820M
  else
2796
820M
      c = 0;
2797
98.1G
    }
2798
822M
    buffer[nbchars] = 0;
2799
822M
    return(buffer);
2800
2801
0
mem_error:
2802
0
    xmlErrMemory(ctxt, NULL);
2803
185k
int_error:
2804
185k
    if (rep != NULL)
2805
0
        xmlFree(rep);
2806
185k
    if (buffer != NULL)
2807
185k
        xmlFree(buffer);
2808
185k
    return(NULL);
2809
0
}
2810
2811
/**
2812
 * xmlStringLenDecodeEntities:
2813
 * @ctxt:  the parser context
2814
 * @str:  the input string
2815
 * @len: the string length
2816
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2817
 * @end:  an end marker xmlChar, 0 if none
2818
 * @end2:  an end marker xmlChar, 0 if none
2819
 * @end3:  an end marker xmlChar, 0 if none
2820
 *
2821
 * DEPRECATED: Internal function, don't use.
2822
 *
2823
 * Takes a entity string content and process to do the adequate substitutions.
2824
 *
2825
 * [67] Reference ::= EntityRef | CharRef
2826
 *
2827
 * [69] PEReference ::= '%' Name ';'
2828
 *
2829
 * Returns A newly allocated string with the substitution done. The caller
2830
 *      must deallocate it !
2831
 */
2832
xmlChar *
2833
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2834
                           int what, xmlChar end, xmlChar  end2,
2835
13.2k
                           xmlChar end3) {
2836
13.2k
    if ((ctxt == NULL) || (str == NULL) || (len < 0))
2837
0
        return(NULL);
2838
13.2k
    return(xmlStringDecodeEntitiesInt(ctxt, str, len, what,
2839
13.2k
                                      end, end2, end3, 0));
2840
13.2k
}
2841
2842
/**
2843
 * xmlStringDecodeEntities:
2844
 * @ctxt:  the parser context
2845
 * @str:  the input string
2846
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2847
 * @end:  an end marker xmlChar, 0 if none
2848
 * @end2:  an end marker xmlChar, 0 if none
2849
 * @end3:  an end marker xmlChar, 0 if none
2850
 *
2851
 * DEPRECATED: Internal function, don't use.
2852
 *
2853
 * Takes a entity string content and process to do the adequate substitutions.
2854
 *
2855
 * [67] Reference ::= EntityRef | CharRef
2856
 *
2857
 * [69] PEReference ::= '%' Name ';'
2858
 *
2859
 * Returns A newly allocated string with the substitution done. The caller
2860
 *      must deallocate it !
2861
 */
2862
xmlChar *
2863
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2864
260k
            xmlChar end, xmlChar  end2, xmlChar end3) {
2865
260k
    if ((ctxt == NULL) || (str == NULL)) return(NULL);
2866
260k
    return(xmlStringDecodeEntitiesInt(ctxt, str, xmlStrlen(str), what,
2867
260k
                                      end, end2, end3, 0));
2868
260k
}
2869
2870
/************************************************************************
2871
 *                  *
2872
 *    Commodity functions, cleanup needed ?     *
2873
 *                  *
2874
 ************************************************************************/
2875
2876
/**
2877
 * areBlanks:
2878
 * @ctxt:  an XML parser context
2879
 * @str:  a xmlChar *
2880
 * @len:  the size of @str
2881
 * @blank_chars: we know the chars are blanks
2882
 *
2883
 * Is this a sequence of blank chars that one can ignore ?
2884
 *
2885
 * Returns 1 if ignorable 0 otherwise.
2886
 */
2887
2888
static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2889
177M
                     int blank_chars) {
2890
177M
    int i, ret;
2891
177M
    xmlNodePtr lastChild;
2892
2893
    /*
2894
     * Don't spend time trying to differentiate them, the same callback is
2895
     * used !
2896
     */
2897
177M
    if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2898
12.0M
  return(0);
2899
2900
    /*
2901
     * Check for xml:space value.
2902
     */
2903
165M
    if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2904
165M
        (*(ctxt->space) == -2))
2905
85.7M
  return(0);
2906
2907
    /*
2908
     * Check that the string is made of blanks
2909
     */
2910
79.2M
    if (blank_chars == 0) {
2911
435M
  for (i = 0;i < len;i++)
2912
373M
      if (!(IS_BLANK_CH(str[i]))) return(0);
2913
64.6M
    }
2914
2915
    /*
2916
     * Look if the element is mixed content in the DTD if available
2917
     */
2918
76.1M
    if (ctxt->node == NULL) return(0);
2919
75.9M
    if (ctxt->myDoc != NULL) {
2920
75.9M
  ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2921
75.9M
        if (ret == 0) return(1);
2922
75.3M
        if (ret == 1) return(0);
2923
75.3M
    }
2924
2925
    /*
2926
     * Otherwise, heuristic :-\
2927
     */
2928
75.3M
    if ((RAW != '<') && (RAW != 0xD)) return(0);
2929
75.2M
    if ((ctxt->node->children == NULL) &&
2930
75.2M
  (RAW == '<') && (NXT(1) == '/')) return(0);
2931
2932
75.2M
    lastChild = xmlGetLastChild(ctxt->node);
2933
75.2M
    if (lastChild == NULL) {
2934
3.24M
        if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2935
3.24M
            (ctxt->node->content != NULL)) return(0);
2936
71.9M
    } else if (xmlNodeIsText(lastChild))
2937
119k
        return(0);
2938
71.8M
    else if ((ctxt->node->children != NULL) &&
2939
71.8M
             (xmlNodeIsText(ctxt->node->children)))
2940
269k
        return(0);
2941
74.8M
    return(1);
2942
75.2M
}
2943
2944
/************************************************************************
2945
 *                  *
2946
 *    Extra stuff for namespace support     *
2947
 *  Relates to http://www.w3.org/TR/WD-xml-names      *
2948
 *                  *
2949
 ************************************************************************/
2950
2951
/**
2952
 * xmlSplitQName:
2953
 * @ctxt:  an XML parser context
2954
 * @name:  an XML parser context
2955
 * @prefix:  a xmlChar **
2956
 *
2957
 * parse an UTF8 encoded XML qualified name string
2958
 *
2959
 * [NS 5] QName ::= (Prefix ':')? LocalPart
2960
 *
2961
 * [NS 6] Prefix ::= NCName
2962
 *
2963
 * [NS 7] LocalPart ::= NCName
2964
 *
2965
 * Returns the local part, and prefix is updated
2966
 *   to get the Prefix if any.
2967
 */
2968
2969
xmlChar *
2970
385M
xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2971
385M
    xmlChar buf[XML_MAX_NAMELEN + 5];
2972
385M
    xmlChar *buffer = NULL;
2973
385M
    int len = 0;
2974
385M
    int max = XML_MAX_NAMELEN;
2975
385M
    xmlChar *ret = NULL;
2976
385M
    const xmlChar *cur = name;
2977
385M
    int c;
2978
2979
385M
    if (prefix == NULL) return(NULL);
2980
385M
    *prefix = NULL;
2981
2982
385M
    if (cur == NULL) return(NULL);
2983
2984
#ifndef XML_XML_NAMESPACE
2985
    /* xml: prefix is not really a namespace */
2986
    if ((cur[0] == 'x') && (cur[1] == 'm') &&
2987
        (cur[2] == 'l') && (cur[3] == ':'))
2988
  return(xmlStrdup(name));
2989
#endif
2990
2991
    /* nasty but well=formed */
2992
385M
    if (cur[0] == ':')
2993
2.40k
  return(xmlStrdup(name));
2994
2995
385M
    c = *cur++;
2996
1.19G
    while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2997
812M
  buf[len++] = c;
2998
812M
  c = *cur++;
2999
812M
    }
3000
385M
    if (len >= max) {
3001
  /*
3002
   * Okay someone managed to make a huge name, so he's ready to pay
3003
   * for the processing speed.
3004
   */
3005
24.0k
  max = len * 2;
3006
3007
24.0k
  buffer = (xmlChar *) xmlMallocAtomic(max);
3008
24.0k
  if (buffer == NULL) {
3009
0
      xmlErrMemory(ctxt, NULL);
3010
0
      return(NULL);
3011
0
  }
3012
24.0k
  memcpy(buffer, buf, len);
3013
93.3M
  while ((c != 0) && (c != ':')) { /* tested bigname.xml */
3014
93.3M
      if (len + 10 > max) {
3015
67.5k
          xmlChar *tmp;
3016
3017
67.5k
    max *= 2;
3018
67.5k
    tmp = (xmlChar *) xmlRealloc(buffer, max);
3019
67.5k
    if (tmp == NULL) {
3020
0
        xmlFree(buffer);
3021
0
        xmlErrMemory(ctxt, NULL);
3022
0
        return(NULL);
3023
0
    }
3024
67.5k
    buffer = tmp;
3025
67.5k
      }
3026
93.3M
      buffer[len++] = c;
3027
93.3M
      c = *cur++;
3028
93.3M
  }
3029
24.0k
  buffer[len] = 0;
3030
24.0k
    }
3031
3032
385M
    if ((c == ':') && (*cur == 0)) {
3033
4.35k
        if (buffer != NULL)
3034
205
      xmlFree(buffer);
3035
4.35k
  *prefix = NULL;
3036
4.35k
  return(xmlStrdup(name));
3037
4.35k
    }
3038
3039
385M
    if (buffer == NULL)
3040
385M
  ret = xmlStrndup(buf, len);
3041
23.8k
    else {
3042
23.8k
  ret = buffer;
3043
23.8k
  buffer = NULL;
3044
23.8k
  max = XML_MAX_NAMELEN;
3045
23.8k
    }
3046
3047
3048
385M
    if (c == ':') {
3049
3.05M
  c = *cur;
3050
3.05M
        *prefix = ret;
3051
3.05M
  if (c == 0) {
3052
0
      return(xmlStrndup(BAD_CAST "", 0));
3053
0
  }
3054
3.05M
  len = 0;
3055
3056
  /*
3057
   * Check that the first character is proper to start
3058
   * a new name
3059
   */
3060
3.05M
  if (!(((c >= 0x61) && (c <= 0x7A)) ||
3061
3.05M
        ((c >= 0x41) && (c <= 0x5A)) ||
3062
3.05M
        (c == '_') || (c == ':'))) {
3063
5.51k
      int l;
3064
5.51k
      int first = CUR_SCHAR(cur, l);
3065
3066
5.51k
      if (!IS_LETTER(first) && (first != '_')) {
3067
3.07k
    xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3068
3.07k
          "Name %s is not XML Namespace compliant\n",
3069
3.07k
          name);
3070
3.07k
      }
3071
5.51k
  }
3072
3.05M
  cur++;
3073
3074
19.6M
  while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3075
16.5M
      buf[len++] = c;
3076
16.5M
      c = *cur++;
3077
16.5M
  }
3078
3.05M
  if (len >= max) {
3079
      /*
3080
       * Okay someone managed to make a huge name, so he's ready to pay
3081
       * for the processing speed.
3082
       */
3083
14.1k
      max = len * 2;
3084
3085
14.1k
      buffer = (xmlChar *) xmlMallocAtomic(max);
3086
14.1k
      if (buffer == NULL) {
3087
0
          xmlErrMemory(ctxt, NULL);
3088
0
    return(NULL);
3089
0
      }
3090
14.1k
      memcpy(buffer, buf, len);
3091
51.1M
      while (c != 0) { /* tested bigname2.xml */
3092
51.1M
    if (len + 10 > max) {
3093
38.3k
        xmlChar *tmp;
3094
3095
38.3k
        max *= 2;
3096
38.3k
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3097
38.3k
        if (tmp == NULL) {
3098
0
      xmlErrMemory(ctxt, NULL);
3099
0
      xmlFree(buffer);
3100
0
      return(NULL);
3101
0
        }
3102
38.3k
        buffer = tmp;
3103
38.3k
    }
3104
51.1M
    buffer[len++] = c;
3105
51.1M
    c = *cur++;
3106
51.1M
      }
3107
14.1k
      buffer[len] = 0;
3108
14.1k
  }
3109
3110
3.05M
  if (buffer == NULL)
3111
3.04M
      ret = xmlStrndup(buf, len);
3112
14.1k
  else {
3113
14.1k
      ret = buffer;
3114
14.1k
  }
3115
3.05M
    }
3116
3117
385M
    return(ret);
3118
385M
}
3119
3120
/************************************************************************
3121
 *                  *
3122
 *      The parser itself       *
3123
 *  Relates to http://www.w3.org/TR/REC-xml       *
3124
 *                  *
3125
 ************************************************************************/
3126
3127
/************************************************************************
3128
 *                  *
3129
 *  Routines to parse Name, NCName and NmToken      *
3130
 *                  *
3131
 ************************************************************************/
3132
#ifdef DEBUG
/*
 * Call counters, only compiled in DEBUG builds. Objects with static
 * storage duration start at zero, so no explicit initializer is needed.
 */
static unsigned long nbParseName;           /* bumped by xmlParseName */
static unsigned long nbParseNmToken;
static unsigned long nbParseNCName;
static unsigned long nbParseNCNameComplex;  /* xmlParseNCNameComplex */
static unsigned long nbParseNameComplex;    /* xmlParseNameComplex */
static unsigned long nbParseStringName;
#endif
3140
3141
/*
3142
 * The two following functions are related to the change of accepted
3143
 * characters for Name and NmToken in the Revision 5 of XML-1.0
3144
 * They correspond to the modified production [4] and the new production [4a]
3145
 * changes in that revision. Also note that the macros used for the
3146
 * productions Letter, Digit, CombiningChar and Extender are not needed
3147
 * anymore.
3148
 * We still keep compatibility to pre-revision5 parsing semantic if the
3149
 * new XML_PARSE_OLD10 option is given to the parser.
3150
 */
3151
static int
3152
886M
xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3153
886M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3154
        /*
3155
   * Use the new checks of production [4] [4a] amd [5] of the
3156
   * Update 5 of XML-1.0
3157
   */
3158
872M
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3159
872M
      (((c >= 'a') && (c <= 'z')) ||
3160
872M
       ((c >= 'A') && (c <= 'Z')) ||
3161
872M
       (c == '_') || (c == ':') ||
3162
872M
       ((c >= 0xC0) && (c <= 0xD6)) ||
3163
872M
       ((c >= 0xD8) && (c <= 0xF6)) ||
3164
872M
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3165
872M
       ((c >= 0x370) && (c <= 0x37D)) ||
3166
872M
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3167
872M
       ((c >= 0x200C) && (c <= 0x200D)) ||
3168
872M
       ((c >= 0x2070) && (c <= 0x218F)) ||
3169
872M
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3170
872M
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3171
872M
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3172
872M
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3173
872M
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3174
869M
      return(1);
3175
872M
    } else {
3176
14.2M
        if (IS_LETTER(c) || (c == '_') || (c == ':'))
3177
13.7M
      return(1);
3178
14.2M
    }
3179
2.84M
    return(0);
3180
886M
}
3181
3182
static int
3183
16.3G
xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3184
16.3G
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3185
        /*
3186
   * Use the new checks of production [4] [4a] amd [5] of the
3187
   * Update 5 of XML-1.0
3188
   */
3189
16.2G
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3190
16.2G
      (((c >= 'a') && (c <= 'z')) ||
3191
16.2G
       ((c >= 'A') && (c <= 'Z')) ||
3192
16.2G
       ((c >= '0') && (c <= '9')) || /* !start */
3193
16.2G
       (c == '_') || (c == ':') ||
3194
16.2G
       (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3195
16.2G
       ((c >= 0xC0) && (c <= 0xD6)) ||
3196
16.2G
       ((c >= 0xD8) && (c <= 0xF6)) ||
3197
16.2G
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3198
16.2G
       ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3199
16.2G
       ((c >= 0x370) && (c <= 0x37D)) ||
3200
16.2G
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3201
16.2G
       ((c >= 0x200C) && (c <= 0x200D)) ||
3202
16.2G
       ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3203
16.2G
       ((c >= 0x2070) && (c <= 0x218F)) ||
3204
16.2G
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3205
16.2G
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3206
16.2G
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3207
16.2G
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3208
16.2G
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3209
15.3G
       return(1);
3210
16.2G
    } else {
3211
107M
        if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3212
107M
            (c == '.') || (c == '-') ||
3213
107M
      (c == '_') || (c == ':') ||
3214
107M
      (IS_COMBINING(c)) ||
3215
107M
      (IS_EXTENDER(c)))
3216
92.2M
      return(1);
3217
107M
    }
3218
898M
    return(0);
3219
16.3G
}
3220
3221
static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
3222
                                          int *len, int *alloc, int normalize);
3223
3224
static const xmlChar *
3225
7.82M
xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3226
7.82M
    int len = 0, l;
3227
7.82M
    int c;
3228
7.82M
    int count = 0;
3229
7.82M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3230
2.66M
                    XML_MAX_TEXT_LENGTH :
3231
7.82M
                    XML_MAX_NAME_LENGTH;
3232
3233
#ifdef DEBUG
3234
    nbParseNameComplex++;
3235
#endif
3236
3237
    /*
3238
     * Handler for more complex cases
3239
     */
3240
7.82M
    GROW;
3241
7.82M
    if (ctxt->instate == XML_PARSER_EOF)
3242
0
        return(NULL);
3243
7.82M
    c = CUR_CHAR(l);
3244
7.82M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3245
        /*
3246
   * Use the new checks of production [4] [4a] amd [5] of the
3247
   * Update 5 of XML-1.0
3248
   */
3249
4.78M
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3250
4.78M
      (!(((c >= 'a') && (c <= 'z')) ||
3251
4.71M
         ((c >= 'A') && (c <= 'Z')) ||
3252
4.71M
         (c == '_') || (c == ':') ||
3253
4.71M
         ((c >= 0xC0) && (c <= 0xD6)) ||
3254
4.71M
         ((c >= 0xD8) && (c <= 0xF6)) ||
3255
4.71M
         ((c >= 0xF8) && (c <= 0x2FF)) ||
3256
4.71M
         ((c >= 0x370) && (c <= 0x37D)) ||
3257
4.71M
         ((c >= 0x37F) && (c <= 0x1FFF)) ||
3258
4.71M
         ((c >= 0x200C) && (c <= 0x200D)) ||
3259
4.71M
         ((c >= 0x2070) && (c <= 0x218F)) ||
3260
4.71M
         ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3261
4.71M
         ((c >= 0x3001) && (c <= 0xD7FF)) ||
3262
4.71M
         ((c >= 0xF900) && (c <= 0xFDCF)) ||
3263
4.71M
         ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3264
4.71M
         ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3265
1.63M
      return(NULL);
3266
1.63M
  }
3267
3.15M
  len += l;
3268
3.15M
  NEXTL(l);
3269
3.15M
  c = CUR_CHAR(l);
3270
139M
  while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3271
139M
         (((c >= 'a') && (c <= 'z')) ||
3272
139M
          ((c >= 'A') && (c <= 'Z')) ||
3273
139M
          ((c >= '0') && (c <= '9')) || /* !start */
3274
139M
          (c == '_') || (c == ':') ||
3275
139M
          (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3276
139M
          ((c >= 0xC0) && (c <= 0xD6)) ||
3277
139M
          ((c >= 0xD8) && (c <= 0xF6)) ||
3278
139M
          ((c >= 0xF8) && (c <= 0x2FF)) ||
3279
139M
          ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3280
139M
          ((c >= 0x370) && (c <= 0x37D)) ||
3281
139M
          ((c >= 0x37F) && (c <= 0x1FFF)) ||
3282
139M
          ((c >= 0x200C) && (c <= 0x200D)) ||
3283
139M
          ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3284
139M
          ((c >= 0x2070) && (c <= 0x218F)) ||
3285
139M
          ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3286
139M
          ((c >= 0x3001) && (c <= 0xD7FF)) ||
3287
139M
          ((c >= 0xF900) && (c <= 0xFDCF)) ||
3288
139M
          ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3289
139M
          ((c >= 0x10000) && (c <= 0xEFFFF))
3290
139M
    )) {
3291
136M
      if (count++ > XML_PARSER_CHUNK_SIZE) {
3292
1.19M
    count = 0;
3293
1.19M
    GROW;
3294
1.19M
                if (ctxt->instate == XML_PARSER_EOF)
3295
0
                    return(NULL);
3296
1.19M
      }
3297
136M
            if (len <= INT_MAX - l)
3298
136M
          len += l;
3299
136M
      NEXTL(l);
3300
136M
      c = CUR_CHAR(l);
3301
136M
  }
3302
3.15M
    } else {
3303
3.04M
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3304
3.04M
      (!IS_LETTER(c) && (c != '_') &&
3305
2.97M
       (c != ':'))) {
3306
1.86M
      return(NULL);
3307
1.86M
  }
3308
1.17M
  len += l;
3309
1.17M
  NEXTL(l);
3310
1.17M
  c = CUR_CHAR(l);
3311
3312
134M
  while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3313
134M
         ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3314
134M
    (c == '.') || (c == '-') ||
3315
134M
    (c == '_') || (c == ':') ||
3316
134M
    (IS_COMBINING(c)) ||
3317
134M
    (IS_EXTENDER(c)))) {
3318
133M
      if (count++ > XML_PARSER_CHUNK_SIZE) {
3319
1.24M
    count = 0;
3320
1.24M
    GROW;
3321
1.24M
                if (ctxt->instate == XML_PARSER_EOF)
3322
0
                    return(NULL);
3323
1.24M
      }
3324
133M
            if (len <= INT_MAX - l)
3325
133M
          len += l;
3326
133M
      NEXTL(l);
3327
133M
      c = CUR_CHAR(l);
3328
133M
  }
3329
1.17M
    }
3330
4.33M
    if (len > maxLength) {
3331
1.08k
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3332
1.08k
        return(NULL);
3333
1.08k
    }
3334
4.32M
    if (ctxt->input->cur - ctxt->input->base < len) {
3335
        /*
3336
         * There were a couple of bugs where PERefs lead to to a change
3337
         * of the buffer. Check the buffer size to avoid passing an invalid
3338
         * pointer to xmlDictLookup.
3339
         */
3340
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3341
0
                    "unexpected change of input buffer");
3342
0
        return (NULL);
3343
0
    }
3344
4.32M
    if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3345
14.7k
        return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3346
4.31M
    return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3347
4.32M
}
3348
3349
/**
3350
 * xmlParseName:
3351
 * @ctxt:  an XML parser context
3352
 *
3353
 * DEPRECATED: Internal function, don't use.
3354
 *
3355
 * parse an XML name.
3356
 *
3357
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3358
 *                  CombiningChar | Extender
3359
 *
3360
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3361
 *
3362
 * [6] Names ::= Name (#x20 Name)*
3363
 *
3364
 * Returns the Name parsed or NULL
3365
 */
3366
3367
const xmlChar *
3368
1.65G
xmlParseName(xmlParserCtxtPtr ctxt) {
3369
1.65G
    const xmlChar *in;
3370
1.65G
    const xmlChar *ret;
3371
1.65G
    size_t count = 0;
3372
1.65G
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3373
206M
                       XML_MAX_TEXT_LENGTH :
3374
1.65G
                       XML_MAX_NAME_LENGTH;
3375
3376
1.65G
    GROW;
3377
3378
#ifdef DEBUG
3379
    nbParseName++;
3380
#endif
3381
3382
    /*
3383
     * Accelerator for simple ASCII names
3384
     */
3385
1.65G
    in = ctxt->input->cur;
3386
1.65G
    if (((*in >= 0x61) && (*in <= 0x7A)) ||
3387
1.65G
  ((*in >= 0x41) && (*in <= 0x5A)) ||
3388
1.65G
  (*in == '_') || (*in == ':')) {
3389
1.65G
  in++;
3390
9.28G
  while (((*in >= 0x61) && (*in <= 0x7A)) ||
3391
9.28G
         ((*in >= 0x41) && (*in <= 0x5A)) ||
3392
9.28G
         ((*in >= 0x30) && (*in <= 0x39)) ||
3393
9.28G
         (*in == '_') || (*in == '-') ||
3394
9.28G
         (*in == ':') || (*in == '.'))
3395
7.63G
      in++;
3396
1.65G
  if ((*in > 0) && (*in < 0x80)) {
3397
1.64G
      count = in - ctxt->input->cur;
3398
1.64G
            if (count > maxLength) {
3399
1.05k
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3400
1.05k
                return(NULL);
3401
1.05k
            }
3402
1.64G
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3403
1.64G
      ctxt->input->cur = in;
3404
1.64G
      ctxt->input->col += count;
3405
1.64G
      if (ret == NULL)
3406
0
          xmlErrMemory(ctxt, NULL);
3407
1.64G
      return(ret);
3408
1.64G
  }
3409
1.65G
    }
3410
    /* accelerator for special cases */
3411
7.82M
    return(xmlParseNameComplex(ctxt));
3412
1.65G
}
3413
3414
static const xmlChar *
3415
3.33M
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3416
3.33M
    int len = 0, l;
3417
3.33M
    int c;
3418
3.33M
    int count = 0;
3419
3.33M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3420
524k
                    XML_MAX_TEXT_LENGTH :
3421
3.33M
                    XML_MAX_NAME_LENGTH;
3422
3.33M
    size_t startPosition = 0;
3423
3424
#ifdef DEBUG
3425
    nbParseNCNameComplex++;
3426
#endif
3427
3428
    /*
3429
     * Handler for more complex cases
3430
     */
3431
3.33M
    GROW;
3432
3.33M
    startPosition = CUR_PTR - BASE_PTR;
3433
3.33M
    c = CUR_CHAR(l);
3434
3.33M
    if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3435
3.33M
  (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3436
2.86M
  return(NULL);
3437
2.86M
    }
3438
3439
72.9M
    while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3440
72.9M
     (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3441
72.4M
  if (count++ > XML_PARSER_CHUNK_SIZE) {
3442
682k
      count = 0;
3443
682k
      GROW;
3444
682k
            if (ctxt->instate == XML_PARSER_EOF)
3445
0
                return(NULL);
3446
682k
  }
3447
72.4M
        if (len <= INT_MAX - l)
3448
72.4M
      len += l;
3449
72.4M
  NEXTL(l);
3450
72.4M
  c = CUR_CHAR(l);
3451
72.4M
  if (c == 0) {
3452
11.7k
      count = 0;
3453
      /*
3454
       * when shrinking to extend the buffer we really need to preserve
3455
       * the part of the name we already parsed. Hence rolling back
3456
       * by current length.
3457
       */
3458
11.7k
      ctxt->input->cur -= l;
3459
11.7k
      GROW;
3460
11.7k
            if (ctxt->instate == XML_PARSER_EOF)
3461
0
                return(NULL);
3462
11.7k
      ctxt->input->cur += l;
3463
11.7k
      c = CUR_CHAR(l);
3464
11.7k
  }
3465
72.4M
    }
3466
466k
    if (len > maxLength) {
3467
453
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3468
453
        return(NULL);
3469
453
    }
3470
466k
    return(xmlDictLookup(ctxt->dict, (BASE_PTR + startPosition), len));
3471
466k
}
3472
3473
/**
3474
 * xmlParseNCName:
3475
 * @ctxt:  an XML parser context
3476
 * @len:  length of the string parsed
3477
 *
3478
 * parse an XML name.
3479
 *
3480
 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3481
 *                      CombiningChar | Extender
3482
 *
3483
 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3484
 *
3485
 * Returns the Name parsed or NULL
3486
 */
3487
3488
static const xmlChar *
3489
843M
xmlParseNCName(xmlParserCtxtPtr ctxt) {
3490
843M
    const xmlChar *in, *e;
3491
843M
    const xmlChar *ret;
3492
843M
    size_t count = 0;
3493
843M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3494
80.0M
                       XML_MAX_TEXT_LENGTH :
3495
843M
                       XML_MAX_NAME_LENGTH;
3496
3497
#ifdef DEBUG
3498
    nbParseNCName++;
3499
#endif
3500
3501
    /*
3502
     * Accelerator for simple ASCII names
3503
     */
3504
843M
    in = ctxt->input->cur;
3505
843M
    e = ctxt->input->end;
3506
843M
    if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3507
843M
   ((*in >= 0x41) && (*in <= 0x5A)) ||
3508
843M
   (*in == '_')) && (in < e)) {
3509
840M
  in++;
3510
2.05G
  while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3511
2.05G
          ((*in >= 0x41) && (*in <= 0x5A)) ||
3512
2.05G
          ((*in >= 0x30) && (*in <= 0x39)) ||
3513
2.05G
          (*in == '_') || (*in == '-') ||
3514
2.05G
          (*in == '.')) && (in < e))
3515
1.21G
      in++;
3516
840M
  if (in >= e)
3517
7.02k
      goto complex;
3518
840M
  if ((*in > 0) && (*in < 0x80)) {
3519
840M
      count = in - ctxt->input->cur;
3520
840M
            if (count > maxLength) {
3521
1.10k
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3522
1.10k
                return(NULL);
3523
1.10k
            }
3524
840M
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3525
840M
      ctxt->input->cur = in;
3526
840M
      ctxt->input->col += count;
3527
840M
      if (ret == NULL) {
3528
0
          xmlErrMemory(ctxt, NULL);
3529
0
      }
3530
840M
      return(ret);
3531
840M
  }
3532
840M
    }
3533
3.33M
complex:
3534
3.33M
    return(xmlParseNCNameComplex(ctxt));
3535
843M
}
3536
3537
/**
3538
 * xmlParseNameAndCompare:
3539
 * @ctxt:  an XML parser context
3540
 *
3541
 * parse an XML name and compares for match
3542
 * (specialized for endtag parsing)
3543
 *
3544
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3545
 * and the name for mismatch
3546
 */
3547
3548
static const xmlChar *
3549
128M
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3550
128M
    register const xmlChar *cmp = other;
3551
128M
    register const xmlChar *in;
3552
128M
    const xmlChar *ret;
3553
3554
128M
    GROW;
3555
128M
    if (ctxt->instate == XML_PARSER_EOF)
3556
0
        return(NULL);
3557
3558
128M
    in = ctxt->input->cur;
3559
621M
    while (*in != 0 && *in == *cmp) {
3560
492M
  ++in;
3561
492M
  ++cmp;
3562
492M
    }
3563
128M
    if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3564
  /* success */
3565
127M
  ctxt->input->col += in - ctxt->input->cur;
3566
127M
  ctxt->input->cur = in;
3567
127M
  return (const xmlChar*) 1;
3568
127M
    }
3569
    /* failure (or end of input buffer), check with full function */
3570
1.45M
    ret = xmlParseName (ctxt);
3571
    /* strings coming from the dictionary direct compare possible */
3572
1.45M
    if (ret == other) {
3573
37.9k
  return (const xmlChar*) 1;
3574
37.9k
    }
3575
1.41M
    return ret;
3576
1.45M
}
3577
3578
/**
3579
 * xmlParseStringName:
3580
 * @ctxt:  an XML parser context
3581
 * @str:  a pointer to the string pointer (IN/OUT)
3582
 *
3583
 * parse an XML name.
3584
 *
3585
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3586
 *                  CombiningChar | Extender
3587
 *
3588
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3589
 *
3590
 * [6] Names ::= Name (#x20 Name)*
3591
 *
3592
 * Returns the Name parsed or NULL. The @str pointer
3593
 * is updated to the current location in the string.
3594
 */
3595
3596
static xmlChar *
3597
883M
xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3598
883M
    xmlChar buf[XML_MAX_NAMELEN + 5];
3599
883M
    const xmlChar *cur = *str;
3600
883M
    int len = 0, l;
3601
883M
    int c;
3602
883M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3603
3.61M
                    XML_MAX_TEXT_LENGTH :
3604
883M
                    XML_MAX_NAME_LENGTH;
3605
3606
#ifdef DEBUG
3607
    nbParseStringName++;
3608
#endif
3609
3610
883M
    c = CUR_SCHAR(cur, l);
3611
883M
    if (!xmlIsNameStartChar(ctxt, c)) {
3612
45.0k
  return(NULL);
3613
45.0k
    }
3614
3615
883M
    COPY_BUF(l,buf,len,c);
3616
883M
    cur += l;
3617
883M
    c = CUR_SCHAR(cur, l);
3618
7.65G
    while (xmlIsNameChar(ctxt, c)) {
3619
6.80G
  COPY_BUF(l,buf,len,c);
3620
6.80G
  cur += l;
3621
6.80G
  c = CUR_SCHAR(cur, l);
3622
6.80G
  if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3623
      /*
3624
       * Okay someone managed to make a huge name, so he's ready to pay
3625
       * for the processing speed.
3626
       */
3627
36.8M
      xmlChar *buffer;
3628
36.8M
      int max = len * 2;
3629
3630
36.8M
      buffer = (xmlChar *) xmlMallocAtomic(max);
3631
36.8M
      if (buffer == NULL) {
3632
0
          xmlErrMemory(ctxt, NULL);
3633
0
    return(NULL);
3634
0
      }
3635
36.8M
      memcpy(buffer, buf, len);
3636
8.52G
      while (xmlIsNameChar(ctxt, c)) {
3637
8.48G
    if (len + 10 > max) {
3638
36.9M
        xmlChar *tmp;
3639
3640
36.9M
        max *= 2;
3641
36.9M
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3642
36.9M
        if (tmp == NULL) {
3643
0
      xmlErrMemory(ctxt, NULL);
3644
0
      xmlFree(buffer);
3645
0
      return(NULL);
3646
0
        }
3647
36.9M
        buffer = tmp;
3648
36.9M
    }
3649
8.48G
    COPY_BUF(l,buffer,len,c);
3650
8.48G
    cur += l;
3651
8.48G
    c = CUR_SCHAR(cur, l);
3652
8.48G
                if (len > maxLength) {
3653
211
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3654
211
                    xmlFree(buffer);
3655
211
                    return(NULL);
3656
211
                }
3657
8.48G
      }
3658
36.8M
      buffer[len] = 0;
3659
36.8M
      *str = cur;
3660
36.8M
      return(buffer);
3661
36.8M
  }
3662
6.80G
    }
3663
846M
    if (len > maxLength) {
3664
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3665
0
        return(NULL);
3666
0
    }
3667
846M
    *str = cur;
3668
846M
    return(xmlStrndup(buf, len));
3669
846M
}
3670
3671
/**
3672
 * xmlParseNmtoken:
3673
 * @ctxt:  an XML parser context
3674
 *
3675
 * DEPRECATED: Internal function, don't use.
3676
 *
3677
 * parse an XML Nmtoken.
3678
 *
3679
 * [7] Nmtoken ::= (NameChar)+
3680
 *
3681
 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3682
 *
3683
 * Returns the Nmtoken parsed or NULL
3684
 */
3685
3686
xmlChar *
3687
14.8M
xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3688
14.8M
    xmlChar buf[XML_MAX_NAMELEN + 5];
3689
14.8M
    int len = 0, l;
3690
14.8M
    int c;
3691
14.8M
    int count = 0;
3692
14.8M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3693
263k
                    XML_MAX_TEXT_LENGTH :
3694
14.8M
                    XML_MAX_NAME_LENGTH;
3695
3696
#ifdef DEBUG
3697
    nbParseNmToken++;
3698
#endif
3699
3700
14.8M
    GROW;
3701
14.8M
    if (ctxt->instate == XML_PARSER_EOF)
3702
0
        return(NULL);
3703
14.8M
    c = CUR_CHAR(l);
3704
3705
60.6M
    while (xmlIsNameChar(ctxt, c)) {
3706
45.7M
  if (count++ > XML_PARSER_CHUNK_SIZE) {
3707
0
      count = 0;
3708
0
      GROW;
3709
0
  }
3710
45.7M
  COPY_BUF(l,buf,len,c);
3711
45.7M
  NEXTL(l);
3712
45.7M
  c = CUR_CHAR(l);
3713
45.7M
  if (c == 0) {
3714
1.06k
      count = 0;
3715
1.06k
      GROW;
3716
1.06k
      if (ctxt->instate == XML_PARSER_EOF)
3717
0
    return(NULL);
3718
1.06k
            c = CUR_CHAR(l);
3719
1.06k
  }
3720
45.7M
  if (len >= XML_MAX_NAMELEN) {
3721
      /*
3722
       * Okay someone managed to make a huge token, so he's ready to pay
3723
       * for the processing speed.
3724
       */
3725
10.9k
      xmlChar *buffer;
3726
10.9k
      int max = len * 2;
3727
3728
10.9k
      buffer = (xmlChar *) xmlMallocAtomic(max);
3729
10.9k
      if (buffer == NULL) {
3730
0
          xmlErrMemory(ctxt, NULL);
3731
0
    return(NULL);
3732
0
      }
3733
10.9k
      memcpy(buffer, buf, len);
3734
54.5M
      while (xmlIsNameChar(ctxt, c)) {
3735
54.4M
    if (count++ > XML_PARSER_CHUNK_SIZE) {
3736
539k
        count = 0;
3737
539k
        GROW;
3738
539k
                    if (ctxt->instate == XML_PARSER_EOF) {
3739
0
                        xmlFree(buffer);
3740
0
                        return(NULL);
3741
0
                    }
3742
539k
    }
3743
54.4M
    if (len + 10 > max) {
3744
28.2k
        xmlChar *tmp;
3745
3746
28.2k
        max *= 2;
3747
28.2k
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3748
28.2k
        if (tmp == NULL) {
3749
0
      xmlErrMemory(ctxt, NULL);
3750
0
      xmlFree(buffer);
3751
0
      return(NULL);
3752
0
        }
3753
28.2k
        buffer = tmp;
3754
28.2k
    }
3755
54.4M
    COPY_BUF(l,buffer,len,c);
3756
54.4M
    NEXTL(l);
3757
54.4M
    c = CUR_CHAR(l);
3758
54.4M
                if (len > maxLength) {
3759
504
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3760
504
                    xmlFree(buffer);
3761
504
                    return(NULL);
3762
504
                }
3763
54.4M
      }
3764
10.4k
      buffer[len] = 0;
3765
10.4k
      return(buffer);
3766
10.9k
  }
3767
45.7M
    }
3768
14.8M
    if (len == 0)
3769
12.6k
        return(NULL);
3770
14.8M
    if (len > maxLength) {
3771
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3772
0
        return(NULL);
3773
0
    }
3774
14.8M
    return(xmlStrndup(buf, len));
3775
14.8M
}
3776
3777
/**
3778
 * xmlParseEntityValue:
3779
 * @ctxt:  an XML parser context
3780
 * @orig:  if non-NULL store a copy of the original entity value
3781
 *
3782
 * DEPRECATED: Internal function, don't use.
3783
 *
3784
 * parse a value for ENTITY declarations
3785
 *
3786
 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3787
 *                 "'" ([^%&'] | PEReference | Reference)* "'"
3788
 *
3789
 * Returns the EntityValue parsed with reference substituted or NULL
3790
 */
3791
3792
xmlChar *
3793
10.1M
xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3794
10.1M
    xmlChar *buf = NULL;
3795
10.1M
    int len = 0;
3796
10.1M
    int size = XML_PARSER_BUFFER_SIZE;
3797
10.1M
    int c, l;
3798
10.1M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3799
62.2k
                    XML_MAX_HUGE_LENGTH :
3800
10.1M
                    XML_MAX_TEXT_LENGTH;
3801
10.1M
    xmlChar stop;
3802
10.1M
    xmlChar *ret = NULL;
3803
10.1M
    const xmlChar *cur = NULL;
3804
10.1M
    xmlParserInputPtr input;
3805
3806
10.1M
    if (RAW == '"') stop = '"';
3807
1.71M
    else if (RAW == '\'') stop = '\'';
3808
0
    else {
3809
0
  xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
3810
0
  return(NULL);
3811
0
    }
3812
10.1M
    buf = (xmlChar *) xmlMallocAtomic(size);
3813
10.1M
    if (buf == NULL) {
3814
0
  xmlErrMemory(ctxt, NULL);
3815
0
  return(NULL);
3816
0
    }
3817
3818
    /*
3819
     * The content of the entity definition is copied in a buffer.
3820
     */
3821
3822
10.1M
    ctxt->instate = XML_PARSER_ENTITY_VALUE;
3823
10.1M
    input = ctxt->input;
3824
10.1M
    GROW;
3825
10.1M
    if (ctxt->instate == XML_PARSER_EOF)
3826
0
        goto error;
3827
10.1M
    NEXT;
3828
10.1M
    c = CUR_CHAR(l);
3829
    /*
3830
     * NOTE: 4.4.5 Included in Literal
3831
     * When a parameter entity reference appears in a literal entity
3832
     * value, ... a single or double quote character in the replacement
3833
     * text is always treated as a normal data character and will not
3834
     * terminate the literal.
3835
     * In practice it means we stop the loop only when back at parsing
3836
     * the initial entity and the quote is found
3837
     */
3838
1.18G
    while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3839
1.18G
      (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
3840
1.17G
  if (len + 5 >= size) {
3841
1.87M
      xmlChar *tmp;
3842
3843
1.87M
      size *= 2;
3844
1.87M
      tmp = (xmlChar *) xmlRealloc(buf, size);
3845
1.87M
      if (tmp == NULL) {
3846
0
    xmlErrMemory(ctxt, NULL);
3847
0
                goto error;
3848
0
      }
3849
1.87M
      buf = tmp;
3850
1.87M
  }
3851
1.17G
  COPY_BUF(l,buf,len,c);
3852
1.17G
  NEXTL(l);
3853
3854
1.17G
  GROW;
3855
1.17G
  c = CUR_CHAR(l);
3856
1.17G
  if (c == 0) {
3857
5.12k
      GROW;
3858
5.12k
      c = CUR_CHAR(l);
3859
5.12k
  }
3860
3861
1.17G
        if (len > maxLength) {
3862
0
            xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
3863
0
                           "entity value too long\n");
3864
0
            goto error;
3865
0
        }
3866
1.17G
    }
3867
10.1M
    buf[len] = 0;
3868
10.1M
    if (ctxt->instate == XML_PARSER_EOF)
3869
0
        goto error;
3870
10.1M
    if (c != stop) {
3871
12.0k
        xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3872
12.0k
        goto error;
3873
12.0k
    }
3874
10.1M
    NEXT;
3875
3876
    /*
3877
     * Raise problem w.r.t. '&' and '%' being used in non-entities
3878
     * reference constructs. Note Charref will be handled in
3879
     * xmlStringDecodeEntities()
3880
     */
3881
10.1M
    cur = buf;
3882
790M
    while (*cur != 0) { /* non input consuming */
3883
780M
  if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3884
11.2M
      xmlChar *name;
3885
11.2M
      xmlChar tmp = *cur;
3886
11.2M
            int nameOk = 0;
3887
3888
11.2M
      cur++;
3889
11.2M
      name = xmlParseStringName(ctxt, &cur);
3890
11.2M
            if (name != NULL) {
3891
11.2M
                nameOk = 1;
3892
11.2M
                xmlFree(name);
3893
11.2M
            }
3894
11.2M
            if ((nameOk == 0) || (*cur != ';')) {
3895
15.3k
    xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3896
15.3k
      "EntityValue: '%c' forbidden except for entities references\n",
3897
15.3k
                            tmp);
3898
15.3k
                goto error;
3899
15.3k
      }
3900
11.2M
      if ((tmp == '%') && (ctxt->inSubset == 1) &&
3901
11.2M
    (ctxt->inputNr == 1)) {
3902
11.4k
    xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3903
11.4k
                goto error;
3904
11.4k
      }
3905
11.2M
      if (*cur == 0)
3906
0
          break;
3907
11.2M
  }
3908
780M
  cur++;
3909
780M
    }
3910
3911
    /*
3912
     * Then PEReference entities are substituted.
3913
     *
3914
     * NOTE: 4.4.7 Bypassed
3915
     * When a general entity reference appears in the EntityValue in
3916
     * an entity declaration, it is bypassed and left as is.
3917
     * so XML_SUBSTITUTE_REF is not set here.
3918
     */
3919
10.1M
    ++ctxt->depth;
3920
10.1M
    ret = xmlStringDecodeEntitiesInt(ctxt, buf, len, XML_SUBSTITUTE_PEREF,
3921
10.1M
                                     0, 0, 0, /* check */ 1);
3922
10.1M
    --ctxt->depth;
3923
3924
10.1M
    if (orig != NULL) {
3925
10.1M
        *orig = buf;
3926
10.1M
        buf = NULL;
3927
10.1M
    }
3928
3929
10.1M
error:
3930
10.1M
    if (buf != NULL)
3931
38.8k
        xmlFree(buf);
3932
10.1M
    return(ret);
3933
10.1M
}
3934
3935
/**
3936
 * xmlParseAttValueComplex:
3937
 * @ctxt:  an XML parser context
3938
 * @len:   the resulting attribute len
3939
 * @normalize:  whether to apply the inner normalization
3940
 *
3941
 * parse a value for an attribute, this is the fallback function
3942
 * of xmlParseAttValue() when the attribute parsing requires handling
3943
 * of non-ASCII characters, or normalization compaction.
3944
 *
3945
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3946
 */
3947
static xmlChar *
3948
3.32M
xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
3949
3.32M
    xmlChar limit = 0;
3950
3.32M
    xmlChar *buf = NULL;
3951
3.32M
    xmlChar *rep = NULL;
3952
3.32M
    size_t len = 0;
3953
3.32M
    size_t buf_size = 0;
3954
3.32M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3955
457k
                       XML_MAX_HUGE_LENGTH :
3956
3.32M
                       XML_MAX_TEXT_LENGTH;
3957
3.32M
    int c, l, in_space = 0;
3958
3.32M
    xmlChar *current = NULL;
3959
3.32M
    xmlEntityPtr ent;
3960
3961
3.32M
    if (NXT(0) == '"') {
3962
1.11M
  ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3963
1.11M
  limit = '"';
3964
1.11M
        NEXT;
3965
2.21M
    } else if (NXT(0) == '\'') {
3966
2.21M
  limit = '\'';
3967
2.21M
  ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3968
2.21M
        NEXT;
3969
2.21M
    } else {
3970
0
  xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3971
0
  return(NULL);
3972
0
    }
3973
3974
    /*
3975
     * allocate a translation buffer.
3976
     */
3977
3.32M
    buf_size = XML_PARSER_BUFFER_SIZE;
3978
3.32M
    buf = (xmlChar *) xmlMallocAtomic(buf_size);
3979
3.32M
    if (buf == NULL) goto mem_error;
3980
3981
    /*
3982
     * OK loop until we reach one of the ending char or a size limit.
3983
     */
3984
3.32M
    c = CUR_CHAR(l);
3985
863M
    while (((NXT(0) != limit) && /* checked */
3986
863M
            (IS_CHAR(c)) && (c != '<')) &&
3987
863M
            (ctxt->instate != XML_PARSER_EOF)) {
3988
860M
  if (c == '&') {
3989
64.8M
      in_space = 0;
3990
64.8M
      if (NXT(1) == '#') {
3991
693k
    int val = xmlParseCharRef(ctxt);
3992
3993
693k
    if (val == '&') {
3994
21.3k
        if (ctxt->replaceEntities) {
3995
9.86k
      if (len + 10 > buf_size) {
3996
84
          growBuffer(buf, 10);
3997
84
      }
3998
9.86k
      buf[len++] = '&';
3999
11.4k
        } else {
4000
      /*
4001
       * The reparsing will be done in xmlStringGetNodeList()
4002
       * called by the attribute() function in SAX.c
4003
       */
4004
11.4k
      if (len + 10 > buf_size) {
4005
12
          growBuffer(buf, 10);
4006
12
      }
4007
11.4k
      buf[len++] = '&';
4008
11.4k
      buf[len++] = '#';
4009
11.4k
      buf[len++] = '3';
4010
11.4k
      buf[len++] = '8';
4011
11.4k
      buf[len++] = ';';
4012
11.4k
        }
4013
671k
    } else if (val != 0) {
4014
630k
        if (len + 10 > buf_size) {
4015
11.0k
      growBuffer(buf, 10);
4016
11.0k
        }
4017
630k
        len += xmlCopyChar(0, &buf[len], val);
4018
630k
    }
4019
64.1M
      } else {
4020
64.1M
    ent = xmlParseEntityRef(ctxt);
4021
64.1M
    if ((ent != NULL) &&
4022
64.1M
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
4023
232k
        if (len + 10 > buf_size) {
4024
228
      growBuffer(buf, 10);
4025
228
        }
4026
232k
        if ((ctxt->replaceEntities == 0) &&
4027
232k
            (ent->content[0] == '&')) {
4028
38.5k
      buf[len++] = '&';
4029
38.5k
      buf[len++] = '#';
4030
38.5k
      buf[len++] = '3';
4031
38.5k
      buf[len++] = '8';
4032
38.5k
      buf[len++] = ';';
4033
194k
        } else {
4034
194k
      buf[len++] = ent->content[0];
4035
194k
        }
4036
63.8M
    } else if ((ent != NULL) &&
4037
63.8M
               (ctxt->replaceEntities != 0)) {
4038
37.8M
        if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
4039
37.8M
                        if (xmlParserEntityCheck(ctxt, ent->length))
4040
0
                            goto error;
4041
4042
37.8M
      ++ctxt->depth;
4043
37.8M
      rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
4044
37.8M
                                ent->length, XML_SUBSTITUTE_REF, 0, 0, 0,
4045
37.8M
                                /* check */ 1);
4046
37.8M
      --ctxt->depth;
4047
37.8M
      if (rep != NULL) {
4048
37.8M
          current = rep;
4049
7.63G
          while (*current != 0) { /* non input consuming */
4050
7.60G
                                if ((*current == 0xD) || (*current == 0xA) ||
4051
7.60G
                                    (*current == 0x9)) {
4052
6.57M
                                    buf[len++] = 0x20;
4053
6.57M
                                    current++;
4054
6.57M
                                } else
4055
7.59G
                                    buf[len++] = *current++;
4056
7.60G
        if (len + 10 > buf_size) {
4057
261k
            growBuffer(buf, 10);
4058
261k
        }
4059
7.60G
          }
4060
37.8M
          xmlFree(rep);
4061
37.8M
          rep = NULL;
4062
37.8M
      }
4063
37.8M
        } else {
4064
0
      if (len + 10 > buf_size) {
4065
0
          growBuffer(buf, 10);
4066
0
      }
4067
0
      if (ent->content != NULL)
4068
0
          buf[len++] = ent->content[0];
4069
0
        }
4070
37.8M
    } else if (ent != NULL) {
4071
8.77M
        int i = xmlStrlen(ent->name);
4072
8.77M
        const xmlChar *cur = ent->name;
4073
4074
        /*
4075
                     * We also check for recursion and amplification
4076
                     * when entities are not substituted. They're
4077
                     * often expanded later.
4078
         */
4079
8.77M
        if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
4080
8.77M
      (ent->content != NULL)) {
4081
8.72M
                        if ((ent->flags & XML_ENT_CHECKED) == 0) {
4082
54.7k
                            unsigned long oldCopy = ctxt->sizeentcopy;
4083
4084
54.7k
                            ctxt->sizeentcopy = ent->length;
4085
4086
54.7k
                            ++ctxt->depth;
4087
54.7k
                            rep = xmlStringDecodeEntitiesInt(ctxt,
4088
54.7k
                                    ent->content, ent->length,
4089
54.7k
                                    XML_SUBSTITUTE_REF, 0, 0, 0,
4090
54.7k
                                    /* check */ 1);
4091
54.7k
                            --ctxt->depth;
4092
4093
                            /*
4094
                             * If we're parsing DTD content, the entity
4095
                             * might reference other entities which
4096
                             * weren't defined yet, so the check isn't
4097
                             * reliable.
4098
                             */
4099
54.7k
                            if (ctxt->inSubset == 0) {
4100
51.9k
                                ent->flags |= XML_ENT_CHECKED;
4101
51.9k
                                ent->expandedSize = ctxt->sizeentcopy;
4102
51.9k
                            }
4103
4104
54.7k
                            if (rep != NULL) {
4105
53.9k
                                xmlFree(rep);
4106
53.9k
                                rep = NULL;
4107
53.9k
                            } else {
4108
800
                                ent->content[0] = 0;
4109
800
                            }
4110
4111
54.7k
                            if (xmlParserEntityCheck(ctxt, oldCopy))
4112
76
                                goto error;
4113
8.67M
                        } else {
4114
8.67M
                            if (xmlParserEntityCheck(ctxt, ent->expandedSize))
4115
19
                                goto error;
4116
8.67M
                        }
4117
8.72M
        }
4118
4119
        /*
4120
         * Just output the reference
4121
         */
4122
8.77M
        buf[len++] = '&';
4123
8.80M
        while (len + i + 10 > buf_size) {
4124
46.8k
      growBuffer(buf, i + 10);
4125
46.8k
        }
4126
17.7M
        for (;i > 0;i--)
4127
9.01M
      buf[len++] = *cur++;
4128
8.77M
        buf[len++] = ';';
4129
8.77M
    }
4130
64.1M
      }
4131
795M
  } else {
4132
795M
      if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
4133
20.8M
          if ((len != 0) || (!normalize)) {
4134
20.7M
        if ((!normalize) || (!in_space)) {
4135
20.5M
      COPY_BUF(l,buf,len,0x20);
4136
20.6M
      while (len + 10 > buf_size) {
4137
95.7k
          growBuffer(buf, 10);
4138
95.7k
      }
4139
20.5M
        }
4140
20.7M
        in_space = 1;
4141
20.7M
    }
4142
774M
      } else {
4143
774M
          in_space = 0;
4144
774M
    COPY_BUF(l,buf,len,c);
4145
774M
    if (len + 10 > buf_size) {
4146
1.88M
        growBuffer(buf, 10);
4147
1.88M
    }
4148
774M
      }
4149
795M
      NEXTL(l);
4150
795M
  }
4151
860M
  GROW;
4152
860M
  c = CUR_CHAR(l);
4153
860M
        if (len > maxLength) {
4154
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4155
0
                           "AttValue length too long\n");
4156
0
            goto mem_error;
4157
0
        }
4158
860M
    }
4159
3.32M
    if (ctxt->instate == XML_PARSER_EOF)
4160
9.31k
        goto error;
4161
4162
3.31M
    if ((in_space) && (normalize)) {
4163
79.6k
        while ((len > 0) && (buf[len - 1] == 0x20)) len--;
4164
39.0k
    }
4165
3.31M
    buf[len] = 0;
4166
3.31M
    if (RAW == '<') {
4167
337k
  xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4168
2.97M
    } else if (RAW != limit) {
4169
147k
  if ((c != 0) && (!IS_CHAR(c))) {
4170
105k
      xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4171
105k
         "invalid character in attribute value\n");
4172
105k
  } else {
4173
41.4k
      xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4174
41.4k
         "AttValue: ' expected\n");
4175
41.4k
        }
4176
147k
    } else
4177
2.83M
  NEXT;
4178
4179
3.31M
    if (attlen != NULL) *attlen = len;
4180
3.31M
    return(buf);
4181
4182
0
mem_error:
4183
0
    xmlErrMemory(ctxt, NULL);
4184
9.40k
error:
4185
9.40k
    if (buf != NULL)
4186
9.40k
        xmlFree(buf);
4187
9.40k
    if (rep != NULL)
4188
0
        xmlFree(rep);
4189
9.40k
    return(NULL);
4190
0
}
4191
4192
/**
4193
 * xmlParseAttValue:
4194
 * @ctxt:  an XML parser context
4195
 *
4196
 * DEPRECATED: Internal function, don't use.
4197
 *
4198
 * parse a value for an attribute
4199
 * Note: the parser won't do substitution of entities here, this
4200
 * will be handled later in xmlStringGetNodeList
4201
 *
4202
 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4203
 *                   "'" ([^<&'] | Reference)* "'"
4204
 *
4205
 * 3.3.3 Attribute-Value Normalization:
4206
 * Before the value of an attribute is passed to the application or
4207
 * checked for validity, the XML processor must normalize it as follows:
4208
 * - a character reference is processed by appending the referenced
4209
 *   character to the attribute value
4210
 * - an entity reference is processed by recursively processing the
4211
 *   replacement text of the entity
4212
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4213
 *   appending #x20 to the normalized value, except that only a single
4214
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
4215
 *   parsed entity or the literal entity value of an internal parsed entity
4216
 * - other characters are processed by appending them to the normalized value
4217
 * If the declared value is not CDATA, then the XML processor must further
4218
 * process the normalized attribute value by discarding any leading and
4219
 * trailing space (#x20) characters, and by replacing sequences of space
4220
 * (#x20) characters by a single space (#x20) character.
4221
 * All attributes for which no declaration has been read should be treated
4222
 * by a non-validating parser as if declared CDATA.
4223
 *
4224
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4225
 */
4226
4227
4228
xmlChar *
4229
228M
xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4230
228M
    if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4231
228M
    return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
4232
228M
}
4233
4234
/**
4235
 * xmlParseSystemLiteral:
4236
 * @ctxt:  an XML parser context
4237
 *
4238
 * DEPRECATED: Internal function, don't use.
4239
 *
4240
 * parse an XML Literal
4241
 *
4242
 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4243
 *
4244
 * Returns the SystemLiteral parsed or NULL
4245
 */
4246
4247
xmlChar *
4248
12.4M
xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4249
12.4M
    xmlChar *buf = NULL;
4250
12.4M
    int len = 0;
4251
12.4M
    int size = XML_PARSER_BUFFER_SIZE;
4252
12.4M
    int cur, l;
4253
12.4M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4254
88.8k
                    XML_MAX_TEXT_LENGTH :
4255
12.4M
                    XML_MAX_NAME_LENGTH;
4256
12.4M
    xmlChar stop;
4257
12.4M
    int state = ctxt->instate;
4258
12.4M
    int count = 0;
4259
4260
12.4M
    SHRINK;
4261
12.4M
    if (RAW == '"') {
4262
734k
        NEXT;
4263
734k
  stop = '"';
4264
11.7M
    } else if (RAW == '\'') {
4265
11.7M
        NEXT;
4266
11.7M
  stop = '\'';
4267
11.7M
    } else {
4268
43.0k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4269
43.0k
  return(NULL);
4270
43.0k
    }
4271
4272
12.4M
    buf = (xmlChar *) xmlMallocAtomic(size);
4273
12.4M
    if (buf == NULL) {
4274
0
        xmlErrMemory(ctxt, NULL);
4275
0
  return(NULL);
4276
0
    }
4277
12.4M
    ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4278
12.4M
    cur = CUR_CHAR(l);
4279
487M
    while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4280
475M
  if (len + 5 >= size) {
4281
152k
      xmlChar *tmp;
4282
4283
152k
      size *= 2;
4284
152k
      tmp = (xmlChar *) xmlRealloc(buf, size);
4285
152k
      if (tmp == NULL) {
4286
0
          xmlFree(buf);
4287
0
    xmlErrMemory(ctxt, NULL);
4288
0
    ctxt->instate = (xmlParserInputState) state;
4289
0
    return(NULL);
4290
0
      }
4291
152k
      buf = tmp;
4292
152k
  }
4293
475M
  count++;
4294
475M
  if (count > 50) {
4295
8.82M
      SHRINK;
4296
8.82M
      GROW;
4297
8.82M
      count = 0;
4298
8.82M
            if (ctxt->instate == XML_PARSER_EOF) {
4299
0
          xmlFree(buf);
4300
0
    return(NULL);
4301
0
            }
4302
8.82M
  }
4303
475M
  COPY_BUF(l,buf,len,cur);
4304
475M
  NEXTL(l);
4305
475M
  cur = CUR_CHAR(l);
4306
475M
  if (cur == 0) {
4307
9.34k
      GROW;
4308
9.34k
      SHRINK;
4309
9.34k
      cur = CUR_CHAR(l);
4310
9.34k
  }
4311
475M
        if (len > maxLength) {
4312
780
            xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4313
780
            xmlFree(buf);
4314
780
            ctxt->instate = (xmlParserInputState) state;
4315
780
            return(NULL);
4316
780
        }
4317
475M
    }
4318
12.4M
    buf[len] = 0;
4319
12.4M
    ctxt->instate = (xmlParserInputState) state;
4320
12.4M
    if (!IS_CHAR(cur)) {
4321
14.5k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4322
12.4M
    } else {
4323
12.4M
  NEXT;
4324
12.4M
    }
4325
12.4M
    return(buf);
4326
12.4M
}
4327
4328
/**
4329
 * xmlParsePubidLiteral:
4330
 * @ctxt:  an XML parser context
4331
 *
4332
 * DEPRECATED: Internal function, don't use.
4333
 *
4334
 * parse an XML public literal
4335
 *
4336
 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4337
 *
4338
 * Returns the PubidLiteral parsed or NULL.
4339
 */
4340
4341
xmlChar *
4342
11.2M
xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4343
11.2M
    xmlChar *buf = NULL;
4344
11.2M
    int len = 0;
4345
11.2M
    int size = XML_PARSER_BUFFER_SIZE;
4346
11.2M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4347
19.0k
                    XML_MAX_TEXT_LENGTH :
4348
11.2M
                    XML_MAX_NAME_LENGTH;
4349
11.2M
    xmlChar cur;
4350
11.2M
    xmlChar stop;
4351
11.2M
    int count = 0;
4352
11.2M
    xmlParserInputState oldstate = ctxt->instate;
4353
4354
11.2M
    SHRINK;
4355
11.2M
    if (RAW == '"') {
4356
203k
        NEXT;
4357
203k
  stop = '"';
4358
11.0M
    } else if (RAW == '\'') {
4359
11.0M
        NEXT;
4360
11.0M
  stop = '\'';
4361
11.0M
    } else {
4362
2.27k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4363
2.27k
  return(NULL);
4364
2.27k
    }
4365
11.2M
    buf = (xmlChar *) xmlMallocAtomic(size);
4366
11.2M
    if (buf == NULL) {
4367
0
  xmlErrMemory(ctxt, NULL);
4368
0
  return(NULL);
4369
0
    }
4370
11.2M
    ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
4371
11.2M
    cur = CUR;
4372
117M
    while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
4373
106M
  if (len + 1 >= size) {
4374
139k
      xmlChar *tmp;
4375
4376
139k
      size *= 2;
4377
139k
      tmp = (xmlChar *) xmlRealloc(buf, size);
4378
139k
      if (tmp == NULL) {
4379
0
    xmlErrMemory(ctxt, NULL);
4380
0
    xmlFree(buf);
4381
0
    return(NULL);
4382
0
      }
4383
139k
      buf = tmp;
4384
139k
  }
4385
106M
  buf[len++] = cur;
4386
106M
  count++;
4387
106M
  if (count > 50) {
4388
1.75M
      SHRINK;
4389
1.75M
      GROW;
4390
1.75M
      count = 0;
4391
1.75M
            if (ctxt->instate == XML_PARSER_EOF) {
4392
0
    xmlFree(buf);
4393
0
    return(NULL);
4394
0
            }
4395
1.75M
  }
4396
106M
  NEXT;
4397
106M
  cur = CUR;
4398
106M
  if (cur == 0) {
4399
5.41k
      GROW;
4400
5.41k
      SHRINK;
4401
5.41k
      cur = CUR;
4402
5.41k
  }
4403
106M
        if (len > maxLength) {
4404
85
            xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4405
85
            xmlFree(buf);
4406
85
            return(NULL);
4407
85
        }
4408
106M
    }
4409
11.2M
    buf[len] = 0;
4410
11.2M
    if (cur != stop) {
4411
35.7k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4412
11.1M
    } else {
4413
11.1M
  NEXT;
4414
11.1M
    }
4415
11.2M
    ctxt->instate = oldstate;
4416
11.2M
    return(buf);
4417
11.2M
}
4418
4419
static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt);
4420
4421
/*
 * Lookup table for the inner loop of the fast character-data scanner.
 *
 * A non-zero entry marks a byte the scanner may step over directly.
 * Zero entries stop the scan: every control byte except TAB (0x09),
 * which includes LF and CR, plus '&' (0x26), '<' (0x3C), ']' (0x5D)
 * and all bytes from 0x80 upwards (non-ASCII).
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 - 0x0F: only TAB is accepted */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x10 - 0x1F */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20 - 0x2F: '&' (0x26) is rejected */
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27,
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30 - 0x3F: '<' (0x3C) is rejected */
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F,
    /* 0x40 - 0x4F */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50 - 0x5F: ']' (0x5D) is rejected */
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F,
    /* 0x60 - 0x6F */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 - 0x7F */
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F
    /*
     * 0x80 - 0xFF (non-ascii) are not listed: the remaining elements of
     * a partially initialized array are zero.
     */
};
4458
4459
/**
4460
 * xmlParseCharData:
4461
 * @ctxt:  an XML parser context
4462
 * @cdata:  unused
4463
 *
4464
 * DEPRECATED: Internal function, don't use.
4465
 *
4466
 * Parse character data. Always makes progress if the first char isn't
4467
 * '<' or '&'.
4468
 *
4469
 * if we are within a CDATA section ']]>' marks an end of section.
4470
 *
4471
 * The right angle bracket (>) may be represented using the string "&gt;",
4472
 * and must, for compatibility, be escaped using "&gt;" or a character
4473
 * reference when it appears in the string "]]>" in content, when that
4474
 * string is not marking the end of a CDATA section.
4475
 *
4476
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4477
 */
4478
4479
void
4480
680M
xmlParseCharData(xmlParserCtxtPtr ctxt, ATTRIBUTE_UNUSED int cdata) {
4481
680M
    const xmlChar *in;
4482
680M
    int nbchar = 0;
4483
680M
    int line = ctxt->input->line;
4484
680M
    int col = ctxt->input->col;
4485
680M
    int ccol;
4486
4487
680M
    SHRINK;
4488
680M
    GROW;
4489
    /*
4490
     * Accelerated common case where input don't need to be
4491
     * modified before passing it to the handler.
4492
     */
4493
680M
    in = ctxt->input->cur;
4494
1.01G
    do {
4495
1.12G
get_more_space:
4496
1.18G
        while (*in == 0x20) { in++; ctxt->input->col++; }
4497
1.12G
        if (*in == 0xA) {
4498
113M
            do {
4499
113M
                ctxt->input->line++; ctxt->input->col = 1;
4500
113M
                in++;
4501
113M
            } while (*in == 0xA);
4502
108M
            goto get_more_space;
4503
108M
        }
4504
1.01G
        if (*in == '<') {
4505
89.9M
            nbchar = in - ctxt->input->cur;
4506
89.9M
            if (nbchar > 0) {
4507
89.9M
                const xmlChar *tmp = ctxt->input->cur;
4508
89.9M
                ctxt->input->cur = in;
4509
4510
89.9M
                if ((ctxt->sax != NULL) &&
4511
89.9M
                    (ctxt->sax->ignorableWhitespace !=
4512
89.9M
                     ctxt->sax->characters)) {
4513
16.6M
                    if (areBlanks(ctxt, tmp, nbchar, 1)) {
4514
14.1M
                        if (ctxt->sax->ignorableWhitespace != NULL)
4515
14.1M
                            ctxt->sax->ignorableWhitespace(ctxt->userData,
4516
14.1M
                                                   tmp, nbchar);
4517
14.1M
                    } else {
4518
2.46M
                        if (ctxt->sax->characters != NULL)
4519
2.46M
                            ctxt->sax->characters(ctxt->userData,
4520
2.46M
                                                  tmp, nbchar);
4521
2.46M
                        if (*ctxt->space == -1)
4522
458k
                            *ctxt->space = -2;
4523
2.46M
                    }
4524
73.3M
                } else if ((ctxt->sax != NULL) &&
4525
73.3M
                           (ctxt->sax->characters != NULL)) {
4526
73.3M
                    ctxt->sax->characters(ctxt->userData,
4527
73.3M
                                          tmp, nbchar);
4528
73.3M
                }
4529
89.9M
            }
4530
89.9M
            return;
4531
89.9M
        }
4532
4533
1.06G
get_more:
4534
1.06G
        ccol = ctxt->input->col;
4535
9.21G
        while (test_char_data[*in]) {
4536
8.14G
            in++;
4537
8.14G
            ccol++;
4538
8.14G
        }
4539
1.06G
        ctxt->input->col = ccol;
4540
1.06G
        if (*in == 0xA) {
4541
132M
            do {
4542
132M
                ctxt->input->line++; ctxt->input->col = 1;
4543
132M
                in++;
4544
132M
            } while (*in == 0xA);
4545
130M
            goto get_more;
4546
130M
        }
4547
937M
        if (*in == ']') {
4548
13.0M
            if ((in[1] == ']') && (in[2] == '>')) {
4549
25.1k
                xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4550
25.1k
                ctxt->input->cur = in + 1;
4551
25.1k
                return;
4552
25.1k
            }
4553
13.0M
            in++;
4554
13.0M
            ctxt->input->col++;
4555
13.0M
            goto get_more;
4556
13.0M
        }
4557
924M
        nbchar = in - ctxt->input->cur;
4558
924M
        if (nbchar > 0) {
4559
511M
            if ((ctxt->sax != NULL) &&
4560
511M
                (ctxt->sax->ignorableWhitespace !=
4561
511M
                 ctxt->sax->characters) &&
4562
511M
                (IS_BLANK_CH(*ctxt->input->cur))) {
4563
92.6M
                const xmlChar *tmp = ctxt->input->cur;
4564
92.6M
                ctxt->input->cur = in;
4565
4566
92.6M
                if (areBlanks(ctxt, tmp, nbchar, 0)) {
4567
61.2M
                    if (ctxt->sax->ignorableWhitespace != NULL)
4568
61.2M
                        ctxt->sax->ignorableWhitespace(ctxt->userData,
4569
61.2M
                                                       tmp, nbchar);
4570
61.2M
                } else {
4571
31.3M
                    if (ctxt->sax->characters != NULL)
4572
31.3M
                        ctxt->sax->characters(ctxt->userData,
4573
31.3M
                                              tmp, nbchar);
4574
31.3M
                    if (*ctxt->space == -1)
4575
3.10M
                        *ctxt->space = -2;
4576
31.3M
                }
4577
92.6M
                line = ctxt->input->line;
4578
92.6M
                col = ctxt->input->col;
4579
418M
            } else if (ctxt->sax != NULL) {
4580
418M
                if (ctxt->sax->characters != NULL)
4581
418M
                    ctxt->sax->characters(ctxt->userData,
4582
418M
                                          ctxt->input->cur, nbchar);
4583
418M
                line = ctxt->input->line;
4584
418M
                col = ctxt->input->col;
4585
418M
            }
4586
511M
        }
4587
924M
        ctxt->input->cur = in;
4588
924M
        if (*in == 0xD) {
4589
334M
            in++;
4590
334M
            if (*in == 0xA) {
4591
334M
                ctxt->input->cur = in;
4592
334M
                in++;
4593
334M
                ctxt->input->line++; ctxt->input->col = 1;
4594
334M
                continue; /* while */
4595
334M
            }
4596
277k
            in--;
4597
277k
        }
4598
590M
        if (*in == '<') {
4599
478M
            return;
4600
478M
        }
4601
111M
        if (*in == '&') {
4602
21.9M
            return;
4603
21.9M
        }
4604
89.6M
        SHRINK;
4605
89.6M
        GROW;
4606
89.6M
        if (ctxt->instate == XML_PARSER_EOF)
4607
0
            return;
4608
89.6M
        in = ctxt->input->cur;
4609
423M
    } while (((*in >= 0x20) && (*in <= 0x7F)) ||
4610
423M
             (*in == 0x09) || (*in == 0x0a));
4611
89.7M
    ctxt->input->line = line;
4612
89.7M
    ctxt->input->col = col;
4613
89.7M
    xmlParseCharDataComplex(ctxt);
4614
89.7M
}
4615
4616
/**
4617
 * xmlParseCharDataComplex:
4618
 * @ctxt:  an XML parser context
4619
 * @cdata:  int indicating whether we are within a CDATA section
4620
 *
4621
 * Always makes progress if the first char isn't '<' or '&'.
4622
 *
4623
 * parse a CharData section.this is the fallback function
4624
 * of xmlParseCharData() when the parsing requires handling
4625
 * of non-ASCII characters.
4626
 */
4627
static void
4628
89.7M
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt) {
4629
89.7M
    xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4630
89.7M
    int nbchar = 0;
4631
89.7M
    int cur, l;
4632
89.7M
    int count = 0;
4633
4634
89.7M
    SHRINK;
4635
89.7M
    GROW;
4636
89.7M
    cur = CUR_CHAR(l);
4637
1.60G
    while ((cur != '<') && /* checked */
4638
1.60G
           (cur != '&') &&
4639
1.60G
     (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
4640
1.51G
  if ((cur == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4641
19.5k
      xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4642
19.5k
  }
4643
1.51G
  COPY_BUF(l,buf,nbchar,cur);
4644
  /* move current position before possible calling of ctxt->sax->characters */
4645
1.51G
  NEXTL(l);
4646
1.51G
  cur = CUR_CHAR(l);
4647
1.51G
  if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4648
8.38M
      buf[nbchar] = 0;
4649
4650
      /*
4651
       * OK the segment is to be consumed as chars.
4652
       */
4653
8.38M
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4654
7.28M
    if (areBlanks(ctxt, buf, nbchar, 0)) {
4655
0
        if (ctxt->sax->ignorableWhitespace != NULL)
4656
0
      ctxt->sax->ignorableWhitespace(ctxt->userData,
4657
0
                                     buf, nbchar);
4658
7.28M
    } else {
4659
7.28M
        if (ctxt->sax->characters != NULL)
4660
7.28M
      ctxt->sax->characters(ctxt->userData, buf, nbchar);
4661
7.28M
        if ((ctxt->sax->characters !=
4662
7.28M
             ctxt->sax->ignorableWhitespace) &&
4663
7.28M
      (*ctxt->space == -1))
4664
12.5k
      *ctxt->space = -2;
4665
7.28M
    }
4666
7.28M
      }
4667
8.38M
      nbchar = 0;
4668
            /* something really bad happened in the SAX callback */
4669
8.38M
            if (ctxt->instate != XML_PARSER_CONTENT)
4670
0
                return;
4671
8.38M
  }
4672
1.51G
  count++;
4673
1.51G
  if (count > 50) {
4674
22.0M
      SHRINK;
4675
22.0M
      GROW;
4676
22.0M
      count = 0;
4677
22.0M
            if (ctxt->instate == XML_PARSER_EOF)
4678
0
    return;
4679
22.0M
  }
4680
1.51G
    }
4681
89.7M
    if (nbchar != 0) {
4682
67.7M
        buf[nbchar] = 0;
4683
  /*
4684
   * OK the segment is to be consumed as chars.
4685
   */
4686
67.7M
  if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4687
60.5M
      if (areBlanks(ctxt, buf, nbchar, 0)) {
4688
1.37k
    if (ctxt->sax->ignorableWhitespace != NULL)
4689
1.37k
        ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4690
60.5M
      } else {
4691
60.5M
    if (ctxt->sax->characters != NULL)
4692
60.5M
        ctxt->sax->characters(ctxt->userData, buf, nbchar);
4693
60.5M
    if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4694
60.5M
        (*ctxt->space == -1))
4695
253k
        *ctxt->space = -2;
4696
60.5M
      }
4697
60.5M
  }
4698
67.7M
    }
4699
89.7M
    if ((ctxt->input->cur < ctxt->input->end) && (!IS_CHAR(cur))) {
4700
  /* Generate the error and skip the offending character */
4701
85.0M
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4702
85.0M
                          "PCDATA invalid Char value %d\n",
4703
85.0M
                    cur ? cur : CUR);
4704
85.0M
  NEXT;
4705
85.0M
    }
4706
89.7M
}
4707
4708
/**
4709
 * xmlParseExternalID:
4710
 * @ctxt:  an XML parser context
4711
 * @publicID:  a xmlChar** receiving PubidLiteral
4712
 * @strict: indicate whether we should restrict parsing to only
4713
 *          production [75], see NOTE below
4714
 *
4715
 * DEPRECATED: Internal function, don't use.
4716
 *
4717
 * Parse an External ID or a Public ID
4718
 *
4719
 * NOTE: Productions [75] and [83] interact badly since [75] can generate
4720
 *       'PUBLIC' S PubidLiteral S SystemLiteral
4721
 *
4722
 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4723
 *                   | 'PUBLIC' S PubidLiteral S SystemLiteral
4724
 *
4725
 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4726
 *
4727
 * Returns the function returns SystemLiteral and in the second
4728
 *                case publicID receives PubidLiteral, is strict is off
4729
 *                it is possible to return NULL and have publicID set.
4730
 */
4731
4732
xmlChar *
4733
13.4M
xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4734
13.4M
    xmlChar *URI = NULL;
4735
4736
13.4M
    SHRINK;
4737
4738
13.4M
    *publicID = NULL;
4739
13.4M
    if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4740
1.50M
        SKIP(6);
4741
1.50M
  if (SKIP_BLANKS == 0) {
4742
991
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4743
991
                     "Space required after 'SYSTEM'\n");
4744
991
  }
4745
1.50M
  URI = xmlParseSystemLiteral(ctxt);
4746
1.50M
  if (URI == NULL) {
4747
2.86k
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4748
2.86k
        }
4749
11.9M
    } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4750
11.2M
        SKIP(6);
4751
11.2M
  if (SKIP_BLANKS == 0) {
4752
1.16k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4753
1.16k
        "Space required after 'PUBLIC'\n");
4754
1.16k
  }
4755
11.2M
  *publicID = xmlParsePubidLiteral(ctxt);
4756
11.2M
  if (*publicID == NULL) {
4757
2.35k
      xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4758
2.35k
  }
4759
11.2M
  if (strict) {
4760
      /*
4761
       * We don't handle [83] so "S SystemLiteral" is required.
4762
       */
4763
10.9M
      if (SKIP_BLANKS == 0) {
4764
39.2k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4765
39.2k
      "Space required after the Public Identifier\n");
4766
39.2k
      }
4767
10.9M
  } else {
4768
      /*
4769
       * We handle [83] so we return immediately, if
4770
       * "S SystemLiteral" is not detected. We skip blanks if no
4771
             * system literal was found, but this is harmless since we must
4772
             * be at the end of a NotationDecl.
4773
       */
4774
254k
      if (SKIP_BLANKS == 0) return(NULL);
4775
244
      if ((CUR != '\'') && (CUR != '"')) return(NULL);
4776
244
  }
4777
10.9M
  URI = xmlParseSystemLiteral(ctxt);
4778
10.9M
  if (URI == NULL) {
4779
40.9k
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4780
40.9k
        }
4781
10.9M
    }
4782
13.1M
    return(URI);
4783
13.4M
}
4784
4785
/**
4786
 * xmlParseCommentComplex:
4787
 * @ctxt:  an XML parser context
4788
 * @buf:  the already parsed part of the buffer
4789
 * @len:  number of bytes in the buffer
4790
 * @size:  allocated size of the buffer
4791
 *
4792
 * Skip an XML (SGML) comment <!-- .... -->
4793
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4794
 *  must not occur within comments. "
4795
 * This is the slow routine in case the accelerator for ascii didn't work
4796
 *
4797
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4798
 */
4799
static void
4800
xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4801
11.4M
                       size_t len, size_t size) {
4802
11.4M
    int q, ql;
4803
11.4M
    int r, rl;
4804
11.4M
    int cur, l;
4805
11.4M
    size_t count = 0;
4806
11.4M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4807
234k
                       XML_MAX_HUGE_LENGTH :
4808
11.4M
                       XML_MAX_TEXT_LENGTH;
4809
11.4M
    int inputid;
4810
4811
11.4M
    inputid = ctxt->input->id;
4812
4813
11.4M
    if (buf == NULL) {
4814
1.92k
        len = 0;
4815
1.92k
  size = XML_PARSER_BUFFER_SIZE;
4816
1.92k
  buf = (xmlChar *) xmlMallocAtomic(size);
4817
1.92k
  if (buf == NULL) {
4818
0
      xmlErrMemory(ctxt, NULL);
4819
0
      return;
4820
0
  }
4821
1.92k
    }
4822
11.4M
    GROW; /* Assure there's enough input data */
4823
11.4M
    q = CUR_CHAR(ql);
4824
11.4M
    if (q == 0)
4825
11.2M
        goto not_terminated;
4826
252k
    if (!IS_CHAR(q)) {
4827
17.6k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4828
17.6k
                          "xmlParseComment: invalid xmlChar value %d\n",
4829
17.6k
                    q);
4830
17.6k
  xmlFree (buf);
4831
17.6k
  return;
4832
17.6k
    }
4833
235k
    NEXTL(ql);
4834
235k
    r = CUR_CHAR(rl);
4835
235k
    if (r == 0)
4836
230
        goto not_terminated;
4837
235k
    if (!IS_CHAR(r)) {
4838
6.80k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4839
6.80k
                          "xmlParseComment: invalid xmlChar value %d\n",
4840
6.80k
                    r);
4841
6.80k
  xmlFree (buf);
4842
6.80k
  return;
4843
6.80k
    }
4844
228k
    NEXTL(rl);
4845
228k
    cur = CUR_CHAR(l);
4846
228k
    if (cur == 0)
4847
168
        goto not_terminated;
4848
1.37G
    while (IS_CHAR(cur) && /* checked */
4849
1.37G
           ((cur != '>') ||
4850
1.37G
      (r != '-') || (q != '-'))) {
4851
1.37G
  if ((r == '-') && (q == '-')) {
4852
24.6M
      xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4853
24.6M
  }
4854
1.37G
  if (len + 5 >= size) {
4855
227k
      xmlChar *new_buf;
4856
227k
            size_t new_size;
4857
4858
227k
      new_size = size * 2;
4859
227k
      new_buf = (xmlChar *) xmlRealloc(buf, new_size);
4860
227k
      if (new_buf == NULL) {
4861
0
    xmlFree (buf);
4862
0
    xmlErrMemory(ctxt, NULL);
4863
0
    return;
4864
0
      }
4865
227k
      buf = new_buf;
4866
227k
            size = new_size;
4867
227k
  }
4868
1.37G
  COPY_BUF(ql,buf,len,q);
4869
1.37G
  q = r;
4870
1.37G
  ql = rl;
4871
1.37G
  r = cur;
4872
1.37G
  rl = l;
4873
4874
1.37G
  count++;
4875
1.37G
  if (count > 50) {
4876
26.7M
      SHRINK;
4877
26.7M
      GROW;
4878
26.7M
      count = 0;
4879
26.7M
            if (ctxt->instate == XML_PARSER_EOF) {
4880
0
    xmlFree(buf);
4881
0
    return;
4882
0
            }
4883
26.7M
  }
4884
1.37G
  NEXTL(l);
4885
1.37G
  cur = CUR_CHAR(l);
4886
1.37G
  if (cur == 0) {
4887
56.5k
      SHRINK;
4888
56.5k
      GROW;
4889
56.5k
      cur = CUR_CHAR(l);
4890
56.5k
  }
4891
4892
1.37G
        if (len > maxLength) {
4893
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4894
0
                         "Comment too big found", NULL);
4895
0
            xmlFree (buf);
4896
0
            return;
4897
0
        }
4898
1.37G
    }
4899
228k
    buf[len] = 0;
4900
228k
    if (cur == 0) {
4901
56.5k
  xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4902
56.5k
                       "Comment not terminated \n<!--%.50s\n", buf);
4903
171k
    } else if (!IS_CHAR(cur)) {
4904
29.9k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4905
29.9k
                          "xmlParseComment: invalid xmlChar value %d\n",
4906
29.9k
                    cur);
4907
141k
    } else {
4908
141k
  if (inputid != ctxt->input->id) {
4909
0
      xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4910
0
               "Comment doesn't start and stop in the same"
4911
0
                           " entity\n");
4912
0
  }
4913
141k
        NEXT;
4914
141k
  if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4915
141k
      (!ctxt->disableSAX))
4916
122k
      ctxt->sax->comment(ctxt->userData, buf);
4917
141k
    }
4918
228k
    xmlFree(buf);
4919
228k
    return;
4920
11.2M
not_terminated:
4921
11.2M
    xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4922
11.2M
       "Comment not terminated\n", NULL);
4923
11.2M
    xmlFree(buf);
4924
11.2M
    return;
4925
228k
}
4926
4927
/**
4928
 * xmlParseComment:
4929
 * @ctxt:  an XML parser context
4930
 *
4931
 * DEPRECATED: Internal function, don't use.
4932
 *
4933
 * Parse an XML (SGML) comment. Always consumes '<!'.
4934
 *
4935
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4936
 *  must not occur within comments. "
4937
 *
4938
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4939
 */
4940
void
4941
1.49G
xmlParseComment(xmlParserCtxtPtr ctxt) {
4942
1.49G
    xmlChar *buf = NULL;
4943
1.49G
    size_t size = XML_PARSER_BUFFER_SIZE;
4944
1.49G
    size_t len = 0;
4945
1.49G
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4946
5.94M
                       XML_MAX_HUGE_LENGTH :
4947
1.49G
                       XML_MAX_TEXT_LENGTH;
4948
1.49G
    xmlParserInputState state;
4949
1.49G
    const xmlChar *in;
4950
1.49G
    size_t nbchar = 0;
4951
1.49G
    int ccol;
4952
1.49G
    int inputid;
4953
4954
    /*
4955
     * Check that there is a comment right here.
4956
     */
4957
1.49G
    if ((RAW != '<') || (NXT(1) != '!'))
4958
0
        return;
4959
1.49G
    SKIP(2);
4960
1.49G
    if ((RAW != '-') || (NXT(1) != '-'))
4961
2.17k
        return;
4962
1.49G
    state = ctxt->instate;
4963
1.49G
    ctxt->instate = XML_PARSER_COMMENT;
4964
1.49G
    inputid = ctxt->input->id;
4965
1.49G
    SKIP(2);
4966
1.49G
    SHRINK;
4967
1.49G
    GROW;
4968
4969
    /*
4970
     * Accelerated common case where input don't need to be
4971
     * modified before passing it to the handler.
4972
     */
4973
1.49G
    in = ctxt->input->cur;
4974
1.49G
    do {
4975
1.49G
  if (*in == 0xA) {
4976
2.74M
      do {
4977
2.74M
    ctxt->input->line++; ctxt->input->col = 1;
4978
2.74M
    in++;
4979
2.74M
      } while (*in == 0xA);
4980
2.72M
  }
4981
1.97G
get_more:
4982
1.97G
        ccol = ctxt->input->col;
4983
10.4G
  while (((*in > '-') && (*in <= 0x7F)) ||
4984
10.4G
         ((*in >= 0x20) && (*in < '-')) ||
4985
10.4G
         (*in == 0x09)) {
4986
8.46G
        in++;
4987
8.46G
        ccol++;
4988
8.46G
  }
4989
1.97G
  ctxt->input->col = ccol;
4990
1.97G
  if (*in == 0xA) {
4991
62.0M
      do {
4992
62.0M
    ctxt->input->line++; ctxt->input->col = 1;
4993
62.0M
    in++;
4994
62.0M
      } while (*in == 0xA);
4995
60.7M
      goto get_more;
4996
60.7M
  }
4997
1.91G
  nbchar = in - ctxt->input->cur;
4998
  /*
4999
   * save current set of data
5000
   */
5001
1.91G
  if (nbchar > 0) {
5002
488M
      if ((ctxt->sax != NULL) &&
5003
488M
    (ctxt->sax->comment != NULL)) {
5004
488M
    if (buf == NULL) {
5005
82.3M
        if ((*in == '-') && (in[1] == '-'))
5006
53.7M
            size = nbchar + 1;
5007
28.5M
        else
5008
28.5M
            size = XML_PARSER_BUFFER_SIZE + nbchar;
5009
82.3M
        buf = (xmlChar *) xmlMallocAtomic(size);
5010
82.3M
        if (buf == NULL) {
5011
0
            xmlErrMemory(ctxt, NULL);
5012
0
      ctxt->instate = state;
5013
0
      return;
5014
0
        }
5015
82.3M
        len = 0;
5016
405M
    } else if (len + nbchar + 1 >= size) {
5017
6.17M
        xmlChar *new_buf;
5018
6.17M
        size  += len + nbchar + XML_PARSER_BUFFER_SIZE;
5019
6.17M
        new_buf = (xmlChar *) xmlRealloc(buf, size);
5020
6.17M
        if (new_buf == NULL) {
5021
0
            xmlFree (buf);
5022
0
      xmlErrMemory(ctxt, NULL);
5023
0
      ctxt->instate = state;
5024
0
      return;
5025
0
        }
5026
6.17M
        buf = new_buf;
5027
6.17M
    }
5028
488M
    memcpy(&buf[len], ctxt->input->cur, nbchar);
5029
488M
    len += nbchar;
5030
488M
    buf[len] = 0;
5031
488M
      }
5032
488M
  }
5033
1.91G
        if (len > maxLength) {
5034
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5035
0
                         "Comment too big found", NULL);
5036
0
            xmlFree (buf);
5037
0
            return;
5038
0
        }
5039
1.91G
  ctxt->input->cur = in;
5040
1.91G
  if (*in == 0xA) {
5041
0
      in++;
5042
0
      ctxt->input->line++; ctxt->input->col = 1;
5043
0
  }
5044
1.91G
  if (*in == 0xD) {
5045
80.8M
      in++;
5046
80.8M
      if (*in == 0xA) {
5047
80.8M
    ctxt->input->cur = in;
5048
80.8M
    in++;
5049
80.8M
    ctxt->input->line++; ctxt->input->col = 1;
5050
80.8M
    goto get_more;
5051
80.8M
      }
5052
13.2k
      in--;
5053
13.2k
  }
5054
1.83G
  SHRINK;
5055
1.83G
  GROW;
5056
1.83G
        if (ctxt->instate == XML_PARSER_EOF) {
5057
0
            xmlFree(buf);
5058
0
            return;
5059
0
        }
5060
1.83G
  in = ctxt->input->cur;
5061
1.83G
  if (*in == '-') {
5062
1.82G
      if (in[1] == '-') {
5063
1.72G
          if (in[2] == '>') {
5064
1.48G
        if (ctxt->input->id != inputid) {
5065
0
      xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5066
0
                     "comment doesn't start and stop in the"
5067
0
                                       " same entity\n");
5068
0
        }
5069
1.48G
        SKIP(3);
5070
1.48G
        if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5071
1.48G
            (!ctxt->disableSAX)) {
5072
619M
      if (buf != NULL)
5073
29.1M
          ctxt->sax->comment(ctxt->userData, buf);
5074
589M
      else
5075
589M
          ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5076
619M
        }
5077
1.48G
        if (buf != NULL)
5078
70.8M
            xmlFree(buf);
5079
1.48G
        if (ctxt->instate != XML_PARSER_EOF)
5080
1.48G
      ctxt->instate = state;
5081
1.48G
        return;
5082
1.48G
    }
5083
241M
    if (buf != NULL) {
5084
237M
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5085
237M
                          "Double hyphen within comment: "
5086
237M
                                      "<!--%.50s\n",
5087
237M
              buf);
5088
237M
    } else
5089
3.91M
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5090
3.91M
                          "Double hyphen within comment\n", NULL);
5091
241M
                if (ctxt->instate == XML_PARSER_EOF) {
5092
0
                    xmlFree(buf);
5093
0
                    return;
5094
0
                }
5095
241M
    in++;
5096
241M
    ctxt->input->col++;
5097
241M
      }
5098
334M
      in++;
5099
334M
      ctxt->input->col++;
5100
334M
      goto get_more;
5101
1.82G
  }
5102
1.83G
    } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
5103
11.4M
    xmlParseCommentComplex(ctxt, buf, len, size);
5104
11.4M
    ctxt->instate = state;
5105
11.4M
    return;
5106
1.49G
}
5107
5108
5109
/**
5110
 * xmlParsePITarget:
5111
 * @ctxt:  an XML parser context
5112
 *
5113
 * DEPRECATED: Internal function, don't use.
5114
 *
5115
 * parse the name of a PI
5116
 *
5117
 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5118
 *
5119
 * Returns the PITarget name or NULL
5120
 */
5121
5122
const xmlChar *
5123
2.66M
xmlParsePITarget(xmlParserCtxtPtr ctxt) {
5124
2.66M
    const xmlChar *name;
5125
5126
2.66M
    name = xmlParseName(ctxt);
5127
2.66M
    if ((name != NULL) &&
5128
2.66M
        ((name[0] == 'x') || (name[0] == 'X')) &&
5129
2.66M
        ((name[1] == 'm') || (name[1] == 'M')) &&
5130
2.66M
        ((name[2] == 'l') || (name[2] == 'L'))) {
5131
2.02M
  int i;
5132
2.02M
  if ((name[0] == 'x') && (name[1] == 'm') &&
5133
2.02M
      (name[2] == 'l') && (name[3] == 0)) {
5134
1.99M
      xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5135
1.99M
     "XML declaration allowed only at the start of the document\n");
5136
1.99M
      return(name);
5137
1.99M
  } else if (name[3] == 0) {
5138
755
      xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5139
755
      return(name);
5140
755
  }
5141
65.1k
  for (i = 0;;i++) {
5142
65.1k
      if (xmlW3CPIs[i] == NULL) break;
5143
50.0k
      if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5144
19.8k
          return(name);
5145
50.0k
  }
5146
15.1k
  xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5147
15.1k
          "xmlParsePITarget: invalid name prefix 'xml'\n",
5148
15.1k
          NULL, NULL);
5149
15.1k
    }
5150
646k
    if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5151
11.5k
  xmlNsErr(ctxt, XML_NS_ERR_COLON,
5152
11.5k
     "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5153
11.5k
    }
5154
646k
    return(name);
5155
2.66M
}
5156
5157
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * Parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document
 *
 * The value must be the word "catalog", an '=' and a quoted URL, with
 * optional blanks around each part and nothing after the closing quote.
 * A syntax error produces a warning, except for a missing '=' which is
 * ignored silently.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *url = NULL;
    const xmlChar *p;
    const xmlChar *start;
    xmlChar quote;

    /* pseudo-attribute name */
    for (p = catalog; IS_BLANK_CH(*p); p++)
        ;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto error;

    for (p += 7; IS_BLANK_CH(*p); p++)
        ;
    if (*p != '=')
        return;

    /* opening quote */
    for (p++; IS_BLANK_CH(*p); p++)
        ;
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto error;

    /* the value runs up to the matching quote */
    start = ++p;
    while ((*p != 0) && (*p != quote))
        p++;
    if (*p == 0)
        goto error;
    url = xmlStrndup(start, p - start);

    /* only blanks may follow the closing quote */
    for (p++; IS_BLANK_CH(*p); p++)
        ;
    if (*p != 0)
        goto error;

    if (url != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, url);
        xmlFree(url);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (url != NULL)
        xmlFree(url);
}
#endif
5218
5219
/**
5220
 * xmlParsePI:
5221
 * @ctxt:  an XML parser context
5222
 *
5223
 * DEPRECATED: Internal function, don't use.
5224
 *
5225
 * parse an XML Processing Instruction.
5226
 *
5227
 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5228
 *
5229
 * The processing is transferred to SAX once parsed.
5230
 */
5231
5232
void
5233
2.66M
xmlParsePI(xmlParserCtxtPtr ctxt) {
5234
2.66M
    xmlChar *buf = NULL;
5235
2.66M
    size_t len = 0;
5236
2.66M
    size_t size = XML_PARSER_BUFFER_SIZE;
5237
2.66M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5238
41.8k
                       XML_MAX_HUGE_LENGTH :
5239
2.66M
                       XML_MAX_TEXT_LENGTH;
5240
2.66M
    int cur, l;
5241
2.66M
    const xmlChar *target;
5242
2.66M
    xmlParserInputState state;
5243
2.66M
    int count = 0;
5244
5245
2.66M
    if ((RAW == '<') && (NXT(1) == '?')) {
5246
2.66M
  int inputid = ctxt->input->id;
5247
2.66M
  state = ctxt->instate;
5248
2.66M
        ctxt->instate = XML_PARSER_PI;
5249
  /*
5250
   * this is a Processing Instruction.
5251
   */
5252
2.66M
  SKIP(2);
5253
2.66M
  SHRINK;
5254
5255
  /*
5256
   * Parse the target name and check for special support like
5257
   * namespace.
5258
   */
5259
2.66M
        target = xmlParsePITarget(ctxt);
5260
2.66M
  if (target != NULL) {
5261
2.64M
      if ((RAW == '?') && (NXT(1) == '>')) {
5262
29.1k
    if (inputid != ctxt->input->id) {
5263
0
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5264
0
                             "PI declaration doesn't start and stop in"
5265
0
                                   " the same entity\n");
5266
0
    }
5267
29.1k
    SKIP(2);
5268
5269
    /*
5270
     * SAX: PI detected.
5271
     */
5272
29.1k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5273
29.1k
        (ctxt->sax->processingInstruction != NULL))
5274
12.1k
        ctxt->sax->processingInstruction(ctxt->userData,
5275
12.1k
                                         target, NULL);
5276
29.1k
    if (ctxt->instate != XML_PARSER_EOF)
5277
29.1k
        ctxt->instate = state;
5278
29.1k
    return;
5279
29.1k
      }
5280
2.61M
      buf = (xmlChar *) xmlMallocAtomic(size);
5281
2.61M
      if (buf == NULL) {
5282
0
    xmlErrMemory(ctxt, NULL);
5283
0
    ctxt->instate = state;
5284
0
    return;
5285
0
      }
5286
2.61M
      if (SKIP_BLANKS == 0) {
5287
355k
    xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5288
355k
        "ParsePI: PI %s space expected\n", target);
5289
355k
      }
5290
2.61M
      cur = CUR_CHAR(l);
5291
2.10G
      while (IS_CHAR(cur) && /* checked */
5292
2.10G
       ((cur != '?') || (NXT(1) != '>'))) {
5293
2.10G
    if (len + 5 >= size) {
5294
207k
        xmlChar *tmp;
5295
207k
                    size_t new_size = size * 2;
5296
207k
        tmp = (xmlChar *) xmlRealloc(buf, new_size);
5297
207k
        if (tmp == NULL) {
5298
0
      xmlErrMemory(ctxt, NULL);
5299
0
      xmlFree(buf);
5300
0
      ctxt->instate = state;
5301
0
      return;
5302
0
        }
5303
207k
        buf = tmp;
5304
207k
                    size = new_size;
5305
207k
    }
5306
2.10G
    count++;
5307
2.10G
    if (count > 50) {
5308
40.4M
        SHRINK;
5309
40.4M
        GROW;
5310
40.4M
                    if (ctxt->instate == XML_PARSER_EOF) {
5311
0
                        xmlFree(buf);
5312
0
                        return;
5313
0
                    }
5314
40.4M
        count = 0;
5315
40.4M
    }
5316
2.10G
    COPY_BUF(l,buf,len,cur);
5317
2.10G
    NEXTL(l);
5318
2.10G
    cur = CUR_CHAR(l);
5319
2.10G
    if (cur == 0) {
5320
40.1k
        SHRINK;
5321
40.1k
        GROW;
5322
40.1k
        cur = CUR_CHAR(l);
5323
40.1k
    }
5324
2.10G
                if (len > maxLength) {
5325
0
                    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5326
0
                                      "PI %s too big found", target);
5327
0
                    xmlFree(buf);
5328
0
                    ctxt->instate = state;
5329
0
                    return;
5330
0
                }
5331
2.10G
      }
5332
2.61M
      buf[len] = 0;
5333
2.61M
      if (cur != '?') {
5334
59.6k
    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5335
59.6k
          "ParsePI: PI %s never end ...\n", target);
5336
2.55M
      } else {
5337
2.55M
    if (inputid != ctxt->input->id) {
5338
0
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5339
0
                             "PI declaration doesn't start and stop in"
5340
0
                                   " the same entity\n");
5341
0
    }
5342
2.55M
    SKIP(2);
5343
5344
2.55M
#ifdef LIBXML_CATALOG_ENABLED
5345
2.55M
    if (((state == XML_PARSER_MISC) ||
5346
2.55M
               (state == XML_PARSER_START)) &&
5347
2.55M
        (xmlStrEqual(target, XML_CATALOG_PI))) {
5348
0
        xmlCatalogAllow allow = xmlCatalogGetDefaults();
5349
0
        if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5350
0
      (allow == XML_CATA_ALLOW_ALL))
5351
0
      xmlParseCatalogPI(ctxt, buf);
5352
0
    }
5353
2.55M
#endif
5354
5355
5356
    /*
5357
     * SAX: PI detected.
5358
     */
5359
2.55M
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5360
2.55M
        (ctxt->sax->processingInstruction != NULL))
5361
250k
        ctxt->sax->processingInstruction(ctxt->userData,
5362
250k
                                         target, buf);
5363
2.55M
      }
5364
2.61M
      xmlFree(buf);
5365
2.61M
  } else {
5366
18.0k
      xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5367
18.0k
  }
5368
2.63M
  if (ctxt->instate != XML_PARSER_EOF)
5369
2.63M
      ctxt->instate = state;
5370
2.63M
    }
5371
2.66M
}
5372
5373
/**
5374
 * xmlParseNotationDecl:
5375
 * @ctxt:  an XML parser context
5376
 *
5377
 * DEPRECATED: Internal function, don't use.
5378
 *
5379
 * Parse a notation declaration. Always consumes '<!'.
5380
 *
5381
 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID |  PublicID) S? '>'
5382
 *
5383
 * Hence there is actually 3 choices:
5384
 *     'PUBLIC' S PubidLiteral
5385
 *     'PUBLIC' S PubidLiteral S SystemLiteral
5386
 * and 'SYSTEM' S SystemLiteral
5387
 *
5388
 * See the NOTE on xmlParseExternalID().
5389
 */
5390
5391
void
5392
1.05M
xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5393
1.05M
    const xmlChar *name;
5394
1.05M
    xmlChar *Pubid;
5395
1.05M
    xmlChar *Systemid;
5396
5397
1.05M
    if ((CUR != '<') || (NXT(1) != '!'))
5398
0
        return;
5399
1.05M
    SKIP(2);
5400
5401
1.05M
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5402
1.04M
  int inputid = ctxt->input->id;
5403
1.04M
  SHRINK;
5404
1.04M
  SKIP(8);
5405
1.04M
  if (SKIP_BLANKS == 0) {
5406
287
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5407
287
         "Space required after '<!NOTATION'\n");
5408
287
      return;
5409
287
  }
5410
5411
1.04M
        name = xmlParseName(ctxt);
5412
1.04M
  if (name == NULL) {
5413
456
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5414
456
      return;
5415
456
  }
5416
1.04M
  if (xmlStrchr(name, ':') != NULL) {
5417
117
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5418
117
         "colons are forbidden from notation names '%s'\n",
5419
117
         name, NULL, NULL);
5420
117
  }
5421
1.04M
  if (SKIP_BLANKS == 0) {
5422
150
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5423
150
         "Space required after the NOTATION name'\n");
5424
150
      return;
5425
150
  }
5426
5427
  /*
5428
   * Parse the IDs.
5429
   */
5430
1.04M
  Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5431
1.04M
  SKIP_BLANKS;
5432
5433
1.04M
  if (RAW == '>') {
5434
1.03M
      if (inputid != ctxt->input->id) {
5435
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5436
0
                         "Notation declaration doesn't start and stop"
5437
0
                               " in the same entity\n");
5438
0
      }
5439
1.03M
      NEXT;
5440
1.03M
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5441
1.03M
    (ctxt->sax->notationDecl != NULL))
5442
373k
    ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5443
1.03M
  } else {
5444
6.29k
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5445
6.29k
  }
5446
1.04M
  if (Systemid != NULL) xmlFree(Systemid);
5447
1.04M
  if (Pubid != NULL) xmlFree(Pubid);
5448
1.04M
    }
5449
1.05M
}
5450
5451
/**
5452
 * xmlParseEntityDecl:
5453
 * @ctxt:  an XML parser context
5454
 *
5455
 * DEPRECATED: Internal function, don't use.
5456
 *
5457
 * Parse an entity declaration. Always consumes '<!'.
5458
 *
5459
 * [70] EntityDecl ::= GEDecl | PEDecl
5460
 *
5461
 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5462
 *
5463
 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5464
 *
5465
 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5466
 *
5467
 * [74] PEDef ::= EntityValue | ExternalID
5468
 *
5469
 * [76] NDataDecl ::= S 'NDATA' S Name
5470
 *
5471
 * [ VC: Notation Declared ]
5472
 * The Name must match the declared name of a notation.
5473
 */
5474
5475
void
5476
21.4M
xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5477
21.4M
    const xmlChar *name = NULL;
5478
21.4M
    xmlChar *value = NULL;
5479
21.4M
    xmlChar *URI = NULL, *literal = NULL;
5480
21.4M
    const xmlChar *ndata = NULL;
5481
21.4M
    int isParameter = 0;
5482
21.4M
    xmlChar *orig = NULL;
5483
5484
21.4M
    if ((CUR != '<') || (NXT(1) != '!'))
5485
0
        return;
5486
21.4M
    SKIP(2);
5487
5488
    /* GROW; done in the caller */
5489
21.4M
    if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5490
21.4M
  int inputid = ctxt->input->id;
5491
21.4M
  SHRINK;
5492
21.4M
  SKIP(6);
5493
21.4M
  if (SKIP_BLANKS == 0) {
5494
5.25k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5495
5.25k
         "Space required after '<!ENTITY'\n");
5496
5.25k
  }
5497
5498
21.4M
  if (RAW == '%') {
5499
5.87M
      NEXT;
5500
5.87M
      if (SKIP_BLANKS == 0) {
5501
429
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5502
429
             "Space required after '%%'\n");
5503
429
      }
5504
5.87M
      isParameter = 1;
5505
5.87M
  }
5506
5507
21.4M
        name = xmlParseName(ctxt);
5508
21.4M
  if (name == NULL) {
5509
6.98k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5510
6.98k
                     "xmlParseEntityDecl: no name\n");
5511
6.98k
            return;
5512
6.98k
  }
5513
21.3M
  if (xmlStrchr(name, ':') != NULL) {
5514
1.15k
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5515
1.15k
         "colons are forbidden from entities names '%s'\n",
5516
1.15k
         name, NULL, NULL);
5517
1.15k
  }
5518
21.3M
  if (SKIP_BLANKS == 0) {
5519
12.4k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5520
12.4k
         "Space required after the entity name\n");
5521
12.4k
  }
5522
5523
21.3M
  ctxt->instate = XML_PARSER_ENTITY_DECL;
5524
  /*
5525
   * handle the various case of definitions...
5526
   */
5527
21.3M
  if (isParameter) {
5528
5.86M
      if ((RAW == '"') || (RAW == '\'')) {
5529
5.80M
          value = xmlParseEntityValue(ctxt, &orig);
5530
5.80M
    if (value) {
5531
5.77M
        if ((ctxt->sax != NULL) &&
5532
5.77M
      (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5533
5.66M
      ctxt->sax->entityDecl(ctxt->userData, name,
5534
5.66M
                        XML_INTERNAL_PARAMETER_ENTITY,
5535
5.66M
            NULL, NULL, value);
5536
5.77M
    }
5537
5.80M
      } else {
5538
62.4k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5539
62.4k
    if ((URI == NULL) && (literal == NULL)) {
5540
5.10k
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5541
5.10k
    }
5542
62.4k
    if (URI) {
5543
57.2k
        xmlURIPtr uri;
5544
5545
57.2k
        uri = xmlParseURI((const char *) URI);
5546
57.2k
        if (uri == NULL) {
5547
2.87k
            xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5548
2.87k
             "Invalid URI: %s\n", URI);
5549
      /*
5550
       * This really ought to be a well formedness error
5551
       * but the XML Core WG decided otherwise c.f. issue
5552
       * E26 of the XML erratas.
5553
       */
5554
54.3k
        } else {
5555
54.3k
      if (uri->fragment != NULL) {
5556
          /*
5557
           * Okay this is foolish to block those but not
5558
           * invalid URIs.
5559
           */
5560
85
          xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5561
54.3k
      } else {
5562
54.3k
          if ((ctxt->sax != NULL) &&
5563
54.3k
        (!ctxt->disableSAX) &&
5564
54.3k
        (ctxt->sax->entityDecl != NULL))
5565
53.6k
        ctxt->sax->entityDecl(ctxt->userData, name,
5566
53.6k
              XML_EXTERNAL_PARAMETER_ENTITY,
5567
53.6k
              literal, URI, NULL);
5568
54.3k
      }
5569
54.3k
      xmlFreeURI(uri);
5570
54.3k
        }
5571
57.2k
    }
5572
62.4k
      }
5573
15.5M
  } else {
5574
15.5M
      if ((RAW == '"') || (RAW == '\'')) {
5575
4.38M
          value = xmlParseEntityValue(ctxt, &orig);
5576
4.38M
    if ((ctxt->sax != NULL) &&
5577
4.38M
        (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5578
4.13M
        ctxt->sax->entityDecl(ctxt->userData, name,
5579
4.13M
        XML_INTERNAL_GENERAL_ENTITY,
5580
4.13M
        NULL, NULL, value);
5581
    /*
5582
     * For expat compatibility in SAX mode.
5583
     */
5584
4.38M
    if ((ctxt->myDoc == NULL) ||
5585
4.38M
        (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5586
19.3k
        if (ctxt->myDoc == NULL) {
5587
4.05k
      ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5588
4.05k
      if (ctxt->myDoc == NULL) {
5589
0
          xmlErrMemory(ctxt, "New Doc failed");
5590
0
          return;
5591
0
      }
5592
4.05k
      ctxt->myDoc->properties = XML_DOC_INTERNAL;
5593
4.05k
        }
5594
19.3k
        if (ctxt->myDoc->intSubset == NULL)
5595
4.05k
      ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5596
4.05k
              BAD_CAST "fake", NULL, NULL);
5597
5598
19.3k
        xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5599
19.3k
                    NULL, NULL, value);
5600
19.3k
    }
5601
11.1M
      } else {
5602
11.1M
          URI = xmlParseExternalID(ctxt, &literal, 1);
5603
11.1M
    if ((URI == NULL) && (literal == NULL)) {
5604
25.3k
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5605
25.3k
    }
5606
11.1M
    if (URI) {
5607
11.1M
        xmlURIPtr uri;
5608
5609
11.1M
        uri = xmlParseURI((const char *)URI);
5610
11.1M
        if (uri == NULL) {
5611
10.0k
            xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5612
10.0k
             "Invalid URI: %s\n", URI);
5613
      /*
5614
       * This really ought to be a well formedness error
5615
       * but the XML Core WG decided otherwise c.f. issue
5616
       * E26 of the XML erratas.
5617
       */
5618
11.0M
        } else {
5619
11.0M
      if (uri->fragment != NULL) {
5620
          /*
5621
           * Okay this is foolish to block those but not
5622
           * invalid URIs.
5623
           */
5624
999
          xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5625
999
      }
5626
11.0M
      xmlFreeURI(uri);
5627
11.0M
        }
5628
11.1M
    }
5629
11.1M
    if ((RAW != '>') && (SKIP_BLANKS == 0)) {
5630
32.7k
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5631
32.7k
           "Space required before 'NDATA'\n");
5632
32.7k
    }
5633
11.1M
    if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5634
2.02M
        SKIP(5);
5635
2.02M
        if (SKIP_BLANKS == 0) {
5636
666
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5637
666
               "Space required after 'NDATA'\n");
5638
666
        }
5639
2.02M
        ndata = xmlParseName(ctxt);
5640
2.02M
        if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5641
2.02M
            (ctxt->sax->unparsedEntityDecl != NULL))
5642
1.01M
      ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5643
1.01M
            literal, URI, ndata);
5644
9.12M
    } else {
5645
9.12M
        if ((ctxt->sax != NULL) &&
5646
9.12M
            (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5647
6.46M
      ctxt->sax->entityDecl(ctxt->userData, name,
5648
6.46M
            XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5649
6.46M
            literal, URI, NULL);
5650
        /*
5651
         * For expat compatibility in SAX mode.
5652
         * assuming the entity replacement was asked for
5653
         */
5654
9.12M
        if ((ctxt->replaceEntities != 0) &&
5655
9.12M
      ((ctxt->myDoc == NULL) ||
5656
9.08M
      (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5657
2.73k
      if (ctxt->myDoc == NULL) {
5658
1.63k
          ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5659
1.63k
          if (ctxt->myDoc == NULL) {
5660
0
              xmlErrMemory(ctxt, "New Doc failed");
5661
0
        return;
5662
0
          }
5663
1.63k
          ctxt->myDoc->properties = XML_DOC_INTERNAL;
5664
1.63k
      }
5665
5666
2.73k
      if (ctxt->myDoc->intSubset == NULL)
5667
1.63k
          ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5668
1.63k
            BAD_CAST "fake", NULL, NULL);
5669
2.73k
      xmlSAX2EntityDecl(ctxt, name,
5670
2.73k
                  XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5671
2.73k
                  literal, URI, NULL);
5672
2.73k
        }
5673
9.12M
    }
5674
11.1M
      }
5675
15.5M
  }
5676
21.3M
  if (ctxt->instate == XML_PARSER_EOF)
5677
4.47k
      goto done;
5678
21.3M
  SKIP_BLANKS;
5679
21.3M
  if (RAW != '>') {
5680
83.9k
      xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5681
83.9k
              "xmlParseEntityDecl: entity %s not terminated\n", name);
5682
83.9k
      xmlHaltParser(ctxt);
5683
21.3M
  } else {
5684
21.3M
      if (inputid != ctxt->input->id) {
5685
15
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5686
15
                         "Entity declaration doesn't start and stop in"
5687
15
                               " the same entity\n");
5688
15
      }
5689
21.3M
      NEXT;
5690
21.3M
  }
5691
21.3M
  if (orig != NULL) {
5692
      /*
5693
       * Ugly mechanism to save the raw entity value.
5694
       */
5695
10.1M
      xmlEntityPtr cur = NULL;
5696
5697
10.1M
      if (isParameter) {
5698
5.78M
          if ((ctxt->sax != NULL) &&
5699
5.78M
        (ctxt->sax->getParameterEntity != NULL))
5700
5.78M
        cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5701
5.78M
      } else {
5702
4.36M
          if ((ctxt->sax != NULL) &&
5703
4.36M
        (ctxt->sax->getEntity != NULL))
5704
4.36M
        cur = ctxt->sax->getEntity(ctxt->userData, name);
5705
4.36M
    if ((cur == NULL) && (ctxt->userData==ctxt)) {
5706
194k
        cur = xmlSAX2GetEntity(ctxt, name);
5707
194k
    }
5708
4.36M
      }
5709
10.1M
            if ((cur != NULL) && (cur->orig == NULL)) {
5710
9.47M
    cur->orig = orig;
5711
9.47M
                orig = NULL;
5712
9.47M
      }
5713
10.1M
  }
5714
5715
21.3M
done:
5716
21.3M
  if (value != NULL) xmlFree(value);
5717
21.3M
  if (URI != NULL) xmlFree(URI);
5718
21.3M
  if (literal != NULL) xmlFree(literal);
5719
21.3M
        if (orig != NULL) xmlFree(orig);
5720
21.3M
    }
5721
21.4M
}
5722
5723
/**
5724
 * xmlParseDefaultDecl:
5725
 * @ctxt:  an XML parser context
5726
 * @value:  Receive a possible fixed default value for the attribute
5727
 *
5728
 * DEPRECATED: Internal function, don't use.
5729
 *
5730
 * Parse an attribute default declaration
5731
 *
5732
 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5733
 *
5734
 * [ VC: Required Attribute ]
5735
 * if the default declaration is the keyword #REQUIRED, then the
5736
 * attribute must be specified for all elements of the type in the
5737
 * attribute-list declaration.
5738
 *
5739
 * [ VC: Attribute Default Legal ]
5740
 * The declared default value must meet the lexical constraints of
5741
 * the declared attribute type c.f. xmlValidateAttributeDecl()
5742
 *
5743
 * [ VC: Fixed Attribute Default ]
5744
 * if an attribute has a default value declared with the #FIXED
5745
 * keyword, instances of that attribute must match the default value.
5746
 *
5747
 * [ WFC: No < in Attribute Values ]
5748
 * handled in xmlParseAttValue()
5749
 *
5750
 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5751
 *          or XML_ATTRIBUTE_FIXED.
5752
 */
5753
5754
int
5755
53.7M
xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5756
53.7M
    int val;
5757
53.7M
    xmlChar *ret;
5758
5759
53.7M
    *value = NULL;
5760
53.7M
    if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5761
2.61M
  SKIP(9);
5762
2.61M
  return(XML_ATTRIBUTE_REQUIRED);
5763
2.61M
    }
5764
51.1M
    if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5765
46.3M
  SKIP(8);
5766
46.3M
  return(XML_ATTRIBUTE_IMPLIED);
5767
46.3M
    }
5768
4.78M
    val = XML_ATTRIBUTE_NONE;
5769
4.78M
    if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5770
2.50M
  SKIP(6);
5771
2.50M
  val = XML_ATTRIBUTE_FIXED;
5772
2.50M
  if (SKIP_BLANKS == 0) {
5773
476
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5774
476
         "Space required after '#FIXED'\n");
5775
476
  }
5776
2.50M
    }
5777
4.78M
    ret = xmlParseAttValue(ctxt);
5778
4.78M
    ctxt->instate = XML_PARSER_DTD;
5779
4.78M
    if (ret == NULL) {
5780
38.5k
  xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5781
38.5k
           "Attribute default value declaration error\n");
5782
38.5k
    } else
5783
4.74M
        *value = ret;
5784
4.78M
    return(val);
5785
51.1M
}
5786
5787
/**
5788
 * xmlParseNotationType:
5789
 * @ctxt:  an XML parser context
5790
 *
5791
 * DEPRECATED: Internal function, don't use.
5792
 *
5793
 * parse an Notation attribute type.
5794
 *
5795
 * Note: the leading 'NOTATION' S part has already being parsed...
5796
 *
5797
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5798
 *
5799
 * [ VC: Notation Attributes ]
5800
 * Values of this type must match one of the notation names included
5801
 * in the declaration; all notation names in the declaration must be declared.
5802
 *
5803
 * Returns: the notation attribute tree built while parsing
5804
 */
5805
5806
xmlEnumerationPtr
5807
475k
xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5808
475k
    const xmlChar *name;
5809
475k
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5810
5811
475k
    if (RAW != '(') {
5812
562
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5813
562
  return(NULL);
5814
562
    }
5815
474k
    SHRINK;
5816
474k
    do {
5817
474k
        NEXT;
5818
474k
  SKIP_BLANKS;
5819
474k
        name = xmlParseName(ctxt);
5820
474k
  if (name == NULL) {
5821
102
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5822
102
         "Name expected in NOTATION declaration\n");
5823
102
            xmlFreeEnumeration(ret);
5824
102
      return(NULL);
5825
102
  }
5826
474k
  tmp = ret;
5827
474k
  while (tmp != NULL) {
5828
57
      if (xmlStrEqual(name, tmp->name)) {
5829
9
    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5830
9
    "standalone: attribute notation value token %s duplicated\n",
5831
9
         name, NULL);
5832
9
    if (!xmlDictOwns(ctxt->dict, name))
5833
0
        xmlFree((xmlChar *) name);
5834
9
    break;
5835
9
      }
5836
48
      tmp = tmp->next;
5837
48
  }
5838
474k
  if (tmp == NULL) {
5839
474k
      cur = xmlCreateEnumeration(name);
5840
474k
      if (cur == NULL) {
5841
0
                xmlFreeEnumeration(ret);
5842
0
                return(NULL);
5843
0
            }
5844
474k
      if (last == NULL) ret = last = cur;
5845
36
      else {
5846
36
    last->next = cur;
5847
36
    last = cur;
5848
36
      }
5849
474k
  }
5850
474k
  SKIP_BLANKS;
5851
474k
    } while (RAW == '|');
5852
474k
    if (RAW != ')') {
5853
356
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5854
356
        xmlFreeEnumeration(ret);
5855
356
  return(NULL);
5856
356
    }
5857
474k
    NEXT;
5858
474k
    return(ret);
5859
474k
}
5860
5861
/**
5862
 * xmlParseEnumerationType:
5863
 * @ctxt:  an XML parser context
5864
 *
5865
 * DEPRECATED: Internal function, don't use.
5866
 *
5867
 * parse an Enumeration attribute type.
5868
 *
5869
 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5870
 *
5871
 * [ VC: Enumeration ]
5872
 * Values of this type must match one of the Nmtoken tokens in
5873
 * the declaration
5874
 *
5875
 * Returns: the enumeration attribute tree built while parsing
5876
 */
5877
5878
xmlEnumerationPtr
5879
10.0M
xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5880
10.0M
    xmlChar *name;
5881
10.0M
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5882
5883
10.0M
    if (RAW != '(') {
5884
46.2k
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5885
46.2k
  return(NULL);
5886
46.2k
    }
5887
9.96M
    SHRINK;
5888
14.8M
    do {
5889
14.8M
        NEXT;
5890
14.8M
  SKIP_BLANKS;
5891
14.8M
        name = xmlParseNmtoken(ctxt);
5892
14.8M
  if (name == NULL) {
5893
621
      xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5894
621
      return(ret);
5895
621
  }
5896
14.8M
  tmp = ret;
5897
26.0M
  while (tmp != NULL) {
5898
11.1M
      if (xmlStrEqual(name, tmp->name)) {
5899
576
    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5900
576
    "standalone: attribute enumeration value token %s duplicated\n",
5901
576
         name, NULL);
5902
576
    if (!xmlDictOwns(ctxt->dict, name))
5903
576
        xmlFree(name);
5904
576
    break;
5905
576
      }
5906
11.1M
      tmp = tmp->next;
5907
11.1M
  }
5908
14.8M
  if (tmp == NULL) {
5909
14.8M
      cur = xmlCreateEnumeration(name);
5910
14.8M
      if (!xmlDictOwns(ctxt->dict, name))
5911
14.8M
    xmlFree(name);
5912
14.8M
      if (cur == NULL) {
5913
0
                xmlFreeEnumeration(ret);
5914
0
                return(NULL);
5915
0
            }
5916
14.8M
      if (last == NULL) ret = last = cur;
5917
4.89M
      else {
5918
4.89M
    last->next = cur;
5919
4.89M
    last = cur;
5920
4.89M
      }
5921
14.8M
  }
5922
14.8M
  SKIP_BLANKS;
5923
14.8M
    } while (RAW == '|');
5924
9.96M
    if (RAW != ')') {
5925
3.00k
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5926
3.00k
  return(ret);
5927
3.00k
    }
5928
9.96M
    NEXT;
5929
9.96M
    return(ret);
5930
9.96M
}
5931
5932
/**
5933
 * xmlParseEnumeratedType:
5934
 * @ctxt:  an XML parser context
5935
 * @tree:  the enumeration tree built while parsing
5936
 *
5937
 * DEPRECATED: Internal function, don't use.
5938
 *
5939
 * parse an Enumerated attribute type.
5940
 *
5941
 * [57] EnumeratedType ::= NotationType | Enumeration
5942
 *
5943
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5944
 *
5945
 *
5946
 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5947
 */
5948
5949
int
5950
10.4M
xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5951
10.4M
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5952
475k
  SKIP(8);
5953
475k
  if (SKIP_BLANKS == 0) {
5954
231
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5955
231
         "Space required after 'NOTATION'\n");
5956
231
      return(0);
5957
231
  }
5958
475k
  *tree = xmlParseNotationType(ctxt);
5959
475k
  if (*tree == NULL) return(0);
5960
474k
  return(XML_ATTRIBUTE_NOTATION);
5961
475k
    }
5962
10.0M
    *tree = xmlParseEnumerationType(ctxt);
5963
10.0M
    if (*tree == NULL) return(0);
5964
9.96M
    return(XML_ATTRIBUTE_ENUMERATION);
5965
10.0M
}
5966
5967
/**
5968
 * xmlParseAttributeType:
5969
 * @ctxt:  an XML parser context
5970
 * @tree:  the enumeration tree built while parsing
5971
 *
5972
 * DEPRECATED: Internal function, don't use.
5973
 *
5974
 * parse the Attribute list def for an element
5975
 *
5976
 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5977
 *
5978
 * [55] StringType ::= 'CDATA'
5979
 *
5980
 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5981
 *                        'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5982
 *
5983
 * Validity constraints for attribute values syntax are checked in
5984
 * xmlValidateAttributeValue()
5985
 *
5986
 * [ VC: ID ]
5987
 * Values of type ID must match the Name production. A name must not
5988
 * appear more than once in an XML document as a value of this type;
5989
 * i.e., ID values must uniquely identify the elements which bear them.
5990
 *
5991
 * [ VC: One ID per Element Type ]
5992
 * No element type may have more than one ID attribute specified.
5993
 *
5994
 * [ VC: ID Attribute Default ]
5995
 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5996
 *
5997
 * [ VC: IDREF ]
5998
 * Values of type IDREF must match the Name production, and values
5999
 * of type IDREFS must match Names; each IDREF Name must match the value
6000
 * of an ID attribute on some element in the XML document; i.e. IDREF
6001
 * values must match the value of some ID attribute.
6002
 *
6003
 * [ VC: Entity Name ]
6004
 * Values of type ENTITY must match the Name production, values
6005
 * of type ENTITIES must match Names; each Entity Name must match the
6006
 * name of an unparsed entity declared in the DTD.
6007
 *
6008
 * [ VC: Name Token ]
6009
 * Values of type NMTOKEN must match the Nmtoken production; values
6010
 * of type NMTOKENS must match Nmtokens.
6011
 *
6012
 * Returns the attribute type
6013
 */
6014
int
6015
53.8M
xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
6016
53.8M
    SHRINK;
6017
53.8M
    if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
6018
13.3M
  SKIP(5);
6019
13.3M
  return(XML_ATTRIBUTE_CDATA);
6020
40.4M
     } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
6021
4.67M
  SKIP(6);
6022
4.67M
  return(XML_ATTRIBUTE_IDREFS);
6023
35.7M
     } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
6024
2.38M
  SKIP(5);
6025
2.38M
  return(XML_ATTRIBUTE_IDREF);
6026
33.3M
     } else if ((RAW == 'I') && (NXT(1) == 'D')) {
6027
9.83M
        SKIP(2);
6028
9.83M
  return(XML_ATTRIBUTE_ID);
6029
23.5M
     } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
6030
54.5k
  SKIP(6);
6031
54.5k
  return(XML_ATTRIBUTE_ENTITY);
6032
23.5M
     } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
6033
326k
  SKIP(8);
6034
326k
  return(XML_ATTRIBUTE_ENTITIES);
6035
23.1M
     } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
6036
4.39M
  SKIP(8);
6037
4.39M
  return(XML_ATTRIBUTE_NMTOKENS);
6038
18.7M
     } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
6039
8.30M
  SKIP(7);
6040
8.30M
  return(XML_ATTRIBUTE_NMTOKEN);
6041
8.30M
     }
6042
10.4M
     return(xmlParseEnumeratedType(ctxt, tree));
6043
53.8M
}
6044
6045
/**
6046
 * xmlParseAttributeListDecl:
6047
 * @ctxt:  an XML parser context
6048
 *
6049
 * DEPRECATED: Internal function, don't use.
6050
 *
6051
 * Parse an attribute list declaration for an element. Always consumes '<!'.
6052
 *
6053
 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
6054
 *
6055
 * [53] AttDef ::= S Name S AttType S DefaultDecl
6056
 *
6057
 */
6058
void
6059
31.9M
xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
6060
31.9M
    const xmlChar *elemName;
6061
31.9M
    const xmlChar *attrName;
6062
31.9M
    xmlEnumerationPtr tree;
6063
6064
31.9M
    if ((CUR != '<') || (NXT(1) != '!'))
6065
0
        return;
6066
31.9M
    SKIP(2);
6067
6068
31.9M
    if (CMP7(CUR_PTR, 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
6069
31.9M
  int inputid = ctxt->input->id;
6070
6071
31.9M
  SKIP(7);
6072
31.9M
  if (SKIP_BLANKS == 0) {
6073
116k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6074
116k
                     "Space required after '<!ATTLIST'\n");
6075
116k
  }
6076
31.9M
        elemName = xmlParseName(ctxt);
6077
31.9M
  if (elemName == NULL) {
6078
2.06k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6079
2.06k
         "ATTLIST: no name for Element\n");
6080
2.06k
      return;
6081
2.06k
  }
6082
31.9M
  SKIP_BLANKS;
6083
31.9M
  GROW;
6084
85.5M
  while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
6085
53.8M
      int type;
6086
53.8M
      int def;
6087
53.8M
      xmlChar *defaultValue = NULL;
6088
6089
53.8M
      GROW;
6090
53.8M
            tree = NULL;
6091
53.8M
      attrName = xmlParseName(ctxt);
6092
53.8M
      if (attrName == NULL) {
6093
20.6k
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6094
20.6k
             "ATTLIST: no name for Attribute\n");
6095
20.6k
    break;
6096
20.6k
      }
6097
53.8M
      GROW;
6098
53.8M
      if (SKIP_BLANKS == 0) {
6099
17.6k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6100
17.6k
            "Space required after the attribute name\n");
6101
17.6k
    break;
6102
17.6k
      }
6103
6104
53.8M
      type = xmlParseAttributeType(ctxt, &tree);
6105
53.8M
      if (type <= 0) {
6106
48.0k
          break;
6107
48.0k
      }
6108
6109
53.7M
      GROW;
6110
53.7M
      if (SKIP_BLANKS == 0) {
6111
17.4k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6112
17.4k
             "Space required after the attribute type\n");
6113
17.4k
          if (tree != NULL)
6114
3.28k
        xmlFreeEnumeration(tree);
6115
17.4k
    break;
6116
17.4k
      }
6117
6118
53.7M
      def = xmlParseDefaultDecl(ctxt, &defaultValue);
6119
53.7M
      if (def <= 0) {
6120
0
                if (defaultValue != NULL)
6121
0
        xmlFree(defaultValue);
6122
0
          if (tree != NULL)
6123
0
        xmlFreeEnumeration(tree);
6124
0
          break;
6125
0
      }
6126
53.7M
      if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6127
815k
          xmlAttrNormalizeSpace(defaultValue, defaultValue);
6128
6129
53.7M
      GROW;
6130
53.7M
            if (RAW != '>') {
6131
31.6M
    if (SKIP_BLANKS == 0) {
6132
149k
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6133
149k
      "Space required after the attribute default value\n");
6134
149k
        if (defaultValue != NULL)
6135
109k
      xmlFree(defaultValue);
6136
149k
        if (tree != NULL)
6137
99.1k
      xmlFreeEnumeration(tree);
6138
149k
        break;
6139
149k
    }
6140
31.6M
      }
6141
53.6M
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6142
53.6M
    (ctxt->sax->attributeDecl != NULL))
6143
36.5M
    ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6144
36.5M
                          type, def, defaultValue, tree);
6145
17.0M
      else if (tree != NULL)
6146
5.09M
    xmlFreeEnumeration(tree);
6147
6148
53.6M
      if ((ctxt->sax2) && (defaultValue != NULL) &&
6149
53.6M
          (def != XML_ATTRIBUTE_IMPLIED) &&
6150
53.6M
    (def != XML_ATTRIBUTE_REQUIRED)) {
6151
4.57M
    xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6152
4.57M
      }
6153
53.6M
      if (ctxt->sax2) {
6154
53.0M
    xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6155
53.0M
      }
6156
53.6M
      if (defaultValue != NULL)
6157
4.63M
          xmlFree(defaultValue);
6158
53.6M
      GROW;
6159
53.6M
  }
6160
31.9M
  if (RAW == '>') {
6161
31.6M
      if (inputid != ctxt->input->id) {
6162
102
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6163
102
                               "Attribute list declaration doesn't start and"
6164
102
                               " stop in the same entity\n");
6165
102
      }
6166
31.6M
      NEXT;
6167
31.6M
  }
6168
31.9M
    }
6169
31.9M
}
6170
6171
/**
6172
 * xmlParseElementMixedContentDecl:
6173
 * @ctxt:  an XML parser context
6174
 * @inputchk:  the input used for the current entity, needed for boundary checks
6175
 *
6176
 * DEPRECATED: Internal function, don't use.
6177
 *
6178
 * parse the declaration for a Mixed Element content
6179
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6180
 *
6181
 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6182
 *                '(' S? '#PCDATA' S? ')'
6183
 *
6184
 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6185
 *
6186
 * [ VC: No Duplicate Types ]
6187
 * The same name must not appear more than once in a single
6188
 * mixed-content declaration.
6189
 *
6190
 * returns: the list of the xmlElementContentPtr describing the element choices
6191
 */
6192
xmlElementContentPtr
6193
5.79M
xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6194
5.79M
    xmlElementContentPtr ret = NULL, cur = NULL, n;
6195
5.79M
    const xmlChar *elem = NULL;
6196
6197
5.79M
    GROW;
6198
5.79M
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6199
5.79M
  SKIP(7);
6200
5.79M
  SKIP_BLANKS;
6201
5.79M
  SHRINK;
6202
5.79M
  if (RAW == ')') {
6203
3.29M
      if (ctxt->input->id != inputchk) {
6204
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6205
0
                               "Element content declaration doesn't start and"
6206
0
                               " stop in the same entity\n");
6207
0
      }
6208
3.29M
      NEXT;
6209
3.29M
      ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6210
3.29M
      if (ret == NULL)
6211
0
          return(NULL);
6212
3.29M
      if (RAW == '*') {
6213
88
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6214
88
    NEXT;
6215
88
      }
6216
3.29M
      return(ret);
6217
3.29M
  }
6218
2.49M
  if ((RAW == '(') || (RAW == '|')) {
6219
2.49M
      ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6220
2.49M
      if (ret == NULL) return(NULL);
6221
2.49M
  }
6222
27.8M
  while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
6223
25.3M
      NEXT;
6224
25.3M
      if (elem == NULL) {
6225
2.49M
          ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6226
2.49M
    if (ret == NULL) {
6227
0
        xmlFreeDocElementContent(ctxt->myDoc, cur);
6228
0
                    return(NULL);
6229
0
                }
6230
2.49M
    ret->c1 = cur;
6231
2.49M
    if (cur != NULL)
6232
2.49M
        cur->parent = ret;
6233
2.49M
    cur = ret;
6234
22.8M
      } else {
6235
22.8M
          n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6236
22.8M
    if (n == NULL) {
6237
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6238
0
                    return(NULL);
6239
0
                }
6240
22.8M
    n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6241
22.8M
    if (n->c1 != NULL)
6242
22.8M
        n->c1->parent = n;
6243
22.8M
          cur->c2 = n;
6244
22.8M
    if (n != NULL)
6245
22.8M
        n->parent = cur;
6246
22.8M
    cur = n;
6247
22.8M
      }
6248
25.3M
      SKIP_BLANKS;
6249
25.3M
      elem = xmlParseName(ctxt);
6250
25.3M
      if (elem == NULL) {
6251
486
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6252
486
      "xmlParseElementMixedContentDecl : Name expected\n");
6253
486
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6254
486
    return(NULL);
6255
486
      }
6256
25.3M
      SKIP_BLANKS;
6257
25.3M
      GROW;
6258
25.3M
  }
6259
2.49M
  if ((RAW == ')') && (NXT(1) == '*')) {
6260
2.49M
      if (elem != NULL) {
6261
2.49M
    cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6262
2.49M
                                   XML_ELEMENT_CONTENT_ELEMENT);
6263
2.49M
    if (cur->c2 != NULL)
6264
2.49M
        cur->c2->parent = cur;
6265
2.49M
            }
6266
2.49M
            if (ret != NULL)
6267
2.49M
                ret->ocur = XML_ELEMENT_CONTENT_MULT;
6268
2.49M
      if (ctxt->input->id != inputchk) {
6269
12
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6270
12
                               "Element content declaration doesn't start and"
6271
12
                               " stop in the same entity\n");
6272
12
      }
6273
2.49M
      SKIP(2);
6274
2.49M
  } else {
6275
3.82k
      xmlFreeDocElementContent(ctxt->myDoc, ret);
6276
3.82k
      xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6277
3.82k
      return(NULL);
6278
3.82k
  }
6279
6280
2.49M
    } else {
6281
0
  xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6282
0
    }
6283
2.49M
    return(ret);
6284
5.79M
}
6285
6286
/**
6287
 * xmlParseElementChildrenContentDeclPriv:
6288
 * @ctxt:  an XML parser context
6289
 * @inputchk:  the input used for the current entity, needed for boundary checks
6290
 * @depth: the level of recursion
6291
 *
6292
 * parse the declaration for a Mixed Element content
6293
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6294
 *
6295
 *
6296
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6297
 *
6298
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6299
 *
6300
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6301
 *
6302
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6303
 *
6304
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6305
 * TODO Parameter-entity replacement text must be properly nested
6306
 *  with parenthesized groups. That is to say, if either of the
6307
 *  opening or closing parentheses in a choice, seq, or Mixed
6308
 *  construct is contained in the replacement text for a parameter
6309
 *  entity, both must be contained in the same replacement text. For
6310
 *  interoperability, if a parameter-entity reference appears in a
6311
 *  choice, seq, or Mixed construct, its replacement text should not
6312
 *  be empty, and neither the first nor last non-blank character of
6313
 *  the replacement text should be a connector (| or ,).
6314
 *
6315
 * Returns the tree of xmlElementContentPtr describing the element
6316
 *          hierarchy.
6317
 */
6318
static xmlElementContentPtr
6319
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6320
6.58M
                                       int depth) {
6321
6.58M
    xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6322
6.58M
    const xmlChar *elem;
6323
6.58M
    xmlChar type = 0;
6324
6325
6.58M
    if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6326
6.58M
        (depth >  2048)) {
6327
0
        xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6328
0
"xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6329
0
                          depth);
6330
0
  return(NULL);
6331
0
    }
6332
6.58M
    SKIP_BLANKS;
6333
6.58M
    GROW;
6334
6.58M
    if (RAW == '(') {
6335
184k
  int inputid = ctxt->input->id;
6336
6337
        /* Recurse on first child */
6338
184k
  NEXT;
6339
184k
  SKIP_BLANKS;
6340
184k
        cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6341
184k
                                                           depth + 1);
6342
184k
        if (cur == NULL)
6343
457
            return(NULL);
6344
184k
  SKIP_BLANKS;
6345
184k
  GROW;
6346
6.40M
    } else {
6347
6.40M
  elem = xmlParseName(ctxt);
6348
6.40M
  if (elem == NULL) {
6349
24.5k
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6350
24.5k
      return(NULL);
6351
24.5k
  }
6352
6.37M
        cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6353
6.37M
  if (cur == NULL) {
6354
0
      xmlErrMemory(ctxt, NULL);
6355
0
      return(NULL);
6356
0
  }
6357
6.37M
  GROW;
6358
6.37M
  if (RAW == '?') {
6359
489k
      cur->ocur = XML_ELEMENT_CONTENT_OPT;
6360
489k
      NEXT;
6361
5.89M
  } else if (RAW == '*') {
6362
408k
      cur->ocur = XML_ELEMENT_CONTENT_MULT;
6363
408k
      NEXT;
6364
5.48M
  } else if (RAW == '+') {
6365
1.45M
      cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6366
1.45M
      NEXT;
6367
4.02M
  } else {
6368
4.02M
      cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6369
4.02M
  }
6370
6.37M
  GROW;
6371
6.37M
    }
6372
6.56M
    SKIP_BLANKS;
6373
6.56M
    SHRINK;
6374
31.5M
    while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
6375
        /*
6376
   * Each loop we parse one separator and one element.
6377
   */
6378
25.0M
        if (RAW == ',') {
6379
5.68M
      if (type == 0) type = CUR;
6380
6381
      /*
6382
       * Detect "Name | Name , Name" error
6383
       */
6384
3.42M
      else if (type != CUR) {
6385
158
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6386
158
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6387
158
                      type);
6388
158
    if ((last != NULL) && (last != ret))
6389
158
        xmlFreeDocElementContent(ctxt->myDoc, last);
6390
158
    if (ret != NULL)
6391
158
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6392
158
    return(NULL);
6393
158
      }
6394
5.68M
      NEXT;
6395
6396
5.68M
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6397
5.68M
      if (op == NULL) {
6398
0
    if ((last != NULL) && (last != ret))
6399
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6400
0
          xmlFreeDocElementContent(ctxt->myDoc, ret);
6401
0
    return(NULL);
6402
0
      }
6403
5.68M
      if (last == NULL) {
6404
2.26M
    op->c1 = ret;
6405
2.26M
    if (ret != NULL)
6406
2.26M
        ret->parent = op;
6407
2.26M
    ret = cur = op;
6408
3.42M
      } else {
6409
3.42M
          cur->c2 = op;
6410
3.42M
    if (op != NULL)
6411
3.42M
        op->parent = cur;
6412
3.42M
    op->c1 = last;
6413
3.42M
    if (last != NULL)
6414
3.42M
        last->parent = op;
6415
3.42M
    cur =op;
6416
3.42M
    last = NULL;
6417
3.42M
      }
6418
19.3M
  } else if (RAW == '|') {
6419
19.3M
      if (type == 0) type = CUR;
6420
6421
      /*
6422
       * Detect "Name , Name | Name" error
6423
       */
6424
17.0M
      else if (type != CUR) {
6425
192
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6426
192
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6427
192
          type);
6428
192
    if ((last != NULL) && (last != ret))
6429
192
        xmlFreeDocElementContent(ctxt->myDoc, last);
6430
192
    if (ret != NULL)
6431
192
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6432
192
    return(NULL);
6433
192
      }
6434
19.3M
      NEXT;
6435
6436
19.3M
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6437
19.3M
      if (op == NULL) {
6438
0
    if ((last != NULL) && (last != ret))
6439
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6440
0
    if (ret != NULL)
6441
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6442
0
    return(NULL);
6443
0
      }
6444
19.3M
      if (last == NULL) {
6445
2.24M
    op->c1 = ret;
6446
2.24M
    if (ret != NULL)
6447
2.24M
        ret->parent = op;
6448
2.24M
    ret = cur = op;
6449
17.0M
      } else {
6450
17.0M
          cur->c2 = op;
6451
17.0M
    if (op != NULL)
6452
17.0M
        op->parent = cur;
6453
17.0M
    op->c1 = last;
6454
17.0M
    if (last != NULL)
6455
17.0M
        last->parent = op;
6456
17.0M
    cur =op;
6457
17.0M
    last = NULL;
6458
17.0M
      }
6459
19.3M
  } else {
6460
19.4k
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6461
19.4k
      if ((last != NULL) && (last != ret))
6462
8.28k
          xmlFreeDocElementContent(ctxt->myDoc, last);
6463
19.4k
      if (ret != NULL)
6464
19.4k
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6465
19.4k
      return(NULL);
6466
19.4k
  }
6467
24.9M
  GROW;
6468
24.9M
  SKIP_BLANKS;
6469
24.9M
  GROW;
6470
24.9M
  if (RAW == '(') {
6471
1.06M
      int inputid = ctxt->input->id;
6472
      /* Recurse on second child */
6473
1.06M
      NEXT;
6474
1.06M
      SKIP_BLANKS;
6475
1.06M
      last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6476
1.06M
                                                          depth + 1);
6477
1.06M
            if (last == NULL) {
6478
1.87k
    if (ret != NULL)
6479
1.87k
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6480
1.87k
    return(NULL);
6481
1.87k
            }
6482
1.06M
      SKIP_BLANKS;
6483
23.9M
  } else {
6484
23.9M
      elem = xmlParseName(ctxt);
6485
23.9M
      if (elem == NULL) {
6486
2.96k
    xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6487
2.96k
    if (ret != NULL)
6488
2.96k
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6489
2.96k
    return(NULL);
6490
2.96k
      }
6491
23.9M
      last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6492
23.9M
      if (last == NULL) {
6493
0
    if (ret != NULL)
6494
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6495
0
    return(NULL);
6496
0
      }
6497
23.9M
      if (RAW == '?') {
6498
1.97M
    last->ocur = XML_ELEMENT_CONTENT_OPT;
6499
1.97M
    NEXT;
6500
21.9M
      } else if (RAW == '*') {
6501
1.18M
    last->ocur = XML_ELEMENT_CONTENT_MULT;
6502
1.18M
    NEXT;
6503
20.7M
      } else if (RAW == '+') {
6504
353k
    last->ocur = XML_ELEMENT_CONTENT_PLUS;
6505
353k
    NEXT;
6506
20.4M
      } else {
6507
20.4M
    last->ocur = XML_ELEMENT_CONTENT_ONCE;
6508
20.4M
      }
6509
23.9M
  }
6510
24.9M
  SKIP_BLANKS;
6511
24.9M
  GROW;
6512
24.9M
    }
6513
6.53M
    if ((cur != NULL) && (last != NULL)) {
6514
4.49M
        cur->c2 = last;
6515
4.49M
  if (last != NULL)
6516
4.49M
      last->parent = cur;
6517
4.49M
    }
6518
6.53M
    if (ctxt->input->id != inputchk) {
6519
39
  xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6520
39
                       "Element content declaration doesn't start and stop in"
6521
39
                       " the same entity\n");
6522
39
    }
6523
6.53M
    NEXT;
6524
6.53M
    if (RAW == '?') {
6525
114k
  if (ret != NULL) {
6526
114k
      if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6527
114k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6528
297
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6529
114k
      else
6530
114k
          ret->ocur = XML_ELEMENT_CONTENT_OPT;
6531
114k
  }
6532
114k
  NEXT;
6533
6.42M
    } else if (RAW == '*') {
6534
1.33M
  if (ret != NULL) {
6535
1.33M
      ret->ocur = XML_ELEMENT_CONTENT_MULT;
6536
1.33M
      cur = ret;
6537
      /*
6538
       * Some normalization:
6539
       * (a | b* | c?)* == (a | b | c)*
6540
       */
6541
11.9M
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6542
10.6M
    if ((cur->c1 != NULL) &&
6543
10.6M
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6544
10.6M
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6545
175k
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6546
10.6M
    if ((cur->c2 != NULL) &&
6547
10.6M
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6548
10.6M
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6549
28.9k
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6550
10.6M
    cur = cur->c2;
6551
10.6M
      }
6552
1.33M
  }
6553
1.33M
  NEXT;
6554
5.09M
    } else if (RAW == '+') {
6555
1.10M
  if (ret != NULL) {
6556
1.10M
      int found = 0;
6557
6558
1.10M
      if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6559
1.10M
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6560
27
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6561
1.10M
      else
6562
1.10M
          ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6563
      /*
6564
       * Some normalization:
6565
       * (a | b*)+ == (a | b)*
6566
       * (a | b?)+ == (a | b)*
6567
       */
6568
1.95M
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6569
853k
    if ((cur->c1 != NULL) &&
6570
853k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6571
853k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6572
0
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6573
0
        found = 1;
6574
0
    }
6575
853k
    if ((cur->c2 != NULL) &&
6576
853k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6577
853k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6578
9
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6579
9
        found = 1;
6580
9
    }
6581
853k
    cur = cur->c2;
6582
853k
      }
6583
1.10M
      if (found)
6584
9
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6585
1.10M
  }
6586
1.10M
  NEXT;
6587
1.10M
    }
6588
6.53M
    return(ret);
6589
6.56M
}
6590
6591
/**
6592
 * xmlParseElementChildrenContentDecl:
6593
 * @ctxt:  an XML parser context
6594
 * @inputchk:  the input used for the current entity, needed for boundary checks
6595
 *
6596
 * DEPRECATED: Internal function, don't use.
6597
 *
6598
 * parse the declaration for a Mixed Element content
6599
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6600
 *
6601
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6602
 *
6603
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6604
 *
6605
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6606
 *
6607
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6608
 *
6609
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6610
 * TODO Parameter-entity replacement text must be properly nested
6611
 *  with parenthesized groups. That is to say, if either of the
6612
 *  opening or closing parentheses in a choice, seq, or Mixed
6613
 *  construct is contained in the replacement text for a parameter
6614
 *  entity, both must be contained in the same replacement text. For
6615
 *  interoperability, if a parameter-entity reference appears in a
6616
 *  choice, seq, or Mixed construct, its replacement text should not
6617
 *  be empty, and neither the first nor last non-blank character of
6618
 *  the replacement text should be a connector (| or ,).
6619
 *
6620
 * Returns the tree of xmlElementContentPtr describing the element
6621
 *          hierarchy.
6622
 */
6623
xmlElementContentPtr
6624
0
xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6625
    /* stub left for API/ABI compat */
6626
0
    return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6627
0
}
6628
6629
/**
6630
 * xmlParseElementContentDecl:
6631
 * @ctxt:  an XML parser context
6632
 * @name:  the name of the element being defined.
6633
 * @result:  the Element Content pointer will be stored here if any
6634
 *
6635
 * DEPRECATED: Internal function, don't use.
6636
 *
6637
 * parse the declaration for an Element content either Mixed or Children,
6638
 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6639
 *
6640
 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6641
 *
6642
 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6643
 */
6644
6645
int
6646
xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6647
11.1M
                           xmlElementContentPtr *result) {
6648
6649
11.1M
    xmlElementContentPtr tree = NULL;
6650
11.1M
    int inputid = ctxt->input->id;
6651
11.1M
    int res;
6652
6653
11.1M
    *result = NULL;
6654
6655
11.1M
    if (RAW != '(') {
6656
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6657
0
    "xmlParseElementContentDecl : %s '(' expected\n", name);
6658
0
  return(-1);
6659
0
    }
6660
11.1M
    NEXT;
6661
11.1M
    GROW;
6662
11.1M
    if (ctxt->instate == XML_PARSER_EOF)
6663
0
        return(-1);
6664
11.1M
    SKIP_BLANKS;
6665
11.1M
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6666
5.79M
        tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6667
5.79M
  res = XML_ELEMENT_TYPE_MIXED;
6668
5.79M
    } else {
6669
5.33M
        tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6670
5.33M
  res = XML_ELEMENT_TYPE_ELEMENT;
6671
5.33M
    }
6672
11.1M
    SKIP_BLANKS;
6673
11.1M
    *result = tree;
6674
11.1M
    return(res);
6675
11.1M
}
6676
6677
/**
6678
 * xmlParseElementDecl:
6679
 * @ctxt:  an XML parser context
6680
 *
6681
 * DEPRECATED: Internal function, don't use.
6682
 *
6683
 * Parse an element declaration. Always consumes '<!'.
6684
 *
6685
 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6686
 *
6687
 * [ VC: Unique Element Type Declaration ]
6688
 * No element type may be declared more than once
6689
 *
6690
 * Returns the type of the element, or -1 in case of error
6691
 */
6692
int
6693
13.3M
xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6694
13.3M
    const xmlChar *name;
6695
13.3M
    int ret = -1;
6696
13.3M
    xmlElementContentPtr content  = NULL;
6697
6698
13.3M
    if ((CUR != '<') || (NXT(1) != '!'))
6699
0
        return(ret);
6700
13.3M
    SKIP(2);
6701
6702
    /* GROW; done in the caller */
6703
13.3M
    if (CMP7(CUR_PTR, 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6704
13.2M
  int inputid = ctxt->input->id;
6705
6706
13.2M
  SKIP(7);
6707
13.2M
  if (SKIP_BLANKS == 0) {
6708
1.83k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6709
1.83k
               "Space required after 'ELEMENT'\n");
6710
1.83k
      return(-1);
6711
1.83k
  }
6712
13.2M
        name = xmlParseName(ctxt);
6713
13.2M
  if (name == NULL) {
6714
2.41k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6715
2.41k
         "xmlParseElementDecl: no name for Element\n");
6716
2.41k
      return(-1);
6717
2.41k
  }
6718
13.2M
  if (SKIP_BLANKS == 0) {
6719
6.48k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6720
6.48k
         "Space required after the element name\n");
6721
6.48k
  }
6722
13.2M
  if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6723
2.08M
      SKIP(5);
6724
      /*
6725
       * Element must always be empty.
6726
       */
6727
2.08M
      ret = XML_ELEMENT_TYPE_EMPTY;
6728
11.2M
  } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6729
11.2M
             (NXT(2) == 'Y')) {
6730
48.4k
      SKIP(3);
6731
      /*
6732
       * Element is a generic container.
6733
       */
6734
48.4k
      ret = XML_ELEMENT_TYPE_ANY;
6735
11.1M
  } else if (RAW == '(') {
6736
11.1M
      ret = xmlParseElementContentDecl(ctxt, name, &content);
6737
11.1M
  } else {
6738
      /*
6739
       * [ WFC: PEs in Internal Subset ] error handling.
6740
       */
6741
22.5k
      if ((RAW == '%') && (ctxt->external == 0) &&
6742
22.5k
          (ctxt->inputNr == 1)) {
6743
181
    xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
6744
181
    "PEReference: forbidden within markup decl in internal subset\n");
6745
22.3k
      } else {
6746
22.3k
    xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6747
22.3k
          "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6748
22.3k
            }
6749
22.5k
      return(-1);
6750
22.5k
  }
6751
6752
13.2M
  SKIP_BLANKS;
6753
6754
13.2M
  if (RAW != '>') {
6755
55.2k
      xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6756
55.2k
      if (content != NULL) {
6757
4.94k
    xmlFreeDocElementContent(ctxt->myDoc, content);
6758
4.94k
      }
6759
13.2M
  } else {
6760
13.2M
      if (inputid != ctxt->input->id) {
6761
192
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6762
192
                               "Element declaration doesn't start and stop in"
6763
192
                               " the same entity\n");
6764
192
      }
6765
6766
13.2M
      NEXT;
6767
13.2M
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6768
13.2M
    (ctxt->sax->elementDecl != NULL)) {
6769
12.3M
    if (content != NULL)
6770
10.6M
        content->parent = NULL;
6771
12.3M
          ctxt->sax->elementDecl(ctxt->userData, name, ret,
6772
12.3M
                           content);
6773
12.3M
    if ((content != NULL) && (content->parent == NULL)) {
6774
        /*
6775
         * this is a trick: if xmlAddElementDecl is called,
6776
         * instead of copying the full tree it is plugged directly
6777
         * if called from the parser. Avoid duplicating the
6778
         * interfaces or change the API/ABI
6779
         */
6780
119k
        xmlFreeDocElementContent(ctxt->myDoc, content);
6781
119k
    }
6782
12.3M
      } else if (content != NULL) {
6783
421k
    xmlFreeDocElementContent(ctxt->myDoc, content);
6784
421k
      }
6785
13.2M
  }
6786
13.2M
    }
6787
13.2M
    return(ret);
6788
13.3M
}
6789
6790
/**
6791
 * xmlParseConditionalSections
6792
 * @ctxt:  an XML parser context
6793
 *
6794
 * Parse a conditional section. Always consumes '<!['.
6795
 *
6796
 * [61] conditionalSect ::= includeSect | ignoreSect
6797
 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6798
 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6799
 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6800
 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6801
 */
6802
6803
static void
6804
540k
xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6805
540k
    int *inputIds = NULL;
6806
540k
    size_t inputIdsSize = 0;
6807
540k
    size_t depth = 0;
6808
6809
726k
    while (ctxt->instate != XML_PARSER_EOF) {
6810
725k
        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6811
641k
            int id = ctxt->input->id;
6812
6813
641k
            SKIP(3);
6814
641k
            SKIP_BLANKS;
6815
6816
641k
            if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6817
125k
                SKIP(7);
6818
125k
                SKIP_BLANKS;
6819
125k
                if (RAW != '[') {
6820
2.25k
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6821
2.25k
                    xmlHaltParser(ctxt);
6822
2.25k
                    goto error;
6823
2.25k
                }
6824
123k
                if (ctxt->input->id != id) {
6825
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6826
0
                                   "All markup of the conditional section is"
6827
0
                                   " not in the same entity\n");
6828
0
                }
6829
123k
                NEXT;
6830
6831
123k
                if (inputIdsSize <= depth) {
6832
39.8k
                    int *tmp;
6833
6834
39.8k
                    inputIdsSize = (inputIdsSize == 0 ? 4 : inputIdsSize * 2);
6835
39.8k
                    tmp = (int *) xmlRealloc(inputIds,
6836
39.8k
                            inputIdsSize * sizeof(int));
6837
39.8k
                    if (tmp == NULL) {
6838
0
                        xmlErrMemory(ctxt, NULL);
6839
0
                        goto error;
6840
0
                    }
6841
39.8k
                    inputIds = tmp;
6842
39.8k
                }
6843
123k
                inputIds[depth] = id;
6844
123k
                depth++;
6845
516k
            } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6846
505k
                size_t ignoreDepth = 0;
6847
6848
505k
                SKIP(6);
6849
505k
                SKIP_BLANKS;
6850
505k
                if (RAW != '[') {
6851
427
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6852
427
                    xmlHaltParser(ctxt);
6853
427
                    goto error;
6854
427
                }
6855
505k
                if (ctxt->input->id != id) {
6856
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6857
0
                                   "All markup of the conditional section is"
6858
0
                                   " not in the same entity\n");
6859
0
                }
6860
505k
                NEXT;
6861
6862
170M
                while (RAW != 0) {
6863
170M
                    if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6864
23.4k
                        SKIP(3);
6865
23.4k
                        ignoreDepth++;
6866
                        /* Check for integer overflow */
6867
23.4k
                        if (ignoreDepth == 0) {
6868
0
                            xmlErrMemory(ctxt, NULL);
6869
0
                            goto error;
6870
0
                        }
6871
170M
                    } else if ((RAW == ']') && (NXT(1) == ']') &&
6872
170M
                               (NXT(2) == '>')) {
6873
521k
                        if (ignoreDepth == 0)
6874
502k
                            break;
6875
18.8k
                        SKIP(3);
6876
18.8k
                        ignoreDepth--;
6877
169M
                    } else {
6878
169M
                        NEXT;
6879
169M
                    }
6880
170M
                }
6881
6882
505k
    if (RAW == 0) {
6883
2.48k
        xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6884
2.48k
                    goto error;
6885
2.48k
    }
6886
502k
                if (ctxt->input->id != id) {
6887
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6888
0
                                   "All markup of the conditional section is"
6889
0
                                   " not in the same entity\n");
6890
0
                }
6891
502k
                SKIP(3);
6892
502k
            } else {
6893
10.9k
                xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6894
10.9k
                xmlHaltParser(ctxt);
6895
10.9k
                goto error;
6896
10.9k
            }
6897
641k
        } else if ((depth > 0) &&
6898
84.1k
                   (RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6899
37.3k
            depth--;
6900
37.3k
            if (ctxt->input->id != inputIds[depth]) {
6901
785
                xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6902
785
                               "All markup of the conditional section is not"
6903
785
                               " in the same entity\n");
6904
785
            }
6905
37.3k
            SKIP(3);
6906
46.7k
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
6907
31.9k
            xmlParseMarkupDecl(ctxt);
6908
31.9k
        } else {
6909
14.8k
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6910
14.8k
            xmlHaltParser(ctxt);
6911
14.8k
            goto error;
6912
14.8k
        }
6913
6914
694k
        if (depth == 0)
6915
508k
            break;
6916
6917
185k
        SKIP_BLANKS;
6918
185k
        GROW;
6919
185k
    }
6920
6921
540k
error:
6922
540k
    xmlFree(inputIds);
6923
540k
}
6924
6925
/**
6926
 * xmlParseMarkupDecl:
6927
 * @ctxt:  an XML parser context
6928
 *
6929
 * DEPRECATED: Internal function, don't use.
6930
 *
6931
 * Parse markup declarations. Always consumes '<!' or '<?'.
6932
 *
6933
 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6934
 *                     NotationDecl | PI | Comment
6935
 *
6936
 * [ VC: Proper Declaration/PE Nesting ]
6937
 * Parameter-entity replacement text must be properly nested with
6938
 * markup declarations. That is to say, if either the first character
6939
 * or the last character of a markup declaration (markupdecl above) is
6940
 * contained in the replacement text for a parameter-entity reference,
6941
 * both must be contained in the same replacement text.
6942
 *
6943
 * [ WFC: PEs in Internal Subset ]
6944
 * In the internal DTD subset, parameter-entity references can occur
6945
 * only where markup declarations can occur, not within markup declarations.
6946
 * (This does not apply to references that occur in external parameter
6947
 * entities or to the external subset.)
6948
 */
6949
void
6950
1.56G
xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6951
1.56G
    GROW;
6952
1.56G
    if (CUR == '<') {
6953
1.56G
        if (NXT(1) == '!') {
6954
1.56G
      switch (NXT(2)) {
6955
34.7M
          case 'E':
6956
34.7M
        if (NXT(3) == 'L')
6957
13.3M
      xmlParseElementDecl(ctxt);
6958
21.4M
        else if (NXT(3) == 'N')
6959
21.4M
      xmlParseEntityDecl(ctxt);
6960
1.45k
                    else
6961
1.45k
                        SKIP(2);
6962
34.7M
        break;
6963
31.9M
          case 'A':
6964
31.9M
        xmlParseAttributeListDecl(ctxt);
6965
31.9M
        break;
6966
1.05M
          case 'N':
6967
1.05M
        xmlParseNotationDecl(ctxt);
6968
1.05M
        break;
6969
1.49G
          case '-':
6970
1.49G
        xmlParseComment(ctxt);
6971
1.49G
        break;
6972
830k
    default:
6973
        /* there is an error but it will be detected later */
6974
830k
                    SKIP(2);
6975
830k
        break;
6976
1.56G
      }
6977
1.56G
  } else if (NXT(1) == '?') {
6978
2.30M
      xmlParsePI(ctxt);
6979
2.30M
  }
6980
1.56G
    }
6981
6982
    /*
6983
     * detect requirement to exit there and act accordingly
6984
     * and avoid having instate overridden later on
6985
     */
6986
1.56G
    if (ctxt->instate == XML_PARSER_EOF)
6987
88.4k
        return;
6988
6989
1.56G
    ctxt->instate = XML_PARSER_DTD;
6990
1.56G
}
6991
6992
/**
6993
 * xmlParseTextDecl:
6994
 * @ctxt:  an XML parser context
6995
 *
6996
 * DEPRECATED: Internal function, don't use.
6997
 *
6998
 * parse an XML declaration header for external entities
6999
 *
7000
 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
7001
 */
7002
7003
void
7004
43.9k
xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
7005
43.9k
    xmlChar *version;
7006
43.9k
    const xmlChar *encoding;
7007
43.9k
    int oldstate;
7008
7009
    /*
7010
     * We know that '<?xml' is here.
7011
     */
7012
43.9k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
7013
41.9k
  SKIP(5);
7014
41.9k
    } else {
7015
1.95k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
7016
1.95k
  return;
7017
1.95k
    }
7018
7019
    /* Avoid expansion of parameter entities when skipping blanks. */
7020
41.9k
    oldstate = ctxt->instate;
7021
41.9k
    ctxt->instate = XML_PARSER_START;
7022
7023
41.9k
    if (SKIP_BLANKS == 0) {
7024
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7025
0
           "Space needed after '<?xml'\n");
7026
0
    }
7027
7028
    /*
7029
     * We may have the VersionInfo here.
7030
     */
7031
41.9k
    version = xmlParseVersionInfo(ctxt);
7032
41.9k
    if (version == NULL)
7033
3.03k
  version = xmlCharStrdup(XML_DEFAULT_VERSION);
7034
38.9k
    else {
7035
38.9k
  if (SKIP_BLANKS == 0) {
7036
1.47k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7037
1.47k
               "Space needed here\n");
7038
1.47k
  }
7039
38.9k
    }
7040
41.9k
    ctxt->input->version = version;
7041
7042
    /*
7043
     * We must have the encoding declaration
7044
     */
7045
41.9k
    encoding = xmlParseEncodingDecl(ctxt);
7046
41.9k
    if (ctxt->instate == XML_PARSER_EOF)
7047
0
        return;
7048
41.9k
    if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7049
  /*
7050
   * The XML REC instructs us to stop parsing right here
7051
   */
7052
391
        ctxt->instate = oldstate;
7053
391
        return;
7054
391
    }
7055
41.5k
    if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
7056
4.14k
  xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
7057
4.14k
           "Missing encoding in text declaration\n");
7058
4.14k
    }
7059
7060
41.5k
    SKIP_BLANKS;
7061
41.5k
    if ((RAW == '?') && (NXT(1) == '>')) {
7062
32.5k
        SKIP(2);
7063
32.5k
    } else if (RAW == '>') {
7064
        /* Deprecated old WD ... */
7065
138
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7066
138
  NEXT;
7067
8.85k
    } else {
7068
8.85k
        int c;
7069
7070
8.85k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7071
5.73M
        while ((c = CUR) != 0) {
7072
5.73M
            NEXT;
7073
5.73M
            if (c == '>')
7074
8.32k
                break;
7075
5.73M
        }
7076
8.85k
    }
7077
7078
41.5k
    ctxt->instate = oldstate;
7079
41.5k
}
7080
7081
/**
7082
 * xmlParseExternalSubset:
7083
 * @ctxt:  an XML parser context
7084
 * @ExternalID: the external identifier
7085
 * @SystemID: the system identifier (or URL)
7086
 *
7087
 * parse Markup declarations from an external subset
7088
 *
7089
 * [30] extSubset ::= textDecl? extSubsetDecl
7090
 *
7091
 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
7092
 */
7093
void
7094
xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
7095
243k
                       const xmlChar *SystemID) {
7096
243k
    xmlDetectSAX2(ctxt);
7097
243k
    GROW;
7098
7099
243k
    if ((ctxt->encoding == NULL) &&
7100
243k
        (ctxt->input->end - ctxt->input->cur >= 4)) {
7101
243k
        xmlChar start[4];
7102
243k
  xmlCharEncoding enc;
7103
7104
243k
  start[0] = RAW;
7105
243k
  start[1] = NXT(1);
7106
243k
  start[2] = NXT(2);
7107
243k
  start[3] = NXT(3);
7108
243k
  enc = xmlDetectCharEncoding(start, 4);
7109
243k
  if (enc != XML_CHAR_ENCODING_NONE)
7110
42.3k
      xmlSwitchEncoding(ctxt, enc);
7111
243k
    }
7112
7113
243k
    if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
7114
39.2k
  xmlParseTextDecl(ctxt);
7115
39.2k
  if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7116
      /*
7117
       * The XML REC instructs us to stop parsing right here
7118
       */
7119
286
      xmlHaltParser(ctxt);
7120
286
      return;
7121
286
  }
7122
39.2k
    }
7123
243k
    if (ctxt->myDoc == NULL) {
7124
0
        ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
7125
0
  if (ctxt->myDoc == NULL) {
7126
0
      xmlErrMemory(ctxt, "New Doc failed");
7127
0
      return;
7128
0
  }
7129
0
  ctxt->myDoc->properties = XML_DOC_INTERNAL;
7130
0
    }
7131
243k
    if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
7132
0
        xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
7133
7134
243k
    ctxt->instate = XML_PARSER_DTD;
7135
243k
    ctxt->external = 1;
7136
243k
    SKIP_BLANKS;
7137
691M
    while ((ctxt->instate != XML_PARSER_EOF) && (RAW != 0)) {
7138
691M
  GROW;
7139
691M
        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7140
540k
            xmlParseConditionalSections(ctxt);
7141
691M
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
7142
691M
            xmlParseMarkupDecl(ctxt);
7143
691M
        } else {
7144
71.2k
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7145
71.2k
            xmlHaltParser(ctxt);
7146
71.2k
            return;
7147
71.2k
        }
7148
691M
        SKIP_BLANKS;
7149
691M
    }
7150
7151
172k
    if (RAW != 0) {
7152
0
  xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7153
0
    }
7154
7155
172k
}
7156
7157
/**
7158
 * xmlParseReference:
7159
 * @ctxt:  an XML parser context
7160
 *
7161
 * DEPRECATED: Internal function, don't use.
7162
 *
7163
 * parse and handle entity references in content, depending on the SAX
7164
 * interface, this may end-up in a call to character() if this is a
7165
 * CharRef, a predefined entity, if there is no reference() callback.
7166
 * or if the parser was asked to switch to that mode.
7167
 *
7168
 * Always consumes '&'.
7169
 *
7170
 * [67] Reference ::= EntityRef | CharRef
7171
 */
7172
void
7173
58.1M
xmlParseReference(xmlParserCtxtPtr ctxt) {
7174
58.1M
    xmlEntityPtr ent;
7175
58.1M
    xmlChar *val;
7176
58.1M
    int was_checked;
7177
58.1M
    xmlNodePtr list = NULL;
7178
58.1M
    xmlParserErrors ret = XML_ERR_OK;
7179
7180
7181
58.1M
    if (RAW != '&')
7182
0
        return;
7183
7184
    /*
7185
     * Simple case of a CharRef
7186
     */
7187
58.1M
    if (NXT(1) == '#') {
7188
1.13M
  int i = 0;
7189
1.13M
  xmlChar out[16];
7190
1.13M
  int hex = NXT(2);
7191
1.13M
  int value = xmlParseCharRef(ctxt);
7192
7193
1.13M
  if (value == 0)
7194
55.7k
      return;
7195
1.07M
  if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
7196
      /*
7197
       * So we are using non-UTF-8 buffers
7198
       * Check that the char fit on 8bits, if not
7199
       * generate a CharRef.
7200
       */
7201
627k
      if (value <= 0xFF) {
7202
622k
    out[0] = value;
7203
622k
    out[1] = 0;
7204
622k
    if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7205
622k
        (!ctxt->disableSAX))
7206
507k
        ctxt->sax->characters(ctxt->userData, out, 1);
7207
622k
      } else {
7208
5.34k
    if ((hex == 'x') || (hex == 'X'))
7209
1.49k
        snprintf((char *)out, sizeof(out), "#x%X", value);
7210
3.84k
    else
7211
3.84k
        snprintf((char *)out, sizeof(out), "#%d", value);
7212
5.34k
    if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7213
5.34k
        (!ctxt->disableSAX))
7214
4.91k
        ctxt->sax->reference(ctxt->userData, out);
7215
5.34k
      }
7216
627k
  } else {
7217
      /*
7218
       * Just encode the value in UTF-8
7219
       */
7220
452k
      COPY_BUF(0 ,out, i, value);
7221
452k
      out[i] = 0;
7222
452k
      if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7223
452k
    (!ctxt->disableSAX))
7224
275k
    ctxt->sax->characters(ctxt->userData, out, i);
7225
452k
  }
7226
1.07M
  return;
7227
1.13M
    }
7228
7229
    /*
7230
     * We are seeing an entity reference
7231
     */
7232
56.9M
    ent = xmlParseEntityRef(ctxt);
7233
56.9M
    if (ent == NULL) return;
7234
41.1M
    if (!ctxt->wellFormed)
7235
14.9M
  return;
7236
26.1M
    was_checked = ent->flags & XML_ENT_PARSED;
7237
7238
    /* special case of predefined entities */
7239
26.1M
    if ((ent->name == NULL) ||
7240
26.1M
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7241
3.95M
  val = ent->content;
7242
3.95M
  if (val == NULL) return;
7243
  /*
7244
   * inline the entity.
7245
   */
7246
3.95M
  if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7247
3.95M
      (!ctxt->disableSAX))
7248
3.95M
      ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7249
3.95M
  return;
7250
3.95M
    }
7251
7252
    /*
7253
     * The first reference to the entity trigger a parsing phase
7254
     * where the ent->children is filled with the result from
7255
     * the parsing.
7256
     * Note: external parsed entities will not be loaded, it is not
7257
     * required for a non-validating parser, unless the parsing option
7258
     * of validating, or substituting entities were given. Doing so is
7259
     * far more secure as the parser will only process data coming from
7260
     * the document entity by default.
7261
     */
7262
22.2M
    if (((ent->flags & XML_ENT_PARSED) == 0) &&
7263
22.2M
        ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
7264
907k
         (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
7265
902k
  unsigned long oldsizeentcopy = ctxt->sizeentcopy;
7266
7267
  /*
7268
   * This is a bit hackish but this seems the best
7269
   * way to make sure both SAX and DOM entity support
7270
   * behaves okay.
7271
   */
7272
902k
  void *user_data;
7273
902k
  if (ctxt->userData == ctxt)
7274
902k
      user_data = NULL;
7275
0
  else
7276
0
      user_data = ctxt->userData;
7277
7278
        /* Avoid overflow as much as possible */
7279
902k
        ctxt->sizeentcopy = 0;
7280
7281
902k
        if (ent->flags & XML_ENT_EXPANDING) {
7282
1.12k
            xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7283
1.12k
            xmlHaltParser(ctxt);
7284
1.12k
            return;
7285
1.12k
        }
7286
7287
901k
        ent->flags |= XML_ENT_EXPANDING;
7288
7289
  /*
7290
   * Check that this entity is well formed
7291
   * 4.3.2: An internal general parsed entity is well-formed
7292
   * if its replacement text matches the production labeled
7293
   * content.
7294
   */
7295
901k
  if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7296
770k
      ctxt->depth++;
7297
770k
      ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
7298
770k
                                                user_data, &list);
7299
770k
      ctxt->depth--;
7300
7301
770k
  } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7302
130k
      ctxt->depth++;
7303
130k
      ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
7304
130k
                                     user_data, ctxt->depth, ent->URI,
7305
130k
             ent->ExternalID, &list);
7306
130k
      ctxt->depth--;
7307
130k
  } else {
7308
0
      ret = XML_ERR_ENTITY_PE_INTERNAL;
7309
0
      xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7310
0
       "invalid entity type found\n", NULL);
7311
0
  }
7312
7313
901k
        ent->flags &= ~XML_ENT_EXPANDING;
7314
901k
        ent->flags |= XML_ENT_PARSED | XML_ENT_CHECKED;
7315
901k
        ent->expandedSize = ctxt->sizeentcopy;
7316
901k
  if (ret == XML_ERR_ENTITY_LOOP) {
7317
14.0k
            xmlHaltParser(ctxt);
7318
14.0k
      xmlFreeNodeList(list);
7319
14.0k
      return;
7320
14.0k
  }
7321
887k
  if (xmlParserEntityCheck(ctxt, oldsizeentcopy)) {
7322
3
      xmlFreeNodeList(list);
7323
3
      return;
7324
3
  }
7325
7326
887k
  if ((ret == XML_ERR_OK) && (list != NULL)) {
7327
802k
            ent->children = list;
7328
            /*
7329
             * Prune it directly in the generated document
7330
             * except for single text nodes.
7331
             */
7332
802k
            if ((ctxt->replaceEntities == 0) ||
7333
802k
                (ctxt->parseMode == XML_PARSE_READER) ||
7334
802k
                ((list->type == XML_TEXT_NODE) &&
7335
762k
                 (list->next == NULL))) {
7336
762k
                ent->owner = 1;
7337
43.6M
                while (list != NULL) {
7338
42.8M
                    list->parent = (xmlNodePtr) ent;
7339
42.8M
                    if (list->doc != ent->doc)
7340
0
                        xmlSetTreeDoc(list, ent->doc);
7341
42.8M
                    if (list->next == NULL)
7342
762k
                        ent->last = list;
7343
42.8M
                    list = list->next;
7344
42.8M
                }
7345
762k
                list = NULL;
7346
762k
            } else {
7347
40.0k
                ent->owner = 0;
7348
90.3M
                while (list != NULL) {
7349
90.3M
                    list->parent = (xmlNodePtr) ctxt->node;
7350
90.3M
                    list->doc = ctxt->myDoc;
7351
90.3M
                    if (list->next == NULL)
7352
40.0k
                        ent->last = list;
7353
90.3M
                    list = list->next;
7354
90.3M
                }
7355
40.0k
                list = ent->children;
7356
#ifdef LIBXML_LEGACY_ENABLED
7357
                if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7358
                    xmlAddEntityReference(ent, list, NULL);
7359
#endif /* LIBXML_LEGACY_ENABLED */
7360
40.0k
            }
7361
802k
  } else if ((ret != XML_ERR_OK) &&
7362
84.9k
       (ret != XML_WAR_UNDECLARED_ENTITY)) {
7363
48.0k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7364
48.0k
         "Entity '%s' failed to parse\n", ent->name);
7365
48.0k
            if (ent->content != NULL)
7366
6.41k
                ent->content[0] = 0;
7367
48.0k
  } else if (list != NULL) {
7368
0
      xmlFreeNodeList(list);
7369
0
      list = NULL;
7370
0
  }
7371
7372
        /* Prevent entity from being parsed and expanded twice (Bug 760367). */
7373
887k
        was_checked = 0;
7374
887k
    }
7375
7376
    /*
7377
     * Now that the entity content has been gathered
7378
     * provide it to the application, this can take different forms based
7379
     * on the parsing modes.
7380
     */
7381
22.2M
    if (ent->children == NULL) {
7382
  /*
7383
   * Probably running in SAX mode and the callbacks don't
7384
   * build the entity content. So unless we already went
7385
   * though parsing for first checking go though the entity
7386
   * content to generate callbacks associated to the entity
7387
   */
7388
418k
  if (was_checked != 0) {
7389
329k
      void *user_data;
7390
      /*
7391
       * This is a bit hackish but this seems the best
7392
       * way to make sure both SAX and DOM entity support
7393
       * behaves okay.
7394
       */
7395
329k
      if (ctxt->userData == ctxt)
7396
329k
    user_data = NULL;
7397
0
      else
7398
0
    user_data = ctxt->userData;
7399
7400
329k
      if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7401
0
    ctxt->depth++;
7402
0
    ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7403
0
           ent->content, user_data, NULL);
7404
0
    ctxt->depth--;
7405
329k
      } else if (ent->etype ==
7406
329k
           XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7407
329k
          unsigned long oldsizeentities = ctxt->sizeentities;
7408
7409
329k
    ctxt->depth++;
7410
329k
    ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7411
329k
         ctxt->sax, user_data, ctxt->depth,
7412
329k
         ent->URI, ent->ExternalID, NULL);
7413
329k
    ctxt->depth--;
7414
7415
                /* Undo the change to sizeentities */
7416
329k
                ctxt->sizeentities = oldsizeentities;
7417
329k
      } else {
7418
0
    ret = XML_ERR_ENTITY_PE_INTERNAL;
7419
0
    xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7420
0
           "invalid entity type found\n", NULL);
7421
0
      }
7422
329k
      if (ret == XML_ERR_ENTITY_LOOP) {
7423
0
    xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7424
0
    return;
7425
0
      }
7426
329k
            if (xmlParserEntityCheck(ctxt, 0))
7427
0
                return;
7428
329k
  }
7429
418k
  if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7430
418k
      (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7431
      /*
7432
       * Entity reference callback comes second, it's somewhat
7433
       * superfluous but a compatibility to historical behaviour
7434
       */
7435
32.4k
      ctxt->sax->reference(ctxt->userData, ent->name);
7436
32.4k
  }
7437
418k
  return;
7438
418k
    }
7439
7440
    /*
7441
     * We also check for amplification if entities aren't substituted.
7442
     * They might be expanded later.
7443
     */
7444
21.8M
    if ((was_checked != 0) &&
7445
21.8M
        (xmlParserEntityCheck(ctxt, ent->expandedSize)))
7446
1.78k
        return;
7447
7448
    /*
7449
     * If we didn't get any children for the entity being built
7450
     */
7451
21.8M
    if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7452
21.8M
  (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7453
  /*
7454
   * Create a node.
7455
   */
7456
136k
  ctxt->sax->reference(ctxt->userData, ent->name);
7457
136k
  return;
7458
136k
    }
7459
7460
21.6M
    if (ctxt->replaceEntities)  {
7461
  /*
7462
   * There is a problem on the handling of _private for entities
7463
   * (bug 155816): Should we copy the content of the field from
7464
   * the entity (possibly overwriting some value set by the user
7465
   * when a copy is created), should we leave it alone, or should
7466
   * we try to take care of different situations?  The problem
7467
   * is exacerbated by the usage of this field by the xmlReader.
7468
   * To fix this bug, we look at _private on the created node
7469
   * and, if it's NULL, we copy in whatever was in the entity.
7470
   * If it's not NULL we leave it alone.  This is somewhat of a
7471
   * hack - maybe we should have further tests to determine
7472
   * what to do.
7473
   */
7474
21.6M
  if (ctxt->node != NULL) {
7475
      /*
7476
       * Seems we are generating the DOM content, do
7477
       * a simple tree copy for all references except the first
7478
       * In the first occurrence list contains the replacement.
7479
       */
7480
21.6M
      if (((list == NULL) && (ent->owner == 0)) ||
7481
21.6M
    (ctxt->parseMode == XML_PARSE_READER)) {
7482
6.78M
    xmlNodePtr nw = NULL, cur, firstChild = NULL;
7483
7484
    /*
7485
     * when operating on a reader, the entities definitions
7486
     * are always owning the entities subtree.
7487
    if (ctxt->parseMode == XML_PARSE_READER)
7488
        ent->owner = 1;
7489
     */
7490
7491
6.78M
    cur = ent->children;
7492
48.9M
    while (cur != NULL) {
7493
48.9M
        nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7494
48.9M
        if (nw != NULL) {
7495
48.9M
      if (nw->_private == NULL)
7496
48.9M
          nw->_private = cur->_private;
7497
48.9M
      if (firstChild == NULL){
7498
6.78M
          firstChild = nw;
7499
6.78M
      }
7500
48.9M
      nw = xmlAddChild(ctxt->node, nw);
7501
48.9M
        }
7502
48.9M
        if (cur == ent->last) {
7503
      /*
7504
       * needed to detect some strange empty
7505
       * node cases in the reader tests
7506
       */
7507
6.78M
      if ((ctxt->parseMode == XML_PARSE_READER) &&
7508
6.78M
          (nw != NULL) &&
7509
6.78M
          (nw->type == XML_ELEMENT_NODE) &&
7510
6.78M
          (nw->children == NULL))
7511
2.05k
          nw->extra = 1;
7512
7513
6.78M
      break;
7514
6.78M
        }
7515
42.2M
        cur = cur->next;
7516
42.2M
    }
7517
#ifdef LIBXML_LEGACY_ENABLED
7518
    if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7519
      xmlAddEntityReference(ent, firstChild, nw);
7520
#endif /* LIBXML_LEGACY_ENABLED */
7521
14.8M
      } else if ((list == NULL) || (ctxt->inputNr > 0)) {
7522
14.8M
    xmlNodePtr nw = NULL, cur, next, last,
7523
14.8M
         firstChild = NULL;
7524
7525
    /*
7526
     * Copy the entity child list and make it the new
7527
     * entity child list. The goal is to make sure any
7528
     * ID or REF referenced will be the one from the
7529
     * document content and not the entity copy.
7530
     */
7531
14.8M
    cur = ent->children;
7532
14.8M
    ent->children = NULL;
7533
14.8M
    last = ent->last;
7534
14.8M
    ent->last = NULL;
7535
244M
    while (cur != NULL) {
7536
244M
        next = cur->next;
7537
244M
        cur->next = NULL;
7538
244M
        cur->parent = NULL;
7539
244M
        nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7540
244M
        if (nw != NULL) {
7541
244M
      if (nw->_private == NULL)
7542
244M
          nw->_private = cur->_private;
7543
244M
      if (firstChild == NULL){
7544
14.8M
          firstChild = cur;
7545
14.8M
      }
7546
244M
      xmlAddChild((xmlNodePtr) ent, nw);
7547
244M
        }
7548
244M
        xmlAddChild(ctxt->node, cur);
7549
244M
        if (cur == last)
7550
14.8M
      break;
7551
230M
        cur = next;
7552
230M
    }
7553
14.8M
    if (ent->owner == 0)
7554
40.0k
        ent->owner = 1;
7555
#ifdef LIBXML_LEGACY_ENABLED
7556
    if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7557
      xmlAddEntityReference(ent, firstChild, nw);
7558
#endif /* LIBXML_LEGACY_ENABLED */
7559
14.8M
      } else {
7560
0
    const xmlChar *nbktext;
7561
7562
    /*
7563
     * the name change is to avoid coalescing of the
7564
     * node with a possible previous text one which
7565
     * would make ent->children a dangling pointer
7566
     */
7567
0
    nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7568
0
          -1);
7569
0
    if (ent->children->type == XML_TEXT_NODE)
7570
0
        ent->children->name = nbktext;
7571
0
    if ((ent->last != ent->children) &&
7572
0
        (ent->last->type == XML_TEXT_NODE))
7573
0
        ent->last->name = nbktext;
7574
0
    xmlAddChildList(ctxt->node, ent->children);
7575
0
      }
7576
7577
      /*
7578
       * This is to avoid a nasty side effect, see
7579
       * characters() in SAX.c
7580
       */
7581
21.6M
      ctxt->nodemem = 0;
7582
21.6M
      ctxt->nodelen = 0;
7583
21.6M
      return;
7584
21.6M
  }
7585
21.6M
    }
7586
21.6M
}
7587
7588
/**
7589
 * xmlParseEntityRef:
7590
 * @ctxt:  an XML parser context
7591
 *
7592
 * DEPRECATED: Internal function, don't use.
7593
 *
7594
 * Parse an entity reference. Always consumes '&'.
7595
 *
7596
 * [68] EntityRef ::= '&' Name ';'
7597
 *
7598
 * [ WFC: Entity Declared ]
7599
 * In a document without any DTD, a document with only an internal DTD
7600
 * subset which contains no parameter entity references, or a document
7601
 * with "standalone='yes'", the Name given in the entity reference
7602
 * must match that in an entity declaration, except that well-formed
7603
 * documents need not declare any of the following entities: amp, lt,
7604
 * gt, apos, quot.  The declaration of a parameter entity must precede
7605
 * any reference to it.  Similarly, the declaration of a general entity
7606
 * must precede any reference to it which appears in a default value in an
7607
 * attribute-list declaration. Note that if entities are declared in the
7608
 * external subset or in external parameter entities, a non-validating
7609
 * processor is not obligated to read and process their declarations;
7610
 * for such documents, the rule that an entity must be declared is a
7611
 * well-formedness constraint only if standalone='yes'.
7612
 *
7613
 * [ WFC: Parsed Entity ]
7614
 * An entity reference must not contain the name of an unparsed entity
7615
 *
7616
 * Returns the xmlEntityPtr if found, or NULL otherwise.
7617
 */
7618
xmlEntityPtr
7619
121M
xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7620
121M
    const xmlChar *name;
7621
121M
    xmlEntityPtr ent = NULL;
7622
7623
121M
    GROW;
7624
121M
    if (ctxt->instate == XML_PARSER_EOF)
7625
0
        return(NULL);
7626
7627
121M
    if (RAW != '&')
7628
0
        return(NULL);
7629
121M
    NEXT;
7630
121M
    name = xmlParseName(ctxt);
7631
121M
    if (name == NULL) {
7632
420k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7633
420k
           "xmlParseEntityRef: no name\n");
7634
420k
        return(NULL);
7635
420k
    }
7636
120M
    if (RAW != ';') {
7637
350k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7638
350k
  return(NULL);
7639
350k
    }
7640
120M
    NEXT;
7641
7642
    /*
7643
     * Predefined entities override any extra definition
7644
     */
7645
120M
    if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7646
110M
        ent = xmlGetPredefinedEntity(name);
7647
110M
        if (ent != NULL)
7648
7.46M
            return(ent);
7649
110M
    }
7650
7651
    /*
7652
     * Ask first SAX for entity resolution, otherwise try the
7653
     * entities which may have stored in the parser context.
7654
     */
7655
112M
    if (ctxt->sax != NULL) {
7656
112M
  if (ctxt->sax->getEntity != NULL)
7657
112M
      ent = ctxt->sax->getEntity(ctxt->userData, name);
7658
112M
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7659
112M
      (ctxt->options & XML_PARSE_OLDSAX))
7660
245k
      ent = xmlGetPredefinedEntity(name);
7661
112M
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7662
112M
      (ctxt->userData==ctxt)) {
7663
915k
      ent = xmlSAX2GetEntity(ctxt, name);
7664
915k
  }
7665
112M
    }
7666
112M
    if (ctxt->instate == XML_PARSER_EOF)
7667
0
  return(NULL);
7668
    /*
7669
     * [ WFC: Entity Declared ]
7670
     * In a document without any DTD, a document with only an
7671
     * internal DTD subset which contains no parameter entity
7672
     * references, or a document with "standalone='yes'", the
7673
     * Name given in the entity reference must match that in an
7674
     * entity declaration, except that well-formed documents
7675
     * need not declare any of the following entities: amp, lt,
7676
     * gt, apos, quot.
7677
     * The declaration of a parameter entity must precede any
7678
     * reference to it.
7679
     * Similarly, the declaration of a general entity must
7680
     * precede any reference to it which appears in a default
7681
     * value in an attribute-list declaration. Note that if
7682
     * entities are declared in the external subset or in
7683
     * external parameter entities, a non-validating processor
7684
     * is not obligated to read and process their declarations;
7685
     * for such documents, the rule that an entity must be
7686
     * declared is a well-formedness constraint only if
7687
     * standalone='yes'.
7688
     */
7689
112M
    if (ent == NULL) {
7690
32.2M
  if ((ctxt->standalone == 1) ||
7691
32.2M
      ((ctxt->hasExternalSubset == 0) &&
7692
32.2M
       (ctxt->hasPErefs == 0))) {
7693
16.3M
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7694
16.3M
         "Entity '%s' not defined\n", name);
7695
16.3M
  } else {
7696
15.9M
      xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7697
15.9M
         "Entity '%s' not defined\n", name);
7698
15.9M
      if ((ctxt->inSubset == 0) &&
7699
15.9M
    (ctxt->sax != NULL) &&
7700
15.9M
    (ctxt->sax->reference != NULL)) {
7701
7.23M
    ctxt->sax->reference(ctxt->userData, name);
7702
7.23M
      }
7703
15.9M
  }
7704
32.2M
  ctxt->valid = 0;
7705
32.2M
    }
7706
7707
    /*
7708
     * [ WFC: Parsed Entity ]
7709
     * An entity reference must not contain the name of an
7710
     * unparsed entity
7711
     */
7712
80.6M
    else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7713
653
  xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7714
653
     "Entity reference to unparsed entity %s\n", name);
7715
653
    }
7716
7717
    /*
7718
     * [ WFC: No External Entity References ]
7719
     * Attribute values cannot contain direct or indirect
7720
     * entity references to external entities.
7721
     */
7722
80.6M
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7723
80.6M
       (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7724
25.0k
  xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7725
25.0k
       "Attribute references external entity '%s'\n", name);
7726
25.0k
    }
7727
    /*
7728
     * [ WFC: No < in Attribute Values ]
7729
     * The replacement text of any entity referred to directly or
7730
     * indirectly in an attribute value (other than "&lt;") must
7731
     * not contain a <.
7732
     */
7733
80.5M
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7734
80.5M
       (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7735
46.6M
  if ((ent->flags & XML_ENT_CHECKED_LT) == 0) {
7736
209k
            if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7737
3.49k
                ent->flags |= XML_ENT_CONTAINS_LT;
7738
209k
            ent->flags |= XML_ENT_CHECKED_LT;
7739
209k
        }
7740
46.6M
        if (ent->flags & XML_ENT_CONTAINS_LT)
7741
417k
            xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7742
417k
                    "'<' in entity '%s' is not allowed in attributes "
7743
417k
                    "values\n", name);
7744
46.6M
    }
7745
7746
    /*
7747
     * Internal check, no parameter entities here ...
7748
     */
7749
33.9M
    else {
7750
33.9M
  switch (ent->etype) {
7751
0
      case XML_INTERNAL_PARAMETER_ENTITY:
7752
0
      case XML_EXTERNAL_PARAMETER_ENTITY:
7753
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7754
0
       "Attempt to reference the parameter entity '%s'\n",
7755
0
            name);
7756
0
      break;
7757
33.9M
      default:
7758
33.9M
      break;
7759
33.9M
  }
7760
33.9M
    }
7761
7762
    /*
7763
     * [ WFC: No Recursion ]
7764
     * A parsed entity must not contain a recursive reference
7765
     * to itself, either directly or indirectly.
7766
     * Done somewhere else
7767
     */
7768
112M
    return(ent);
7769
112M
}
7770
7771
/**
7772
 * xmlParseStringEntityRef:
7773
 * @ctxt:  an XML parser context
7774
 * @str:  a pointer to an index in the string
7775
 *
7776
 * Parse an entity reference, but this version parses it from
7777
 * a string value.
7778
 *
7779
 * [68] EntityRef ::= '&' Name ';'
7780
 *
7781
 * [ WFC: Entity Declared ]
7782
 * In a document without any DTD, a document with only an internal DTD
7783
 * subset which contains no parameter entity references, or a document
7784
 * with "standalone='yes'", the Name given in the entity reference
7785
 * must match that in an entity declaration, except that well-formed
7786
 * documents need not declare any of the following entities: amp, lt,
7787
 * gt, apos, quot.  The declaration of a parameter entity must precede
7788
 * any reference to it.  Similarly, the declaration of a general entity
7789
 * must precede any reference to it which appears in a default value in an
7790
 * attribute-list declaration. Note that if entities are declared in the
7791
 * external subset or in external parameter entities, a non-validating
7792
 * processor is not obligated to read and process their declarations;
7793
 * for such documents, the rule that an entity must be declared is a
7794
 * well-formedness constraint only if standalone='yes'.
7795
 *
7796
 * [ WFC: Parsed Entity ]
7797
 * An entity reference must not contain the name of an unparsed entity
7798
 *
7799
 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7800
 * is updated to the current location in the string.
7801
 */
7802
static xmlEntityPtr
7803
864M
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7804
864M
    xmlChar *name;
7805
864M
    const xmlChar *ptr;
7806
864M
    xmlChar cur;
7807
864M
    xmlEntityPtr ent = NULL;
7808
7809
864M
    if ((str == NULL) || (*str == NULL))
7810
0
        return(NULL);
7811
864M
    ptr = *str;
7812
864M
    cur = *ptr;
7813
864M
    if (cur != '&')
7814
0
  return(NULL);
7815
7816
864M
    ptr++;
7817
864M
    name = xmlParseStringName(ctxt, &ptr);
7818
864M
    if (name == NULL) {
7819
133
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7820
133
           "xmlParseStringEntityRef: no name\n");
7821
133
  *str = ptr;
7822
133
  return(NULL);
7823
133
    }
7824
864M
    if (*ptr != ';') {
7825
12.4k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7826
12.4k
        xmlFree(name);
7827
12.4k
  *str = ptr;
7828
12.4k
  return(NULL);
7829
12.4k
    }
7830
864M
    ptr++;
7831
7832
7833
    /*
7834
     * Predefined entities override any extra definition
7835
     */
7836
864M
    if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7837
851M
        ent = xmlGetPredefinedEntity(name);
7838
851M
        if (ent != NULL) {
7839
1.43M
            xmlFree(name);
7840
1.43M
            *str = ptr;
7841
1.43M
            return(ent);
7842
1.43M
        }
7843
851M
    }
7844
7845
    /*
7846
     * Ask first SAX for entity resolution, otherwise try the
7847
     * entities which may have stored in the parser context.
7848
     */
7849
863M
    if (ctxt->sax != NULL) {
7850
863M
  if (ctxt->sax->getEntity != NULL)
7851
863M
      ent = ctxt->sax->getEntity(ctxt->userData, name);
7852
863M
  if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7853
640k
      ent = xmlGetPredefinedEntity(name);
7854
863M
  if ((ent == NULL) && (ctxt->userData==ctxt)) {
7855
88.1M
      ent = xmlSAX2GetEntity(ctxt, name);
7856
88.1M
  }
7857
863M
    }
7858
863M
    if (ctxt->instate == XML_PARSER_EOF) {
7859
0
  xmlFree(name);
7860
0
  return(NULL);
7861
0
    }
7862
7863
    /*
7864
     * [ WFC: Entity Declared ]
7865
     * In a document without any DTD, a document with only an
7866
     * internal DTD subset which contains no parameter entity
7867
     * references, or a document with "standalone='yes'", the
7868
     * Name given in the entity reference must match that in an
7869
     * entity declaration, except that well-formed documents
7870
     * need not declare any of the following entities: amp, lt,
7871
     * gt, apos, quot.
7872
     * The declaration of a parameter entity must precede any
7873
     * reference to it.
7874
     * Similarly, the declaration of a general entity must
7875
     * precede any reference to it which appears in a default
7876
     * value in an attribute-list declaration. Note that if
7877
     * entities are declared in the external subset or in
7878
     * external parameter entities, a non-validating processor
7879
     * is not obligated to read and process their declarations;
7880
     * for such documents, the rule that an entity must be
7881
     * declared is a well-formedness constraint only if
7882
     * standalone='yes'.
7883
     */
7884
863M
    if (ent == NULL) {
7885
88.1M
  if ((ctxt->standalone == 1) ||
7886
88.1M
      ((ctxt->hasExternalSubset == 0) &&
7887
88.1M
       (ctxt->hasPErefs == 0))) {
7888
88.0M
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7889
88.0M
         "Entity '%s' not defined\n", name);
7890
88.0M
  } else {
7891
27.7k
      xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7892
27.7k
        "Entity '%s' not defined\n",
7893
27.7k
        name);
7894
27.7k
  }
7895
  /* TODO ? check regressions ctxt->valid = 0; */
7896
88.1M
    }
7897
7898
    /*
7899
     * [ WFC: Parsed Entity ]
7900
     * An entity reference must not contain the name of an
7901
     * unparsed entity
7902
     */
7903
775M
    else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7904
40
  xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7905
40
     "Entity reference to unparsed entity %s\n", name);
7906
40
    }
7907
7908
    /*
7909
     * [ WFC: No External Entity References ]
7910
     * Attribute values cannot contain direct or indirect
7911
     * entity references to external entities.
7912
     */
7913
775M
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7914
775M
       (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7915
18
  xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7916
18
   "Attribute references external entity '%s'\n", name);
7917
18
    }
7918
    /*
7919
     * [ WFC: No < in Attribute Values ]
7920
     * The replacement text of any entity referred to directly or
7921
     * indirectly in an attribute value (other than "&lt;") must
7922
     * not contain a <.
7923
     */
7924
775M
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7925
775M
       (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7926
775M
  if ((ent->flags & XML_ENT_CHECKED_LT) == 0) {
7927
188k
            if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7928
3.45k
                ent->flags |= XML_ENT_CONTAINS_LT;
7929
188k
            ent->flags |= XML_ENT_CHECKED_LT;
7930
188k
        }
7931
775M
        if (ent->flags & XML_ENT_CONTAINS_LT)
7932
2.23M
            xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7933
2.23M
                    "'<' in entity '%s' is not allowed in attributes "
7934
2.23M
                    "values\n", name);
7935
775M
    }
7936
7937
    /*
7938
     * Internal check, no parameter entities here ...
7939
     */
7940
414k
    else {
7941
414k
  switch (ent->etype) {
7942
0
      case XML_INTERNAL_PARAMETER_ENTITY:
7943
0
      case XML_EXTERNAL_PARAMETER_ENTITY:
7944
0
    xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7945
0
       "Attempt to reference the parameter entity '%s'\n",
7946
0
          name);
7947
0
      break;
7948
414k
      default:
7949
414k
      break;
7950
414k
  }
7951
414k
    }
7952
7953
    /*
7954
     * [ WFC: No Recursion ]
7955
     * A parsed entity must not contain a recursive reference
7956
     * to itself, either directly or indirectly.
7957
     * Done somewhere else
7958
     */
7959
7960
863M
    xmlFree(name);
7961
863M
    *str = ptr;
7962
863M
    return(ent);
7963
863M
}
7964
7965
/**
7966
 * xmlParsePEReference:
7967
 * @ctxt:  an XML parser context
7968
 *
7969
 * DEPRECATED: Internal function, don't use.
7970
 *
7971
 * Parse a parameter entity reference. Always consumes '%'.
7972
 *
7973
 * The entity content is handled directly by pushing its content as
7974
 * a new input stream.
7975
 *
7976
 * [69] PEReference ::= '%' Name ';'
7977
 *
7978
 * [ WFC: No Recursion ]
7979
 * A parsed entity must not contain a recursive
7980
 * reference to itself, either directly or indirectly.
7981
 *
7982
 * [ WFC: Entity Declared ]
7983
 * In a document without any DTD, a document with only an internal DTD
7984
 * subset which contains no parameter entity references, or a document
7985
 * with "standalone='yes'", ...  ... The declaration of a parameter
7986
 * entity must precede any reference to it...
7987
 *
7988
 * [ VC: Entity Declared ]
7989
 * In a document with an external subset or external parameter entities
7990
 * with "standalone='no'", ...  ... The declaration of a parameter entity
7991
 * must precede any reference to it...
7992
 *
7993
 * [ WFC: In DTD ]
7994
 * Parameter-entity references may only appear in the DTD.
7995
 * NOTE: misleading but this is handled.
7996
 */
7997
void
7998
xmlParsePEReference(xmlParserCtxtPtr ctxt)
7999
990M
{
8000
990M
    const xmlChar *name;
8001
990M
    xmlEntityPtr entity = NULL;
8002
990M
    xmlParserInputPtr input;
8003
8004
990M
    if (RAW != '%')
8005
0
        return;
8006
990M
    NEXT;
8007
990M
    name = xmlParseName(ctxt);
8008
990M
    if (name == NULL) {
8009
517k
  xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
8010
517k
  return;
8011
517k
    }
8012
989M
    if (xmlParserDebugEntities)
8013
0
  xmlGenericError(xmlGenericErrorContext,
8014
0
    "PEReference: %s\n", name);
8015
989M
    if (RAW != ';') {
8016
4.64M
  xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
8017
4.64M
        return;
8018
4.64M
    }
8019
8020
985M
    NEXT;
8021
8022
    /*
8023
     * Request the entity from SAX
8024
     */
8025
985M
    if ((ctxt->sax != NULL) &&
8026
985M
  (ctxt->sax->getParameterEntity != NULL))
8027
985M
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8028
985M
    if (ctxt->instate == XML_PARSER_EOF)
8029
0
  return;
8030
985M
    if (entity == NULL) {
8031
  /*
8032
   * [ WFC: Entity Declared ]
8033
   * In a document without any DTD, a document with only an
8034
   * internal DTD subset which contains no parameter entity
8035
   * references, or a document with "standalone='yes'", ...
8036
   * ... The declaration of a parameter entity must precede
8037
   * any reference to it...
8038
   */
8039
108M
  if ((ctxt->standalone == 1) ||
8040
108M
      ((ctxt->hasExternalSubset == 0) &&
8041
108M
       (ctxt->hasPErefs == 0))) {
8042
3.44k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8043
3.44k
            "PEReference: %%%s; not found\n",
8044
3.44k
            name);
8045
108M
  } else {
8046
      /*
8047
       * [ VC: Entity Declared ]
8048
       * In a document with an external subset or external
8049
       * parameter entities with "standalone='no'", ...
8050
       * ... The declaration of a parameter entity must
8051
       * precede any reference to it...
8052
       */
8053
108M
            if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
8054
464k
                xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
8055
464k
                                 "PEReference: %%%s; not found\n",
8056
464k
                                 name, NULL);
8057
464k
            } else
8058
108M
                xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8059
108M
                              "PEReference: %%%s; not found\n",
8060
108M
                              name, NULL);
8061
108M
            ctxt->valid = 0;
8062
108M
  }
8063
876M
    } else {
8064
  /*
8065
   * Internal checking in case the entity quest barfed
8066
   */
8067
876M
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8068
876M
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8069
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8070
0
      "Internal: %%%s; is not a parameter entity\n",
8071
0
        name, NULL);
8072
876M
  } else {
8073
876M
            xmlChar start[4];
8074
876M
            xmlCharEncoding enc;
8075
876M
            unsigned long parentConsumed;
8076
876M
            xmlEntityPtr oldEnt;
8077
8078
876M
      if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
8079
876M
          ((ctxt->options & XML_PARSE_NOENT) == 0) &&
8080
876M
    ((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
8081
876M
    ((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
8082
876M
    ((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
8083
876M
    (ctxt->replaceEntities == 0) &&
8084
876M
    (ctxt->validate == 0))
8085
0
    return;
8086
8087
876M
            if (entity->flags & XML_ENT_EXPANDING) {
8088
534
                xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
8089
534
                xmlHaltParser(ctxt);
8090
534
                return;
8091
534
            }
8092
8093
            /* Must be computed from old input before pushing new input. */
8094
876M
            parentConsumed = ctxt->input->parentConsumed;
8095
876M
            oldEnt = ctxt->input->entity;
8096
876M
            if ((oldEnt == NULL) ||
8097
876M
                ((oldEnt->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
8098
863M
                 ((oldEnt->flags & XML_ENT_PARSED) == 0))) {
8099
16.1M
                xmlSaturatedAdd(&parentConsumed, ctxt->input->consumed);
8100
16.1M
                xmlSaturatedAddSizeT(&parentConsumed,
8101
16.1M
                                     ctxt->input->cur - ctxt->input->base);
8102
16.1M
            }
8103
8104
876M
      input = xmlNewEntityInputStream(ctxt, entity);
8105
876M
      if (xmlPushInput(ctxt, input) < 0) {
8106
3.39k
                xmlFreeInputStream(input);
8107
3.39k
    return;
8108
3.39k
            }
8109
8110
876M
            entity->flags |= XML_ENT_EXPANDING;
8111
8112
876M
            input->parentConsumed = parentConsumed;
8113
8114
876M
      if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
8115
                /*
8116
                 * Get the 4 first bytes and decode the charset
8117
                 * if enc != XML_CHAR_ENCODING_NONE
8118
                 * plug some encoding conversion routines.
8119
                 * Note that, since we may have some non-UTF8
8120
                 * encoding (like UTF16, bug 135229), the 'length'
8121
                 * is not known, but we can calculate based upon
8122
                 * the amount of data in the buffer.
8123
                 */
8124
26.8k
                GROW
8125
26.8k
                if (ctxt->instate == XML_PARSER_EOF)
8126
0
                    return;
8127
26.8k
                if ((ctxt->input->end - ctxt->input->cur)>=4) {
8128
26.8k
                    start[0] = RAW;
8129
26.8k
                    start[1] = NXT(1);
8130
26.8k
                    start[2] = NXT(2);
8131
26.8k
                    start[3] = NXT(3);
8132
26.8k
                    enc = xmlDetectCharEncoding(start, 4);
8133
26.8k
                    if (enc != XML_CHAR_ENCODING_NONE) {
8134
0
                        xmlSwitchEncoding(ctxt, enc);
8135
0
                    }
8136
26.8k
                }
8137
8138
26.8k
                if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
8139
26.8k
                    (IS_BLANK_CH(NXT(5)))) {
8140
0
                    xmlParseTextDecl(ctxt);
8141
0
                }
8142
26.8k
            }
8143
876M
  }
8144
876M
    }
8145
985M
    ctxt->hasPErefs = 1;
8146
985M
}
8147
8148
/**
8149
 * xmlLoadEntityContent:
8150
 * @ctxt:  an XML parser context
8151
 * @entity: an unloaded system entity
8152
 *
8153
 * Load the original content of the given system entity from the
8154
 * ExternalID/SystemID given. This is to be used for Included in Literal
8155
 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
8156
 *
8157
 * Returns 0 in case of success and -1 in case of failure
8158
 */
8159
static int
8160
19.0k
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
8161
19.0k
    xmlParserInputPtr input;
8162
19.0k
    xmlBufferPtr buf;
8163
19.0k
    int l, c;
8164
19.0k
    int count = 0;
8165
8166
19.0k
    if ((ctxt == NULL) || (entity == NULL) ||
8167
19.0k
        ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
8168
19.0k
   (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
8169
19.0k
  (entity->content != NULL)) {
8170
0
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8171
0
              "xmlLoadEntityContent parameter error");
8172
0
        return(-1);
8173
0
    }
8174
8175
19.0k
    if (xmlParserDebugEntities)
8176
0
  xmlGenericError(xmlGenericErrorContext,
8177
0
    "Reading %s entity content input\n", entity->name);
8178
8179
19.0k
    buf = xmlBufferCreate();
8180
19.0k
    if (buf == NULL) {
8181
0
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8182
0
              "xmlLoadEntityContent parameter error");
8183
0
        return(-1);
8184
0
    }
8185
19.0k
    xmlBufferSetAllocationScheme(buf, XML_BUFFER_ALLOC_DOUBLEIT);
8186
8187
19.0k
    input = xmlNewEntityInputStream(ctxt, entity);
8188
19.0k
    if (input == NULL) {
8189
3.46k
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8190
3.46k
              "xmlLoadEntityContent input error");
8191
3.46k
  xmlBufferFree(buf);
8192
3.46k
        return(-1);
8193
3.46k
    }
8194
8195
    /*
8196
     * Push the entity as the current input, read char by char
8197
     * saving to the buffer until the end of the entity or an error
8198
     */
8199
15.5k
    if (xmlPushInput(ctxt, input) < 0) {
8200
0
        xmlBufferFree(buf);
8201
0
  xmlFreeInputStream(input);
8202
0
  return(-1);
8203
0
    }
8204
8205
15.5k
    GROW;
8206
15.5k
    c = CUR_CHAR(l);
8207
457M
    while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
8208
457M
           (IS_CHAR(c))) {
8209
457M
        xmlBufferAdd(buf, ctxt->input->cur, l);
8210
457M
  if (count++ > XML_PARSER_CHUNK_SIZE) {
8211
4.48M
      count = 0;
8212
4.48M
      GROW;
8213
4.48M
            if (ctxt->instate == XML_PARSER_EOF) {
8214
0
                xmlBufferFree(buf);
8215
0
                return(-1);
8216
0
            }
8217
4.48M
  }
8218
457M
  NEXTL(l);
8219
457M
  c = CUR_CHAR(l);
8220
457M
  if (c == 0) {
8221
11.3k
      count = 0;
8222
11.3k
      GROW;
8223
11.3k
            if (ctxt->instate == XML_PARSER_EOF) {
8224
0
                xmlBufferFree(buf);
8225
0
                return(-1);
8226
0
            }
8227
11.3k
      c = CUR_CHAR(l);
8228
11.3k
  }
8229
457M
    }
8230
8231
15.5k
    if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
8232
9.97k
        xmlSaturatedAdd(&ctxt->sizeentities, ctxt->input->consumed);
8233
9.97k
        xmlPopInput(ctxt);
8234
9.97k
    } else if (!IS_CHAR(c)) {
8235
5.57k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8236
5.57k
                          "xmlLoadEntityContent: invalid char value %d\n",
8237
5.57k
                    c);
8238
5.57k
  xmlBufferFree(buf);
8239
5.57k
  return(-1);
8240
5.57k
    }
8241
9.97k
    entity->content = buf->content;
8242
9.97k
    entity->length = buf->use;
8243
9.97k
    buf->content = NULL;
8244
9.97k
    xmlBufferFree(buf);
8245
8246
9.97k
    return(0);
8247
15.5k
}
8248
8249
/**
8250
 * xmlParseStringPEReference:
8251
 * @ctxt:  an XML parser context
8252
 * @str:  a pointer to an index in the string
8253
 *
8254
 * parse PEReference declarations
8255
 *
8256
 * [69] PEReference ::= '%' Name ';'
8257
 *
8258
 * [ WFC: No Recursion ]
8259
 * A parsed entity must not contain a recursive
8260
 * reference to itself, either directly or indirectly.
8261
 *
8262
 * [ WFC: Entity Declared ]
8263
 * In a document without any DTD, a document with only an internal DTD
8264
 * subset which contains no parameter entity references, or a document
8265
 * with "standalone='yes'", ...  ... The declaration of a parameter
8266
 * entity must precede any reference to it...
8267
 *
8268
 * [ VC: Entity Declared ]
8269
 * In a document with an external subset or external parameter entities
8270
 * with "standalone='no'", ...  ... The declaration of a parameter entity
8271
 * must precede any reference to it...
8272
 *
8273
 * [ WFC: In DTD ]
8274
 * Parameter-entity references may only appear in the DTD.
8275
 * NOTE: misleading but this is handled.
8276
 *
8277
 * Returns the xmlEntityPtr if found, or NULL otherwise.
8278
 *         str is updated to the current value of the index
8279
 */
8280
static xmlEntityPtr
8281
7.01M
xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8282
7.01M
    const xmlChar *ptr;
8283
7.01M
    xmlChar cur;
8284
7.01M
    xmlChar *name;
8285
7.01M
    xmlEntityPtr entity = NULL;
8286
8287
7.01M
    if ((str == NULL) || (*str == NULL)) return(NULL);
8288
7.01M
    ptr = *str;
8289
7.01M
    cur = *ptr;
8290
7.01M
    if (cur != '%')
8291
0
        return(NULL);
8292
7.01M
    ptr++;
8293
7.01M
    name = xmlParseStringName(ctxt, &ptr);
8294
7.01M
    if (name == NULL) {
8295
39.7k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8296
39.7k
           "xmlParseStringPEReference: no name\n");
8297
39.7k
  *str = ptr;
8298
39.7k
  return(NULL);
8299
39.7k
    }
8300
6.97M
    cur = *ptr;
8301
6.97M
    if (cur != ';') {
8302
3.20k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8303
3.20k
  xmlFree(name);
8304
3.20k
  *str = ptr;
8305
3.20k
  return(NULL);
8306
3.20k
    }
8307
6.97M
    ptr++;
8308
8309
    /*
8310
     * Request the entity from SAX
8311
     */
8312
6.97M
    if ((ctxt->sax != NULL) &&
8313
6.97M
  (ctxt->sax->getParameterEntity != NULL))
8314
6.97M
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8315
6.97M
    if (ctxt->instate == XML_PARSER_EOF) {
8316
0
  xmlFree(name);
8317
0
  *str = ptr;
8318
0
  return(NULL);
8319
0
    }
8320
6.97M
    if (entity == NULL) {
8321
  /*
8322
   * [ WFC: Entity Declared ]
8323
   * In a document without any DTD, a document with only an
8324
   * internal DTD subset which contains no parameter entity
8325
   * references, or a document with "standalone='yes'", ...
8326
   * ... The declaration of a parameter entity must precede
8327
   * any reference to it...
8328
   */
8329
242k
  if ((ctxt->standalone == 1) ||
8330
242k
      ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8331
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8332
0
     "PEReference: %%%s; not found\n", name);
8333
242k
  } else {
8334
      /*
8335
       * [ VC: Entity Declared ]
8336
       * In a document with an external subset or external
8337
       * parameter entities with "standalone='no'", ...
8338
       * ... The declaration of a parameter entity must
8339
       * precede any reference to it...
8340
       */
8341
242k
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8342
242k
        "PEReference: %%%s; not found\n",
8343
242k
        name, NULL);
8344
242k
      ctxt->valid = 0;
8345
242k
  }
8346
6.73M
    } else {
8347
  /*
8348
   * Internal checking in case the entity quest barfed
8349
   */
8350
6.73M
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8351
6.73M
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8352
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8353
0
        "%%%s; is not a parameter entity\n",
8354
0
        name, NULL);
8355
0
  }
8356
6.73M
    }
8357
6.97M
    ctxt->hasPErefs = 1;
8358
6.97M
    xmlFree(name);
8359
6.97M
    *str = ptr;
8360
6.97M
    return(entity);
8361
6.97M
}
8362
8363
/**
8364
 * xmlParseDocTypeDecl:
8365
 * @ctxt:  an XML parser context
8366
 *
8367
 * DEPRECATED: Internal function, don't use.
8368
 *
8369
 * parse a DOCTYPE declaration
8370
 *
8371
 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8372
 *                      ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8373
 *
8374
 * [ VC: Root Element Type ]
8375
 * The Name in the document type declaration must match the element
8376
 * type of the root element.
8377
 */
8378
8379
void
8380
1.16M
xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
8381
1.16M
    const xmlChar *name = NULL;
8382
1.16M
    xmlChar *ExternalID = NULL;
8383
1.16M
    xmlChar *URI = NULL;
8384
8385
    /*
8386
     * We know that '<!DOCTYPE' has been detected.
8387
     */
8388
1.16M
    SKIP(9);
8389
8390
1.16M
    SKIP_BLANKS;
8391
8392
    /*
8393
     * Parse the DOCTYPE name.
8394
     */
8395
1.16M
    name = xmlParseName(ctxt);
8396
1.16M
    if (name == NULL) {
8397
1.69k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8398
1.69k
           "xmlParseDocTypeDecl : no DOCTYPE name !\n");
8399
1.69k
    }
8400
1.16M
    ctxt->intSubName = name;
8401
8402
1.16M
    SKIP_BLANKS;
8403
8404
    /*
8405
     * Check for SystemID and ExternalID
8406
     */
8407
1.16M
    URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8408
8409
1.16M
    if ((URI != NULL) || (ExternalID != NULL)) {
8410
522k
        ctxt->hasExternalSubset = 1;
8411
522k
    }
8412
1.16M
    ctxt->extSubURI = URI;
8413
1.16M
    ctxt->extSubSystem = ExternalID;
8414
8415
1.16M
    SKIP_BLANKS;
8416
8417
    /*
8418
     * Create and update the internal subset.
8419
     */
8420
1.16M
    if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8421
1.16M
  (!ctxt->disableSAX))
8422
1.10M
  ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8423
1.16M
    if (ctxt->instate == XML_PARSER_EOF)
8424
0
  return;
8425
8426
    /*
8427
     * Is there any internal subset declarations ?
8428
     * they are handled separately in xmlParseInternalSubset()
8429
     */
8430
1.16M
    if (RAW == '[')
8431
835k
  return;
8432
8433
    /*
8434
     * We should be at the end of the DOCTYPE declaration.
8435
     */
8436
332k
    if (RAW != '>') {
8437
63.8k
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8438
63.8k
    }
8439
332k
    NEXT;
8440
332k
}
8441
8442
/**
8443
 * xmlParseInternalSubset:
8444
 * @ctxt:  an XML parser context
8445
 *
8446
 * parse the internal subset declaration
8447
 *
8448
 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8449
 */
8450
8451
static void
8452
834k
xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8453
    /*
8454
     * Is there any DTD definition ?
8455
     */
8456
834k
    if (RAW == '[') {
8457
834k
        int baseInputNr = ctxt->inputNr;
8458
834k
        ctxt->instate = XML_PARSER_DTD;
8459
834k
        NEXT;
8460
  /*
8461
   * Parse the succession of Markup declarations and
8462
   * PEReferences.
8463
   * Subsequence (markupdecl | PEReference | S)*
8464
   */
8465
834k
  SKIP_BLANKS;
8466
875M
  while (((RAW != ']') || (ctxt->inputNr > baseInputNr)) &&
8467
875M
               (ctxt->instate != XML_PARSER_EOF)) {
8468
8469
            /*
8470
             * Conditional sections are allowed from external entities included
8471
             * by PE References in the internal subset.
8472
             */
8473
874M
            if ((ctxt->inputNr > 1) && (ctxt->input->filename != NULL) &&
8474
874M
                (RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
8475
0
                xmlParseConditionalSections(ctxt);
8476
874M
            } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
8477
874M
          xmlParseMarkupDecl(ctxt);
8478
874M
            } else if (RAW == '%') {
8479
49.5k
          xmlParsePEReference(ctxt);
8480
262k
            } else {
8481
262k
    xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8482
262k
                        "xmlParseInternalSubset: error detected in"
8483
262k
                        " Markup declaration\n");
8484
262k
                xmlHaltParser(ctxt);
8485
262k
                return;
8486
262k
            }
8487
874M
      SKIP_BLANKS;
8488
874M
  }
8489
572k
  if (RAW == ']') {
8490
497k
      NEXT;
8491
497k
      SKIP_BLANKS;
8492
497k
  }
8493
572k
    }
8494
8495
    /*
8496
     * We should be at the end of the DOCTYPE declaration.
8497
     */
8498
572k
    if (RAW != '>') {
8499
78.5k
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8500
78.5k
  return;
8501
78.5k
    }
8502
493k
    NEXT;
8503
493k
}
8504
8505
#ifdef LIBXML_SAX1_ENABLED
8506
/**
8507
 * xmlParseAttribute:
8508
 * @ctxt:  an XML parser context
8509
 * @value:  a xmlChar ** used to store the value of the attribute
8510
 *
8511
 * DEPRECATED: Internal function, don't use.
8512
 *
8513
 * parse an attribute
8514
 *
8515
 * [41] Attribute ::= Name Eq AttValue
8516
 *
8517
 * [ WFC: No External Entity References ]
8518
 * Attribute values cannot contain direct or indirect entity references
8519
 * to external entities.
8520
 *
8521
 * [ WFC: No < in Attribute Values ]
8522
 * The replacement text of any entity referred to directly or indirectly in
8523
 * an attribute value (other than "&lt;") must not contain a <.
8524
 *
8525
 * [ VC: Attribute Value Type ]
8526
 * The attribute must have been declared; the value must be of the type
8527
 * declared for it.
8528
 *
8529
 * [25] Eq ::= S? '=' S?
8530
 *
8531
 * With namespace:
8532
 *
8533
 * [NS 11] Attribute ::= QName Eq AttValue
8534
 *
8535
 * Also the case QName == xmlns:??? is handled independently as a namespace
8536
 * definition.
8537
 *
8538
 * Returns the attribute name, and the value in *value.
8539
 */
8540
8541
const xmlChar *
8542
224M
xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8543
224M
    const xmlChar *name;
8544
224M
    xmlChar *val;
8545
8546
224M
    *value = NULL;
8547
224M
    GROW;
8548
224M
    name = xmlParseName(ctxt);
8549
224M
    if (name == NULL) {
8550
1.01M
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8551
1.01M
                 "error parsing attribute name\n");
8552
1.01M
        return(NULL);
8553
1.01M
    }
8554
8555
    /*
8556
     * read the value
8557
     */
8558
223M
    SKIP_BLANKS;
8559
223M
    if (RAW == '=') {
8560
223M
        NEXT;
8561
223M
  SKIP_BLANKS;
8562
223M
  val = xmlParseAttValue(ctxt);
8563
223M
  ctxt->instate = XML_PARSER_CONTENT;
8564
223M
    } else {
8565
354k
  xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8566
354k
         "Specification mandates value for attribute %s\n", name);
8567
354k
  return(name);
8568
354k
    }
8569
8570
    /*
8571
     * Check that xml:lang conforms to the specification
8572
     * No more registered as an error, just generate a warning now
8573
     * since this was deprecated in XML second edition
8574
     */
8575
223M
    if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8576
14.9k
  if (!xmlCheckLanguageID(val)) {
8577
797
      xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8578
797
              "Malformed value for xml:lang : %s\n",
8579
797
        val, NULL);
8580
797
  }
8581
14.9k
    }
8582
8583
    /*
8584
     * Check that xml:space conforms to the specification
8585
     */
8586
223M
    if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8587
229
  if (xmlStrEqual(val, BAD_CAST "default"))
8588
0
      *(ctxt->space) = 0;
8589
229
  else if (xmlStrEqual(val, BAD_CAST "preserve"))
8590
150
      *(ctxt->space) = 1;
8591
79
  else {
8592
79
    xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8593
79
"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8594
79
                                 val, NULL);
8595
79
  }
8596
229
    }
8597
8598
223M
    *value = val;
8599
223M
    return(name);
8600
223M
}
8601
8602
/**
8603
 * xmlParseStartTag:
8604
 * @ctxt:  an XML parser context
8605
 *
8606
 * DEPRECATED: Internal function, don't use.
8607
 *
8608
 * Parse a start tag. Always consumes '<'.
8609
 *
8610
 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8611
 *
8612
 * [ WFC: Unique Att Spec ]
8613
 * No attribute name may appear more than once in the same start-tag or
8614
 * empty-element tag.
8615
 *
8616
 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8617
 *
8618
 * [ WFC: Unique Att Spec ]
8619
 * No attribute name may appear more than once in the same start-tag or
8620
 * empty-element tag.
8621
 *
8622
 * With namespace:
8623
 *
8624
 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8625
 *
8626
 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8627
 *
8628
 * Returns the element name parsed
8629
 */
8630
8631
const xmlChar *
8632
134M
xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8633
134M
    const xmlChar *name;
8634
134M
    const xmlChar *attname;
8635
134M
    xmlChar *attvalue;
8636
134M
    const xmlChar **atts = ctxt->atts;
8637
134M
    int nbatts = 0;
8638
134M
    int maxatts = ctxt->maxatts;
8639
134M
    int i;
8640
8641
134M
    if (RAW != '<') return(NULL);
8642
134M
    NEXT1;
8643
8644
134M
    name = xmlParseName(ctxt);
8645
134M
    if (name == NULL) {
8646
1.43M
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8647
1.43M
       "xmlParseStartTag: invalid element name\n");
8648
1.43M
        return(NULL);
8649
1.43M
    }
8650
8651
    /*
8652
     * Now parse the attributes, it ends up with the ending
8653
     *
8654
     * (S Attribute)* S?
8655
     */
8656
133M
    SKIP_BLANKS;
8657
133M
    GROW;
8658
8659
240M
    while (((RAW != '>') &&
8660
240M
     ((RAW != '/') || (NXT(1) != '>')) &&
8661
240M
     (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
8662
224M
  attname = xmlParseAttribute(ctxt, &attvalue);
8663
224M
        if (attname == NULL) {
8664
1.01M
      xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8665
1.01M
         "xmlParseStartTag: problem parsing attributes\n");
8666
1.01M
      break;
8667
1.01M
  }
8668
223M
        if (attvalue != NULL) {
8669
      /*
8670
       * [ WFC: Unique Att Spec ]
8671
       * No attribute name may appear more than once in the same
8672
       * start-tag or empty-element tag.
8673
       */
8674
330M
      for (i = 0; i < nbatts;i += 2) {
8675
106M
          if (xmlStrEqual(atts[i], attname)) {
8676
3.93k
        xmlErrAttributeDup(ctxt, NULL, attname);
8677
3.93k
        xmlFree(attvalue);
8678
3.93k
        goto failed;
8679
3.93k
    }
8680
106M
      }
8681
      /*
8682
       * Add the pair to atts
8683
       */
8684
223M
      if (atts == NULL) {
8685
215k
          maxatts = 22; /* allow for 10 attrs by default */
8686
215k
          atts = (const xmlChar **)
8687
215k
           xmlMalloc(maxatts * sizeof(xmlChar *));
8688
215k
    if (atts == NULL) {
8689
0
        xmlErrMemory(ctxt, NULL);
8690
0
        if (attvalue != NULL)
8691
0
      xmlFree(attvalue);
8692
0
        goto failed;
8693
0
    }
8694
215k
    ctxt->atts = atts;
8695
215k
    ctxt->maxatts = maxatts;
8696
223M
      } else if (nbatts + 4 > maxatts) {
8697
704
          const xmlChar **n;
8698
8699
704
          maxatts *= 2;
8700
704
          n = (const xmlChar **) xmlRealloc((void *) atts,
8701
704
               maxatts * sizeof(const xmlChar *));
8702
704
    if (n == NULL) {
8703
0
        xmlErrMemory(ctxt, NULL);
8704
0
        if (attvalue != NULL)
8705
0
      xmlFree(attvalue);
8706
0
        goto failed;
8707
0
    }
8708
704
    atts = n;
8709
704
    ctxt->atts = atts;
8710
704
    ctxt->maxatts = maxatts;
8711
704
      }
8712
223M
      atts[nbatts++] = attname;
8713
223M
      atts[nbatts++] = attvalue;
8714
223M
      atts[nbatts] = NULL;
8715
223M
      atts[nbatts + 1] = NULL;
8716
223M
  } else {
8717
377k
      if (attvalue != NULL)
8718
0
    xmlFree(attvalue);
8719
377k
  }
8720
8721
223M
failed:
8722
8723
223M
  GROW
8724
223M
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8725
116M
      break;
8726
106M
  if (SKIP_BLANKS == 0) {
8727
577k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8728
577k
         "attributes construct error\n");
8729
577k
  }
8730
106M
  SHRINK;
8731
106M
        GROW;
8732
106M
    }
8733
8734
    /*
8735
     * SAX: Start of Element !
8736
     */
8737
133M
    if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8738
133M
  (!ctxt->disableSAX)) {
8739
130M
  if (nbatts > 0)
8740
114M
      ctxt->sax->startElement(ctxt->userData, name, atts);
8741
15.7M
  else
8742
15.7M
      ctxt->sax->startElement(ctxt->userData, name, NULL);
8743
130M
    }
8744
8745
133M
    if (atts != NULL) {
8746
        /* Free only the content strings */
8747
355M
        for (i = 1;i < nbatts;i+=2)
8748
223M
      if (atts[i] != NULL)
8749
223M
         xmlFree((xmlChar *) atts[i]);
8750
132M
    }
8751
133M
    return(name);
8752
133M
}
8753
8754
/**
8755
 * xmlParseEndTag1:
8756
 * @ctxt:  an XML parser context
8757
 * @line:  line of the start tag
8758
 * @nsNr:  number of namespaces on the start tag
8759
 *
8760
 * Parse an end tag. Always consumes '</'.
8761
 *
8762
 * [42] ETag ::= '</' Name S? '>'
8763
 *
8764
 * With namespace
8765
 *
8766
 * [NS 9] ETag ::= '</' QName S? '>'
8767
 */
8768
8769
static void
8770
26.4M
xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8771
26.4M
    const xmlChar *name;
8772
8773
26.4M
    GROW;
8774
26.4M
    if ((RAW != '<') || (NXT(1) != '/')) {
8775
0
  xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8776
0
           "xmlParseEndTag: '</' not found\n");
8777
0
  return;
8778
0
    }
8779
26.4M
    SKIP(2);
8780
8781
26.4M
    name = xmlParseNameAndCompare(ctxt,ctxt->name);
8782
8783
    /*
8784
     * We should definitely be at the ending "S? '>'" part
8785
     */
8786
26.4M
    GROW;
8787
26.4M
    SKIP_BLANKS;
8788
26.4M
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8789
101k
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8790
101k
    } else
8791
26.3M
  NEXT1;
8792
8793
    /*
8794
     * [ WFC: Element Type Match ]
8795
     * The Name in an element's end-tag must match the element type in the
8796
     * start-tag.
8797
     *
8798
     */
8799
26.4M
    if (name != (xmlChar*)1) {
8800
711k
        if (name == NULL) name = BAD_CAST "unparsable";
8801
711k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8802
711k
         "Opening and ending tag mismatch: %s line %d and %s\n",
8803
711k
                    ctxt->name, line, name);
8804
711k
    }
8805
8806
    /*
8807
     * SAX: End of Tag
8808
     */
8809
26.4M
    if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8810
26.4M
  (!ctxt->disableSAX))
8811
25.7M
        ctxt->sax->endElement(ctxt->userData, ctxt->name);
8812
8813
26.4M
    namePop(ctxt);
8814
26.4M
    spacePop(ctxt);
8815
26.4M
    return;
8816
26.4M
}
8817
8818
/**
8819
 * xmlParseEndTag:
8820
 * @ctxt:  an XML parser context
8821
 *
8822
 * DEPRECATED: Internal function, don't use.
8823
 *
8824
 * parse an end of tag
8825
 *
8826
 * [42] ETag ::= '</' Name S? '>'
8827
 *
8828
 * With namespace
8829
 *
8830
 * [NS 9] ETag ::= '</' QName S? '>'
8831
 */
8832
8833
void
8834
0
xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8835
0
    xmlParseEndTag1(ctxt, 0);
8836
0
}
8837
#endif /* LIBXML_SAX1_ENABLED */
8838
8839
/************************************************************************
8840
 *                  *
8841
 *          SAX 2 specific operations       *
8842
 *                  *
8843
 ************************************************************************/
8844
8845
/*
8846
 * xmlGetNamespace:
8847
 * @ctxt:  an XML parser context
8848
 * @prefix:  the prefix to lookup
8849
 *
8850
 * Lookup the namespace name for the @prefix (which ca be NULL)
8851
 * The prefix must come from the @ctxt->dict dictionary
8852
 *
8853
 * Returns the namespace name or NULL if not bound
8854
 */
8855
static const xmlChar *
8856
337M
xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8857
337M
    int i;
8858
8859
337M
    if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
8860
359M
    for (i = ctxt->nsNr - 2;i >= 0;i-=2)
8861
41.0M
        if (ctxt->nsTab[i] == prefix) {
8862
16.1M
      if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8863
1.73k
          return(NULL);
8864
16.1M
      return(ctxt->nsTab[i + 1]);
8865
16.1M
  }
8866
318M
    return(NULL);
8867
334M
}
8868
8869
/**
8870
 * xmlParseQName:
8871
 * @ctxt:  an XML parser context
8872
 * @prefix:  pointer to store the prefix part
8873
 *
8874
 * parse an XML Namespace QName
8875
 *
8876
 * [6]  QName  ::= (Prefix ':')? LocalPart
8877
 * [7]  Prefix  ::= NCName
8878
 * [8]  LocalPart  ::= NCName
8879
 *
8880
 * Returns the Name parsed or NULL
8881
 */
8882
8883
static const xmlChar *
8884
840M
xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8885
840M
    const xmlChar *l, *p;
8886
8887
840M
    GROW;
8888
8889
840M
    l = xmlParseNCName(ctxt);
8890
840M
    if (l == NULL) {
8891
2.83M
        if (CUR == ':') {
8892
3.30k
      l = xmlParseName(ctxt);
8893
3.30k
      if (l != NULL) {
8894
3.30k
          xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8895
3.30k
             "Failed to parse QName '%s'\n", l, NULL, NULL);
8896
3.30k
    *prefix = NULL;
8897
3.30k
    return(l);
8898
3.30k
      }
8899
3.30k
  }
8900
2.83M
        return(NULL);
8901
2.83M
    }
8902
837M
    if (CUR == ':') {
8903
2.85M
        NEXT;
8904
2.85M
  p = l;
8905
2.85M
  l = xmlParseNCName(ctxt);
8906
2.85M
  if (l == NULL) {
8907
30.8k
      xmlChar *tmp;
8908
8909
30.8k
            if (ctxt->instate == XML_PARSER_EOF)
8910
0
                return(NULL);
8911
30.8k
            xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8912
30.8k
               "Failed to parse QName '%s:'\n", p, NULL, NULL);
8913
30.8k
      l = xmlParseNmtoken(ctxt);
8914
30.8k
      if (l == NULL) {
8915
12.5k
                if (ctxt->instate == XML_PARSER_EOF)
8916
0
                    return(NULL);
8917
12.5k
    tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8918
18.3k
            } else {
8919
18.3k
    tmp = xmlBuildQName(l, p, NULL, 0);
8920
18.3k
    xmlFree((char *)l);
8921
18.3k
      }
8922
30.8k
      p = xmlDictLookup(ctxt->dict, tmp, -1);
8923
30.8k
      if (tmp != NULL) xmlFree(tmp);
8924
30.8k
      *prefix = NULL;
8925
30.8k
      return(p);
8926
30.8k
  }
8927
2.82M
  if (CUR == ':') {
8928
11.9k
      xmlChar *tmp;
8929
8930
11.9k
            xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8931
11.9k
               "Failed to parse QName '%s:%s:'\n", p, l, NULL);
8932
11.9k
      NEXT;
8933
11.9k
      tmp = (xmlChar *) xmlParseName(ctxt);
8934
11.9k
      if (tmp != NULL) {
8935
10.1k
          tmp = xmlBuildQName(tmp, l, NULL, 0);
8936
10.1k
    l = xmlDictLookup(ctxt->dict, tmp, -1);
8937
10.1k
    if (tmp != NULL) xmlFree(tmp);
8938
10.1k
    *prefix = p;
8939
10.1k
    return(l);
8940
10.1k
      }
8941
1.80k
            if (ctxt->instate == XML_PARSER_EOF)
8942
0
                return(NULL);
8943
1.80k
      tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8944
1.80k
      l = xmlDictLookup(ctxt->dict, tmp, -1);
8945
1.80k
      if (tmp != NULL) xmlFree(tmp);
8946
1.80k
      *prefix = p;
8947
1.80k
      return(l);
8948
1.80k
  }
8949
2.81M
  *prefix = p;
8950
2.81M
    } else
8951
834M
        *prefix = NULL;
8952
837M
    return(l);
8953
837M
}
8954
8955
/**
8956
 * xmlParseQNameAndCompare:
8957
 * @ctxt:  an XML parser context
8958
 * @name:  the localname
8959
 * @prefix:  the prefix, if any.
8960
 *
8961
 * parse an XML name and compares for match
8962
 * (specialized for endtag parsing)
8963
 *
8964
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8965
 * and the name for mismatch
8966
 */
8967
8968
static const xmlChar *
8969
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8970
921k
                        xmlChar const *prefix) {
8971
921k
    const xmlChar *cmp;
8972
921k
    const xmlChar *in;
8973
921k
    const xmlChar *ret;
8974
921k
    const xmlChar *prefix2;
8975
8976
921k
    if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8977
8978
921k
    GROW;
8979
921k
    in = ctxt->input->cur;
8980
8981
921k
    cmp = prefix;
8982
2.98M
    while (*in != 0 && *in == *cmp) {
8983
2.06M
  ++in;
8984
2.06M
  ++cmp;
8985
2.06M
    }
8986
921k
    if ((*cmp == 0) && (*in == ':')) {
8987
825k
        in++;
8988
825k
  cmp = name;
8989
6.61M
  while (*in != 0 && *in == *cmp) {
8990
5.79M
      ++in;
8991
5.79M
      ++cmp;
8992
5.79M
  }
8993
825k
  if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8994
      /* success */
8995
770k
            ctxt->input->col += in - ctxt->input->cur;
8996
770k
      ctxt->input->cur = in;
8997
770k
      return((const xmlChar*) 1);
8998
770k
  }
8999
825k
    }
9000
    /*
9001
     * all strings coms from the dictionary, equality can be done directly
9002
     */
9003
151k
    ret = xmlParseQName (ctxt, &prefix2);
9004
151k
    if ((ret == name) && (prefix == prefix2))
9005
1.41k
  return((const xmlChar*) 1);
9006
149k
    return ret;
9007
151k
}
9008
9009
/**
9010
 * xmlParseAttValueInternal:
9011
 * @ctxt:  an XML parser context
9012
 * @len:  attribute len result
9013
 * @alloc:  whether the attribute was reallocated as a new string
9014
 * @normalize:  if 1 then further non-CDATA normalization must be done
9015
 *
9016
 * parse a value for an attribute.
9017
 * NOTE: if no normalization is needed, the routine will return pointers
9018
 *       directly from the data buffer.
9019
 *
9020
 * 3.3.3 Attribute-Value Normalization:
9021
 * Before the value of an attribute is passed to the application or
9022
 * checked for validity, the XML processor must normalize it as follows:
9023
 * - a character reference is processed by appending the referenced
9024
 *   character to the attribute value
9025
 * - an entity reference is processed by recursively processing the
9026
 *   replacement text of the entity
9027
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
9028
 *   appending #x20 to the normalized value, except that only a single
9029
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
9030
 *   parsed entity or the literal entity value of an internal parsed entity
9031
 * - other characters are processed by appending them to the normalized value
9032
 * If the declared value is not CDATA, then the XML processor must further
9033
 * process the normalized attribute value by discarding any leading and
9034
 * trailing space (#x20) characters, and by replacing sequences of space
9035
 * (#x20) characters by a single space (#x20) character.
9036
 * All attributes for which no declaration has been read should be treated
9037
 * by a non-validating parser as if declared CDATA.
9038
 *
9039
 * Returns the AttValue parsed or NULL. The value has to be freed by the
9040
 *     caller if it was copied, this can be detected by val[*len] == 0.
9041
 */
9042
9043
#define GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end) \
9044
25.2k
    const xmlChar *oldbase = ctxt->input->base;\
9045
25.2k
    GROW;\
9046
25.2k
    if (ctxt->instate == XML_PARSER_EOF)\
9047
25.2k
        return(NULL);\
9048
25.2k
    if (oldbase != ctxt->input->base) {\
9049
0
        ptrdiff_t delta = ctxt->input->base - oldbase;\
9050
0
        start = start + delta;\
9051
0
        in = in + delta;\
9052
0
    }\
9053
25.2k
    end = ctxt->input->end;
9054
9055
static xmlChar *
9056
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
9057
                         int normalize)
9058
731M
{
9059
731M
    xmlChar limit = 0;
9060
731M
    const xmlChar *in = NULL, *start, *end, *last;
9061
731M
    xmlChar *ret = NULL;
9062
731M
    int line, col;
9063
731M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9064
170M
                    XML_MAX_HUGE_LENGTH :
9065
731M
                    XML_MAX_TEXT_LENGTH;
9066
9067
731M
    GROW;
9068
731M
    in = (xmlChar *) CUR_PTR;
9069
731M
    line = ctxt->input->line;
9070
731M
    col = ctxt->input->col;
9071
731M
    if (*in != '"' && *in != '\'') {
9072
76.9k
        xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
9073
76.9k
        return (NULL);
9074
76.9k
    }
9075
731M
    ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
9076
9077
    /*
9078
     * try to handle in this routine the most common case where no
9079
     * allocation of a new string is required and where content is
9080
     * pure ASCII.
9081
     */
9082
731M
    limit = *in++;
9083
731M
    col++;
9084
731M
    end = ctxt->input->end;
9085
731M
    start = in;
9086
731M
    if (in >= end) {
9087
2.04k
        GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9088
2.04k
    }
9089
731M
    if (normalize) {
9090
        /*
9091
   * Skip any leading spaces
9092
   */
9093
26.0M
  while ((in < end) && (*in != limit) &&
9094
26.0M
         ((*in == 0x20) || (*in == 0x9) ||
9095
26.0M
          (*in == 0xA) || (*in == 0xD))) {
9096
152k
      if (*in == 0xA) {
9097
72.7k
          line++; col = 1;
9098
80.2k
      } else {
9099
80.2k
          col++;
9100
80.2k
      }
9101
152k
      in++;
9102
152k
      start = in;
9103
152k
      if (in >= end) {
9104
187
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9105
187
                if ((in - start) > maxLength) {
9106
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9107
0
                                   "AttValue length too long\n");
9108
0
                    return(NULL);
9109
0
                }
9110
187
      }
9111
152k
  }
9112
279M
  while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9113
279M
         (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9114
253M
      col++;
9115
253M
      if ((*in++ == 0x20) && (*in == 0x20)) break;
9116
253M
      if (in >= end) {
9117
1.17k
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9118
1.17k
                if ((in - start) > maxLength) {
9119
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9120
0
                                   "AttValue length too long\n");
9121
0
                    return(NULL);
9122
0
                }
9123
1.17k
      }
9124
253M
  }
9125
25.8M
  last = in;
9126
  /*
9127
   * skip the trailing blanks
9128
   */
9129
25.8M
  while ((last[-1] == 0x20) && (last > start)) last--;
9130
25.9M
  while ((in < end) && (*in != limit) &&
9131
25.9M
         ((*in == 0x20) || (*in == 0x9) ||
9132
190k
          (*in == 0xA) || (*in == 0xD))) {
9133
89.5k
      if (*in == 0xA) {
9134
33.1k
          line++, col = 1;
9135
56.3k
      } else {
9136
56.3k
          col++;
9137
56.3k
      }
9138
89.5k
      in++;
9139
89.5k
      if (in >= end) {
9140
835
    const xmlChar *oldbase = ctxt->input->base;
9141
835
    GROW;
9142
835
                if (ctxt->instate == XML_PARSER_EOF)
9143
0
                    return(NULL);
9144
835
    if (oldbase != ctxt->input->base) {
9145
0
        ptrdiff_t delta = ctxt->input->base - oldbase;
9146
0
        start = start + delta;
9147
0
        in = in + delta;
9148
0
        last = last + delta;
9149
0
    }
9150
835
    end = ctxt->input->end;
9151
835
                if ((in - start) > maxLength) {
9152
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9153
0
                                   "AttValue length too long\n");
9154
0
                    return(NULL);
9155
0
                }
9156
835
      }
9157
89.5k
  }
9158
25.8M
        if ((in - start) > maxLength) {
9159
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9160
0
                           "AttValue length too long\n");
9161
0
            return(NULL);
9162
0
        }
9163
25.8M
  if (*in != limit) goto need_complex;
9164
706M
    } else {
9165
6.00G
  while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9166
6.00G
         (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9167
5.29G
      in++;
9168
5.29G
      col++;
9169
5.29G
      if (in >= end) {
9170
21.8k
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9171
21.8k
                if ((in - start) > maxLength) {
9172
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9173
0
                                   "AttValue length too long\n");
9174
0
                    return(NULL);
9175
0
                }
9176
21.8k
      }
9177
5.29G
  }
9178
706M
  last = in;
9179
706M
        if ((in - start) > maxLength) {
9180
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9181
0
                           "AttValue length too long\n");
9182
0
            return(NULL);
9183
0
        }
9184
706M
  if (*in != limit) goto need_complex;
9185
706M
    }
9186
728M
    in++;
9187
728M
    col++;
9188
728M
    if (len != NULL) {
9189
501M
        if (alloc) *alloc = 0;
9190
501M
        *len = last - start;
9191
501M
        ret = (xmlChar *) start;
9192
501M
    } else {
9193
227M
        if (alloc) *alloc = 1;
9194
227M
        ret = xmlStrndup(start, last - start);
9195
227M
    }
9196
728M
    CUR_PTR = in;
9197
728M
    ctxt->input->line = line;
9198
728M
    ctxt->input->col = col;
9199
728M
    return ret;
9200
3.32M
need_complex:
9201
3.32M
    if (alloc) *alloc = 1;
9202
3.32M
    return xmlParseAttValueComplex(ctxt, len, normalize);
9203
731M
}
9204
9205
/**
9206
 * xmlParseAttribute2:
9207
 * @ctxt:  an XML parser context
9208
 * @pref:  the element prefix
9209
 * @elem:  the element name
9210
 * @prefix:  a xmlChar ** used to store the value of the attribute prefix
9211
 * @value:  a xmlChar ** used to store the value of the attribute
9212
 * @len:  an int * to save the length of the attribute
9213
 * @alloc:  an int * to indicate if the attribute was allocated
9214
 *
9215
 * parse an attribute in the new SAX2 framework.
9216
 *
9217
 * Returns the attribute name, and the value in *value, .
9218
 */
9219
9220
static const xmlChar *
9221
xmlParseAttribute2(xmlParserCtxtPtr ctxt,
9222
                   const xmlChar * pref, const xmlChar * elem,
9223
                   const xmlChar ** prefix, xmlChar ** value,
9224
                   int *len, int *alloc)
9225
504M
{
9226
504M
    const xmlChar *name;
9227
504M
    xmlChar *val, *internal_val = NULL;
9228
504M
    int normalize = 0;
9229
9230
504M
    *value = NULL;
9231
504M
    GROW;
9232
504M
    name = xmlParseQName(ctxt, prefix);
9233
504M
    if (name == NULL) {
9234
322k
        xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9235
322k
                       "error parsing attribute name\n");
9236
322k
        return (NULL);
9237
322k
    }
9238
9239
    /*
9240
     * get the type if needed
9241
     */
9242
503M
    if (ctxt->attsSpecial != NULL) {
9243
78.1M
        int type;
9244
9245
78.1M
        type = (int) (ptrdiff_t) xmlHashQLookup2(ctxt->attsSpecial,
9246
78.1M
                                                 pref, elem, *prefix, name);
9247
78.1M
        if (type != 0)
9248
25.8M
            normalize = 1;
9249
78.1M
    }
9250
9251
    /*
9252
     * read the value
9253
     */
9254
503M
    SKIP_BLANKS;
9255
503M
    if (RAW == '=') {
9256
503M
        NEXT;
9257
503M
        SKIP_BLANKS;
9258
503M
        val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9259
503M
        if (val == NULL)
9260
24.7k
            return (NULL);
9261
503M
  if (normalize) {
9262
      /*
9263
       * Sometimes a second normalisation pass for spaces is needed
9264
       * but that only happens if charrefs or entities references
9265
       * have been used in the attribute value, i.e. the attribute
9266
       * value have been extracted in an allocated string already.
9267
       */
9268
25.8M
      if (*alloc) {
9269
102k
          const xmlChar *val2;
9270
9271
102k
          val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
9272
102k
    if ((val2 != NULL) && (val2 != val)) {
9273
15.2k
        xmlFree(val);
9274
15.2k
        val = (xmlChar *) val2;
9275
15.2k
    }
9276
102k
      }
9277
25.8M
  }
9278
503M
        ctxt->instate = XML_PARSER_CONTENT;
9279
503M
    } else {
9280
109k
        xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
9281
109k
                          "Specification mandates value for attribute %s\n",
9282
109k
                          name);
9283
109k
        return (name);
9284
109k
    }
9285
9286
503M
    if (*prefix == ctxt->str_xml) {
9287
        /*
9288
         * Check that xml:lang conforms to the specification
9289
         * No more registered as an error, just generate a warning now
9290
         * since this was deprecated in XML second edition
9291
         */
9292
138k
        if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
9293
7.06k
            internal_val = xmlStrndup(val, *len);
9294
7.06k
            if (!xmlCheckLanguageID(internal_val)) {
9295
733
                xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9296
733
                              "Malformed value for xml:lang : %s\n",
9297
733
                              internal_val, NULL);
9298
733
            }
9299
7.06k
        }
9300
9301
        /*
9302
         * Check that xml:space conforms to the specification
9303
         */
9304
138k
        if (xmlStrEqual(name, BAD_CAST "space")) {
9305
621
            internal_val = xmlStrndup(val, *len);
9306
621
            if (xmlStrEqual(internal_val, BAD_CAST "default"))
9307
0
                *(ctxt->space) = 0;
9308
621
            else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9309
509
                *(ctxt->space) = 1;
9310
112
            else {
9311
112
                xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9312
112
                              "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9313
112
                              internal_val, NULL);
9314
112
            }
9315
621
        }
9316
138k
        if (internal_val) {
9317
7.68k
            xmlFree(internal_val);
9318
7.68k
        }
9319
138k
    }
9320
9321
503M
    *value = val;
9322
503M
    return (name);
9323
503M
}
9324
/**
9325
 * xmlParseStartTag2:
9326
 * @ctxt:  an XML parser context
9327
 *
9328
 * Parse a start tag. Always consumes '<'.
9329
 *
9330
 * This routine is called when running SAX2 parsing
9331
 *
9332
 * [40] STag ::= '<' Name (S Attribute)* S? '>'
9333
 *
9334
 * [ WFC: Unique Att Spec ]
9335
 * No attribute name may appear more than once in the same start-tag or
9336
 * empty-element tag.
9337
 *
9338
 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9339
 *
9340
 * [ WFC: Unique Att Spec ]
9341
 * No attribute name may appear more than once in the same start-tag or
9342
 * empty-element tag.
9343
 *
9344
 * With namespace:
9345
 *
9346
 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9347
 *
9348
 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9349
 *
9350
 * Returns the element name parsed
9351
 */
9352
9353
static const xmlChar *
9354
xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
9355
336M
                  const xmlChar **URI, int *tlen) {
9356
336M
    const xmlChar *localname;
9357
336M
    const xmlChar *prefix;
9358
336M
    const xmlChar *attname;
9359
336M
    const xmlChar *aprefix;
9360
336M
    const xmlChar *nsname;
9361
336M
    xmlChar *attvalue;
9362
336M
    const xmlChar **atts = ctxt->atts;
9363
336M
    int maxatts = ctxt->maxatts;
9364
336M
    int nratts, nbatts, nbdef, inputid;
9365
336M
    int i, j, nbNs, attval;
9366
336M
    unsigned long cur;
9367
336M
    int nsNr = ctxt->nsNr;
9368
9369
336M
    if (RAW != '<') return(NULL);
9370
336M
    NEXT1;
9371
9372
    /*
9373
     * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
9374
     *       point since the attribute values may be stored as pointers to
9375
     *       the buffer and calling SHRINK would destroy them !
9376
     *       The Shrinking is only possible once the full set of attribute
9377
     *       callbacks have been done.
9378
     */
9379
336M
    SHRINK;
9380
336M
    cur = ctxt->input->cur - ctxt->input->base;
9381
336M
    inputid = ctxt->input->id;
9382
336M
    nbatts = 0;
9383
336M
    nratts = 0;
9384
336M
    nbdef = 0;
9385
336M
    nbNs = 0;
9386
336M
    attval = 0;
9387
    /* Forget any namespaces added during an earlier parse of this element. */
9388
336M
    ctxt->nsNr = nsNr;
9389
9390
336M
    localname = xmlParseQName(ctxt, &prefix);
9391
336M
    if (localname == NULL) {
9392
2.50M
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9393
2.50M
           "StartTag: invalid element name\n");
9394
2.50M
        return(NULL);
9395
2.50M
    }
9396
333M
    *tlen = ctxt->input->cur - ctxt->input->base - cur;
9397
9398
    /*
9399
     * Now parse the attributes, it ends up with the ending
9400
     *
9401
     * (S Attribute)* S?
9402
     */
9403
333M
    SKIP_BLANKS;
9404
333M
    GROW;
9405
9406
565M
    while (((RAW != '>') &&
9407
565M
     ((RAW != '/') || (NXT(1) != '>')) &&
9408
565M
     (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
9409
504M
  int len = -1, alloc = 0;
9410
9411
504M
  attname = xmlParseAttribute2(ctxt, prefix, localname,
9412
504M
                               &aprefix, &attvalue, &len, &alloc);
9413
504M
        if (attname == NULL) {
9414
347k
      xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9415
347k
           "xmlParseStartTag: problem parsing attributes\n");
9416
347k
      break;
9417
347k
  }
9418
503M
        if (attvalue == NULL)
9419
109k
            goto next_attr;
9420
503M
  if (len < 0) len = xmlStrlen(attvalue);
9421
9422
503M
        if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9423
143k
            const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9424
143k
            xmlURIPtr uri;
9425
9426
143k
            if (URL == NULL) {
9427
0
                xmlErrMemory(ctxt, "dictionary allocation failure");
9428
0
                if ((attvalue != NULL) && (alloc != 0))
9429
0
                    xmlFree(attvalue);
9430
0
                localname = NULL;
9431
0
                goto done;
9432
0
            }
9433
143k
            if (*URL != 0) {
9434
142k
                uri = xmlParseURI((const char *) URL);
9435
142k
                if (uri == NULL) {
9436
31.5k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
9437
31.5k
                             "xmlns: '%s' is not a valid URI\n",
9438
31.5k
                                       URL, NULL, NULL);
9439
110k
                } else {
9440
110k
                    if (uri->scheme == NULL) {
9441
5.38k
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9442
5.38k
                                  "xmlns: URI %s is not absolute\n",
9443
5.38k
                                  URL, NULL, NULL);
9444
5.38k
                    }
9445
110k
                    xmlFreeURI(uri);
9446
110k
                }
9447
142k
                if (URL == ctxt->str_xml_ns) {
9448
0
                    if (attname != ctxt->str_xml) {
9449
0
                        xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9450
0
                     "xml namespace URI cannot be the default namespace\n",
9451
0
                                 NULL, NULL, NULL);
9452
0
                    }
9453
0
                    goto next_attr;
9454
0
                }
9455
142k
                if ((len == 29) &&
9456
142k
                    (xmlStrEqual(URL,
9457
3.14k
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9458
0
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9459
0
                         "reuse of the xmlns namespace name is forbidden\n",
9460
0
                             NULL, NULL, NULL);
9461
0
                    goto next_attr;
9462
0
                }
9463
142k
            }
9464
            /*
9465
             * check that it's not a defined namespace
9466
             */
9467
182k
            for (j = 1;j <= nbNs;j++)
9468
40.1k
                if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9469
1.06k
                    break;
9470
143k
            if (j <= nbNs)
9471
1.06k
                xmlErrAttributeDup(ctxt, NULL, attname);
9472
142k
            else
9473
142k
                if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
9474
9475
503M
        } else if (aprefix == ctxt->str_xmlns) {
9476
298k
            const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9477
298k
            xmlURIPtr uri;
9478
9479
298k
            if (attname == ctxt->str_xml) {
9480
162
                if (URL != ctxt->str_xml_ns) {
9481
162
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9482
162
                             "xml namespace prefix mapped to wrong URI\n",
9483
162
                             NULL, NULL, NULL);
9484
162
                }
9485
                /*
9486
                 * Do not keep a namespace definition node
9487
                 */
9488
162
                goto next_attr;
9489
162
            }
9490
298k
            if (URL == ctxt->str_xml_ns) {
9491
0
                if (attname != ctxt->str_xml) {
9492
0
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9493
0
                             "xml namespace URI mapped to wrong prefix\n",
9494
0
                             NULL, NULL, NULL);
9495
0
                }
9496
0
                goto next_attr;
9497
0
            }
9498
298k
            if (attname == ctxt->str_xmlns) {
9499
3
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9500
3
                         "redefinition of the xmlns prefix is forbidden\n",
9501
3
                         NULL, NULL, NULL);
9502
3
                goto next_attr;
9503
3
            }
9504
298k
            if ((len == 29) &&
9505
298k
                (xmlStrEqual(URL,
9506
5.03k
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9507
0
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9508
0
                         "reuse of the xmlns namespace name is forbidden\n",
9509
0
                         NULL, NULL, NULL);
9510
0
                goto next_attr;
9511
0
            }
9512
298k
            if ((URL == NULL) || (URL[0] == 0)) {
9513
565
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9514
565
                         "xmlns:%s: Empty XML namespace is not allowed\n",
9515
565
                              attname, NULL, NULL);
9516
565
                goto next_attr;
9517
297k
            } else {
9518
297k
                uri = xmlParseURI((const char *) URL);
9519
297k
                if (uri == NULL) {
9520
45.1k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
9521
45.1k
                         "xmlns:%s: '%s' is not a valid URI\n",
9522
45.1k
                                       attname, URL, NULL);
9523
252k
                } else {
9524
252k
                    if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9525
1.41k
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9526
1.41k
                                  "xmlns:%s: URI %s is not absolute\n",
9527
1.41k
                                  attname, URL, NULL);
9528
1.41k
                    }
9529
252k
                    xmlFreeURI(uri);
9530
252k
                }
9531
297k
            }
9532
9533
            /*
9534
             * check that it's not a defined namespace
9535
             */
9536
469k
            for (j = 1;j <= nbNs;j++)
9537
174k
                if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9538
2.12k
                    break;
9539
297k
            if (j <= nbNs)
9540
2.12k
                xmlErrAttributeDup(ctxt, aprefix, attname);
9541
295k
            else
9542
295k
                if (nsPush(ctxt, attname, URL) > 0) nbNs++;
9543
9544
503M
        } else {
9545
            /*
9546
             * Add the pair to atts
9547
             */
9548
503M
            if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9549
472k
                if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9550
0
                    goto next_attr;
9551
0
                }
9552
472k
                maxatts = ctxt->maxatts;
9553
472k
                atts = ctxt->atts;
9554
472k
            }
9555
503M
            ctxt->attallocs[nratts++] = alloc;
9556
503M
            atts[nbatts++] = attname;
9557
503M
            atts[nbatts++] = aprefix;
9558
            /*
9559
             * The namespace URI field is used temporarily to point at the
9560
             * base of the current input buffer for non-alloced attributes.
9561
             * When the input buffer is reallocated, all the pointers become
9562
             * invalid, but they can be reconstructed later.
9563
             */
9564
503M
            if (alloc)
9565
2.26M
                atts[nbatts++] = NULL;
9566
501M
            else
9567
501M
                atts[nbatts++] = ctxt->input->base;
9568
503M
            atts[nbatts++] = attvalue;
9569
503M
            attvalue += len;
9570
503M
            atts[nbatts++] = attvalue;
9571
            /*
9572
             * tag if some deallocation is needed
9573
             */
9574
503M
            if (alloc != 0) attval = 1;
9575
503M
            attvalue = NULL; /* moved into atts */
9576
503M
        }
9577
9578
503M
next_attr:
9579
503M
        if ((attvalue != NULL) && (alloc != 0)) {
9580
83.9k
            xmlFree(attvalue);
9581
83.9k
            attvalue = NULL;
9582
83.9k
        }
9583
9584
503M
  GROW
9585
503M
        if (ctxt->instate == XML_PARSER_EOF)
9586
0
            break;
9587
503M
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9588
271M
      break;
9589
232M
  if (SKIP_BLANKS == 0) {
9590
400k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9591
400k
         "attributes construct error\n");
9592
400k
      break;
9593
400k
  }
9594
231M
        GROW;
9595
231M
    }
9596
9597
333M
    if (ctxt->input->id != inputid) {
9598
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9599
0
                    "Unexpected change of input\n");
9600
0
        localname = NULL;
9601
0
        goto done;
9602
0
    }
9603
9604
    /* Reconstruct attribute value pointers. */
9605
836M
    for (i = 0, j = 0; j < nratts; i += 5, j++) {
9606
503M
        if (atts[i+2] != NULL) {
9607
            /*
9608
             * Arithmetic on dangling pointers is technically undefined
9609
             * behavior, but well...
9610
             */
9611
501M
            const xmlChar *old = atts[i+2];
9612
501M
            atts[i+2]  = NULL;    /* Reset repurposed namespace URI */
9613
501M
            atts[i+3] = ctxt->input->base + (atts[i+3] - old);  /* value */
9614
501M
            atts[i+4] = ctxt->input->base + (atts[i+4] - old);  /* valuend */
9615
501M
        }
9616
503M
    }
9617
9618
    /*
9619
     * The attributes defaulting
9620
     */
9621
333M
    if (ctxt->attsDefault != NULL) {
9622
74.7M
        xmlDefAttrsPtr defaults;
9623
9624
74.7M
  defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9625
74.7M
  if (defaults != NULL) {
9626
13.9M
      for (i = 0;i < defaults->nbAttrs;i++) {
9627
9.80M
          attname = defaults->values[5 * i];
9628
9.80M
    aprefix = defaults->values[5 * i + 1];
9629
9630
                /*
9631
     * special work for namespaces defaulted defs
9632
     */
9633
9.80M
    if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9634
        /*
9635
         * check that it's not a defined namespace
9636
         */
9637
2.99k
        for (j = 1;j <= nbNs;j++)
9638
1.90k
            if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9639
988
          break;
9640
2.07k
              if (j <= nbNs) continue;
9641
9642
1.08k
        nsname = xmlGetNamespace(ctxt, NULL);
9643
1.08k
        if (nsname != defaults->values[5 * i + 2]) {
9644
924
      if (nsPush(ctxt, NULL,
9645
924
                 defaults->values[5 * i + 2]) > 0)
9646
924
          nbNs++;
9647
924
        }
9648
9.80M
    } else if (aprefix == ctxt->str_xmlns) {
9649
        /*
9650
         * check that it's not a defined namespace
9651
         */
9652
15.3k
        for (j = 1;j <= nbNs;j++)
9653
8.24k
            if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9654
7.70k
          break;
9655
14.8k
              if (j <= nbNs) continue;
9656
9657
7.12k
        nsname = xmlGetNamespace(ctxt, attname);
9658
7.12k
        if (nsname != defaults->values[5 * i + 2]) {
9659
5.70k
      if (nsPush(ctxt, attname,
9660
5.70k
                 defaults->values[5 * i + 2]) > 0)
9661
5.70k
          nbNs++;
9662
5.70k
        }
9663
9.78M
    } else {
9664
        /*
9665
         * check that it's not a defined attribute
9666
         */
9667
27.5M
        for (j = 0;j < nbatts;j+=5) {
9668
17.9M
      if ((attname == atts[j]) && (aprefix == atts[j+1]))
9669
223k
          break;
9670
17.9M
        }
9671
9.78M
        if (j < nbatts) continue;
9672
9673
9.56M
        if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9674
4.41k
      if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9675
0
                            localname = NULL;
9676
0
                            goto done;
9677
0
      }
9678
4.41k
      maxatts = ctxt->maxatts;
9679
4.41k
      atts = ctxt->atts;
9680
4.41k
        }
9681
9.56M
        atts[nbatts++] = attname;
9682
9.56M
        atts[nbatts++] = aprefix;
9683
9.56M
        if (aprefix == NULL)
9684
8.22M
      atts[nbatts++] = NULL;
9685
1.33M
        else
9686
1.33M
            atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
9687
9.56M
        atts[nbatts++] = defaults->values[5 * i + 2];
9688
9.56M
        atts[nbatts++] = defaults->values[5 * i + 3];
9689
9.56M
        if ((ctxt->standalone == 1) &&
9690
9.56M
            (defaults->values[5 * i + 4] != NULL)) {
9691
0
      xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9692
0
    "standalone: attribute %s on %s defaulted from external subset\n",
9693
0
                                   attname, localname);
9694
0
        }
9695
9.56M
        nbdef++;
9696
9.56M
    }
9697
9.80M
      }
9698
4.18M
  }
9699
74.7M
    }
9700
9701
    /*
9702
     * The attributes checkings
9703
     */
9704
846M
    for (i = 0; i < nbatts;i += 5) {
9705
        /*
9706
  * The default namespace does not apply to attribute names.
9707
  */
9708
512M
  if (atts[i + 1] != NULL) {
9709
2.20M
      nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9710
2.20M
      if (nsname == NULL) {
9711
231k
    xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9712
231k
        "Namespace prefix %s for %s on %s is not defined\n",
9713
231k
        atts[i + 1], atts[i], localname);
9714
231k
      }
9715
2.20M
      atts[i + 2] = nsname;
9716
2.20M
  } else
9717
510M
      nsname = NULL;
9718
  /*
9719
   * [ WFC: Unique Att Spec ]
9720
   * No attribute name may appear more than once in the same
9721
   * start-tag or empty-element tag.
9722
   * As extended by the Namespace in XML REC.
9723
   */
9724
762M
        for (j = 0; j < i;j += 5) {
9725
250M
      if (atts[i] == atts[j]) {
9726
70.6k
          if (atts[i+1] == atts[j+1]) {
9727
9.59k
        xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9728
9.59k
        break;
9729
9.59k
    }
9730
61.0k
    if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9731
147
        xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9732
147
           "Namespaced Attribute %s in '%s' redefined\n",
9733
147
           atts[i], nsname, NULL);
9734
147
        break;
9735
147
    }
9736
61.0k
      }
9737
250M
  }
9738
512M
    }
9739
9740
333M
    nsname = xmlGetNamespace(ctxt, prefix);
9741
333M
    if ((prefix != NULL) && (nsname == NULL)) {
9742
408k
  xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9743
408k
           "Namespace prefix %s on %s is not defined\n",
9744
408k
     prefix, localname, NULL);
9745
408k
    }
9746
333M
    *pref = prefix;
9747
333M
    *URI = nsname;
9748
9749
    /*
9750
     * SAX: Start of Element !
9751
     */
9752
333M
    if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9753
333M
  (!ctxt->disableSAX)) {
9754
250M
  if (nbNs > 0)
9755
210k
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9756
210k
        nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9757
210k
        nbatts / 5, nbdef, atts);
9758
250M
  else
9759
250M
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9760
250M
                    nsname, 0, NULL, nbatts / 5, nbdef, atts);
9761
250M
    }
9762
9763
333M
done:
9764
    /*
9765
     * Free up attribute allocated strings if needed
9766
     */
9767
333M
    if (attval != 0) {
9768
5.12M
  for (i = 3,j = 0; j < nratts;i += 5,j++)
9769
2.90M
      if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9770
2.26M
          xmlFree((xmlChar *) atts[i]);
9771
2.21M
    }
9772
9773
333M
    return(localname);
9774
333M
}
9775
9776
/**
9777
 * xmlParseEndTag2:
9778
 * @ctxt:  an XML parser context
9779
 * @line:  line of the start tag
9780
 * @nsNr:  number of namespaces on the start tag
9781
 *
9782
 * Parse an end tag. Always consumes '</'.
9783
 *
9784
 * [42] ETag ::= '</' Name S? '>'
9785
 *
9786
 * With namespace
9787
 *
9788
 * [NS 9] ETag ::= '</' QName S? '>'
9789
 */
9790
9791
static void
9792
103M
xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlStartTag *tag) {
9793
103M
    const xmlChar *name;
9794
9795
103M
    GROW;
9796
103M
    if ((RAW != '<') || (NXT(1) != '/')) {
9797
0
  xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9798
0
  return;
9799
0
    }
9800
103M
    SKIP(2);
9801
9802
103M
    if (tag->prefix == NULL)
9803
102M
        name = xmlParseNameAndCompare(ctxt, ctxt->name);
9804
921k
    else
9805
921k
        name = xmlParseQNameAndCompare(ctxt, ctxt->name, tag->prefix);
9806
9807
    /*
9808
     * We should definitely be at the ending "S? '>'" part
9809
     */
9810
103M
    GROW;
9811
103M
    if (ctxt->instate == XML_PARSER_EOF)
9812
0
        return;
9813
103M
    SKIP_BLANKS;
9814
103M
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9815
150k
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9816
150k
    } else
9817
103M
  NEXT1;
9818
9819
    /*
9820
     * [ WFC: Element Type Match ]
9821
     * The Name in an element's end-tag must match the element type in the
9822
     * start-tag.
9823
     *
9824
     */
9825
103M
    if (name != (xmlChar*)1) {
9826
850k
        if (name == NULL) name = BAD_CAST "unparsable";
9827
850k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9828
850k
         "Opening and ending tag mismatch: %s line %d and %s\n",
9829
850k
                    ctxt->name, tag->line, name);
9830
850k
    }
9831
9832
    /*
9833
     * SAX: End of Tag
9834
     */
9835
103M
    if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9836
103M
  (!ctxt->disableSAX))
9837
72.6M
  ctxt->sax->endElementNs(ctxt->userData, ctxt->name, tag->prefix,
9838
72.6M
                                tag->URI);
9839
9840
103M
    spacePop(ctxt);
9841
103M
    if (tag->nsNr != 0)
9842
88.2k
  nsPop(ctxt, tag->nsNr);
9843
103M
}
9844
9845
/**
9846
 * xmlParseCDSect:
9847
 * @ctxt:  an XML parser context
9848
 *
9849
 * DEPRECATED: Internal function, don't use.
9850
 *
9851
 * Parse escaped pure raw content. Always consumes '<!['.
9852
 *
9853
 * [18] CDSect ::= CDStart CData CDEnd
9854
 *
9855
 * [19] CDStart ::= '<![CDATA['
9856
 *
9857
 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9858
 *
9859
 * [21] CDEnd ::= ']]>'
9860
 */
9861
void
9862
482k
xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9863
482k
    xmlChar *buf = NULL;
9864
482k
    int len = 0;
9865
482k
    int size = XML_PARSER_BUFFER_SIZE;
9866
482k
    int r, rl;
9867
482k
    int s, sl;
9868
482k
    int cur, l;
9869
482k
    int count = 0;
9870
482k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9871
48.9k
                    XML_MAX_HUGE_LENGTH :
9872
482k
                    XML_MAX_TEXT_LENGTH;
9873
9874
482k
    if ((CUR != '<') || (NXT(1) != '!') || (NXT(2) != '['))
9875
0
        return;
9876
482k
    SKIP(3);
9877
9878
482k
    if (!CMP6(CUR_PTR, 'C', 'D', 'A', 'T', 'A', '['))
9879
0
        return;
9880
482k
    SKIP(6);
9881
9882
482k
    ctxt->instate = XML_PARSER_CDATA_SECTION;
9883
482k
    r = CUR_CHAR(rl);
9884
482k
    if (!IS_CHAR(r)) {
9885
225
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9886
225
        goto out;
9887
225
    }
9888
481k
    NEXTL(rl);
9889
481k
    s = CUR_CHAR(sl);
9890
481k
    if (!IS_CHAR(s)) {
9891
128
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9892
128
        goto out;
9893
128
    }
9894
481k
    NEXTL(sl);
9895
481k
    cur = CUR_CHAR(l);
9896
481k
    buf = (xmlChar *) xmlMallocAtomic(size);
9897
481k
    if (buf == NULL) {
9898
0
  xmlErrMemory(ctxt, NULL);
9899
0
        goto out;
9900
0
    }
9901
999M
    while (IS_CHAR(cur) &&
9902
999M
           ((r != ']') || (s != ']') || (cur != '>'))) {
9903
998M
  if (len + 5 >= size) {
9904
588k
      xmlChar *tmp;
9905
9906
588k
      tmp = (xmlChar *) xmlRealloc(buf, size * 2);
9907
588k
      if (tmp == NULL) {
9908
0
    xmlErrMemory(ctxt, NULL);
9909
0
                goto out;
9910
0
      }
9911
588k
      buf = tmp;
9912
588k
      size *= 2;
9913
588k
  }
9914
998M
  COPY_BUF(rl,buf,len,r);
9915
998M
  r = s;
9916
998M
  rl = sl;
9917
998M
  s = cur;
9918
998M
  sl = l;
9919
998M
  count++;
9920
998M
  if (count > 50) {
9921
19.3M
      SHRINK;
9922
19.3M
      GROW;
9923
19.3M
            if (ctxt->instate == XML_PARSER_EOF) {
9924
0
                goto out;
9925
0
            }
9926
19.3M
      count = 0;
9927
19.3M
  }
9928
998M
  NEXTL(l);
9929
998M
  cur = CUR_CHAR(l);
9930
998M
        if (len > maxLength) {
9931
0
            xmlFatalErrMsg(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9932
0
                           "CData section too big found\n");
9933
0
            goto out;
9934
0
        }
9935
998M
    }
9936
481k
    buf[len] = 0;
9937
481k
    if (cur != '>') {
9938
16.6k
  xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9939
16.6k
                       "CData section not finished\n%.50s\n", buf);
9940
16.6k
        goto out;
9941
16.6k
    }
9942
465k
    NEXTL(l);
9943
9944
    /*
9945
     * OK the buffer is to be consumed as cdata.
9946
     */
9947
465k
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9948
258k
  if (ctxt->sax->cdataBlock != NULL)
9949
190k
      ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9950
67.8k
  else if (ctxt->sax->characters != NULL)
9951
67.8k
      ctxt->sax->characters(ctxt->userData, buf, len);
9952
258k
    }
9953
9954
482k
out:
9955
482k
    if (ctxt->instate != XML_PARSER_EOF)
9956
482k
        ctxt->instate = XML_PARSER_CONTENT;
9957
482k
    xmlFree(buf);
9958
482k
}
9959
9960
/**
9961
 * xmlParseContentInternal:
9962
 * @ctxt:  an XML parser context
9963
 *
9964
 * Parse a content sequence. Stops at EOF or '</'. Leaves checking of
9965
 * unexpected EOF to the caller.
9966
 */
9967
9968
static void
9969
1.17M
xmlParseContentInternal(xmlParserCtxtPtr ctxt) {
9970
1.17M
    int nameNr = ctxt->nameNr;
9971
9972
1.17M
    GROW;
9973
767M
    while ((RAW != 0) &&
9974
767M
     (ctxt->instate != XML_PARSER_EOF)) {
9975
766M
  const xmlChar *cur = ctxt->input->cur;
9976
9977
  /*
9978
   * First case : a Processing Instruction.
9979
   */
9980
766M
  if ((*cur == '<') && (cur[1] == '?')) {
9981
87.8k
      xmlParsePI(ctxt);
9982
87.8k
  }
9983
9984
  /*
9985
   * Second case : a CDSection
9986
   */
9987
  /* 2.6.0 test was *cur not RAW */
9988
766M
  else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9989
482k
      xmlParseCDSect(ctxt);
9990
482k
  }
9991
9992
  /*
9993
   * Third case :  a comment
9994
   */
9995
766M
  else if ((*cur == '<') && (NXT(1) == '!') &&
9996
766M
     (NXT(2) == '-') && (NXT(3) == '-')) {
9997
2.95M
      xmlParseComment(ctxt);
9998
2.95M
      ctxt->instate = XML_PARSER_CONTENT;
9999
2.95M
  }
10000
10001
  /*
10002
   * Fourth case :  a sub-element.
10003
   */
10004
763M
  else if (*cur == '<') {
10005
331M
            if (NXT(1) == '/') {
10006
74.8M
                if (ctxt->nameNr <= nameNr)
10007
190k
                    break;
10008
74.7M
          xmlParseElementEnd(ctxt);
10009
256M
            } else {
10010
256M
          xmlParseElementStart(ctxt);
10011
256M
            }
10012
331M
  }
10013
10014
  /*
10015
   * Fifth case : a reference. If if has not been resolved,
10016
   *    parsing returns it's Name, create the node
10017
   */
10018
10019
432M
  else if (*cur == '&') {
10020
33.0M
      xmlParseReference(ctxt);
10021
33.0M
  }
10022
10023
  /*
10024
   * Last case, text. Note that References are handled directly.
10025
   */
10026
399M
  else {
10027
399M
      xmlParseCharData(ctxt, 0);
10028
399M
  }
10029
10030
766M
  GROW;
10031
766M
  SHRINK;
10032
766M
    }
10033
1.17M
}
10034
10035
/**
10036
 * xmlParseContent:
10037
 * @ctxt:  an XML parser context
10038
 *
10039
 * Parse a content sequence. Stops at EOF or '</'.
10040
 *
10041
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10042
 */
10043
10044
void
10045
866k
xmlParseContent(xmlParserCtxtPtr ctxt) {
10046
866k
    int nameNr = ctxt->nameNr;
10047
10048
866k
    xmlParseContentInternal(ctxt);
10049
10050
866k
    if ((ctxt->instate != XML_PARSER_EOF) && (ctxt->nameNr > nameNr)) {
10051
6.20k
        const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
10052
6.20k
        int line = ctxt->pushTab[ctxt->nameNr - 1].line;
10053
6.20k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
10054
6.20k
                "Premature end of data in tag %s line %d\n",
10055
6.20k
    name, line, NULL);
10056
6.20k
    }
10057
866k
}
10058
10059
/**
10060
 * xmlParseElement:
10061
 * @ctxt:  an XML parser context
10062
 *
10063
 * DEPRECATED: Internal function, don't use.
10064
 *
10065
 * parse an XML element
10066
 *
10067
 * [39] element ::= EmptyElemTag | STag content ETag
10068
 *
10069
 * [ WFC: Element Type Match ]
10070
 * The Name in an element's end-tag must match the element type in the
10071
 * start-tag.
10072
 *
10073
 */
10074
10075
void
10076
408k
xmlParseElement(xmlParserCtxtPtr ctxt) {
10077
408k
    if (xmlParseElementStart(ctxt) != 0)
10078
98.7k
        return;
10079
10080
310k
    xmlParseContentInternal(ctxt);
10081
310k
    if (ctxt->instate == XML_PARSER_EOF)
10082
1.21k
  return;
10083
10084
308k
    if (CUR == 0) {
10085
122k
        const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
10086
122k
        int line = ctxt->pushTab[ctxt->nameNr - 1].line;
10087
122k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
10088
122k
                "Premature end of data in tag %s line %d\n",
10089
122k
    name, line, NULL);
10090
122k
        return;
10091
122k
    }
10092
10093
186k
    xmlParseElementEnd(ctxt);
10094
186k
}
10095
10096
/**
10097
 * xmlParseElementStart:
10098
 * @ctxt:  an XML parser context
10099
 *
10100
 * Parse the start of an XML element. Returns -1 in case of error, 0 if an
10101
 * opening tag was parsed, 1 if an empty element was parsed.
10102
 *
10103
 * Always consumes '<'.
10104
 */
10105
static int
10106
256M
xmlParseElementStart(xmlParserCtxtPtr ctxt) {
10107
256M
    const xmlChar *name;
10108
256M
    const xmlChar *prefix = NULL;
10109
256M
    const xmlChar *URI = NULL;
10110
256M
    xmlParserNodeInfo node_info;
10111
256M
    int line, tlen = 0;
10112
256M
    xmlNodePtr ret;
10113
256M
    int nsNr = ctxt->nsNr;
10114
10115
256M
    if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
10116
256M
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
10117
48
  xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
10118
48
     "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
10119
48
        xmlParserMaxDepth);
10120
48
  xmlHaltParser(ctxt);
10121
48
  return(-1);
10122
48
    }
10123
10124
    /* Capture start position */
10125
256M
    if (ctxt->record_info) {
10126
0
        node_info.begin_pos = ctxt->input->consumed +
10127
0
                          (CUR_PTR - ctxt->input->base);
10128
0
  node_info.begin_line = ctxt->input->line;
10129
0
    }
10130
10131
256M
    if (ctxt->spaceNr == 0)
10132
0
  spacePush(ctxt, -1);
10133
256M
    else if (*ctxt->space == -2)
10134
19.1M
  spacePush(ctxt, -1);
10135
237M
    else
10136
237M
  spacePush(ctxt, *ctxt->space);
10137
10138
256M
    line = ctxt->input->line;
10139
256M
#ifdef LIBXML_SAX1_ENABLED
10140
256M
    if (ctxt->sax2)
10141
182M
#endif /* LIBXML_SAX1_ENABLED */
10142
182M
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
10143
74.3M
#ifdef LIBXML_SAX1_ENABLED
10144
74.3M
    else
10145
74.3M
  name = xmlParseStartTag(ctxt);
10146
256M
#endif /* LIBXML_SAX1_ENABLED */
10147
256M
    if (ctxt->instate == XML_PARSER_EOF)
10148
3.07k
  return(-1);
10149
256M
    if (name == NULL) {
10150
3.87M
  spacePop(ctxt);
10151
3.87M
        return(-1);
10152
3.87M
    }
10153
252M
    nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
10154
252M
    ret = ctxt->node;
10155
10156
252M
#ifdef LIBXML_VALID_ENABLED
10157
    /*
10158
     * [ VC: Root Element Type ]
10159
     * The Name in the document type declaration must match the element
10160
     * type of the root element.
10161
     */
10162
252M
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10163
252M
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
10164
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
10165
252M
#endif /* LIBXML_VALID_ENABLED */
10166
10167
    /*
10168
     * Check for an Empty Element.
10169
     */
10170
252M
    if ((RAW == '/') && (NXT(1) == '>')) {
10171
176M
        SKIP(2);
10172
176M
  if (ctxt->sax2) {
10173
120M
      if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
10174
120M
    (!ctxt->disableSAX))
10175
68.9M
    ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
10176
120M
#ifdef LIBXML_SAX1_ENABLED
10177
120M
  } else {
10178
55.5M
      if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
10179
55.5M
    (!ctxt->disableSAX))
10180
53.3M
    ctxt->sax->endElement(ctxt->userData, name);
10181
55.5M
#endif /* LIBXML_SAX1_ENABLED */
10182
55.5M
  }
10183
176M
  namePop(ctxt);
10184
176M
  spacePop(ctxt);
10185
176M
  if (nsNr != ctxt->nsNr)
10186
9.03k
      nsPop(ctxt, ctxt->nsNr - nsNr);
10187
176M
  if ( ret != NULL && ctxt->record_info ) {
10188
0
     node_info.end_pos = ctxt->input->consumed +
10189
0
            (CUR_PTR - ctxt->input->base);
10190
0
     node_info.end_line = ctxt->input->line;
10191
0
     node_info.node = ret;
10192
0
     xmlParserAddNodeInfo(ctxt, &node_info);
10193
0
  }
10194
176M
  return(1);
10195
176M
    }
10196
76.7M
    if (RAW == '>') {
10197
75.6M
        NEXT1;
10198
75.6M
    } else {
10199
1.11M
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
10200
1.11M
         "Couldn't find end of Start Tag %s line %d\n",
10201
1.11M
                    name, line, NULL);
10202
10203
  /*
10204
   * end of parsing of this node.
10205
   */
10206
1.11M
  nodePop(ctxt);
10207
1.11M
  namePop(ctxt);
10208
1.11M
  spacePop(ctxt);
10209
1.11M
  if (nsNr != ctxt->nsNr)
10210
28.6k
      nsPop(ctxt, ctxt->nsNr - nsNr);
10211
10212
  /*
10213
   * Capture end position and add node
10214
   */
10215
1.11M
  if ( ret != NULL && ctxt->record_info ) {
10216
0
     node_info.end_pos = ctxt->input->consumed +
10217
0
            (CUR_PTR - ctxt->input->base);
10218
0
     node_info.end_line = ctxt->input->line;
10219
0
     node_info.node = ret;
10220
0
     xmlParserAddNodeInfo(ctxt, &node_info);
10221
0
  }
10222
1.11M
  return(-1);
10223
1.11M
    }
10224
10225
75.6M
    return(0);
10226
76.7M
}
10227
10228
/**
10229
 * xmlParseElementEnd:
10230
 * @ctxt:  an XML parser context
10231
 *
10232
 * Parse the end of an XML element. Always consumes '</'.
10233
 */
10234
static void
10235
74.8M
xmlParseElementEnd(xmlParserCtxtPtr ctxt) {
10236
74.8M
    xmlParserNodeInfo node_info;
10237
74.8M
    xmlNodePtr ret = ctxt->node;
10238
10239
74.8M
    if (ctxt->nameNr <= 0) {
10240
0
        if ((RAW == '<') && (NXT(1) == '/'))
10241
0
            SKIP(2);
10242
0
        return;
10243
0
    }
10244
10245
    /*
10246
     * parse the end of tag: '</' should be here.
10247
     */
10248
74.8M
    if (ctxt->sax2) {
10249
58.4M
  xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
10250
58.4M
  namePop(ctxt);
10251
58.4M
    }
10252
16.4M
#ifdef LIBXML_SAX1_ENABLED
10253
16.4M
    else
10254
16.4M
  xmlParseEndTag1(ctxt, 0);
10255
74.8M
#endif /* LIBXML_SAX1_ENABLED */
10256
10257
    /*
10258
     * Capture end position and add node
10259
     */
10260
74.8M
    if ( ret != NULL && ctxt->record_info ) {
10261
0
       node_info.end_pos = ctxt->input->consumed +
10262
0
                          (CUR_PTR - ctxt->input->base);
10263
0
       node_info.end_line = ctxt->input->line;
10264
0
       node_info.node = ret;
10265
0
       xmlParserAddNodeInfo(ctxt, &node_info);
10266
0
    }
10267
74.8M
}
10268
10269
/**
10270
 * xmlParseVersionNum:
10271
 * @ctxt:  an XML parser context
10272
 *
10273
 * DEPRECATED: Internal function, don't use.
10274
 *
10275
 * parse the XML version value.
10276
 *
10277
 * [26] VersionNum ::= '1.' [0-9]+
10278
 *
10279
 * In practice allow [0-9].[0-9]+ at that level
10280
 *
10281
 * Returns the string giving the XML version number, or NULL
10282
 */
10283
xmlChar *
10284
1.07M
xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10285
1.07M
    xmlChar *buf = NULL;
10286
1.07M
    int len = 0;
10287
1.07M
    int size = 10;
10288
1.07M
    xmlChar cur;
10289
10290
1.07M
    buf = (xmlChar *) xmlMallocAtomic(size);
10291
1.07M
    if (buf == NULL) {
10292
0
  xmlErrMemory(ctxt, NULL);
10293
0
  return(NULL);
10294
0
    }
10295
1.07M
    cur = CUR;
10296
1.07M
    if (!((cur >= '0') && (cur <= '9'))) {
10297
4.09k
  xmlFree(buf);
10298
4.09k
  return(NULL);
10299
4.09k
    }
10300
1.07M
    buf[len++] = cur;
10301
1.07M
    NEXT;
10302
1.07M
    cur=CUR;
10303
1.07M
    if (cur != '.') {
10304
8.81k
  xmlFree(buf);
10305
8.81k
  return(NULL);
10306
8.81k
    }
10307
1.06M
    buf[len++] = cur;
10308
1.06M
    NEXT;
10309
1.06M
    cur=CUR;
10310
8.85M
    while ((cur >= '0') && (cur <= '9')) {
10311
7.79M
  if (len + 1 >= size) {
10312
12.0k
      xmlChar *tmp;
10313
10314
12.0k
      size *= 2;
10315
12.0k
      tmp = (xmlChar *) xmlRealloc(buf, size);
10316
12.0k
      if (tmp == NULL) {
10317
0
          xmlFree(buf);
10318
0
    xmlErrMemory(ctxt, NULL);
10319
0
    return(NULL);
10320
0
      }
10321
12.0k
      buf = tmp;
10322
12.0k
  }
10323
7.79M
  buf[len++] = cur;
10324
7.79M
  NEXT;
10325
7.79M
  cur=CUR;
10326
7.79M
    }
10327
1.06M
    buf[len] = 0;
10328
1.06M
    return(buf);
10329
1.06M
}
10330
10331
/**
10332
 * xmlParseVersionInfo:
10333
 * @ctxt:  an XML parser context
10334
 *
10335
 * DEPRECATED: Internal function, don't use.
10336
 *
10337
 * parse the XML version.
10338
 *
10339
 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
10340
 *
10341
 * [25] Eq ::= S? '=' S?
10342
 *
10343
 * Returns the version string, e.g. "1.0"
10344
 */
10345
10346
xmlChar *
10347
1.14M
xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10348
1.14M
    xmlChar *version = NULL;
10349
10350
1.14M
    if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
10351
1.08M
  SKIP(7);
10352
1.08M
  SKIP_BLANKS;
10353
1.08M
  if (RAW != '=') {
10354
7.91k
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10355
7.91k
      return(NULL);
10356
7.91k
        }
10357
1.07M
  NEXT;
10358
1.07M
  SKIP_BLANKS;
10359
1.07M
  if (RAW == '"') {
10360
939k
      NEXT;
10361
939k
      version = xmlParseVersionNum(ctxt);
10362
939k
      if (RAW != '"') {
10363
22.2k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10364
22.2k
      } else
10365
917k
          NEXT;
10366
939k
  } else if (RAW == '\''){
10367
135k
      NEXT;
10368
135k
      version = xmlParseVersionNum(ctxt);
10369
135k
      if (RAW != '\'') {
10370
4.76k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10371
4.76k
      } else
10372
130k
          NEXT;
10373
135k
  } else {
10374
3.39k
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10375
3.39k
  }
10376
1.07M
    }
10377
1.13M
    return(version);
10378
1.14M
}
10379
10380
/**
10381
 * xmlParseEncName:
10382
 * @ctxt:  an XML parser context
10383
 *
10384
 * DEPRECATED: Internal function, don't use.
10385
 *
10386
 * parse the XML encoding name
10387
 *
10388
 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10389
 *
10390
 * Returns the encoding name value or NULL
10391
 */
10392
xmlChar *
10393
574k
xmlParseEncName(xmlParserCtxtPtr ctxt) {
10394
574k
    xmlChar *buf = NULL;
10395
574k
    int len = 0;
10396
574k
    int size = 10;
10397
574k
    xmlChar cur;
10398
10399
574k
    cur = CUR;
10400
574k
    if (((cur >= 'a') && (cur <= 'z')) ||
10401
574k
        ((cur >= 'A') && (cur <= 'Z'))) {
10402
572k
  buf = (xmlChar *) xmlMallocAtomic(size);
10403
572k
  if (buf == NULL) {
10404
0
      xmlErrMemory(ctxt, NULL);
10405
0
      return(NULL);
10406
0
  }
10407
10408
572k
  buf[len++] = cur;
10409
572k
  NEXT;
10410
572k
  cur = CUR;
10411
55.7M
  while (((cur >= 'a') && (cur <= 'z')) ||
10412
55.7M
         ((cur >= 'A') && (cur <= 'Z')) ||
10413
55.7M
         ((cur >= '0') && (cur <= '9')) ||
10414
55.7M
         (cur == '.') || (cur == '_') ||
10415
55.7M
         (cur == '-')) {
10416
55.1M
      if (len + 1 >= size) {
10417
267k
          xmlChar *tmp;
10418
10419
267k
    size *= 2;
10420
267k
    tmp = (xmlChar *) xmlRealloc(buf, size);
10421
267k
    if (tmp == NULL) {
10422
0
        xmlErrMemory(ctxt, NULL);
10423
0
        xmlFree(buf);
10424
0
        return(NULL);
10425
0
    }
10426
267k
    buf = tmp;
10427
267k
      }
10428
55.1M
      buf[len++] = cur;
10429
55.1M
      NEXT;
10430
55.1M
      cur = CUR;
10431
55.1M
      if (cur == 0) {
10432
3.56k
          SHRINK;
10433
3.56k
    GROW;
10434
3.56k
    cur = CUR;
10435
3.56k
      }
10436
55.1M
        }
10437
572k
  buf[len] = 0;
10438
572k
    } else {
10439
1.87k
  xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10440
1.87k
    }
10441
574k
    return(buf);
10442
574k
}
10443
10444
/**
10445
 * xmlParseEncodingDecl:
10446
 * @ctxt:  an XML parser context
10447
 *
10448
 * DEPRECATED: Internal function, don't use.
10449
 *
10450
 * parse the XML encoding declaration
10451
 *
10452
 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' |  "'" EncName "'")
10453
 *
10454
 * this setups the conversion filters.
10455
 *
10456
 * Returns the encoding value or NULL
10457
 */
10458
10459
const xmlChar *
10460
813k
xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10461
813k
    xmlChar *encoding = NULL;
10462
10463
813k
    SKIP_BLANKS;
10464
813k
    if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
10465
578k
  SKIP(8);
10466
578k
  SKIP_BLANKS;
10467
578k
  if (RAW != '=') {
10468
2.95k
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10469
2.95k
      return(NULL);
10470
2.95k
        }
10471
575k
  NEXT;
10472
575k
  SKIP_BLANKS;
10473
575k
  if (RAW == '"') {
10474
474k
      NEXT;
10475
474k
      encoding = xmlParseEncName(ctxt);
10476
474k
      if (RAW != '"') {
10477
23.5k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10478
23.5k
    xmlFree((xmlChar *) encoding);
10479
23.5k
    return(NULL);
10480
23.5k
      } else
10481
450k
          NEXT;
10482
474k
  } else if (RAW == '\''){
10483
99.8k
      NEXT;
10484
99.8k
      encoding = xmlParseEncName(ctxt);
10485
99.8k
      if (RAW != '\'') {
10486
1.19k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10487
1.19k
    xmlFree((xmlChar *) encoding);
10488
1.19k
    return(NULL);
10489
1.19k
      } else
10490
98.6k
          NEXT;
10491
99.8k
  } else {
10492
950
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10493
950
  }
10494
10495
        /*
10496
         * Non standard parsing, allowing the user to ignore encoding
10497
         */
10498
550k
        if (ctxt->options & XML_PARSE_IGNORE_ENC) {
10499
28.7k
      xmlFree((xmlChar *) encoding);
10500
28.7k
            return(NULL);
10501
28.7k
  }
10502
10503
  /*
10504
   * UTF-16 encoding switch has already taken place at this stage,
10505
   * more over the little-endian/big-endian selection is already done
10506
   */
10507
521k
        if ((encoding != NULL) &&
10508
521k
      ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10509
520k
       (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
10510
      /*
10511
       * If no encoding was passed to the parser, that we are
10512
       * using UTF-16 and no decoder is present i.e. the
10513
       * document is apparently UTF-8 compatible, then raise an
10514
       * encoding mismatch fatal error
10515
       */
10516
0
      if ((ctxt->encoding == NULL) &&
10517
0
          (ctxt->input->buf != NULL) &&
10518
0
          (ctxt->input->buf->encoder == NULL)) {
10519
0
    xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10520
0
      "Document labelled UTF-16 but has UTF-8 content\n");
10521
0
      }
10522
0
      if (ctxt->encoding != NULL)
10523
0
    xmlFree((xmlChar *) ctxt->encoding);
10524
0
      ctxt->encoding = encoding;
10525
0
  }
10526
  /*
10527
   * UTF-8 encoding is handled natively
10528
   */
10529
521k
        else if ((encoding != NULL) &&
10530
521k
      ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10531
520k
       (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
10532
257k
      if (ctxt->encoding != NULL)
10533
0
    xmlFree((xmlChar *) ctxt->encoding);
10534
257k
      ctxt->encoding = encoding;
10535
257k
  }
10536
264k
  else if (encoding != NULL) {
10537
263k
      xmlCharEncodingHandlerPtr handler;
10538
10539
263k
      if (ctxt->input->encoding != NULL)
10540
0
    xmlFree((xmlChar *) ctxt->input->encoding);
10541
263k
      ctxt->input->encoding = encoding;
10542
10543
263k
            handler = xmlFindCharEncodingHandler((const char *) encoding);
10544
263k
      if (handler != NULL) {
10545
257k
    if (xmlSwitchToEncoding(ctxt, handler) < 0) {
10546
        /* failed to convert */
10547
60
        ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
10548
60
        return(NULL);
10549
60
    }
10550
257k
      } else {
10551
6.45k
    xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
10552
6.45k
      "Unsupported encoding %s\n", encoding);
10553
6.45k
    return(NULL);
10554
6.45k
      }
10555
263k
  }
10556
521k
    }
10557
750k
    return(encoding);
10558
813k
}
10559
10560
/**
10561
 * xmlParseSDDecl:
10562
 * @ctxt:  an XML parser context
10563
 *
10564
 * DEPRECATED: Internal function, don't use.
10565
 *
10566
 * parse the XML standalone declaration
10567
 *
10568
 * [32] SDDecl ::= S 'standalone' Eq
10569
 *                 (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10570
 *
10571
 * [ VC: Standalone Document Declaration ]
10572
 * TODO The standalone document declaration must have the value "no"
10573
 * if any external markup declarations contain declarations of:
10574
 *  - attributes with default values, if elements to which these
10575
 *    attributes apply appear in the document without specifications
10576
 *    of values for these attributes, or
10577
 *  - entities (other than amp, lt, gt, apos, quot), if references
10578
 *    to those entities appear in the document, or
10579
 *  - attributes with values subject to normalization, where the
10580
 *    attribute appears in the document with a value which will change
10581
 *    as a result of normalization, or
10582
 *  - element types with element content, if white space occurs directly
10583
 *    within any instance of those types.
10584
 *
10585
 * Returns:
10586
 *   1 if standalone="yes"
10587
 *   0 if standalone="no"
10588
 *  -2 if standalone attribute is missing or invalid
10589
 *    (A standalone value of -2 means that the XML declaration was found,
10590
 *     but no value was specified for the standalone attribute).
10591
 */
10592
10593
int
10594
620k
xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10595
620k
    int standalone = -2;
10596
10597
620k
    SKIP_BLANKS;
10598
620k
    if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10599
153k
  SKIP(10);
10600
153k
        SKIP_BLANKS;
10601
153k
  if (RAW != '=') {
10602
1.34k
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10603
1.34k
      return(standalone);
10604
1.34k
        }
10605
152k
  NEXT;
10606
152k
  SKIP_BLANKS;
10607
152k
        if (RAW == '\''){
10608
98.0k
      NEXT;
10609
98.0k
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10610
93.2k
          standalone = 0;
10611
93.2k
                SKIP(2);
10612
93.2k
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10613
4.78k
                 (NXT(2) == 's')) {
10614
3.98k
          standalone = 1;
10615
3.98k
    SKIP(3);
10616
3.98k
            } else {
10617
804
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10618
804
      }
10619
98.0k
      if (RAW != '\'') {
10620
1.13k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10621
1.13k
      } else
10622
96.9k
          NEXT;
10623
98.0k
  } else if (RAW == '"'){
10624
53.9k
      NEXT;
10625
53.9k
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10626
39.2k
          standalone = 0;
10627
39.2k
    SKIP(2);
10628
39.2k
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10629
14.7k
                 (NXT(2) == 's')) {
10630
11.1k
          standalone = 1;
10631
11.1k
                SKIP(3);
10632
11.1k
            } else {
10633
3.66k
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10634
3.66k
      }
10635
53.9k
      if (RAW != '"') {
10636
4.84k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10637
4.84k
      } else
10638
49.1k
          NEXT;
10639
53.9k
  } else {
10640
255
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10641
255
        }
10642
152k
    }
10643
619k
    return(standalone);
10644
620k
}
10645
10646
/**
10647
 * xmlParseXMLDecl:
10648
 * @ctxt:  an XML parser context
10649
 *
10650
 * DEPRECATED: Internal function, don't use.
10651
 *
10652
 * parse an XML declaration header
10653
 *
10654
 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10655
 */
10656
10657
void
10658
1.10M
xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10659
1.10M
    xmlChar *version;
10660
10661
    /*
10662
     * This value for standalone indicates that the document has an
10663
     * XML declaration but it does not have a standalone attribute.
10664
     * It will be overwritten later if a standalone attribute is found.
10665
     */
10666
1.10M
    ctxt->input->standalone = -2;
10667
10668
    /*
10669
     * We know that '<?xml' is here.
10670
     */
10671
1.10M
    SKIP(5);
10672
10673
1.10M
    if (!IS_BLANK_CH(RAW)) {
10674
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10675
0
                 "Blank needed after '<?xml'\n");
10676
0
    }
10677
1.10M
    SKIP_BLANKS;
10678
10679
    /*
10680
     * We must have the VersionInfo here.
10681
     */
10682
1.10M
    version = xmlParseVersionInfo(ctxt);
10683
1.10M
    if (version == NULL) {
10684
77.6k
  xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10685
1.02M
    } else {
10686
1.02M
  if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10687
      /*
10688
       * Changed here for XML-1.0 5th edition
10689
       */
10690
9.40k
      if (ctxt->options & XML_PARSE_OLD10) {
10691
2.07k
    xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10692
2.07k
                "Unsupported version '%s'\n",
10693
2.07k
                version);
10694
7.33k
      } else {
10695
7.33k
          if ((version[0] == '1') && ((version[1] == '.'))) {
10696
6.58k
        xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10697
6.58k
                      "Unsupported version '%s'\n",
10698
6.58k
          version, NULL);
10699
6.58k
    } else {
10700
742
        xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10701
742
              "Unsupported version '%s'\n",
10702
742
              version);
10703
742
    }
10704
7.33k
      }
10705
9.40k
  }
10706
1.02M
  if (ctxt->version != NULL)
10707
0
      xmlFree((void *) ctxt->version);
10708
1.02M
  ctxt->version = version;
10709
1.02M
    }
10710
10711
    /*
10712
     * We may have the encoding declaration
10713
     */
10714
1.10M
    if (!IS_BLANK_CH(RAW)) {
10715
427k
        if ((RAW == '?') && (NXT(1) == '>')) {
10716
329k
      SKIP(2);
10717
329k
      return;
10718
329k
  }
10719
98.2k
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10720
98.2k
    }
10721
771k
    xmlParseEncodingDecl(ctxt);
10722
771k
    if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10723
771k
         (ctxt->instate == XML_PARSER_EOF)) {
10724
  /*
10725
   * The XML REC instructs us to stop parsing right here
10726
   */
10727
6.12k
        return;
10728
6.12k
    }
10729
10730
    /*
10731
     * We may have the standalone status.
10732
     */
10733
764k
    if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10734
146k
        if ((RAW == '?') && (NXT(1) == '>')) {
10735
144k
      SKIP(2);
10736
144k
      return;
10737
144k
  }
10738
2.16k
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10739
2.16k
    }
10740
10741
    /*
10742
     * We can grow the input buffer freely at that point
10743
     */
10744
620k
    GROW;
10745
10746
620k
    SKIP_BLANKS;
10747
620k
    ctxt->input->standalone = xmlParseSDDecl(ctxt);
10748
10749
620k
    SKIP_BLANKS;
10750
620k
    if ((RAW == '?') && (NXT(1) == '>')) {
10751
441k
        SKIP(2);
10752
441k
    } else if (RAW == '>') {
10753
        /* Deprecated old WD ... */
10754
2.28k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10755
2.28k
  NEXT;
10756
176k
    } else {
10757
176k
        int c;
10758
10759
176k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10760
35.2M
        while ((c = CUR) != 0) {
10761
35.2M
            NEXT;
10762
35.2M
            if (c == '>')
10763
160k
                break;
10764
35.2M
        }
10765
176k
    }
10766
620k
}
10767
10768
/**
10769
 * xmlParseMisc:
10770
 * @ctxt:  an XML parser context
10771
 *
10772
 * DEPRECATED: Internal function, don't use.
10773
 *
10774
 * parse an XML Misc* optional field.
10775
 *
10776
 * [27] Misc ::= Comment | PI |  S
10777
 */
10778
10779
void
10780
1.28M
xmlParseMisc(xmlParserCtxtPtr ctxt) {
10781
1.46M
    while (ctxt->instate != XML_PARSER_EOF) {
10782
1.46M
        SKIP_BLANKS;
10783
1.46M
        GROW;
10784
1.46M
        if ((RAW == '<') && (NXT(1) == '?')) {
10785
76.1k
      xmlParsePI(ctxt);
10786
1.39M
        } else if (CMP4(CUR_PTR, '<', '!', '-', '-')) {
10787
104k
      xmlParseComment(ctxt);
10788
1.28M
        } else {
10789
1.28M
            break;
10790
1.28M
        }
10791
1.46M
    }
10792
1.28M
}
10793
10794
/**
10795
 * xmlParseDocument:
10796
 * @ctxt:  an XML parser context
10797
 *
10798
 * parse an XML document (and build a tree if using the standard SAX
10799
 * interface).
10800
 *
10801
 * [1] document ::= prolog element Misc*
10802
 *
10803
 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10804
 *
10805
 * Returns 0, -1 in case of error. the parser context is augmented
10806
 *                as a result of the parsing.
10807
 */
10808
10809
int
10810
642k
xmlParseDocument(xmlParserCtxtPtr ctxt) {
10811
642k
    xmlChar start[4];
10812
642k
    xmlCharEncoding enc;
10813
10814
642k
    xmlInitParser();
10815
10816
642k
    if ((ctxt == NULL) || (ctxt->input == NULL))
10817
0
        return(-1);
10818
10819
642k
    GROW;
10820
10821
    /*
10822
     * SAX: detecting the level.
10823
     */
10824
642k
    xmlDetectSAX2(ctxt);
10825
10826
    /*
10827
     * SAX: beginning of the document processing.
10828
     */
10829
642k
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10830
642k
        ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10831
642k
    if (ctxt->instate == XML_PARSER_EOF)
10832
0
  return(-1);
10833
10834
642k
    if ((ctxt->encoding == NULL) &&
10835
642k
        ((ctxt->input->end - ctxt->input->cur) >= 4)) {
10836
  /*
10837
   * Get the 4 first bytes and decode the charset
10838
   * if enc != XML_CHAR_ENCODING_NONE
10839
   * plug some encoding conversion routines.
10840
   */
10841
641k
  start[0] = RAW;
10842
641k
  start[1] = NXT(1);
10843
641k
  start[2] = NXT(2);
10844
641k
  start[3] = NXT(3);
10845
641k
  enc = xmlDetectCharEncoding(&start[0], 4);
10846
641k
  if (enc != XML_CHAR_ENCODING_NONE) {
10847
400k
      xmlSwitchEncoding(ctxt, enc);
10848
400k
  }
10849
641k
    }
10850
10851
10852
642k
    if (CUR == 0) {
10853
722
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10854
722
  return(-1);
10855
722
    }
10856
10857
    /*
10858
     * Check for the XMLDecl in the Prolog.
10859
     * do not GROW here to avoid the detected encoder to decode more
10860
     * than just the first line, unless the amount of data is really
10861
     * too small to hold "<?xml version="1.0" encoding="foo"
10862
     */
10863
641k
    if ((ctxt->input->end - ctxt->input->cur) < 35) {
10864
21.7k
       GROW;
10865
21.7k
    }
10866
641k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10867
10868
  /*
10869
   * Note that we will switch encoding on the fly.
10870
   */
10871
371k
  xmlParseXMLDecl(ctxt);
10872
371k
  if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10873
371k
      (ctxt->instate == XML_PARSER_EOF)) {
10874
      /*
10875
       * The XML REC instructs us to stop parsing right here
10876
       */
10877
2.05k
      return(-1);
10878
2.05k
  }
10879
369k
  ctxt->standalone = ctxt->input->standalone;
10880
369k
  SKIP_BLANKS;
10881
369k
    } else {
10882
269k
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10883
269k
    }
10884
639k
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10885
589k
        ctxt->sax->startDocument(ctxt->userData);
10886
639k
    if (ctxt->instate == XML_PARSER_EOF)
10887
2.82k
  return(-1);
10888
636k
    if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10889
636k
        (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10890
0
  ctxt->myDoc->compression = ctxt->input->buf->compressed;
10891
0
    }
10892
10893
    /*
10894
     * The Misc part of the Prolog
10895
     */
10896
636k
    xmlParseMisc(ctxt);
10897
10898
    /*
10899
     * Then possibly doc type declaration(s) and more Misc
10900
     * (doctypedecl Misc*)?
10901
     */
10902
636k
    GROW;
10903
636k
    if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10904
10905
400k
  ctxt->inSubset = 1;
10906
400k
  xmlParseDocTypeDecl(ctxt);
10907
400k
  if (RAW == '[') {
10908
285k
      ctxt->instate = XML_PARSER_DTD;
10909
285k
      xmlParseInternalSubset(ctxt);
10910
285k
      if (ctxt->instate == XML_PARSER_EOF)
10911
116k
    return(-1);
10912
285k
  }
10913
10914
  /*
10915
   * Create and update the external subset.
10916
   */
10917
284k
  ctxt->inSubset = 2;
10918
284k
  if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10919
284k
      (!ctxt->disableSAX))
10920
244k
      ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10921
244k
                                ctxt->extSubSystem, ctxt->extSubURI);
10922
284k
  if (ctxt->instate == XML_PARSER_EOF)
10923
40.8k
      return(-1);
10924
243k
  ctxt->inSubset = 0;
10925
10926
243k
        xmlCleanSpecialAttr(ctxt);
10927
10928
243k
  ctxt->instate = XML_PARSER_PROLOG;
10929
243k
  xmlParseMisc(ctxt);
10930
243k
    }
10931
10932
    /*
10933
     * Time to start parsing the tree itself
10934
     */
10935
479k
    GROW;
10936
479k
    if (RAW != '<') {
10937
70.2k
  xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10938
70.2k
           "Start tag expected, '<' not found\n");
10939
408k
    } else {
10940
408k
  ctxt->instate = XML_PARSER_CONTENT;
10941
408k
  xmlParseElement(ctxt);
10942
408k
  ctxt->instate = XML_PARSER_EPILOG;
10943
10944
10945
  /*
10946
   * The Misc part at the end
10947
   */
10948
408k
  xmlParseMisc(ctxt);
10949
10950
408k
  if (RAW != 0) {
10951
130k
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10952
130k
  }
10953
408k
  ctxt->instate = XML_PARSER_EOF;
10954
408k
    }
10955
10956
    /*
10957
     * SAX: end of the document processing.
10958
     */
10959
479k
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10960
479k
        ctxt->sax->endDocument(ctxt->userData);
10961
10962
    /*
10963
     * Remove locally kept entity definitions if the tree was not built
10964
     */
10965
479k
    if ((ctxt->myDoc != NULL) &&
10966
479k
  (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10967
3.47k
  xmlFreeDoc(ctxt->myDoc);
10968
3.47k
  ctxt->myDoc = NULL;
10969
3.47k
    }
10970
10971
479k
    if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10972
59.9k
        ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10973
59.9k
  if (ctxt->valid)
10974
47.6k
      ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10975
59.9k
  if (ctxt->nsWellFormed)
10976
54.4k
      ctxt->myDoc->properties |= XML_DOC_NSVALID;
10977
59.9k
  if (ctxt->options & XML_PARSE_OLD10)
10978
5.97k
      ctxt->myDoc->properties |= XML_DOC_OLD10;
10979
59.9k
    }
10980
479k
    if (! ctxt->wellFormed) {
10981
419k
  ctxt->valid = 0;
10982
419k
  return(-1);
10983
419k
    }
10984
59.9k
    return(0);
10985
479k
}
10986
10987
/**
10988
 * xmlParseExtParsedEnt:
10989
 * @ctxt:  an XML parser context
10990
 *
10991
 * parse a general parsed entity
10992
 * An external general parsed entity is well-formed if it matches the
10993
 * production labeled extParsedEnt.
10994
 *
10995
 * [78] extParsedEnt ::= TextDecl? content
10996
 *
10997
 * Returns 0, -1 in case of error. the parser context is augmented
10998
 *                as a result of the parsing.
10999
 */
11000
11001
int
11002
0
xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
11003
0
    xmlChar start[4];
11004
0
    xmlCharEncoding enc;
11005
11006
0
    if ((ctxt == NULL) || (ctxt->input == NULL))
11007
0
        return(-1);
11008
11009
0
    xmlDetectSAX2(ctxt);
11010
11011
0
    GROW;
11012
11013
    /*
11014
     * SAX: beginning of the document processing.
11015
     */
11016
0
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11017
0
        ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
11018
11019
    /*
11020
     * Get the 4 first bytes and decode the charset
11021
     * if enc != XML_CHAR_ENCODING_NONE
11022
     * plug some encoding conversion routines.
11023
     */
11024
0
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11025
0
  start[0] = RAW;
11026
0
  start[1] = NXT(1);
11027
0
  start[2] = NXT(2);
11028
0
  start[3] = NXT(3);
11029
0
  enc = xmlDetectCharEncoding(start, 4);
11030
0
  if (enc != XML_CHAR_ENCODING_NONE) {
11031
0
      xmlSwitchEncoding(ctxt, enc);
11032
0
  }
11033
0
    }
11034
11035
11036
0
    if (CUR == 0) {
11037
0
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11038
0
    }
11039
11040
    /*
11041
     * Check for the XMLDecl in the Prolog.
11042
     */
11043
0
    GROW;
11044
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
11045
11046
  /*
11047
   * Note that we will switch encoding on the fly.
11048
   */
11049
0
  xmlParseXMLDecl(ctxt);
11050
0
  if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11051
      /*
11052
       * The XML REC instructs us to stop parsing right here
11053
       */
11054
0
      return(-1);
11055
0
  }
11056
0
  SKIP_BLANKS;
11057
0
    } else {
11058
0
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11059
0
    }
11060
0
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
11061
0
        ctxt->sax->startDocument(ctxt->userData);
11062
0
    if (ctxt->instate == XML_PARSER_EOF)
11063
0
  return(-1);
11064
11065
    /*
11066
     * Doing validity checking on chunk doesn't make sense
11067
     */
11068
0
    ctxt->instate = XML_PARSER_CONTENT;
11069
0
    ctxt->validate = 0;
11070
0
    ctxt->loadsubset = 0;
11071
0
    ctxt->depth = 0;
11072
11073
0
    xmlParseContent(ctxt);
11074
0
    if (ctxt->instate == XML_PARSER_EOF)
11075
0
  return(-1);
11076
11077
0
    if ((RAW == '<') && (NXT(1) == '/')) {
11078
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11079
0
    } else if (RAW != 0) {
11080
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
11081
0
    }
11082
11083
    /*
11084
     * SAX: end of the document processing.
11085
     */
11086
0
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11087
0
        ctxt->sax->endDocument(ctxt->userData);
11088
11089
0
    if (! ctxt->wellFormed) return(-1);
11090
0
    return(0);
11091
0
}
11092
11093
#ifdef LIBXML_PUSH_ENABLED
11094
/************************************************************************
11095
 *                  *
11096
 *    Progressive parsing interfaces        *
11097
 *                  *
11098
 ************************************************************************/
11099
11100
/**
11101
 * xmlParseLookupChar:
11102
 * @ctxt:  an XML parser context
11103
 * @c:  character
11104
 *
11105
 * Check whether the input buffer contains a character.
11106
 */
11107
static int
11108
87.7M
xmlParseLookupChar(xmlParserCtxtPtr ctxt, int c) {
11109
87.7M
    const xmlChar *cur;
11110
11111
87.7M
    if (ctxt->checkIndex == 0) {
11112
78.4M
        cur = ctxt->input->cur + 1;
11113
78.4M
    } else {
11114
9.31M
        cur = ctxt->input->cur + ctxt->checkIndex;
11115
9.31M
    }
11116
11117
87.7M
    if (memchr(cur, c, ctxt->input->end - cur) == NULL) {
11118
9.33M
        ctxt->checkIndex = ctxt->input->end - ctxt->input->cur;
11119
9.33M
        return(0);
11120
78.3M
    } else {
11121
78.3M
        ctxt->checkIndex = 0;
11122
78.3M
        return(1);
11123
78.3M
    }
11124
87.7M
}
11125
11126
/**
11127
 * xmlParseLookupString:
11128
 * @ctxt:  an XML parser context
11129
 * @startDelta: delta to apply at the start
11130
 * @str:  string
11131
 * @strLen:  length of string
11132
 *
11133
 * Check whether the input buffer contains a string.
11134
 */
11135
static const xmlChar *
11136
xmlParseLookupString(xmlParserCtxtPtr ctxt, size_t startDelta,
11137
38.8M
                     const char *str, size_t strLen) {
11138
38.8M
    const xmlChar *cur, *term;
11139
11140
38.8M
    if (ctxt->checkIndex == 0) {
11141
16.2M
        cur = ctxt->input->cur + startDelta;
11142
22.5M
    } else {
11143
22.5M
        cur = ctxt->input->cur + ctxt->checkIndex;
11144
22.5M
    }
11145
11146
38.8M
    term = BAD_CAST strstr((const char *) cur, str);
11147
38.8M
    if (term == NULL) {
11148
26.8M
        const xmlChar *end = ctxt->input->end;
11149
11150
        /* Rescan (strLen - 1) characters. */
11151
26.8M
        if ((size_t) (end - cur) < strLen)
11152
197k
            end = cur;
11153
26.6M
        else
11154
26.6M
            end -= strLen - 1;
11155
26.8M
        ctxt->checkIndex = end - ctxt->input->cur;
11156
26.8M
    } else {
11157
12.0M
        ctxt->checkIndex = 0;
11158
12.0M
    }
11159
11160
38.8M
    return(term);
11161
38.8M
}
11162
11163
/**
11164
 * xmlParseLookupCharData:
11165
 * @ctxt:  an XML parser context
11166
 *
11167
 * Check whether the input buffer contains terminated char data.
11168
 */
11169
static int
11170
177M
xmlParseLookupCharData(xmlParserCtxtPtr ctxt) {
11171
177M
    const xmlChar *cur = ctxt->input->cur + ctxt->checkIndex;
11172
177M
    const xmlChar *end = ctxt->input->end;
11173
11174
2.68G
    while (cur < end) {
11175
2.67G
        if ((*cur == '<') || (*cur == '&')) {
11176
160M
            ctxt->checkIndex = 0;
11177
160M
            return(1);
11178
160M
        }
11179
2.51G
        cur++;
11180
2.51G
    }
11181
11182
16.4M
    ctxt->checkIndex = cur - ctxt->input->cur;
11183
16.4M
    return(0);
11184
177M
}
11185
11186
/**
11187
 * xmlParseLookupGt:
11188
 * @ctxt:  an XML parser context
11189
 *
11190
 * Check whether there's enough data in the input buffer to finish parsing
11191
 * a start tag. This has to take quotes into account.
11192
 */
11193
static int
11194
253M
xmlParseLookupGt(xmlParserCtxtPtr ctxt) {
11195
253M
    const xmlChar *cur;
11196
253M
    const xmlChar *end = ctxt->input->end;
11197
253M
    int state = ctxt->endCheckState;
11198
11199
253M
    if (ctxt->checkIndex == 0)
11200
197M
        cur = ctxt->input->cur + 1;
11201
56.6M
    else
11202
56.6M
        cur = ctxt->input->cur + ctxt->checkIndex;
11203
11204
10.8G
    while (cur < end) {
11205
10.8G
        if (state) {
11206
6.44G
            if (*cur == state)
11207
441M
                state = 0;
11208
6.44G
        } else if (*cur == '\'' || *cur == '"') {
11209
441M
            state = *cur;
11210
3.91G
        } else if (*cur == '>') {
11211
197M
            ctxt->checkIndex = 0;
11212
197M
            ctxt->endCheckState = 0;
11213
197M
            return(1);
11214
197M
        }
11215
10.6G
        cur++;
11216
10.6G
    }
11217
11218
56.8M
    ctxt->checkIndex = cur - ctxt->input->cur;
11219
56.8M
    ctxt->endCheckState = state;
11220
56.8M
    return(0);
11221
253M
}
11222
11223
/**
11224
 * xmlParseLookupInternalSubset:
11225
 * @ctxt:  an XML parser context
11226
 *
11227
 * Check whether there's enough data in the input buffer to finish parsing
11228
 * the internal subset.
11229
 */
11230
static int
11231
14.3M
xmlParseLookupInternalSubset(xmlParserCtxtPtr ctxt) {
11232
    /*
11233
     * Sorry, but progressive parsing of the internal subset is not
11234
     * supported. We first check that the full content of the internal
11235
     * subset is available and parsing is launched only at that point.
11236
     * Internal subset ends with "']' S? '>'" in an unescaped section and
11237
     * not in a ']]>' sequence which are conditional sections.
11238
     */
11239
14.3M
    const xmlChar *cur, *start;
11240
14.3M
    const xmlChar *end = ctxt->input->end;
11241
14.3M
    int state = ctxt->endCheckState;
11242
11243
14.3M
    if (ctxt->checkIndex == 0) {
11244
540k
        cur = ctxt->input->cur + 1;
11245
13.8M
    } else {
11246
13.8M
        cur = ctxt->input->cur + ctxt->checkIndex;
11247
13.8M
    }
11248
14.3M
    start = cur;
11249
11250
3.21G
    while (cur < end) {
11251
3.20G
        if (state == '-') {
11252
782M
            if ((*cur == '-') &&
11253
782M
                (cur[1] == '-') &&
11254
782M
                (cur[2] == '>')) {
11255
1.40M
                state = 0;
11256
1.40M
                cur += 3;
11257
1.40M
                start = cur;
11258
1.40M
                continue;
11259
1.40M
            }
11260
782M
        }
11261
2.42G
        else if (state == ']') {
11262
575k
            if (*cur == '>') {
11263
441k
                ctxt->checkIndex = 0;
11264
441k
                ctxt->endCheckState = 0;
11265
441k
                return(1);
11266
441k
            }
11267
134k
            if (IS_BLANK_CH(*cur)) {
11268
94.1k
                state = ' ';
11269
94.1k
            } else if (*cur != ']') {
11270
26.5k
                state = 0;
11271
26.5k
                start = cur;
11272
26.5k
                continue;
11273
26.5k
            }
11274
134k
        }
11275
2.41G
        else if (state == ' ') {
11276
108k
            if (*cur == '>') {
11277
3.94k
                ctxt->checkIndex = 0;
11278
3.94k
                ctxt->endCheckState = 0;
11279
3.94k
                return(1);
11280
3.94k
            }
11281
104k
            if (!IS_BLANK_CH(*cur)) {
11282
90.1k
                state = 0;
11283
90.1k
                start = cur;
11284
90.1k
                continue;
11285
90.1k
            }
11286
104k
        }
11287
2.41G
        else if (state != 0) {
11288
1.50G
            if (*cur == state) {
11289
43.3M
                state = 0;
11290
43.3M
                start = cur + 1;
11291
43.3M
            }
11292
1.50G
        }
11293
915M
        else if (*cur == '<') {
11294
20.0M
            if ((cur[1] == '!') &&
11295
20.0M
                (cur[2] == '-') &&
11296
20.0M
                (cur[3] == '-')) {
11297
1.41M
                state = '-';
11298
1.41M
                cur += 4;
11299
                /* Don't treat <!--> as comment */
11300
1.41M
                start = cur;
11301
1.41M
                continue;
11302
1.41M
            }
11303
20.0M
        }
11304
895M
        else if ((*cur == '"') || (*cur == '\'') || (*cur == ']')) {
11305
43.9M
            state = *cur;
11306
43.9M
        }
11307
11308
3.19G
        cur++;
11309
3.19G
    }
11310
11311
    /*
11312
     * Rescan the three last characters to detect "<!--" and "-->"
11313
     * split across chunks.
11314
     */
11315
13.9M
    if ((state == 0) || (state == '-')) {
11316
7.34M
        if (cur - start < 3)
11317
740k
            cur = start;
11318
6.60M
        else
11319
6.60M
            cur -= 3;
11320
7.34M
    }
11321
13.9M
    ctxt->checkIndex = cur - ctxt->input->cur;
11322
13.9M
    ctxt->endCheckState = state;
11323
13.9M
    return(0);
11324
14.3M
}
11325
11326
/**
11327
 * xmlCheckCdataPush:
11328
 * @cur: pointer to the block of characters
11329
 * @len: length of the block in bytes
11330
 * @complete: 1 if complete CDATA block is passed in, 0 if partial block
11331
 *
11332
 * Check that the block of characters is okay as SCdata content [20]
11333
 *
11334
 * Returns the number of bytes to pass if okay, a negative index where an
11335
 *         UTF-8 error occurred otherwise
11336
 */
11337
static int
11338
13.3M
xmlCheckCdataPush(const xmlChar *utf, int len, int complete) {
11339
13.3M
    int ix;
11340
13.3M
    unsigned char c;
11341
13.3M
    int codepoint;
11342
11343
13.3M
    if ((utf == NULL) || (len <= 0))
11344
2.98k
        return(0);
11345
11346
748M
    for (ix = 0; ix < len;) {      /* string is 0-terminated */
11347
745M
        c = utf[ix];
11348
745M
        if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
11349
719M
      if (c >= 0x20)
11350
606M
    ix++;
11351
112M
      else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11352
112M
          ix++;
11353
561k
      else
11354
561k
          return(-ix);
11355
719M
  } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
11356
13.0M
      if (ix + 2 > len) return(complete ? -ix : ix);
11357
13.0M
      if ((utf[ix+1] & 0xc0 ) != 0x80)
11358
6.02M
          return(-ix);
11359
7.00M
      codepoint = (utf[ix] & 0x1f) << 6;
11360
7.00M
      codepoint |= utf[ix+1] & 0x3f;
11361
7.00M
      if (!xmlIsCharQ(codepoint))
11362
9.09k
          return(-ix);
11363
6.99M
      ix += 2;
11364
13.6M
  } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
11365
5.16M
      if (ix + 3 > len) return(complete ? -ix : ix);
11366
5.15M
      if (((utf[ix+1] & 0xc0) != 0x80) ||
11367
5.15M
          ((utf[ix+2] & 0xc0) != 0x80))
11368
724k
        return(-ix);
11369
4.43M
      codepoint = (utf[ix] & 0xf) << 12;
11370
4.43M
      codepoint |= (utf[ix+1] & 0x3f) << 6;
11371
4.43M
      codepoint |= utf[ix+2] & 0x3f;
11372
4.43M
      if (!xmlIsCharQ(codepoint))
11373
1.35k
          return(-ix);
11374
4.43M
      ix += 3;
11375
8.47M
  } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
11376
6.71M
      if (ix + 4 > len) return(complete ? -ix : ix);
11377
6.20M
      if (((utf[ix+1] & 0xc0) != 0x80) ||
11378
6.20M
          ((utf[ix+2] & 0xc0) != 0x80) ||
11379
6.20M
    ((utf[ix+3] & 0xc0) != 0x80))
11380
1.36M
        return(-ix);
11381
4.83M
      codepoint = (utf[ix] & 0x7) << 18;
11382
4.83M
      codepoint |= (utf[ix+1] & 0x3f) << 12;
11383
4.83M
      codepoint |= (utf[ix+2] & 0x3f) << 6;
11384
4.83M
      codepoint |= utf[ix+3] & 0x3f;
11385
4.83M
      if (!xmlIsCharQ(codepoint))
11386
14.8k
          return(-ix);
11387
4.81M
      ix += 4;
11388
4.81M
  } else       /* unknown encoding */
11389
1.75M
      return(-ix);
11390
745M
      }
11391
2.39M
      return(ix);
11392
13.3M
}
11393
11394
/**
11395
 * xmlParseTryOrFinish:
11396
 * @ctxt:  an XML parser context
11397
 * @terminate:  last chunk indicator
11398
 *
11399
 * Try to progress on parsing
11400
 *
11401
 * Returns zero if no parsing was possible
11402
 */
11403
static int
11404
136M
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11405
136M
    int ret = 0;
11406
136M
    int avail, tlen;
11407
136M
    xmlChar cur, next;
11408
11409
136M
    if (ctxt->input == NULL)
11410
0
        return(0);
11411
11412
#ifdef DEBUG_PUSH
11413
    switch (ctxt->instate) {
11414
  case XML_PARSER_EOF:
11415
      xmlGenericError(xmlGenericErrorContext,
11416
        "PP: try EOF\n"); break;
11417
  case XML_PARSER_START:
11418
      xmlGenericError(xmlGenericErrorContext,
11419
        "PP: try START\n"); break;
11420
  case XML_PARSER_MISC:
11421
      xmlGenericError(xmlGenericErrorContext,
11422
        "PP: try MISC\n");break;
11423
  case XML_PARSER_COMMENT:
11424
      xmlGenericError(xmlGenericErrorContext,
11425
        "PP: try COMMENT\n");break;
11426
  case XML_PARSER_PROLOG:
11427
      xmlGenericError(xmlGenericErrorContext,
11428
        "PP: try PROLOG\n");break;
11429
  case XML_PARSER_START_TAG:
11430
      xmlGenericError(xmlGenericErrorContext,
11431
        "PP: try START_TAG\n");break;
11432
  case XML_PARSER_CONTENT:
11433
      xmlGenericError(xmlGenericErrorContext,
11434
        "PP: try CONTENT\n");break;
11435
  case XML_PARSER_CDATA_SECTION:
11436
      xmlGenericError(xmlGenericErrorContext,
11437
        "PP: try CDATA_SECTION\n");break;
11438
  case XML_PARSER_END_TAG:
11439
      xmlGenericError(xmlGenericErrorContext,
11440
        "PP: try END_TAG\n");break;
11441
  case XML_PARSER_ENTITY_DECL:
11442
      xmlGenericError(xmlGenericErrorContext,
11443
        "PP: try ENTITY_DECL\n");break;
11444
  case XML_PARSER_ENTITY_VALUE:
11445
      xmlGenericError(xmlGenericErrorContext,
11446
        "PP: try ENTITY_VALUE\n");break;
11447
  case XML_PARSER_ATTRIBUTE_VALUE:
11448
      xmlGenericError(xmlGenericErrorContext,
11449
        "PP: try ATTRIBUTE_VALUE\n");break;
11450
  case XML_PARSER_DTD:
11451
      xmlGenericError(xmlGenericErrorContext,
11452
        "PP: try DTD\n");break;
11453
  case XML_PARSER_EPILOG:
11454
      xmlGenericError(xmlGenericErrorContext,
11455
        "PP: try EPILOG\n");break;
11456
  case XML_PARSER_PI:
11457
      xmlGenericError(xmlGenericErrorContext,
11458
        "PP: try PI\n");break;
11459
        case XML_PARSER_IGNORE:
11460
            xmlGenericError(xmlGenericErrorContext,
11461
        "PP: try IGNORE\n");break;
11462
    }
11463
#endif
11464
11465
136M
    if ((ctxt->input != NULL) &&
11466
136M
        (ctxt->input->cur - ctxt->input->base > 4096)) {
11467
2.00M
        xmlParserInputShrink(ctxt->input);
11468
2.00M
    }
11469
11470
991M
    while (ctxt->instate != XML_PARSER_EOF) {
11471
991M
  if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11472
562k
      return(0);
11473
11474
990M
  if (ctxt->input == NULL) break;
11475
990M
  if (ctxt->input->buf == NULL)
11476
0
      avail = ctxt->input->length -
11477
0
              (ctxt->input->cur - ctxt->input->base);
11478
990M
  else {
11479
      /*
11480
       * If we are operating on converted input, try to flush
11481
       * remaining chars to avoid them stalling in the non-converted
11482
       * buffer. But do not do this in document start where
11483
       * encoding="..." may not have been read and we work on a
11484
       * guessed encoding.
11485
       */
11486
990M
      if ((ctxt->instate != XML_PARSER_START) &&
11487
990M
          (ctxt->input->buf->raw != NULL) &&
11488
990M
    (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
11489
11.4M
                size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11490
11.4M
                                                 ctxt->input);
11491
11.4M
    size_t current = ctxt->input->cur - ctxt->input->base;
11492
11493
11.4M
    xmlParserInputBufferPush(ctxt->input->buf, 0, "");
11494
11.4M
                xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11495
11.4M
                                      base, current);
11496
11.4M
      }
11497
990M
      avail = xmlBufUse(ctxt->input->buf->buffer) -
11498
990M
        (ctxt->input->cur - ctxt->input->base);
11499
990M
  }
11500
990M
        if (avail < 1)
11501
3.36M
      goto done;
11502
987M
        switch (ctxt->instate) {
11503
0
            case XML_PARSER_EOF:
11504
          /*
11505
     * Document parsing is done !
11506
     */
11507
0
          goto done;
11508
10.6M
            case XML_PARSER_START:
11509
10.6M
    if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11510
886k
        xmlChar start[4];
11511
886k
        xmlCharEncoding enc;
11512
11513
        /*
11514
         * Very first chars read from the document flow.
11515
         */
11516
886k
        if (avail < 4)
11517
5.36k
      goto done;
11518
11519
        /*
11520
         * Get the 4 first bytes and decode the charset
11521
         * if enc != XML_CHAR_ENCODING_NONE
11522
         * plug some encoding conversion routines,
11523
         * else xmlSwitchEncoding will set to (default)
11524
         * UTF8.
11525
         */
11526
881k
        start[0] = RAW;
11527
881k
        start[1] = NXT(1);
11528
881k
        start[2] = NXT(2);
11529
881k
        start[3] = NXT(3);
11530
881k
        enc = xmlDetectCharEncoding(start, 4);
11531
881k
        xmlSwitchEncoding(ctxt, enc);
11532
881k
        break;
11533
886k
    }
11534
11535
9.78M
    if (avail < 2)
11536
201
        goto done;
11537
9.78M
    cur = ctxt->input->cur[0];
11538
9.78M
    next = ctxt->input->cur[1];
11539
9.78M
    if (cur == 0) {
11540
1.31k
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11541
1.31k
      ctxt->sax->setDocumentLocator(ctxt->userData,
11542
1.31k
                  &xmlDefaultSAXLocator);
11543
1.31k
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11544
1.31k
        xmlHaltParser(ctxt);
11545
#ifdef DEBUG_PUSH
11546
        xmlGenericError(xmlGenericErrorContext,
11547
          "PP: entering EOF\n");
11548
#endif
11549
1.31k
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11550
1.31k
      ctxt->sax->endDocument(ctxt->userData);
11551
1.31k
        goto done;
11552
1.31k
    }
11553
9.78M
          if ((cur == '<') && (next == '?')) {
11554
        /* PI or XML decl */
11555
9.29M
        if (avail < 5) goto done;
11556
9.29M
        if ((!terminate) &&
11557
9.29M
                        (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11558
8.51M
      goto done;
11559
773k
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11560
773k
      ctxt->sax->setDocumentLocator(ctxt->userData,
11561
773k
                  &xmlDefaultSAXLocator);
11562
773k
        if ((ctxt->input->cur[2] == 'x') &&
11563
773k
      (ctxt->input->cur[3] == 'm') &&
11564
773k
      (ctxt->input->cur[4] == 'l') &&
11565
773k
      (IS_BLANK_CH(ctxt->input->cur[5]))) {
11566
728k
      ret += 5;
11567
#ifdef DEBUG_PUSH
11568
      xmlGenericError(xmlGenericErrorContext,
11569
        "PP: Parsing XML Decl\n");
11570
#endif
11571
728k
      xmlParseXMLDecl(ctxt);
11572
728k
      if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11573
          /*
11574
           * The XML REC instructs us to stop parsing right
11575
           * here
11576
           */
11577
4.07k
          xmlHaltParser(ctxt);
11578
4.07k
          return(0);
11579
4.07k
      }
11580
724k
      ctxt->standalone = ctxt->input->standalone;
11581
724k
      if ((ctxt->encoding == NULL) &&
11582
724k
          (ctxt->input->encoding != NULL))
11583
171k
          ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11584
724k
      if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11585
724k
          (!ctxt->disableSAX))
11586
637k
          ctxt->sax->startDocument(ctxt->userData);
11587
724k
      ctxt->instate = XML_PARSER_MISC;
11588
#ifdef DEBUG_PUSH
11589
      xmlGenericError(xmlGenericErrorContext,
11590
        "PP: entering MISC\n");
11591
#endif
11592
724k
        } else {
11593
44.5k
      ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11594
44.5k
      if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11595
44.5k
          (!ctxt->disableSAX))
11596
44.5k
          ctxt->sax->startDocument(ctxt->userData);
11597
44.5k
      ctxt->instate = XML_PARSER_MISC;
11598
#ifdef DEBUG_PUSH
11599
      xmlGenericError(xmlGenericErrorContext,
11600
        "PP: entering MISC\n");
11601
#endif
11602
44.5k
        }
11603
773k
    } else {
11604
490k
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11605
490k
      ctxt->sax->setDocumentLocator(ctxt->userData,
11606
490k
                  &xmlDefaultSAXLocator);
11607
490k
        ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11608
490k
        if (ctxt->version == NULL) {
11609
0
            xmlErrMemory(ctxt, NULL);
11610
0
      break;
11611
0
        }
11612
490k
        if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11613
490k
            (!ctxt->disableSAX))
11614
490k
      ctxt->sax->startDocument(ctxt->userData);
11615
490k
        ctxt->instate = XML_PARSER_MISC;
11616
#ifdef DEBUG_PUSH
11617
        xmlGenericError(xmlGenericErrorContext,
11618
          "PP: entering MISC\n");
11619
#endif
11620
490k
    }
11621
1.26M
    break;
11622
267M
            case XML_PARSER_START_TAG: {
11623
267M
          const xmlChar *name;
11624
267M
    const xmlChar *prefix = NULL;
11625
267M
    const xmlChar *URI = NULL;
11626
267M
                int line = ctxt->input->line;
11627
267M
    int nsNr = ctxt->nsNr;
11628
11629
267M
    if ((avail < 2) && (ctxt->inputNr == 1))
11630
0
        goto done;
11631
267M
    cur = ctxt->input->cur[0];
11632
267M
          if (cur != '<') {
11633
37.3k
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11634
37.3k
        xmlHaltParser(ctxt);
11635
37.3k
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11636
37.3k
      ctxt->sax->endDocument(ctxt->userData);
11637
37.3k
        goto done;
11638
37.3k
    }
11639
267M
    if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11640
53.2M
                    goto done;
11641
214M
    if (ctxt->spaceNr == 0)
11642
221k
        spacePush(ctxt, -1);
11643
213M
    else if (*ctxt->space == -2)
11644
13.4M
        spacePush(ctxt, -1);
11645
200M
    else
11646
200M
        spacePush(ctxt, *ctxt->space);
11647
214M
#ifdef LIBXML_SAX1_ENABLED
11648
214M
    if (ctxt->sax2)
11649
153M
#endif /* LIBXML_SAX1_ENABLED */
11650
153M
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
11651
60.3M
#ifdef LIBXML_SAX1_ENABLED
11652
60.3M
    else
11653
60.3M
        name = xmlParseStartTag(ctxt);
11654
214M
#endif /* LIBXML_SAX1_ENABLED */
11655
214M
    if (ctxt->instate == XML_PARSER_EOF)
11656
5.69k
        goto done;
11657
214M
    if (name == NULL) {
11658
58.4k
        spacePop(ctxt);
11659
58.4k
        xmlHaltParser(ctxt);
11660
58.4k
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11661
58.4k
      ctxt->sax->endDocument(ctxt->userData);
11662
58.4k
        goto done;
11663
58.4k
    }
11664
213M
#ifdef LIBXML_VALID_ENABLED
11665
    /*
11666
     * [ VC: Root Element Type ]
11667
     * The Name in the document type declaration must match
11668
     * the element type of the root element.
11669
     */
11670
213M
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11671
213M
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
11672
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11673
213M
#endif /* LIBXML_VALID_ENABLED */
11674
11675
    /*
11676
     * Check for an Empty Element.
11677
     */
11678
213M
    if ((RAW == '/') && (NXT(1) == '>')) {
11679
156M
        SKIP(2);
11680
11681
156M
        if (ctxt->sax2) {
11682
107M
      if ((ctxt->sax != NULL) &&
11683
107M
          (ctxt->sax->endElementNs != NULL) &&
11684
107M
          (!ctxt->disableSAX))
11685
107M
          ctxt->sax->endElementNs(ctxt->userData, name,
11686
107M
                                  prefix, URI);
11687
107M
      if (ctxt->nsNr - nsNr > 0)
11688
12.0k
          nsPop(ctxt, ctxt->nsNr - nsNr);
11689
107M
#ifdef LIBXML_SAX1_ENABLED
11690
107M
        } else {
11691
49.5M
      if ((ctxt->sax != NULL) &&
11692
49.5M
          (ctxt->sax->endElement != NULL) &&
11693
49.5M
          (!ctxt->disableSAX))
11694
49.5M
          ctxt->sax->endElement(ctxt->userData, name);
11695
49.5M
#endif /* LIBXML_SAX1_ENABLED */
11696
49.5M
        }
11697
156M
        if (ctxt->instate == XML_PARSER_EOF)
11698
0
      goto done;
11699
156M
        spacePop(ctxt);
11700
156M
        if (ctxt->nameNr == 0) {
11701
31.3k
      ctxt->instate = XML_PARSER_EPILOG;
11702
156M
        } else {
11703
156M
      ctxt->instate = XML_PARSER_CONTENT;
11704
156M
        }
11705
156M
        break;
11706
156M
    }
11707
57.0M
    if (RAW == '>') {
11708
56.3M
        NEXT;
11709
56.3M
    } else {
11710
759k
        xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11711
759k
           "Couldn't find end of Start Tag %s\n",
11712
759k
           name);
11713
759k
        nodePop(ctxt);
11714
759k
        spacePop(ctxt);
11715
759k
    }
11716
57.0M
                nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
11717
11718
57.0M
    ctxt->instate = XML_PARSER_CONTENT;
11719
57.0M
                break;
11720
213M
      }
11721
614M
            case XML_PARSER_CONTENT: {
11722
614M
    if ((avail < 2) && (ctxt->inputNr == 1))
11723
1.87M
        goto done;
11724
612M
    cur = ctxt->input->cur[0];
11725
612M
    next = ctxt->input->cur[1];
11726
11727
612M
    if ((cur == '<') && (next == '/')) {
11728
55.0M
        ctxt->instate = XML_PARSER_END_TAG;
11729
55.0M
        break;
11730
557M
          } else if ((cur == '<') && (next == '?')) {
11731
1.07M
        if ((!terminate) &&
11732
1.07M
            (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11733
1.01M
      goto done;
11734
59.0k
        xmlParsePI(ctxt);
11735
59.0k
        ctxt->instate = XML_PARSER_CONTENT;
11736
556M
    } else if ((cur == '<') && (next != '!')) {
11737
213M
        ctxt->instate = XML_PARSER_START_TAG;
11738
213M
        break;
11739
342M
    } else if ((cur == '<') && (next == '!') &&
11740
342M
               (ctxt->input->cur[2] == '-') &&
11741
342M
         (ctxt->input->cur[3] == '-')) {
11742
10.0M
        if ((!terminate) &&
11743
10.0M
            (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11744
8.23M
      goto done;
11745
1.82M
        xmlParseComment(ctxt);
11746
1.82M
        ctxt->instate = XML_PARSER_CONTENT;
11747
332M
    } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11748
332M
        (ctxt->input->cur[2] == '[') &&
11749
332M
        (ctxt->input->cur[3] == 'C') &&
11750
332M
        (ctxt->input->cur[4] == 'D') &&
11751
332M
        (ctxt->input->cur[5] == 'A') &&
11752
332M
        (ctxt->input->cur[6] == 'T') &&
11753
332M
        (ctxt->input->cur[7] == 'A') &&
11754
332M
        (ctxt->input->cur[8] == '[')) {
11755
350k
        SKIP(9);
11756
350k
        ctxt->instate = XML_PARSER_CDATA_SECTION;
11757
350k
        break;
11758
332M
    } else if ((cur == '<') && (next == '!') &&
11759
332M
               (avail < 9)) {
11760
80.0k
        goto done;
11761
332M
    } else if (cur == '<') {
11762
1.77M
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11763
1.77M
                    "detected an error in element content\n");
11764
1.77M
                    SKIP(1);
11765
330M
    } else if (cur == '&') {
11766
32.9M
        if ((!terminate) && (!xmlParseLookupChar(ctxt, ';')))
11767
7.82M
      goto done;
11768
25.0M
        xmlParseReference(ctxt);
11769
297M
    } else {
11770
        /* TODO Avoid the extra copy, handle directly !!! */
11771
        /*
11772
         * Goal of the following test is:
11773
         *  - minimize calls to the SAX 'character' callback
11774
         *    when they are mergeable
11775
         *  - handle a problem for isBlank when we only parse
11776
         *    a sequence of blank chars and the next one is
11777
         *    not available to check against '<' presence.
11778
         *  - tries to homogenize the differences in SAX
11779
         *    callbacks between the push and pull versions
11780
         *    of the parser.
11781
         */
11782
297M
        if ((ctxt->inputNr == 1) &&
11783
297M
            (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11784
177M
      if ((!terminate) && (!xmlParseLookupCharData(ctxt)))
11785
16.4M
          goto done;
11786
177M
                    }
11787
281M
                    ctxt->checkIndex = 0;
11788
281M
        xmlParseCharData(ctxt, 0);
11789
281M
    }
11790
309M
    break;
11791
612M
      }
11792
309M
            case XML_PARSER_END_TAG:
11793
56.5M
    if (avail < 2)
11794
0
        goto done;
11795
56.5M
    if ((!terminate) && (!xmlParseLookupChar(ctxt, '>')))
11796
1.50M
        goto done;
11797
55.0M
    if (ctxt->sax2) {
11798
45.0M
              xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
11799
45.0M
        nameNsPop(ctxt);
11800
45.0M
    }
11801
9.97M
#ifdef LIBXML_SAX1_ENABLED
11802
9.97M
      else
11803
9.97M
        xmlParseEndTag1(ctxt, 0);
11804
55.0M
#endif /* LIBXML_SAX1_ENABLED */
11805
55.0M
    if (ctxt->instate == XML_PARSER_EOF) {
11806
        /* Nothing */
11807
55.0M
    } else if (ctxt->nameNr == 0) {
11808
138k
        ctxt->instate = XML_PARSER_EPILOG;
11809
54.8M
    } else {
11810
54.8M
        ctxt->instate = XML_PARSER_CONTENT;
11811
54.8M
    }
11812
55.0M
    break;
11813
16.9M
            case XML_PARSER_CDATA_SECTION: {
11814
          /*
11815
     * The Push mode need to have the SAX callback for
11816
     * cdataBlock merge back contiguous callbacks.
11817
     */
11818
16.9M
    const xmlChar *term;
11819
11820
16.9M
                if (terminate) {
11821
                    /*
11822
                     * Don't call xmlParseLookupString. If 'terminate'
11823
                     * is set, checkIndex is invalid.
11824
                     */
11825
22.9k
                    term = BAD_CAST strstr((const char *) ctxt->input->cur,
11826
22.9k
                                           "]]>");
11827
16.9M
                } else {
11828
16.9M
        term = xmlParseLookupString(ctxt, 0, "]]>", 3);
11829
16.9M
                }
11830
11831
16.9M
    if (term == NULL) {
11832
7.73M
        int tmp, size;
11833
11834
7.73M
                    if (terminate) {
11835
                        /* Unfinished CDATA section */
11836
9.86k
                        size = ctxt->input->end - ctxt->input->cur;
11837
7.72M
                    } else {
11838
7.72M
                        if (avail < XML_PARSER_BIG_BUFFER_SIZE + 2)
11839
3.58M
                            goto done;
11840
4.13M
                        ctxt->checkIndex = 0;
11841
                        /* XXX: Why don't we pass the full buffer? */
11842
4.13M
                        size = XML_PARSER_BIG_BUFFER_SIZE;
11843
4.13M
                    }
11844
4.14M
                    tmp = xmlCheckCdataPush(ctxt->input->cur, size, 0);
11845
4.14M
                    if (tmp <= 0) {
11846
2.03M
                        tmp = -tmp;
11847
2.03M
                        ctxt->input->cur += tmp;
11848
2.03M
                        goto encoding_error;
11849
2.03M
                    }
11850
2.10M
                    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11851
2.10M
                        if (ctxt->sax->cdataBlock != NULL)
11852
1.47M
                            ctxt->sax->cdataBlock(ctxt->userData,
11853
1.47M
                                                  ctxt->input->cur, tmp);
11854
633k
                        else if (ctxt->sax->characters != NULL)
11855
633k
                            ctxt->sax->characters(ctxt->userData,
11856
633k
                                                  ctxt->input->cur, tmp);
11857
2.10M
                    }
11858
2.10M
                    if (ctxt->instate == XML_PARSER_EOF)
11859
0
                        goto done;
11860
2.10M
                    SKIPL(tmp);
11861
9.24M
    } else {
11862
9.24M
                    int base = term - CUR_PTR;
11863
9.24M
        int tmp;
11864
11865
9.24M
        tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1);
11866
9.24M
        if ((tmp < 0) || (tmp != base)) {
11867
8.92M
      tmp = -tmp;
11868
8.92M
      ctxt->input->cur += tmp;
11869
8.92M
      goto encoding_error;
11870
8.92M
        }
11871
315k
        if ((ctxt->sax != NULL) && (base == 0) &&
11872
315k
            (ctxt->sax->cdataBlock != NULL) &&
11873
315k
            (!ctxt->disableSAX)) {
11874
      /*
11875
       * Special case to provide identical behaviour
11876
       * between pull and push parsers on empty CDATA
11877
       * sections
11878
       */
11879
2.76k
       if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11880
2.76k
           (!strncmp((const char *)&ctxt->input->cur[-9],
11881
2.76k
                     "<![CDATA[", 9)))
11882
2.73k
           ctxt->sax->cdataBlock(ctxt->userData,
11883
2.73k
                                 BAD_CAST "", 0);
11884
312k
        } else if ((ctxt->sax != NULL) && (base > 0) &&
11885
312k
      (!ctxt->disableSAX)) {
11886
312k
      if (ctxt->sax->cdataBlock != NULL)
11887
274k
          ctxt->sax->cdataBlock(ctxt->userData,
11888
274k
              ctxt->input->cur, base);
11889
38.0k
      else if (ctxt->sax->characters != NULL)
11890
38.0k
          ctxt->sax->characters(ctxt->userData,
11891
38.0k
              ctxt->input->cur, base);
11892
312k
        }
11893
315k
        if (ctxt->instate == XML_PARSER_EOF)
11894
0
      goto done;
11895
315k
        SKIPL(base + 3);
11896
315k
        ctxt->instate = XML_PARSER_CONTENT;
11897
#ifdef DEBUG_PUSH
11898
        xmlGenericError(xmlGenericErrorContext,
11899
          "PP: entering CONTENT\n");
11900
#endif
11901
315k
    }
11902
2.42M
    break;
11903
16.9M
      }
11904
5.70M
            case XML_PARSER_MISC:
11905
6.71M
            case XML_PARSER_PROLOG:
11906
6.91M
            case XML_PARSER_EPILOG:
11907
6.91M
    SKIP_BLANKS;
11908
6.91M
    if (ctxt->input->buf == NULL)
11909
0
        avail = ctxt->input->length -
11910
0
                (ctxt->input->cur - ctxt->input->base);
11911
6.91M
    else
11912
6.91M
        avail = xmlBufUse(ctxt->input->buf->buffer) -
11913
6.91M
                (ctxt->input->cur - ctxt->input->base);
11914
6.91M
    if (avail < 2)
11915
157k
        goto done;
11916
6.75M
    cur = ctxt->input->cur[0];
11917
6.75M
    next = ctxt->input->cur[1];
11918
6.75M
          if ((cur == '<') && (next == '?')) {
11919
538k
        if ((!terminate) &&
11920
538k
                        (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11921
404k
      goto done;
11922
#ifdef DEBUG_PUSH
11923
        xmlGenericError(xmlGenericErrorContext,
11924
          "PP: Parsing PI\n");
11925
#endif
11926
134k
        xmlParsePI(ctxt);
11927
134k
        if (ctxt->instate == XML_PARSER_EOF)
11928
0
      goto done;
11929
6.21M
    } else if ((cur == '<') && (next == '!') &&
11930
6.21M
        (ctxt->input->cur[2] == '-') &&
11931
6.21M
        (ctxt->input->cur[3] == '-')) {
11932
1.12M
        if ((!terminate) &&
11933
1.12M
                        (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11934
943k
      goto done;
11935
#ifdef DEBUG_PUSH
11936
        xmlGenericError(xmlGenericErrorContext,
11937
          "PP: Parsing Comment\n");
11938
#endif
11939
181k
        xmlParseComment(ctxt);
11940
181k
        if (ctxt->instate == XML_PARSER_EOF)
11941
0
      goto done;
11942
5.09M
    } else if ((ctxt->instate == XML_PARSER_MISC) &&
11943
5.09M
                    (cur == '<') && (next == '!') &&
11944
5.09M
        (ctxt->input->cur[2] == 'D') &&
11945
5.09M
        (ctxt->input->cur[3] == 'O') &&
11946
5.09M
        (ctxt->input->cur[4] == 'C') &&
11947
5.09M
        (ctxt->input->cur[5] == 'T') &&
11948
5.09M
        (ctxt->input->cur[6] == 'Y') &&
11949
5.09M
        (ctxt->input->cur[7] == 'P') &&
11950
5.09M
        (ctxt->input->cur[8] == 'E')) {
11951
4.30M
        if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11952
3.53M
                        goto done;
11953
#ifdef DEBUG_PUSH
11954
        xmlGenericError(xmlGenericErrorContext,
11955
          "PP: Parsing internal subset\n");
11956
#endif
11957
767k
        ctxt->inSubset = 1;
11958
767k
        xmlParseDocTypeDecl(ctxt);
11959
767k
        if (ctxt->instate == XML_PARSER_EOF)
11960
0
      goto done;
11961
767k
        if (RAW == '[') {
11962
550k
      ctxt->instate = XML_PARSER_DTD;
11963
#ifdef DEBUG_PUSH
11964
      xmlGenericError(xmlGenericErrorContext,
11965
        "PP: entering DTD\n");
11966
#endif
11967
550k
        } else {
11968
      /*
11969
       * Create and update the external subset.
11970
       */
11971
217k
      ctxt->inSubset = 2;
11972
217k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11973
217k
          (ctxt->sax->externalSubset != NULL))
11974
176k
          ctxt->sax->externalSubset(ctxt->userData,
11975
176k
            ctxt->intSubName, ctxt->extSubSystem,
11976
176k
            ctxt->extSubURI);
11977
217k
      ctxt->inSubset = 0;
11978
217k
      xmlCleanSpecialAttr(ctxt);
11979
217k
      ctxt->instate = XML_PARSER_PROLOG;
11980
#ifdef DEBUG_PUSH
11981
      xmlGenericError(xmlGenericErrorContext,
11982
        "PP: entering PROLOG\n");
11983
#endif
11984
217k
        }
11985
784k
    } else if ((cur == '<') && (next == '!') &&
11986
784k
               (avail <
11987
25.2k
                            (ctxt->instate == XML_PARSER_MISC ? 9 : 4))) {
11988
1.38k
        goto done;
11989
783k
    } else if (ctxt->instate == XML_PARSER_EPILOG) {
11990
17.8k
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11991
17.8k
        xmlHaltParser(ctxt);
11992
#ifdef DEBUG_PUSH
11993
        xmlGenericError(xmlGenericErrorContext,
11994
          "PP: entering EOF\n");
11995
#endif
11996
17.8k
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11997
17.8k
      ctxt->sax->endDocument(ctxt->userData);
11998
17.8k
        goto done;
11999
765k
                } else {
12000
765k
        ctxt->instate = XML_PARSER_START_TAG;
12001
#ifdef DEBUG_PUSH
12002
        xmlGenericError(xmlGenericErrorContext,
12003
          "PP: entering START_TAG\n");
12004
#endif
12005
765k
    }
12006
1.85M
    break;
12007
14.4M
            case XML_PARSER_DTD: {
12008
14.4M
                if ((!terminate) && (!xmlParseLookupInternalSubset(ctxt)))
12009
13.9M
                    goto done;
12010
548k
    xmlParseInternalSubset(ctxt);
12011
548k
    if (ctxt->instate == XML_PARSER_EOF)
12012
220k
        goto done;
12013
328k
    ctxt->inSubset = 2;
12014
328k
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
12015
328k
        (ctxt->sax->externalSubset != NULL))
12016
315k
        ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
12017
315k
          ctxt->extSubSystem, ctxt->extSubURI);
12018
328k
    ctxt->inSubset = 0;
12019
328k
    xmlCleanSpecialAttr(ctxt);
12020
328k
    if (ctxt->instate == XML_PARSER_EOF)
12021
27.8k
        goto done;
12022
300k
    ctxt->instate = XML_PARSER_PROLOG;
12023
#ifdef DEBUG_PUSH
12024
    xmlGenericError(xmlGenericErrorContext,
12025
      "PP: entering PROLOG\n");
12026
#endif
12027
300k
                break;
12028
328k
      }
12029
0
            case XML_PARSER_COMMENT:
12030
0
    xmlGenericError(xmlGenericErrorContext,
12031
0
      "PP: internal error, state == COMMENT\n");
12032
0
    ctxt->instate = XML_PARSER_CONTENT;
12033
#ifdef DEBUG_PUSH
12034
    xmlGenericError(xmlGenericErrorContext,
12035
      "PP: entering CONTENT\n");
12036
#endif
12037
0
    break;
12038
0
            case XML_PARSER_IGNORE:
12039
0
    xmlGenericError(xmlGenericErrorContext,
12040
0
      "PP: internal error, state == IGNORE");
12041
0
          ctxt->instate = XML_PARSER_DTD;
12042
#ifdef DEBUG_PUSH
12043
    xmlGenericError(xmlGenericErrorContext,
12044
      "PP: entering DTD\n");
12045
#endif
12046
0
          break;
12047
0
            case XML_PARSER_PI:
12048
0
    xmlGenericError(xmlGenericErrorContext,
12049
0
      "PP: internal error, state == PI\n");
12050
0
    ctxt->instate = XML_PARSER_CONTENT;
12051
#ifdef DEBUG_PUSH
12052
    xmlGenericError(xmlGenericErrorContext,
12053
      "PP: entering CONTENT\n");
12054
#endif
12055
0
    break;
12056
0
            case XML_PARSER_ENTITY_DECL:
12057
0
    xmlGenericError(xmlGenericErrorContext,
12058
0
      "PP: internal error, state == ENTITY_DECL\n");
12059
0
    ctxt->instate = XML_PARSER_DTD;
12060
#ifdef DEBUG_PUSH
12061
    xmlGenericError(xmlGenericErrorContext,
12062
      "PP: entering DTD\n");
12063
#endif
12064
0
    break;
12065
0
            case XML_PARSER_ENTITY_VALUE:
12066
0
    xmlGenericError(xmlGenericErrorContext,
12067
0
      "PP: internal error, state == ENTITY_VALUE\n");
12068
0
    ctxt->instate = XML_PARSER_CONTENT;
12069
#ifdef DEBUG_PUSH
12070
    xmlGenericError(xmlGenericErrorContext,
12071
      "PP: entering DTD\n");
12072
#endif
12073
0
    break;
12074
0
            case XML_PARSER_ATTRIBUTE_VALUE:
12075
0
    xmlGenericError(xmlGenericErrorContext,
12076
0
      "PP: internal error, state == ATTRIBUTE_VALUE\n");
12077
0
    ctxt->instate = XML_PARSER_START_TAG;
12078
#ifdef DEBUG_PUSH
12079
    xmlGenericError(xmlGenericErrorContext,
12080
      "PP: entering START_TAG\n");
12081
#endif
12082
0
    break;
12083
0
            case XML_PARSER_SYSTEM_LITERAL:
12084
0
    xmlGenericError(xmlGenericErrorContext,
12085
0
      "PP: internal error, state == SYSTEM_LITERAL\n");
12086
0
    ctxt->instate = XML_PARSER_START_TAG;
12087
#ifdef DEBUG_PUSH
12088
    xmlGenericError(xmlGenericErrorContext,
12089
      "PP: entering START_TAG\n");
12090
#endif
12091
0
    break;
12092
0
            case XML_PARSER_PUBLIC_LITERAL:
12093
0
    xmlGenericError(xmlGenericErrorContext,
12094
0
      "PP: internal error, state == PUBLIC_LITERAL\n");
12095
0
    ctxt->instate = XML_PARSER_START_TAG;
12096
#ifdef DEBUG_PUSH
12097
    xmlGenericError(xmlGenericErrorContext,
12098
      "PP: entering START_TAG\n");
12099
#endif
12100
0
    break;
12101
987M
  }
12102
987M
    }
12103
125M
done:
12104
#ifdef DEBUG_PUSH
12105
    xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
12106
#endif
12107
125M
    return(ret);
12108
10.9M
encoding_error:
12109
10.9M
    {
12110
10.9M
        char buffer[150];
12111
12112
10.9M
  snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12113
10.9M
      ctxt->input->cur[0], ctxt->input->cur[1],
12114
10.9M
      ctxt->input->cur[2], ctxt->input->cur[3]);
12115
10.9M
  __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12116
10.9M
         "Input is not proper UTF-8, indicate encoding !\n%s",
12117
10.9M
         BAD_CAST buffer, NULL);
12118
10.9M
    }
12119
10.9M
    return(0);
12120
136M
}
12121
12122
/**
12123
 * xmlParseChunk:
12124
 * @ctxt:  an XML parser context
12125
 * @chunk:  an char array
12126
 * @size:  the size in byte of the chunk
12127
 * @terminate:  last chunk indicator
12128
 *
12129
 * Parse a Chunk of memory
12130
 *
12131
 * Returns zero if no error, the xmlParserErrors otherwise.
12132
 */
12133
int
12134
xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
12135
199M
              int terminate) {
12136
199M
    int end_in_lf = 0;
12137
199M
    int remain = 0;
12138
12139
199M
    if (ctxt == NULL)
12140
0
        return(XML_ERR_INTERNAL_ERROR);
12141
199M
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12142
62.9M
        return(ctxt->errNo);
12143
136M
    if (ctxt->instate == XML_PARSER_EOF)
12144
1.64k
        return(-1);
12145
136M
    if (ctxt->input == NULL)
12146
0
        return(-1);
12147
12148
136M
    ctxt->progressive = 1;
12149
136M
    if (ctxt->instate == XML_PARSER_START)
12150
9.69M
        xmlDetectSAX2(ctxt);
12151
136M
    if ((size > 0) && (chunk != NULL) && (!terminate) &&
12152
136M
        (chunk[size - 1] == '\r')) {
12153
2.24M
  end_in_lf = 1;
12154
2.24M
  size--;
12155
2.24M
    }
12156
12157
136M
xmldecl_done:
12158
12159
136M
    if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12160
136M
        (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF))  {
12161
136M
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12162
136M
  size_t cur = ctxt->input->cur - ctxt->input->base;
12163
136M
  int res;
12164
12165
        /*
12166
         * Specific handling if we autodetected an encoding, we should not
12167
         * push more than the first line ... which depend on the encoding
12168
         * And only push the rest once the final encoding was detected
12169
         */
12170
136M
        if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
12171
136M
            (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
12172
173k
            unsigned int len = 45;
12173
12174
173k
            if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12175
173k
                               BAD_CAST "UTF-16")) ||
12176
173k
                (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12177
117k
                               BAD_CAST "UTF16")))
12178
56.4k
                len = 90;
12179
117k
            else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12180
117k
                                    BAD_CAST "UCS-4")) ||
12181
117k
                     (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12182
115k
                                    BAD_CAST "UCS4")))
12183
1.61k
                len = 180;
12184
12185
173k
            if (ctxt->input->buf->rawconsumed < len)
12186
31.8k
                len -= ctxt->input->buf->rawconsumed;
12187
12188
            /*
12189
             * Change size for reading the initial declaration only
12190
             * if size is greater than len. Otherwise, memmove in xmlBufferAdd
12191
             * will blindly copy extra bytes from memory.
12192
             */
12193
173k
            if ((unsigned int) size > len) {
12194
128k
                remain = size - len;
12195
128k
                size = len;
12196
128k
            } else {
12197
45.2k
                remain = 0;
12198
45.2k
            }
12199
173k
        }
12200
136M
  res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12201
136M
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12202
136M
  if (res < 0) {
12203
11.2k
      ctxt->errNo = XML_PARSER_EOF;
12204
11.2k
      xmlHaltParser(ctxt);
12205
11.2k
      return (XML_PARSER_EOF);
12206
11.2k
  }
12207
#ifdef DEBUG_PUSH
12208
  xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12209
#endif
12210
12211
136M
    } else if (ctxt->instate != XML_PARSER_EOF) {
12212
689k
  if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
12213
689k
      xmlParserInputBufferPtr in = ctxt->input->buf;
12214
689k
      if ((in->encoder != NULL) && (in->buffer != NULL) &&
12215
689k
        (in->raw != NULL)) {
12216
120k
    int nbchars;
12217
120k
    size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
12218
120k
    size_t current = ctxt->input->cur - ctxt->input->base;
12219
12220
120k
    nbchars = xmlCharEncInput(in, terminate);
12221
120k
    xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
12222
120k
    if (nbchars < 0) {
12223
        /* TODO 2.6.0 */
12224
7.07k
        xmlGenericError(xmlGenericErrorContext,
12225
7.07k
            "xmlParseChunk: encoder error\n");
12226
7.07k
                    xmlHaltParser(ctxt);
12227
7.07k
        return(XML_ERR_INVALID_ENCODING);
12228
7.07k
    }
12229
120k
      }
12230
689k
  }
12231
689k
    }
12232
12233
136M
    if (remain != 0) {
12234
121k
        xmlParseTryOrFinish(ctxt, 0);
12235
136M
    } else {
12236
136M
        xmlParseTryOrFinish(ctxt, terminate);
12237
136M
    }
12238
136M
    if (ctxt->instate == XML_PARSER_EOF)
12239
375k
        return(ctxt->errNo);
12240
12241
136M
    if ((ctxt->input != NULL) &&
12242
136M
         (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
12243
136M
         ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
12244
136M
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
12245
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
12246
0
        xmlHaltParser(ctxt);
12247
0
    }
12248
136M
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12249
579k
        return(ctxt->errNo);
12250
12251
135M
    if (remain != 0) {
12252
116k
        chunk += size;
12253
116k
        size = remain;
12254
116k
        remain = 0;
12255
116k
        goto xmldecl_done;
12256
116k
    }
12257
135M
    if ((end_in_lf == 1) && (ctxt->input != NULL) &&
12258
135M
        (ctxt->input->buf != NULL)) {
12259
2.24M
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
12260
2.24M
           ctxt->input);
12261
2.24M
  size_t current = ctxt->input->cur - ctxt->input->base;
12262
12263
2.24M
  xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
12264
12265
2.24M
  xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
12266
2.24M
            base, current);
12267
2.24M
    }
12268
135M
    if (terminate) {
12269
  /*
12270
   * Check for termination
12271
   */
12272
227k
  int cur_avail = 0;
12273
12274
227k
  if (ctxt->input != NULL) {
12275
227k
      if (ctxt->input->buf == NULL)
12276
0
    cur_avail = ctxt->input->length -
12277
0
          (ctxt->input->cur - ctxt->input->base);
12278
227k
      else
12279
227k
    cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
12280
227k
                    (ctxt->input->cur - ctxt->input->base);
12281
227k
  }
12282
12283
227k
  if ((ctxt->instate != XML_PARSER_EOF) &&
12284
227k
      (ctxt->instate != XML_PARSER_EPILOG)) {
12285
95.5k
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12286
95.5k
  }
12287
227k
  if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) {
12288
484
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12289
484
  }
12290
227k
  if (ctxt->instate != XML_PARSER_EOF) {
12291
227k
      if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
12292
227k
    ctxt->sax->endDocument(ctxt->userData);
12293
227k
  }
12294
227k
  ctxt->instate = XML_PARSER_EOF;
12295
227k
    }
12296
135M
    if (ctxt->wellFormed == 0)
12297
49.2M
  return((xmlParserErrors) ctxt->errNo);
12298
86.3M
    else
12299
86.3M
        return(0);
12300
135M
}
12301
12302
/************************************************************************
12303
 *                  *
12304
 *    I/O front end functions to the parser     *
12305
 *                  *
12306
 ************************************************************************/
12307
12308
/**
12309
 * xmlCreatePushParserCtxt:
12310
 * @sax:  a SAX handler
12311
 * @user_data:  The user data returned on SAX callbacks
12312
 * @chunk:  a pointer to an array of chars
12313
 * @size:  number of chars in the array
12314
 * @filename:  an optional file name or URI
12315
 *
12316
 * Create a parser context for using the XML parser in push mode.
12317
 * If @buffer and @size are non-NULL, the data is used to detect
12318
 * the encoding.  The remaining characters will be parsed so they
12319
 * don't need to be fed in again through xmlParseChunk.
12320
 * To allow content encoding detection, @size should be >= 4
12321
 * The value of @filename is used for fetching external entities
12322
 * and error/warning reports.
12323
 *
12324
 * Returns the new parser context or NULL
12325
 */
12326
12327
xmlParserCtxtPtr
12328
xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12329
1.28M
                        const char *chunk, int size, const char *filename) {
12330
1.28M
    xmlParserCtxtPtr ctxt;
12331
1.28M
    xmlParserInputPtr inputStream;
12332
1.28M
    xmlParserInputBufferPtr buf;
12333
1.28M
    xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12334
12335
    /*
12336
     * plug some encoding conversion routines
12337
     */
12338
1.28M
    if ((chunk != NULL) && (size >= 4))
12339
641k
  enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12340
12341
1.28M
    buf = xmlAllocParserInputBuffer(enc);
12342
1.28M
    if (buf == NULL) return(NULL);
12343
12344
1.28M
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
12345
1.28M
    if (ctxt == NULL) {
12346
0
        xmlErrMemory(NULL, "creating parser: out of memory\n");
12347
0
  xmlFreeParserInputBuffer(buf);
12348
0
  return(NULL);
12349
0
    }
12350
1.28M
    ctxt->dictNames = 1;
12351
1.28M
    if (filename == NULL) {
12352
642k
  ctxt->directory = NULL;
12353
642k
    } else {
12354
642k
        ctxt->directory = xmlParserGetDirectory(filename);
12355
642k
    }
12356
12357
1.28M
    inputStream = xmlNewInputStream(ctxt);
12358
1.28M
    if (inputStream == NULL) {
12359
0
  xmlFreeParserCtxt(ctxt);
12360
0
  xmlFreeParserInputBuffer(buf);
12361
0
  return(NULL);
12362
0
    }
12363
12364
1.28M
    if (filename == NULL)
12365
642k
  inputStream->filename = NULL;
12366
642k
    else {
12367
642k
  inputStream->filename = (char *)
12368
642k
      xmlCanonicPath((const xmlChar *) filename);
12369
642k
  if (inputStream->filename == NULL) {
12370
0
            xmlFreeInputStream(inputStream);
12371
0
      xmlFreeParserCtxt(ctxt);
12372
0
      xmlFreeParserInputBuffer(buf);
12373
0
      return(NULL);
12374
0
  }
12375
642k
    }
12376
1.28M
    inputStream->buf = buf;
12377
1.28M
    xmlBufResetInput(inputStream->buf->buffer, inputStream);
12378
1.28M
    inputPush(ctxt, inputStream);
12379
12380
    /*
12381
     * If the caller didn't provide an initial 'chunk' for determining
12382
     * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12383
     * that it can be automatically determined later
12384
     */
12385
1.28M
    ctxt->charset = XML_CHAR_ENCODING_NONE;
12386
12387
1.28M
    if ((size != 0) && (chunk != NULL) &&
12388
1.28M
        (ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
12389
641k
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12390
641k
  size_t cur = ctxt->input->cur - ctxt->input->base;
12391
12392
641k
  xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12393
12394
641k
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12395
#ifdef DEBUG_PUSH
12396
  xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12397
#endif
12398
641k
    }
12399
12400
1.28M
    if (enc != XML_CHAR_ENCODING_NONE) {
12401
400k
        xmlSwitchEncoding(ctxt, enc);
12402
400k
    }
12403
12404
1.28M
    return(ctxt);
12405
1.28M
}
12406
#endif /* LIBXML_PUSH_ENABLED */
12407
12408
/**
12409
 * xmlHaltParser:
12410
 * @ctxt:  an XML parser context
12411
 *
12412
 * Blocks further parser processing don't override error
12413
 * for internal use
12414
 */
12415
static void
12416
1.26M
xmlHaltParser(xmlParserCtxtPtr ctxt) {
12417
1.26M
    if (ctxt == NULL)
12418
0
        return;
12419
1.26M
    ctxt->instate = XML_PARSER_EOF;
12420
1.26M
    ctxt->disableSAX = 1;
12421
1.44M
    while (ctxt->inputNr > 1)
12422
177k
        xmlFreeInputStream(inputPop(ctxt));
12423
1.26M
    if (ctxt->input != NULL) {
12424
        /*
12425
   * in case there was a specific allocation deallocate before
12426
   * overriding base
12427
   */
12428
1.26M
        if (ctxt->input->free != NULL) {
12429
0
      ctxt->input->free((xmlChar *) ctxt->input->base);
12430
0
      ctxt->input->free = NULL;
12431
0
  }
12432
1.26M
        if (ctxt->input->buf != NULL) {
12433
1.08M
            xmlFreeParserInputBuffer(ctxt->input->buf);
12434
1.08M
            ctxt->input->buf = NULL;
12435
1.08M
        }
12436
1.26M
  ctxt->input->cur = BAD_CAST"";
12437
1.26M
        ctxt->input->length = 0;
12438
1.26M
  ctxt->input->base = ctxt->input->cur;
12439
1.26M
        ctxt->input->end = ctxt->input->cur;
12440
1.26M
    }
12441
1.26M
}
12442
12443
/**
12444
 * xmlStopParser:
12445
 * @ctxt:  an XML parser context
12446
 *
12447
 * Blocks further parser processing
12448
 */
12449
void
12450
642k
xmlStopParser(xmlParserCtxtPtr ctxt) {
12451
642k
    if (ctxt == NULL)
12452
0
        return;
12453
642k
    xmlHaltParser(ctxt);
12454
642k
    ctxt->errNo = XML_ERR_USER_STOP;
12455
642k
}
12456
12457
/**
12458
 * xmlCreateIOParserCtxt:
12459
 * @sax:  a SAX handler
12460
 * @user_data:  The user data returned on SAX callbacks
12461
 * @ioread:  an I/O read function
12462
 * @ioclose:  an I/O close function
12463
 * @ioctx:  an I/O handler
12464
 * @enc:  the charset encoding if known
12465
 *
12466
 * Create a parser context for using the XML parser with an existing
12467
 * I/O stream
12468
 *
12469
 * Returns the new parser context or NULL
12470
 */
12471
xmlParserCtxtPtr
12472
xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12473
  xmlInputReadCallback   ioread, xmlInputCloseCallback  ioclose,
12474
0
  void *ioctx, xmlCharEncoding enc) {
12475
0
    xmlParserCtxtPtr ctxt;
12476
0
    xmlParserInputPtr inputStream;
12477
0
    xmlParserInputBufferPtr buf;
12478
12479
0
    if (ioread == NULL) return(NULL);
12480
12481
0
    buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
12482
0
    if (buf == NULL) {
12483
0
        if (ioclose != NULL)
12484
0
            ioclose(ioctx);
12485
0
        return (NULL);
12486
0
    }
12487
12488
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
12489
0
    if (ctxt == NULL) {
12490
0
  xmlFreeParserInputBuffer(buf);
12491
0
  return(NULL);
12492
0
    }
12493
12494
0
    inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12495
0
    if (inputStream == NULL) {
12496
0
  xmlFreeParserCtxt(ctxt);
12497
0
  return(NULL);
12498
0
    }
12499
0
    inputPush(ctxt, inputStream);
12500
12501
0
    return(ctxt);
12502
0
}
12503
12504
#ifdef LIBXML_VALID_ENABLED
12505
/************************************************************************
12506
 *                  *
12507
 *    Front ends when parsing a DTD       *
12508
 *                  *
12509
 ************************************************************************/
12510
12511
/**
12512
 * xmlIOParseDTD:
12513
 * @sax:  the SAX handler block or NULL
12514
 * @input:  an Input Buffer
12515
 * @enc:  the charset encoding if known
12516
 *
12517
 * Load and parse a DTD
12518
 *
12519
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12520
 * @input will be freed by the function in any case.
12521
 */
12522
12523
xmlDtdPtr
12524
xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12525
0
        xmlCharEncoding enc) {
12526
0
    xmlDtdPtr ret = NULL;
12527
0
    xmlParserCtxtPtr ctxt;
12528
0
    xmlParserInputPtr pinput = NULL;
12529
0
    xmlChar start[4];
12530
12531
0
    if (input == NULL)
12532
0
  return(NULL);
12533
12534
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12535
0
    if (ctxt == NULL) {
12536
0
        xmlFreeParserInputBuffer(input);
12537
0
  return(NULL);
12538
0
    }
12539
12540
    /* We are loading a DTD */
12541
0
    ctxt->options |= XML_PARSE_DTDLOAD;
12542
12543
0
    xmlDetectSAX2(ctxt);
12544
12545
    /*
12546
     * generate a parser input from the I/O handler
12547
     */
12548
12549
0
    pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12550
0
    if (pinput == NULL) {
12551
0
        xmlFreeParserInputBuffer(input);
12552
0
  xmlFreeParserCtxt(ctxt);
12553
0
  return(NULL);
12554
0
    }
12555
12556
    /*
12557
     * plug some encoding conversion routines here.
12558
     */
12559
0
    if (xmlPushInput(ctxt, pinput) < 0) {
12560
0
  xmlFreeParserCtxt(ctxt);
12561
0
  return(NULL);
12562
0
    }
12563
0
    if (enc != XML_CHAR_ENCODING_NONE) {
12564
0
        xmlSwitchEncoding(ctxt, enc);
12565
0
    }
12566
12567
0
    pinput->filename = NULL;
12568
0
    pinput->line = 1;
12569
0
    pinput->col = 1;
12570
0
    pinput->base = ctxt->input->cur;
12571
0
    pinput->cur = ctxt->input->cur;
12572
0
    pinput->free = NULL;
12573
12574
    /*
12575
     * let's parse that entity knowing it's an external subset.
12576
     */
12577
0
    ctxt->inSubset = 2;
12578
0
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12579
0
    if (ctxt->myDoc == NULL) {
12580
0
  xmlErrMemory(ctxt, "New Doc failed");
12581
0
  return(NULL);
12582
0
    }
12583
0
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
12584
0
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12585
0
                                 BAD_CAST "none", BAD_CAST "none");
12586
12587
0
    if ((enc == XML_CHAR_ENCODING_NONE) &&
12588
0
        ((ctxt->input->end - ctxt->input->cur) >= 4)) {
12589
  /*
12590
   * Get the 4 first bytes and decode the charset
12591
   * if enc != XML_CHAR_ENCODING_NONE
12592
   * plug some encoding conversion routines.
12593
   */
12594
0
  start[0] = RAW;
12595
0
  start[1] = NXT(1);
12596
0
  start[2] = NXT(2);
12597
0
  start[3] = NXT(3);
12598
0
  enc = xmlDetectCharEncoding(start, 4);
12599
0
  if (enc != XML_CHAR_ENCODING_NONE) {
12600
0
      xmlSwitchEncoding(ctxt, enc);
12601
0
  }
12602
0
    }
12603
12604
0
    xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12605
12606
0
    if (ctxt->myDoc != NULL) {
12607
0
  if (ctxt->wellFormed) {
12608
0
      ret = ctxt->myDoc->extSubset;
12609
0
      ctxt->myDoc->extSubset = NULL;
12610
0
      if (ret != NULL) {
12611
0
    xmlNodePtr tmp;
12612
12613
0
    ret->doc = NULL;
12614
0
    tmp = ret->children;
12615
0
    while (tmp != NULL) {
12616
0
        tmp->doc = NULL;
12617
0
        tmp = tmp->next;
12618
0
    }
12619
0
      }
12620
0
  } else {
12621
0
      ret = NULL;
12622
0
  }
12623
0
        xmlFreeDoc(ctxt->myDoc);
12624
0
        ctxt->myDoc = NULL;
12625
0
    }
12626
0
    xmlFreeParserCtxt(ctxt);
12627
12628
0
    return(ret);
12629
0
}
12630
12631
/**
12632
 * xmlSAXParseDTD:
12633
 * @sax:  the SAX handler block
12634
 * @ExternalID:  a NAME* containing the External ID of the DTD
12635
 * @SystemID:  a NAME* containing the URL to the DTD
12636
 *
12637
 * DEPRECATED: Don't use.
12638
 *
12639
 * Load and parse an external subset.
12640
 *
12641
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12642
 */
12643
12644
xmlDtdPtr
12645
xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12646
0
                          const xmlChar *SystemID) {
12647
0
    xmlDtdPtr ret = NULL;
12648
0
    xmlParserCtxtPtr ctxt;
12649
0
    xmlParserInputPtr input = NULL;
12650
0
    xmlCharEncoding enc;
12651
0
    xmlChar* systemIdCanonic;
12652
12653
0
    if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12654
12655
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12656
0
    if (ctxt == NULL) {
12657
0
  return(NULL);
12658
0
    }
12659
12660
    /* We are loading a DTD */
12661
0
    ctxt->options |= XML_PARSE_DTDLOAD;
12662
12663
    /*
12664
     * Canonicalise the system ID
12665
     */
12666
0
    systemIdCanonic = xmlCanonicPath(SystemID);
12667
0
    if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
12668
0
  xmlFreeParserCtxt(ctxt);
12669
0
  return(NULL);
12670
0
    }
12671
12672
    /*
12673
     * Ask the Entity resolver to load the damn thing
12674
     */
12675
12676
0
    if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
12677
0
  input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12678
0
                                   systemIdCanonic);
12679
0
    if (input == NULL) {
12680
0
  xmlFreeParserCtxt(ctxt);
12681
0
  if (systemIdCanonic != NULL)
12682
0
      xmlFree(systemIdCanonic);
12683
0
  return(NULL);
12684
0
    }
12685
12686
    /*
12687
     * plug some encoding conversion routines here.
12688
     */
12689
0
    if (xmlPushInput(ctxt, input) < 0) {
12690
0
  xmlFreeParserCtxt(ctxt);
12691
0
  if (systemIdCanonic != NULL)
12692
0
      xmlFree(systemIdCanonic);
12693
0
  return(NULL);
12694
0
    }
12695
0
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12696
0
  enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12697
0
  xmlSwitchEncoding(ctxt, enc);
12698
0
    }
12699
12700
0
    if (input->filename == NULL)
12701
0
  input->filename = (char *) systemIdCanonic;
12702
0
    else
12703
0
  xmlFree(systemIdCanonic);
12704
0
    input->line = 1;
12705
0
    input->col = 1;
12706
0
    input->base = ctxt->input->cur;
12707
0
    input->cur = ctxt->input->cur;
12708
0
    input->free = NULL;
12709
12710
    /*
12711
     * let's parse that entity knowing it's an external subset.
12712
     */
12713
0
    ctxt->inSubset = 2;
12714
0
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12715
0
    if (ctxt->myDoc == NULL) {
12716
0
  xmlErrMemory(ctxt, "New Doc failed");
12717
0
  xmlFreeParserCtxt(ctxt);
12718
0
  return(NULL);
12719
0
    }
12720
0
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
12721
0
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12722
0
                                 ExternalID, SystemID);
12723
0
    xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12724
12725
0
    if (ctxt->myDoc != NULL) {
12726
0
  if (ctxt->wellFormed) {
12727
0
      ret = ctxt->myDoc->extSubset;
12728
0
      ctxt->myDoc->extSubset = NULL;
12729
0
      if (ret != NULL) {
12730
0
    xmlNodePtr tmp;
12731
12732
0
    ret->doc = NULL;
12733
0
    tmp = ret->children;
12734
0
    while (tmp != NULL) {
12735
0
        tmp->doc = NULL;
12736
0
        tmp = tmp->next;
12737
0
    }
12738
0
      }
12739
0
  } else {
12740
0
      ret = NULL;
12741
0
  }
12742
0
        xmlFreeDoc(ctxt->myDoc);
12743
0
        ctxt->myDoc = NULL;
12744
0
    }
12745
0
    xmlFreeParserCtxt(ctxt);
12746
12747
0
    return(ret);
12748
0
}
12749
12750
12751
/**
12752
 * xmlParseDTD:
12753
 * @ExternalID:  a NAME* containing the External ID of the DTD
12754
 * @SystemID:  a NAME* containing the URL to the DTD
12755
 *
12756
 * Load and parse an external subset.
12757
 *
12758
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12759
 */
12760
12761
xmlDtdPtr
12762
0
xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12763
0
    return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12764
0
}
12765
#endif /* LIBXML_VALID_ENABLED */
12766
12767
/************************************************************************
12768
 *                  *
12769
 *    Front ends when parsing an Entity     *
12770
 *                  *
12771
 ************************************************************************/
12772
12773
/**
12774
 * xmlParseCtxtExternalEntity:
12775
 * @ctx:  the existing parsing context
12776
 * @URL:  the URL for the entity to load
12777
 * @ID:  the System ID for the entity to load
12778
 * @lst:  the return value for the set of parsed nodes
12779
 *
12780
 * Parse an external general entity within an existing parsing context
12781
 * An external general parsed entity is well-formed if it matches the
12782
 * production labeled extParsedEnt.
12783
 *
12784
 * [78] extParsedEnt ::= TextDecl? content
12785
 *
12786
 * Returns 0 if the entity is well formed, -1 in case of args problem and
12787
 *    the parser error code otherwise
12788
 */
12789
12790
int
12791
xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
12792
0
                 const xmlChar *ID, xmlNodePtr *lst) {
12793
0
    void *userData;
12794
12795
0
    if (ctx == NULL) return(-1);
12796
    /*
12797
     * If the user provided their own SAX callbacks, then reuse the
12798
     * userData callback field, otherwise the expected setup in a
12799
     * DOM builder is to have userData == ctxt
12800
     */
12801
0
    if (ctx->userData == ctx)
12802
0
        userData = NULL;
12803
0
    else
12804
0
        userData = ctx->userData;
12805
0
    return xmlParseExternalEntityPrivate(ctx->myDoc, ctx, ctx->sax,
12806
0
                                         userData, ctx->depth + 1,
12807
0
                                         URL, ID, lst);
12808
0
}
12809
12810
/**
12811
 * xmlParseExternalEntityPrivate:
12812
 * @doc:  the document the chunk pertains to
12813
 * @oldctxt:  the previous parser context if available
12814
 * @sax:  the SAX handler block (possibly NULL)
12815
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12816
 * @depth:  Used for loop detection, use 0
12817
 * @URL:  the URL for the entity to load
12818
 * @ID:  the System ID for the entity to load
12819
 * @list:  the return value for the set of parsed nodes
12820
 *
12821
 * Private version of xmlParseExternalEntity()
12822
 *
12823
 * Returns 0 if the entity is well formed, -1 in case of args problem and
12824
 *    the parser error code otherwise
12825
 */
12826
12827
static xmlParserErrors
12828
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
12829
                xmlSAXHandlerPtr sax,
12830
          void *user_data, int depth, const xmlChar *URL,
12831
460k
          const xmlChar *ID, xmlNodePtr *list) {
12832
460k
    xmlParserCtxtPtr ctxt;
12833
460k
    xmlDocPtr newDoc;
12834
460k
    xmlNodePtr newRoot;
12835
460k
    xmlParserErrors ret = XML_ERR_OK;
12836
460k
    xmlChar start[4];
12837
460k
    xmlCharEncoding enc;
12838
12839
460k
    if (((depth > 40) &&
12840
460k
  ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12841
460k
  (depth > 100)) {
12842
0
  xmlFatalErrMsg(oldctxt, XML_ERR_ENTITY_LOOP,
12843
0
                       "Maximum entity nesting depth exceeded");
12844
0
        return(XML_ERR_ENTITY_LOOP);
12845
0
    }
12846
12847
460k
    if (list != NULL)
12848
130k
        *list = NULL;
12849
460k
    if ((URL == NULL) && (ID == NULL))
12850
782
  return(XML_ERR_INTERNAL_ERROR);
12851
459k
    if (doc == NULL)
12852
0
  return(XML_ERR_INTERNAL_ERROR);
12853
12854
459k
    ctxt = xmlCreateEntityParserCtxtInternal(sax, user_data, URL, ID, NULL,
12855
459k
                                             oldctxt);
12856
459k
    if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
12857
96.4k
    if (oldctxt != NULL) {
12858
96.4k
        ctxt->nbErrors = oldctxt->nbErrors;
12859
96.4k
        ctxt->nbWarnings = oldctxt->nbWarnings;
12860
96.4k
    }
12861
96.4k
    xmlDetectSAX2(ctxt);
12862
12863
96.4k
    newDoc = xmlNewDoc(BAD_CAST "1.0");
12864
96.4k
    if (newDoc == NULL) {
12865
0
  xmlFreeParserCtxt(ctxt);
12866
0
  return(XML_ERR_INTERNAL_ERROR);
12867
0
    }
12868
96.4k
    newDoc->properties = XML_DOC_INTERNAL;
12869
96.4k
    if (doc) {
12870
96.4k
        newDoc->intSubset = doc->intSubset;
12871
96.4k
        newDoc->extSubset = doc->extSubset;
12872
96.4k
        if (doc->dict) {
12873
85.6k
            newDoc->dict = doc->dict;
12874
85.6k
            xmlDictReference(newDoc->dict);
12875
85.6k
        }
12876
96.4k
        if (doc->URL != NULL) {
12877
63.7k
            newDoc->URL = xmlStrdup(doc->URL);
12878
63.7k
        }
12879
96.4k
    }
12880
96.4k
    newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12881
96.4k
    if (newRoot == NULL) {
12882
0
  if (sax != NULL)
12883
0
  xmlFreeParserCtxt(ctxt);
12884
0
  newDoc->intSubset = NULL;
12885
0
  newDoc->extSubset = NULL;
12886
0
        xmlFreeDoc(newDoc);
12887
0
  return(XML_ERR_INTERNAL_ERROR);
12888
0
    }
12889
96.4k
    xmlAddChild((xmlNodePtr) newDoc, newRoot);
12890
96.4k
    nodePush(ctxt, newDoc->children);
12891
96.4k
    if (doc == NULL) {
12892
0
        ctxt->myDoc = newDoc;
12893
96.4k
    } else {
12894
96.4k
        ctxt->myDoc = doc;
12895
96.4k
        newRoot->doc = doc;
12896
96.4k
    }
12897
12898
    /*
12899
     * Get the 4 first bytes and decode the charset
12900
     * if enc != XML_CHAR_ENCODING_NONE
12901
     * plug some encoding conversion routines.
12902
     */
12903
96.4k
    GROW;
12904
96.4k
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12905
95.4k
  start[0] = RAW;
12906
95.4k
  start[1] = NXT(1);
12907
95.4k
  start[2] = NXT(2);
12908
95.4k
  start[3] = NXT(3);
12909
95.4k
  enc = xmlDetectCharEncoding(start, 4);
12910
95.4k
  if (enc != XML_CHAR_ENCODING_NONE) {
12911
17.2k
      xmlSwitchEncoding(ctxt, enc);
12912
17.2k
  }
12913
95.4k
    }
12914
12915
    /*
12916
     * Parse a possible text declaration first
12917
     */
12918
96.4k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
12919
4.63k
  xmlParseTextDecl(ctxt);
12920
        /*
12921
         * An XML-1.0 document can't reference an entity not XML-1.0
12922
         */
12923
4.63k
        if ((xmlStrEqual(oldctxt->version, BAD_CAST "1.0")) &&
12924
4.63k
            (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
12925
189
            xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
12926
189
                           "Version mismatch between document and entity\n");
12927
189
        }
12928
4.63k
    }
12929
12930
96.4k
    ctxt->instate = XML_PARSER_CONTENT;
12931
96.4k
    ctxt->depth = depth;
12932
96.4k
    if (oldctxt != NULL) {
12933
96.4k
  ctxt->_private = oldctxt->_private;
12934
96.4k
  ctxt->loadsubset = oldctxt->loadsubset;
12935
96.4k
  ctxt->validate = oldctxt->validate;
12936
96.4k
  ctxt->valid = oldctxt->valid;
12937
96.4k
  ctxt->replaceEntities = oldctxt->replaceEntities;
12938
96.4k
        if (oldctxt->validate) {
12939
12.5k
            ctxt->vctxt.error = oldctxt->vctxt.error;
12940
12.5k
            ctxt->vctxt.warning = oldctxt->vctxt.warning;
12941
12.5k
            ctxt->vctxt.userData = oldctxt->vctxt.userData;
12942
12.5k
            ctxt->vctxt.flags = oldctxt->vctxt.flags;
12943
12.5k
        }
12944
96.4k
  ctxt->external = oldctxt->external;
12945
96.4k
        if (ctxt->dict) xmlDictFree(ctxt->dict);
12946
96.4k
        ctxt->dict = oldctxt->dict;
12947
96.4k
        ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12948
96.4k
        ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12949
96.4k
        ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12950
96.4k
        ctxt->dictNames = oldctxt->dictNames;
12951
96.4k
        ctxt->attsDefault = oldctxt->attsDefault;
12952
96.4k
        ctxt->attsSpecial = oldctxt->attsSpecial;
12953
96.4k
        ctxt->linenumbers = oldctxt->linenumbers;
12954
96.4k
  ctxt->record_info = oldctxt->record_info;
12955
96.4k
  ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
12956
96.4k
  ctxt->node_seq.length = oldctxt->node_seq.length;
12957
96.4k
  ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
12958
96.4k
    } else {
12959
  /*
12960
   * Doing validity checking on chunk without context
12961
   * doesn't make sense
12962
   */
12963
0
  ctxt->_private = NULL;
12964
0
  ctxt->validate = 0;
12965
0
  ctxt->external = 2;
12966
0
  ctxt->loadsubset = 0;
12967
0
    }
12968
12969
96.4k
    xmlParseContent(ctxt);
12970
12971
96.4k
    if ((RAW == '<') && (NXT(1) == '/')) {
12972
4.01k
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12973
92.4k
    } else if (RAW != 0) {
12974
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12975
0
    }
12976
96.4k
    if (ctxt->node != newDoc->children) {
12977
11.0k
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12978
11.0k
    }
12979
12980
96.4k
    if (!ctxt->wellFormed) {
12981
43.8k
  ret = (xmlParserErrors)ctxt->errNo;
12982
43.8k
        if (oldctxt != NULL) {
12983
43.8k
            oldctxt->errNo = ctxt->errNo;
12984
43.8k
            oldctxt->wellFormed = 0;
12985
43.8k
            xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12986
43.8k
        }
12987
52.6k
    } else {
12988
52.6k
  if (list != NULL) {
12989
49.4k
      xmlNodePtr cur;
12990
12991
      /*
12992
       * Return the newly created nodeset after unlinking it from
12993
       * they pseudo parent.
12994
       */
12995
49.4k
      cur = newDoc->children->children;
12996
49.4k
      *list = cur;
12997
131M
      while (cur != NULL) {
12998
131M
    cur->parent = NULL;
12999
131M
    cur = cur->next;
13000
131M
      }
13001
49.4k
            newDoc->children->children = NULL;
13002
49.4k
  }
13003
52.6k
  ret = XML_ERR_OK;
13004
52.6k
    }
13005
13006
    /*
13007
     * Also record the size of the entity parsed
13008
     */
13009
96.4k
    if (ctxt->input != NULL && oldctxt != NULL) {
13010
96.4k
        unsigned long consumed = ctxt->input->consumed;
13011
13012
96.4k
        xmlSaturatedAddSizeT(&consumed, ctxt->input->cur - ctxt->input->base);
13013
13014
96.4k
        xmlSaturatedAdd(&oldctxt->sizeentities, consumed);
13015
96.4k
        xmlSaturatedAdd(&oldctxt->sizeentities, ctxt->sizeentities);
13016
13017
96.4k
        xmlSaturatedAdd(&oldctxt->sizeentcopy, consumed);
13018
96.4k
        xmlSaturatedAdd(&oldctxt->sizeentcopy, ctxt->sizeentcopy);
13019
96.4k
    }
13020
13021
96.4k
    if (oldctxt != NULL) {
13022
96.4k
        ctxt->dict = NULL;
13023
96.4k
        ctxt->attsDefault = NULL;
13024
96.4k
        ctxt->attsSpecial = NULL;
13025
96.4k
        oldctxt->nbErrors = ctxt->nbErrors;
13026
96.4k
        oldctxt->nbWarnings = ctxt->nbWarnings;
13027
96.4k
        oldctxt->validate = ctxt->validate;
13028
96.4k
        oldctxt->valid = ctxt->valid;
13029
96.4k
        oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
13030
96.4k
        oldctxt->node_seq.length = ctxt->node_seq.length;
13031
96.4k
        oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
13032
96.4k
    }
13033
96.4k
    ctxt->node_seq.maximum = 0;
13034
96.4k
    ctxt->node_seq.length = 0;
13035
96.4k
    ctxt->node_seq.buffer = NULL;
13036
96.4k
    xmlFreeParserCtxt(ctxt);
13037
96.4k
    newDoc->intSubset = NULL;
13038
96.4k
    newDoc->extSubset = NULL;
13039
96.4k
    xmlFreeDoc(newDoc);
13040
13041
96.4k
    return(ret);
13042
96.4k
}
13043
13044
#ifdef LIBXML_SAX1_ENABLED
13045
/**
13046
 * xmlParseExternalEntity:
13047
 * @doc:  the document the chunk pertains to
13048
 * @sax:  the SAX handler block (possibly NULL)
13049
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13050
 * @depth:  Used for loop detection, use 0
13051
 * @URL:  the URL for the entity to load
13052
 * @ID:  the System ID for the entity to load
13053
 * @lst:  the return value for the set of parsed nodes
13054
 *
13055
 * Parse an external general entity
13056
 * An external general parsed entity is well-formed if it matches the
13057
 * production labeled extParsedEnt.
13058
 *
13059
 * [78] extParsedEnt ::= TextDecl? content
13060
 *
13061
 * Returns 0 if the entity is well formed, -1 in case of args problem and
13062
 *    the parser error code otherwise
13063
 */
13064
13065
int
13066
xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
13067
0
    int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
13068
0
    return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
13069
0
                           ID, lst));
13070
0
}
13071
13072
/**
13073
 * xmlParseBalancedChunkMemory:
13074
 * @doc:  the document the chunk pertains to (must not be NULL)
13075
 * @sax:  the SAX handler block (possibly NULL)
13076
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13077
 * @depth:  Used for loop detection, use 0
13078
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13079
 * @lst:  the return value for the set of parsed nodes
13080
 *
13081
 * Parse a well-balanced chunk of an XML document
13082
 * called by the parser
13083
 * The allowed sequence for the Well Balanced Chunk is the one defined by
13084
 * the content production in the XML grammar:
13085
 *
13086
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13087
 *
13088
 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13089
 *    the parser error code otherwise
13090
 */
13091
13092
int
13093
xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13094
0
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
13095
0
    return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
13096
0
                                                depth, string, lst, 0 );
13097
0
}
13098
#endif /* LIBXML_SAX1_ENABLED */
13099
13100
/**
13101
 * xmlParseBalancedChunkMemoryInternal:
13102
 * @oldctxt:  the existing parsing context
13103
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13104
 * @user_data:  the user data field for the parser context
13105
 * @lst:  the return value for the set of parsed nodes
13106
 *
13107
 *
13108
 * Parse a well-balanced chunk of an XML document
13109
 * called by the parser
13110
 * The allowed sequence for the Well Balanced Chunk is the one defined by
13111
 * the content production in the XML grammar:
13112
 *
13113
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13114
 *
13115
 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13116
 * error code otherwise
13117
 *
13118
 * In case recover is set to 1, the nodelist will not be empty even if
13119
 * the parsed chunk is not well balanced.
13120
 */
13121
static xmlParserErrors
13122
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
13123
770k
  const xmlChar *string, void *user_data, xmlNodePtr *lst) {
13124
770k
    xmlParserCtxtPtr ctxt;
13125
770k
    xmlDocPtr newDoc = NULL;
13126
770k
    xmlNodePtr newRoot;
13127
770k
    xmlSAXHandlerPtr oldsax = NULL;
13128
770k
    xmlNodePtr content = NULL;
13129
770k
    xmlNodePtr last = NULL;
13130
770k
    int size;
13131
770k
    xmlParserErrors ret = XML_ERR_OK;
13132
770k
#ifdef SAX2
13133
770k
    int i;
13134
770k
#endif
13135
13136
770k
    if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13137
770k
        (oldctxt->depth >  100)) {
13138
204
  xmlFatalErrMsg(oldctxt, XML_ERR_ENTITY_LOOP,
13139
204
                       "Maximum entity nesting depth exceeded");
13140
204
  return(XML_ERR_ENTITY_LOOP);
13141
204
    }
13142
13143
13144
770k
    if (lst != NULL)
13145
770k
        *lst = NULL;
13146
770k
    if (string == NULL)
13147
50
        return(XML_ERR_INTERNAL_ERROR);
13148
13149
770k
    size = xmlStrlen(string);
13150
13151
770k
    ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13152
770k
    if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13153
770k
    ctxt->nbErrors = oldctxt->nbErrors;
13154
770k
    ctxt->nbWarnings = oldctxt->nbWarnings;
13155
770k
    if (user_data != NULL)
13156
0
  ctxt->userData = user_data;
13157
770k
    else
13158
770k
  ctxt->userData = ctxt;
13159
770k
    if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13160
770k
    ctxt->dict = oldctxt->dict;
13161
770k
    ctxt->input_id = oldctxt->input_id;
13162
770k
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13163
770k
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13164
770k
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13165
13166
770k
#ifdef SAX2
13167
    /* propagate namespaces down the entity */
13168
771k
    for (i = 0;i < oldctxt->nsNr;i += 2) {
13169
819
        nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
13170
819
    }
13171
770k
#endif
13172
13173
770k
    oldsax = ctxt->sax;
13174
770k
    ctxt->sax = oldctxt->sax;
13175
770k
    xmlDetectSAX2(ctxt);
13176
770k
    ctxt->replaceEntities = oldctxt->replaceEntities;
13177
770k
    ctxt->options = oldctxt->options;
13178
13179
770k
    ctxt->_private = oldctxt->_private;
13180
770k
    if (oldctxt->myDoc == NULL) {
13181
0
  newDoc = xmlNewDoc(BAD_CAST "1.0");
13182
0
  if (newDoc == NULL) {
13183
0
      ctxt->sax = oldsax;
13184
0
      ctxt->dict = NULL;
13185
0
      xmlFreeParserCtxt(ctxt);
13186
0
      return(XML_ERR_INTERNAL_ERROR);
13187
0
  }
13188
0
  newDoc->properties = XML_DOC_INTERNAL;
13189
0
  newDoc->dict = ctxt->dict;
13190
0
  xmlDictReference(newDoc->dict);
13191
0
  ctxt->myDoc = newDoc;
13192
770k
    } else {
13193
770k
  ctxt->myDoc = oldctxt->myDoc;
13194
770k
        content = ctxt->myDoc->children;
13195
770k
  last = ctxt->myDoc->last;
13196
770k
    }
13197
770k
    newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
13198
770k
    if (newRoot == NULL) {
13199
0
  ctxt->sax = oldsax;
13200
0
  ctxt->dict = NULL;
13201
0
  xmlFreeParserCtxt(ctxt);
13202
0
  if (newDoc != NULL) {
13203
0
      xmlFreeDoc(newDoc);
13204
0
  }
13205
0
  return(XML_ERR_INTERNAL_ERROR);
13206
0
    }
13207
770k
    ctxt->myDoc->children = NULL;
13208
770k
    ctxt->myDoc->last = NULL;
13209
770k
    xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
13210
770k
    nodePush(ctxt, ctxt->myDoc->children);
13211
770k
    ctxt->instate = XML_PARSER_CONTENT;
13212
770k
    ctxt->depth = oldctxt->depth;
13213
13214
770k
    ctxt->validate = 0;
13215
770k
    ctxt->loadsubset = oldctxt->loadsubset;
13216
770k
    if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
13217
  /*
13218
   * ID/IDREF registration will be done in xmlValidateElement below
13219
   */
13220
765k
  ctxt->loadsubset |= XML_SKIP_IDS;
13221
765k
    }
13222
770k
    ctxt->dictNames = oldctxt->dictNames;
13223
770k
    ctxt->attsDefault = oldctxt->attsDefault;
13224
770k
    ctxt->attsSpecial = oldctxt->attsSpecial;
13225
13226
770k
    xmlParseContent(ctxt);
13227
770k
    if ((RAW == '<') && (NXT(1) == '/')) {
13228
702
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13229
769k
    } else if (RAW != 0) {
13230
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13231
0
    }
13232
770k
    if (ctxt->node != ctxt->myDoc->children) {
13233
1.15k
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13234
1.15k
    }
13235
13236
770k
    if (!ctxt->wellFormed) {
13237
17.3k
  ret = (xmlParserErrors)ctxt->errNo;
13238
17.3k
        oldctxt->errNo = ctxt->errNo;
13239
17.3k
        oldctxt->wellFormed = 0;
13240
17.3k
        xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13241
753k
    } else {
13242
753k
        ret = XML_ERR_OK;
13243
753k
    }
13244
13245
770k
    if ((lst != NULL) && (ret == XML_ERR_OK)) {
13246
753k
  xmlNodePtr cur;
13247
13248
  /*
13249
   * Return the newly created nodeset after unlinking it from
13250
   * they pseudo parent.
13251
   */
13252
753k
  cur = ctxt->myDoc->children->children;
13253
753k
  *lst = cur;
13254
2.58M
  while (cur != NULL) {
13255
1.82M
#ifdef LIBXML_VALID_ENABLED
13256
1.82M
      if ((oldctxt->validate) && (oldctxt->wellFormed) &&
13257
1.82M
    (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
13258
1.82M
    (cur->type == XML_ELEMENT_NODE)) {
13259
7.65k
    oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
13260
7.65k
      oldctxt->myDoc, cur);
13261
7.65k
      }
13262
1.82M
#endif /* LIBXML_VALID_ENABLED */
13263
1.82M
      cur->parent = NULL;
13264
1.82M
      cur = cur->next;
13265
1.82M
  }
13266
753k
  ctxt->myDoc->children->children = NULL;
13267
753k
    }
13268
770k
    if (ctxt->myDoc != NULL) {
13269
770k
  xmlFreeNode(ctxt->myDoc->children);
13270
770k
        ctxt->myDoc->children = content;
13271
770k
        ctxt->myDoc->last = last;
13272
770k
    }
13273
13274
    /*
13275
     * Also record the size of the entity parsed
13276
     */
13277
770k
    if (ctxt->input != NULL && oldctxt != NULL) {
13278
770k
        unsigned long consumed = ctxt->input->consumed;
13279
13280
770k
        xmlSaturatedAddSizeT(&consumed, ctxt->input->cur - ctxt->input->base);
13281
13282
770k
        xmlSaturatedAdd(&oldctxt->sizeentcopy, consumed);
13283
770k
        xmlSaturatedAdd(&oldctxt->sizeentcopy, ctxt->sizeentcopy);
13284
770k
    }
13285
13286
770k
    oldctxt->nbErrors = ctxt->nbErrors;
13287
770k
    oldctxt->nbWarnings = ctxt->nbWarnings;
13288
770k
    ctxt->sax = oldsax;
13289
770k
    ctxt->dict = NULL;
13290
770k
    ctxt->attsDefault = NULL;
13291
770k
    ctxt->attsSpecial = NULL;
13292
770k
    xmlFreeParserCtxt(ctxt);
13293
770k
    if (newDoc != NULL) {
13294
0
  xmlFreeDoc(newDoc);
13295
0
    }
13296
13297
770k
    return(ret);
13298
770k
}
13299
13300
/**
13301
 * xmlParseInNodeContext:
13302
 * @node:  the context node
13303
 * @data:  the input string
13304
 * @datalen:  the input string length in bytes
13305
 * @options:  a combination of xmlParserOption
13306
 * @lst:  the return value for the set of parsed nodes
13307
 *
13308
 * Parse a well-balanced chunk of an XML document
13309
 * within the context (DTD, namespaces, etc ...) of the given node.
13310
 *
13311
 * The allowed sequence for the data is a Well Balanced Chunk defined by
13312
 * the content production in the XML grammar:
13313
 *
13314
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13315
 *
13316
 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13317
 * error code otherwise
13318
 */
13319
xmlParserErrors
13320
xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13321
0
                      int options, xmlNodePtr *lst) {
13322
0
#ifdef SAX2
13323
0
    xmlParserCtxtPtr ctxt;
13324
0
    xmlDocPtr doc = NULL;
13325
0
    xmlNodePtr fake, cur;
13326
0
    int nsnr = 0;
13327
13328
0
    xmlParserErrors ret = XML_ERR_OK;
13329
13330
    /*
13331
     * check all input parameters, grab the document
13332
     */
13333
0
    if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13334
0
        return(XML_ERR_INTERNAL_ERROR);
13335
0
    switch (node->type) {
13336
0
        case XML_ELEMENT_NODE:
13337
0
        case XML_ATTRIBUTE_NODE:
13338
0
        case XML_TEXT_NODE:
13339
0
        case XML_CDATA_SECTION_NODE:
13340
0
        case XML_ENTITY_REF_NODE:
13341
0
        case XML_PI_NODE:
13342
0
        case XML_COMMENT_NODE:
13343
0
        case XML_DOCUMENT_NODE:
13344
0
        case XML_HTML_DOCUMENT_NODE:
13345
0
      break;
13346
0
  default:
13347
0
      return(XML_ERR_INTERNAL_ERROR);
13348
13349
0
    }
13350
0
    while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13351
0
           (node->type != XML_DOCUMENT_NODE) &&
13352
0
     (node->type != XML_HTML_DOCUMENT_NODE))
13353
0
  node = node->parent;
13354
0
    if (node == NULL)
13355
0
  return(XML_ERR_INTERNAL_ERROR);
13356
0
    if (node->type == XML_ELEMENT_NODE)
13357
0
  doc = node->doc;
13358
0
    else
13359
0
        doc = (xmlDocPtr) node;
13360
0
    if (doc == NULL)
13361
0
  return(XML_ERR_INTERNAL_ERROR);
13362
13363
    /*
13364
     * allocate a context and set-up everything not related to the
13365
     * node position in the tree
13366
     */
13367
0
    if (doc->type == XML_DOCUMENT_NODE)
13368
0
  ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13369
0
#ifdef LIBXML_HTML_ENABLED
13370
0
    else if (doc->type == XML_HTML_DOCUMENT_NODE) {
13371
0
  ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
13372
        /*
13373
         * When parsing in context, it makes no sense to add implied
13374
         * elements like html/body/etc...
13375
         */
13376
0
        options |= HTML_PARSE_NOIMPLIED;
13377
0
    }
13378
0
#endif
13379
0
    else
13380
0
        return(XML_ERR_INTERNAL_ERROR);
13381
13382
0
    if (ctxt == NULL)
13383
0
        return(XML_ERR_NO_MEMORY);
13384
13385
    /*
13386
     * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13387
     * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13388
     * we must wait until the last moment to free the original one.
13389
     */
13390
0
    if (doc->dict != NULL) {
13391
0
        if (ctxt->dict != NULL)
13392
0
      xmlDictFree(ctxt->dict);
13393
0
  ctxt->dict = doc->dict;
13394
0
    } else
13395
0
        options |= XML_PARSE_NODICT;
13396
13397
0
    if (doc->encoding != NULL) {
13398
0
        xmlCharEncodingHandlerPtr hdlr;
13399
13400
0
        if (ctxt->encoding != NULL)
13401
0
      xmlFree((xmlChar *) ctxt->encoding);
13402
0
        ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13403
13404
0
        hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding);
13405
0
        if (hdlr != NULL) {
13406
0
            xmlSwitchToEncoding(ctxt, hdlr);
13407
0
  } else {
13408
0
            return(XML_ERR_UNSUPPORTED_ENCODING);
13409
0
        }
13410
0
    }
13411
13412
0
    xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13413
0
    xmlDetectSAX2(ctxt);
13414
0
    ctxt->myDoc = doc;
13415
    /* parsing in context, i.e. as within existing content */
13416
0
    ctxt->input_id = 2;
13417
0
    ctxt->instate = XML_PARSER_CONTENT;
13418
13419
0
    fake = xmlNewDocComment(node->doc, NULL);
13420
0
    if (fake == NULL) {
13421
0
        xmlFreeParserCtxt(ctxt);
13422
0
  return(XML_ERR_NO_MEMORY);
13423
0
    }
13424
0
    xmlAddChild(node, fake);
13425
13426
0
    if (node->type == XML_ELEMENT_NODE) {
13427
0
  nodePush(ctxt, node);
13428
  /*
13429
   * initialize the SAX2 namespaces stack
13430
   */
13431
0
  cur = node;
13432
0
  while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13433
0
      xmlNsPtr ns = cur->nsDef;
13434
0
      const xmlChar *iprefix, *ihref;
13435
13436
0
      while (ns != NULL) {
13437
0
    if (ctxt->dict) {
13438
0
        iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13439
0
        ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13440
0
    } else {
13441
0
        iprefix = ns->prefix;
13442
0
        ihref = ns->href;
13443
0
    }
13444
13445
0
          if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13446
0
        nsPush(ctxt, iprefix, ihref);
13447
0
        nsnr++;
13448
0
    }
13449
0
    ns = ns->next;
13450
0
      }
13451
0
      cur = cur->parent;
13452
0
  }
13453
0
    }
13454
13455
0
    if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13456
  /*
13457
   * ID/IDREF registration will be done in xmlValidateElement below
13458
   */
13459
0
  ctxt->loadsubset |= XML_SKIP_IDS;
13460
0
    }
13461
13462
0
#ifdef LIBXML_HTML_ENABLED
13463
0
    if (doc->type == XML_HTML_DOCUMENT_NODE)
13464
0
        __htmlParseContent(ctxt);
13465
0
    else
13466
0
#endif
13467
0
  xmlParseContent(ctxt);
13468
13469
0
    nsPop(ctxt, nsnr);
13470
0
    if ((RAW == '<') && (NXT(1) == '/')) {
13471
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13472
0
    } else if (RAW != 0) {
13473
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13474
0
    }
13475
0
    if ((ctxt->node != NULL) && (ctxt->node != node)) {
13476
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13477
0
  ctxt->wellFormed = 0;
13478
0
    }
13479
13480
0
    if (!ctxt->wellFormed) {
13481
0
        if (ctxt->errNo == 0)
13482
0
      ret = XML_ERR_INTERNAL_ERROR;
13483
0
  else
13484
0
      ret = (xmlParserErrors)ctxt->errNo;
13485
0
    } else {
13486
0
        ret = XML_ERR_OK;
13487
0
    }
13488
13489
    /*
13490
     * Return the newly created nodeset after unlinking it from
13491
     * the pseudo sibling.
13492
     */
13493
13494
0
    cur = fake->next;
13495
0
    fake->next = NULL;
13496
0
    node->last = fake;
13497
13498
0
    if (cur != NULL) {
13499
0
  cur->prev = NULL;
13500
0
    }
13501
13502
0
    *lst = cur;
13503
13504
0
    while (cur != NULL) {
13505
0
  cur->parent = NULL;
13506
0
  cur = cur->next;
13507
0
    }
13508
13509
0
    xmlUnlinkNode(fake);
13510
0
    xmlFreeNode(fake);
13511
13512
13513
0
    if (ret != XML_ERR_OK) {
13514
0
        xmlFreeNodeList(*lst);
13515
0
  *lst = NULL;
13516
0
    }
13517
13518
0
    if (doc->dict != NULL)
13519
0
        ctxt->dict = NULL;
13520
0
    xmlFreeParserCtxt(ctxt);
13521
13522
0
    return(ret);
13523
#else /* !SAX2 */
13524
    return(XML_ERR_INTERNAL_ERROR);
13525
#endif
13526
0
}
13527
13528
#ifdef LIBXML_SAX1_ENABLED
13529
/**
13530
 * xmlParseBalancedChunkMemoryRecover:
13531
 * @doc:  the document the chunk pertains to (must not be NULL)
13532
 * @sax:  the SAX handler block (possibly NULL)
13533
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13534
 * @depth:  Used for loop detection, use 0
13535
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13536
 * @lst:  the return value for the set of parsed nodes
13537
 * @recover: return nodes even if the data is broken (use 0)
13538
 *
13539
 *
13540
 * Parse a well-balanced chunk of an XML document
13541
 * called by the parser
13542
 * The allowed sequence for the Well Balanced Chunk is the one defined by
13543
 * the content production in the XML grammar:
13544
 *
13545
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13546
 *
13547
 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13548
 *    the parser error code otherwise
13549
 *
13550
 * In case recover is set to 1, the nodelist will not be empty even if
13551
 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13552
 * some extent.
13553
 */
13554
int
13555
xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13556
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
13557
0
     int recover) {
13558
0
    xmlParserCtxtPtr ctxt;
13559
0
    xmlDocPtr newDoc;
13560
0
    xmlSAXHandlerPtr oldsax = NULL;
13561
0
    xmlNodePtr content, newRoot;
13562
0
    int size;
13563
0
    int ret = 0;
13564
13565
0
    if (depth > 40) {
13566
0
  return(XML_ERR_ENTITY_LOOP);
13567
0
    }
13568
13569
13570
0
    if (lst != NULL)
13571
0
        *lst = NULL;
13572
0
    if (string == NULL)
13573
0
        return(-1);
13574
13575
0
    size = xmlStrlen(string);
13576
13577
0
    ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13578
0
    if (ctxt == NULL) return(-1);
13579
0
    ctxt->userData = ctxt;
13580
0
    if (sax != NULL) {
13581
0
  oldsax = ctxt->sax;
13582
0
        ctxt->sax = sax;
13583
0
  if (user_data != NULL)
13584
0
      ctxt->userData = user_data;
13585
0
    }
13586
0
    newDoc = xmlNewDoc(BAD_CAST "1.0");
13587
0
    if (newDoc == NULL) {
13588
0
  xmlFreeParserCtxt(ctxt);
13589
0
  return(-1);
13590
0
    }
13591
0
    newDoc->properties = XML_DOC_INTERNAL;
13592
0
    if ((doc != NULL) && (doc->dict != NULL)) {
13593
0
        xmlDictFree(ctxt->dict);
13594
0
  ctxt->dict = doc->dict;
13595
0
  xmlDictReference(ctxt->dict);
13596
0
  ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13597
0
  ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13598
0
  ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13599
0
  ctxt->dictNames = 1;
13600
0
    } else {
13601
0
  xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
13602
0
    }
13603
    /* doc == NULL is only supported for historic reasons */
13604
0
    if (doc != NULL) {
13605
0
  newDoc->intSubset = doc->intSubset;
13606
0
  newDoc->extSubset = doc->extSubset;
13607
0
    }
13608
0
    newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13609
0
    if (newRoot == NULL) {
13610
0
  if (sax != NULL)
13611
0
      ctxt->sax = oldsax;
13612
0
  xmlFreeParserCtxt(ctxt);
13613
0
  newDoc->intSubset = NULL;
13614
0
  newDoc->extSubset = NULL;
13615
0
        xmlFreeDoc(newDoc);
13616
0
  return(-1);
13617
0
    }
13618
0
    xmlAddChild((xmlNodePtr) newDoc, newRoot);
13619
0
    nodePush(ctxt, newRoot);
13620
    /* doc == NULL is only supported for historic reasons */
13621
0
    if (doc == NULL) {
13622
0
  ctxt->myDoc = newDoc;
13623
0
    } else {
13624
0
  ctxt->myDoc = newDoc;
13625
0
  newDoc->children->doc = doc;
13626
  /* Ensure that doc has XML spec namespace */
13627
0
  xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13628
0
  newDoc->oldNs = doc->oldNs;
13629
0
    }
13630
0
    ctxt->instate = XML_PARSER_CONTENT;
13631
0
    ctxt->input_id = 2;
13632
0
    ctxt->depth = depth;
13633
13634
    /*
13635
     * Doing validity checking on chunk doesn't make sense
13636
     */
13637
0
    ctxt->validate = 0;
13638
0
    ctxt->loadsubset = 0;
13639
0
    xmlDetectSAX2(ctxt);
13640
13641
0
    if ( doc != NULL ){
13642
0
        content = doc->children;
13643
0
        doc->children = NULL;
13644
0
        xmlParseContent(ctxt);
13645
0
        doc->children = content;
13646
0
    }
13647
0
    else {
13648
0
        xmlParseContent(ctxt);
13649
0
    }
13650
0
    if ((RAW == '<') && (NXT(1) == '/')) {
13651
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13652
0
    } else if (RAW != 0) {
13653
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13654
0
    }
13655
0
    if (ctxt->node != newDoc->children) {
13656
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13657
0
    }
13658
13659
0
    if (!ctxt->wellFormed) {
13660
0
        if (ctxt->errNo == 0)
13661
0
      ret = 1;
13662
0
  else
13663
0
      ret = ctxt->errNo;
13664
0
    } else {
13665
0
      ret = 0;
13666
0
    }
13667
13668
0
    if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13669
0
  xmlNodePtr cur;
13670
13671
  /*
13672
   * Return the newly created nodeset after unlinking it from
13673
   * they pseudo parent.
13674
   */
13675
0
  cur = newDoc->children->children;
13676
0
  *lst = cur;
13677
0
  while (cur != NULL) {
13678
0
      xmlSetTreeDoc(cur, doc);
13679
0
      cur->parent = NULL;
13680
0
      cur = cur->next;
13681
0
  }
13682
0
  newDoc->children->children = NULL;
13683
0
    }
13684
13685
0
    if (sax != NULL)
13686
0
  ctxt->sax = oldsax;
13687
0
    xmlFreeParserCtxt(ctxt);
13688
0
    newDoc->intSubset = NULL;
13689
0
    newDoc->extSubset = NULL;
13690
    /* This leaks the namespace list if doc == NULL */
13691
0
    newDoc->oldNs = NULL;
13692
0
    xmlFreeDoc(newDoc);
13693
13694
0
    return(ret);
13695
0
}
13696
13697
/**
13698
 * xmlSAXParseEntity:
13699
 * @sax:  the SAX handler block
13700
 * @filename:  the filename
13701
 *
13702
 * DEPRECATED: Don't use.
13703
 *
13704
 * parse an XML external entity out of context and build a tree.
13705
 * It use the given SAX function block to handle the parsing callback.
13706
 * If sax is NULL, fallback to the default DOM tree building routines.
13707
 *
13708
 * [78] extParsedEnt ::= TextDecl? content
13709
 *
13710
 * This correspond to a "Well Balanced" chunk
13711
 *
13712
 * Returns the resulting document tree
13713
 */
13714
13715
xmlDocPtr
13716
0
xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13717
0
    xmlDocPtr ret;
13718
0
    xmlParserCtxtPtr ctxt;
13719
13720
0
    ctxt = xmlCreateFileParserCtxt(filename);
13721
0
    if (ctxt == NULL) {
13722
0
  return(NULL);
13723
0
    }
13724
0
    if (sax != NULL) {
13725
0
  if (ctxt->sax != NULL)
13726
0
      xmlFree(ctxt->sax);
13727
0
        ctxt->sax = sax;
13728
0
        ctxt->userData = NULL;
13729
0
    }
13730
13731
0
    xmlParseExtParsedEnt(ctxt);
13732
13733
0
    if (ctxt->wellFormed)
13734
0
  ret = ctxt->myDoc;
13735
0
    else {
13736
0
        ret = NULL;
13737
0
        xmlFreeDoc(ctxt->myDoc);
13738
0
        ctxt->myDoc = NULL;
13739
0
    }
13740
0
    if (sax != NULL)
13741
0
        ctxt->sax = NULL;
13742
0
    xmlFreeParserCtxt(ctxt);
13743
13744
0
    return(ret);
13745
0
}
13746
13747
/**
13748
 * xmlParseEntity:
13749
 * @filename:  the filename
13750
 *
13751
 * parse an XML external entity out of context and build a tree.
13752
 *
13753
 * [78] extParsedEnt ::= TextDecl? content
13754
 *
13755
 * This correspond to a "Well Balanced" chunk
13756
 *
13757
 * Returns the resulting document tree
13758
 */
13759
13760
xmlDocPtr
13761
0
xmlParseEntity(const char *filename) {
13762
0
    return(xmlSAXParseEntity(NULL, filename));
13763
0
}
13764
#endif /* LIBXML_SAX1_ENABLED */
13765
13766
/**
13767
 * xmlCreateEntityParserCtxtInternal:
13768
 * @URL:  the entity URL
13769
 * @ID:  the entity PUBLIC ID
13770
 * @base:  a possible base for the target URI
13771
 * @pctx:  parser context used to set options on new context
13772
 *
13773
 * Create a parser context for an external entity
13774
 * Automatic support for ZLIB/Compress compressed document is provided
13775
 * by default if found at compile-time.
13776
 *
13777
 * Returns the new parser context or NULL
13778
 */
13779
static xmlParserCtxtPtr
13780
xmlCreateEntityParserCtxtInternal(xmlSAXHandlerPtr sax, void *userData,
13781
        const xmlChar *URL, const xmlChar *ID, const xmlChar *base,
13782
459k
        xmlParserCtxtPtr pctx) {
13783
459k
    xmlParserCtxtPtr ctxt;
13784
459k
    xmlParserInputPtr inputStream;
13785
459k
    char *directory = NULL;
13786
459k
    xmlChar *uri;
13787
13788
459k
    ctxt = xmlNewSAXParserCtxt(sax, userData);
13789
459k
    if (ctxt == NULL) {
13790
0
  return(NULL);
13791
0
    }
13792
13793
459k
    if (pctx != NULL) {
13794
459k
        ctxt->options = pctx->options;
13795
459k
        ctxt->_private = pctx->_private;
13796
459k
  ctxt->input_id = pctx->input_id;
13797
459k
    }
13798
13799
    /* Don't read from stdin. */
13800
459k
    if (xmlStrcmp(URL, BAD_CAST "-") == 0)
13801
0
        URL = BAD_CAST "./-";
13802
13803
459k
    uri = xmlBuildURI(URL, base);
13804
13805
459k
    if (uri == NULL) {
13806
3.06k
  inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
13807
3.06k
  if (inputStream == NULL) {
13808
3.06k
      xmlFreeParserCtxt(ctxt);
13809
3.06k
      return(NULL);
13810
3.06k
  }
13811
13812
0
  inputPush(ctxt, inputStream);
13813
13814
0
  if ((ctxt->directory == NULL) && (directory == NULL))
13815
0
      directory = xmlParserGetDirectory((char *)URL);
13816
0
  if ((ctxt->directory == NULL) && (directory != NULL))
13817
0
      ctxt->directory = directory;
13818
456k
    } else {
13819
456k
  inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
13820
456k
  if (inputStream == NULL) {
13821
359k
      xmlFree(uri);
13822
359k
      xmlFreeParserCtxt(ctxt);
13823
359k
      return(NULL);
13824
359k
  }
13825
13826
96.4k
  inputPush(ctxt, inputStream);
13827
13828
96.4k
  if ((ctxt->directory == NULL) && (directory == NULL))
13829
96.4k
      directory = xmlParserGetDirectory((char *)uri);
13830
96.4k
  if ((ctxt->directory == NULL) && (directory != NULL))
13831
96.4k
      ctxt->directory = directory;
13832
96.4k
  xmlFree(uri);
13833
96.4k
    }
13834
96.4k
    return(ctxt);
13835
459k
}
13836
13837
/**
13838
 * xmlCreateEntityParserCtxt:
13839
 * @URL:  the entity URL
13840
 * @ID:  the entity PUBLIC ID
13841
 * @base:  a possible base for the target URI
13842
 *
13843
 * Create a parser context for an external entity
13844
 * Automatic support for ZLIB/Compress compressed document is provided
13845
 * by default if found at compile-time.
13846
 *
13847
 * Returns the new parser context or NULL
13848
 */
13849
xmlParserCtxtPtr
13850
xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
13851
0
                    const xmlChar *base) {
13852
0
    return xmlCreateEntityParserCtxtInternal(NULL, NULL, URL, ID, base, NULL);
13853
13854
0
}
13855
13856
/************************************************************************
13857
 *                  *
13858
 *    Front ends when parsing from a file     *
13859
 *                  *
13860
 ************************************************************************/
13861
13862
/**
13863
 * xmlCreateURLParserCtxt:
13864
 * @filename:  the filename or URL
13865
 * @options:  a combination of xmlParserOption
13866
 *
13867
 * Create a parser context for a file or URL content.
13868
 * Automatic support for ZLIB/Compress compressed document is provided
13869
 * by default if found at compile-time and for file accesses
13870
 *
13871
 * Returns the new parser context or NULL
13872
 */
13873
xmlParserCtxtPtr
13874
xmlCreateURLParserCtxt(const char *filename, int options)
13875
0
{
13876
0
    xmlParserCtxtPtr ctxt;
13877
0
    xmlParserInputPtr inputStream;
13878
0
    char *directory = NULL;
13879
13880
0
    ctxt = xmlNewParserCtxt();
13881
0
    if (ctxt == NULL) {
13882
0
  xmlErrMemory(NULL, "cannot allocate parser context");
13883
0
  return(NULL);
13884
0
    }
13885
13886
0
    if (options)
13887
0
  xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13888
0
    ctxt->linenumbers = 1;
13889
13890
0
    inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
13891
0
    if (inputStream == NULL) {
13892
0
  xmlFreeParserCtxt(ctxt);
13893
0
  return(NULL);
13894
0
    }
13895
13896
0
    inputPush(ctxt, inputStream);
13897
0
    if ((ctxt->directory == NULL) && (directory == NULL))
13898
0
        directory = xmlParserGetDirectory(filename);
13899
0
    if ((ctxt->directory == NULL) && (directory != NULL))
13900
0
        ctxt->directory = directory;
13901
13902
0
    return(ctxt);
13903
0
}
13904
13905
/**
13906
 * xmlCreateFileParserCtxt:
13907
 * @filename:  the filename
13908
 *
13909
 * Create a parser context for a file content.
13910
 * Automatic support for ZLIB/Compress compressed document is provided
13911
 * by default if found at compile-time.
13912
 *
13913
 * Returns the new parser context or NULL
13914
 */
13915
xmlParserCtxtPtr
13916
xmlCreateFileParserCtxt(const char *filename)
13917
0
{
13918
0
    return(xmlCreateURLParserCtxt(filename, 0));
13919
0
}
13920
13921
#ifdef LIBXML_SAX1_ENABLED
13922
/**
13923
 * xmlSAXParseFileWithData:
13924
 * @sax:  the SAX handler block
13925
 * @filename:  the filename
13926
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
13927
 *             documents
13928
 * @data:  the userdata
13929
 *
13930
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
13931
 *
13932
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13933
 * compressed document is provided by default if found at compile-time.
13934
 * It use the given SAX function block to handle the parsing callback.
13935
 * If sax is NULL, fallback to the default DOM tree building routines.
13936
 *
13937
 * User data (void *) is stored within the parser context in the
13938
 * context's _private member, so it is available nearly everywhere in libxml
13939
 *
13940
 * Returns the resulting document tree
13941
 */
13942
13943
xmlDocPtr
13944
xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
13945
0
                        int recovery, void *data) {
13946
0
    xmlDocPtr ret;
13947
0
    xmlParserCtxtPtr ctxt;
13948
13949
0
    xmlInitParser();
13950
13951
0
    ctxt = xmlCreateFileParserCtxt(filename);
13952
0
    if (ctxt == NULL) {
13953
0
  return(NULL);
13954
0
    }
13955
0
    if (sax != NULL) {
13956
0
  if (ctxt->sax != NULL)
13957
0
      xmlFree(ctxt->sax);
13958
0
        ctxt->sax = sax;
13959
0
    }
13960
0
    xmlDetectSAX2(ctxt);
13961
0
    if (data!=NULL) {
13962
0
  ctxt->_private = data;
13963
0
    }
13964
13965
0
    if (ctxt->directory == NULL)
13966
0
        ctxt->directory = xmlParserGetDirectory(filename);
13967
13968
0
    ctxt->recovery = recovery;
13969
13970
0
    xmlParseDocument(ctxt);
13971
13972
0
    if ((ctxt->wellFormed) || recovery) {
13973
0
        ret = ctxt->myDoc;
13974
0
  if ((ret != NULL) && (ctxt->input->buf != NULL)) {
13975
0
      if (ctxt->input->buf->compressed > 0)
13976
0
    ret->compression = 9;
13977
0
      else
13978
0
    ret->compression = ctxt->input->buf->compressed;
13979
0
  }
13980
0
    }
13981
0
    else {
13982
0
       ret = NULL;
13983
0
       xmlFreeDoc(ctxt->myDoc);
13984
0
       ctxt->myDoc = NULL;
13985
0
    }
13986
0
    if (sax != NULL)
13987
0
        ctxt->sax = NULL;
13988
0
    xmlFreeParserCtxt(ctxt);
13989
13990
0
    return(ret);
13991
0
}
13992
13993
/**
13994
 * xmlSAXParseFile:
13995
 * @sax:  the SAX handler block
13996
 * @filename:  the filename
13997
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
13998
 *             documents
13999
 *
14000
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
14001
 *
14002
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14003
 * compressed document is provided by default if found at compile-time.
14004
 * It use the given SAX function block to handle the parsing callback.
14005
 * If sax is NULL, fallback to the default DOM tree building routines.
14006
 *
14007
 * Returns the resulting document tree
14008
 */
14009
14010
xmlDocPtr
14011
xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
14012
0
                          int recovery) {
14013
0
    return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
14014
0
}
14015
14016
/**
14017
 * xmlRecoverDoc:
14018
 * @cur:  a pointer to an array of xmlChar
14019
 *
14020
 * DEPRECATED: Use xmlReadDoc with XML_PARSE_RECOVER.
14021
 *
14022
 * parse an XML in-memory document and build a tree.
14023
 * In the case the document is not Well Formed, a attempt to build a
14024
 * tree is tried anyway
14025
 *
14026
 * Returns the resulting document tree or NULL in case of failure
14027
 */
14028
14029
xmlDocPtr
14030
0
xmlRecoverDoc(const xmlChar *cur) {
14031
0
    return(xmlSAXParseDoc(NULL, cur, 1));
14032
0
}
14033
14034
/**
14035
 * xmlParseFile:
14036
 * @filename:  the filename
14037
 *
14038
 * DEPRECATED: Use xmlReadFile.
14039
 *
14040
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14041
 * compressed document is provided by default if found at compile-time.
14042
 *
14043
 * Returns the resulting document tree if the file was wellformed,
14044
 * NULL otherwise.
14045
 */
14046
14047
xmlDocPtr
14048
0
xmlParseFile(const char *filename) {
14049
0
    return(xmlSAXParseFile(NULL, filename, 0));
14050
0
}
14051
14052
/**
14053
 * xmlRecoverFile:
14054
 * @filename:  the filename
14055
 *
14056
 * DEPRECATED: Use xmlReadFile with XML_PARSE_RECOVER.
14057
 *
14058
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14059
 * compressed document is provided by default if found at compile-time.
14060
 * In the case the document is not Well Formed, it attempts to build
14061
 * a tree anyway
14062
 *
14063
 * Returns the resulting document tree or NULL in case of failure
14064
 */
14065
14066
xmlDocPtr
14067
0
xmlRecoverFile(const char *filename) {
14068
0
    return(xmlSAXParseFile(NULL, filename, 1));
14069
0
}
14070
14071
14072
/**
14073
 * xmlSetupParserForBuffer:
14074
 * @ctxt:  an XML parser context
14075
 * @buffer:  a xmlChar * buffer
14076
 * @filename:  a file name
14077
 *
14078
 * DEPRECATED: Don't use.
14079
 *
14080
 * Setup the parser context to parse a new buffer; Clears any prior
14081
 * contents from the parser context. The buffer parameter must not be
14082
 * NULL, but the filename parameter can be
14083
 */
14084
void
14085
xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
14086
                             const char* filename)
14087
0
{
14088
0
    xmlParserInputPtr input;
14089
14090
0
    if ((ctxt == NULL) || (buffer == NULL))
14091
0
        return;
14092
14093
0
    input = xmlNewInputStream(ctxt);
14094
0
    if (input == NULL) {
14095
0
        xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
14096
0
        xmlClearParserCtxt(ctxt);
14097
0
        return;
14098
0
    }
14099
14100
0
    xmlClearParserCtxt(ctxt);
14101
0
    if (filename != NULL)
14102
0
        input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
14103
0
    input->base = buffer;
14104
0
    input->cur = buffer;
14105
0
    input->end = &buffer[xmlStrlen(buffer)];
14106
0
    inputPush(ctxt, input);
14107
0
}
14108
14109
/**
14110
 * xmlSAXUserParseFile:
14111
 * @sax:  a SAX handler
14112
 * @user_data:  The user data returned on SAX callbacks
14113
 * @filename:  a file name
14114
 *
14115
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
14116
 *
14117
 * parse an XML file and call the given SAX handler routines.
14118
 * Automatic support for ZLIB/Compress compressed document is provided
14119
 *
14120
 * Returns 0 in case of success or a error number otherwise
14121
 */
14122
int
14123
xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
14124
0
                    const char *filename) {
14125
0
    int ret = 0;
14126
0
    xmlParserCtxtPtr ctxt;
14127
14128
0
    ctxt = xmlCreateFileParserCtxt(filename);
14129
0
    if (ctxt == NULL) return -1;
14130
0
    if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14131
0
  xmlFree(ctxt->sax);
14132
0
    ctxt->sax = sax;
14133
0
    xmlDetectSAX2(ctxt);
14134
14135
0
    if (user_data != NULL)
14136
0
  ctxt->userData = user_data;
14137
14138
0
    xmlParseDocument(ctxt);
14139
14140
0
    if (ctxt->wellFormed)
14141
0
  ret = 0;
14142
0
    else {
14143
0
        if (ctxt->errNo != 0)
14144
0
      ret = ctxt->errNo;
14145
0
  else
14146
0
      ret = -1;
14147
0
    }
14148
0
    if (sax != NULL)
14149
0
  ctxt->sax = NULL;
14150
0
    if (ctxt->myDoc != NULL) {
14151
0
        xmlFreeDoc(ctxt->myDoc);
14152
0
  ctxt->myDoc = NULL;
14153
0
    }
14154
0
    xmlFreeParserCtxt(ctxt);
14155
14156
0
    return ret;
14157
0
}
14158
#endif /* LIBXML_SAX1_ENABLED */
14159
14160
/************************************************************************
14161
 *                  *
14162
 *    Front ends when parsing from memory     *
14163
 *                  *
14164
 ************************************************************************/
14165
14166
/**
14167
 * xmlCreateMemoryParserCtxt:
14168
 * @buffer:  a pointer to a char array
14169
 * @size:  the size of the array
14170
 *
14171
 * Create a parser context for an XML in-memory document.
14172
 *
14173
 * Returns the new parser context or NULL
14174
 */
14175
xmlParserCtxtPtr
14176
1.41M
xmlCreateMemoryParserCtxt(const char *buffer, int size) {
14177
1.41M
    xmlParserCtxtPtr ctxt;
14178
1.41M
    xmlParserInputPtr input;
14179
1.41M
    xmlParserInputBufferPtr buf;
14180
14181
1.41M
    if (buffer == NULL)
14182
0
  return(NULL);
14183
1.41M
    if (size <= 0)
14184
145
  return(NULL);
14185
14186
1.41M
    ctxt = xmlNewParserCtxt();
14187
1.41M
    if (ctxt == NULL)
14188
0
  return(NULL);
14189
14190
1.41M
    buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14191
1.41M
    if (buf == NULL) {
14192
0
  xmlFreeParserCtxt(ctxt);
14193
0
  return(NULL);
14194
0
    }
14195
14196
1.41M
    input = xmlNewInputStream(ctxt);
14197
1.41M
    if (input == NULL) {
14198
0
  xmlFreeParserInputBuffer(buf);
14199
0
  xmlFreeParserCtxt(ctxt);
14200
0
  return(NULL);
14201
0
    }
14202
14203
1.41M
    input->filename = NULL;
14204
1.41M
    input->buf = buf;
14205
1.41M
    xmlBufResetInput(input->buf->buffer, input);
14206
14207
1.41M
    inputPush(ctxt, input);
14208
1.41M
    return(ctxt);
14209
1.41M
}
14210
14211
#ifdef LIBXML_SAX1_ENABLED
14212
/**
14213
 * xmlSAXParseMemoryWithData:
14214
 * @sax:  the SAX handler block
14215
 * @buffer:  an pointer to a char array
14216
 * @size:  the size of the array
14217
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14218
 *             documents
14219
 * @data:  the userdata
14220
 *
14221
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14222
 *
14223
 * parse an XML in-memory block and use the given SAX function block
14224
 * to handle the parsing callback. If sax is NULL, fallback to the default
14225
 * DOM tree building routines.
14226
 *
14227
 * User data (void *) is stored within the parser context in the
14228
 * context's _private member, so it is available nearly everywhere in libxml
14229
 *
14230
 * Returns the resulting document tree
14231
 */
14232
14233
xmlDocPtr
14234
xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14235
0
            int size, int recovery, void *data) {
14236
0
    xmlDocPtr ret;
14237
0
    xmlParserCtxtPtr ctxt;
14238
14239
0
    xmlInitParser();
14240
14241
0
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14242
0
    if (ctxt == NULL) return(NULL);
14243
0
    if (sax != NULL) {
14244
0
  if (ctxt->sax != NULL)
14245
0
      xmlFree(ctxt->sax);
14246
0
        ctxt->sax = sax;
14247
0
    }
14248
0
    xmlDetectSAX2(ctxt);
14249
0
    if (data!=NULL) {
14250
0
  ctxt->_private=data;
14251
0
    }
14252
14253
0
    ctxt->recovery = recovery;
14254
14255
0
    xmlParseDocument(ctxt);
14256
14257
0
    if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14258
0
    else {
14259
0
       ret = NULL;
14260
0
       xmlFreeDoc(ctxt->myDoc);
14261
0
       ctxt->myDoc = NULL;
14262
0
    }
14263
0
    if (sax != NULL)
14264
0
  ctxt->sax = NULL;
14265
0
    xmlFreeParserCtxt(ctxt);
14266
14267
0
    return(ret);
14268
0
}
14269
14270
/**
14271
 * xmlSAXParseMemory:
14272
 * @sax:  the SAX handler block
14273
 * @buffer:  an pointer to a char array
14274
 * @size:  the size of the array
14275
 * @recovery:  work in recovery mode, i.e. tries to read not Well Formed
14276
 *             documents
14277
 *
14278
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14279
 *
14280
 * parse an XML in-memory block and use the given SAX function block
14281
 * to handle the parsing callback. If sax is NULL, fallback to the default
14282
 * DOM tree building routines.
14283
 *
14284
 * Returns the resulting document tree
14285
 */
14286
xmlDocPtr
14287
xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14288
0
            int size, int recovery) {
14289
0
    return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
14290
0
}
14291
14292
/**
14293
 * xmlParseMemory:
14294
 * @buffer:  an pointer to a char array
14295
 * @size:  the size of the array
14296
 *
14297
 * DEPRECATED: Use xmlReadMemory.
14298
 *
14299
 * parse an XML in-memory block and build a tree.
14300
 *
14301
 * Returns the resulting document tree
14302
 */
14303
14304
0
xmlDocPtr xmlParseMemory(const char *buffer, int size) {
14305
0
   return(xmlSAXParseMemory(NULL, buffer, size, 0));
14306
0
}
14307
14308
/**
14309
 * xmlRecoverMemory:
14310
 * @buffer:  an pointer to a char array
14311
 * @size:  the size of the array
14312
 *
14313
 * DEPRECATED: Use xmlReadMemory with XML_PARSE_RECOVER.
14314
 *
14315
 * parse an XML in-memory block and build a tree.
14316
 * In the case the document is not Well Formed, an attempt to
14317
 * build a tree is tried anyway
14318
 *
14319
 * Returns the resulting document tree or NULL in case of error
14320
 */
14321
14322
0
xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
14323
0
   return(xmlSAXParseMemory(NULL, buffer, size, 1));
14324
0
}
14325
14326
/**
14327
 * xmlSAXUserParseMemory:
14328
 * @sax:  a SAX handler
14329
 * @user_data:  The user data returned on SAX callbacks
14330
 * @buffer:  an in-memory XML document input
14331
 * @size:  the length of the XML document in bytes
14332
 *
14333
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14334
 *
14335
 * parse an XML in-memory buffer and call the given SAX handler routines.
14336
 *
14337
 * Returns 0 in case of success or a error number otherwise
14338
 */
14339
int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
14340
0
        const char *buffer, int size) {
14341
0
    int ret = 0;
14342
0
    xmlParserCtxtPtr ctxt;
14343
14344
0
    xmlInitParser();
14345
14346
0
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14347
0
    if (ctxt == NULL) return -1;
14348
0
    if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14349
0
        xmlFree(ctxt->sax);
14350
0
    ctxt->sax = sax;
14351
0
    xmlDetectSAX2(ctxt);
14352
14353
0
    if (user_data != NULL)
14354
0
  ctxt->userData = user_data;
14355
14356
0
    xmlParseDocument(ctxt);
14357
14358
0
    if (ctxt->wellFormed)
14359
0
  ret = 0;
14360
0
    else {
14361
0
        if (ctxt->errNo != 0)
14362
0
      ret = ctxt->errNo;
14363
0
  else
14364
0
      ret = -1;
14365
0
    }
14366
0
    if (sax != NULL)
14367
0
        ctxt->sax = NULL;
14368
0
    if (ctxt->myDoc != NULL) {
14369
0
        xmlFreeDoc(ctxt->myDoc);
14370
0
  ctxt->myDoc = NULL;
14371
0
    }
14372
0
    xmlFreeParserCtxt(ctxt);
14373
14374
0
    return ret;
14375
0
}
14376
#endif /* LIBXML_SAX1_ENABLED */
14377
14378
/**
14379
 * xmlCreateDocParserCtxt:
14380
 * @cur:  a pointer to an array of xmlChar
14381
 *
14382
 * Creates a parser context for an XML in-memory document.
14383
 *
14384
 * Returns the new parser context or NULL
14385
 */
14386
xmlParserCtxtPtr
14387
0
xmlCreateDocParserCtxt(const xmlChar *cur) {
14388
0
    int len;
14389
14390
0
    if (cur == NULL)
14391
0
  return(NULL);
14392
0
    len = xmlStrlen(cur);
14393
0
    return(xmlCreateMemoryParserCtxt((const char *)cur, len));
14394
0
}
14395
14396
#ifdef LIBXML_SAX1_ENABLED
14397
/**
14398
 * xmlSAXParseDoc:
14399
 * @sax:  the SAX handler block
14400
 * @cur:  a pointer to an array of xmlChar
14401
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14402
 *             documents
14403
 *
14404
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadDoc.
14405
 *
14406
 * parse an XML in-memory document and build a tree.
14407
 * It use the given SAX function block to handle the parsing callback.
14408
 * If sax is NULL, fallback to the default DOM tree building routines.
14409
 *
14410
 * Returns the resulting document tree
14411
 */
14412
14413
xmlDocPtr
14414
0
xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
14415
0
    xmlDocPtr ret;
14416
0
    xmlParserCtxtPtr ctxt;
14417
0
    xmlSAXHandlerPtr oldsax = NULL;
14418
14419
0
    if (cur == NULL) return(NULL);
14420
14421
14422
0
    ctxt = xmlCreateDocParserCtxt(cur);
14423
0
    if (ctxt == NULL) return(NULL);
14424
0
    if (sax != NULL) {
14425
0
        oldsax = ctxt->sax;
14426
0
        ctxt->sax = sax;
14427
0
        ctxt->userData = NULL;
14428
0
    }
14429
0
    xmlDetectSAX2(ctxt);
14430
14431
0
    xmlParseDocument(ctxt);
14432
0
    if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14433
0
    else {
14434
0
       ret = NULL;
14435
0
       xmlFreeDoc(ctxt->myDoc);
14436
0
       ctxt->myDoc = NULL;
14437
0
    }
14438
0
    if (sax != NULL)
14439
0
  ctxt->sax = oldsax;
14440
0
    xmlFreeParserCtxt(ctxt);
14441
14442
0
    return(ret);
14443
0
}
14444
14445
/**
14446
 * xmlParseDoc:
14447
 * @cur:  a pointer to an array of xmlChar
14448
 *
14449
 * DEPRECATED: Use xmlReadDoc.
14450
 *
14451
 * parse an XML in-memory document and build a tree.
14452
 *
14453
 * Returns the resulting document tree
14454
 */
14455
14456
xmlDocPtr
14457
0
xmlParseDoc(const xmlChar *cur) {
14458
0
    return(xmlSAXParseDoc(NULL, cur, 0));
14459
0
}
14460
#endif /* LIBXML_SAX1_ENABLED */
14461
14462
#ifdef LIBXML_LEGACY_ENABLED
14463
/************************************************************************
14464
 *                  *
14465
 *  Specific function to keep track of entities references    *
14466
 *  and used by the XSLT debugger         *
14467
 *                  *
14468
 ************************************************************************/
14469
14470
static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14471
14472
/**
14473
 * xmlAddEntityReference:
14474
 * @ent : A valid entity
14475
 * @firstNode : A valid first node for children of entity
14476
 * @lastNode : A valid last node of children entity
14477
 *
14478
 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14479
 */
14480
static void
14481
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14482
                      xmlNodePtr lastNode)
14483
{
14484
    if (xmlEntityRefFunc != NULL) {
14485
        (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14486
    }
14487
}
14488
14489
14490
/**
14491
 * xmlSetEntityReferenceFunc:
14492
 * @func: A valid function
14493
 *
14494
 * Set the function to call call back when a xml reference has been made
14495
 */
14496
void
14497
xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14498
{
14499
    xmlEntityRefFunc = func;
14500
}
14501
#endif /* LIBXML_LEGACY_ENABLED */
14502
14503
/************************************************************************
14504
 *                  *
14505
 *        Miscellaneous       *
14506
 *                  *
14507
 ************************************************************************/
14508
14509
/*
 * Library initialization flag: set to 1 at the end of xmlInitParser()
 * and reset to 0 at the end of xmlCleanupParser().
 */
static int xmlParserInitialized = 0;
14510
14511
/**
14512
 * xmlInitParser:
14513
 *
14514
 * Initialization function for the XML parser.
14515
 * This is not reentrant. Call once before processing in case of
14516
 * use in multithreaded programs.
14517
 */
14518
14519
void
14520
6.07G
xmlInitParser(void) {
14521
    /*
14522
     * Note that the initialization code must not make memory allocations.
14523
     */
14524
6.07G
    if (xmlParserInitialized != 0)
14525
6.07G
  return;
14526
14527
3.70k
#ifdef LIBXML_THREAD_ENABLED
14528
3.70k
    __xmlGlobalInitMutexLock();
14529
3.70k
    if (xmlParserInitialized == 0) {
14530
3.70k
#endif
14531
#if defined(_WIN32) && (!defined(LIBXML_STATIC) || defined(LIBXML_STATIC_FOR_DLL))
14532
        if (xmlFree == free)
14533
            atexit(xmlCleanupParser);
14534
#endif
14535
14536
3.70k
  xmlInitThreadsInternal();
14537
3.70k
  xmlInitGlobalsInternal();
14538
3.70k
  xmlInitMemoryInternal();
14539
3.70k
        __xmlInitializeDict();
14540
3.70k
  xmlInitEncodingInternal();
14541
3.70k
  xmlRegisterDefaultInputCallbacks();
14542
3.70k
#ifdef LIBXML_OUTPUT_ENABLED
14543
3.70k
  xmlRegisterDefaultOutputCallbacks();
14544
3.70k
#endif /* LIBXML_OUTPUT_ENABLED */
14545
3.70k
#if defined(LIBXML_XPATH_ENABLED) || defined(LIBXML_SCHEMAS_ENABLED)
14546
3.70k
  xmlInitXPathInternal();
14547
3.70k
#endif
14548
3.70k
  xmlParserInitialized = 1;
14549
3.70k
#ifdef LIBXML_THREAD_ENABLED
14550
3.70k
    }
14551
3.70k
    __xmlGlobalInitMutexUnlock();
14552
3.70k
#endif
14553
3.70k
}
14554
14555
/**
14556
 * xmlCleanupParser:
14557
 *
14558
 * This function name is somewhat misleading. It does not clean up
14559
 * parser state, it cleans up memory allocated by the library itself.
14560
 * It is a cleanup function for the XML library. It tries to reclaim all
14561
 * related global memory allocated for the library processing.
14562
 * It doesn't deallocate any document related memory. One should
14563
 * call xmlCleanupParser() only when the process has finished using
14564
 * the library and all XML/HTML documents built with it.
14565
 * See also xmlInitParser() which has the opposite function of preparing
14566
 * the library for operations.
14567
 *
14568
 * WARNING: if your application is multithreaded or has plugin support
14569
 *          calling this may crash the application if another thread or
14570
 *          a plugin is still using libxml2. It's sometimes very hard to
14571
 *          guess if libxml2 is in use in the application, some libraries
14572
 *          or plugins may use it without notice. In case of doubt abstain
14573
 *          from calling this function or do it just before calling exit()
14574
 *          to avoid leak reports from valgrind !
14575
 */
14576
14577
void
14578
0
xmlCleanupParser(void) {
14579
0
    if (!xmlParserInitialized)
14580
0
  return;
14581
14582
0
    xmlCleanupCharEncodingHandlers();
14583
0
#ifdef LIBXML_CATALOG_ENABLED
14584
0
    xmlCatalogCleanup();
14585
0
#endif
14586
0
    xmlCleanupDictInternal();
14587
0
    xmlCleanupInputCallbacks();
14588
0
#ifdef LIBXML_OUTPUT_ENABLED
14589
0
    xmlCleanupOutputCallbacks();
14590
0
#endif
14591
0
#ifdef LIBXML_SCHEMAS_ENABLED
14592
0
    xmlSchemaCleanupTypes();
14593
0
    xmlRelaxNGCleanupTypes();
14594
0
#endif
14595
0
    xmlCleanupGlobalsInternal();
14596
0
    xmlCleanupThreadsInternal();
14597
0
    xmlCleanupMemoryInternal();
14598
0
    xmlParserInitialized = 0;
14599
0
}
14600
14601
#if defined(HAVE_ATTRIBUTE_DESTRUCTOR) && !defined(LIBXML_STATIC) && \
    !defined(_WIN32)
/*
 * Function tagged with ATTRIBUTE_DESTRUCTOR that runs
 * xmlCleanupParser(), but only while xmlFree still equals the C
 * library free().
 */
static void
ATTRIBUTE_DESTRUCTOR
xmlDestructor(void) {
    /*
     * Calling custom deallocation functions in a destructor can cause
     * problems, for example with Nokogiri.
     */
    if (xmlFree != free)
        return;

    xmlCleanupParser();
}
#endif
14614
14615
/************************************************************************
14616
 *                  *
14617
 *  New set (2.6.0) of simpler and more flexible APIs   *
14618
 *                  *
14619
 ************************************************************************/
14620
14621
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A NULL @str is ignored, and a NULL "dict" means the
 * string is always freed.
 *
 * The expansion is a single do { } while (0) statement, so the macro
 * can be used as the body of an unbraced if/else. The caller supplies
 * the terminating semicolon.
 */
#define DICT_FREE(str)                                          \
    do {                                                        \
        if ((str) && ((!dict) ||                                \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))  \
            xmlFree((char *)(str));                             \
    } while (0)
14632
14633
/**
14634
 * xmlCtxtReset:
14635
 * @ctxt: an XML parser context
14636
 *
14637
 * Reset a parser context
14638
 */
14639
void
14640
xmlCtxtReset(xmlParserCtxtPtr ctxt)
14641
0
{
14642
0
    xmlParserInputPtr input;
14643
0
    xmlDictPtr dict;
14644
14645
0
    if (ctxt == NULL)
14646
0
        return;
14647
14648
0
    dict = ctxt->dict;
14649
14650
0
    while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14651
0
        xmlFreeInputStream(input);
14652
0
    }
14653
0
    ctxt->inputNr = 0;
14654
0
    ctxt->input = NULL;
14655
14656
0
    ctxt->spaceNr = 0;
14657
0
    if (ctxt->spaceTab != NULL) {
14658
0
  ctxt->spaceTab[0] = -1;
14659
0
  ctxt->space = &ctxt->spaceTab[0];
14660
0
    } else {
14661
0
        ctxt->space = NULL;
14662
0
    }
14663
14664
14665
0
    ctxt->nodeNr = 0;
14666
0
    ctxt->node = NULL;
14667
14668
0
    ctxt->nameNr = 0;
14669
0
    ctxt->name = NULL;
14670
14671
0
    ctxt->nsNr = 0;
14672
14673
0
    DICT_FREE(ctxt->version);
14674
0
    ctxt->version = NULL;
14675
0
    DICT_FREE(ctxt->encoding);
14676
0
    ctxt->encoding = NULL;
14677
0
    DICT_FREE(ctxt->directory);
14678
0
    ctxt->directory = NULL;
14679
0
    DICT_FREE(ctxt->extSubURI);
14680
0
    ctxt->extSubURI = NULL;
14681
0
    DICT_FREE(ctxt->extSubSystem);
14682
0
    ctxt->extSubSystem = NULL;
14683
0
    if (ctxt->myDoc != NULL)
14684
0
        xmlFreeDoc(ctxt->myDoc);
14685
0
    ctxt->myDoc = NULL;
14686
14687
0
    ctxt->standalone = -1;
14688
0
    ctxt->hasExternalSubset = 0;
14689
0
    ctxt->hasPErefs = 0;
14690
0
    ctxt->html = 0;
14691
0
    ctxt->external = 0;
14692
0
    ctxt->instate = XML_PARSER_START;
14693
0
    ctxt->token = 0;
14694
14695
0
    ctxt->wellFormed = 1;
14696
0
    ctxt->nsWellFormed = 1;
14697
0
    ctxt->disableSAX = 0;
14698
0
    ctxt->valid = 1;
14699
#if 0
14700
    ctxt->vctxt.userData = ctxt;
14701
    ctxt->vctxt.error = xmlParserValidityError;
14702
    ctxt->vctxt.warning = xmlParserValidityWarning;
14703
#endif
14704
0
    ctxt->record_info = 0;
14705
0
    ctxt->checkIndex = 0;
14706
0
    ctxt->endCheckState = 0;
14707
0
    ctxt->inSubset = 0;
14708
0
    ctxt->errNo = XML_ERR_OK;
14709
0
    ctxt->depth = 0;
14710
0
    ctxt->charset = XML_CHAR_ENCODING_UTF8;
14711
0
    ctxt->catalogs = NULL;
14712
0
    ctxt->sizeentities = 0;
14713
0
    ctxt->sizeentcopy = 0;
14714
0
    xmlInitNodeInfoSeq(&ctxt->node_seq);
14715
14716
0
    if (ctxt->attsDefault != NULL) {
14717
0
        xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
14718
0
        ctxt->attsDefault = NULL;
14719
0
    }
14720
0
    if (ctxt->attsSpecial != NULL) {
14721
0
        xmlHashFree(ctxt->attsSpecial, NULL);
14722
0
        ctxt->attsSpecial = NULL;
14723
0
    }
14724
14725
0
#ifdef LIBXML_CATALOG_ENABLED
14726
0
    if (ctxt->catalogs != NULL)
14727
0
  xmlCatalogFreeLocal(ctxt->catalogs);
14728
0
#endif
14729
0
    ctxt->nbErrors = 0;
14730
0
    ctxt->nbWarnings = 0;
14731
0
    if (ctxt->lastError.code != XML_ERR_OK)
14732
0
        xmlResetError(&ctxt->lastError);
14733
0
}
14734
14735
/**
14736
 * xmlCtxtResetPush:
14737
 * @ctxt: an XML parser context
14738
 * @chunk:  a pointer to an array of chars
14739
 * @size:  number of chars in the array
14740
 * @filename:  an optional file name or URI
14741
 * @encoding:  the document encoding, or NULL
14742
 *
14743
 * Reset a push parser context
14744
 *
14745
 * Returns 0 in case of success and 1 in case of error
14746
 */
14747
int
14748
xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14749
                 int size, const char *filename, const char *encoding)
14750
0
{
14751
0
    xmlParserInputPtr inputStream;
14752
0
    xmlParserInputBufferPtr buf;
14753
0
    xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14754
14755
0
    if (ctxt == NULL)
14756
0
        return(1);
14757
14758
0
    if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14759
0
        enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14760
14761
0
    buf = xmlAllocParserInputBuffer(enc);
14762
0
    if (buf == NULL)
14763
0
        return(1);
14764
14765
0
    if (ctxt == NULL) {
14766
0
        xmlFreeParserInputBuffer(buf);
14767
0
        return(1);
14768
0
    }
14769
14770
0
    xmlCtxtReset(ctxt);
14771
14772
0
    if (filename == NULL) {
14773
0
        ctxt->directory = NULL;
14774
0
    } else {
14775
0
        ctxt->directory = xmlParserGetDirectory(filename);
14776
0
    }
14777
14778
0
    inputStream = xmlNewInputStream(ctxt);
14779
0
    if (inputStream == NULL) {
14780
0
        xmlFreeParserInputBuffer(buf);
14781
0
        return(1);
14782
0
    }
14783
14784
0
    if (filename == NULL)
14785
0
        inputStream->filename = NULL;
14786
0
    else
14787
0
        inputStream->filename = (char *)
14788
0
            xmlCanonicPath((const xmlChar *) filename);
14789
0
    inputStream->buf = buf;
14790
0
    xmlBufResetInput(buf->buffer, inputStream);
14791
14792
0
    inputPush(ctxt, inputStream);
14793
14794
0
    if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14795
0
        (ctxt->input->buf != NULL)) {
14796
0
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
14797
0
        size_t cur = ctxt->input->cur - ctxt->input->base;
14798
14799
0
        xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14800
14801
0
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
14802
#ifdef DEBUG_PUSH
14803
        xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14804
#endif
14805
0
    }
14806
14807
0
    if (encoding != NULL) {
14808
0
        xmlCharEncodingHandlerPtr hdlr;
14809
14810
0
        if (ctxt->encoding != NULL)
14811
0
      xmlFree((xmlChar *) ctxt->encoding);
14812
0
        ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14813
14814
0
        hdlr = xmlFindCharEncodingHandler(encoding);
14815
0
        if (hdlr != NULL) {
14816
0
            xmlSwitchToEncoding(ctxt, hdlr);
14817
0
  } else {
14818
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14819
0
            "Unsupported encoding %s\n", BAD_CAST encoding);
14820
0
        }
14821
0
    } else if (enc != XML_CHAR_ENCODING_NONE) {
14822
0
        xmlSwitchEncoding(ctxt, enc);
14823
0
    }
14824
14825
0
    return(0);
14826
0
}
14827
14828
14829
/**
14830
 * xmlCtxtUseOptionsInternal:
14831
 * @ctxt: an XML parser context
14832
 * @options:  a combination of xmlParserOption
14833
 * @encoding:  the user provided encoding to use
14834
 *
14835
 * Applies the options to the parser context
14836
 *
14837
 * Returns 0 in case of success, the set of unknown or unimplemented options
14838
 *         in case of error.
14839
 */
14840
static int
14841
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
14842
1.92M
{
14843
1.92M
    if (ctxt == NULL)
14844
0
        return(-1);
14845
1.92M
    if (encoding != NULL) {
14846
0
        if (ctxt->encoding != NULL)
14847
0
      xmlFree((xmlChar *) ctxt->encoding);
14848
0
        ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14849
0
    }
14850
1.92M
    if (options & XML_PARSE_RECOVER) {
14851
418k
        ctxt->recovery = 1;
14852
418k
        options -= XML_PARSE_RECOVER;
14853
418k
  ctxt->options |= XML_PARSE_RECOVER;
14854
418k
    } else
14855
1.50M
        ctxt->recovery = 0;
14856
1.92M
    if (options & XML_PARSE_DTDLOAD) {
14857
1.72M
        ctxt->loadsubset = XML_DETECT_IDS;
14858
1.72M
        options -= XML_PARSE_DTDLOAD;
14859
1.72M
  ctxt->options |= XML_PARSE_DTDLOAD;
14860
1.72M
    } else
14861
204k
        ctxt->loadsubset = 0;
14862
1.92M
    if (options & XML_PARSE_DTDATTR) {
14863
289k
        ctxt->loadsubset |= XML_COMPLETE_ATTRS;
14864
289k
        options -= XML_PARSE_DTDATTR;
14865
289k
  ctxt->options |= XML_PARSE_DTDATTR;
14866
289k
    }
14867
1.92M
    if (options & XML_PARSE_NOENT) {
14868
1.52M
        ctxt->replaceEntities = 1;
14869
        /* ctxt->loadsubset |= XML_DETECT_IDS; */
14870
1.52M
        options -= XML_PARSE_NOENT;
14871
1.52M
  ctxt->options |= XML_PARSE_NOENT;
14872
1.52M
    } else
14873
404k
        ctxt->replaceEntities = 0;
14874
1.92M
    if (options & XML_PARSE_PEDANTIC) {
14875
99.6k
        ctxt->pedantic = 1;
14876
99.6k
        options -= XML_PARSE_PEDANTIC;
14877
99.6k
  ctxt->options |= XML_PARSE_PEDANTIC;
14878
99.6k
    } else
14879
1.82M
        ctxt->pedantic = 0;
14880
1.92M
    if (options & XML_PARSE_NOBLANKS) {
14881
324k
        ctxt->keepBlanks = 0;
14882
324k
        ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
14883
324k
        options -= XML_PARSE_NOBLANKS;
14884
324k
  ctxt->options |= XML_PARSE_NOBLANKS;
14885
324k
    } else
14886
1.60M
        ctxt->keepBlanks = 1;
14887
1.92M
    if (options & XML_PARSE_DTDVALID) {
14888
385k
        ctxt->validate = 1;
14889
385k
        if (options & XML_PARSE_NOWARNING)
14890
167k
            ctxt->vctxt.warning = NULL;
14891
385k
        if (options & XML_PARSE_NOERROR)
14892
221k
            ctxt->vctxt.error = NULL;
14893
385k
        options -= XML_PARSE_DTDVALID;
14894
385k
  ctxt->options |= XML_PARSE_DTDVALID;
14895
385k
    } else
14896
1.54M
        ctxt->validate = 0;
14897
1.92M
    if (options & XML_PARSE_NOWARNING) {
14898
334k
        ctxt->sax->warning = NULL;
14899
334k
        options -= XML_PARSE_NOWARNING;
14900
334k
    }
14901
1.92M
    if (options & XML_PARSE_NOERROR) {
14902
373k
        ctxt->sax->error = NULL;
14903
373k
        ctxt->sax->fatalError = NULL;
14904
373k
        options -= XML_PARSE_NOERROR;
14905
373k
    }
14906
1.92M
#ifdef LIBXML_SAX1_ENABLED
14907
1.92M
    if (options & XML_PARSE_SAX1) {
14908
381k
        ctxt->sax->startElement = xmlSAX2StartElement;
14909
381k
        ctxt->sax->endElement = xmlSAX2EndElement;
14910
381k
        ctxt->sax->startElementNs = NULL;
14911
381k
        ctxt->sax->endElementNs = NULL;
14912
381k
        ctxt->sax->initialized = 1;
14913
381k
        options -= XML_PARSE_SAX1;
14914
381k
  ctxt->options |= XML_PARSE_SAX1;
14915
381k
    }
14916
1.92M
#endif /* LIBXML_SAX1_ENABLED */
14917
1.92M
    if (options & XML_PARSE_NODICT) {
14918
319k
        ctxt->dictNames = 0;
14919
319k
        options -= XML_PARSE_NODICT;
14920
319k
  ctxt->options |= XML_PARSE_NODICT;
14921
1.60M
    } else {
14922
1.60M
        ctxt->dictNames = 1;
14923
1.60M
    }
14924
1.92M
    if (options & XML_PARSE_NOCDATA) {
14925
303k
        ctxt->sax->cdataBlock = NULL;
14926
303k
        options -= XML_PARSE_NOCDATA;
14927
303k
  ctxt->options |= XML_PARSE_NOCDATA;
14928
303k
    }
14929
1.92M
    if (options & XML_PARSE_NSCLEAN) {
14930
397k
  ctxt->options |= XML_PARSE_NSCLEAN;
14931
397k
        options -= XML_PARSE_NSCLEAN;
14932
397k
    }
14933
1.92M
    if (options & XML_PARSE_NONET) {
14934
228k
  ctxt->options |= XML_PARSE_NONET;
14935
228k
        options -= XML_PARSE_NONET;
14936
228k
    }
14937
1.92M
    if (options & XML_PARSE_COMPACT) {
14938
851k
  ctxt->options |= XML_PARSE_COMPACT;
14939
851k
        options -= XML_PARSE_COMPACT;
14940
851k
    }
14941
1.92M
    if (options & XML_PARSE_OLD10) {
14942
380k
  ctxt->options |= XML_PARSE_OLD10;
14943
380k
        options -= XML_PARSE_OLD10;
14944
380k
    }
14945
1.92M
    if (options & XML_PARSE_NOBASEFIX) {
14946
292k
  ctxt->options |= XML_PARSE_NOBASEFIX;
14947
292k
        options -= XML_PARSE_NOBASEFIX;
14948
292k
    }
14949
1.92M
    if (options & XML_PARSE_HUGE) {
14950
245k
  ctxt->options |= XML_PARSE_HUGE;
14951
245k
        options -= XML_PARSE_HUGE;
14952
245k
        if (ctxt->dict != NULL)
14953
245k
            xmlDictSetLimit(ctxt->dict, 0);
14954
245k
    }
14955
1.92M
    if (options & XML_PARSE_OLDSAX) {
14956
300k
  ctxt->options |= XML_PARSE_OLDSAX;
14957
300k
        options -= XML_PARSE_OLDSAX;
14958
300k
    }
14959
1.92M
    if (options & XML_PARSE_IGNORE_ENC) {
14960
351k
  ctxt->options |= XML_PARSE_IGNORE_ENC;
14961
351k
        options -= XML_PARSE_IGNORE_ENC;
14962
351k
    }
14963
1.92M
    if (options & XML_PARSE_BIG_LINES) {
14964
377k
  ctxt->options |= XML_PARSE_BIG_LINES;
14965
377k
        options -= XML_PARSE_BIG_LINES;
14966
377k
    }
14967
1.92M
    ctxt->linenumbers = 1;
14968
1.92M
    return (options);
14969
1.92M
}
14970
14971
/**
14972
 * xmlCtxtUseOptions:
14973
 * @ctxt: an XML parser context
14974
 * @options:  a combination of xmlParserOption
14975
 *
14976
 * Applies the options to the parser context
14977
 *
14978
 * Returns 0 in case of success, the set of unknown or unimplemented options
14979
 *         in case of error.
14980
 */
14981
int
14982
xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
14983
1.28M
{
14984
1.28M
   return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
14985
1.28M
}
14986
14987
/**
14988
 * xmlDoRead:
14989
 * @ctxt:  an XML parser context
14990
 * @URL:  the base URL to use for the document
14991
 * @encoding:  the document encoding, or NULL
14992
 * @options:  a combination of xmlParserOption
14993
 * @reuse:  keep the context for reuse
14994
 *
14995
 * Common front-end for the xmlRead functions
14996
 *
14997
 * Returns the resulting document tree or NULL
14998
 */
14999
static xmlDocPtr
15000
xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
15001
          int options, int reuse)
15002
642k
{
15003
642k
    xmlDocPtr ret;
15004
15005
642k
    xmlCtxtUseOptionsInternal(ctxt, options, encoding);
15006
642k
    if (encoding != NULL) {
15007
0
        xmlCharEncodingHandlerPtr hdlr;
15008
15009
0
  hdlr = xmlFindCharEncodingHandler(encoding);
15010
0
  if (hdlr != NULL)
15011
0
      xmlSwitchToEncoding(ctxt, hdlr);
15012
0
    }
15013
642k
    if ((URL != NULL) && (ctxt->input != NULL) &&
15014
642k
        (ctxt->input->filename == NULL))
15015
642k
        ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
15016
642k
    xmlParseDocument(ctxt);
15017
642k
    if ((ctxt->wellFormed) || ctxt->recovery)
15018
197k
        ret = ctxt->myDoc;
15019
444k
    else {
15020
444k
        ret = NULL;
15021
444k
  if (ctxt->myDoc != NULL) {
15022
394k
      xmlFreeDoc(ctxt->myDoc);
15023
394k
  }
15024
444k
    }
15025
642k
    ctxt->myDoc = NULL;
15026
642k
    if (!reuse) {
15027
642k
  xmlFreeParserCtxt(ctxt);
15028
642k
    }
15029
15030
642k
    return (ret);
15031
642k
}
15032
15033
/**
15034
 * xmlReadDoc:
15035
 * @cur:  a pointer to a zero terminated string
15036
 * @URL:  the base URL to use for the document
15037
 * @encoding:  the document encoding, or NULL
15038
 * @options:  a combination of xmlParserOption
15039
 *
15040
 * parse an XML in-memory document and build a tree.
15041
 *
15042
 * Returns the resulting document tree
15043
 */
15044
xmlDocPtr
15045
xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
15046
0
{
15047
0
    xmlParserCtxtPtr ctxt;
15048
15049
0
    if (cur == NULL)
15050
0
        return (NULL);
15051
0
    xmlInitParser();
15052
15053
0
    ctxt = xmlCreateDocParserCtxt(cur);
15054
0
    if (ctxt == NULL)
15055
0
        return (NULL);
15056
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15057
0
}
15058
15059
/**
15060
 * xmlReadFile:
15061
 * @filename:  a file or URL
15062
 * @encoding:  the document encoding, or NULL
15063
 * @options:  a combination of xmlParserOption
15064
 *
15065
 * parse an XML file from the filesystem or the network.
15066
 *
15067
 * Returns the resulting document tree
15068
 */
15069
xmlDocPtr
15070
xmlReadFile(const char *filename, const char *encoding, int options)
15071
0
{
15072
0
    xmlParserCtxtPtr ctxt;
15073
15074
0
    xmlInitParser();
15075
0
    ctxt = xmlCreateURLParserCtxt(filename, options);
15076
0
    if (ctxt == NULL)
15077
0
        return (NULL);
15078
0
    return (xmlDoRead(ctxt, NULL, encoding, options, 0));
15079
0
}
15080
15081
/**
15082
 * xmlReadMemory:
15083
 * @buffer:  a pointer to a char array
15084
 * @size:  the size of the array
15085
 * @URL:  the base URL to use for the document
15086
 * @encoding:  the document encoding, or NULL
15087
 * @options:  a combination of xmlParserOption
15088
 *
15089
 * parse an XML in-memory document and build a tree.
15090
 *
15091
 * Returns the resulting document tree
15092
 */
15093
xmlDocPtr
15094
xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
15095
642k
{
15096
642k
    xmlParserCtxtPtr ctxt;
15097
15098
642k
    xmlInitParser();
15099
642k
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
15100
642k
    if (ctxt == NULL)
15101
76
        return (NULL);
15102
642k
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15103
642k
}
15104
15105
/**
15106
 * xmlReadFd:
15107
 * @fd:  an open file descriptor
15108
 * @URL:  the base URL to use for the document
15109
 * @encoding:  the document encoding, or NULL
15110
 * @options:  a combination of xmlParserOption
15111
 *
15112
 * parse an XML from a file descriptor and build a tree.
15113
 * NOTE that the file descriptor will not be closed when the
15114
 *      reader is closed or reset.
15115
 *
15116
 * Returns the resulting document tree
15117
 */
15118
xmlDocPtr
15119
xmlReadFd(int fd, const char *URL, const char *encoding, int options)
15120
0
{
15121
0
    xmlParserCtxtPtr ctxt;
15122
0
    xmlParserInputBufferPtr input;
15123
0
    xmlParserInputPtr stream;
15124
15125
0
    if (fd < 0)
15126
0
        return (NULL);
15127
0
    xmlInitParser();
15128
15129
0
    input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15130
0
    if (input == NULL)
15131
0
        return (NULL);
15132
0
    input->closecallback = NULL;
15133
0
    ctxt = xmlNewParserCtxt();
15134
0
    if (ctxt == NULL) {
15135
0
        xmlFreeParserInputBuffer(input);
15136
0
        return (NULL);
15137
0
    }
15138
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15139
0
    if (stream == NULL) {
15140
0
        xmlFreeParserInputBuffer(input);
15141
0
  xmlFreeParserCtxt(ctxt);
15142
0
        return (NULL);
15143
0
    }
15144
0
    inputPush(ctxt, stream);
15145
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15146
0
}
15147
15148
/**
15149
 * xmlReadIO:
15150
 * @ioread:  an I/O read function
15151
 * @ioclose:  an I/O close function
15152
 * @ioctx:  an I/O handler
15153
 * @URL:  the base URL to use for the document
15154
 * @encoding:  the document encoding, or NULL
15155
 * @options:  a combination of xmlParserOption
15156
 *
15157
 * parse an XML document from I/O functions and source and build a tree.
15158
 *
15159
 * Returns the resulting document tree
15160
 */
15161
xmlDocPtr
15162
xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
15163
          void *ioctx, const char *URL, const char *encoding, int options)
15164
0
{
15165
0
    xmlParserCtxtPtr ctxt;
15166
0
    xmlParserInputBufferPtr input;
15167
0
    xmlParserInputPtr stream;
15168
15169
0
    if (ioread == NULL)
15170
0
        return (NULL);
15171
0
    xmlInitParser();
15172
15173
0
    input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15174
0
                                         XML_CHAR_ENCODING_NONE);
15175
0
    if (input == NULL) {
15176
0
        if (ioclose != NULL)
15177
0
            ioclose(ioctx);
15178
0
        return (NULL);
15179
0
    }
15180
0
    ctxt = xmlNewParserCtxt();
15181
0
    if (ctxt == NULL) {
15182
0
        xmlFreeParserInputBuffer(input);
15183
0
        return (NULL);
15184
0
    }
15185
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15186
0
    if (stream == NULL) {
15187
0
        xmlFreeParserInputBuffer(input);
15188
0
  xmlFreeParserCtxt(ctxt);
15189
0
        return (NULL);
15190
0
    }
15191
0
    inputPush(ctxt, stream);
15192
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15193
0
}
15194
15195
/**
15196
 * xmlCtxtReadDoc:
15197
 * @ctxt:  an XML parser context
15198
 * @cur:  a pointer to a zero terminated string
15199
 * @URL:  the base URL to use for the document
15200
 * @encoding:  the document encoding, or NULL
15201
 * @options:  a combination of xmlParserOption
15202
 *
15203
 * parse an XML in-memory document and build a tree.
15204
 * This reuses the existing @ctxt parser context
15205
 *
15206
 * Returns the resulting document tree
15207
 */
15208
xmlDocPtr
15209
xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
15210
               const char *URL, const char *encoding, int options)
15211
0
{
15212
0
    if (cur == NULL)
15213
0
        return (NULL);
15214
0
    return (xmlCtxtReadMemory(ctxt, (const char *) cur, xmlStrlen(cur), URL,
15215
0
                              encoding, options));
15216
0
}
15217
15218
/**
15219
 * xmlCtxtReadFile:
15220
 * @ctxt:  an XML parser context
15221
 * @filename:  a file or URL
15222
 * @encoding:  the document encoding, or NULL
15223
 * @options:  a combination of xmlParserOption
15224
 *
15225
 * parse an XML file from the filesystem or the network.
15226
 * This reuses the existing @ctxt parser context
15227
 *
15228
 * Returns the resulting document tree
15229
 */
15230
xmlDocPtr
15231
xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15232
                const char *encoding, int options)
15233
0
{
15234
0
    xmlParserInputPtr stream;
15235
15236
0
    if (filename == NULL)
15237
0
        return (NULL);
15238
0
    if (ctxt == NULL)
15239
0
        return (NULL);
15240
0
    xmlInitParser();
15241
15242
0
    xmlCtxtReset(ctxt);
15243
15244
0
    stream = xmlLoadExternalEntity(filename, NULL, ctxt);
15245
0
    if (stream == NULL) {
15246
0
        return (NULL);
15247
0
    }
15248
0
    inputPush(ctxt, stream);
15249
0
    return (xmlDoRead(ctxt, NULL, encoding, options, 1));
15250
0
}
15251
15252
/**
15253
 * xmlCtxtReadMemory:
15254
 * @ctxt:  an XML parser context
15255
 * @buffer:  a pointer to a char array
15256
 * @size:  the size of the array
15257
 * @URL:  the base URL to use for the document
15258
 * @encoding:  the document encoding, or NULL
15259
 * @options:  a combination of xmlParserOption
15260
 *
15261
 * parse an XML in-memory document and build a tree.
15262
 * This reuses the existing @ctxt parser context
15263
 *
15264
 * Returns the resulting document tree
15265
 */
15266
xmlDocPtr
15267
xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
15268
                  const char *URL, const char *encoding, int options)
15269
0
{
15270
0
    xmlParserInputBufferPtr input;
15271
0
    xmlParserInputPtr stream;
15272
15273
0
    if (ctxt == NULL)
15274
0
        return (NULL);
15275
0
    if (buffer == NULL)
15276
0
        return (NULL);
15277
0
    xmlInitParser();
15278
15279
0
    xmlCtxtReset(ctxt);
15280
15281
0
    input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
15282
0
    if (input == NULL) {
15283
0
  return(NULL);
15284
0
    }
15285
15286
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15287
0
    if (stream == NULL) {
15288
0
  xmlFreeParserInputBuffer(input);
15289
0
  return(NULL);
15290
0
    }
15291
15292
0
    inputPush(ctxt, stream);
15293
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15294
0
}
15295
15296
/**
15297
 * xmlCtxtReadFd:
15298
 * @ctxt:  an XML parser context
15299
 * @fd:  an open file descriptor
15300
 * @URL:  the base URL to use for the document
15301
 * @encoding:  the document encoding, or NULL
15302
 * @options:  a combination of xmlParserOption
15303
 *
15304
 * parse an XML from a file descriptor and build a tree.
15305
 * This reuses the existing @ctxt parser context
15306
 * NOTE that the file descriptor will not be closed when the
15307
 *      reader is closed or reset.
15308
 *
15309
 * Returns the resulting document tree
15310
 */
15311
xmlDocPtr
15312
xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15313
              const char *URL, const char *encoding, int options)
15314
0
{
15315
0
    xmlParserInputBufferPtr input;
15316
0
    xmlParserInputPtr stream;
15317
15318
0
    if (fd < 0)
15319
0
        return (NULL);
15320
0
    if (ctxt == NULL)
15321
0
        return (NULL);
15322
0
    xmlInitParser();
15323
15324
0
    xmlCtxtReset(ctxt);
15325
15326
15327
0
    input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15328
0
    if (input == NULL)
15329
0
        return (NULL);
15330
0
    input->closecallback = NULL;
15331
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15332
0
    if (stream == NULL) {
15333
0
        xmlFreeParserInputBuffer(input);
15334
0
        return (NULL);
15335
0
    }
15336
0
    inputPush(ctxt, stream);
15337
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15338
0
}
15339
15340
/**
15341
 * xmlCtxtReadIO:
15342
 * @ctxt:  an XML parser context
15343
 * @ioread:  an I/O read function
15344
 * @ioclose:  an I/O close function
15345
 * @ioctx:  an I/O handler
15346
 * @URL:  the base URL to use for the document
15347
 * @encoding:  the document encoding, or NULL
15348
 * @options:  a combination of xmlParserOption
15349
 *
15350
 * parse an XML document from I/O functions and source and build a tree.
15351
 * This reuses the existing @ctxt parser context
15352
 *
15353
 * Returns the resulting document tree
15354
 */
15355
xmlDocPtr
15356
xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15357
              xmlInputCloseCallback ioclose, void *ioctx,
15358
        const char *URL,
15359
              const char *encoding, int options)
15360
0
{
15361
0
    xmlParserInputBufferPtr input;
15362
0
    xmlParserInputPtr stream;
15363
15364
0
    if (ioread == NULL)
15365
0
        return (NULL);
15366
0
    if (ctxt == NULL)
15367
0
        return (NULL);
15368
0
    xmlInitParser();
15369
15370
0
    xmlCtxtReset(ctxt);
15371
15372
0
    input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15373
0
                                         XML_CHAR_ENCODING_NONE);
15374
0
    if (input == NULL) {
15375
0
        if (ioclose != NULL)
15376
0
            ioclose(ioctx);
15377
0
        return (NULL);
15378
0
    }
15379
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15380
0
    if (stream == NULL) {
15381
0
        xmlFreeParserInputBuffer(input);
15382
0
        return (NULL);
15383
0
    }
15384
0
    inputPush(ctxt, stream);
15385
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15386
0
}
15387