Coverage Report

Created: 2024-09-27 03:18

/src/libxml2/parser.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3
 *            implemented on top of the SAX interfaces
4
 *
5
 * References:
6
 *   The XML specification:
7
 *     http://www.w3.org/TR/REC-xml
8
 *   Original 1.0 version:
9
 *     http://www.w3.org/TR/1998/REC-xml-19980210
10
 *   XML second edition working draft
11
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
12
 *
13
 * Okay this is a big file, the parser core is around 7000 lines, then it
14
 * is followed by the progressive parser top routines, then the various
15
 * high level APIs to call the parser and a few miscellaneous functions.
16
 * A number of helper functions and deprecated ones have been moved to
17
 * parserInternals.c to reduce this file size.
18
 * As much as possible the functions are associated with their relative
19
 * production in the XML specification. A few productions defining the
20
 * different ranges of character are actually implemented either in
21
 * parserInternals.h or parserInternals.c
22
 * The DOM tree build is realized from the default SAX callbacks in
23
 * the module SAX.c.
24
 * The routines doing the validation checks are in valid.c and called either
25
 * from the SAX callbacks or as standalone functions using a preparsed
26
 * document.
27
 *
28
 * See Copyright for the status of this software.
29
 *
30
 * daniel@veillard.com
31
 */
32
33
/* To avoid EBCDIC trouble when parsing on zOS */
34
#if defined(__MVS__)
35
#pragma convert("ISO8859-1")
36
#endif
37
38
#define IN_LIBXML
39
#include "libxml.h"
40
41
#if defined(_WIN32)
42
#define XML_DIR_SEP '\\'
43
#else
44
#define XML_DIR_SEP '/'
45
#endif
46
47
#include <stdlib.h>
48
#include <limits.h>
49
#include <string.h>
50
#include <stdarg.h>
51
#include <stddef.h>
52
#include <ctype.h>
53
#include <stdlib.h>
54
#include <libxml/xmlmemory.h>
55
#include <libxml/threads.h>
56
#include <libxml/globals.h>
57
#include <libxml/tree.h>
58
#include <libxml/parser.h>
59
#include <libxml/parserInternals.h>
60
#include <libxml/HTMLparser.h>
61
#include <libxml/valid.h>
62
#include <libxml/entities.h>
63
#include <libxml/xmlerror.h>
64
#include <libxml/encoding.h>
65
#include <libxml/xmlIO.h>
66
#include <libxml/uri.h>
67
#ifdef LIBXML_CATALOG_ENABLED
68
#include <libxml/catalog.h>
69
#endif
70
#ifdef LIBXML_SCHEMAS_ENABLED
71
#include <libxml/xmlschemastypes.h>
72
#include <libxml/relaxng.h>
73
#endif
74
#if defined(LIBXML_XPATH_ENABLED) || defined(LIBXML_SCHEMAS_ENABLED)
75
#include <libxml/xpath.h>
76
#endif
77
78
#include "private/buf.h"
79
#include "private/dict.h"
80
#include "private/enc.h"
81
#include "private/entities.h"
82
#include "private/error.h"
83
#include "private/globals.h"
84
#include "private/html.h"
85
#include "private/io.h"
86
#include "private/memory.h"
87
#include "private/parser.h"
88
#include "private/threads.h"
89
#include "private/xpath.h"
90
91
struct _xmlStartTag {
92
    const xmlChar *prefix;
93
    const xmlChar *URI;
94
    int line;
95
    int nsNr;
96
};
97
98
static xmlParserCtxtPtr
99
xmlCreateEntityParserCtxtInternal(xmlSAXHandlerPtr sax, void *userData,
100
        const xmlChar *URL, const xmlChar *ID, const xmlChar *base,
101
        xmlParserCtxtPtr pctx);
102
103
static void xmlHaltParser(xmlParserCtxtPtr ctxt);
104
105
static int
106
xmlParseElementStart(xmlParserCtxtPtr ctxt);
107
108
static void
109
xmlParseElementEnd(xmlParserCtxtPtr ctxt);
110
111
/************************************************************************
112
 *                  *
113
 *  Arbitrary limits set in the parser. See XML_PARSE_HUGE    *
114
 *                  *
115
 ************************************************************************/
116
117
4.61M
#define XML_MAX_HUGE_LENGTH 1000000000
118
119
#define XML_PARSER_BIG_ENTITY 1000
120
#define XML_PARSER_LOT_ENTITY 5000
121
122
/*
123
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
124
 *    replacement over the size in byte of the input indicates that you have
125
 *    an exponential behaviour. A value of 10 corresponds to at least 3 entity
126
 *    replacement per byte of input.
127
 */
128
498
#define XML_PARSER_NON_LINEAR 10
129
130
34.8M
#define XML_ENT_FIXED_COST 50
131
132
/**
133
 * xmlParserMaxDepth:
134
 *
135
 * arbitrary depth limit for the XML documents that we allow to
136
 * process. This is not a limitation of the parser but a safety
137
 * boundary feature. It can be disabled with the XML_PARSE_HUGE
138
 * parser option.
139
 */
140
unsigned int xmlParserMaxDepth = 256;
141
142
143
144
#define SAX2 1
145
171M
#define XML_PARSER_BIG_BUFFER_SIZE 300
146
7.72G
#define XML_PARSER_BUFFER_SIZE 100
147
772k
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
148
149
/**
150
 * XML_PARSER_CHUNK_SIZE
151
 *
152
 * When calling GROW that's the minimal amount of data
153
 * the parser expects to have received. It is not a hard
154
 * limit but an optimization when reading strings like Names
155
 * It is not strictly needed as long as the input's available characters
156
 * are followed by 0, which should be provided by the I/O level
157
 */
158
57.0M
#define XML_PARSER_CHUNK_SIZE 100
159
160
/*
161
 * List of XML prefixed PI allowed by W3C specs
162
 */
163
164
static const char* const xmlW3CPIs[] = {
165
    "xml-stylesheet",
166
    "xml-model",
167
    NULL
168
};
169
170
171
/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
172
static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
173
                                              const xmlChar **str);
174
175
static xmlParserErrors
176
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
177
                xmlSAXHandlerPtr sax,
178
          void *user_data, int depth, const xmlChar *URL,
179
          const xmlChar *ID, xmlNodePtr *list);
180
181
static int
182
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
183
                          const char *encoding);
184
#ifdef LIBXML_LEGACY_ENABLED
185
static void
186
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
187
                      xmlNodePtr lastNode);
188
#endif /* LIBXML_LEGACY_ENABLED */
189
190
static xmlParserErrors
191
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
192
          const xmlChar *string, void *user_data, xmlNodePtr *lst);
193
194
static int
195
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
196
197
/************************************************************************
198
 *                  *
199
 *    Some factorized error routines        *
200
 *                  *
201
 ************************************************************************/
202
203
/**
204
 * xmlErrAttributeDup:
205
 * @ctxt:  an XML parser context
206
 * @prefix:  the attribute prefix
207
 * @localname:  the attribute localname
208
 *
209
 * Handle an attribute redefinition error
210
 */
211
static void
212
xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
213
                   const xmlChar * localname)
214
106k
{
215
106k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
216
106k
        (ctxt->instate == XML_PARSER_EOF))
217
10
  return;
218
106k
    if (ctxt != NULL)
219
106k
  ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
220
221
106k
    if (prefix == NULL)
222
61.3k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
223
61.3k
                        XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
224
61.3k
                        (const char *) localname, NULL, NULL, 0, 0,
225
61.3k
                        "Attribute %s redefined\n", localname);
226
45.5k
    else
227
45.5k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
228
45.5k
                        XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
229
45.5k
                        (const char *) prefix, (const char *) localname,
230
45.5k
                        NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
231
45.5k
                        localname);
232
106k
    if (ctxt != NULL) {
233
106k
  ctxt->wellFormed = 0;
234
106k
  if (ctxt->recovery == 0)
235
26.1k
      ctxt->disableSAX = 1;
236
106k
    }
237
106k
}
238
239
/**
240
 * xmlFatalErr:
241
 * @ctxt:  an XML parser context
242
 * @error:  the error number
243
 * @info:  extra information string
244
 *
245
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
246
 */
247
static void
248
xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
249
5.74M
{
250
5.74M
    const char *errmsg;
251
252
5.74M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
253
5.74M
        (ctxt->instate == XML_PARSER_EOF))
254
23.9k
  return;
255
5.71M
    switch (error) {
256
142k
        case XML_ERR_INVALID_HEX_CHARREF:
257
142k
            errmsg = "CharRef: invalid hexadecimal value";
258
142k
            break;
259
215k
        case XML_ERR_INVALID_DEC_CHARREF:
260
215k
            errmsg = "CharRef: invalid decimal value";
261
215k
            break;
262
0
        case XML_ERR_INVALID_CHARREF:
263
0
            errmsg = "CharRef: invalid value";
264
0
            break;
265
2.40M
        case XML_ERR_INTERNAL_ERROR:
266
2.40M
            errmsg = "internal error";
267
2.40M
            break;
268
0
        case XML_ERR_PEREF_AT_EOF:
269
0
            errmsg = "PEReference at end of document";
270
0
            break;
271
0
        case XML_ERR_PEREF_IN_PROLOG:
272
0
            errmsg = "PEReference in prolog";
273
0
            break;
274
0
        case XML_ERR_PEREF_IN_EPILOG:
275
0
            errmsg = "PEReference in epilog";
276
0
            break;
277
0
        case XML_ERR_PEREF_NO_NAME:
278
0
            errmsg = "PEReference: no name";
279
0
            break;
280
10.5k
        case XML_ERR_PEREF_SEMICOL_MISSING:
281
10.5k
            errmsg = "PEReference: expecting ';'";
282
10.5k
            break;
283
1.77k
        case XML_ERR_ENTITY_LOOP:
284
1.77k
            errmsg = "Detected an entity reference loop";
285
1.77k
            break;
286
0
        case XML_ERR_ENTITY_NOT_STARTED:
287
0
            errmsg = "EntityValue: \" or ' expected";
288
0
            break;
289
5.89k
        case XML_ERR_ENTITY_PE_INTERNAL:
290
5.89k
            errmsg = "PEReferences forbidden in internal subset";
291
5.89k
            break;
292
4.70k
        case XML_ERR_ENTITY_NOT_FINISHED:
293
4.70k
            errmsg = "EntityValue: \" or ' expected";
294
4.70k
            break;
295
137k
        case XML_ERR_ATTRIBUTE_NOT_STARTED:
296
137k
            errmsg = "AttValue: \" or ' expected";
297
137k
            break;
298
620k
        case XML_ERR_LT_IN_ATTRIBUTE:
299
620k
            errmsg = "Unescaped '<' not allowed in attributes values";
300
620k
            break;
301
10.5k
        case XML_ERR_LITERAL_NOT_STARTED:
302
10.5k
            errmsg = "SystemLiteral \" or ' expected";
303
10.5k
            break;
304
14.8k
        case XML_ERR_LITERAL_NOT_FINISHED:
305
14.8k
            errmsg = "Unfinished System or Public ID \" or ' expected";
306
14.8k
            break;
307
285k
        case XML_ERR_MISPLACED_CDATA_END:
308
285k
            errmsg = "Sequence ']]>' not allowed in content";
309
285k
            break;
310
8.96k
        case XML_ERR_URI_REQUIRED:
311
8.96k
            errmsg = "SYSTEM or PUBLIC, the URI is missing";
312
8.96k
            break;
313
1.63k
        case XML_ERR_PUBID_REQUIRED:
314
1.63k
            errmsg = "PUBLIC, the Public Identifier is missing";
315
1.63k
            break;
316
98.6k
        case XML_ERR_HYPHEN_IN_COMMENT:
317
98.6k
            errmsg = "Comment must not contain '--' (double-hyphen)";
318
98.6k
            break;
319
111k
        case XML_ERR_PI_NOT_STARTED:
320
111k
            errmsg = "xmlParsePI : no target name";
321
111k
            break;
322
28.6k
        case XML_ERR_RESERVED_XML_NAME:
323
28.6k
            errmsg = "Invalid PI name";
324
28.6k
            break;
325
1.58k
        case XML_ERR_NOTATION_NOT_STARTED:
326
1.58k
            errmsg = "NOTATION: Name expected here";
327
1.58k
            break;
328
9.46k
        case XML_ERR_NOTATION_NOT_FINISHED:
329
9.46k
            errmsg = "'>' required to close NOTATION declaration";
330
9.46k
            break;
331
15.2k
        case XML_ERR_VALUE_REQUIRED:
332
15.2k
            errmsg = "Entity value required";
333
15.2k
            break;
334
4.29k
        case XML_ERR_URI_FRAGMENT:
335
4.29k
            errmsg = "Fragment not allowed";
336
4.29k
            break;
337
15.7k
        case XML_ERR_ATTLIST_NOT_STARTED:
338
15.7k
            errmsg = "'(' required to start ATTLIST enumeration";
339
15.7k
            break;
340
1.58k
        case XML_ERR_NMTOKEN_REQUIRED:
341
1.58k
            errmsg = "NmToken expected in ATTLIST enumeration";
342
1.58k
            break;
343
6.77k
        case XML_ERR_ATTLIST_NOT_FINISHED:
344
6.77k
            errmsg = "')' required to finish ATTLIST enumeration";
345
6.77k
            break;
346
3.30k
        case XML_ERR_MIXED_NOT_STARTED:
347
3.30k
            errmsg = "MixedContentDecl : '|' or ')*' expected";
348
3.30k
            break;
349
0
        case XML_ERR_PCDATA_REQUIRED:
350
0
            errmsg = "MixedContentDecl : '#PCDATA' expected";
351
0
            break;
352
9.52k
        case XML_ERR_ELEMCONTENT_NOT_STARTED:
353
9.52k
            errmsg = "ContentDecl : Name or '(' expected";
354
9.52k
            break;
355
23.9k
        case XML_ERR_ELEMCONTENT_NOT_FINISHED:
356
23.9k
            errmsg = "ContentDecl : ',' '|' or ')' expected";
357
23.9k
            break;
358
0
        case XML_ERR_PEREF_IN_INT_SUBSET:
359
0
            errmsg =
360
0
                "PEReference: forbidden within markup decl in internal subset";
361
0
            break;
362
467k
        case XML_ERR_GT_REQUIRED:
363
467k
            errmsg = "expected '>'";
364
467k
            break;
365
449
        case XML_ERR_CONDSEC_INVALID:
366
449
            errmsg = "XML conditional section '[' expected";
367
449
            break;
368
18.1k
        case XML_ERR_EXT_SUBSET_NOT_FINISHED:
369
18.1k
            errmsg = "Content error in the external subset";
370
18.1k
            break;
371
1.73k
        case XML_ERR_CONDSEC_INVALID_KEYWORD:
372
1.73k
            errmsg =
373
1.73k
                "conditional section INCLUDE or IGNORE keyword expected";
374
1.73k
            break;
375
2.13k
        case XML_ERR_CONDSEC_NOT_FINISHED:
376
2.13k
            errmsg = "XML conditional section not closed";
377
2.13k
            break;
378
340
        case XML_ERR_XMLDECL_NOT_STARTED:
379
340
            errmsg = "Text declaration '<?xml' required";
380
340
            break;
381
154k
        case XML_ERR_XMLDECL_NOT_FINISHED:
382
154k
            errmsg = "parsing XML declaration: '?>' expected";
383
154k
            break;
384
0
        case XML_ERR_EXT_ENTITY_STANDALONE:
385
0
            errmsg = "external parsed entities cannot be standalone";
386
0
            break;
387
395k
        case XML_ERR_ENTITYREF_SEMICOL_MISSING:
388
395k
            errmsg = "EntityRef: expecting ';'";
389
395k
            break;
390
43.8k
        case XML_ERR_DOCTYPE_NOT_FINISHED:
391
43.8k
            errmsg = "DOCTYPE improperly terminated";
392
43.8k
            break;
393
0
        case XML_ERR_LTSLASH_REQUIRED:
394
0
            errmsg = "EndTag: '</' not found";
395
0
            break;
396
10.7k
        case XML_ERR_EQUAL_REQUIRED:
397
10.7k
            errmsg = "expected '='";
398
10.7k
            break;
399
28.7k
        case XML_ERR_STRING_NOT_CLOSED:
400
28.7k
            errmsg = "String not closed expecting \" or '";
401
28.7k
            break;
402
7.89k
        case XML_ERR_STRING_NOT_STARTED:
403
7.89k
            errmsg = "String not started expecting ' or \"";
404
7.89k
            break;
405
1.21k
        case XML_ERR_ENCODING_NAME:
406
1.21k
            errmsg = "Invalid XML encoding name";
407
1.21k
            break;
408
1.60k
        case XML_ERR_STANDALONE_VALUE:
409
1.60k
            errmsg = "standalone accepts only 'yes' or 'no'";
410
1.60k
            break;
411
26.2k
        case XML_ERR_DOCUMENT_EMPTY:
412
26.2k
            errmsg = "Document is empty";
413
26.2k
            break;
414
237k
        case XML_ERR_DOCUMENT_END:
415
237k
            errmsg = "Extra content at the end of the document";
416
237k
            break;
417
8.45k
        case XML_ERR_NOT_WELL_BALANCED:
418
8.45k
            errmsg = "chunk is not well balanced";
419
8.45k
            break;
420
0
        case XML_ERR_EXTRA_CONTENT:
421
0
            errmsg = "extra content at the end of well balanced chunk";
422
0
            break;
423
96.0k
        case XML_ERR_VERSION_MISSING:
424
96.0k
            errmsg = "Malformed declaration expecting version";
425
96.0k
            break;
426
120
        case XML_ERR_NAME_TOO_LONG:
427
120
            errmsg = "Name too long";
428
120
            break;
429
#if 0
430
        case:
431
            errmsg = "";
432
            break;
433
#endif
434
19.3k
        default:
435
19.3k
            errmsg = "Unregistered error message";
436
5.71M
    }
437
5.71M
    if (ctxt != NULL)
438
5.71M
  ctxt->errNo = error;
439
5.71M
    if (info == NULL) {
440
3.31M
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
441
3.31M
                        XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s\n",
442
3.31M
                        errmsg);
443
3.31M
    } else {
444
2.40M
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
445
2.40M
                        XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s: %s\n",
446
2.40M
                        errmsg, info);
447
2.40M
    }
448
5.71M
    if (ctxt != NULL) {
449
5.71M
  ctxt->wellFormed = 0;
450
5.71M
  if (ctxt->recovery == 0)
451
807k
      ctxt->disableSAX = 1;
452
5.71M
    }
453
5.71M
}
454
455
/**
456
 * xmlFatalErrMsg:
457
 * @ctxt:  an XML parser context
458
 * @error:  the error number
459
 * @msg:  the error message
460
 *
461
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
462
 */
463
static void LIBXML_ATTR_FORMAT(3,0)
464
xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
465
               const char *msg)
466
8.78M
{
467
8.78M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
468
8.78M
        (ctxt->instate == XML_PARSER_EOF))
469
35
  return;
470
8.78M
    if (ctxt != NULL)
471
8.78M
  ctxt->errNo = error;
472
8.78M
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
473
8.78M
                    XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
474
8.78M
    if (ctxt != NULL) {
475
8.78M
  ctxt->wellFormed = 0;
476
8.78M
  if (ctxt->recovery == 0)
477
1.21M
      ctxt->disableSAX = 1;
478
8.78M
    }
479
8.78M
}
480
481
/**
482
 * xmlWarningMsg:
483
 * @ctxt:  an XML parser context
484
 * @error:  the error number
485
 * @msg:  the error message
486
 * @str1:  extra data
487
 * @str2:  extra data
488
 *
489
 * Handle a warning.
490
 */
491
static void LIBXML_ATTR_FORMAT(3,0)
492
xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
493
              const char *msg, const xmlChar *str1, const xmlChar *str2)
494
1.39M
{
495
1.39M
    xmlStructuredErrorFunc schannel = NULL;
496
497
1.39M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
498
1.39M
        (ctxt->instate == XML_PARSER_EOF))
499
0
  return;
500
1.39M
    if ((ctxt != NULL) && (ctxt->sax != NULL) &&
501
1.39M
        (ctxt->sax->initialized == XML_SAX2_MAGIC))
502
871k
        schannel = ctxt->sax->serror;
503
1.39M
    if (ctxt != NULL) {
504
1.39M
        __xmlRaiseError(schannel,
505
1.39M
                    (ctxt->sax) ? ctxt->sax->warning : NULL,
506
1.39M
                    ctxt->userData,
507
1.39M
                    ctxt, NULL, XML_FROM_PARSER, error,
508
1.39M
                    XML_ERR_WARNING, NULL, 0,
509
1.39M
        (const char *) str1, (const char *) str2, NULL, 0, 0,
510
1.39M
        msg, (const char *) str1, (const char *) str2);
511
1.39M
    } else {
512
0
        __xmlRaiseError(schannel, NULL, NULL,
513
0
                    ctxt, NULL, XML_FROM_PARSER, error,
514
0
                    XML_ERR_WARNING, NULL, 0,
515
0
        (const char *) str1, (const char *) str2, NULL, 0, 0,
516
0
        msg, (const char *) str1, (const char *) str2);
517
0
    }
518
1.39M
}
519
520
/**
521
 * xmlValidityError:
522
 * @ctxt:  an XML parser context
523
 * @error:  the error number
524
 * @msg:  the error message
525
 * @str1:  extra data
526
 *
527
 * Handle a validity error.
528
 */
529
static void LIBXML_ATTR_FORMAT(3,0)
530
xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
531
              const char *msg, const xmlChar *str1, const xmlChar *str2)
532
17.4k
{
533
17.4k
    xmlStructuredErrorFunc schannel = NULL;
534
535
17.4k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
536
17.4k
        (ctxt->instate == XML_PARSER_EOF))
537
0
  return;
538
17.4k
    if (ctxt != NULL) {
539
17.4k
  ctxt->errNo = error;
540
17.4k
  if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
541
9.63k
      schannel = ctxt->sax->serror;
542
17.4k
    }
543
17.4k
    if (ctxt != NULL) {
544
17.4k
        __xmlRaiseError(schannel,
545
17.4k
                    ctxt->vctxt.error, ctxt->vctxt.userData,
546
17.4k
                    ctxt, NULL, XML_FROM_DTD, error,
547
17.4k
                    XML_ERR_ERROR, NULL, 0, (const char *) str1,
548
17.4k
        (const char *) str2, NULL, 0, 0,
549
17.4k
        msg, (const char *) str1, (const char *) str2);
550
17.4k
  ctxt->valid = 0;
551
17.4k
    } else {
552
0
        __xmlRaiseError(schannel, NULL, NULL,
553
0
                    ctxt, NULL, XML_FROM_DTD, error,
554
0
                    XML_ERR_ERROR, NULL, 0, (const char *) str1,
555
0
        (const char *) str2, NULL, 0, 0,
556
0
        msg, (const char *) str1, (const char *) str2);
557
0
    }
558
17.4k
}
559
560
/**
561
 * xmlFatalErrMsgInt:
562
 * @ctxt:  an XML parser context
563
 * @error:  the error number
564
 * @msg:  the error message
565
 * @val:  an integer value
566
 *
567
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
568
 */
569
static void LIBXML_ATTR_FORMAT(3,0)
570
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
571
                  const char *msg, int val)
572
13.5M
{
573
13.5M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
574
13.5M
        (ctxt->instate == XML_PARSER_EOF))
575
0
  return;
576
13.5M
    if (ctxt != NULL)
577
13.5M
  ctxt->errNo = error;
578
13.5M
    __xmlRaiseError(NULL, NULL, NULL,
579
13.5M
                    ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
580
13.5M
                    NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
581
13.5M
    if (ctxt != NULL) {
582
13.5M
  ctxt->wellFormed = 0;
583
13.5M
  if (ctxt->recovery == 0)
584
607k
      ctxt->disableSAX = 1;
585
13.5M
    }
586
13.5M
}
587
588
/**
589
 * xmlFatalErrMsgStrIntStr:
590
 * @ctxt:  an XML parser context
591
 * @error:  the error number
592
 * @msg:  the error message
593
 * @str1:  a string info
594
 * @val:  an integer value
595
 * @str2:  a string info
596
 *
597
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
598
 */
599
static void LIBXML_ATTR_FORMAT(3,0)
600
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
601
                  const char *msg, const xmlChar *str1, int val,
602
      const xmlChar *str2)
603
2.25M
{
604
2.25M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
605
2.25M
        (ctxt->instate == XML_PARSER_EOF))
606
0
  return;
607
2.25M
    if (ctxt != NULL)
608
2.25M
  ctxt->errNo = error;
609
2.25M
    __xmlRaiseError(NULL, NULL, NULL,
610
2.25M
                    ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
611
2.25M
                    NULL, 0, (const char *) str1, (const char *) str2,
612
2.25M
        NULL, val, 0, msg, str1, val, str2);
613
2.25M
    if (ctxt != NULL) {
614
2.25M
  ctxt->wellFormed = 0;
615
2.25M
  if (ctxt->recovery == 0)
616
514k
      ctxt->disableSAX = 1;
617
2.25M
    }
618
2.25M
}
619
620
/**
621
 * xmlFatalErrMsgStr:
622
 * @ctxt:  an XML parser context
623
 * @error:  the error number
624
 * @msg:  the error message
625
 * @val:  a string value
626
 *
627
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
628
 */
629
static void LIBXML_ATTR_FORMAT(3,0)
630
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
631
                  const char *msg, const xmlChar * val)
632
7.92M
{
633
7.92M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
634
7.92M
        (ctxt->instate == XML_PARSER_EOF))
635
14
  return;
636
7.92M
    if (ctxt != NULL)
637
7.92M
  ctxt->errNo = error;
638
7.92M
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
639
7.92M
                    XML_FROM_PARSER, error, XML_ERR_FATAL,
640
7.92M
                    NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
641
7.92M
                    val);
642
7.92M
    if (ctxt != NULL) {
643
7.92M
  ctxt->wellFormed = 0;
644
7.92M
  if (ctxt->recovery == 0)
645
1.71M
      ctxt->disableSAX = 1;
646
7.92M
    }
647
7.92M
}
648
649
/**
650
 * xmlErrMsgStr:
651
 * @ctxt:  an XML parser context
652
 * @error:  the error number
653
 * @msg:  the error message
654
 * @val:  a string value
655
 *
656
 * Handle a non fatal parser error
657
 */
658
static void LIBXML_ATTR_FORMAT(3,0)
659
xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
660
                  const char *msg, const xmlChar * val)
661
206k
{
662
206k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
663
206k
        (ctxt->instate == XML_PARSER_EOF))
664
0
  return;
665
206k
    if (ctxt != NULL)
666
206k
  ctxt->errNo = error;
667
206k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
668
206k
                    XML_FROM_PARSER, error, XML_ERR_ERROR,
669
206k
                    NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
670
206k
                    val);
671
206k
}
672
673
/**
674
 * xmlNsErr:
675
 * @ctxt:  an XML parser context
676
 * @error:  the error number
677
 * @msg:  the message
678
 * @info1:  extra information string
679
 * @info2:  extra information string
680
 *
681
 * Handle a namespace error (raised as XML_ERR_ERROR; clears nsWellFormed)
682
 */
683
static void LIBXML_ATTR_FORMAT(3,0)
684
xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
685
         const char *msg,
686
         const xmlChar * info1, const xmlChar * info2,
687
         const xmlChar * info3)
688
1.78M
{
689
1.78M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
690
1.78M
        (ctxt->instate == XML_PARSER_EOF))
691
119
  return;
692
1.78M
    if (ctxt != NULL)
693
1.78M
  ctxt->errNo = error;
694
1.78M
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
695
1.78M
                    XML_ERR_ERROR, NULL, 0, (const char *) info1,
696
1.78M
                    (const char *) info2, (const char *) info3, 0, 0, msg,
697
1.78M
                    info1, info2, info3);
698
1.78M
    if (ctxt != NULL)
699
1.78M
  ctxt->nsWellFormed = 0;
700
1.78M
}
701
702
/**
703
 * xmlNsWarn:
704
 * @ctxt:  an XML parser context
705
 * @error:  the error number
706
 * @msg:  the message
707
 * @info1:  extra information string
708
 * @info2:  extra information string
709
 *
710
 * Handle a namespace warning
711
 */
712
static void LIBXML_ATTR_FORMAT(3,0)
713
xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
714
         const char *msg,
715
         const xmlChar * info1, const xmlChar * info2,
716
         const xmlChar * info3)
717
86.6k
{
718
86.6k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
719
86.6k
        (ctxt->instate == XML_PARSER_EOF))
720
0
  return;
721
86.6k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
722
86.6k
                    XML_ERR_WARNING, NULL, 0, (const char *) info1,
723
86.6k
                    (const char *) info2, (const char *) info3, 0, 0, msg,
724
86.6k
                    info1, info2, info3);
725
86.6k
}
726
727
static void
728
129M
xmlSaturatedAdd(unsigned long *dst, unsigned long val) {
729
129M
    if (val > ULONG_MAX - *dst)
730
0
        *dst = ULONG_MAX;
731
129M
    else
732
129M
        *dst += val;
733
129M
}
734
735
static void
736
35.5M
xmlSaturatedAddSizeT(unsigned long *dst, unsigned long val) {
737
35.5M
    if (val > ULONG_MAX - *dst)
738
0
        *dst = ULONG_MAX;
739
35.5M
    else
740
35.5M
        *dst += val;
741
35.5M
}
742
743
/**
744
 * xmlParserEntityCheck:
745
 * @ctxt:  parser context
746
 * @extra:  sum of unexpanded entity sizes
747
 *
748
 * Check for non-linear entity expansion behaviour.
749
 *
750
 * In some cases like xmlStringDecodeEntities, this function is called
751
 * for each, possibly nested entity and its unexpanded content length.
752
 *
753
 * In other cases like xmlParseReference, it's only called for each
754
 * top-level entity with its unexpanded content length plus the sum of
755
 * the unexpanded content lengths (plus fixed cost) of all nested
756
 * entities.
757
 *
758
 * Summing the unexpanded lengths also adds the length of the reference.
759
 * This is by design. Taking the length of the entity name into account
760
 * discourages attacks that try to waste CPU time with abusively long
761
 * entity names. See test/recurse/lol6.xml for example. Each call also
762
 * adds some fixed cost XML_ENT_FIXED_COST to discourage attacks with
763
 * short entities.
764
 *
765
 * Returns 1 on error, 0 on success.
766
 */
767
static int
768
xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long extra)
769
34.8M
{
770
34.8M
    unsigned long consumed;
771
34.8M
    xmlParserInputPtr input = ctxt->input;
772
34.8M
    xmlEntityPtr entity = input->entity;
773
774
    /*
775
     * Compute total consumed bytes so far, including input streams of
776
     * external entities.
777
     */
778
34.8M
    consumed = input->parentConsumed;
779
34.8M
    if ((entity == NULL) ||
780
34.8M
        ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
781
24.0M
         ((entity->flags & XML_ENT_PARSED) == 0))) {
782
24.0M
        xmlSaturatedAdd(&consumed, input->consumed);
783
24.0M
        xmlSaturatedAddSizeT(&consumed, input->cur - input->base);
784
24.0M
    }
785
34.8M
    xmlSaturatedAdd(&consumed, ctxt->sizeentities);
786
787
    /*
788
     * Add extra cost and some fixed cost.
789
     */
790
34.8M
    xmlSaturatedAdd(&ctxt->sizeentcopy, extra);
791
34.8M
    xmlSaturatedAdd(&ctxt->sizeentcopy, XML_ENT_FIXED_COST);
792
793
    /*
794
     * It's important to always use saturation arithmetic when tracking
795
     * entity sizes to make the size checks reliable. If "sizeentcopy"
796
     * overflows, we have to abort.
797
     */
798
34.8M
    if ((ctxt->sizeentcopy > XML_MAX_TEXT_LENGTH) &&
799
34.8M
        ((ctxt->sizeentcopy >= ULONG_MAX) ||
800
498
         (ctxt->sizeentcopy / XML_PARSER_NON_LINEAR > consumed))) {
801
498
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_LOOP,
802
498
                       "Maximum entity amplification factor exceeded");
803
498
        xmlHaltParser(ctxt);
804
498
        return(1);
805
498
    }
806
807
34.8M
    return(0);
808
34.8M
}
809
810
/************************************************************************
811
 *                  *
812
 *    Library wide options          *
813
 *                  *
814
 ************************************************************************/
815
816
/**
817
  * xmlHasFeature:
818
  * @feature: the feature to be examined
819
  *
820
  * Examines if the library has been compiled with a given feature.
821
  *
822
  * Returns a non-zero value if the feature exists, otherwise zero.
823
  * Returns zero (0) if the feature does not exist or an unknown
824
  * feature is requested, non-zero otherwise.
825
  */
826
int
827
xmlHasFeature(xmlFeature feature)
828
0
{
829
0
    switch (feature) {
830
0
  case XML_WITH_THREAD:
831
0
#ifdef LIBXML_THREAD_ENABLED
832
0
      return(1);
833
#else
834
      return(0);
835
#endif
836
0
        case XML_WITH_TREE:
837
0
#ifdef LIBXML_TREE_ENABLED
838
0
            return(1);
839
#else
840
            return(0);
841
#endif
842
0
        case XML_WITH_OUTPUT:
843
0
#ifdef LIBXML_OUTPUT_ENABLED
844
0
            return(1);
845
#else
846
            return(0);
847
#endif
848
0
        case XML_WITH_PUSH:
849
0
#ifdef LIBXML_PUSH_ENABLED
850
0
            return(1);
851
#else
852
            return(0);
853
#endif
854
0
        case XML_WITH_READER:
855
0
#ifdef LIBXML_READER_ENABLED
856
0
            return(1);
857
#else
858
            return(0);
859
#endif
860
0
        case XML_WITH_PATTERN:
861
0
#ifdef LIBXML_PATTERN_ENABLED
862
0
            return(1);
863
#else
864
            return(0);
865
#endif
866
0
        case XML_WITH_WRITER:
867
0
#ifdef LIBXML_WRITER_ENABLED
868
0
            return(1);
869
#else
870
            return(0);
871
#endif
872
0
        case XML_WITH_SAX1:
873
0
#ifdef LIBXML_SAX1_ENABLED
874
0
            return(1);
875
#else
876
            return(0);
877
#endif
878
0
        case XML_WITH_FTP:
879
#ifdef LIBXML_FTP_ENABLED
880
            return(1);
881
#else
882
0
            return(0);
883
0
#endif
884
0
        case XML_WITH_HTTP:
885
#ifdef LIBXML_HTTP_ENABLED
886
            return(1);
887
#else
888
0
            return(0);
889
0
#endif
890
0
        case XML_WITH_VALID:
891
0
#ifdef LIBXML_VALID_ENABLED
892
0
            return(1);
893
#else
894
            return(0);
895
#endif
896
0
        case XML_WITH_HTML:
897
0
#ifdef LIBXML_HTML_ENABLED
898
0
            return(1);
899
#else
900
            return(0);
901
#endif
902
0
        case XML_WITH_LEGACY:
903
#ifdef LIBXML_LEGACY_ENABLED
904
            return(1);
905
#else
906
0
            return(0);
907
0
#endif
908
0
        case XML_WITH_C14N:
909
0
#ifdef LIBXML_C14N_ENABLED
910
0
            return(1);
911
#else
912
            return(0);
913
#endif
914
0
        case XML_WITH_CATALOG:
915
0
#ifdef LIBXML_CATALOG_ENABLED
916
0
            return(1);
917
#else
918
            return(0);
919
#endif
920
0
        case XML_WITH_XPATH:
921
0
#ifdef LIBXML_XPATH_ENABLED
922
0
            return(1);
923
#else
924
            return(0);
925
#endif
926
0
        case XML_WITH_XPTR:
927
0
#ifdef LIBXML_XPTR_ENABLED
928
0
            return(1);
929
#else
930
            return(0);
931
#endif
932
0
        case XML_WITH_XINCLUDE:
933
0
#ifdef LIBXML_XINCLUDE_ENABLED
934
0
            return(1);
935
#else
936
            return(0);
937
#endif
938
0
        case XML_WITH_ICONV:
939
0
#ifdef LIBXML_ICONV_ENABLED
940
0
            return(1);
941
#else
942
            return(0);
943
#endif
944
0
        case XML_WITH_ISO8859X:
945
0
#ifdef LIBXML_ISO8859X_ENABLED
946
0
            return(1);
947
#else
948
            return(0);
949
#endif
950
0
        case XML_WITH_UNICODE:
951
0
#ifdef LIBXML_UNICODE_ENABLED
952
0
            return(1);
953
#else
954
            return(0);
955
#endif
956
0
        case XML_WITH_REGEXP:
957
0
#ifdef LIBXML_REGEXP_ENABLED
958
0
            return(1);
959
#else
960
            return(0);
961
#endif
962
0
        case XML_WITH_AUTOMATA:
963
0
#ifdef LIBXML_AUTOMATA_ENABLED
964
0
            return(1);
965
#else
966
            return(0);
967
#endif
968
0
        case XML_WITH_EXPR:
969
#ifdef LIBXML_EXPR_ENABLED
970
            return(1);
971
#else
972
0
            return(0);
973
0
#endif
974
0
        case XML_WITH_SCHEMAS:
975
0
#ifdef LIBXML_SCHEMAS_ENABLED
976
0
            return(1);
977
#else
978
            return(0);
979
#endif
980
0
        case XML_WITH_SCHEMATRON:
981
0
#ifdef LIBXML_SCHEMATRON_ENABLED
982
0
            return(1);
983
#else
984
            return(0);
985
#endif
986
0
        case XML_WITH_MODULES:
987
0
#ifdef LIBXML_MODULES_ENABLED
988
0
            return(1);
989
#else
990
            return(0);
991
#endif
992
0
        case XML_WITH_DEBUG:
993
#ifdef LIBXML_DEBUG_ENABLED
994
            return(1);
995
#else
996
0
            return(0);
997
0
#endif
998
0
        case XML_WITH_DEBUG_MEM:
999
#ifdef DEBUG_MEMORY_LOCATION
1000
            return(1);
1001
#else
1002
0
            return(0);
1003
0
#endif
1004
0
        case XML_WITH_DEBUG_RUN:
1005
0
            return(0);
1006
0
        case XML_WITH_ZLIB:
1007
0
#ifdef LIBXML_ZLIB_ENABLED
1008
0
            return(1);
1009
#else
1010
            return(0);
1011
#endif
1012
0
        case XML_WITH_LZMA:
1013
0
#ifdef LIBXML_LZMA_ENABLED
1014
0
            return(1);
1015
#else
1016
            return(0);
1017
#endif
1018
0
        case XML_WITH_ICU:
1019
#ifdef LIBXML_ICU_ENABLED
1020
            return(1);
1021
#else
1022
0
            return(0);
1023
0
#endif
1024
0
        default:
1025
0
      break;
1026
0
     }
1027
0
     return(0);
1028
0
}
1029
1030
/************************************************************************
1031
 *                  *
1032
 *    SAX2 defaulted attributes handling      *
1033
 *                  *
1034
 ************************************************************************/
1035
1036
/**
1037
 * xmlDetectSAX2:
1038
 * @ctxt:  an XML parser context
1039
 *
1040
 * Do the SAX2 detection and specific initialization
1041
 */
1042
static void
1043
1.73M
xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1044
1.73M
    xmlSAXHandlerPtr sax;
1045
1046
    /* Avoid unused variable warning if features are disabled. */
1047
1.73M
    (void) sax;
1048
1049
1.73M
    if (ctxt == NULL) return;
1050
1.73M
    sax = ctxt->sax;
1051
1.73M
#ifdef LIBXML_SAX1_ENABLED
1052
1.73M
    if ((sax) &&  (sax->initialized == XML_SAX2_MAGIC) &&
1053
1.73M
        ((sax->startElementNs != NULL) ||
1054
1.09M
         (sax->endElementNs != NULL) ||
1055
1.09M
         ((sax->startElement == NULL) && (sax->endElement == NULL))))
1056
1.09M
        ctxt->sax2 = 1;
1057
#else
1058
    ctxt->sax2 = 1;
1059
#endif /* LIBXML_SAX1_ENABLED */
1060
1061
1.73M
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1062
1.73M
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1063
1.73M
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
1064
1.73M
    if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1065
1.73M
    (ctxt->str_xml_ns == NULL)) {
1066
0
        xmlErrMemory(ctxt, NULL);
1067
0
    }
1068
1.73M
}
1069
1070
typedef struct _xmlDefAttrs xmlDefAttrs;
1071
typedef xmlDefAttrs *xmlDefAttrsPtr;
1072
struct _xmlDefAttrs {
1073
    int nbAttrs;  /* number of defaulted attributes on that element */
1074
    int maxAttrs;       /* the size of the array */
1075
#if __STDC_VERSION__ >= 199901L
1076
    /* Using a C99 flexible array member avoids UBSan errors. */
1077
    const xmlChar *values[]; /* array of localname/prefix/values/external */
1078
#else
1079
    const xmlChar *values[5];
1080
#endif
1081
};
1082
1083
/**
1084
 * xmlAttrNormalizeSpace:
1085
 * @src: the source string
1086
 * @dst: the target string
1087
 *
1088
 * Normalize the space in non CDATA attribute values:
1089
 * If the attribute type is not CDATA, then the XML processor MUST further
1090
 * process the normalized attribute value by discarding any leading and
1091
 * trailing space (#x20) characters, and by replacing sequences of space
1092
 * (#x20) characters by a single space (#x20) character.
1093
 * Note that the size of dst need to be at least src, and if one doesn't need
1094
 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1095
 * passing src as dst is just fine.
1096
 *
1097
 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1098
 *         is needed.
1099
 */
1100
static xmlChar *
1101
xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1102
140k
{
1103
140k
    if ((src == NULL) || (dst == NULL))
1104
0
        return(NULL);
1105
1106
159k
    while (*src == 0x20) src++;
1107
7.37M
    while (*src != 0) {
1108
7.23M
  if (*src == 0x20) {
1109
948k
      while (*src == 0x20) src++;
1110
205k
      if (*src != 0)
1111
168k
    *dst++ = 0x20;
1112
7.03M
  } else {
1113
7.03M
      *dst++ = *src++;
1114
7.03M
  }
1115
7.23M
    }
1116
140k
    *dst = 0;
1117
140k
    if (dst == src)
1118
88.3k
       return(NULL);
1119
51.7k
    return(dst);
1120
140k
}
1121
1122
/**
1123
 * xmlAttrNormalizeSpace2:
1124
 * @src: the source string
1125
 *
1126
 * Normalize the space in non CDATA attribute values, a slightly more complex
1127
 * front end to avoid allocation problems when running on attribute values
1128
 * coming from the input.
1129
 *
1130
 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1131
 *         is needed.
1132
 */
1133
static const xmlChar *
1134
xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
1135
111k
{
1136
111k
    int i;
1137
111k
    int remove_head = 0;
1138
111k
    int need_realloc = 0;
1139
111k
    const xmlChar *cur;
1140
1141
111k
    if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1142
0
        return(NULL);
1143
111k
    i = *len;
1144
111k
    if (i <= 0)
1145
4.57k
        return(NULL);
1146
1147
107k
    cur = src;
1148
126k
    while (*cur == 0x20) {
1149
19.3k
        cur++;
1150
19.3k
  remove_head++;
1151
19.3k
    }
1152
1.55M
    while (*cur != 0) {
1153
1.46M
  if (*cur == 0x20) {
1154
103k
      cur++;
1155
103k
      if ((*cur == 0x20) || (*cur == 0)) {
1156
12.8k
          need_realloc = 1;
1157
12.8k
    break;
1158
12.8k
      }
1159
103k
  } else
1160
1.35M
      cur++;
1161
1.46M
    }
1162
107k
    if (need_realloc) {
1163
12.8k
        xmlChar *ret;
1164
1165
12.8k
  ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1166
12.8k
  if (ret == NULL) {
1167
0
      xmlErrMemory(ctxt, NULL);
1168
0
      return(NULL);
1169
0
  }
1170
12.8k
  xmlAttrNormalizeSpace(ret, ret);
1171
12.8k
  *len = strlen((const char *)ret);
1172
12.8k
        return(ret);
1173
94.3k
    } else if (remove_head) {
1174
5.33k
        *len -= remove_head;
1175
5.33k
        memmove(src, src + remove_head, 1 + *len);
1176
5.33k
  return(src);
1177
5.33k
    }
1178
88.9k
    return(NULL);
1179
107k
}
1180
1181
/**
1182
 * xmlAddDefAttrs:
1183
 * @ctxt:  an XML parser context
1184
 * @fullname:  the element fullname
1185
 * @fullattr:  the attribute fullname
1186
 * @value:  the attribute value
1187
 *
1188
 * Add a defaulted attribute for an element
1189
 */
1190
static void
1191
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1192
               const xmlChar *fullname,
1193
               const xmlChar *fullattr,
1194
142k
               const xmlChar *value) {
1195
142k
    xmlDefAttrsPtr defaults;
1196
142k
    int len;
1197
142k
    const xmlChar *name;
1198
142k
    const xmlChar *prefix;
1199
1200
    /*
1201
     * Allows to detect attribute redefinitions
1202
     */
1203
142k
    if (ctxt->attsSpecial != NULL) {
1204
110k
        if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1205
30.1k
      return;
1206
110k
    }
1207
1208
112k
    if (ctxt->attsDefault == NULL) {
1209
36.8k
        ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1210
36.8k
  if (ctxt->attsDefault == NULL)
1211
0
      goto mem_error;
1212
36.8k
    }
1213
1214
    /*
1215
     * split the element name into prefix:localname , the string found
1216
     * are within the DTD and then not associated to namespace names.
1217
     */
1218
112k
    name = xmlSplitQName3(fullname, &len);
1219
112k
    if (name == NULL) {
1220
102k
        name = xmlDictLookup(ctxt->dict, fullname, -1);
1221
102k
  prefix = NULL;
1222
102k
    } else {
1223
10.6k
        name = xmlDictLookup(ctxt->dict, name, -1);
1224
10.6k
  prefix = xmlDictLookup(ctxt->dict, fullname, len);
1225
10.6k
    }
1226
1227
    /*
1228
     * make sure there is some storage
1229
     */
1230
112k
    defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1231
112k
    if (defaults == NULL) {
1232
67.2k
        defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
1233
67.2k
                     (4 * 5) * sizeof(const xmlChar *));
1234
67.2k
  if (defaults == NULL)
1235
0
      goto mem_error;
1236
67.2k
  defaults->nbAttrs = 0;
1237
67.2k
  defaults->maxAttrs = 4;
1238
67.2k
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1239
67.2k
                          defaults, NULL) < 0) {
1240
0
      xmlFree(defaults);
1241
0
      goto mem_error;
1242
0
  }
1243
67.2k
    } else if (defaults->nbAttrs >= defaults->maxAttrs) {
1244
1.77k
        xmlDefAttrsPtr temp;
1245
1246
1.77k
        temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
1247
1.77k
           (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1248
1.77k
  if (temp == NULL)
1249
0
      goto mem_error;
1250
1.77k
  defaults = temp;
1251
1.77k
  defaults->maxAttrs *= 2;
1252
1.77k
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1253
1.77k
                          defaults, NULL) < 0) {
1254
0
      xmlFree(defaults);
1255
0
      goto mem_error;
1256
0
  }
1257
1.77k
    }
1258
1259
    /*
1260
     * Split the element name into prefix:localname , the string found
1261
     * are within the DTD and hen not associated to namespace names.
1262
     */
1263
112k
    name = xmlSplitQName3(fullattr, &len);
1264
112k
    if (name == NULL) {
1265
93.4k
        name = xmlDictLookup(ctxt->dict, fullattr, -1);
1266
93.4k
  prefix = NULL;
1267
93.4k
    } else {
1268
19.3k
        name = xmlDictLookup(ctxt->dict, name, -1);
1269
19.3k
  prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1270
19.3k
    }
1271
1272
112k
    defaults->values[5 * defaults->nbAttrs] = name;
1273
112k
    defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1274
    /* intern the string and precompute the end */
1275
112k
    len = xmlStrlen(value);
1276
112k
    value = xmlDictLookup(ctxt->dict, value, len);
1277
112k
    if (value == NULL)
1278
0
        goto mem_error;
1279
112k
    defaults->values[5 * defaults->nbAttrs + 2] = value;
1280
112k
    defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1281
112k
    if (ctxt->external)
1282
42.5k
        defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1283
70.2k
    else
1284
70.2k
        defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1285
112k
    defaults->nbAttrs++;
1286
1287
112k
    return;
1288
1289
0
mem_error:
1290
0
    xmlErrMemory(ctxt, NULL);
1291
0
    return;
1292
112k
}
1293
1294
/**
1295
 * xmlAddSpecialAttr:
1296
 * @ctxt:  an XML parser context
1297
 * @fullname:  the element fullname
1298
 * @fullattr:  the attribute fullname
1299
 * @type:  the attribute type
1300
 *
1301
 * Register this attribute type
1302
 */
1303
static void
1304
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1305
      const xmlChar *fullname,
1306
      const xmlChar *fullattr,
1307
      int type)
1308
1.02M
{
1309
1.02M
    if (ctxt->attsSpecial == NULL) {
1310
82.2k
        ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1311
82.2k
  if (ctxt->attsSpecial == NULL)
1312
0
      goto mem_error;
1313
82.2k
    }
1314
1315
1.02M
    if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1316
90.7k
        return;
1317
1318
929k
    xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1319
929k
                     (void *) (ptrdiff_t) type);
1320
929k
    return;
1321
1322
0
mem_error:
1323
0
    xmlErrMemory(ctxt, NULL);
1324
0
    return;
1325
1.02M
}
1326
1327
/**
1328
 * xmlCleanSpecialAttrCallback:
1329
 *
1330
 * Removes CDATA attributes from the special attribute table
1331
 */
1332
static void
1333
xmlCleanSpecialAttrCallback(void *payload, void *data,
1334
                            const xmlChar *fullname, const xmlChar *fullattr,
1335
719k
                            const xmlChar *unused ATTRIBUTE_UNUSED) {
1336
719k
    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1337
1338
719k
    if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) {
1339
238k
        xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1340
238k
    }
1341
719k
}
1342
1343
/**
1344
 * xmlCleanSpecialAttr:
1345
 * @ctxt:  an XML parser context
1346
 *
1347
 * Trim the list of attributes defined to remove all those of type
1348
 * CDATA as they are not special. This call should be done when finishing
1349
 * to parse the DTD and before starting to parse the document root.
1350
 */
1351
static void
1352
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1353
260k
{
1354
260k
    if (ctxt->attsSpecial == NULL)
1355
204k
        return;
1356
1357
55.5k
    xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1358
1359
55.5k
    if (xmlHashSize(ctxt->attsSpecial) == 0) {
1360
10.2k
        xmlHashFree(ctxt->attsSpecial, NULL);
1361
10.2k
        ctxt->attsSpecial = NULL;
1362
10.2k
    }
1363
55.5k
    return;
1364
260k
}
1365
1366
/**
1367
 * xmlCheckLanguageID:
1368
 * @lang:  pointer to the string value
1369
 *
1370
 * Checks that the value conforms to the LanguageID production:
1371
 *
1372
 * NOTE: this is somewhat deprecated, those productions were removed from
1373
 *       the XML Second edition.
1374
 *
1375
 * [33] LanguageID ::= Langcode ('-' Subcode)*
1376
 * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
1377
 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1378
 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1379
 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1380
 * [38] Subcode ::= ([a-z] | [A-Z])+
1381
 *
1382
 * The current REC reference the successors of RFC 1766, currently 5646
1383
 *
1384
 * http://www.rfc-editor.org/rfc/rfc5646.txt
1385
 * langtag       = language
1386
 *                 ["-" script]
1387
 *                 ["-" region]
1388
 *                 *("-" variant)
1389
 *                 *("-" extension)
1390
 *                 ["-" privateuse]
1391
 * language      = 2*3ALPHA            ; shortest ISO 639 code
1392
 *                 ["-" extlang]       ; sometimes followed by
1393
 *                                     ; extended language subtags
1394
 *               / 4ALPHA              ; or reserved for future use
1395
 *               / 5*8ALPHA            ; or registered language subtag
1396
 *
1397
 * extlang       = 3ALPHA              ; selected ISO 639 codes
1398
 *                 *2("-" 3ALPHA)      ; permanently reserved
1399
 *
1400
 * script        = 4ALPHA              ; ISO 15924 code
1401
 *
1402
 * region        = 2ALPHA              ; ISO 3166-1 code
1403
 *               / 3DIGIT              ; UN M.49 code
1404
 *
1405
 * variant       = 5*8alphanum         ; registered variants
1406
 *               / (DIGIT 3alphanum)
1407
 *
1408
 * extension     = singleton 1*("-" (2*8alphanum))
1409
 *
1410
 *                                     ; Single alphanumerics
1411
 *                                     ; "x" reserved for private use
1412
 * singleton     = DIGIT               ; 0 - 9
1413
 *               / %x41-57             ; A - W
1414
 *               / %x59-5A             ; Y - Z
1415
 *               / %x61-77             ; a - w
1416
 *               / %x79-7A             ; y - z
1417
 *
1418
 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1419
 * The parser below doesn't try to cope with extension or privateuse
1420
 * that could be added but that's not interoperable anyway
1421
 *
1422
 * Returns 1 if correct 0 otherwise
1423
 **/
1424
int
1425
xmlCheckLanguageID(const xmlChar * lang)
1426
155k
{
1427
155k
    const xmlChar *cur = lang, *nxt;
1428
1429
155k
    if (cur == NULL)
1430
2.88k
        return (0);
1431
152k
    if (((cur[0] == 'i') && (cur[1] == '-')) ||
1432
152k
        ((cur[0] == 'I') && (cur[1] == '-')) ||
1433
152k
        ((cur[0] == 'x') && (cur[1] == '-')) ||
1434
152k
        ((cur[0] == 'X') && (cur[1] == '-'))) {
1435
        /*
1436
         * Still allow IANA code and user code which were coming
1437
         * from the previous version of the XML-1.0 specification
1438
         * it's deprecated but we should not fail
1439
         */
1440
11.3k
        cur += 2;
1441
65.9k
        while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1442
65.9k
               ((cur[0] >= 'a') && (cur[0] <= 'z')))
1443
54.5k
            cur++;
1444
11.3k
        return(cur[0] == 0);
1445
11.3k
    }
1446
141k
    nxt = cur;
1447
718k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1448
718k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1449
577k
           nxt++;
1450
141k
    if (nxt - cur >= 4) {
1451
        /*
1452
         * Reserved
1453
         */
1454
17.2k
        if ((nxt - cur > 8) || (nxt[0] != 0))
1455
14.9k
            return(0);
1456
2.30k
        return(1);
1457
17.2k
    }
1458
124k
    if (nxt - cur < 2)
1459
11.2k
        return(0);
1460
    /* we got an ISO 639 code */
1461
112k
    if (nxt[0] == 0)
1462
8.20k
        return(1);
1463
104k
    if (nxt[0] != '-')
1464
6.90k
        return(0);
1465
1466
97.8k
    nxt++;
1467
97.8k
    cur = nxt;
1468
    /* now we can have extlang or script or region or variant */
1469
97.8k
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1470
9.54k
        goto region_m49;
1471
1472
418k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1473
418k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1474
330k
           nxt++;
1475
88.2k
    if (nxt - cur == 4)
1476
20.7k
        goto script;
1477
67.5k
    if (nxt - cur == 2)
1478
16.8k
        goto region;
1479
50.7k
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1480
8.85k
        goto variant;
1481
41.8k
    if (nxt - cur != 3)
1482
11.2k
        return(0);
1483
    /* we parsed an extlang */
1484
30.5k
    if (nxt[0] == 0)
1485
1.93k
        return(1);
1486
28.6k
    if (nxt[0] != '-')
1487
3.24k
        return(0);
1488
1489
25.4k
    nxt++;
1490
25.4k
    cur = nxt;
1491
    /* now we can have script or region or variant */
1492
25.4k
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1493
3.91k
        goto region_m49;
1494
1495
135k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1496
135k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1497
114k
           nxt++;
1498
21.4k
    if (nxt - cur == 2)
1499
3.20k
        goto region;
1500
18.2k
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1501
2.62k
        goto variant;
1502
15.6k
    if (nxt - cur != 4)
1503
12.1k
        return(0);
1504
    /* we parsed a script */
1505
24.2k
script:
1506
24.2k
    if (nxt[0] == 0)
1507
1.76k
        return(1);
1508
22.4k
    if (nxt[0] != '-')
1509
4.83k
        return(0);
1510
1511
17.6k
    nxt++;
1512
17.6k
    cur = nxt;
1513
    /* now we can have region or variant */
1514
17.6k
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1515
2.88k
        goto region_m49;
1516
1517
95.2k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1518
95.2k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1519
80.4k
           nxt++;
1520
1521
14.7k
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1522
3.53k
        goto variant;
1523
11.2k
    if (nxt - cur != 2)
1524
8.66k
        return(0);
1525
    /* we parsed a region */
1526
25.0k
region:
1527
25.0k
    if (nxt[0] == 0)
1528
3.01k
        return(1);
1529
22.0k
    if (nxt[0] != '-')
1530
10.0k
        return(0);
1531
1532
11.9k
    nxt++;
1533
11.9k
    cur = nxt;
1534
    /* now we can just have a variant */
1535
83.1k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1536
83.1k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1537
71.1k
           nxt++;
1538
1539
11.9k
    if ((nxt - cur < 5) || (nxt - cur > 8))
1540
9.49k
        return(0);
1541
1542
    /* we parsed a variant */
1543
17.5k
variant:
1544
17.5k
    if (nxt[0] == 0)
1545
5.88k
        return(1);
1546
11.6k
    if (nxt[0] != '-')
1547
8.53k
        return(0);
1548
    /* extensions and private use subtags not checked */
1549
3.08k
    return (1);
1550
1551
16.3k
region_m49:
1552
16.3k
    if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1553
16.3k
        ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1554
2.47k
        nxt += 3;
1555
2.47k
        goto region;
1556
2.47k
    }
1557
13.8k
    return(0);
1558
16.3k
}
1559
1560
/************************************************************************
1561
 *                  *
1562
 *    Parser stacks related functions and macros    *
1563
 *                  *
1564
 ************************************************************************/
1565
1566
static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1567
                                            const xmlChar ** str);
1568
1569
#ifdef SAX2
1570
/**
1571
 * nsPush:
1572
 * @ctxt:  an XML parser context
1573
 * @prefix:  the namespace prefix or NULL
1574
 * @URL:  the namespace name
1575
 *
1576
 * Pushes a new parser namespace on top of the ns stack
1577
 *
1578
 * Returns -1 in case of error, -2 if the namespace should be discarded
1579
 *     and the index in the stack otherwise.
1580
 */
1581
static int
1582
nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1583
767k
{
1584
767k
    if (ctxt->options & XML_PARSE_NSCLEAN) {
1585
325k
        int i;
1586
969k
  for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
1587
858k
      if (ctxt->nsTab[i] == prefix) {
1588
    /* in scope */
1589
214k
          if (ctxt->nsTab[i + 1] == URL)
1590
97.2k
        return(-2);
1591
    /* out of scope keep it */
1592
116k
    break;
1593
214k
      }
1594
858k
  }
1595
325k
    }
1596
670k
    if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1597
88.5k
  ctxt->nsMax = 10;
1598
88.5k
  ctxt->nsNr = 0;
1599
88.5k
  ctxt->nsTab = (const xmlChar **)
1600
88.5k
                xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1601
88.5k
  if (ctxt->nsTab == NULL) {
1602
0
      xmlErrMemory(ctxt, NULL);
1603
0
      ctxt->nsMax = 0;
1604
0
            return (-1);
1605
0
  }
1606
581k
    } else if (ctxt->nsNr >= ctxt->nsMax) {
1607
21.3k
        const xmlChar ** tmp;
1608
21.3k
        ctxt->nsMax *= 2;
1609
21.3k
        tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1610
21.3k
            ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1611
21.3k
        if (tmp == NULL) {
1612
0
            xmlErrMemory(ctxt, NULL);
1613
0
      ctxt->nsMax /= 2;
1614
0
            return (-1);
1615
0
        }
1616
21.3k
  ctxt->nsTab = tmp;
1617
21.3k
    }
1618
670k
    ctxt->nsTab[ctxt->nsNr++] = prefix;
1619
670k
    ctxt->nsTab[ctxt->nsNr++] = URL;
1620
670k
    return (ctxt->nsNr);
1621
670k
}
1622
/**
1623
 * nsPop:
1624
 * @ctxt: an XML parser context
1625
 * @nr:  the number to pop
1626
 *
1627
 * Pops the top @nr parser prefix/namespace from the ns stack
1628
 *
1629
 * Returns the number of namespaces removed
1630
 */
1631
static int
1632
nsPop(xmlParserCtxtPtr ctxt, int nr)
1633
223k
{
1634
223k
    int i;
1635
1636
223k
    if (ctxt->nsTab == NULL) return(0);
1637
223k
    if (ctxt->nsNr < nr) {
1638
0
        xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1639
0
        nr = ctxt->nsNr;
1640
0
    }
1641
223k
    if (ctxt->nsNr <= 0)
1642
0
        return (0);
1643
1644
766k
    for (i = 0;i < nr;i++) {
1645
542k
         ctxt->nsNr--;
1646
542k
   ctxt->nsTab[ctxt->nsNr] = NULL;
1647
542k
    }
1648
223k
    return(nr);
1649
223k
}
1650
#endif
1651
1652
static int
1653
148k
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1654
148k
    const xmlChar **atts;
1655
148k
    int *attallocs;
1656
148k
    int maxatts;
1657
1658
148k
    if (nr + 5 > ctxt->maxatts) {
1659
148k
  maxatts = ctxt->maxatts == 0 ? 55 : (nr + 5) * 2;
1660
148k
  atts = (const xmlChar **) xmlMalloc(
1661
148k
             maxatts * sizeof(const xmlChar *));
1662
148k
  if (atts == NULL) goto mem_error;
1663
148k
  attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1664
148k
                               (maxatts / 5) * sizeof(int));
1665
148k
  if (attallocs == NULL) {
1666
0
            xmlFree(atts);
1667
0
            goto mem_error;
1668
0
        }
1669
148k
        if (ctxt->maxatts > 0)
1670
962
            memcpy(atts, ctxt->atts, ctxt->maxatts * sizeof(const xmlChar *));
1671
148k
        xmlFree(ctxt->atts);
1672
148k
  ctxt->atts = atts;
1673
148k
  ctxt->attallocs = attallocs;
1674
148k
  ctxt->maxatts = maxatts;
1675
148k
    }
1676
148k
    return(ctxt->maxatts);
1677
0
mem_error:
1678
0
    xmlErrMemory(ctxt, NULL);
1679
0
    return(-1);
1680
148k
}
1681
1682
/**
1683
 * inputPush:
1684
 * @ctxt:  an XML parser context
1685
 * @value:  the parser input
1686
 *
1687
 * Pushes a new parser input on top of the input stack
1688
 *
1689
 * Returns -1 in case of error, the index in the stack otherwise
1690
 */
1691
int
1692
inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1693
11.8M
{
1694
11.8M
    if ((ctxt == NULL) || (value == NULL))
1695
0
        return(-1);
1696
11.8M
    if (ctxt->inputNr >= ctxt->inputMax) {
1697
265
        size_t newSize = ctxt->inputMax * 2;
1698
265
        xmlParserInputPtr *tmp;
1699
1700
265
        tmp = (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1701
265
                                               newSize * sizeof(*tmp));
1702
265
        if (tmp == NULL) {
1703
0
            xmlErrMemory(ctxt, NULL);
1704
0
            return (-1);
1705
0
        }
1706
265
        ctxt->inputTab = tmp;
1707
265
        ctxt->inputMax = newSize;
1708
265
    }
1709
11.8M
    ctxt->inputTab[ctxt->inputNr] = value;
1710
11.8M
    ctxt->input = value;
1711
11.8M
    return (ctxt->inputNr++);
1712
11.8M
}
1713
/**
1714
 * inputPop:
1715
 * @ctxt: an XML parser context
1716
 *
1717
 * Pops the top parser input from the input stack
1718
 *
1719
 * Returns the input just removed
1720
 */
1721
xmlParserInputPtr
1722
inputPop(xmlParserCtxtPtr ctxt)
1723
14.1M
{
1724
14.1M
    xmlParserInputPtr ret;
1725
1726
14.1M
    if (ctxt == NULL)
1727
0
        return(NULL);
1728
14.1M
    if (ctxt->inputNr <= 0)
1729
2.39M
        return (NULL);
1730
11.7M
    ctxt->inputNr--;
1731
11.7M
    if (ctxt->inputNr > 0)
1732
10.7M
        ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1733
981k
    else
1734
981k
        ctxt->input = NULL;
1735
11.7M
    ret = ctxt->inputTab[ctxt->inputNr];
1736
11.7M
    ctxt->inputTab[ctxt->inputNr] = NULL;
1737
11.7M
    return (ret);
1738
14.1M
}
1739
/**
1740
 * nodePush:
1741
 * @ctxt:  an XML parser context
1742
 * @value:  the element node
1743
 *
1744
 * Pushes a new element node on top of the node stack
1745
 *
1746
 * Returns -1 in case of error, the index in the stack otherwise
1747
 */
1748
int
1749
nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1750
11.8M
{
1751
11.8M
    if (ctxt == NULL) return(0);
1752
11.8M
    if (ctxt->nodeNr >= ctxt->nodeMax) {
1753
68.2k
        xmlNodePtr *tmp;
1754
1755
68.2k
  tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1756
68.2k
                                      ctxt->nodeMax * 2 *
1757
68.2k
                                      sizeof(ctxt->nodeTab[0]));
1758
68.2k
        if (tmp == NULL) {
1759
0
            xmlErrMemory(ctxt, NULL);
1760
0
            return (-1);
1761
0
        }
1762
68.2k
        ctxt->nodeTab = tmp;
1763
68.2k
  ctxt->nodeMax *= 2;
1764
68.2k
    }
1765
11.8M
    if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1766
11.8M
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
1767
283
  xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1768
283
     "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1769
283
        xmlParserMaxDepth);
1770
283
  xmlHaltParser(ctxt);
1771
283
  return(-1);
1772
283
    }
1773
11.8M
    ctxt->nodeTab[ctxt->nodeNr] = value;
1774
11.8M
    ctxt->node = value;
1775
11.8M
    return (ctxt->nodeNr++);
1776
11.8M
}
1777
1778
/**
1779
 * nodePop:
1780
 * @ctxt: an XML parser context
1781
 *
1782
 * Pops the top element node from the node stack
1783
 *
1784
 * Returns the node just removed
1785
 */
1786
xmlNodePtr
1787
nodePop(xmlParserCtxtPtr ctxt)
1788
10.1M
{
1789
10.1M
    xmlNodePtr ret;
1790
1791
10.1M
    if (ctxt == NULL) return(NULL);
1792
10.1M
    if (ctxt->nodeNr <= 0)
1793
515k
        return (NULL);
1794
9.68M
    ctxt->nodeNr--;
1795
9.68M
    if (ctxt->nodeNr > 0)
1796
8.21M
        ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1797
1.47M
    else
1798
1.47M
        ctxt->node = NULL;
1799
9.68M
    ret = ctxt->nodeTab[ctxt->nodeNr];
1800
9.68M
    ctxt->nodeTab[ctxt->nodeNr] = NULL;
1801
9.68M
    return (ret);
1802
10.1M
}
1803
1804
/**
1805
 * nameNsPush:
1806
 * @ctxt:  an XML parser context
1807
 * @value:  the element name
1808
 * @prefix:  the element prefix
1809
 * @URI:  the element namespace name
1810
 * @line:  the current line number for error messages
1811
 * @nsNr:  the number of namespaces pushed on the namespace table
1812
 *
1813
 * Pushes a new element name/prefix/URL on top of the name stack
1814
 *
1815
 * Returns -1 in case of error, the index in the stack otherwise
1816
 */
1817
static int
1818
nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1819
           const xmlChar *prefix, const xmlChar *URI, int line, int nsNr)
1820
11.6M
{
1821
11.6M
    xmlStartTag *tag;
1822
1823
11.6M
    if (ctxt->nameNr >= ctxt->nameMax) {
1824
147k
        const xmlChar * *tmp;
1825
147k
        xmlStartTag *tmp2;
1826
147k
        ctxt->nameMax *= 2;
1827
147k
        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1828
147k
                                    ctxt->nameMax *
1829
147k
                                    sizeof(ctxt->nameTab[0]));
1830
147k
        if (tmp == NULL) {
1831
0
      ctxt->nameMax /= 2;
1832
0
      goto mem_error;
1833
0
        }
1834
147k
  ctxt->nameTab = tmp;
1835
147k
        tmp2 = (xmlStartTag *) xmlRealloc((void * *)ctxt->pushTab,
1836
147k
                                    ctxt->nameMax *
1837
147k
                                    sizeof(ctxt->pushTab[0]));
1838
147k
        if (tmp2 == NULL) {
1839
0
      ctxt->nameMax /= 2;
1840
0
      goto mem_error;
1841
0
        }
1842
147k
  ctxt->pushTab = tmp2;
1843
11.5M
    } else if (ctxt->pushTab == NULL) {
1844
562k
        ctxt->pushTab = (xmlStartTag *) xmlMalloc(ctxt->nameMax *
1845
562k
                                            sizeof(ctxt->pushTab[0]));
1846
562k
        if (ctxt->pushTab == NULL)
1847
0
            goto mem_error;
1848
562k
    }
1849
11.6M
    ctxt->nameTab[ctxt->nameNr] = value;
1850
11.6M
    ctxt->name = value;
1851
11.6M
    tag = &ctxt->pushTab[ctxt->nameNr];
1852
11.6M
    tag->prefix = prefix;
1853
11.6M
    tag->URI = URI;
1854
11.6M
    tag->line = line;
1855
11.6M
    tag->nsNr = nsNr;
1856
11.6M
    return (ctxt->nameNr++);
1857
0
mem_error:
1858
0
    xmlErrMemory(ctxt, NULL);
1859
0
    return (-1);
1860
11.6M
}
1861
#ifdef LIBXML_PUSH_ENABLED
1862
/**
1863
 * nameNsPop:
1864
 * @ctxt: an XML parser context
1865
 *
1866
 * Pops the top element/prefix/URI name from the name stack
1867
 *
1868
 * Returns the name just removed
1869
 */
1870
static const xmlChar *
1871
nameNsPop(xmlParserCtxtPtr ctxt)
1872
1.66M
{
1873
1.66M
    const xmlChar *ret;
1874
1875
1.66M
    if (ctxt->nameNr <= 0)
1876
0
        return (NULL);
1877
1.66M
    ctxt->nameNr--;
1878
1.66M
    if (ctxt->nameNr > 0)
1879
1.63M
        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1880
24.9k
    else
1881
24.9k
        ctxt->name = NULL;
1882
1.66M
    ret = ctxt->nameTab[ctxt->nameNr];
1883
1.66M
    ctxt->nameTab[ctxt->nameNr] = NULL;
1884
1.66M
    return (ret);
1885
1.66M
}
1886
#endif /* LIBXML_PUSH_ENABLED */
1887
1888
/**
1889
 * namePush:
1890
 * @ctxt:  an XML parser context
1891
 * @value:  the element name
1892
 *
1893
 * Pushes a new element name on top of the name stack
1894
 *
1895
 * Returns -1 in case of error, the index in the stack otherwise
1896
 */
1897
int
1898
namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1899
0
{
1900
0
    if (ctxt == NULL) return (-1);
1901
1902
0
    if (ctxt->nameNr >= ctxt->nameMax) {
1903
0
        const xmlChar * *tmp;
1904
0
        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1905
0
                                    ctxt->nameMax * 2 *
1906
0
                                    sizeof(ctxt->nameTab[0]));
1907
0
        if (tmp == NULL) {
1908
0
      goto mem_error;
1909
0
        }
1910
0
  ctxt->nameTab = tmp;
1911
0
        ctxt->nameMax *= 2;
1912
0
    }
1913
0
    ctxt->nameTab[ctxt->nameNr] = value;
1914
0
    ctxt->name = value;
1915
0
    return (ctxt->nameNr++);
1916
0
mem_error:
1917
0
    xmlErrMemory(ctxt, NULL);
1918
0
    return (-1);
1919
0
}
1920
/**
1921
 * namePop:
1922
 * @ctxt: an XML parser context
1923
 *
1924
 * Pops the top element name from the name stack
1925
 *
1926
 * Returns the name just removed
1927
 */
1928
const xmlChar *
1929
namePop(xmlParserCtxtPtr ctxt)
1930
5.02M
{
1931
5.02M
    const xmlChar *ret;
1932
1933
5.02M
    if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1934
0
        return (NULL);
1935
5.02M
    ctxt->nameNr--;
1936
5.02M
    if (ctxt->nameNr > 0)
1937
4.83M
        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1938
187k
    else
1939
187k
        ctxt->name = NULL;
1940
5.02M
    ret = ctxt->nameTab[ctxt->nameNr];
1941
5.02M
    ctxt->nameTab[ctxt->nameNr] = NULL;
1942
5.02M
    return (ret);
1943
5.02M
}
1944
1945
14.3M
/**
 * spacePush:
 * @ctxt:  an XML parser context
 * @val:  the value to store on the space stack
 *
 * Pushes @val on top of the space stack, doubling the table first when
 * it is full, and makes ctxt->space point at the new top entry.
 *
 * Returns -1 in case of allocation failure, the index in the stack
 * otherwise
 */
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
    int index;

    if (ctxt->spaceNr >= ctxt->spaceMax) {
        int newMax = ctxt->spaceMax * 2;
        int *grown;

        grown = (int *) xmlRealloc(ctxt->spaceTab,
                                   newMax * sizeof(ctxt->spaceTab[0]));
        if (grown == NULL) {
            /* The old table and capacity stay in place on failure. */
            xmlErrMemory(ctxt, NULL);
            return(-1);
        }
        ctxt->spaceTab = grown;
        ctxt->spaceMax = newMax;
    }

    index = ctxt->spaceNr++;
    ctxt->spaceTab[index] = val;
    ctxt->space = &ctxt->spaceTab[index];
    return(index);
}
1963
1964
12.2M
/**
 * spacePop:
 * @ctxt:  an XML parser context
 *
 * Removes the top entry of the space stack. The vacated slot is reset
 * to -1 and ctxt->space is repointed at the new top entry, or at slot 0
 * once the stack is empty.
 *
 * Returns the value just removed, or 0 if the stack is empty
 */
static int spacePop(xmlParserCtxtPtr ctxt) {
    int top;
    int value;

    if (ctxt->spaceNr <= 0)
        return(0);

    top = --ctxt->spaceNr;
    value = ctxt->spaceTab[top];
    ctxt->spaceTab[top] = -1;
    ctxt->space = &ctxt->spaceTab[(top > 0) ? (top - 1) : 0];

    return(value);
}
1976
1977
/*
1978
 * Macros for accessing the content. Those should be used only by the parser,
1979
 * and not exported.
1980
 *
1981
 * Dirty macros, i.e. one often need to make assumption on the context to
1982
 * use them
1983
 *
1984
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
1985
 *           To be used with extreme caution since operations consuming
1986
 *           characters may move the input buffer to a different location !
1987
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
1988
 *           This should be used internally by the parser
1989
 *           only to compare to ASCII values otherwise it would break when
1990
 *           running with UTF-8 encoding.
1991
 *   RAW     same as CUR but in the input buffer, bypass any token
1992
 *           extraction that may have been done
1993
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR it should be used only
1994
 *           to compare on ASCII based substring.
1995
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
1996
 *           strings without newlines within the parser.
1997
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
1998
 *           defined char within the parser.
1999
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
2000
 *
2001
 *   NEXT    Skip to the next character, this does the proper decoding
2002
 *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
2003
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
2004
 *   CUR_CHAR(l) returns the current unicode character (int), set l
2005
 *           to the number of xmlChars used for the encoding [0-5].
2006
 *   CUR_SCHAR  same but operate on a string instead of the context
2007
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
2008
 *            the index
2009
 *   GROW, SHRINK  handling of input buffers
2010
 */
2011
2012
291M
#define RAW (*ctxt->input->cur)
2013
175M
#define CUR (*ctxt->input->cur)
2014
199M
#define NXT(val) ctxt->input->cur[(val)]
2015
19.5M
#define CUR_PTR ctxt->input->cur
2016
4.05M
#define BASE_PTR ctxt->input->base
2017
2018
/*
 * CMPn(s, c1, ..., cn): true when the first n bytes found at s are equal
 * to c1 ... cn, compared left to right with short-circuit evaluation.
 * Every argument is parenthesized so that compound expressions (pointer
 * arithmetic, conditionals, ...) can be passed safely; note that s is
 * still evaluated once per compared byte.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )
2035
2036
56.9M
#define SKIP(val) do {             \
2037
56.9M
    ctxt->input->cur += (val),ctxt->input->col+=(val);      \
2038
56.9M
    if (*ctxt->input->cur == 0)           \
2039
56.9M
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);     \
2040
56.9M
  } while (0)
2041
2042
171k
/*
 * SKIPL(val): advance the input cursor by val xmlChars one at a time,
 * bumping the line counter (and resetting the column to 1) on each
 * newline and the column counter otherwise, then try to grow the input
 * if the cursor landed on a 0 byte.
 * val is parenthesized so that compound expressions can be passed.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == 0)						\
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
  } while (0)
2053
2054
129M
#define SHRINK if ((ctxt->progressive == 0) &&       \
2055
129M
       (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
2056
129M
       (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
2057
129M
  xmlSHRINK (ctxt);
2058
2059
2.53M
/*
 * xmlSHRINK: body of the SHRINK macro. Shrinks the current input only
 * when it is backed by a buffer that has an encoder or a read callback,
 * then tries to grow it if the cursor sits on a 0 byte.
 */
static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
    xmlParserInputPtr in = ctxt->input;

    /* Don't shrink memory buffers. */
    if (in->buf != NULL) {
        if ((in->buf->encoder) || (in->buf->readcallback))
            xmlParserInputShrink(in);
    }

    if (*in->cur == 0)
        xmlParserInputGrow(in, INPUT_CHUNK);
}
2067
2068
392M
#define GROW if ((ctxt->progressive == 0) &&       \
2069
392M
     (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
2070
392M
  xmlGROW (ctxt);
2071
2072
38.3M
/*
 * xmlGROW: body of the GROW macro. Refuses to grow (and halts the
 * parser) when the data before or after the cursor exceeds
 * XML_MAX_LOOKUP_LIMIT on a callback-fed input without XML_PARSE_HUGE.
 * Otherwise grows the input by INPUT_CHUNK, halts the parser if the
 * cursor ended up outside [base, end], and grows once more if the
 * cursor sits on a 0 byte.
 */
static void xmlGROW (xmlParserCtxtPtr ctxt) {
    xmlParserInputPtr in = ctxt->input;
    ptrdiff_t ahead = in->end - in->cur;
    ptrdiff_t behind = in->cur - in->base;
    int overLimit;
    int fromCallback;

    overLimit = (ahead > XML_MAX_LOOKUP_LIMIT) ||
                (behind > XML_MAX_LOOKUP_LIMIT);
    fromCallback = (in->buf) && (in->buf->readcallback != NULL);

    if (overLimit && fromCallback &&
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
        xmlHaltParser(ctxt);
        return;
    }

    xmlParserInputGrow(in, INPUT_CHUNK);

    if ((in->cur > in->end) || (in->cur < in->base)) {
        xmlHaltParser(ctxt);
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "cur index out of bound");
        return;
    }

    if (in->cur == NULL)
        return;
    if (*in->cur == 0)
        xmlParserInputGrow(in, INPUT_CHUNK);
}
2095
2096
83.4M
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)
2097
2098
160M
#define NEXT xmlNextChar(ctxt)
2099
2100
21.1M
#define NEXT1 {               \
2101
21.1M
  ctxt->input->col++;           \
2102
21.1M
  ctxt->input->cur++;           \
2103
21.1M
  if (*ctxt->input->cur == 0)         \
2104
21.1M
      xmlParserInputGrow(ctxt->input, INPUT_CHUNK);   \
2105
21.1M
    }
2106
2107
370M
#define NEXTL(l) do {             \
2108
370M
    if (*(ctxt->input->cur) == '\n') {         \
2109
5.02M
  ctxt->input->line++; ctxt->input->col = 1;      \
2110
365M
    } else ctxt->input->col++;           \
2111
370M
    ctxt->input->cur += l;        \
2112
370M
  } while (0)
2113
2114
400M
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
2115
2.50G
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)
2116
2117
#define COPY_BUF(l,b,i,v)           \
2118
2.77G
    if (l == 1) b[i++] = v;           \
2119
2.77G
    else i += xmlCopyCharMultiByte(&b[i],v)
2120
2121
/**
2122
 * xmlSkipBlankChars:
2123
 * @ctxt:  the XML parser context
2124
 *
2125
 * skip all blanks character found at that point in the input streams.
2126
 * It pops up finished entities in the process if allowable at that point.
2127
 *
2128
 * Returns the number of space chars skipped
2129
 */
2130
2131
int
2132
83.4M
xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2133
83.4M
    int res = 0;
2134
2135
    /*
2136
     * It's Okay to use CUR/NEXT here since all the blanks are on
2137
     * the ASCII range.
2138
     */
2139
83.4M
    if (((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) ||
2140
83.4M
        (ctxt->instate == XML_PARSER_START)) {
2141
48.9M
  const xmlChar *cur;
2142
  /*
2143
   * if we are in the document content, go really fast
2144
   */
2145
48.9M
  cur = ctxt->input->cur;
2146
48.9M
  while (IS_BLANK_CH(*cur)) {
2147
19.5M
      if (*cur == '\n') {
2148
1.78M
    ctxt->input->line++; ctxt->input->col = 1;
2149
17.7M
      } else {
2150
17.7M
    ctxt->input->col++;
2151
17.7M
      }
2152
19.5M
      cur++;
2153
19.5M
      if (res < INT_MAX)
2154
19.5M
    res++;
2155
19.5M
      if (*cur == 0) {
2156
63.2k
    ctxt->input->cur = cur;
2157
63.2k
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2158
63.2k
    cur = ctxt->input->cur;
2159
63.2k
      }
2160
19.5M
  }
2161
48.9M
  ctxt->input->cur = cur;
2162
48.9M
    } else {
2163
34.5M
        int expandPE = ((ctxt->external != 0) || (ctxt->inputNr != 1));
2164
2165
119M
  while (ctxt->instate != XML_PARSER_EOF) {
2166
119M
            if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2167
62.2M
    NEXT;
2168
62.2M
      } else if (CUR == '%') {
2169
                /*
2170
                 * Need to handle support of entities branching here
2171
                 */
2172
12.4M
          if ((expandPE == 0) || (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2173
494k
                    break;
2174
12.0M
          xmlParsePEReference(ctxt);
2175
44.8M
            } else if (CUR == 0) {
2176
10.8M
                unsigned long consumed;
2177
10.8M
                xmlEntityPtr ent;
2178
2179
10.8M
                if (ctxt->inputNr <= 1)
2180
49.8k
                    break;
2181
2182
10.7M
                consumed = ctxt->input->consumed;
2183
10.7M
                xmlSaturatedAddSizeT(&consumed,
2184
10.7M
                                     ctxt->input->cur - ctxt->input->base);
2185
2186
                /*
2187
                 * Add to sizeentities when parsing an external entity
2188
                 * for the first time.
2189
                 */
2190
10.7M
                ent = ctxt->input->entity;
2191
10.7M
                if ((ent->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
2192
10.7M
                    ((ent->flags & XML_ENT_PARSED) == 0)) {
2193
2.14k
                    ent->flags |= XML_ENT_PARSED;
2194
2195
2.14k
                    xmlSaturatedAdd(&ctxt->sizeentities, consumed);
2196
2.14k
                }
2197
2198
10.7M
                xmlParserEntityCheck(ctxt, consumed);
2199
2200
10.7M
                xmlPopInput(ctxt);
2201
33.9M
            } else {
2202
33.9M
                break;
2203
33.9M
            }
2204
2205
            /*
2206
             * Also increase the counter when entering or exiting a PERef.
2207
             * The spec says: "When a parameter-entity reference is recognized
2208
             * in the DTD and included, its replacement text MUST be enlarged
2209
             * by the attachment of one leading and one following space (#x20)
2210
             * character."
2211
             */
2212
85.0M
      if (res < INT_MAX)
2213
85.0M
    res++;
2214
85.0M
        }
2215
34.5M
    }
2216
83.4M
    return(res);
2217
83.4M
}
2218
2219
/************************************************************************
2220
 *                  *
2221
 *    Commodity functions to handle entities      *
2222
 *                  *
2223
 ************************************************************************/
2224
2225
/**
2226
 * xmlPopInput:
2227
 * @ctxt:  an XML parser context
2228
 *
2229
 * xmlPopInput: the current input pointed by ctxt->input came to an end
2230
 *          pop it and return the next char.
2231
 *
2232
 * Returns the current xmlChar in the parser context
2233
 */
2234
xmlChar
2235
10.7M
xmlPopInput(xmlParserCtxtPtr ctxt) {
2236
10.7M
    xmlParserInputPtr input;
2237
2238
10.7M
    if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2239
10.7M
    if (xmlParserDebugEntities)
2240
0
  xmlGenericError(xmlGenericErrorContext,
2241
0
    "Popping input %d\n", ctxt->inputNr);
2242
10.7M
    if ((ctxt->inputNr > 1) && (ctxt->inSubset == 0) &&
2243
10.7M
        (ctxt->instate != XML_PARSER_EOF))
2244
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2245
0
                    "Unfinished entity outside the DTD");
2246
10.7M
    input = inputPop(ctxt);
2247
10.7M
    if (input->entity != NULL)
2248
10.7M
        input->entity->flags &= ~XML_ENT_EXPANDING;
2249
10.7M
    xmlFreeInputStream(input);
2250
10.7M
    if (*ctxt->input->cur == 0)
2251
5.10M
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2252
10.7M
    return(CUR);
2253
10.7M
}
2254
2255
/**
2256
 * xmlPushInput:
2257
 * @ctxt:  an XML parser context
2258
 * @input:  an XML parser input fragment (entity, XML fragment ...).
2259
 *
2260
 * xmlPushInput: switch to a new input stream which is stacked on top
2261
 *               of the previous one(s).
2262
 * Returns -1 in case of error or the index in the input stack
2263
 */
2264
int
2265
10.8M
xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2266
10.8M
    int ret;
2267
10.8M
    if (input == NULL) return(-1);
2268
2269
10.8M
    if (xmlParserDebugEntities) {
2270
0
  if ((ctxt->input != NULL) && (ctxt->input->filename))
2271
0
      xmlGenericError(xmlGenericErrorContext,
2272
0
        "%s(%d): ", ctxt->input->filename,
2273
0
        ctxt->input->line);
2274
0
  xmlGenericError(xmlGenericErrorContext,
2275
0
    "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2276
0
    }
2277
10.8M
    if (((ctxt->inputNr > 40) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2278
10.8M
        (ctxt->inputNr > 100)) {
2279
0
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2280
0
        while (ctxt->inputNr > 1)
2281
0
            xmlFreeInputStream(inputPop(ctxt));
2282
0
  return(-1);
2283
0
    }
2284
10.8M
    ret = inputPush(ctxt, input);
2285
10.8M
    if (ctxt->instate == XML_PARSER_EOF)
2286
0
        return(-1);
2287
10.8M
    GROW;
2288
10.8M
    return(ret);
2289
10.8M
}
2290
2291
/**
2292
 * xmlParseCharRef:
2293
 * @ctxt:  an XML parser context
2294
 *
2295
 * DEPRECATED: Internal function, don't use.
2296
 *
2297
 * Parse a numeric character reference. Always consumes '&'.
2298
 *
2299
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2300
 *                  '&#x' [0-9a-fA-F]+ ';'
2301
 *
2302
 * [ WFC: Legal Character ]
2303
 * Characters referred to using character references must match the
2304
 * production for Char.
2305
 *
2306
 * Returns the value parsed (as an int), 0 in case of error
2307
 */
2308
int
2309
1.55M
xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2310
1.55M
    int val = 0;
2311
1.55M
    int count = 0;
2312
2313
    /*
2314
     * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2315
     */
2316
1.55M
    if ((RAW == '&') && (NXT(1) == '#') &&
2317
1.55M
        (NXT(2) == 'x')) {
2318
595k
  SKIP(3);
2319
595k
  GROW;
2320
1.94M
  while (RAW != ';') { /* loop blocked by count */
2321
1.48M
      if (count++ > 20) {
2322
30.5k
    count = 0;
2323
30.5k
    GROW;
2324
30.5k
                if (ctxt->instate == XML_PARSER_EOF)
2325
0
                    return(0);
2326
30.5k
      }
2327
1.48M
      if ((RAW >= '0') && (RAW <= '9'))
2328
761k
          val = val * 16 + (CUR - '0');
2329
726k
      else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2330
453k
          val = val * 16 + (CUR - 'a') + 10;
2331
273k
      else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2332
138k
          val = val * 16 + (CUR - 'A') + 10;
2333
134k
      else {
2334
134k
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2335
134k
    val = 0;
2336
134k
    break;
2337
134k
      }
2338
1.35M
      if (val > 0x110000)
2339
337k
          val = 0x110000;
2340
2341
1.35M
      NEXT;
2342
1.35M
      count++;
2343
1.35M
  }
2344
595k
  if (RAW == ';') {
2345
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2346
460k
      ctxt->input->col++;
2347
460k
      ctxt->input->cur++;
2348
460k
  }
2349
963k
    } else if  ((RAW == '&') && (NXT(1) == '#')) {
2350
963k
  SKIP(2);
2351
963k
  GROW;
2352
3.23M
  while (RAW != ';') { /* loop blocked by count */
2353
2.48M
      if (count++ > 20) {
2354
25.0k
    count = 0;
2355
25.0k
    GROW;
2356
25.0k
                if (ctxt->instate == XML_PARSER_EOF)
2357
0
                    return(0);
2358
25.0k
      }
2359
2.48M
      if ((RAW >= '0') && (RAW <= '9'))
2360
2.27M
          val = val * 10 + (CUR - '0');
2361
205k
      else {
2362
205k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2363
205k
    val = 0;
2364
205k
    break;
2365
205k
      }
2366
2.27M
      if (val > 0x110000)
2367
232k
          val = 0x110000;
2368
2369
2.27M
      NEXT;
2370
2.27M
      count++;
2371
2.27M
  }
2372
963k
  if (RAW == ';') {
2373
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2374
757k
      ctxt->input->col++;
2375
757k
      ctxt->input->cur++;
2376
757k
  }
2377
963k
    } else {
2378
0
        if (RAW == '&')
2379
0
            SKIP(1);
2380
0
        xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2381
0
    }
2382
2383
    /*
2384
     * [ WFC: Legal Character ]
2385
     * Characters referred to using character references must match the
2386
     * production for Char.
2387
     */
2388
1.55M
    if (val >= 0x110000) {
2389
2.76k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2390
2.76k
                "xmlParseCharRef: character reference out of bounds\n",
2391
2.76k
          val);
2392
1.55M
    } else if (IS_CHAR(val)) {
2393
1.13M
        return(val);
2394
1.13M
    } else {
2395
417k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2396
417k
                          "xmlParseCharRef: invalid xmlChar value %d\n",
2397
417k
                    val);
2398
417k
    }
2399
420k
    return(0);
2400
1.55M
}
2401
2402
/**
2403
 * xmlParseStringCharRef:
2404
 * @ctxt:  an XML parser context
2405
 * @str:  a pointer to an index in the string
2406
 *
2407
 * parse Reference declarations, variant parsing from a string rather
2408
 * than an an input flow.
2409
 *
2410
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2411
 *                  '&#x' [0-9a-fA-F]+ ';'
2412
 *
2413
 * [ WFC: Legal Character ]
2414
 * Characters referred to using character references must match the
2415
 * production for Char.
2416
 *
2417
 * Returns the value parsed (as an int), 0 in case of error, str will be
2418
 *         updated to the current value of the index
2419
 */
2420
static int
2421
228k
xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2422
228k
    const xmlChar *ptr;
2423
228k
    xmlChar cur;
2424
228k
    int val = 0;
2425
2426
228k
    if ((str == NULL) || (*str == NULL)) return(0);
2427
228k
    ptr = *str;
2428
228k
    cur = *ptr;
2429
228k
    if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2430
53.7k
  ptr += 3;
2431
53.7k
  cur = *ptr;
2432
199k
  while (cur != ';') { /* Non input consuming loop */
2433
153k
      if ((cur >= '0') && (cur <= '9'))
2434
69.3k
          val = val * 16 + (cur - '0');
2435
84.4k
      else if ((cur >= 'a') && (cur <= 'f'))
2436
25.4k
          val = val * 16 + (cur - 'a') + 10;
2437
59.0k
      else if ((cur >= 'A') && (cur <= 'F'))
2438
50.7k
          val = val * 16 + (cur - 'A') + 10;
2439
8.27k
      else {
2440
8.27k
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2441
8.27k
    val = 0;
2442
8.27k
    break;
2443
8.27k
      }
2444
145k
      if (val > 0x110000)
2445
66.1k
          val = 0x110000;
2446
2447
145k
      ptr++;
2448
145k
      cur = *ptr;
2449
145k
  }
2450
53.7k
  if (cur == ';')
2451
45.4k
      ptr++;
2452
174k
    } else if  ((cur == '&') && (ptr[1] == '#')){
2453
174k
  ptr += 2;
2454
174k
  cur = *ptr;
2455
579k
  while (cur != ';') { /* Non input consuming loops */
2456
414k
      if ((cur >= '0') && (cur <= '9'))
2457
405k
          val = val * 10 + (cur - '0');
2458
9.30k
      else {
2459
9.30k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2460
9.30k
    val = 0;
2461
9.30k
    break;
2462
9.30k
      }
2463
405k
      if (val > 0x110000)
2464
9.33k
          val = 0x110000;
2465
2466
405k
      ptr++;
2467
405k
      cur = *ptr;
2468
405k
  }
2469
174k
  if (cur == ';')
2470
165k
      ptr++;
2471
174k
    } else {
2472
0
  xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2473
0
  return(0);
2474
0
    }
2475
228k
    *str = ptr;
2476
2477
    /*
2478
     * [ WFC: Legal Character ]
2479
     * Characters referred to using character references must match the
2480
     * production for Char.
2481
     */
2482
228k
    if (val >= 0x110000) {
2483
268
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2484
268
                "xmlParseStringCharRef: character reference out of bounds\n",
2485
268
                val);
2486
228k
    } else if (IS_CHAR(val)) {
2487
205k
        return(val);
2488
205k
    } else {
2489
22.6k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2490
22.6k
        "xmlParseStringCharRef: invalid xmlChar value %d\n",
2491
22.6k
        val);
2492
22.6k
    }
2493
22.9k
    return(0);
2494
228k
}
2495
2496
/**
2497
 * xmlParserHandlePEReference:
2498
 * @ctxt:  the parser context
2499
 *
2500
 * [69] PEReference ::= '%' Name ';'
2501
 *
2502
 * [ WFC: No Recursion ]
2503
 * A parsed entity must not contain a recursive
2504
 * reference to itself, either directly or indirectly.
2505
 *
2506
 * [ WFC: Entity Declared ]
2507
 * In a document without any DTD, a document with only an internal DTD
2508
 * subset which contains no parameter entity references, or a document
2509
 * with "standalone='yes'", ...  ... The declaration of a parameter
2510
 * entity must precede any reference to it...
2511
 *
2512
 * [ VC: Entity Declared ]
2513
 * In a document with an external subset or external parameter entities
2514
 * with "standalone='no'", ...  ... The declaration of a parameter entity
2515
 * must precede any reference to it...
2516
 *
2517
 * [ WFC: In DTD ]
2518
 * Parameter-entity references may only appear in the DTD.
2519
 * NOTE: misleading but this is handled.
2520
 *
2521
 * A PEReference may have been detected in the current input stream
2522
 * the handling is done accordingly to
2523
 *      http://www.w3.org/TR/REC-xml#entproc
2524
 * i.e.
2525
 *   - Included in literal in entity values
2526
 *   - Included as Parameter Entity reference within DTDs
2527
 */
2528
void
2529
0
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2530
0
    switch(ctxt->instate) {
2531
0
  case XML_PARSER_CDATA_SECTION:
2532
0
      return;
2533
0
        case XML_PARSER_COMMENT:
2534
0
      return;
2535
0
  case XML_PARSER_START_TAG:
2536
0
      return;
2537
0
  case XML_PARSER_END_TAG:
2538
0
      return;
2539
0
        case XML_PARSER_EOF:
2540
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2541
0
      return;
2542
0
        case XML_PARSER_PROLOG:
2543
0
  case XML_PARSER_START:
2544
0
  case XML_PARSER_MISC:
2545
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2546
0
      return;
2547
0
  case XML_PARSER_ENTITY_DECL:
2548
0
        case XML_PARSER_CONTENT:
2549
0
        case XML_PARSER_ATTRIBUTE_VALUE:
2550
0
        case XML_PARSER_PI:
2551
0
  case XML_PARSER_SYSTEM_LITERAL:
2552
0
  case XML_PARSER_PUBLIC_LITERAL:
2553
      /* we just ignore it there */
2554
0
      return;
2555
0
        case XML_PARSER_EPILOG:
2556
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2557
0
      return;
2558
0
  case XML_PARSER_ENTITY_VALUE:
2559
      /*
2560
       * NOTE: in the case of entity values, we don't do the
2561
       *       substitution here since we need the literal
2562
       *       entity value to be able to save the internal
2563
       *       subset of the document.
2564
       *       This will be handled by xmlStringDecodeEntities
2565
       */
2566
0
      return;
2567
0
        case XML_PARSER_DTD:
2568
      /*
2569
       * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2570
       * In the internal DTD subset, parameter-entity references
2571
       * can occur only where markup declarations can occur, not
2572
       * within markup declarations.
2573
       * In that case this is handled in xmlParseMarkupDecl
2574
       */
2575
0
      if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2576
0
    return;
2577
0
      if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2578
0
    return;
2579
0
            break;
2580
0
        case XML_PARSER_IGNORE:
2581
0
            return;
2582
0
    }
2583
2584
0
    xmlParsePEReference(ctxt);
2585
0
}
2586
2587
/*
 * Macro used to grow the current buffer to twice its size plus n bytes.
 * buffer##_size is expected to be a size_t
 * mem_error: is expected to handle memory allocation failures; it is
 * also the target when the new size wraps around. On failure the buffer
 * and buffer##_size are left unchanged.
 * n is parenthesized so that compound expressions can be passed.
 */
#define growBuffer(buffer, n) {						\
    xmlChar *tmp;							\
    size_t new_size = buffer##_size * 2 + (n);                          \
    if (new_size < buffer##_size) goto mem_error;                       \
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);                     \
    if (tmp == NULL) goto mem_error;					\
    buffer = tmp;							\
    buffer##_size = new_size;                                           \
}
2601
2602
/**
2603
 * xmlStringDecodeEntitiesInt:
2604
 * @ctxt:  the parser context
2605
 * @str:  the input string
2606
 * @len: the string length
2607
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2608
 * @end:  an end marker xmlChar, 0 if none
2609
 * @end2:  an end marker xmlChar, 0 if none
2610
 * @end3:  an end marker xmlChar, 0 if none
2611
 * @check:  whether to perform entity checks
2612
 */
2613
static xmlChar *
2614
xmlStringDecodeEntitiesInt(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2615
               int what, xmlChar end, xmlChar  end2, xmlChar end3,
2616
23.5M
                           int check) {
2617
23.5M
    xmlChar *buffer = NULL;
2618
23.5M
    size_t buffer_size = 0;
2619
23.5M
    size_t nbchars = 0;
2620
2621
23.5M
    xmlChar *current = NULL;
2622
23.5M
    xmlChar *rep = NULL;
2623
23.5M
    const xmlChar *last;
2624
23.5M
    xmlEntityPtr ent;
2625
23.5M
    int c,l;
2626
2627
23.5M
    if (str == NULL)
2628
17.7k
        return(NULL);
2629
23.5M
    last = str + len;
2630
2631
23.5M
    if (((ctxt->depth > 40) &&
2632
23.5M
         ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2633
23.5M
  (ctxt->depth > 100)) {
2634
0
  xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_LOOP,
2635
0
                       "Maximum entity nesting depth exceeded");
2636
0
  return(NULL);
2637
0
    }
2638
2639
    /*
2640
     * allocate a translation buffer.
2641
     */
2642
23.5M
    buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2643
23.5M
    buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
2644
23.5M
    if (buffer == NULL) goto mem_error;
2645
2646
    /*
2647
     * OK loop until we reach one of the ending char or a size limit.
2648
     * we are operating on already parsed values.
2649
     */
2650
23.5M
    if (str < last)
2651
23.3M
  c = CUR_SCHAR(str, l);
2652
143k
    else
2653
143k
        c = 0;
2654
1.95G
    while ((c != 0) && (c != end) && /* non input consuming loop */
2655
1.95G
           (c != end2) && (c != end3) &&
2656
1.95G
           (ctxt->instate != XML_PARSER_EOF)) {
2657
2658
1.93G
  if (c == 0) break;
2659
1.93G
        if ((c == '&') && (str[1] == '#')) {
2660
228k
      int val = xmlParseStringCharRef(ctxt, &str);
2661
228k
      if (val == 0)
2662
22.9k
                goto int_error;
2663
205k
      COPY_BUF(0,buffer,nbchars,val);
2664
205k
      if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2665
964
          growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2666
964
      }
2667
1.93G
  } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2668
24.3M
      if (xmlParserDebugEntities)
2669
0
    xmlGenericError(xmlGenericErrorContext,
2670
0
      "String decoding Entity Reference: %.30s\n",
2671
0
      str);
2672
24.3M
      ent = xmlParseStringEntityRef(ctxt, &str);
2673
24.3M
      if ((ent != NULL) &&
2674
24.3M
    (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2675
26.2k
    if (ent->content != NULL) {
2676
26.2k
        COPY_BUF(0,buffer,nbchars,ent->content[0]);
2677
26.2k
        if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2678
990
      growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2679
990
        }
2680
26.2k
    } else {
2681
0
        xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2682
0
          "predefined entity has no content\n");
2683
0
                    goto int_error;
2684
0
    }
2685
24.3M
      } else if ((ent != NULL) && (ent->content != NULL)) {
2686
21.1M
          if ((check) && (xmlParserEntityCheck(ctxt, ent->length)))
2687
184
                    goto int_error;
2688
2689
21.1M
                if (ent->flags & XML_ENT_EXPANDING) {
2690
520
              xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2691
520
                    xmlHaltParser(ctxt);
2692
520
                    ent->content[0] = 0;
2693
520
                    goto int_error;
2694
520
                }
2695
2696
21.1M
                ent->flags |= XML_ENT_EXPANDING;
2697
21.1M
    ctxt->depth++;
2698
21.1M
    rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
2699
21.1M
                        ent->length, what, 0, 0, 0, check);
2700
21.1M
    ctxt->depth--;
2701
21.1M
                ent->flags &= ~XML_ENT_EXPANDING;
2702
2703
21.1M
    if (rep == NULL) {
2704
3.86k
                    ent->content[0] = 0;
2705
3.86k
                    goto int_error;
2706
3.86k
                }
2707
2708
21.1M
                current = rep;
2709
4.69G
                while (*current != 0) { /* non input consuming loop */
2710
4.67G
                    buffer[nbchars++] = *current++;
2711
4.67G
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2712
1.96M
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2713
1.96M
                    }
2714
4.67G
                }
2715
21.1M
                xmlFree(rep);
2716
21.1M
                rep = NULL;
2717
21.1M
      } else if (ent != NULL) {
2718
23.1k
    int i = xmlStrlen(ent->name);
2719
23.1k
    const xmlChar *cur = ent->name;
2720
2721
23.1k
    buffer[nbchars++] = '&';
2722
23.1k
    if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
2723
678
        growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
2724
678
    }
2725
108k
    for (;i > 0;i--)
2726
85.7k
        buffer[nbchars++] = *cur++;
2727
23.1k
    buffer[nbchars++] = ';';
2728
23.1k
      }
2729
1.91G
  } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2730
400k
      if (xmlParserDebugEntities)
2731
0
    xmlGenericError(xmlGenericErrorContext,
2732
0
      "String decoding PE Reference: %.30s\n", str);
2733
400k
      ent = xmlParseStringPEReference(ctxt, &str);
2734
400k
      if (ent != NULL) {
2735
370k
                if (ent->content == NULL) {
2736
        /*
2737
         * Note: external parsed entities will not be loaded,
2738
         * it is not required for a non-validating parser to
2739
         * complete external PEReferences coming from the
2740
         * internal subset
2741
         */
2742
2.21k
        if (((ctxt->options & XML_PARSE_NOENT) != 0) ||
2743
2.21k
      ((ctxt->options & XML_PARSE_DTDVALID) != 0) ||
2744
2.21k
      (ctxt->validate != 0)) {
2745
2.09k
      xmlLoadEntityContent(ctxt, ent);
2746
2.09k
        } else {
2747
119
      xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
2748
119
      "not validating will not read content for PE entity %s\n",
2749
119
                          ent->name, NULL);
2750
119
        }
2751
2.21k
    }
2752
2753
370k
          if ((check) && (xmlParserEntityCheck(ctxt, ent->length)))
2754
110
                    goto int_error;
2755
2756
370k
                if (ent->flags & XML_ENT_EXPANDING) {
2757
517
              xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2758
517
                    xmlHaltParser(ctxt);
2759
517
                    if (ent->content != NULL)
2760
304
                        ent->content[0] = 0;
2761
517
                    goto int_error;
2762
517
                }
2763
2764
370k
                ent->flags |= XML_ENT_EXPANDING;
2765
370k
    ctxt->depth++;
2766
370k
    rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
2767
370k
                        ent->length, what, 0, 0, 0, check);
2768
370k
    ctxt->depth--;
2769
370k
                ent->flags &= ~XML_ENT_EXPANDING;
2770
2771
370k
    if (rep == NULL) {
2772
1.33k
                    if (ent->content != NULL)
2773
364
                        ent->content[0] = 0;
2774
1.33k
                    goto int_error;
2775
1.33k
                }
2776
368k
                current = rep;
2777
1.11G
                while (*current != 0) { /* non input consuming loop */
2778
1.11G
                    buffer[nbchars++] = *current++;
2779
1.11G
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2780
86.7k
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2781
86.7k
                    }
2782
1.11G
                }
2783
368k
                xmlFree(rep);
2784
368k
                rep = NULL;
2785
368k
      }
2786
1.91G
  } else {
2787
1.91G
      COPY_BUF(l,buffer,nbchars,c);
2788
1.91G
      str += l;
2789
1.91G
      if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2790
472k
          growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2791
472k
      }
2792
1.91G
  }
2793
1.93G
  if (str < last)
2794
1.91G
      c = CUR_SCHAR(str, l);
2795
23.3M
  else
2796
23.3M
      c = 0;
2797
1.93G
    }
2798
23.5M
    buffer[nbchars] = 0;
2799
23.5M
    return(buffer);
2800
2801
0
mem_error:
2802
0
    xmlErrMemory(ctxt, NULL);
2803
29.4k
int_error:
2804
29.4k
    if (rep != NULL)
2805
0
        xmlFree(rep);
2806
29.4k
    if (buffer != NULL)
2807
29.4k
        xmlFree(buffer);
2808
29.4k
    return(NULL);
2809
0
}
2810
2811
/**
2812
 * xmlStringLenDecodeEntities:
2813
 * @ctxt:  the parser context
2814
 * @str:  the input string
2815
 * @len: the string length
2816
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2817
 * @end:  an end marker xmlChar, 0 if none
2818
 * @end2:  an end marker xmlChar, 0 if none
2819
 * @end3:  an end marker xmlChar, 0 if none
2820
 *
2821
 * DEPRECATED: Internal function, don't use.
2822
 *
2823
 * Takes a entity string content and process to do the adequate substitutions.
2824
 *
2825
 * [67] Reference ::= EntityRef | CharRef
2826
 *
2827
 * [69] PEReference ::= '%' Name ';'
2828
 *
2829
 * Returns A newly allocated string with the substitution done. The caller
2830
 *      must deallocate it !
2831
 */
2832
xmlChar *
2833
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2834
                           int what, xmlChar end, xmlChar  end2,
2835
8.27k
                           xmlChar end3) {
2836
8.27k
    if ((ctxt == NULL) || (str == NULL) || (len < 0))
2837
0
        return(NULL);
2838
8.27k
    return(xmlStringDecodeEntitiesInt(ctxt, str, len, what,
2839
8.27k
                                      end, end2, end3, 0));
2840
8.27k
}
2841
2842
/**
2843
 * xmlStringDecodeEntities:
2844
 * @ctxt:  the parser context
2845
 * @str:  the input string
2846
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2847
 * @end:  an end marker xmlChar, 0 if none
2848
 * @end2:  an end marker xmlChar, 0 if none
2849
 * @end3:  an end marker xmlChar, 0 if none
2850
 *
2851
 * DEPRECATED: Internal function, don't use.
2852
 *
2853
 * Takes a entity string content and process to do the adequate substitutions.
2854
 *
2855
 * [67] Reference ::= EntityRef | CharRef
2856
 *
2857
 * [69] PEReference ::= '%' Name ';'
2858
 *
2859
 * Returns A newly allocated string with the substitution done. The caller
2860
 *      must deallocate it !
2861
 */
2862
xmlChar *
2863
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2864
272k
            xmlChar end, xmlChar  end2, xmlChar end3) {
2865
272k
    if ((ctxt == NULL) || (str == NULL)) return(NULL);
2866
272k
    return(xmlStringDecodeEntitiesInt(ctxt, str, xmlStrlen(str), what,
2867
272k
                                      end, end2, end3, 0));
2868
272k
}
2869
2870
/************************************************************************
2871
 *                  *
2872
 *    Commodity functions, cleanup needed ?     *
2873
 *                  *
2874
 ************************************************************************/
2875
2876
/**
2877
 * areBlanks:
2878
 * @ctxt:  an XML parser context
2879
 * @str:  a xmlChar *
2880
 * @len:  the size of @str
2881
 * @blank_chars: we know the chars are blanks
2882
 *
2883
 * Is this a sequence of blank chars that one can ignore ?
2884
 *
2885
 * Returns 1 if ignorable 0 otherwise.
2886
 */
2887
2888
static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2889
7.93M
                     int blank_chars) {
2890
7.93M
    int i, ret;
2891
7.93M
    xmlNodePtr lastChild;
2892
2893
    /*
2894
     * Don't spend time trying to differentiate them, the same callback is
2895
     * used !
2896
     */
2897
7.93M
    if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2898
1.74M
  return(0);
2899
2900
    /*
2901
     * Check for xml:space value.
2902
     */
2903
6.18M
    if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2904
6.18M
        (*(ctxt->space) == -2))
2905
2.70M
  return(0);
2906
2907
    /*
2908
     * Check that the string is made of blanks
2909
     */
2910
3.47M
    if (blank_chars == 0) {
2911
4.95M
  for (i = 0;i < len;i++)
2912
4.41M
      if (!(IS_BLANK_CH(str[i]))) return(0);
2913
1.48M
    }
2914
2915
    /*
2916
     * Look if the element is mixed content in the DTD if available
2917
     */
2918
2.53M
    if (ctxt->node == NULL) return(0);
2919
2.35M
    if (ctxt->myDoc != NULL) {
2920
2.35M
  ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2921
2.35M
        if (ret == 0) return(1);
2922
2.18M
        if (ret == 1) return(0);
2923
2.18M
    }
2924
2925
    /*
2926
     * Otherwise, heuristic :-\
2927
     */
2928
2.16M
    if ((RAW != '<') && (RAW != 0xD)) return(0);
2929
2.10M
    if ((ctxt->node->children == NULL) &&
2930
2.10M
  (RAW == '<') && (NXT(1) == '/')) return(0);
2931
2932
2.08M
    lastChild = xmlGetLastChild(ctxt->node);
2933
2.08M
    if (lastChild == NULL) {
2934
805k
        if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2935
805k
            (ctxt->node->content != NULL)) return(0);
2936
1.28M
    } else if (xmlNodeIsText(lastChild))
2937
79.1k
        return(0);
2938
1.20M
    else if ((ctxt->node->children != NULL) &&
2939
1.20M
             (xmlNodeIsText(ctxt->node->children)))
2940
39.8k
        return(0);
2941
1.96M
    return(1);
2942
2.08M
}
2943
2944
/************************************************************************
2945
 *                  *
2946
 *    Extra stuff for namespace support     *
2947
 *  Relates to http://www.w3.org/TR/WD-xml-names      *
2948
 *                  *
2949
 ************************************************************************/
2950
2951
/**
2952
 * xmlSplitQName:
2953
 * @ctxt:  an XML parser context
2954
 * @name:  an XML parser context
2955
 * @prefix:  a xmlChar **
2956
 *
2957
 * parse an UTF8 encoded XML qualified name string
2958
 *
2959
 * [NS 5] QName ::= (Prefix ':')? LocalPart
2960
 *
2961
 * [NS 6] Prefix ::= NCName
2962
 *
2963
 * [NS 7] LocalPart ::= NCName
2964
 *
2965
 * Returns the local part, and prefix is updated
2966
 *   to get the Prefix if any.
2967
 */
2968
2969
xmlChar *
2970
8.03M
xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2971
8.03M
    xmlChar buf[XML_MAX_NAMELEN + 5];
2972
8.03M
    xmlChar *buffer = NULL;
2973
8.03M
    int len = 0;
2974
8.03M
    int max = XML_MAX_NAMELEN;
2975
8.03M
    xmlChar *ret = NULL;
2976
8.03M
    const xmlChar *cur = name;
2977
8.03M
    int c;
2978
2979
8.03M
    if (prefix == NULL) return(NULL);
2980
8.03M
    *prefix = NULL;
2981
2982
8.03M
    if (cur == NULL) return(NULL);
2983
2984
#ifndef XML_XML_NAMESPACE
2985
    /* xml: prefix is not really a namespace */
2986
    if ((cur[0] == 'x') && (cur[1] == 'm') &&
2987
        (cur[2] == 'l') && (cur[3] == ':'))
2988
  return(xmlStrdup(name));
2989
#endif
2990
2991
    /* nasty but well=formed */
2992
8.03M
    if (cur[0] == ':')
2993
23.1k
  return(xmlStrdup(name));
2994
2995
8.00M
    c = *cur++;
2996
44.8M
    while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2997
36.8M
  buf[len++] = c;
2998
36.8M
  c = *cur++;
2999
36.8M
    }
3000
8.00M
    if (len >= max) {
3001
  /*
3002
   * Okay someone managed to make a huge name, so he's ready to pay
3003
   * for the processing speed.
3004
   */
3005
28.0k
  max = len * 2;
3006
3007
28.0k
  buffer = (xmlChar *) xmlMallocAtomic(max);
3008
28.0k
  if (buffer == NULL) {
3009
0
      xmlErrMemory(ctxt, NULL);
3010
0
      return(NULL);
3011
0
  }
3012
28.0k
  memcpy(buffer, buf, len);
3013
3.32M
  while ((c != 0) && (c != ':')) { /* tested bigname.xml */
3014
3.29M
      if (len + 10 > max) {
3015
7.69k
          xmlChar *tmp;
3016
3017
7.69k
    max *= 2;
3018
7.69k
    tmp = (xmlChar *) xmlRealloc(buffer, max);
3019
7.69k
    if (tmp == NULL) {
3020
0
        xmlFree(buffer);
3021
0
        xmlErrMemory(ctxt, NULL);
3022
0
        return(NULL);
3023
0
    }
3024
7.69k
    buffer = tmp;
3025
7.69k
      }
3026
3.29M
      buffer[len++] = c;
3027
3.29M
      c = *cur++;
3028
3.29M
  }
3029
28.0k
  buffer[len] = 0;
3030
28.0k
    }
3031
3032
8.00M
    if ((c == ':') && (*cur == 0)) {
3033
51.8k
        if (buffer != NULL)
3034
1.22k
      xmlFree(buffer);
3035
51.8k
  *prefix = NULL;
3036
51.8k
  return(xmlStrdup(name));
3037
51.8k
    }
3038
3039
7.95M
    if (buffer == NULL)
3040
7.92M
  ret = xmlStrndup(buf, len);
3041
26.7k
    else {
3042
26.7k
  ret = buffer;
3043
26.7k
  buffer = NULL;
3044
26.7k
  max = XML_MAX_NAMELEN;
3045
26.7k
    }
3046
3047
3048
7.95M
    if (c == ':') {
3049
1.65M
  c = *cur;
3050
1.65M
        *prefix = ret;
3051
1.65M
  if (c == 0) {
3052
0
      return(xmlStrndup(BAD_CAST "", 0));
3053
0
  }
3054
1.65M
  len = 0;
3055
3056
  /*
3057
   * Check that the first character is proper to start
3058
   * a new name
3059
   */
3060
1.65M
  if (!(((c >= 0x61) && (c <= 0x7A)) ||
3061
1.65M
        ((c >= 0x41) && (c <= 0x5A)) ||
3062
1.65M
        (c == '_') || (c == ':'))) {
3063
69.8k
      int l;
3064
69.8k
      int first = CUR_SCHAR(cur, l);
3065
3066
69.8k
      if (!IS_LETTER(first) && (first != '_')) {
3067
20.5k
    xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3068
20.5k
          "Name %s is not XML Namespace compliant\n",
3069
20.5k
          name);
3070
20.5k
      }
3071
69.8k
  }
3072
1.65M
  cur++;
3073
3074
12.8M
  while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3075
11.2M
      buf[len++] = c;
3076
11.2M
      c = *cur++;
3077
11.2M
  }
3078
1.65M
  if (len >= max) {
3079
      /*
3080
       * Okay someone managed to make a huge name, so he's ready to pay
3081
       * for the processing speed.
3082
       */
3083
16.5k
      max = len * 2;
3084
3085
16.5k
      buffer = (xmlChar *) xmlMallocAtomic(max);
3086
16.5k
      if (buffer == NULL) {
3087
0
          xmlErrMemory(ctxt, NULL);
3088
0
    return(NULL);
3089
0
      }
3090
16.5k
      memcpy(buffer, buf, len);
3091
1.92M
      while (c != 0) { /* tested bigname2.xml */
3092
1.90M
    if (len + 10 > max) {
3093
4.87k
        xmlChar *tmp;
3094
3095
4.87k
        max *= 2;
3096
4.87k
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3097
4.87k
        if (tmp == NULL) {
3098
0
      xmlErrMemory(ctxt, NULL);
3099
0
      xmlFree(buffer);
3100
0
      return(NULL);
3101
0
        }
3102
4.87k
        buffer = tmp;
3103
4.87k
    }
3104
1.90M
    buffer[len++] = c;
3105
1.90M
    c = *cur++;
3106
1.90M
      }
3107
16.5k
      buffer[len] = 0;
3108
16.5k
  }
3109
3110
1.65M
  if (buffer == NULL)
3111
1.63M
      ret = xmlStrndup(buf, len);
3112
16.5k
  else {
3113
16.5k
      ret = buffer;
3114
16.5k
  }
3115
1.65M
    }
3116
3117
7.95M
    return(ret);
3118
7.95M
}
3119
3120
/************************************************************************
3121
 *                  *
3122
 *      The parser itself       *
3123
 *  Relates to http://www.w3.org/TR/REC-xml       *
3124
 *                  *
3125
 ************************************************************************/
3126
3127
/************************************************************************
3128
 *                  *
3129
 *  Routines to parse Name, NCName and NmToken      *
3130
 *                  *
3131
 ************************************************************************/
3132
#ifdef DEBUG
3133
static unsigned long nbParseName = 0;
3134
static unsigned long nbParseNmToken = 0;
3135
static unsigned long nbParseNCName = 0;
3136
static unsigned long nbParseNCNameComplex = 0;
3137
static unsigned long nbParseNameComplex = 0;
3138
static unsigned long nbParseStringName = 0;
3139
#endif
3140
3141
/*
3142
 * The two following functions are related to the change of accepted
3143
 * characters for Name and NmToken in the Revision 5 of XML-1.0
3144
 * They correspond to the modified production [4] and the new production [4a]
3145
 * changes in that revision. Also note that the macros used for the
3146
 * productions Letter, Digit, CombiningChar and Extender are not needed
3147
 * anymore.
3148
 * We still keep compatibility to pre-revision5 parsing semantic if the
3149
 * new XML_PARSE_OLD10 option is given to the parser.
3150
 */
3151
static int
3152
28.2M
xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3153
28.2M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3154
        /*
3155
   * Use the new checks of production [4] [4a] amd [5] of the
3156
   * Update 5 of XML-1.0
3157
   */
3158
24.9M
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3159
24.9M
      (((c >= 'a') && (c <= 'z')) ||
3160
24.9M
       ((c >= 'A') && (c <= 'Z')) ||
3161
24.9M
       (c == '_') || (c == ':') ||
3162
24.9M
       ((c >= 0xC0) && (c <= 0xD6)) ||
3163
24.9M
       ((c >= 0xD8) && (c <= 0xF6)) ||
3164
24.9M
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3165
24.9M
       ((c >= 0x370) && (c <= 0x37D)) ||
3166
24.9M
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3167
24.9M
       ((c >= 0x200C) && (c <= 0x200D)) ||
3168
24.9M
       ((c >= 0x2070) && (c <= 0x218F)) ||
3169
24.9M
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3170
24.9M
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3171
24.9M
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3172
24.9M
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3173
24.9M
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3174
23.7M
      return(1);
3175
24.9M
    } else {
3176
3.28M
        if (IS_LETTER(c) || (c == '_') || (c == ':'))
3177
2.70M
      return(1);
3178
3.28M
    }
3179
1.80M
    return(0);
3180
28.2M
}
3181
3182
static int
3183
570M
xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3184
570M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3185
        /*
3186
   * Use the new checks of production [4] [4a] amd [5] of the
3187
   * Update 5 of XML-1.0
3188
   */
3189
548M
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3190
548M
      (((c >= 'a') && (c <= 'z')) ||
3191
548M
       ((c >= 'A') && (c <= 'Z')) ||
3192
548M
       ((c >= '0') && (c <= '9')) || /* !start */
3193
548M
       (c == '_') || (c == ':') ||
3194
548M
       (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3195
548M
       ((c >= 0xC0) && (c <= 0xD6)) ||
3196
548M
       ((c >= 0xD8) && (c <= 0xF6)) ||
3197
548M
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3198
548M
       ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3199
548M
       ((c >= 0x370) && (c <= 0x37D)) ||
3200
548M
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3201
548M
       ((c >= 0x200C) && (c <= 0x200D)) ||
3202
548M
       ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3203
548M
       ((c >= 0x2070) && (c <= 0x218F)) ||
3204
548M
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3205
548M
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3206
548M
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3207
548M
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3208
548M
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3209
525M
       return(1);
3210
548M
    } else {
3211
21.1M
        if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3212
21.1M
            (c == '.') || (c == '-') ||
3213
21.1M
      (c == '_') || (c == ':') ||
3214
21.1M
      (IS_COMBINING(c)) ||
3215
21.1M
      (IS_EXTENDER(c)))
3216
18.4M
      return(1);
3217
21.1M
    }
3218
26.5M
    return(0);
3219
570M
}
3220
3221
static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
3222
                                          int *len, int *alloc, int normalize);
3223
3224
static const xmlChar *
3225
3.94M
xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3226
3.94M
    int len = 0, l;
3227
3.94M
    int c;
3228
3.94M
    int count = 0;
3229
3.94M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3230
1.68M
                    XML_MAX_TEXT_LENGTH :
3231
3.94M
                    XML_MAX_NAME_LENGTH;
3232
3233
#ifdef DEBUG
3234
    nbParseNameComplex++;
3235
#endif
3236
3237
    /*
3238
     * Handler for more complex cases
3239
     */
3240
3.94M
    GROW;
3241
3.94M
    if (ctxt->instate == XML_PARSER_EOF)
3242
18
        return(NULL);
3243
3.94M
    c = CUR_CHAR(l);
3244
3.94M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3245
        /*
3246
   * Use the new checks of production [4] [4a] amd [5] of the
3247
   * Update 5 of XML-1.0
3248
   */
3249
2.15M
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3250
2.15M
      (!(((c >= 'a') && (c <= 'z')) ||
3251
2.01M
         ((c >= 'A') && (c <= 'Z')) ||
3252
2.01M
         (c == '_') || (c == ':') ||
3253
2.01M
         ((c >= 0xC0) && (c <= 0xD6)) ||
3254
2.01M
         ((c >= 0xD8) && (c <= 0xF6)) ||
3255
2.01M
         ((c >= 0xF8) && (c <= 0x2FF)) ||
3256
2.01M
         ((c >= 0x370) && (c <= 0x37D)) ||
3257
2.01M
         ((c >= 0x37F) && (c <= 0x1FFF)) ||
3258
2.01M
         ((c >= 0x200C) && (c <= 0x200D)) ||
3259
2.01M
         ((c >= 0x2070) && (c <= 0x218F)) ||
3260
2.01M
         ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3261
2.01M
         ((c >= 0x3001) && (c <= 0xD7FF)) ||
3262
2.01M
         ((c >= 0xF900) && (c <= 0xFDCF)) ||
3263
2.01M
         ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3264
2.01M
         ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3265
1.42M
      return(NULL);
3266
1.42M
  }
3267
729k
  len += l;
3268
729k
  NEXTL(l);
3269
729k
  c = CUR_CHAR(l);
3270
18.8M
  while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3271
18.8M
         (((c >= 'a') && (c <= 'z')) ||
3272
18.5M
          ((c >= 'A') && (c <= 'Z')) ||
3273
18.5M
          ((c >= '0') && (c <= '9')) || /* !start */
3274
18.5M
          (c == '_') || (c == ':') ||
3275
18.5M
          (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3276
18.5M
          ((c >= 0xC0) && (c <= 0xD6)) ||
3277
18.5M
          ((c >= 0xD8) && (c <= 0xF6)) ||
3278
18.5M
          ((c >= 0xF8) && (c <= 0x2FF)) ||
3279
18.5M
          ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3280
18.5M
          ((c >= 0x370) && (c <= 0x37D)) ||
3281
18.5M
          ((c >= 0x37F) && (c <= 0x1FFF)) ||
3282
18.5M
          ((c >= 0x200C) && (c <= 0x200D)) ||
3283
18.5M
          ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3284
18.5M
          ((c >= 0x2070) && (c <= 0x218F)) ||
3285
18.5M
          ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3286
18.5M
          ((c >= 0x3001) && (c <= 0xD7FF)) ||
3287
18.5M
          ((c >= 0xF900) && (c <= 0xFDCF)) ||
3288
18.5M
          ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3289
18.5M
          ((c >= 0x10000) && (c <= 0xEFFFF))
3290
18.5M
    )) {
3291
18.0M
      if (count++ > XML_PARSER_CHUNK_SIZE) {
3292
75.7k
    count = 0;
3293
75.7k
    GROW;
3294
75.7k
                if (ctxt->instate == XML_PARSER_EOF)
3295
0
                    return(NULL);
3296
75.7k
      }
3297
18.0M
            if (len <= INT_MAX - l)
3298
18.0M
          len += l;
3299
18.0M
      NEXTL(l);
3300
18.0M
      c = CUR_CHAR(l);
3301
18.0M
  }
3302
1.79M
    } else {
3303
1.79M
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3304
1.79M
      (!IS_LETTER(c) && (c != '_') &&
3305
1.67M
       (c != ':'))) {
3306
1.25M
      return(NULL);
3307
1.25M
  }
3308
536k
  len += l;
3309
536k
  NEXTL(l);
3310
536k
  c = CUR_CHAR(l);
3311
3312
12.9M
  while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3313
12.9M
         ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3314
12.7M
    (c == '.') || (c == '-') ||
3315
12.7M
    (c == '_') || (c == ':') ||
3316
12.7M
    (IS_COMBINING(c)) ||
3317
12.7M
    (IS_EXTENDER(c)))) {
3318
12.3M
      if (count++ > XML_PARSER_CHUNK_SIZE) {
3319
49.3k
    count = 0;
3320
49.3k
    GROW;
3321
49.3k
                if (ctxt->instate == XML_PARSER_EOF)
3322
0
                    return(NULL);
3323
49.3k
      }
3324
12.3M
            if (len <= INT_MAX - l)
3325
12.3M
          len += l;
3326
12.3M
      NEXTL(l);
3327
12.3M
      c = CUR_CHAR(l);
3328
12.3M
  }
3329
536k
    }
3330
1.26M
    if (len > maxLength) {
3331
28
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3332
28
        return(NULL);
3333
28
    }
3334
1.26M
    if (ctxt->input->cur - ctxt->input->base < len) {
3335
        /*
3336
         * There were a couple of bugs where PERefs lead to to a change
3337
         * of the buffer. Check the buffer size to avoid passing an invalid
3338
         * pointer to xmlDictLookup.
3339
         */
3340
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3341
0
                    "unexpected change of input buffer");
3342
0
        return (NULL);
3343
0
    }
3344
1.26M
    if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3345
4.47k
        return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3346
1.26M
    return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3347
1.26M
}
3348
3349
/**
3350
 * xmlParseName:
3351
 * @ctxt:  an XML parser context
3352
 *
3353
 * DEPRECATED: Internal function, don't use.
3354
 *
3355
 * parse an XML name.
3356
 *
3357
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3358
 *                  CombiningChar | Extender
3359
 *
3360
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3361
 *
3362
 * [6] Names ::= Name (#x20 Name)*
3363
 *
3364
 * Returns the Name parsed or NULL
3365
 */
3366
3367
const xmlChar *
3368
35.8M
xmlParseName(xmlParserCtxtPtr ctxt) {
3369
35.8M
    const xmlChar *in;
3370
35.8M
    const xmlChar *ret;
3371
35.8M
    size_t count = 0;
3372
35.8M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3373
9.45M
                       XML_MAX_TEXT_LENGTH :
3374
35.8M
                       XML_MAX_NAME_LENGTH;
3375
3376
35.8M
    GROW;
3377
3378
#ifdef DEBUG
3379
    nbParseName++;
3380
#endif
3381
3382
    /*
3383
     * Accelerator for simple ASCII names
3384
     */
3385
35.8M
    in = ctxt->input->cur;
3386
35.8M
    if (((*in >= 0x61) && (*in <= 0x7A)) ||
3387
35.8M
  ((*in >= 0x41) && (*in <= 0x5A)) ||
3388
35.8M
  (*in == '_') || (*in == ':')) {
3389
32.6M
  in++;
3390
151M
  while (((*in >= 0x61) && (*in <= 0x7A)) ||
3391
151M
         ((*in >= 0x41) && (*in <= 0x5A)) ||
3392
151M
         ((*in >= 0x30) && (*in <= 0x39)) ||
3393
151M
         (*in == '_') || (*in == '-') ||
3394
151M
         (*in == ':') || (*in == '.'))
3395
118M
      in++;
3396
32.6M
  if ((*in > 0) && (*in < 0x80)) {
3397
31.8M
      count = in - ctxt->input->cur;
3398
31.8M
            if (count > maxLength) {
3399
13
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3400
13
                return(NULL);
3401
13
            }
3402
31.8M
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3403
31.8M
      ctxt->input->cur = in;
3404
31.8M
      ctxt->input->col += count;
3405
31.8M
      if (ret == NULL)
3406
0
          xmlErrMemory(ctxt, NULL);
3407
31.8M
      return(ret);
3408
31.8M
  }
3409
32.6M
    }
3410
    /* accelerator for special cases */
3411
3.94M
    return(xmlParseNameComplex(ctxt));
3412
35.8M
}
3413
3414
static const xmlChar *
3415
3.06M
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3416
3.06M
    int len = 0, l;
3417
3.06M
    int c;
3418
3.06M
    int count = 0;
3419
3.06M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3420
928k
                    XML_MAX_TEXT_LENGTH :
3421
3.06M
                    XML_MAX_NAME_LENGTH;
3422
3.06M
    size_t startPosition = 0;
3423
3424
#ifdef DEBUG
3425
    nbParseNCNameComplex++;
3426
#endif
3427
3428
    /*
3429
     * Handler for more complex cases
3430
     */
3431
3.06M
    GROW;
3432
3.06M
    startPosition = CUR_PTR - BASE_PTR;
3433
3.06M
    c = CUR_CHAR(l);
3434
3.06M
    if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3435
3.06M
  (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3436
2.07M
  return(NULL);
3437
2.07M
    }
3438
3439
18.5M
    while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3440
18.5M
     (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3441
17.5M
  if (count++ > XML_PARSER_CHUNK_SIZE) {
3442
54.6k
      count = 0;
3443
54.6k
      GROW;
3444
54.6k
            if (ctxt->instate == XML_PARSER_EOF)
3445
0
                return(NULL);
3446
54.6k
  }
3447
17.5M
        if (len <= INT_MAX - l)
3448
17.5M
      len += l;
3449
17.5M
  NEXTL(l);
3450
17.5M
  c = CUR_CHAR(l);
3451
17.5M
  if (c == 0) {
3452
68.8k
      count = 0;
3453
      /*
3454
       * when shrinking to extend the buffer we really need to preserve
3455
       * the part of the name we already parsed. Hence rolling back
3456
       * by current length.
3457
       */
3458
68.8k
      ctxt->input->cur -= l;
3459
68.8k
      GROW;
3460
68.8k
            if (ctxt->instate == XML_PARSER_EOF)
3461
0
                return(NULL);
3462
68.8k
      ctxt->input->cur += l;
3463
68.8k
      c = CUR_CHAR(l);
3464
68.8k
  }
3465
17.5M
    }
3466
988k
    if (len > maxLength) {
3467
15
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3468
15
        return(NULL);
3469
15
    }
3470
988k
    return(xmlDictLookup(ctxt->dict, (BASE_PTR + startPosition), len));
3471
988k
}
3472
3473
/**
3474
 * xmlParseNCName:
3475
 * @ctxt:  an XML parser context
3476
 * @len:  length of the string parsed
3477
 *
3478
 * parse an XML name.
3479
 *
3480
 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3481
 *                      CombiningChar | Extender
3482
 *
3483
 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3484
 *
3485
 * Returns the Name parsed or NULL
3486
 */
3487
3488
static const xmlChar *
3489
20.5M
xmlParseNCName(xmlParserCtxtPtr ctxt) {
3490
20.5M
    const xmlChar *in, *e;
3491
20.5M
    const xmlChar *ret;
3492
20.5M
    size_t count = 0;
3493
20.5M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3494
4.84M
                       XML_MAX_TEXT_LENGTH :
3495
20.5M
                       XML_MAX_NAME_LENGTH;
3496
3497
#ifdef DEBUG
3498
    nbParseNCName++;
3499
#endif
3500
3501
    /*
3502
     * Accelerator for simple ASCII names
3503
     */
3504
20.5M
    in = ctxt->input->cur;
3505
20.5M
    e = ctxt->input->end;
3506
20.5M
    if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3507
20.5M
   ((*in >= 0x41) && (*in <= 0x5A)) ||
3508
20.5M
   (*in == '_')) && (in < e)) {
3509
17.9M
  in++;
3510
73.4M
  while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3511
73.4M
          ((*in >= 0x41) && (*in <= 0x5A)) ||
3512
73.4M
          ((*in >= 0x30) && (*in <= 0x39)) ||
3513
73.4M
          (*in == '_') || (*in == '-') ||
3514
73.4M
          (*in == '.')) && (in < e))
3515
55.5M
      in++;
3516
17.9M
  if (in >= e)
3517
3.66k
      goto complex;
3518
17.9M
  if ((*in > 0) && (*in < 0x80)) {
3519
17.4M
      count = in - ctxt->input->cur;
3520
17.4M
            if (count > maxLength) {
3521
6
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3522
6
                return(NULL);
3523
6
            }
3524
17.4M
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3525
17.4M
      ctxt->input->cur = in;
3526
17.4M
      ctxt->input->col += count;
3527
17.4M
      if (ret == NULL) {
3528
0
          xmlErrMemory(ctxt, NULL);
3529
0
      }
3530
17.4M
      return(ret);
3531
17.4M
  }
3532
17.9M
    }
3533
3.06M
complex:
3534
3.06M
    return(xmlParseNCNameComplex(ctxt));
3535
20.5M
}
3536
3537
/**
3538
 * xmlParseNameAndCompare:
3539
 * @ctxt:  an XML parser context
3540
 *
3541
 * parse an XML name and compares for match
3542
 * (specialized for endtag parsing)
3543
 *
3544
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3545
 * and the name for mismatch
3546
 */
3547
3548
static const xmlChar *
3549
3.83M
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3550
3.83M
    register const xmlChar *cmp = other;
3551
3.83M
    register const xmlChar *in;
3552
3.83M
    const xmlChar *ret;
3553
3554
3.83M
    GROW;
3555
3.83M
    if (ctxt->instate == XML_PARSER_EOF)
3556
0
        return(NULL);
3557
3558
3.83M
    in = ctxt->input->cur;
3559
17.3M
    while (*in != 0 && *in == *cmp) {
3560
13.5M
  ++in;
3561
13.5M
  ++cmp;
3562
13.5M
    }
3563
3.83M
    if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3564
  /* success */
3565
3.19M
  ctxt->input->col += in - ctxt->input->cur;
3566
3.19M
  ctxt->input->cur = in;
3567
3.19M
  return (const xmlChar*) 1;
3568
3.19M
    }
3569
    /* failure (or end of input buffer), check with full function */
3570
637k
    ret = xmlParseName (ctxt);
3571
    /* strings coming from the dictionary direct compare possible */
3572
637k
    if (ret == other) {
3573
20.6k
  return (const xmlChar*) 1;
3574
20.6k
    }
3575
616k
    return ret;
3576
637k
}
3577
3578
/**
3579
 * xmlParseStringName:
3580
 * @ctxt:  an XML parser context
3581
 * @str:  a pointer to the string pointer (IN/OUT)
3582
 *
3583
 * parse an XML name.
3584
 *
3585
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3586
 *                  CombiningChar | Extender
3587
 *
3588
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3589
 *
3590
 * [6] Names ::= Name (#x20 Name)*
3591
 *
3592
 * Returns the Name parsed or NULL. The @str pointer
3593
 * is updated to the current location in the string.
3594
 */
3595
3596
static xmlChar *
3597
25.4M
xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3598
25.4M
    xmlChar buf[XML_MAX_NAMELEN + 5];
3599
25.4M
    const xmlChar *cur = *str;
3600
25.4M
    int len = 0, l;
3601
25.4M
    int c;
3602
25.4M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3603
3.72M
                    XML_MAX_TEXT_LENGTH :
3604
25.4M
                    XML_MAX_NAME_LENGTH;
3605
3606
#ifdef DEBUG
3607
    nbParseStringName++;
3608
#endif
3609
3610
25.4M
    c = CUR_SCHAR(cur, l);
3611
25.4M
    if (!xmlIsNameStartChar(ctxt, c)) {
3612
13.9k
  return(NULL);
3613
13.9k
    }
3614
3615
25.4M
    COPY_BUF(l,buf,len,c);
3616
25.4M
    cur += l;
3617
25.4M
    c = CUR_SCHAR(cur, l);
3618
256M
    while (xmlIsNameChar(ctxt, c)) {
3619
232M
  COPY_BUF(l,buf,len,c);
3620
232M
  cur += l;
3621
232M
  c = CUR_SCHAR(cur, l);
3622
232M
  if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3623
      /*
3624
       * Okay someone managed to make a huge name, so he's ready to pay
3625
       * for the processing speed.
3626
       */
3627
1.27M
      xmlChar *buffer;
3628
1.27M
      int max = len * 2;
3629
3630
1.27M
      buffer = (xmlChar *) xmlMallocAtomic(max);
3631
1.27M
      if (buffer == NULL) {
3632
0
          xmlErrMemory(ctxt, NULL);
3633
0
    return(NULL);
3634
0
      }
3635
1.27M
      memcpy(buffer, buf, len);
3636
288M
      while (xmlIsNameChar(ctxt, c)) {
3637
287M
    if (len + 10 > max) {
3638
1.27M
        xmlChar *tmp;
3639
3640
1.27M
        max *= 2;
3641
1.27M
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3642
1.27M
        if (tmp == NULL) {
3643
0
      xmlErrMemory(ctxt, NULL);
3644
0
      xmlFree(buffer);
3645
0
      return(NULL);
3646
0
        }
3647
1.27M
        buffer = tmp;
3648
1.27M
    }
3649
287M
    COPY_BUF(l,buffer,len,c);
3650
287M
    cur += l;
3651
287M
    c = CUR_SCHAR(cur, l);
3652
287M
                if (len > maxLength) {
3653
0
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3654
0
                    xmlFree(buffer);
3655
0
                    return(NULL);
3656
0
                }
3657
287M
      }
3658
1.27M
      buffer[len] = 0;
3659
1.27M
      *str = cur;
3660
1.27M
      return(buffer);
3661
1.27M
  }
3662
232M
    }
3663
24.1M
    if (len > maxLength) {
3664
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3665
0
        return(NULL);
3666
0
    }
3667
24.1M
    *str = cur;
3668
24.1M
    return(xmlStrndup(buf, len));
3669
24.1M
}
3670
3671
/**
3672
 * xmlParseNmtoken:
3673
 * @ctxt:  an XML parser context
3674
 *
3675
 * DEPRECATED: Internal function, don't use.
3676
 *
3677
 * parse an XML Nmtoken.
3678
 *
3679
 * [7] Nmtoken ::= (NameChar)+
3680
 *
3681
 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3682
 *
3683
 * Returns the Nmtoken parsed or NULL
3684
 */
3685
3686
xmlChar *
3687
506k
xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3688
506k
    xmlChar buf[XML_MAX_NAMELEN + 5];
3689
506k
    int len = 0, l;
3690
506k
    int c;
3691
506k
    int count = 0;
3692
506k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3693
127k
                    XML_MAX_TEXT_LENGTH :
3694
506k
                    XML_MAX_NAME_LENGTH;
3695
3696
#ifdef DEBUG
3697
    nbParseNmToken++;
3698
#endif
3699
3700
506k
    GROW;
3701
506k
    if (ctxt->instate == XML_PARSER_EOF)
3702
0
        return(NULL);
3703
506k
    c = CUR_CHAR(l);
3704
3705
3.65M
    while (xmlIsNameChar(ctxt, c)) {
3706
3.15M
  if (count++ > XML_PARSER_CHUNK_SIZE) {
3707
0
      count = 0;
3708
0
      GROW;
3709
0
  }
3710
3.15M
  COPY_BUF(l,buf,len,c);
3711
3.15M
  NEXTL(l);
3712
3.15M
  c = CUR_CHAR(l);
3713
3.15M
  if (c == 0) {
3714
5.80k
      count = 0;
3715
5.80k
      GROW;
3716
5.80k
      if (ctxt->instate == XML_PARSER_EOF)
3717
0
    return(NULL);
3718
5.80k
            c = CUR_CHAR(l);
3719
5.80k
  }
3720
3.15M
  if (len >= XML_MAX_NAMELEN) {
3721
      /*
3722
       * Okay someone managed to make a huge token, so he's ready to pay
3723
       * for the processing speed.
3724
       */
3725
7.57k
      xmlChar *buffer;
3726
7.57k
      int max = len * 2;
3727
3728
7.57k
      buffer = (xmlChar *) xmlMallocAtomic(max);
3729
7.57k
      if (buffer == NULL) {
3730
0
          xmlErrMemory(ctxt, NULL);
3731
0
    return(NULL);
3732
0
      }
3733
7.57k
      memcpy(buffer, buf, len);
3734
2.91M
      while (xmlIsNameChar(ctxt, c)) {
3735
2.90M
    if (count++ > XML_PARSER_CHUNK_SIZE) {
3736
32.3k
        count = 0;
3737
32.3k
        GROW;
3738
32.3k
                    if (ctxt->instate == XML_PARSER_EOF) {
3739
0
                        xmlFree(buffer);
3740
0
                        return(NULL);
3741
0
                    }
3742
32.3k
    }
3743
2.90M
    if (len + 10 > max) {
3744
4.37k
        xmlChar *tmp;
3745
3746
4.37k
        max *= 2;
3747
4.37k
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3748
4.37k
        if (tmp == NULL) {
3749
0
      xmlErrMemory(ctxt, NULL);
3750
0
      xmlFree(buffer);
3751
0
      return(NULL);
3752
0
        }
3753
4.37k
        buffer = tmp;
3754
4.37k
    }
3755
2.90M
    COPY_BUF(l,buffer,len,c);
3756
2.90M
    NEXTL(l);
3757
2.90M
    c = CUR_CHAR(l);
3758
2.90M
                if (len > maxLength) {
3759
12
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3760
12
                    xmlFree(buffer);
3761
12
                    return(NULL);
3762
12
                }
3763
2.90M
      }
3764
7.56k
      buffer[len] = 0;
3765
7.56k
      return(buffer);
3766
7.57k
  }
3767
3.15M
    }
3768
499k
    if (len == 0)
3769
105k
        return(NULL);
3770
394k
    if (len > maxLength) {
3771
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3772
0
        return(NULL);
3773
0
    }
3774
394k
    return(xmlStrndup(buf, len));
3775
394k
}
3776
3777
/**
3778
 * xmlParseEntityValue:
3779
 * @ctxt:  an XML parser context
3780
 * @orig:  if non-NULL store a copy of the original entity value
3781
 *
3782
 * DEPRECATED: Internal function, don't use.
3783
 *
3784
 * parse a value for ENTITY declarations
3785
 *
3786
 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3787
 *                 "'" ([^%&'] | PEReference | Reference)* "'"
3788
 *
3789
 * Returns the EntityValue parsed with reference substituted or NULL
3790
 */
3791
3792
xmlChar *
3793
860k
xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3794
860k
    xmlChar *buf = NULL;
3795
860k
    int len = 0;
3796
860k
    int size = XML_PARSER_BUFFER_SIZE;
3797
860k
    int c, l;
3798
860k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3799
203k
                    XML_MAX_HUGE_LENGTH :
3800
860k
                    XML_MAX_TEXT_LENGTH;
3801
860k
    xmlChar stop;
3802
860k
    xmlChar *ret = NULL;
3803
860k
    const xmlChar *cur = NULL;
3804
860k
    xmlParserInputPtr input;
3805
3806
860k
    if (RAW == '"') stop = '"';
3807
159k
    else if (RAW == '\'') stop = '\'';
3808
0
    else {
3809
0
  xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
3810
0
  return(NULL);
3811
0
    }
3812
860k
    buf = (xmlChar *) xmlMallocAtomic(size);
3813
860k
    if (buf == NULL) {
3814
0
  xmlErrMemory(ctxt, NULL);
3815
0
  return(NULL);
3816
0
    }
3817
3818
    /*
3819
     * The content of the entity definition is copied in a buffer.
3820
     */
3821
3822
860k
    ctxt->instate = XML_PARSER_ENTITY_VALUE;
3823
860k
    input = ctxt->input;
3824
860k
    GROW;
3825
860k
    if (ctxt->instate == XML_PARSER_EOF)
3826
0
        goto error;
3827
860k
    NEXT;
3828
860k
    c = CUR_CHAR(l);
3829
    /*
3830
     * NOTE: 4.4.5 Included in Literal
3831
     * When a parameter entity reference appears in a literal entity
3832
     * value, ... a single or double quote character in the replacement
3833
     * text is always treated as a normal data character and will not
3834
     * terminate the literal.
3835
     * In practice it means we stop the loop only when back at parsing
3836
     * the initial entity and the quote is found
3837
     */
3838
44.4M
    while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3839
44.4M
      (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
3840
43.5M
  if (len + 5 >= size) {
3841
126k
      xmlChar *tmp;
3842
3843
126k
      size *= 2;
3844
126k
      tmp = (xmlChar *) xmlRealloc(buf, size);
3845
126k
      if (tmp == NULL) {
3846
0
    xmlErrMemory(ctxt, NULL);
3847
0
                goto error;
3848
0
      }
3849
126k
      buf = tmp;
3850
126k
  }
3851
43.5M
  COPY_BUF(l,buf,len,c);
3852
43.5M
  NEXTL(l);
3853
3854
43.5M
  GROW;
3855
43.5M
  c = CUR_CHAR(l);
3856
43.5M
  if (c == 0) {
3857
3.04k
      GROW;
3858
3.04k
      c = CUR_CHAR(l);
3859
3.04k
  }
3860
3861
43.5M
        if (len > maxLength) {
3862
0
            xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
3863
0
                           "entity value too long\n");
3864
0
            goto error;
3865
0
        }
3866
43.5M
    }
3867
860k
    buf[len] = 0;
3868
860k
    if (ctxt->instate == XML_PARSER_EOF)
3869
0
        goto error;
3870
860k
    if (c != stop) {
3871
4.70k
        xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3872
4.70k
        goto error;
3873
4.70k
    }
3874
855k
    NEXT;
3875
3876
    /*
3877
     * Raise problem w.r.t. '&' and '%' being used in non-entities
3878
     * reference constructs. Note Charref will be handled in
3879
     * xmlStringDecodeEntities()
3880
     */
3881
855k
    cur = buf;
3882
32.6M
    while (*cur != 0) { /* non input consuming */
3883
31.8M
  if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3884
665k
      xmlChar *name;
3885
665k
      xmlChar tmp = *cur;
3886
665k
            int nameOk = 0;
3887
3888
665k
      cur++;
3889
665k
      name = xmlParseStringName(ctxt, &cur);
3890
665k
            if (name != NULL) {
3891
657k
                nameOk = 1;
3892
657k
                xmlFree(name);
3893
657k
            }
3894
665k
            if ((nameOk == 0) || (*cur != ';')) {
3895
18.2k
    xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3896
18.2k
      "EntityValue: '%c' forbidden except for entities references\n",
3897
18.2k
                            tmp);
3898
18.2k
                goto error;
3899
18.2k
      }
3900
647k
      if ((tmp == '%') && (ctxt->inSubset == 1) &&
3901
647k
    (ctxt->inputNr == 1)) {
3902
5.89k
    xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3903
5.89k
                goto error;
3904
5.89k
      }
3905
641k
      if (*cur == 0)
3906
0
          break;
3907
641k
  }
3908
31.8M
  cur++;
3909
31.8M
    }
3910
3911
    /*
3912
     * Then PEReference entities are substituted.
3913
     *
3914
     * NOTE: 4.4.7 Bypassed
3915
     * When a general entity reference appears in the EntityValue in
3916
     * an entity declaration, it is bypassed and left as is.
3917
     * so XML_SUBSTITUTE_REF is not set here.
3918
     */
3919
831k
    ++ctxt->depth;
3920
831k
    ret = xmlStringDecodeEntitiesInt(ctxt, buf, len, XML_SUBSTITUTE_PEREF,
3921
831k
                                     0, 0, 0, /* check */ 1);
3922
831k
    --ctxt->depth;
3923
3924
831k
    if (orig != NULL) {
3925
831k
        *orig = buf;
3926
831k
        buf = NULL;
3927
831k
    }
3928
3929
860k
error:
3930
860k
    if (buf != NULL)
3931
28.8k
        xmlFree(buf);
3932
860k
    return(ret);
3933
831k
}
3934
3935
/**
3936
 * xmlParseAttValueComplex:
3937
 * @ctxt:  an XML parser context
3938
 * @len:   the resulting attribute len
3939
 * @normalize:  whether to apply the inner normalization
3940
 *
3941
 * parse a value for an attribute, this is the fallback function
3942
 * of xmlParseAttValue() when the attribute parsing requires handling
3943
 * of non-ASCII characters, or normalization compaction.
3944
 *
3945
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3946
 */
3947
static xmlChar *
3948
1.80M
xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
3949
1.80M
    xmlChar limit = 0;
3950
1.80M
    xmlChar *buf = NULL;
3951
1.80M
    xmlChar *rep = NULL;
3952
1.80M
    size_t len = 0;
3953
1.80M
    size_t buf_size = 0;
3954
1.80M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3955
638k
                       XML_MAX_HUGE_LENGTH :
3956
1.80M
                       XML_MAX_TEXT_LENGTH;
3957
1.80M
    int c, l, in_space = 0;
3958
1.80M
    xmlChar *current = NULL;
3959
1.80M
    xmlEntityPtr ent;
3960
3961
1.80M
    if (NXT(0) == '"') {
3962
1.17M
  ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3963
1.17M
  limit = '"';
3964
1.17M
        NEXT;
3965
1.17M
    } else if (NXT(0) == '\'') {
3966
626k
  limit = '\'';
3967
626k
  ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3968
626k
        NEXT;
3969
626k
    } else {
3970
0
  xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3971
0
  return(NULL);
3972
0
    }
3973
3974
    /*
3975
     * allocate a translation buffer.
3976
     */
3977
1.80M
    buf_size = XML_PARSER_BUFFER_SIZE;
3978
1.80M
    buf = (xmlChar *) xmlMallocAtomic(buf_size);
3979
1.80M
    if (buf == NULL) goto mem_error;
3980
3981
    /*
3982
     * OK loop until we reach one of the ending char or a size limit.
3983
     */
3984
1.80M
    c = CUR_CHAR(l);
3985
62.6M
    while (((NXT(0) != limit) && /* checked */
3986
62.6M
            (IS_CHAR(c)) && (c != '<')) &&
3987
62.6M
            (ctxt->instate != XML_PARSER_EOF)) {
3988
60.8M
  if (c == '&') {
3989
2.46M
      in_space = 0;
3990
2.46M
      if (NXT(1) == '#') {
3991
585k
    int val = xmlParseCharRef(ctxt);
3992
3993
585k
    if (val == '&') {
3994
37.8k
        if (ctxt->replaceEntities) {
3995
12.5k
      if (len + 10 > buf_size) {
3996
280
          growBuffer(buf, 10);
3997
280
      }
3998
12.5k
      buf[len++] = '&';
3999
25.2k
        } else {
4000
      /*
4001
       * The reparsing will be done in xmlStringGetNodeList()
4002
       * called by the attribute() function in SAX.c
4003
       */
4004
25.2k
      if (len + 10 > buf_size) {
4005
324
          growBuffer(buf, 10);
4006
324
      }
4007
25.2k
      buf[len++] = '&';
4008
25.2k
      buf[len++] = '#';
4009
25.2k
      buf[len++] = '3';
4010
25.2k
      buf[len++] = '8';
4011
25.2k
      buf[len++] = ';';
4012
25.2k
        }
4013
548k
    } else if (val != 0) {
4014
449k
        if (len + 10 > buf_size) {
4015
2.16k
      growBuffer(buf, 10);
4016
2.16k
        }
4017
449k
        len += xmlCopyChar(0, &buf[len], val);
4018
449k
    }
4019
1.88M
      } else {
4020
1.88M
    ent = xmlParseEntityRef(ctxt);
4021
1.88M
    if ((ent != NULL) &&
4022
1.88M
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
4023
246k
        if (len + 10 > buf_size) {
4024
2.14k
      growBuffer(buf, 10);
4025
2.14k
        }
4026
246k
        if ((ctxt->replaceEntities == 0) &&
4027
246k
            (ent->content[0] == '&')) {
4028
65.3k
      buf[len++] = '&';
4029
65.3k
      buf[len++] = '#';
4030
65.3k
      buf[len++] = '3';
4031
65.3k
      buf[len++] = '8';
4032
65.3k
      buf[len++] = ';';
4033
180k
        } else {
4034
180k
      buf[len++] = ent->content[0];
4035
180k
        }
4036
1.63M
    } else if ((ent != NULL) &&
4037
1.63M
               (ctxt->replaceEntities != 0)) {
4038
884k
        if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
4039
884k
                        if (xmlParserEntityCheck(ctxt, ent->length))
4040
0
                            goto error;
4041
4042
884k
      ++ctxt->depth;
4043
884k
      rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
4044
884k
                                ent->length, XML_SUBSTITUTE_REF, 0, 0, 0,
4045
884k
                                /* check */ 1);
4046
884k
      --ctxt->depth;
4047
884k
      if (rep != NULL) {
4048
867k
          current = rep;
4049
164M
          while (*current != 0) { /* non input consuming */
4050
164M
                                if ((*current == 0xD) || (*current == 0xA) ||
4051
164M
                                    (*current == 0x9)) {
4052
123k
                                    buf[len++] = 0x20;
4053
123k
                                    current++;
4054
123k
                                } else
4055
163M
                                    buf[len++] = *current++;
4056
164M
        if (len + 10 > buf_size) {
4057
29.4k
            growBuffer(buf, 10);
4058
29.4k
        }
4059
164M
          }
4060
867k
          xmlFree(rep);
4061
867k
          rep = NULL;
4062
867k
      }
4063
884k
        } else {
4064
0
      if (len + 10 > buf_size) {
4065
0
          growBuffer(buf, 10);
4066
0
      }
4067
0
      if (ent->content != NULL)
4068
0
          buf[len++] = ent->content[0];
4069
0
        }
4070
884k
    } else if (ent != NULL) {
4071
267k
        int i = xmlStrlen(ent->name);
4072
267k
        const xmlChar *cur = ent->name;
4073
4074
        /*
4075
                     * We also check for recursion and amplification
4076
                     * when entities are not substituted. They're
4077
                     * often expanded later.
4078
         */
4079
267k
        if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
4080
267k
      (ent->content != NULL)) {
4081
204k
                        if ((ent->flags & XML_ENT_CHECKED) == 0) {
4082
10.4k
                            unsigned long oldCopy = ctxt->sizeentcopy;
4083
4084
10.4k
                            ctxt->sizeentcopy = ent->length;
4085
4086
10.4k
                            ++ctxt->depth;
4087
10.4k
                            rep = xmlStringDecodeEntitiesInt(ctxt,
4088
10.4k
                                    ent->content, ent->length,
4089
10.4k
                                    XML_SUBSTITUTE_REF, 0, 0, 0,
4090
10.4k
                                    /* check */ 1);
4091
10.4k
                            --ctxt->depth;
4092
4093
                            /*
4094
                             * If we're parsing DTD content, the entity
4095
                             * might reference other entities which
4096
                             * weren't defined yet, so the check isn't
4097
                             * reliable.
4098
                             */
4099
10.4k
                            if (ctxt->inSubset == 0) {
4100
9.41k
                                ent->flags |= XML_ENT_CHECKED;
4101
9.41k
                                ent->expandedSize = ctxt->sizeentcopy;
4102
9.41k
                            }
4103
4104
10.4k
                            if (rep != NULL) {
4105
10.1k
                                xmlFree(rep);
4106
10.1k
                                rep = NULL;
4107
10.1k
                            } else {
4108
291
                                ent->content[0] = 0;
4109
291
                            }
4110
4111
10.4k
                            if (xmlParserEntityCheck(ctxt, oldCopy))
4112
12
                                goto error;
4113
194k
                        } else {
4114
194k
                            if (xmlParserEntityCheck(ctxt, ent->expandedSize))
4115
0
                                goto error;
4116
194k
                        }
4117
204k
        }
4118
4119
        /*
4120
         * Just output the reference
4121
         */
4122
267k
        buf[len++] = '&';
4123
270k
        while (len + i + 10 > buf_size) {
4124
5.79k
      growBuffer(buf, i + 10);
4125
5.79k
        }
4126
816k
        for (;i > 0;i--)
4127
549k
      buf[len++] = *cur++;
4128
267k
        buf[len++] = ';';
4129
267k
    }
4130
1.88M
      }
4131
58.4M
  } else {
4132
58.4M
      if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
4133
4.75M
          if ((len != 0) || (!normalize)) {
4134
4.66M
        if ((!normalize) || (!in_space)) {
4135
4.48M
      COPY_BUF(l,buf,len,0x20);
4136
4.49M
      while (len + 10 > buf_size) {
4137
23.1k
          growBuffer(buf, 10);
4138
23.1k
      }
4139
4.48M
        }
4140
4.66M
        in_space = 1;
4141
4.66M
    }
4142
53.6M
      } else {
4143
53.6M
          in_space = 0;
4144
53.6M
    COPY_BUF(l,buf,len,c);
4145
53.6M
    if (len + 10 > buf_size) {
4146
318k
        growBuffer(buf, 10);
4147
318k
    }
4148
53.6M
      }
4149
58.4M
      NEXTL(l);
4150
58.4M
  }
4151
60.8M
  GROW;
4152
60.8M
  c = CUR_CHAR(l);
4153
60.8M
        if (len > maxLength) {
4154
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4155
0
                           "AttValue length too long\n");
4156
0
            goto mem_error;
4157
0
        }
4158
60.8M
    }
4159
1.80M
    if (ctxt->instate == XML_PARSER_EOF)
4160
692
        goto error;
4161
4162
1.79M
    if ((in_space) && (normalize)) {
4163
111k
        while ((len > 0) && (buf[len - 1] == 0x20)) len--;
4164
54.4k
    }
4165
1.79M
    buf[len] = 0;
4166
1.79M
    if (RAW == '<') {
4167
620k
  xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4168
1.17M
    } else if (RAW != limit) {
4169
246k
  if ((c != 0) && (!IS_CHAR(c))) {
4170
147k
      xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4171
147k
         "invalid character in attribute value\n");
4172
147k
  } else {
4173
99.1k
      xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4174
99.1k
         "AttValue: ' expected\n");
4175
99.1k
        }
4176
246k
    } else
4177
932k
  NEXT;
4178
4179
1.79M
    if (attlen != NULL) *attlen = len;
4180
1.79M
    return(buf);
4181
4182
0
mem_error:
4183
0
    xmlErrMemory(ctxt, NULL);
4184
704
error:
4185
704
    if (buf != NULL)
4186
704
        xmlFree(buf);
4187
704
    if (rep != NULL)
4188
0
        xmlFree(rep);
4189
704
    return(NULL);
4190
0
}
4191
4192
/**
4193
 * xmlParseAttValue:
4194
 * @ctxt:  an XML parser context
4195
 *
4196
 * DEPRECATED: Internal function, don't use.
4197
 *
4198
 * parse a value for an attribute
4199
 * Note: the parser won't do substitution of entities here, this
4200
 * will be handled later in xmlStringGetNodeList
4201
 *
4202
 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4203
 *                   "'" ([^<&'] | Reference)* "'"
4204
 *
4205
 * 3.3.3 Attribute-Value Normalization:
4206
 * Before the value of an attribute is passed to the application or
4207
 * checked for validity, the XML processor must normalize it as follows:
4208
 * - a character reference is processed by appending the referenced
4209
 *   character to the attribute value
4210
 * - an entity reference is processed by recursively processing the
4211
 *   replacement text of the entity
4212
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4213
 *   appending #x20 to the normalized value, except that only a single
4214
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
4215
 *   parsed entity or the literal entity value of an internal parsed entity
4216
 * - other characters are processed by appending them to the normalized value
4217
 * If the declared value is not CDATA, then the XML processor must further
4218
 * process the normalized attribute value by discarding any leading and
4219
 * trailing space (#x20) characters, and by replacing sequences of space
4220
 * (#x20) characters by a single space (#x20) character.
4221
 * All attributes for which no declaration has been read should be treated
4222
 * by a non-validating parser as if declared CDATA.
4223
 *
4224
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4225
 */
4226
4227
4228
xmlChar *
4229
3.00M
xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4230
3.00M
    if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4231
3.00M
    return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
4232
3.00M
}
4233
4234
/**
4235
 * xmlParseSystemLiteral:
4236
 * @ctxt:  an XML parser context
4237
 *
4238
 * DEPRECATED: Internal function, don't use.
4239
 *
4240
 * parse an XML Literal
4241
 *
4242
 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4243
 *
4244
 * Returns the SystemLiteral parsed or NULL
4245
 */
4246
4247
xmlChar *
4248
314k
xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4249
314k
    xmlChar *buf = NULL;
4250
314k
    int len = 0;
4251
314k
    int size = XML_PARSER_BUFFER_SIZE;
4252
314k
    int cur, l;
4253
314k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4254
95.0k
                    XML_MAX_TEXT_LENGTH :
4255
314k
                    XML_MAX_NAME_LENGTH;
4256
314k
    xmlChar stop;
4257
314k
    int state = ctxt->instate;
4258
314k
    int count = 0;
4259
4260
314k
    SHRINK;
4261
314k
    if (RAW == '"') {
4262
230k
        NEXT;
4263
230k
  stop = '"';
4264
230k
    } else if (RAW == '\'') {
4265
75.1k
        NEXT;
4266
75.1k
  stop = '\'';
4267
75.1k
    } else {
4268
8.91k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4269
8.91k
  return(NULL);
4270
8.91k
    }
4271
4272
305k
    buf = (xmlChar *) xmlMallocAtomic(size);
4273
305k
    if (buf == NULL) {
4274
0
        xmlErrMemory(ctxt, NULL);
4275
0
  return(NULL);
4276
0
    }
4277
305k
    ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4278
305k
    cur = CUR_CHAR(l);
4279
13.2M
    while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4280
12.9M
  if (len + 5 >= size) {
4281
18.0k
      xmlChar *tmp;
4282
4283
18.0k
      size *= 2;
4284
18.0k
      tmp = (xmlChar *) xmlRealloc(buf, size);
4285
18.0k
      if (tmp == NULL) {
4286
0
          xmlFree(buf);
4287
0
    xmlErrMemory(ctxt, NULL);
4288
0
    ctxt->instate = (xmlParserInputState) state;
4289
0
    return(NULL);
4290
0
      }
4291
18.0k
      buf = tmp;
4292
18.0k
  }
4293
12.9M
  count++;
4294
12.9M
  if (count > 50) {
4295
174k
      SHRINK;
4296
174k
      GROW;
4297
174k
      count = 0;
4298
174k
            if (ctxt->instate == XML_PARSER_EOF) {
4299
0
          xmlFree(buf);
4300
0
    return(NULL);
4301
0
            }
4302
174k
  }
4303
12.9M
  COPY_BUF(l,buf,len,cur);
4304
12.9M
  NEXTL(l);
4305
12.9M
  cur = CUR_CHAR(l);
4306
12.9M
  if (cur == 0) {
4307
4.05k
      GROW;
4308
4.05k
      SHRINK;
4309
4.05k
      cur = CUR_CHAR(l);
4310
4.05k
  }
4311
12.9M
        if (len > maxLength) {
4312
46
            xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4313
46
            xmlFree(buf);
4314
46
            ctxt->instate = (xmlParserInputState) state;
4315
46
            return(NULL);
4316
46
        }
4317
12.9M
    }
4318
305k
    buf[len] = 0;
4319
305k
    ctxt->instate = (xmlParserInputState) state;
4320
305k
    if (!IS_CHAR(cur)) {
4321
7.18k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4322
298k
    } else {
4323
298k
  NEXT;
4324
298k
    }
4325
305k
    return(buf);
4326
305k
}
4327
4328
/**
4329
 * xmlParsePubidLiteral:
4330
 * @ctxt:  an XML parser context
4331
 *
4332
 * DEPRECATED: Internal function, don't use.
4333
 *
4334
 * parse an XML public literal
4335
 *
4336
 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4337
 *
4338
 * Returns the PubidLiteral parsed or NULL.
4339
 */
4340
4341
xmlChar *
4342
96.1k
xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4343
96.1k
    xmlChar *buf = NULL;
4344
96.1k
    int len = 0;
4345
96.1k
    int size = XML_PARSER_BUFFER_SIZE;
4346
96.1k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4347
29.6k
                    XML_MAX_TEXT_LENGTH :
4348
96.1k
                    XML_MAX_NAME_LENGTH;
4349
96.1k
    xmlChar cur;
4350
96.1k
    xmlChar stop;
4351
96.1k
    int count = 0;
4352
96.1k
    xmlParserInputState oldstate = ctxt->instate;
4353
4354
96.1k
    SHRINK;
4355
96.1k
    if (RAW == '"') {
4356
52.9k
        NEXT;
4357
52.9k
  stop = '"';
4358
52.9k
    } else if (RAW == '\'') {
4359
41.5k
        NEXT;
4360
41.5k
  stop = '\'';
4361
41.5k
    } else {
4362
1.63k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4363
1.63k
  return(NULL);
4364
1.63k
    }
4365
94.5k
    buf = (xmlChar *) xmlMallocAtomic(size);
4366
94.5k
    if (buf == NULL) {
4367
0
  xmlErrMemory(ctxt, NULL);
4368
0
  return(NULL);
4369
0
    }
4370
94.5k
    ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
4371
94.5k
    cur = CUR;
4372
3.05M
    while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
4373
2.96M
  if (len + 1 >= size) {
4374
4.77k
      xmlChar *tmp;
4375
4376
4.77k
      size *= 2;
4377
4.77k
      tmp = (xmlChar *) xmlRealloc(buf, size);
4378
4.77k
      if (tmp == NULL) {
4379
0
    xmlErrMemory(ctxt, NULL);
4380
0
    xmlFree(buf);
4381
0
    return(NULL);
4382
0
      }
4383
4.77k
      buf = tmp;
4384
4.77k
  }
4385
2.96M
  buf[len++] = cur;
4386
2.96M
  count++;
4387
2.96M
  if (count > 50) {
4388
27.2k
      SHRINK;
4389
27.2k
      GROW;
4390
27.2k
      count = 0;
4391
27.2k
            if (ctxt->instate == XML_PARSER_EOF) {
4392
0
    xmlFree(buf);
4393
0
    return(NULL);
4394
0
            }
4395
27.2k
  }
4396
2.96M
  NEXT;
4397
2.96M
  cur = CUR;
4398
2.96M
  if (cur == 0) {
4399
781
      GROW;
4400
781
      SHRINK;
4401
781
      cur = CUR;
4402
781
  }
4403
2.96M
        if (len > maxLength) {
4404
0
            xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4405
0
            xmlFree(buf);
4406
0
            return(NULL);
4407
0
        }
4408
2.96M
    }
4409
94.5k
    buf[len] = 0;
4410
94.5k
    if (cur != stop) {
4411
7.66k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4412
86.8k
    } else {
4413
86.8k
  NEXT;
4414
86.8k
    }
4415
94.5k
    ctxt->instate = oldstate;
4416
94.5k
    return(buf);
4417
94.5k
}
4418
4419
static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt);
4420
4421
/*
4422
 * Lookup table for the inner loop of char data parsing; a zero entry stops the scan
4423
 */
4424
static const unsigned char test_char_data[256] = {
4425
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4426
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */
4427
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4428
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4429
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */
4430
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
4431
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
4432
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */
4433
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
4434
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
4435
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
4436
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */
4437
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
4438
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
4439
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
4440
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
4441
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */
4442
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4443
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4444
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4445
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4446
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4447
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4448
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4449
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4450
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4451
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4452
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4453
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4454
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4455
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4456
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
4457
};
4458
4459
/**
4460
 * xmlParseCharData:
4461
 * @ctxt:  an XML parser context
4462
 * @cdata:  unused
4463
 *
4464
 * DEPRECATED: Internal function, don't use.
4465
 *
4466
 * Parse character data. Always makes progress if the first char isn't
4467
 * '<' or '&'.
4468
 *
4469
 * If we are within a CDATA section, ']]>' marks the end of the section.
4470
 *
4471
 * The right angle bracket (>) may be represented using the string "&gt;",
4472
 * and must, for compatibility, be escaped using "&gt;" or a character
4473
 * reference when it appears in the string "]]>" in content, when that
4474
 * string is not marking the end of a CDATA section.
4475
 *
4476
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4477
 */
4478
4479
void
4480
32.8M
xmlParseCharData(xmlParserCtxtPtr ctxt, ATTRIBUTE_UNUSED int cdata) {
4481
32.8M
    const xmlChar *in;
4482
32.8M
    int nbchar = 0;
4483
32.8M
    int line = ctxt->input->line;
4484
32.8M
    int col = ctxt->input->col;
4485
32.8M
    int ccol;
4486
4487
32.8M
    SHRINK;
4488
32.8M
    GROW;
4489
    /*
4490
     * Accelerated common case where input doesn't need to be
4491
     * modified before passing it to the handler.
4492
     */
4493
32.8M
    in = ctxt->input->cur;
4494
35.9M
    do {
4495
41.9M
get_more_space:
4496
59.4M
        while (*in == 0x20) { in++; ctxt->input->col++; }
4497
41.9M
        if (*in == 0xA) {
4498
6.23M
            do {
4499
6.23M
                ctxt->input->line++; ctxt->input->col = 1;
4500
6.23M
                in++;
4501
6.23M
            } while (*in == 0xA);
4502
5.97M
            goto get_more_space;
4503
5.97M
        }
4504
35.9M
        if (*in == '<') {
4505
6.06M
            nbchar = in - ctxt->input->cur;
4506
6.06M
            if (nbchar > 0) {
4507
6.06M
                const xmlChar *tmp = ctxt->input->cur;
4508
6.06M
                ctxt->input->cur = in;
4509
4510
6.06M
                if ((ctxt->sax != NULL) &&
4511
6.06M
                    (ctxt->sax->ignorableWhitespace !=
4512
6.06M
                     ctxt->sax->characters)) {
4513
2.77M
                    if (areBlanks(ctxt, tmp, nbchar, 1)) {
4514
1.69M
                        if (ctxt->sax->ignorableWhitespace != NULL)
4515
1.69M
                            ctxt->sax->ignorableWhitespace(ctxt->userData,
4516
1.69M
                                                   tmp, nbchar);
4517
1.69M
                    } else {
4518
1.08M
                        if (ctxt->sax->characters != NULL)
4519
1.08M
                            ctxt->sax->characters(ctxt->userData,
4520
1.08M
                                                  tmp, nbchar);
4521
1.08M
                        if (*ctxt->space == -1)
4522
300k
                            *ctxt->space = -2;
4523
1.08M
                    }
4524
3.28M
                } else if ((ctxt->sax != NULL) &&
4525
3.28M
                           (ctxt->sax->characters != NULL)) {
4526
3.28M
                    ctxt->sax->characters(ctxt->userData,
4527
3.28M
                                          tmp, nbchar);
4528
3.28M
                }
4529
6.06M
            }
4530
6.06M
            return;
4531
6.06M
        }
4532
4533
36.6M
get_more:
4534
36.6M
        ccol = ctxt->input->col;
4535
356M
        while (test_char_data[*in]) {
4536
319M
            in++;
4537
319M
            ccol++;
4538
319M
        }
4539
36.6M
        ctxt->input->col = ccol;
4540
36.6M
        if (*in == 0xA) {
4541
6.05M
            do {
4542
6.05M
                ctxt->input->line++; ctxt->input->col = 1;
4543
6.05M
                in++;
4544
6.05M
            } while (*in == 0xA);
4545
5.71M
            goto get_more;
4546
5.71M
        }
4547
30.9M
        if (*in == ']') {
4548
1.27M
            if ((in[1] == ']') && (in[2] == '>')) {
4549
208k
                xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4550
208k
                ctxt->input->cur = in + 1;
4551
208k
                return;
4552
208k
            }
4553
1.06M
            in++;
4554
1.06M
            ctxt->input->col++;
4555
1.06M
            goto get_more;
4556
1.27M
        }
4557
29.6M
        nbchar = in - ctxt->input->cur;
4558
29.6M
        if (nbchar > 0) {
4559
14.6M
            if ((ctxt->sax != NULL) &&
4560
14.6M
                (ctxt->sax->ignorableWhitespace !=
4561
14.6M
                 ctxt->sax->characters) &&
4562
14.6M
                (IS_BLANK_CH(*ctxt->input->cur))) {
4563
1.59M
                const xmlChar *tmp = ctxt->input->cur;
4564
1.59M
                ctxt->input->cur = in;
4565
4566
1.59M
                if (areBlanks(ctxt, tmp, nbchar, 0)) {
4567
439k
                    if (ctxt->sax->ignorableWhitespace != NULL)
4568
439k
                        ctxt->sax->ignorableWhitespace(ctxt->userData,
4569
439k
                                                       tmp, nbchar);
4570
1.15M
                } else {
4571
1.15M
                    if (ctxt->sax->characters != NULL)
4572
1.15M
                        ctxt->sax->characters(ctxt->userData,
4573
1.15M
                                              tmp, nbchar);
4574
1.15M
                    if (*ctxt->space == -1)
4575
572k
                        *ctxt->space = -2;
4576
1.15M
                }
4577
1.59M
                line = ctxt->input->line;
4578
1.59M
                col = ctxt->input->col;
4579
13.0M
            } else if (ctxt->sax != NULL) {
4580
13.0M
                if (ctxt->sax->characters != NULL)
4581
13.0M
                    ctxt->sax->characters(ctxt->userData,
4582
13.0M
                                          ctxt->input->cur, nbchar);
4583
13.0M
                line = ctxt->input->line;
4584
13.0M
                col = ctxt->input->col;
4585
13.0M
            }
4586
14.6M
        }
4587
29.6M
        ctxt->input->cur = in;
4588
29.6M
        if (*in == 0xD) {
4589
3.34M
            in++;
4590
3.34M
            if (*in == 0xA) {
4591
3.19M
                ctxt->input->cur = in;
4592
3.19M
                in++;
4593
3.19M
                ctxt->input->line++; ctxt->input->col = 1;
4594
3.19M
                continue; /* while */
4595
3.19M
            }
4596
148k
            in--;
4597
148k
        }
4598
26.4M
        if (*in == '<') {
4599
9.10M
            return;
4600
9.10M
        }
4601
17.3M
        if (*in == '&') {
4602
1.41M
            return;
4603
1.41M
        }
4604
15.9M
        SHRINK;
4605
15.9M
        GROW;
4606
15.9M
        if (ctxt->instate == XML_PARSER_EOF)
4607
0
            return;
4608
15.9M
        in = ctxt->input->cur;
4609
19.1M
    } while (((*in >= 0x20) && (*in <= 0x7F)) ||
4610
19.1M
             (*in == 0x09) || (*in == 0x0a));
4611
16.0M
    ctxt->input->line = line;
4612
16.0M
    ctxt->input->col = col;
4613
16.0M
    xmlParseCharDataComplex(ctxt);
4614
16.0M
}
4615
4616
/**
4617
 * xmlParseCharDataComplex:
4618
 * @ctxt:  an XML parser context
4619
 * (no @cdata argument: this function takes only @ctxt)
4620
 *
4621
 * Always makes progress if the first char isn't '<' or '&'.
4622
 *
4623
 * Parse a CharData section. This is the fallback function
4624
 * of xmlParseCharData() when the parsing requires handling
4625
 * of non-ASCII characters.
4626
 */
4627
static void
4628
16.0M
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt) {
4629
16.0M
    xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4630
16.0M
    int nbchar = 0;
4631
16.0M
    int cur, l;
4632
16.0M
    int count = 0;
4633
4634
16.0M
    SHRINK;
4635
16.0M
    GROW;
4636
16.0M
    cur = CUR_CHAR(l);
4637
139M
    while ((cur != '<') && /* checked */
4638
139M
           (cur != '&') &&
4639
139M
     (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
4640
123M
  if ((cur == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4641
77.1k
      xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4642
77.1k
  }
4643
123M
  COPY_BUF(l,buf,nbchar,cur);
4644
  /* move current position before possible calling of ctxt->sax->characters */
4645
123M
  NEXTL(l);
4646
123M
  cur = CUR_CHAR(l);
4647
123M
  if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4648
81.4k
      buf[nbchar] = 0;
4649
4650
      /*
4651
       * OK the segment is to be consumed as chars.
4652
       */
4653
81.4k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4654
73.7k
    if (areBlanks(ctxt, buf, nbchar, 0)) {
4655
0
        if (ctxt->sax->ignorableWhitespace != NULL)
4656
0
      ctxt->sax->ignorableWhitespace(ctxt->userData,
4657
0
                                     buf, nbchar);
4658
73.7k
    } else {
4659
73.7k
        if (ctxt->sax->characters != NULL)
4660
73.7k
      ctxt->sax->characters(ctxt->userData, buf, nbchar);
4661
73.7k
        if ((ctxt->sax->characters !=
4662
73.7k
             ctxt->sax->ignorableWhitespace) &&
4663
73.7k
      (*ctxt->space == -1))
4664
4.42k
      *ctxt->space = -2;
4665
73.7k
    }
4666
73.7k
      }
4667
81.4k
      nbchar = 0;
4668
            /* something really bad happened in the SAX callback */
4669
81.4k
            if (ctxt->instate != XML_PARSER_CONTENT)
4670
0
                return;
4671
81.4k
  }
4672
123M
  count++;
4673
123M
  if (count > 50) {
4674
1.26M
      SHRINK;
4675
1.26M
      GROW;
4676
1.26M
      count = 0;
4677
1.26M
            if (ctxt->instate == XML_PARSER_EOF)
4678
0
    return;
4679
1.26M
  }
4680
123M
    }
4681
16.0M
    if (nbchar != 0) {
4682
3.83M
        buf[nbchar] = 0;
4683
  /*
4684
   * OK the segment is to be consumed as chars.
4685
   */
4686
3.83M
  if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4687
3.48M
      if (areBlanks(ctxt, buf, nbchar, 0)) {
4688
11.7k
    if (ctxt->sax->ignorableWhitespace != NULL)
4689
11.7k
        ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4690
3.47M
      } else {
4691
3.47M
    if (ctxt->sax->characters != NULL)
4692
3.47M
        ctxt->sax->characters(ctxt->userData, buf, nbchar);
4693
3.47M
    if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4694
3.47M
        (*ctxt->space == -1))
4695
456k
        *ctxt->space = -2;
4696
3.47M
      }
4697
3.48M
  }
4698
3.83M
    }
4699
16.0M
    if ((ctxt->input->cur < ctxt->input->end) && (!IS_CHAR(cur))) {
4700
  /* Generate the error and skip the offending character */
4701
12.9M
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4702
12.9M
                          "PCDATA invalid Char value %d\n",
4703
12.9M
                    cur ? cur : CUR);
4704
12.9M
  NEXT;
4705
12.9M
    }
4706
16.0M
}
4707
4708
/**
4709
 * xmlParseExternalID:
4710
 * @ctxt:  an XML parser context
4711
 * @publicID:  a xmlChar** receiving PubidLiteral
4712
 * @strict: indicate whether we should restrict parsing to only
4713
 *          production [75], see NOTE below
4714
 *
4715
 * DEPRECATED: Internal function, don't use.
4716
 *
4717
 * Parse an External ID or a Public ID
4718
 *
4719
 * NOTE: Productions [75] and [83] interact badly since [75] can generate
4720
 *       'PUBLIC' S PubidLiteral S SystemLiteral
4721
 *
4722
 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4723
 *                   | 'PUBLIC' S PubidLiteral S SystemLiteral
4724
 *
4725
 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4726
 *
4727
 * Returns the SystemLiteral. In the 'PUBLIC' case, publicID also
4728
 *                receives the PubidLiteral. If strict is off,
4729
 *                it is possible to return NULL and have publicID set.
4730
 */
4731
4732
xmlChar *
4733
600k
xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4734
600k
    xmlChar *URI = NULL;
4735
4736
600k
    SHRINK;
4737
4738
600k
    *publicID = NULL;
4739
600k
    if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4740
228k
        SKIP(6);
4741
228k
  if (SKIP_BLANKS == 0) {
4742
875
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4743
875
                     "Space required after 'SYSTEM'\n");
4744
875
  }
4745
228k
  URI = xmlParseSystemLiteral(ctxt);
4746
228k
  if (URI == NULL) {
4747
1.39k
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4748
1.39k
        }
4749
371k
    } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4750
96.1k
        SKIP(6);
4751
96.1k
  if (SKIP_BLANKS == 0) {
4752
2.45k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4753
2.45k
        "Space required after 'PUBLIC'\n");
4754
2.45k
  }
4755
96.1k
  *publicID = xmlParsePubidLiteral(ctxt);
4756
96.1k
  if (*publicID == NULL) {
4757
1.63k
      xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4758
1.63k
  }
4759
96.1k
  if (strict) {
4760
      /*
4761
       * We don't handle [83] so "S SystemLiteral" is required.
4762
       */
4763
85.6k
      if (SKIP_BLANKS == 0) {
4764
8.11k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4765
8.11k
      "Space required after the Public Identifier\n");
4766
8.11k
      }
4767
85.6k
  } else {
4768
      /*
4769
       * We handle [83] so we return immediately, if
4770
       * "S SystemLiteral" is not detected. We skip blanks if no
4771
             * system literal was found, but this is harmless since we must
4772
             * be at the end of a NotationDecl.
4773
       */
4774
10.5k
      if (SKIP_BLANKS == 0) return(NULL);
4775
843
      if ((CUR != '\'') && (CUR != '"')) return(NULL);
4776
843
  }
4777
86.0k
  URI = xmlParseSystemLiteral(ctxt);
4778
86.0k
  if (URI == NULL) {
4779
7.56k
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4780
7.56k
        }
4781
86.0k
    }
4782
589k
    return(URI);
4783
600k
}
4784
4785
/**
4786
 * xmlParseCommentComplex:
4787
 * @ctxt:  an XML parser context
4788
 * @buf:  the already parsed part of the buffer
4789
 * @len:  number of bytes in the buffer
4790
 * @size:  allocated size of the buffer
4791
 *
4792
 * Skip an XML (SGML) comment <!-- .... -->
4793
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4794
 *  must not occur within comments. "
4795
 * This is the slow routine in case the accelerator for ascii didn't work
4796
 *
4797
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4798
 */
4799
static void
4800
xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4801
229k
                       size_t len, size_t size) {
4802
229k
    int q, ql;
4803
229k
    int r, rl;
4804
229k
    int cur, l;
4805
229k
    size_t count = 0;
4806
229k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4807
69.9k
                       XML_MAX_HUGE_LENGTH :
4808
229k
                       XML_MAX_TEXT_LENGTH;
4809
229k
    int inputid;
4810
4811
229k
    inputid = ctxt->input->id;
4812
4813
229k
    if (buf == NULL) {
4814
20.2k
        len = 0;
4815
20.2k
  size = XML_PARSER_BUFFER_SIZE;
4816
20.2k
  buf = (xmlChar *) xmlMallocAtomic(size);
4817
20.2k
  if (buf == NULL) {
4818
0
      xmlErrMemory(ctxt, NULL);
4819
0
      return;
4820
0
  }
4821
20.2k
    }
4822
229k
    GROW; /* Assure there's enough input data */
4823
229k
    q = CUR_CHAR(ql);
4824
229k
    if (q == 0)
4825
21.8k
        goto not_terminated;
4826
207k
    if (!IS_CHAR(q)) {
4827
34.0k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4828
34.0k
                          "xmlParseComment: invalid xmlChar value %d\n",
4829
34.0k
                    q);
4830
34.0k
  xmlFree (buf);
4831
34.0k
  return;
4832
34.0k
    }
4833
173k
    NEXTL(ql);
4834
173k
    r = CUR_CHAR(rl);
4835
173k
    if (r == 0)
4836
5.12k
        goto not_terminated;
4837
168k
    if (!IS_CHAR(r)) {
4838
6.28k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4839
6.28k
                          "xmlParseComment: invalid xmlChar value %d\n",
4840
6.28k
                    r);
4841
6.28k
  xmlFree (buf);
4842
6.28k
  return;
4843
6.28k
    }
4844
161k
    NEXTL(rl);
4845
161k
    cur = CUR_CHAR(l);
4846
161k
    if (cur == 0)
4847
2.19k
        goto not_terminated;
4848
21.9M
    while (IS_CHAR(cur) && /* checked */
4849
21.9M
           ((cur != '>') ||
4850
21.9M
      (r != '-') || (q != '-'))) {
4851
21.7M
  if ((r == '-') && (q == '-')) {
4852
98.6k
      xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4853
98.6k
  }
4854
21.7M
  if (len + 5 >= size) {
4855
64.9k
      xmlChar *new_buf;
4856
64.9k
            size_t new_size;
4857
4858
64.9k
      new_size = size * 2;
4859
64.9k
      new_buf = (xmlChar *) xmlRealloc(buf, new_size);
4860
64.9k
      if (new_buf == NULL) {
4861
0
    xmlFree (buf);
4862
0
    xmlErrMemory(ctxt, NULL);
4863
0
    return;
4864
0
      }
4865
64.9k
      buf = new_buf;
4866
64.9k
            size = new_size;
4867
64.9k
  }
4868
21.7M
  COPY_BUF(ql,buf,len,q);
4869
21.7M
  q = r;
4870
21.7M
  ql = rl;
4871
21.7M
  r = cur;
4872
21.7M
  rl = l;
4873
4874
21.7M
  count++;
4875
21.7M
  if (count > 50) {
4876
366k
      SHRINK;
4877
366k
      GROW;
4878
366k
      count = 0;
4879
366k
            if (ctxt->instate == XML_PARSER_EOF) {
4880
0
    xmlFree(buf);
4881
0
    return;
4882
0
            }
4883
366k
  }
4884
21.7M
  NEXTL(l);
4885
21.7M
  cur = CUR_CHAR(l);
4886
21.7M
  if (cur == 0) {
4887
15.2k
      SHRINK;
4888
15.2k
      GROW;
4889
15.2k
      cur = CUR_CHAR(l);
4890
15.2k
  }
4891
4892
21.7M
        if (len > maxLength) {
4893
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4894
0
                         "Comment too big found", NULL);
4895
0
            xmlFree (buf);
4896
0
            return;
4897
0
        }
4898
21.7M
    }
4899
159k
    buf[len] = 0;
4900
159k
    if (cur == 0) {
4901
15.2k
  xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4902
15.2k
                       "Comment not terminated \n<!--%.50s\n", buf);
4903
144k
    } else if (!IS_CHAR(cur)) {
4904
36.2k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4905
36.2k
                          "xmlParseComment: invalid xmlChar value %d\n",
4906
36.2k
                    cur);
4907
108k
    } else {
4908
108k
  if (inputid != ctxt->input->id) {
4909
0
      xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4910
0
               "Comment doesn't start and stop in the same"
4911
0
                           " entity\n");
4912
0
  }
4913
108k
        NEXT;
4914
108k
  if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4915
108k
      (!ctxt->disableSAX))
4916
91.6k
      ctxt->sax->comment(ctxt->userData, buf);
4917
108k
    }
4918
159k
    xmlFree(buf);
4919
159k
    return;
4920
29.1k
not_terminated:
4921
29.1k
    xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4922
29.1k
       "Comment not terminated\n", NULL);
4923
29.1k
    xmlFree(buf);
4924
29.1k
    return;
4925
159k
}
4926
4927
/**
4928
 * xmlParseComment:
4929
 * @ctxt:  an XML parser context
4930
 *
4931
 * DEPRECATED: Internal function, don't use.
4932
 *
4933
 * Parse an XML (SGML) comment. Always consumes '<!'.
4934
 *
4935
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4936
 *  must not occur within comments. "
4937
 *
4938
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4939
 */
4940
void
4941
11.9M
xmlParseComment(xmlParserCtxtPtr ctxt) {
4942
11.9M
    xmlChar *buf = NULL;
4943
11.9M
    size_t size = XML_PARSER_BUFFER_SIZE;
4944
11.9M
    size_t len = 0;
4945
11.9M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4946
1.01M
                       XML_MAX_HUGE_LENGTH :
4947
11.9M
                       XML_MAX_TEXT_LENGTH;
4948
11.9M
    xmlParserInputState state;
4949
11.9M
    const xmlChar *in;
4950
11.9M
    size_t nbchar = 0;
4951
11.9M
    int ccol;
4952
11.9M
    int inputid;
4953
4954
    /*
4955
     * Check that there is a comment right here.
4956
     */
4957
11.9M
    if ((RAW != '<') || (NXT(1) != '!'))
4958
0
        return;
4959
11.9M
    SKIP(2);
4960
11.9M
    if ((RAW != '-') || (NXT(1) != '-'))
4961
237
        return;
4962
11.9M
    state = ctxt->instate;
4963
11.9M
    ctxt->instate = XML_PARSER_COMMENT;
4964
11.9M
    inputid = ctxt->input->id;
4965
11.9M
    SKIP(2);
4966
11.9M
    SHRINK;
4967
11.9M
    GROW;
4968
4969
    /*
4970
     * Accelerated common case where input doesn't need to be
4971
     * modified before passing it to the handler.
4972
     */
4973
11.9M
    in = ctxt->input->cur;
4974
11.9M
    do {
4975
11.9M
  if (*in == 0xA) {
4976
145k
      do {
4977
145k
    ctxt->input->line++; ctxt->input->col = 1;
4978
145k
    in++;
4979
145k
      } while (*in == 0xA);
4980
128k
  }
4981
15.1M
get_more:
4982
15.1M
        ccol = ctxt->input->col;
4983
137M
  while (((*in > '-') && (*in <= 0x7F)) ||
4984
137M
         ((*in >= 0x20) && (*in < '-')) ||
4985
137M
         (*in == 0x09)) {
4986
122M
        in++;
4987
122M
        ccol++;
4988
122M
  }
4989
15.1M
  ctxt->input->col = ccol;
4990
15.1M
  if (*in == 0xA) {
4991
1.49M
      do {
4992
1.49M
    ctxt->input->line++; ctxt->input->col = 1;
4993
1.49M
    in++;
4994
1.49M
      } while (*in == 0xA);
4995
1.36M
      goto get_more;
4996
1.36M
  }
4997
13.7M
  nbchar = in - ctxt->input->cur;
4998
  /*
4999
   * save current set of data
5000
   */
5001
13.7M
  if (nbchar > 0) {
5002
3.46M
      if ((ctxt->sax != NULL) &&
5003
3.46M
    (ctxt->sax->comment != NULL)) {
5004
3.46M
    if (buf == NULL) {
5005
1.67M
        if ((*in == '-') && (in[1] == '-'))
5006
1.21M
            size = nbchar + 1;
5007
462k
        else
5008
462k
            size = XML_PARSER_BUFFER_SIZE + nbchar;
5009
1.67M
        buf = (xmlChar *) xmlMallocAtomic(size);
5010
1.67M
        if (buf == NULL) {
5011
0
            xmlErrMemory(ctxt, NULL);
5012
0
      ctxt->instate = state;
5013
0
      return;
5014
0
        }
5015
1.67M
        len = 0;
5016
1.78M
    } else if (len + nbchar + 1 >= size) {
5017
245k
        xmlChar *new_buf;
5018
245k
        size  += len + nbchar + XML_PARSER_BUFFER_SIZE;
5019
245k
        new_buf = (xmlChar *) xmlRealloc(buf, size);
5020
245k
        if (new_buf == NULL) {
5021
0
            xmlFree (buf);
5022
0
      xmlErrMemory(ctxt, NULL);
5023
0
      ctxt->instate = state;
5024
0
      return;
5025
0
        }
5026
245k
        buf = new_buf;
5027
245k
    }
5028
3.46M
    memcpy(&buf[len], ctxt->input->cur, nbchar);
5029
3.46M
    len += nbchar;
5030
3.46M
    buf[len] = 0;
5031
3.46M
      }
5032
3.46M
  }
5033
13.7M
        if (len > maxLength) {
5034
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5035
0
                         "Comment too big found", NULL);
5036
0
            xmlFree (buf);
5037
0
            return;
5038
0
        }
5039
13.7M
  ctxt->input->cur = in;
5040
13.7M
  if (*in == 0xA) {
5041
0
      in++;
5042
0
      ctxt->input->line++; ctxt->input->col = 1;
5043
0
  }
5044
13.7M
  if (*in == 0xD) {
5045
605k
      in++;
5046
605k
      if (*in == 0xA) {
5047
594k
    ctxt->input->cur = in;
5048
594k
    in++;
5049
594k
    ctxt->input->line++; ctxt->input->col = 1;
5050
594k
    goto get_more;
5051
594k
      }
5052
10.7k
      in--;
5053
10.7k
  }
5054
13.1M
  SHRINK;
5055
13.1M
  GROW;
5056
13.1M
        if (ctxt->instate == XML_PARSER_EOF) {
5057
0
            xmlFree(buf);
5058
0
            return;
5059
0
        }
5060
13.1M
  in = ctxt->input->cur;
5061
13.1M
  if (*in == '-') {
5062
12.9M
      if (in[1] == '-') {
5063
11.8M
          if (in[2] == '>') {
5064
11.7M
        if (ctxt->input->id != inputid) {
5065
0
      xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5066
0
                     "comment doesn't start and stop in the"
5067
0
                                       " same entity\n");
5068
0
        }
5069
11.7M
        SKIP(3);
5070
11.7M
        if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5071
11.7M
            (!ctxt->disableSAX)) {
5072
11.6M
      if (buf != NULL)
5073
1.36M
          ctxt->sax->comment(ctxt->userData, buf);
5074
10.2M
      else
5075
10.2M
          ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5076
11.6M
        }
5077
11.7M
        if (buf != NULL)
5078
1.46M
            xmlFree(buf);
5079
11.7M
        if (ctxt->instate != XML_PARSER_EOF)
5080
11.7M
      ctxt->instate = state;
5081
11.7M
        return;
5082
11.7M
    }
5083
109k
    if (buf != NULL) {
5084
104k
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5085
104k
                          "Double hyphen within comment: "
5086
104k
                                      "<!--%.50s\n",
5087
104k
              buf);
5088
104k
    } else
5089
4.37k
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5090
4.37k
                          "Double hyphen within comment\n", NULL);
5091
109k
                if (ctxt->instate == XML_PARSER_EOF) {
5092
0
                    xmlFree(buf);
5093
0
                    return;
5094
0
                }
5095
109k
    in++;
5096
109k
    ctxt->input->col++;
5097
109k
      }
5098
1.20M
      in++;
5099
1.20M
      ctxt->input->col++;
5100
1.20M
      goto get_more;
5101
12.9M
  }
5102
13.1M
    } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
5103
229k
    xmlParseCommentComplex(ctxt, buf, len, size);
5104
229k
    ctxt->instate = state;
5105
229k
    return;
5106
11.9M
}
5107
5108
5109
/**
5110
 * xmlParsePITarget:
5111
 * @ctxt:  an XML parser context
5112
 *
5113
 * DEPRECATED: Internal function, don't use.
5114
 *
5115
 * parse the name of a PI
5116
 *
5117
 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5118
 *
5119
 * Returns the PITarget name or NULL
5120
 */
5121
5122
const xmlChar *
5123
642k
xmlParsePITarget(xmlParserCtxtPtr ctxt) {
5124
642k
    const xmlChar *name;
5125
5126
642k
    name = xmlParseName(ctxt);
5127
642k
    if ((name != NULL) &&
5128
642k
        ((name[0] == 'x') || (name[0] == 'X')) &&
5129
642k
        ((name[1] == 'm') || (name[1] == 'M')) &&
5130
642k
        ((name[2] == 'l') || (name[2] == 'L'))) {
5131
203k
  int i;
5132
203k
  if ((name[0] == 'x') && (name[1] == 'm') &&
5133
203k
      (name[2] == 'l') && (name[3] == 0)) {
5134
139k
      xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5135
139k
     "XML declaration allowed only at the start of the document\n");
5136
139k
      return(name);
5137
139k
  } else if (name[3] == 0) {
5138
28.6k
      xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5139
28.6k
      return(name);
5140
28.6k
  }
5141
89.0k
  for (i = 0;;i++) {
5142
89.0k
      if (xmlW3CPIs[i] == NULL) break;
5143
63.2k
      if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5144
9.51k
          return(name);
5145
63.2k
  }
5146
25.8k
  xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5147
25.8k
          "xmlParsePITarget: invalid name prefix 'xml'\n",
5148
25.8k
          NULL, NULL);
5149
25.8k
    }
5150
464k
    if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5151
22.1k
  xmlNsErr(ctxt, XML_NS_ERR_COLON,
5152
22.1k
     "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5153
22.1k
    }
5154
464k
    return(name);
5155
642k
}
5156
5157
#ifdef LIBXML_CATALOG_ENABLED
5158
/**
5159
 * xmlParseCatalogPI:
5160
 * @ctxt:  an XML parser context
5161
 * @catalog:  the PI value string
5162
 *
5163
 * parse an XML Catalog Processing Instruction.
5164
 *
5165
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
5166
 *
5167
 * Occurs only if allowed by the user and if happening in the Misc
5168
 * part of the document before any doctype information.
5169
 * This will add the given catalog to the parsing context so that it
5170
 * can be used if a resolution is needed further down in the document.
5171
 */
5172
5173
static void
5174
283
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
5175
283
    xmlChar *URL = NULL;
5176
283
    const xmlChar *tmp, *base;
5177
283
    xmlChar marker;
5178
5179
283
    tmp = catalog;
5180
283
    while (IS_BLANK_CH(*tmp)) tmp++;
5181
283
    if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
5182
273
  goto error;
5183
10
    tmp += 7;
5184
10
    while (IS_BLANK_CH(*tmp)) tmp++;
5185
10
    if (*tmp != '=') {
5186
10
  return;
5187
10
    }
5188
0
    tmp++;
5189
0
    while (IS_BLANK_CH(*tmp)) tmp++;
5190
0
    marker = *tmp;
5191
0
    if ((marker != '\'') && (marker != '"'))
5192
0
  goto error;
5193
0
    tmp++;
5194
0
    base = tmp;
5195
0
    while ((*tmp != 0) && (*tmp != marker)) tmp++;
5196
0
    if (*tmp == 0)
5197
0
  goto error;
5198
0
    URL = xmlStrndup(base, tmp - base);
5199
0
    tmp++;
5200
0
    while (IS_BLANK_CH(*tmp)) tmp++;
5201
0
    if (*tmp != 0)
5202
0
  goto error;
5203
5204
0
    if (URL != NULL) {
5205
0
  ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
5206
0
  xmlFree(URL);
5207
0
    }
5208
0
    return;
5209
5210
273
error:
5211
273
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
5212
273
            "Catalog PI syntax error: %s\n",
5213
273
      catalog, NULL);
5214
273
    if (URL != NULL)
5215
0
  xmlFree(URL);
5216
273
}
5217
#endif
5218
5219
/**
5220
 * xmlParsePI:
5221
 * @ctxt:  an XML parser context
5222
 *
5223
 * DEPRECATED: Internal function, don't use.
5224
 *
5225
 * parse an XML Processing Instruction.
5226
 *
5227
 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5228
 *
5229
 * The processing is transferred to SAX once parsed.
5230
 */
5231
5232
void
5233
642k
xmlParsePI(xmlParserCtxtPtr ctxt) {
5234
642k
    xmlChar *buf = NULL;
5235
642k
    size_t len = 0;
5236
642k
    size_t size = XML_PARSER_BUFFER_SIZE;
5237
642k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5238
231k
                       XML_MAX_HUGE_LENGTH :
5239
642k
                       XML_MAX_TEXT_LENGTH;
5240
642k
    int cur, l;
5241
642k
    const xmlChar *target;
5242
642k
    xmlParserInputState state;
5243
642k
    int count = 0;
5244
5245
642k
    if ((RAW == '<') && (NXT(1) == '?')) {
5246
642k
  int inputid = ctxt->input->id;
5247
642k
  state = ctxt->instate;
5248
642k
        ctxt->instate = XML_PARSER_PI;
5249
  /*
5250
   * this is a Processing Instruction.
5251
   */
5252
642k
  SKIP(2);
5253
642k
  SHRINK;
5254
5255
  /*
5256
   * Parse the target name and check for special support like
5257
   * namespace.
5258
   */
5259
642k
        target = xmlParsePITarget(ctxt);
5260
642k
  if (target != NULL) {
5261
530k
      if ((RAW == '?') && (NXT(1) == '>')) {
5262
89.6k
    if (inputid != ctxt->input->id) {
5263
0
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5264
0
                             "PI declaration doesn't start and stop in"
5265
0
                                   " the same entity\n");
5266
0
    }
5267
89.6k
    SKIP(2);
5268
5269
    /*
5270
     * SAX: PI detected.
5271
     */
5272
89.6k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5273
89.6k
        (ctxt->sax->processingInstruction != NULL))
5274
81.2k
        ctxt->sax->processingInstruction(ctxt->userData,
5275
81.2k
                                         target, NULL);
5276
89.6k
    if (ctxt->instate != XML_PARSER_EOF)
5277
89.6k
        ctxt->instate = state;
5278
89.6k
    return;
5279
89.6k
      }
5280
441k
      buf = (xmlChar *) xmlMallocAtomic(size);
5281
441k
      if (buf == NULL) {
5282
0
    xmlErrMemory(ctxt, NULL);
5283
0
    ctxt->instate = state;
5284
0
    return;
5285
0
      }
5286
441k
      if (SKIP_BLANKS == 0) {
5287
152k
    xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5288
152k
        "ParsePI: PI %s space expected\n", target);
5289
152k
      }
5290
441k
      cur = CUR_CHAR(l);
5291
32.5M
      while (IS_CHAR(cur) && /* checked */
5292
32.5M
       ((cur != '?') || (NXT(1) != '>'))) {
5293
32.0M
    if (len + 5 >= size) {
5294
78.3k
        xmlChar *tmp;
5295
78.3k
                    size_t new_size = size * 2;
5296
78.3k
        tmp = (xmlChar *) xmlRealloc(buf, new_size);
5297
78.3k
        if (tmp == NULL) {
5298
0
      xmlErrMemory(ctxt, NULL);
5299
0
      xmlFree(buf);
5300
0
      ctxt->instate = state;
5301
0
      return;
5302
0
        }
5303
78.3k
        buf = tmp;
5304
78.3k
                    size = new_size;
5305
78.3k
    }
5306
32.0M
    count++;
5307
32.0M
    if (count > 50) {
5308
482k
        SHRINK;
5309
482k
        GROW;
5310
482k
                    if (ctxt->instate == XML_PARSER_EOF) {
5311
0
                        xmlFree(buf);
5312
0
                        return;
5313
0
                    }
5314
482k
        count = 0;
5315
482k
    }
5316
32.0M
    COPY_BUF(l,buf,len,cur);
5317
32.0M
    NEXTL(l);
5318
32.0M
    cur = CUR_CHAR(l);
5319
32.0M
    if (cur == 0) {
5320
28.9k
        SHRINK;
5321
28.9k
        GROW;
5322
28.9k
        cur = CUR_CHAR(l);
5323
28.9k
    }
5324
32.0M
                if (len > maxLength) {
5325
0
                    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5326
0
                                      "PI %s too big found", target);
5327
0
                    xmlFree(buf);
5328
0
                    ctxt->instate = state;
5329
0
                    return;
5330
0
                }
5331
32.0M
      }
5332
441k
      buf[len] = 0;
5333
441k
      if (cur != '?') {
5334
88.6k
    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5335
88.6k
          "ParsePI: PI %s never end ...\n", target);
5336
352k
      } else {
5337
352k
    if (inputid != ctxt->input->id) {
5338
19
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5339
19
                             "PI declaration doesn't start and stop in"
5340
19
                                   " the same entity\n");
5341
19
    }
5342
352k
    SKIP(2);
5343
5344
352k
#ifdef LIBXML_CATALOG_ENABLED
5345
352k
    if (((state == XML_PARSER_MISC) ||
5346
352k
               (state == XML_PARSER_START)) &&
5347
352k
        (xmlStrEqual(target, XML_CATALOG_PI))) {
5348
283
        xmlCatalogAllow allow = xmlCatalogGetDefaults();
5349
283
        if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5350
283
      (allow == XML_CATA_ALLOW_ALL))
5351
283
      xmlParseCatalogPI(ctxt, buf);
5352
283
    }
5353
352k
#endif
5354
5355
5356
    /*
5357
     * SAX: PI detected.
5358
     */
5359
352k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5360
352k
        (ctxt->sax->processingInstruction != NULL))
5361
309k
        ctxt->sax->processingInstruction(ctxt->userData,
5362
309k
                                         target, buf);
5363
352k
      }
5364
441k
      xmlFree(buf);
5365
441k
  } else {
5366
111k
      xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5367
111k
  }
5368
552k
  if (ctxt->instate != XML_PARSER_EOF)
5369
552k
      ctxt->instate = state;
5370
552k
    }
5371
642k
}
5372
5373
/**
5374
 * xmlParseNotationDecl:
5375
 * @ctxt:  an XML parser context
5376
 *
5377
 * DEPRECATED: Internal function, don't use.
5378
 *
5379
 * Parse a notation declaration. Always consumes '<!'.
5380
 *
5381
 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID |  PublicID) S? '>'
5382
 *
5383
 * Hence there is actually 3 choices:
5384
 *     'PUBLIC' S PubidLiteral
5385
 *     'PUBLIC' S PubidLiteral S SystemLiteral
5386
 * and 'SYSTEM' S SystemLiteral
5387
 *
5388
 * See the NOTE on xmlParseExternalID().
5389
 */
5390
5391
void
5392
24.4k
xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5393
24.4k
    const xmlChar *name;
5394
24.4k
    xmlChar *Pubid;
5395
24.4k
    xmlChar *Systemid;
5396
5397
24.4k
    if ((CUR != '<') || (NXT(1) != '!'))
5398
0
        return;
5399
24.4k
    SKIP(2);
5400
5401
24.4k
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5402
23.4k
  int inputid = ctxt->input->id;
5403
23.4k
  SHRINK;
5404
23.4k
  SKIP(8);
5405
23.4k
  if (SKIP_BLANKS == 0) {
5406
638
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5407
638
         "Space required after '<!NOTATION'\n");
5408
638
      return;
5409
638
  }
5410
5411
22.7k
        name = xmlParseName(ctxt);
5412
22.7k
  if (name == NULL) {
5413
1.09k
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5414
1.09k
      return;
5415
1.09k
  }
5416
21.6k
  if (xmlStrchr(name, ':') != NULL) {
5417
308
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5418
308
         "colons are forbidden from notation names '%s'\n",
5419
308
         name, NULL, NULL);
5420
308
  }
5421
21.6k
  if (SKIP_BLANKS == 0) {
5422
2.14k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5423
2.14k
         "Space required after the NOTATION name'\n");
5424
2.14k
      return;
5425
2.14k
  }
5426
5427
  /*
5428
   * Parse the IDs.
5429
   */
5430
19.5k
  Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5431
19.5k
  SKIP_BLANKS;
5432
5433
19.5k
  if (RAW == '>') {
5434
14.1k
      if (inputid != ctxt->input->id) {
5435
5
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5436
5
                         "Notation declaration doesn't start and stop"
5437
5
                               " in the same entity\n");
5438
5
      }
5439
14.1k
      NEXT;
5440
14.1k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5441
14.1k
    (ctxt->sax->notationDecl != NULL))
5442
10.7k
    ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5443
14.1k
  } else {
5444
5.45k
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5445
5.45k
  }
5446
19.5k
  if (Systemid != NULL) xmlFree(Systemid);
5447
19.5k
  if (Pubid != NULL) xmlFree(Pubid);
5448
19.5k
    }
5449
24.4k
}
5450
5451
/**
5452
 * xmlParseEntityDecl:
5453
 * @ctxt:  an XML parser context
5454
 *
5455
 * DEPRECATED: Internal function, don't use.
5456
 *
5457
 * Parse an entity declaration. Always consumes '<!'.
5458
 *
5459
 * [70] EntityDecl ::= GEDecl | PEDecl
5460
 *
5461
 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5462
 *
5463
 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5464
 *
5465
 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5466
 *
5467
 * [74] PEDef ::= EntityValue | ExternalID
5468
 *
5469
 * [76] NDataDecl ::= S 'NDATA' S Name
5470
 *
5471
 * [ VC: Notation Declared ]
5472
 * The Name must match the declared name of a notation.
5473
 */
5474
5475
void
5476
1.04M
xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5477
1.04M
    const xmlChar *name = NULL;
5478
1.04M
    xmlChar *value = NULL;
5479
1.04M
    xmlChar *URI = NULL, *literal = NULL;
5480
1.04M
    const xmlChar *ndata = NULL;
5481
1.04M
    int isParameter = 0;
5482
1.04M
    xmlChar *orig = NULL;
5483
5484
1.04M
    if ((CUR != '<') || (NXT(1) != '!'))
5485
0
        return;
5486
1.04M
    SKIP(2);
5487
5488
    /* GROW; done in the caller */
5489
1.04M
    if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5490
1.03M
  int inputid = ctxt->input->id;
5491
1.03M
  SHRINK;
5492
1.03M
  SKIP(6);
5493
1.03M
  if (SKIP_BLANKS == 0) {
5494
10.0k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5495
10.0k
         "Space required after '<!ENTITY'\n");
5496
10.0k
  }
5497
5498
1.03M
  if (RAW == '%') {
5499
416k
      NEXT;
5500
416k
      if (SKIP_BLANKS == 0) {
5501
2.51k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5502
2.51k
             "Space required after '%%'\n");
5503
2.51k
      }
5504
416k
      isParameter = 1;
5505
416k
  }
5506
5507
1.03M
        name = xmlParseName(ctxt);
5508
1.03M
  if (name == NULL) {
5509
8.05k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5510
8.05k
                     "xmlParseEntityDecl: no name\n");
5511
8.05k
            return;
5512
8.05k
  }
5513
1.03M
  if (xmlStrchr(name, ':') != NULL) {
5514
1.69k
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5515
1.69k
         "colons are forbidden from entities names '%s'\n",
5516
1.69k
         name, NULL, NULL);
5517
1.69k
  }
5518
1.03M
  if (SKIP_BLANKS == 0) {
5519
14.4k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5520
14.4k
         "Space required after the entity name\n");
5521
14.4k
  }
5522
5523
1.03M
  ctxt->instate = XML_PARSER_ENTITY_DECL;
5524
  /*
5525
   * handle the various case of definitions...
5526
   */
5527
1.03M
  if (isParameter) {
5528
415k
      if ((RAW == '"') || (RAW == '\'')) {
5529
386k
          value = xmlParseEntityValue(ctxt, &orig);
5530
386k
    if (value) {
5531
363k
        if ((ctxt->sax != NULL) &&
5532
363k
      (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5533
336k
      ctxt->sax->entityDecl(ctxt->userData, name,
5534
336k
                        XML_INTERNAL_PARAMETER_ENTITY,
5535
336k
            NULL, NULL, value);
5536
363k
    }
5537
386k
      } else {
5538
29.0k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5539
29.0k
    if ((URI == NULL) && (literal == NULL)) {
5540
2.76k
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5541
2.76k
    }
5542
29.0k
    if (URI) {
5543
25.5k
        xmlURIPtr uri;
5544
5545
25.5k
        uri = xmlParseURI((const char *) URI);
5546
25.5k
        if (uri == NULL) {
5547
1.81k
            xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5548
1.81k
             "Invalid URI: %s\n", URI);
5549
      /*
5550
       * This really ought to be a well formedness error
5551
       * but the XML Core WG decided otherwise c.f. issue
5552
       * E26 of the XML erratas.
5553
       */
5554
23.7k
        } else {
5555
23.7k
      if (uri->fragment != NULL) {
5556
          /*
5557
           * Okay this is foolish to block those but not
5558
           * invalid URIs.
5559
           */
5560
513
          xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5561
23.2k
      } else {
5562
23.2k
          if ((ctxt->sax != NULL) &&
5563
23.2k
        (!ctxt->disableSAX) &&
5564
23.2k
        (ctxt->sax->entityDecl != NULL))
5565
20.8k
        ctxt->sax->entityDecl(ctxt->userData, name,
5566
20.8k
              XML_EXTERNAL_PARAMETER_ENTITY,
5567
20.8k
              literal, URI, NULL);
5568
23.2k
      }
5569
23.7k
      xmlFreeURI(uri);
5570
23.7k
        }
5571
25.5k
    }
5572
29.0k
      }
5573
616k
  } else {
5574
616k
      if ((RAW == '"') || (RAW == '\'')) {
5575
474k
          value = xmlParseEntityValue(ctxt, &orig);
5576
474k
    if ((ctxt->sax != NULL) &&
5577
474k
        (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5578
421k
        ctxt->sax->entityDecl(ctxt->userData, name,
5579
421k
        XML_INTERNAL_GENERAL_ENTITY,
5580
421k
        NULL, NULL, value);
5581
    /*
5582
     * For expat compatibility in SAX mode.
5583
     */
5584
474k
    if ((ctxt->myDoc == NULL) ||
5585
474k
        (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5586
8.19k
        if (ctxt->myDoc == NULL) {
5587
1.23k
      ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5588
1.23k
      if (ctxt->myDoc == NULL) {
5589
0
          xmlErrMemory(ctxt, "New Doc failed");
5590
0
          return;
5591
0
      }
5592
1.23k
      ctxt->myDoc->properties = XML_DOC_INTERNAL;
5593
1.23k
        }
5594
8.19k
        if (ctxt->myDoc->intSubset == NULL)
5595
1.23k
      ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5596
1.23k
              BAD_CAST "fake", NULL, NULL);
5597
5598
8.19k
        xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5599
8.19k
                    NULL, NULL, value);
5600
8.19k
    }
5601
474k
      } else {
5602
141k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5603
141k
    if ((URI == NULL) && (literal == NULL)) {
5604
12.5k
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5605
12.5k
    }
5606
141k
    if (URI) {
5607
127k
        xmlURIPtr uri;
5608
5609
127k
        uri = xmlParseURI((const char *)URI);
5610
127k
        if (uri == NULL) {
5611
9.54k
            xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5612
9.54k
             "Invalid URI: %s\n", URI);
5613
      /*
5614
       * This really ought to be a well formedness error
5615
       * but the XML Core WG decided otherwise c.f. issue
5616
       * E26 of the XML erratas.
5617
       */
5618
118k
        } else {
5619
118k
      if (uri->fragment != NULL) {
5620
          /*
5621
           * Okay this is foolish to block those but not
5622
           * invalid URIs.
5623
           */
5624
3.77k
          xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5625
3.77k
      }
5626
118k
      xmlFreeURI(uri);
5627
118k
        }
5628
127k
    }
5629
141k
    if ((RAW != '>') && (SKIP_BLANKS == 0)) {
5630
11.2k
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5631
11.2k
           "Space required before 'NDATA'\n");
5632
11.2k
    }
5633
141k
    if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5634
19.4k
        SKIP(5);
5635
19.4k
        if (SKIP_BLANKS == 0) {
5636
625
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5637
625
               "Space required after 'NDATA'\n");
5638
625
        }
5639
19.4k
        ndata = xmlParseName(ctxt);
5640
19.4k
        if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5641
19.4k
            (ctxt->sax->unparsedEntityDecl != NULL))
5642
17.1k
      ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5643
17.1k
            literal, URI, ndata);
5644
122k
    } else {
5645
122k
        if ((ctxt->sax != NULL) &&
5646
122k
            (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5647
110k
      ctxt->sax->entityDecl(ctxt->userData, name,
5648
110k
            XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5649
110k
            literal, URI, NULL);
5650
        /*
5651
         * For expat compatibility in SAX mode.
5652
         * assuming the entity replacement was asked for
5653
         */
5654
122k
        if ((ctxt->replaceEntities != 0) &&
5655
122k
      ((ctxt->myDoc == NULL) ||
5656
78.6k
      (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5657
2.33k
      if (ctxt->myDoc == NULL) {
5658
1.10k
          ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5659
1.10k
          if (ctxt->myDoc == NULL) {
5660
0
              xmlErrMemory(ctxt, "New Doc failed");
5661
0
        return;
5662
0
          }
5663
1.10k
          ctxt->myDoc->properties = XML_DOC_INTERNAL;
5664
1.10k
      }
5665
5666
2.33k
      if (ctxt->myDoc->intSubset == NULL)
5667
1.10k
          ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5668
1.10k
            BAD_CAST "fake", NULL, NULL);
5669
2.33k
      xmlSAX2EntityDecl(ctxt, name,
5670
2.33k
                  XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5671
2.33k
                  literal, URI, NULL);
5672
2.33k
        }
5673
122k
    }
5674
141k
      }
5675
616k
  }
5676
1.03M
  if (ctxt->instate == XML_PARSER_EOF)
5677
627
      goto done;
5678
1.03M
  SKIP_BLANKS;
5679
1.03M
  if (RAW != '>') {
5680
27.8k
      xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5681
27.8k
              "xmlParseEntityDecl: entity %s not terminated\n", name);
5682
27.8k
      xmlHaltParser(ctxt);
5683
1.00M
  } else {
5684
1.00M
      if (inputid != ctxt->input->id) {
5685
58
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5686
58
                         "Entity declaration doesn't start and stop in"
5687
58
                               " the same entity\n");
5688
58
      }
5689
1.00M
      NEXT;
5690
1.00M
  }
5691
1.03M
  if (orig != NULL) {
5692
      /*
5693
       * Ugly mechanism to save the raw entity value.
5694
       */
5695
831k
      xmlEntityPtr cur = NULL;
5696
5697
831k
      if (isParameter) {
5698
373k
          if ((ctxt->sax != NULL) &&
5699
373k
        (ctxt->sax->getParameterEntity != NULL))
5700
373k
        cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5701
457k
      } else {
5702
457k
          if ((ctxt->sax != NULL) &&
5703
457k
        (ctxt->sax->getEntity != NULL))
5704
457k
        cur = ctxt->sax->getEntity(ctxt->userData, name);
5705
457k
    if ((cur == NULL) && (ctxt->userData==ctxt)) {
5706
29.3k
        cur = xmlSAX2GetEntity(ctxt, name);
5707
29.3k
    }
5708
457k
      }
5709
831k
            if ((cur != NULL) && (cur->orig == NULL)) {
5710
677k
    cur->orig = orig;
5711
677k
                orig = NULL;
5712
677k
      }
5713
831k
  }
5714
5715
1.03M
done:
5716
1.03M
  if (value != NULL) xmlFree(value);
5717
1.03M
  if (URI != NULL) xmlFree(URI);
5718
1.03M
  if (literal != NULL) xmlFree(literal);
5719
1.03M
        if (orig != NULL) xmlFree(orig);
5720
1.03M
    }
5721
1.04M
}
5722
5723
/**
5724
 * xmlParseDefaultDecl:
5725
 * @ctxt:  an XML parser context
5726
 * @value:  Receive a possible fixed default value for the attribute
5727
 *
5728
 * DEPRECATED: Internal function, don't use.
5729
 *
5730
 * Parse an attribute default declaration
5731
 *
5732
 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5733
 *
5734
 * [ VC: Required Attribute ]
5735
 * if the default declaration is the keyword #REQUIRED, then the
5736
 * attribute must be specified for all elements of the type in the
5737
 * attribute-list declaration.
5738
 *
5739
 * [ VC: Attribute Default Legal ]
5740
 * The declared default value must meet the lexical constraints of
5741
 * the declared attribute type c.f. xmlValidateAttributeDecl()
5742
 *
5743
 * [ VC: Fixed Attribute Default ]
5744
 * if an attribute has a default value declared with the #FIXED
5745
 * keyword, instances of that attribute must match the default value.
5746
 *
5747
 * [ WFC: No < in Attribute Values ]
5748
 * handled in xmlParseAttValue()
5749
 *
5750
 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5751
 *          or XML_ATTRIBUTE_FIXED.
5752
 */
5753
5754
int
5755
1.44M
xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5756
1.44M
    int val;
5757
1.44M
    xmlChar *ret;
5758
5759
1.44M
    *value = NULL;
5760
1.44M
    if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5761
113k
  SKIP(9);
5762
113k
  return(XML_ATTRIBUTE_REQUIRED);
5763
113k
    }
5764
1.33M
    if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5765
1.08M
  SKIP(8);
5766
1.08M
  return(XML_ATTRIBUTE_IMPLIED);
5767
1.08M
    }
5768
249k
    val = XML_ATTRIBUTE_NONE;
5769
249k
    if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5770
92.1k
  SKIP(6);
5771
92.1k
  val = XML_ATTRIBUTE_FIXED;
5772
92.1k
  if (SKIP_BLANKS == 0) {
5773
972
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5774
972
         "Space required after '#FIXED'\n");
5775
972
  }
5776
92.1k
    }
5777
249k
    ret = xmlParseAttValue(ctxt);
5778
249k
    ctxt->instate = XML_PARSER_DTD;
5779
249k
    if (ret == NULL) {
5780
6.32k
  xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5781
6.32k
           "Attribute default value declaration error\n");
5782
6.32k
    } else
5783
243k
        *value = ret;
5784
249k
    return(val);
5785
1.33M
}
5786
5787
/**
5788
 * xmlParseNotationType:
5789
 * @ctxt:  an XML parser context
5790
 *
5791
 * DEPRECATED: Internal function, don't use.
5792
 *
5793
 * parse an Notation attribute type.
5794
 *
5795
 * Note: the leading 'NOTATION' S part has already being parsed...
5796
 *
5797
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5798
 *
5799
 * [ VC: Notation Attributes ]
5800
 * Values of this type must match one of the notation names included
5801
 * in the declaration; all notation names in the declaration must be declared.
5802
 *
5803
 * Returns: the notation attribute tree built while parsing
5804
 */
5805
5806
xmlEnumerationPtr
5807
11.7k
xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5808
11.7k
    const xmlChar *name;
5809
11.7k
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5810
5811
11.7k
    if (RAW != '(') {
5812
489
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5813
489
  return(NULL);
5814
489
    }
5815
11.2k
    SHRINK;
5816
13.3k
    do {
5817
13.3k
        NEXT;
5818
13.3k
  SKIP_BLANKS;
5819
13.3k
        name = xmlParseName(ctxt);
5820
13.3k
  if (name == NULL) {
5821
346
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5822
346
         "Name expected in NOTATION declaration\n");
5823
346
            xmlFreeEnumeration(ret);
5824
346
      return(NULL);
5825
346
  }
5826
13.0k
  tmp = ret;
5827
16.2k
  while (tmp != NULL) {
5828
3.89k
      if (xmlStrEqual(name, tmp->name)) {
5829
663
    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5830
663
    "standalone: attribute notation value token %s duplicated\n",
5831
663
         name, NULL);
5832
663
    if (!xmlDictOwns(ctxt->dict, name))
5833
0
        xmlFree((xmlChar *) name);
5834
663
    break;
5835
663
      }
5836
3.22k
      tmp = tmp->next;
5837
3.22k
  }
5838
13.0k
  if (tmp == NULL) {
5839
12.3k
      cur = xmlCreateEnumeration(name);
5840
12.3k
      if (cur == NULL) {
5841
0
                xmlFreeEnumeration(ret);
5842
0
                return(NULL);
5843
0
            }
5844
12.3k
      if (last == NULL) ret = last = cur;
5845
1.34k
      else {
5846
1.34k
    last->next = cur;
5847
1.34k
    last = cur;
5848
1.34k
      }
5849
12.3k
  }
5850
13.0k
  SKIP_BLANKS;
5851
13.0k
    } while (RAW == '|');
5852
10.8k
    if (RAW != ')') {
5853
4.01k
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5854
4.01k
        xmlFreeEnumeration(ret);
5855
4.01k
  return(NULL);
5856
4.01k
    }
5857
6.86k
    NEXT;
5858
6.86k
    return(ret);
5859
10.8k
}
5860
5861
/**
5862
 * xmlParseEnumerationType:
5863
 * @ctxt:  an XML parser context
5864
 *
5865
 * DEPRECATED: Internal function, don't use.
5866
 *
5867
 * parse an Enumeration attribute type.
5868
 *
5869
 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5870
 *
5871
 * [ VC: Enumeration ]
5872
 * Values of this type must match one of the Nmtoken tokens in
5873
 * the declaration
5874
 *
5875
 * Returns: the enumeration attribute tree built while parsing
5876
 */
5877
5878
xmlEnumerationPtr
5879
152k
xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5880
152k
    xmlChar *name;
5881
152k
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5882
5883
152k
    if (RAW != '(') {
5884
15.7k
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5885
15.7k
  return(NULL);
5886
15.7k
    }
5887
137k
    SHRINK;
5888
358k
    do {
5889
358k
        NEXT;
5890
358k
  SKIP_BLANKS;
5891
358k
        name = xmlParseNmtoken(ctxt);
5892
358k
  if (name == NULL) {
5893
1.58k
      xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5894
1.58k
      return(ret);
5895
1.58k
  }
5896
357k
  tmp = ret;
5897
874k
  while (tmp != NULL) {
5898
523k
      if (xmlStrEqual(name, tmp->name)) {
5899
6.10k
    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5900
6.10k
    "standalone: attribute enumeration value token %s duplicated\n",
5901
6.10k
         name, NULL);
5902
6.10k
    if (!xmlDictOwns(ctxt->dict, name))
5903
6.10k
        xmlFree(name);
5904
6.10k
    break;
5905
6.10k
      }
5906
517k
      tmp = tmp->next;
5907
517k
  }
5908
357k
  if (tmp == NULL) {
5909
350k
      cur = xmlCreateEnumeration(name);
5910
350k
      if (!xmlDictOwns(ctxt->dict, name))
5911
350k
    xmlFree(name);
5912
350k
      if (cur == NULL) {
5913
0
                xmlFreeEnumeration(ret);
5914
0
                return(NULL);
5915
0
            }
5916
350k
      if (last == NULL) ret = last = cur;
5917
215k
      else {
5918
215k
    last->next = cur;
5919
215k
    last = cur;
5920
215k
      }
5921
350k
  }
5922
357k
  SKIP_BLANKS;
5923
357k
    } while (RAW == '|');
5924
135k
    if (RAW != ')') {
5925
6.77k
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5926
6.77k
  return(ret);
5927
6.77k
    }
5928
128k
    NEXT;
5929
128k
    return(ret);
5930
135k
}
5931
5932
/**
5933
 * xmlParseEnumeratedType:
5934
 * @ctxt:  an XML parser context
5935
 * @tree:  the enumeration tree built while parsing
5936
 *
5937
 * DEPRECATED: Internal function, don't use.
5938
 *
5939
 * parse an Enumerated attribute type.
5940
 *
5941
 * [57] EnumeratedType ::= NotationType | Enumeration
5942
 *
5943
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5944
 *
5945
 *
5946
 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5947
 */
5948
5949
int
5950
164k
xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5951
164k
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5952
12.0k
  SKIP(8);
5953
12.0k
  if (SKIP_BLANKS == 0) {
5954
291
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5955
291
         "Space required after 'NOTATION'\n");
5956
291
      return(0);
5957
291
  }
5958
11.7k
  *tree = xmlParseNotationType(ctxt);
5959
11.7k
  if (*tree == NULL) return(0);
5960
6.86k
  return(XML_ATTRIBUTE_NOTATION);
5961
11.7k
    }
5962
152k
    *tree = xmlParseEnumerationType(ctxt);
5963
152k
    if (*tree == NULL) return(0);
5964
135k
    return(XML_ATTRIBUTE_ENUMERATION);
5965
152k
}
5966
5967
/**
5968
 * xmlParseAttributeType:
5969
 * @ctxt:  an XML parser context
5970
 * @tree:  the enumeration tree built while parsing
5971
 *
5972
 * DEPRECATED: Internal function, don't use.
5973
 *
5974
 * parse the Attribute list def for an element
5975
 *
5976
 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5977
 *
5978
 * [55] StringType ::= 'CDATA'
5979
 *
5980
 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5981
 *                        'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5982
 *
5983
 * Validity constraints for attribute values syntax are checked in
5984
 * xmlValidateAttributeValue()
5985
 *
5986
 * [ VC: ID ]
5987
 * Values of type ID must match the Name production. A name must not
5988
 * appear more than once in an XML document as a value of this type;
5989
 * i.e., ID values must uniquely identify the elements which bear them.
5990
 *
5991
 * [ VC: One ID per Element Type ]
5992
 * No element type may have more than one ID attribute specified.
5993
 *
5994
 * [ VC: ID Attribute Default ]
5995
 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5996
 *
5997
 * [ VC: IDREF ]
5998
 * Values of type IDREF must match the Name production, and values
5999
 * of type IDREFS must match Names; each IDREF Name must match the value
6000
 * of an ID attribute on some element in the XML document; i.e. IDREF
6001
 * values must match the value of some ID attribute.
6002
 *
6003
 * [ VC: Entity Name ]
6004
 * Values of type ENTITY must match the Name production, values
6005
 * of type ENTITIES must match Names; each Entity Name must match the
6006
 * name of an unparsed entity declared in the DTD.
6007
 *
6008
 * [ VC: Name Token ]
6009
 * Values of type NMTOKEN must match the Nmtoken production; values
6010
 * of type NMTOKENS must match Nmtokens.
6011
 *
6012
 * Returns the attribute type
6013
 */
6014
int
6015
1.48M
xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
6016
1.48M
    SHRINK;
6017
1.48M
    if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
6018
477k
  SKIP(5);
6019
477k
  return(XML_ATTRIBUTE_CDATA);
6020
1.00M
     } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
6021
12.9k
  SKIP(6);
6022
12.9k
  return(XML_ATTRIBUTE_IDREFS);
6023
992k
     } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
6024
47.1k
  SKIP(5);
6025
47.1k
  return(XML_ATTRIBUTE_IDREF);
6026
945k
     } else if ((RAW == 'I') && (NXT(1) == 'D')) {
6027
383k
        SKIP(2);
6028
383k
  return(XML_ATTRIBUTE_ID);
6029
562k
     } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
6030
9.99k
  SKIP(6);
6031
9.99k
  return(XML_ATTRIBUTE_ENTITY);
6032
552k
     } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
6033
15.0k
  SKIP(8);
6034
15.0k
  return(XML_ATTRIBUTE_ENTITIES);
6035
537k
     } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
6036
81.6k
  SKIP(8);
6037
81.6k
  return(XML_ATTRIBUTE_NMTOKENS);
6038
455k
     } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
6039
291k
  SKIP(7);
6040
291k
  return(XML_ATTRIBUTE_NMTOKEN);
6041
291k
     }
6042
164k
     return(xmlParseEnumeratedType(ctxt, tree));
6043
1.48M
}
6044
6045
/**
6046
 * xmlParseAttributeListDecl:
6047
 * @ctxt:  an XML parser context
6048
 *
6049
 * DEPRECATED: Internal function, don't use.
6050
 *
6051
 * Parse an attribute list declaration for an element. Always consumes '<!'.
6052
 *
6053
 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
6054
 *
6055
 * [53] AttDef ::= S Name S AttType S DefaultDecl
6056
 *
6057
 */
6058
void
6059
725k
xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
6060
725k
    const xmlChar *elemName;
6061
725k
    const xmlChar *attrName;
6062
725k
    xmlEnumerationPtr tree;
6063
6064
725k
    if ((CUR != '<') || (NXT(1) != '!'))
6065
0
        return;
6066
725k
    SKIP(2);
6067
6068
725k
    if (CMP7(CUR_PTR, 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
6069
721k
  int inputid = ctxt->input->id;
6070
6071
721k
  SKIP(7);
6072
721k
  if (SKIP_BLANKS == 0) {
6073
12.9k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6074
12.9k
                     "Space required after '<!ATTLIST'\n");
6075
12.9k
  }
6076
721k
        elemName = xmlParseName(ctxt);
6077
721k
  if (elemName == NULL) {
6078
4.60k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6079
4.60k
         "ATTLIST: no name for Element\n");
6080
4.60k
      return;
6081
4.60k
  }
6082
717k
  SKIP_BLANKS;
6083
717k
  GROW;
6084
2.11M
  while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
6085
1.52M
      int type;
6086
1.52M
      int def;
6087
1.52M
      xmlChar *defaultValue = NULL;
6088
6089
1.52M
      GROW;
6090
1.52M
            tree = NULL;
6091
1.52M
      attrName = xmlParseName(ctxt);
6092
1.52M
      if (attrName == NULL) {
6093
19.6k
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6094
19.6k
             "ATTLIST: no name for Attribute\n");
6095
19.6k
    break;
6096
19.6k
      }
6097
1.50M
      GROW;
6098
1.50M
      if (SKIP_BLANKS == 0) {
6099
18.3k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6100
18.3k
            "Space required after the attribute name\n");
6101
18.3k
    break;
6102
18.3k
      }
6103
6104
1.48M
      type = xmlParseAttributeType(ctxt, &tree);
6105
1.48M
      if (type <= 0) {
6106
22.1k
          break;
6107
22.1k
      }
6108
6109
1.46M
      GROW;
6110
1.46M
      if (SKIP_BLANKS == 0) {
6111
11.1k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6112
11.1k
             "Space required after the attribute type\n");
6113
11.1k
          if (tree != NULL)
6114
8.90k
        xmlFreeEnumeration(tree);
6115
11.1k
    break;
6116
11.1k
      }
6117
6118
1.44M
      def = xmlParseDefaultDecl(ctxt, &defaultValue);
6119
1.44M
      if (def <= 0) {
6120
0
                if (defaultValue != NULL)
6121
0
        xmlFree(defaultValue);
6122
0
          if (tree != NULL)
6123
0
        xmlFreeEnumeration(tree);
6124
0
          break;
6125
0
      }
6126
1.44M
      if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6127
127k
          xmlAttrNormalizeSpace(defaultValue, defaultValue);
6128
6129
1.44M
      GROW;
6130
1.44M
            if (RAW != '>') {
6131
1.22M
    if (SKIP_BLANKS == 0) {
6132
48.0k
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6133
48.0k
      "Space required after the attribute default value\n");
6134
48.0k
        if (defaultValue != NULL)
6135
41.6k
      xmlFree(defaultValue);
6136
48.0k
        if (tree != NULL)
6137
10.0k
      xmlFreeEnumeration(tree);
6138
48.0k
        break;
6139
48.0k
    }
6140
1.22M
      }
6141
1.40M
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6142
1.40M
    (ctxt->sax->attributeDecl != NULL))
6143
1.30M
    ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6144
1.30M
                          type, def, defaultValue, tree);
6145
95.6k
      else if (tree != NULL)
6146
8.80k
    xmlFreeEnumeration(tree);
6147
6148
1.40M
      if ((ctxt->sax2) && (defaultValue != NULL) &&
6149
1.40M
          (def != XML_ATTRIBUTE_IMPLIED) &&
6150
1.40M
    (def != XML_ATTRIBUTE_REQUIRED)) {
6151
142k
    xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6152
142k
      }
6153
1.40M
      if (ctxt->sax2) {
6154
1.02M
    xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6155
1.02M
      }
6156
1.40M
      if (defaultValue != NULL)
6157
201k
          xmlFree(defaultValue);
6158
1.40M
      GROW;
6159
1.40M
  }
6160
717k
  if (RAW == '>') {
6161
604k
      if (inputid != ctxt->input->id) {
6162
15
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6163
15
                               "Attribute list declaration doesn't start and"
6164
15
                               " stop in the same entity\n");
6165
15
      }
6166
604k
      NEXT;
6167
604k
  }
6168
717k
    }
6169
725k
}
6170
6171
/**
6172
 * xmlParseElementMixedContentDecl:
6173
 * @ctxt:  an XML parser context
6174
 * @inputchk:  the input used for the current entity, needed for boundary checks
6175
 *
6176
 * DEPRECATED: Internal function, don't use.
6177
 *
6178
 * parse the declaration for a Mixed Element content
6179
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6180
 *
6181
 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6182
 *                '(' S? '#PCDATA' S? ')'
6183
 *
6184
 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6185
 *
6186
 * [ VC: No Duplicate Types ]
6187
 * The same name must not appear more than once in a single
6188
 * mixed-content declaration.
6189
 *
6190
 * returns: the list of the xmlElementContentPtr describing the element choices
6191
 */
6192
xmlElementContentPtr
6193
285k
xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6194
285k
    xmlElementContentPtr ret = NULL, cur = NULL, n;
6195
285k
    const xmlChar *elem = NULL;
6196
6197
285k
    GROW;
6198
285k
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6199
285k
  SKIP(7);
6200
285k
  SKIP_BLANKS;
6201
285k
  SHRINK;
6202
285k
  if (RAW == ')') {
6203
187k
      if (ctxt->input->id != inputchk) {
6204
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6205
0
                               "Element content declaration doesn't start and"
6206
0
                               " stop in the same entity\n");
6207
0
      }
6208
187k
      NEXT;
6209
187k
      ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6210
187k
      if (ret == NULL)
6211
0
          return(NULL);
6212
187k
      if (RAW == '*') {
6213
934
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6214
934
    NEXT;
6215
934
      }
6216
187k
      return(ret);
6217
187k
  }
6218
97.6k
  if ((RAW == '(') || (RAW == '|')) {
6219
96.5k
      ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6220
96.5k
      if (ret == NULL) return(NULL);
6221
96.5k
  }
6222
982k
  while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
6223
885k
      NEXT;
6224
885k
      if (elem == NULL) {
6225
96.4k
          ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6226
96.4k
    if (ret == NULL) {
6227
0
        xmlFreeDocElementContent(ctxt->myDoc, cur);
6228
0
                    return(NULL);
6229
0
                }
6230
96.4k
    ret->c1 = cur;
6231
96.4k
    if (cur != NULL)
6232
96.4k
        cur->parent = ret;
6233
96.4k
    cur = ret;
6234
789k
      } else {
6235
789k
          n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6236
789k
    if (n == NULL) {
6237
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6238
0
                    return(NULL);
6239
0
                }
6240
789k
    n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6241
789k
    if (n->c1 != NULL)
6242
789k
        n->c1->parent = n;
6243
789k
          cur->c2 = n;
6244
789k
    if (n != NULL)
6245
789k
        n->parent = cur;
6246
789k
    cur = n;
6247
789k
      }
6248
885k
      SKIP_BLANKS;
6249
885k
      elem = xmlParseName(ctxt);
6250
885k
      if (elem == NULL) {
6251
1.24k
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6252
1.24k
      "xmlParseElementMixedContentDecl : Name expected\n");
6253
1.24k
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6254
1.24k
    return(NULL);
6255
1.24k
      }
6256
884k
      SKIP_BLANKS;
6257
884k
      GROW;
6258
884k
  }
6259
96.3k
  if ((RAW == ')') && (NXT(1) == '*')) {
6260
93.0k
      if (elem != NULL) {
6261
93.0k
    cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6262
93.0k
                                   XML_ELEMENT_CONTENT_ELEMENT);
6263
93.0k
    if (cur->c2 != NULL)
6264
93.0k
        cur->c2->parent = cur;
6265
93.0k
            }
6266
93.0k
            if (ret != NULL)
6267
93.0k
                ret->ocur = XML_ELEMENT_CONTENT_MULT;
6268
93.0k
      if (ctxt->input->id != inputchk) {
6269
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6270
0
                               "Element content declaration doesn't start and"
6271
0
                               " stop in the same entity\n");
6272
0
      }
6273
93.0k
      SKIP(2);
6274
93.0k
  } else {
6275
3.30k
      xmlFreeDocElementContent(ctxt->myDoc, ret);
6276
3.30k
      xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6277
3.30k
      return(NULL);
6278
3.30k
  }
6279
6280
96.3k
    } else {
6281
0
  xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6282
0
    }
6283
93.0k
    return(ret);
6284
285k
}
6285
6286
/**
6287
 * xmlParseElementChildrenContentDeclPriv:
6288
 * @ctxt:  an XML parser context
6289
 * @inputchk:  the input used for the current entity, needed for boundary checks
6290
 * @depth: the level of recursion
6291
 *
6292
 * parse the declaration for an element's children content (choice or seq)
6293
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6294
 *
6295
 *
6296
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6297
 *
6298
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6299
 *
6300
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6301
 *
6302
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6303
 *
6304
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6305
 * TODO Parameter-entity replacement text must be properly nested
6306
 *  with parenthesized groups. That is to say, if either of the
6307
 *  opening or closing parentheses in a choice, seq, or Mixed
6308
 *  construct is contained in the replacement text for a parameter
6309
 *  entity, both must be contained in the same replacement text. For
6310
 *  interoperability, if a parameter-entity reference appears in a
6311
 *  choice, seq, or Mixed construct, its replacement text should not
6312
 *  be empty, and neither the first nor last non-blank character of
6313
 *  the replacement text should be a connector (| or ,).
6314
 *
6315
 * Returns the tree of xmlElementContentPtr describing the element
6316
 *          hierarchy.
6317
 */
6318
static xmlElementContentPtr
6319
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6320
1.40M
                                       int depth) {
6321
1.40M
    xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6322
1.40M
    const xmlChar *elem;
6323
1.40M
    xmlChar type = 0;
6324
6325
1.40M
    if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6326
1.40M
        (depth >  2048)) {
6327
170
        xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6328
170
"xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6329
170
                          depth);
6330
170
  return(NULL);
6331
170
    }
6332
1.40M
    SKIP_BLANKS;
6333
1.40M
    GROW;
6334
1.40M
    if (RAW == '(') {
6335
1.02M
  int inputid = ctxt->input->id;
6336
6337
        /* Recurse on first child */
6338
1.02M
  NEXT;
6339
1.02M
  SKIP_BLANKS;
6340
1.02M
        cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6341
1.02M
                                                           depth + 1);
6342
1.02M
        if (cur == NULL)
6343
994k
            return(NULL);
6344
33.0k
  SKIP_BLANKS;
6345
33.0k
  GROW;
6346
378k
    } else {
6347
378k
  elem = xmlParseName(ctxt);
6348
378k
  if (elem == NULL) {
6349
6.86k
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6350
6.86k
      return(NULL);
6351
6.86k
  }
6352
371k
        cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6353
371k
  if (cur == NULL) {
6354
0
      xmlErrMemory(ctxt, NULL);
6355
0
      return(NULL);
6356
0
  }
6357
371k
  GROW;
6358
371k
  if (RAW == '?') {
6359
21.0k
      cur->ocur = XML_ELEMENT_CONTENT_OPT;
6360
21.0k
      NEXT;
6361
350k
  } else if (RAW == '*') {
6362
31.3k
      cur->ocur = XML_ELEMENT_CONTENT_MULT;
6363
31.3k
      NEXT;
6364
318k
  } else if (RAW == '+') {
6365
66.0k
      cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6366
66.0k
      NEXT;
6367
252k
  } else {
6368
252k
      cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6369
252k
  }
6370
371k
  GROW;
6371
371k
    }
6372
404k
    SKIP_BLANKS;
6373
404k
    SHRINK;
6374
1.50M
    while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
6375
        /*
6376
   * Each loop iteration parses one separator and one element.
6377
   */
6378
1.13M
        if (RAW == ',') {
6379
300k
      if (type == 0) type = CUR;
6380
6381
      /*
6382
       * Detect "Name | Name , Name" error
6383
       */
6384
184k
      else if (type != CUR) {
6385
115
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6386
115
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6387
115
                      type);
6388
115
    if ((last != NULL) && (last != ret))
6389
115
        xmlFreeDocElementContent(ctxt->myDoc, last);
6390
115
    if (ret != NULL)
6391
115
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6392
115
    return(NULL);
6393
115
      }
6394
300k
      NEXT;
6395
6396
300k
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6397
300k
      if (op == NULL) {
6398
0
    if ((last != NULL) && (last != ret))
6399
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6400
0
          xmlFreeDocElementContent(ctxt->myDoc, ret);
6401
0
    return(NULL);
6402
0
      }
6403
300k
      if (last == NULL) {
6404
116k
    op->c1 = ret;
6405
116k
    if (ret != NULL)
6406
116k
        ret->parent = op;
6407
116k
    ret = cur = op;
6408
184k
      } else {
6409
184k
          cur->c2 = op;
6410
184k
    if (op != NULL)
6411
184k
        op->parent = cur;
6412
184k
    op->c1 = last;
6413
184k
    if (last != NULL)
6414
184k
        last->parent = op;
6415
184k
    cur =op;
6416
184k
    last = NULL;
6417
184k
      }
6418
830k
  } else if (RAW == '|') {
6419
806k
      if (type == 0) type = CUR;
6420
6421
      /*
6422
       * Detect "Name , Name | Name" error
6423
       */
6424
682k
      else if (type != CUR) {
6425
126
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6426
126
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6427
126
          type);
6428
126
    if ((last != NULL) && (last != ret))
6429
126
        xmlFreeDocElementContent(ctxt->myDoc, last);
6430
126
    if (ret != NULL)
6431
126
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6432
126
    return(NULL);
6433
126
      }
6434
806k
      NEXT;
6435
6436
806k
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6437
806k
      if (op == NULL) {
6438
0
    if ((last != NULL) && (last != ret))
6439
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6440
0
    if (ret != NULL)
6441
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6442
0
    return(NULL);
6443
0
      }
6444
806k
      if (last == NULL) {
6445
124k
    op->c1 = ret;
6446
124k
    if (ret != NULL)
6447
124k
        ret->parent = op;
6448
124k
    ret = cur = op;
6449
682k
      } else {
6450
682k
          cur->c2 = op;
6451
682k
    if (op != NULL)
6452
682k
        op->parent = cur;
6453
682k
    op->c1 = last;
6454
682k
    if (last != NULL)
6455
682k
        last->parent = op;
6456
682k
    cur =op;
6457
682k
    last = NULL;
6458
682k
      }
6459
806k
  } else {
6460
23.9k
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6461
23.9k
      if ((last != NULL) && (last != ret))
6462
9.58k
          xmlFreeDocElementContent(ctxt->myDoc, last);
6463
23.9k
      if (ret != NULL)
6464
23.9k
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6465
23.9k
      return(NULL);
6466
23.9k
  }
6467
1.10M
  GROW;
6468
1.10M
  SKIP_BLANKS;
6469
1.10M
  GROW;
6470
1.10M
  if (RAW == '(') {
6471
55.3k
      int inputid = ctxt->input->id;
6472
      /* Recurse on second child */
6473
55.3k
      NEXT;
6474
55.3k
      SKIP_BLANKS;
6475
55.3k
      last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6476
55.3k
                                                          depth + 1);
6477
55.3k
            if (last == NULL) {
6478
3.04k
    if (ret != NULL)
6479
3.04k
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6480
3.04k
    return(NULL);
6481
3.04k
            }
6482
52.3k
      SKIP_BLANKS;
6483
1.05M
  } else {
6484
1.05M
      elem = xmlParseName(ctxt);
6485
1.05M
      if (elem == NULL) {
6486
2.65k
    xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6487
2.65k
    if (ret != NULL)
6488
2.65k
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6489
2.65k
    return(NULL);
6490
2.65k
      }
6491
1.04M
      last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6492
1.04M
      if (last == NULL) {
6493
0
    if (ret != NULL)
6494
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6495
0
    return(NULL);
6496
0
      }
6497
1.04M
      if (RAW == '?') {
6498
102k
    last->ocur = XML_ELEMENT_CONTENT_OPT;
6499
102k
    NEXT;
6500
946k
      } else if (RAW == '*') {
6501
57.4k
    last->ocur = XML_ELEMENT_CONTENT_MULT;
6502
57.4k
    NEXT;
6503
889k
      } else if (RAW == '+') {
6504
19.0k
    last->ocur = XML_ELEMENT_CONTENT_PLUS;
6505
19.0k
    NEXT;
6506
870k
      } else {
6507
870k
    last->ocur = XML_ELEMENT_CONTENT_ONCE;
6508
870k
      }
6509
1.04M
  }
6510
1.10M
  SKIP_BLANKS;
6511
1.10M
  GROW;
6512
1.10M
    }
6513
374k
    if ((cur != NULL) && (last != NULL)) {
6514
224k
        cur->c2 = last;
6515
224k
  if (last != NULL)
6516
224k
      last->parent = cur;
6517
224k
    }
6518
374k
    if (ctxt->input->id != inputchk) {
6519
37
  xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6520
37
                       "Element content declaration doesn't start and stop in"
6521
37
                       " the same entity\n");
6522
37
    }
6523
374k
    NEXT;
6524
374k
    if (RAW == '?') {
6525
7.75k
  if (ret != NULL) {
6526
7.75k
      if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6527
7.75k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6528
464
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6529
7.28k
      else
6530
7.28k
          ret->ocur = XML_ELEMENT_CONTENT_OPT;
6531
7.75k
  }
6532
7.75k
  NEXT;
6533
366k
    } else if (RAW == '*') {
6534
98.5k
  if (ret != NULL) {
6535
98.5k
      ret->ocur = XML_ELEMENT_CONTENT_MULT;
6536
98.5k
      cur = ret;
6537
      /*
6538
       * Some normalization:
6539
       * (a | b* | c?)* == (a | b | c)*
6540
       */
6541
579k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6542
480k
    if ((cur->c1 != NULL) &&
6543
480k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6544
480k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6545
12.5k
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6546
480k
    if ((cur->c2 != NULL) &&
6547
480k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6548
480k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6549
2.53k
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6550
480k
    cur = cur->c2;
6551
480k
      }
6552
98.5k
  }
6553
98.5k
  NEXT;
6554
268k
    } else if (RAW == '+') {
6555
44.2k
  if (ret != NULL) {
6556
44.2k
      int found = 0;
6557
6558
44.2k
      if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6559
44.2k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6560
672
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6561
43.5k
      else
6562
43.5k
          ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6563
      /*
6564
       * Some normalization:
6565
       * (a | b*)+ == (a | b)*
6566
       * (a | b?)+ == (a | b)*
6567
       */
6568
77.5k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6569
33.3k
    if ((cur->c1 != NULL) &&
6570
33.3k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6571
33.3k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6572
533
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6573
533
        found = 1;
6574
533
    }
6575
33.3k
    if ((cur->c2 != NULL) &&
6576
33.3k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6577
33.3k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6578
408
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6579
408
        found = 1;
6580
408
    }
6581
33.3k
    cur = cur->c2;
6582
33.3k
      }
6583
44.2k
      if (found)
6584
907
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6585
44.2k
  }
6586
44.2k
  NEXT;
6587
44.2k
    }
6588
374k
    return(ret);
6589
404k
}
6590
6591
/**
6592
 * xmlParseElementChildrenContentDecl:
6593
 * @ctxt:  an XML parser context
6594
 * @inputchk:  the input used for the current entity, needed for boundary checks
6595
 *
6596
 * DEPRECATED: Internal function, don't use.
6597
 *
6598
 * parse the declaration for an element's children content (choice or seq)
6599
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6600
 *
6601
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6602
 *
6603
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6604
 *
6605
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6606
 *
6607
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6608
 *
6609
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6610
 * TODO Parameter-entity replacement text must be properly nested
6611
 *  with parenthesized groups. That is to say, if either of the
6612
 *  opening or closing parentheses in a choice, seq, or Mixed
6613
 *  construct is contained in the replacement text for a parameter
6614
 *  entity, both must be contained in the same replacement text. For
6615
 *  interoperability, if a parameter-entity reference appears in a
6616
 *  choice, seq, or Mixed construct, its replacement text should not
6617
 *  be empty, and neither the first nor last non-blank character of
6618
 *  the replacement text should be a connector (| or ,).
6619
 *
6620
 * Returns the tree of xmlElementContentPtr describing the element
6621
 *          hierarchy.
6622
 */
6623
xmlElementContentPtr
6624
0
xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6625
    /* stub left for API/ABI compat */
6626
0
    return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6627
0
}
6628
6629
/**
6630
 * xmlParseElementContentDecl:
6631
 * @ctxt:  an XML parser context
6632
 * @name:  the name of the element being defined.
6633
 * @result:  the Element Content pointer will be stored here if any
6634
 *
6635
 * DEPRECATED: Internal function, don't use.
6636
 *
6637
 * parse the content declaration of an element, either Mixed or children;
6638
 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6639
 *
6640
 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6641
 *
6642
 * returns: the type of element content XML_ELEMENT_TYPE_xxx, or -1 on error
6643
 */
6644
6645
int
6646
xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6647
608k
                           xmlElementContentPtr *result) {
6648
6649
608k
    xmlElementContentPtr tree = NULL;
6650
608k
    int inputid = ctxt->input->id;
6651
608k
    int res;
6652
6653
608k
    *result = NULL;
6654
6655
608k
    if (RAW != '(') {
6656
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6657
0
    "xmlParseElementContentDecl : %s '(' expected\n", name);
6658
0
  return(-1);
6659
0
    }
6660
608k
    NEXT;
6661
608k
    GROW;
6662
608k
    if (ctxt->instate == XML_PARSER_EOF)
6663
0
        return(-1);
6664
608k
    SKIP_BLANKS;
6665
608k
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6666
285k
        tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6667
285k
  res = XML_ELEMENT_TYPE_MIXED;
6668
322k
    } else {
6669
322k
        tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6670
322k
  res = XML_ELEMENT_TYPE_ELEMENT;
6671
322k
    }
6672
608k
    SKIP_BLANKS;
6673
608k
    *result = tree;
6674
608k
    return(res);
6675
608k
}
6676
6677
/**
6678
 * xmlParseElementDecl:
6679
 * @ctxt:  an XML parser context
6680
 *
6681
 * DEPRECATED: Internal function, don't use.
6682
 *
6683
 * Parse an element declaration. Always consumes '<!'.
6684
 *
6685
 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6686
 *
6687
 * [ VC: Unique Element Type Declaration ]
6688
 * No element type may be declared more than once
6689
 *
6690
 * Returns the type of the element, or -1 in case of error
6691
 */
6692
int
6693
804k
xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6694
804k
    const xmlChar *name;
6695
804k
    int ret = -1;
6696
804k
    xmlElementContentPtr content  = NULL;
6697
6698
804k
    if ((CUR != '<') || (NXT(1) != '!'))
6699
0
        return(ret);
6700
804k
    SKIP(2);
6701
6702
    /* GROW; done in the caller */
6703
804k
    if (CMP7(CUR_PTR, 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6704
801k
  int inputid = ctxt->input->id;
6705
6706
801k
  SKIP(7);
6707
801k
  if (SKIP_BLANKS == 0) {
6708
4.45k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6709
4.45k
               "Space required after 'ELEMENT'\n");
6710
4.45k
      return(-1);
6711
4.45k
  }
6712
796k
        name = xmlParseName(ctxt);
6713
796k
  if (name == NULL) {
6714
3.50k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6715
3.50k
         "xmlParseElementDecl: no name for Element\n");
6716
3.50k
      return(-1);
6717
3.50k
  }
6718
793k
  if (SKIP_BLANKS == 0) {
6719
19.9k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6720
19.9k
         "Space required after the element name\n");
6721
19.9k
  }
6722
793k
  if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6723
160k
      SKIP(5);
6724
      /*
6725
       * Element must always be empty.
6726
       */
6727
160k
      ret = XML_ELEMENT_TYPE_EMPTY;
6728
632k
  } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6729
632k
             (NXT(2) == 'Y')) {
6730
4.78k
      SKIP(3);
6731
      /*
6732
       * Element is a generic container.
6733
       */
6734
4.78k
      ret = XML_ELEMENT_TYPE_ANY;
6735
627k
  } else if (RAW == '(') {
6736
608k
      ret = xmlParseElementContentDecl(ctxt, name, &content);
6737
608k
  } else {
6738
      /*
6739
       * [ WFC: PEs in Internal Subset ] error handling.
6740
       */
6741
19.7k
      if ((RAW == '%') && (ctxt->external == 0) &&
6742
19.7k
          (ctxt->inputNr == 1)) {
6743
930
    xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
6744
930
    "PEReference: forbidden within markup decl in internal subset\n");
6745
18.7k
      } else {
6746
18.7k
    xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6747
18.7k
          "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6748
18.7k
            }
6749
19.7k
      return(-1);
6750
19.7k
  }
6751
6752
773k
  SKIP_BLANKS;
6753
6754
773k
  if (RAW != '>') {
6755
36.2k
      xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6756
36.2k
      if (content != NULL) {
6757
4.81k
    xmlFreeDocElementContent(ctxt->myDoc, content);
6758
4.81k
      }
6759
737k
  } else {
6760
737k
      if (inputid != ctxt->input->id) {
6761
33
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6762
33
                               "Element declaration doesn't start and stop in"
6763
33
                               " the same entity\n");
6764
33
      }
6765
6766
737k
      NEXT;
6767
737k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6768
737k
    (ctxt->sax->elementDecl != NULL)) {
6769
670k
    if (content != NULL)
6770
513k
        content->parent = NULL;
6771
670k
          ctxt->sax->elementDecl(ctxt->userData, name, ret,
6772
670k
                           content);
6773
670k
    if ((content != NULL) && (content->parent == NULL)) {
6774
        /*
6775
         * this is a trick: if xmlAddElementDecl is called,
6776
         * instead of copying the full tree it is plugged directly
6777
         * if called from the parser. Avoid duplicating the
6778
         * interfaces or change the API/ABI
6779
         */
6780
49.7k
        xmlFreeDocElementContent(ctxt->myDoc, content);
6781
49.7k
    }
6782
670k
      } else if (content != NULL) {
6783
51.0k
    xmlFreeDocElementContent(ctxt->myDoc, content);
6784
51.0k
      }
6785
737k
  }
6786
773k
    }
6787
777k
    return(ret);
6788
804k
}
6789
6790
/**
6791
 * xmlParseConditionalSections:
6792
 * @ctxt:  an XML parser context
6793
 *
6794
 * Parse a conditional section. Always consumes '<!['.
6795
 *
6796
 * [61] conditionalSect ::= includeSect | ignoreSect
6797
 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6798
 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6799
 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6800
 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6801
 */
6802
6803
static void
6804
10.4k
xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6805
10.4k
    int *inputIds = NULL;
6806
10.4k
    size_t inputIdsSize = 0;
6807
10.4k
    size_t depth = 0;
6808
6809
50.3k
    while (ctxt->instate != XML_PARSER_EOF) {
6810
49.9k
        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6811
26.1k
            int id = ctxt->input->id;
6812
6813
26.1k
            SKIP(3);
6814
26.1k
            SKIP_BLANKS;
6815
6816
26.1k
            if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6817
20.6k
                SKIP(7);
6818
20.6k
                SKIP_BLANKS;
6819
20.6k
                if (RAW != '[') {
6820
271
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6821
271
                    xmlHaltParser(ctxt);
6822
271
                    goto error;
6823
271
                }
6824
20.3k
                if (ctxt->input->id != id) {
6825
12
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6826
12
                                   "All markup of the conditional section is"
6827
12
                                   " not in the same entity\n");
6828
12
                }
6829
20.3k
                NEXT;
6830
6831
20.3k
                if (inputIdsSize <= depth) {
6832
7.14k
                    int *tmp;
6833
6834
7.14k
                    inputIdsSize = (inputIdsSize == 0 ? 4 : inputIdsSize * 2);
6835
7.14k
                    tmp = (int *) xmlRealloc(inputIds,
6836
7.14k
                            inputIdsSize * sizeof(int));
6837
7.14k
                    if (tmp == NULL) {
6838
0
                        xmlErrMemory(ctxt, NULL);
6839
0
                        goto error;
6840
0
                    }
6841
7.14k
                    inputIds = tmp;
6842
7.14k
                }
6843
20.3k
                inputIds[depth] = id;
6844
20.3k
                depth++;
6845
20.3k
            } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6846
3.80k
                size_t ignoreDepth = 0;
6847
6848
3.80k
                SKIP(6);
6849
3.80k
                SKIP_BLANKS;
6850
3.80k
                if (RAW != '[') {
6851
178
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6852
178
                    xmlHaltParser(ctxt);
6853
178
                    goto error;
6854
178
                }
6855
3.62k
                if (ctxt->input->id != id) {
6856
5
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6857
5
                                   "All markup of the conditional section is"
6858
5
                                   " not in the same entity\n");
6859
5
                }
6860
3.62k
                NEXT;
6861
6862
4.42M
                while (RAW != 0) {
6863
4.42M
                    if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6864
10.7k
                        SKIP(3);
6865
10.7k
                        ignoreDepth++;
6866
                        /* Check for integer overflow */
6867
10.7k
                        if (ignoreDepth == 0) {
6868
0
                            xmlErrMemory(ctxt, NULL);
6869
0
                            goto error;
6870
0
                        }
6871
4.41M
                    } else if ((RAW == ']') && (NXT(1) == ']') &&
6872
4.41M
                               (NXT(2) == '>')) {
6873
7.15k
                        if (ignoreDepth == 0)
6874
1.48k
                            break;
6875
5.66k
                        SKIP(3);
6876
5.66k
                        ignoreDepth--;
6877
4.40M
                    } else {
6878
4.40M
                        NEXT;
6879
4.40M
                    }
6880
4.42M
                }
6881
6882
3.62k
    if (RAW == 0) {
6883
2.13k
        xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6884
2.13k
                    goto error;
6885
2.13k
    }
6886
1.48k
                if (ctxt->input->id != id) {
6887
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6888
0
                                   "All markup of the conditional section is"
6889
0
                                   " not in the same entity\n");
6890
0
                }
6891
1.48k
                SKIP(3);
6892
1.73k
            } else {
6893
1.73k
                xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6894
1.73k
                xmlHaltParser(ctxt);
6895
1.73k
                goto error;
6896
1.73k
            }
6897
26.1k
        } else if ((depth > 0) &&
6898
23.8k
                   (RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6899
6.23k
            depth--;
6900
6.23k
            if (ctxt->input->id != inputIds[depth]) {
6901
114
                xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6902
114
                               "All markup of the conditional section is not"
6903
114
                               " in the same entity\n");
6904
114
            }
6905
6.23k
            SKIP(3);
6906
17.5k
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
6907
14.7k
            xmlParseMarkupDecl(ctxt);
6908
14.7k
        } else {
6909
2.79k
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6910
2.79k
            xmlHaltParser(ctxt);
6911
2.79k
            goto error;
6912
2.79k
        }
6913
6914
42.8k
        if (depth == 0)
6915
2.97k
            break;
6916
6917
39.8k
        SKIP_BLANKS;
6918
39.8k
        GROW;
6919
39.8k
    }
6920
6921
10.4k
error:
6922
10.4k
    xmlFree(inputIds);
6923
10.4k
}
6924
6925
/**
6926
 * xmlParseMarkupDecl:
6927
 * @ctxt:  an XML parser context
6928
 *
6929
 * DEPRECATED: Internal function, don't use.
6930
 *
6931
 * Parse markup declarations. Always consumes '<!' or '<?'.
6932
 *
6933
 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6934
 *                     NotationDecl | PI | Comment
6935
 *
6936
 * [ VC: Proper Declaration/PE Nesting ]
6937
 * Parameter-entity replacement text must be properly nested with
6938
 * markup declarations. That is to say, if either the first character
6939
 * or the last character of a markup declaration (markupdecl above) is
6940
 * contained in the replacement text for a parameter-entity reference,
6941
 * both must be contained in the same replacement text.
6942
 *
6943
 * [ WFC: PEs in Internal Subset ]
6944
 * In the internal DTD subset, parameter-entity references can occur
6945
 * only where markup declarations can occur, not within markup declarations.
6946
 * (This does not apply to references that occur in external parameter
6947
 * entities or to the external subset.)
6948
 */
6949
void
6950
14.0M
xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6951
14.0M
    GROW;
6952
14.0M
    if (CUR == '<') {
6953
14.0M
        if (NXT(1) == '!') {
6954
13.9M
      switch (NXT(2)) {
6955
1.84M
          case 'E':
6956
1.84M
        if (NXT(3) == 'L')
6957
804k
      xmlParseElementDecl(ctxt);
6958
1.04M
        else if (NXT(3) == 'N')
6959
1.04M
      xmlParseEntityDecl(ctxt);
6960
1.71k
                    else
6961
1.71k
                        SKIP(2);
6962
1.84M
        break;
6963
725k
          case 'A':
6964
725k
        xmlParseAttributeListDecl(ctxt);
6965
725k
        break;
6966
24.4k
          case 'N':
6967
24.4k
        xmlParseNotationDecl(ctxt);
6968
24.4k
        break;
6969
11.3M
          case '-':
6970
11.3M
        xmlParseComment(ctxt);
6971
11.3M
        break;
6972
28.6k
    default:
6973
        /* there is an error but it will be detected later */
6974
28.6k
                    SKIP(2);
6975
28.6k
        break;
6976
13.9M
      }
6977
13.9M
  } else if (NXT(1) == '?') {
6978
24.5k
      xmlParsePI(ctxt);
6979
24.5k
  }
6980
14.0M
    }
6981
6982
    /*
6983
     * detect requirement to exit there and act accordingly
6984
     * and avoid having instate overridden later on
6985
     */
6986
14.0M
    if (ctxt->instate == XML_PARSER_EOF)
6987
28.4k
        return;
6988
6989
13.9M
    ctxt->instate = XML_PARSER_DTD;
6990
13.9M
}
6991
6992
/**
6993
 * xmlParseTextDecl:
6994
 * @ctxt:  an XML parser context
6995
 *
6996
 * DEPRECATED: Internal function, don't use.
6997
 *
6998
 * parse an XML declaration header for external entities
6999
 *
7000
 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
7001
 */
7002
7003
void
7004
9.94k
xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
7005
9.94k
    xmlChar *version;
7006
9.94k
    const xmlChar *encoding;
7007
9.94k
    int oldstate;
7008
7009
    /*
7010
     * We know that '<?xml' is here.
7011
     */
7012
9.94k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
7013
9.60k
  SKIP(5);
7014
9.60k
    } else {
7015
340
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
7016
340
  return;
7017
340
    }
7018
7019
    /* Avoid expansion of parameter entities when skipping blanks. */
7020
9.60k
    oldstate = ctxt->instate;
7021
9.60k
    ctxt->instate = XML_PARSER_START;
7022
7023
9.60k
    if (SKIP_BLANKS == 0) {
7024
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7025
0
           "Space needed after '<?xml'\n");
7026
0
    }
7027
7028
    /*
7029
     * We may have the VersionInfo here.
7030
     */
7031
9.60k
    version = xmlParseVersionInfo(ctxt);
7032
9.60k
    if (version == NULL)
7033
2.67k
  version = xmlCharStrdup(XML_DEFAULT_VERSION);
7034
6.92k
    else {
7035
6.92k
  if (SKIP_BLANKS == 0) {
7036
757
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7037
757
               "Space needed here\n");
7038
757
  }
7039
6.92k
    }
7040
9.60k
    ctxt->input->version = version;
7041
7042
    /*
7043
     * We must have the encoding declaration
7044
     */
7045
9.60k
    encoding = xmlParseEncodingDecl(ctxt);
7046
9.60k
    if (ctxt->instate == XML_PARSER_EOF)
7047
0
        return;
7048
9.60k
    if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7049
  /*
7050
   * The XML REC instructs us to stop parsing right here
7051
   */
7052
230
        ctxt->instate = oldstate;
7053
230
        return;
7054
230
    }
7055
9.37k
    if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
7056
2.78k
  xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
7057
2.78k
           "Missing encoding in text declaration\n");
7058
2.78k
    }
7059
7060
9.37k
    SKIP_BLANKS;
7061
9.37k
    if ((RAW == '?') && (NXT(1) == '>')) {
7062
3.49k
        SKIP(2);
7063
5.87k
    } else if (RAW == '>') {
7064
        /* Deprecated old WD ... */
7065
133
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7066
133
  NEXT;
7067
5.74k
    } else {
7068
5.74k
        int c;
7069
7070
5.74k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7071
230k
        while ((c = CUR) != 0) {
7072
228k
            NEXT;
7073
228k
            if (c == '>')
7074
3.66k
                break;
7075
228k
        }
7076
5.74k
    }
7077
7078
9.37k
    ctxt->instate = oldstate;
7079
9.37k
}
7080
7081
/**
7082
 * xmlParseExternalSubset:
7083
 * @ctxt:  an XML parser context
7084
 * @ExternalID: the external identifier
7085
 * @SystemID: the system identifier (or URL)
7086
 *
7087
 * parse Markup declarations from an external subset
7088
 *
7089
 * [30] extSubset ::= textDecl? extSubsetDecl
7090
 *
7091
 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
7092
 */
7093
void
7094
xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
7095
49.3k
                       const xmlChar *SystemID) {
7096
49.3k
    xmlDetectSAX2(ctxt);
7097
49.3k
    GROW;
7098
7099
49.3k
    if ((ctxt->encoding == NULL) &&
7100
49.3k
        (ctxt->input->end - ctxt->input->cur >= 4)) {
7101
49.2k
        xmlChar start[4];
7102
49.2k
  xmlCharEncoding enc;
7103
7104
49.2k
  start[0] = RAW;
7105
49.2k
  start[1] = NXT(1);
7106
49.2k
  start[2] = NXT(2);
7107
49.2k
  start[3] = NXT(3);
7108
49.2k
  enc = xmlDetectCharEncoding(start, 4);
7109
49.2k
  if (enc != XML_CHAR_ENCODING_NONE)
7110
9.04k
      xmlSwitchEncoding(ctxt, enc);
7111
49.2k
    }
7112
7113
49.3k
    if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
7114
7.91k
  xmlParseTextDecl(ctxt);
7115
7.91k
  if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7116
      /*
7117
       * The XML REC instructs us to stop parsing right here
7118
       */
7119
203
      xmlHaltParser(ctxt);
7120
203
      return;
7121
203
  }
7122
7.91k
    }
7123
49.1k
    if (ctxt->myDoc == NULL) {
7124
0
        ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
7125
0
  if (ctxt->myDoc == NULL) {
7126
0
      xmlErrMemory(ctxt, "New Doc failed");
7127
0
      return;
7128
0
  }
7129
0
  ctxt->myDoc->properties = XML_DOC_INTERNAL;
7130
0
    }
7131
49.1k
    if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
7132
0
        xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
7133
7134
49.1k
    ctxt->instate = XML_PARSER_DTD;
7135
49.1k
    ctxt->external = 1;
7136
49.1k
    SKIP_BLANKS;
7137
2.07M
    while ((ctxt->instate != XML_PARSER_EOF) && (RAW != 0)) {
7138
2.04M
  GROW;
7139
2.04M
        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7140
10.4k
            xmlParseConditionalSections(ctxt);
7141
2.03M
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
7142
2.01M
            xmlParseMarkupDecl(ctxt);
7143
2.01M
        } else {
7144
15.3k
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7145
15.3k
            xmlHaltParser(ctxt);
7146
15.3k
            return;
7147
15.3k
        }
7148
2.02M
        SKIP_BLANKS;
7149
2.02M
    }
7150
7151
33.8k
    if (RAW != 0) {
7152
0
  xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7153
0
    }
7154
7155
33.8k
}
7156
7157
/**
7158
 * xmlParseReference:
7159
 * @ctxt:  an XML parser context
7160
 *
7161
 * DEPRECATED: Internal function, don't use.
7162
 *
7163
 * parse and handle entity references in content, depending on the SAX
7164
 * interface, this may end-up in a call to character() if this is a
7165
 * CharRef, a predefined entity, if there is no reference() callback.
7166
 * or if the parser was asked to switch to that mode.
7167
 *
7168
 * Always consumes '&'.
7169
 *
7170
 * [67] Reference ::= EntityRef | CharRef
7171
 */
7172
void
7173
4.80M
xmlParseReference(xmlParserCtxtPtr ctxt) {
7174
4.80M
    xmlEntityPtr ent;
7175
4.80M
    xmlChar *val;
7176
4.80M
    int was_checked;
7177
4.80M
    xmlNodePtr list = NULL;
7178
4.80M
    xmlParserErrors ret = XML_ERR_OK;
7179
7180
7181
4.80M
    if (RAW != '&')
7182
0
        return;
7183
7184
    /*
7185
     * Simple case of a CharRef
7186
     */
7187
4.80M
    if (NXT(1) == '#') {
7188
972k
  int i = 0;
7189
972k
  xmlChar out[16];
7190
972k
  int hex = NXT(2);
7191
972k
  int value = xmlParseCharRef(ctxt);
7192
7193
972k
  if (value == 0)
7194
321k
      return;
7195
650k
  if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
7196
      /*
7197
       * So we are using non-UTF-8 buffers
7198
       * Check that the char fit on 8bits, if not
7199
       * generate a CharRef.
7200
       */
7201
553k
      if (value <= 0xFF) {
7202
505k
    out[0] = value;
7203
505k
    out[1] = 0;
7204
505k
    if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7205
505k
        (!ctxt->disableSAX))
7206
455k
        ctxt->sax->characters(ctxt->userData, out, 1);
7207
505k
      } else {
7208
48.4k
    if ((hex == 'x') || (hex == 'X'))
7209
19.0k
        snprintf((char *)out, sizeof(out), "#x%X", value);
7210
29.3k
    else
7211
29.3k
        snprintf((char *)out, sizeof(out), "#%d", value);
7212
48.4k
    if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7213
48.4k
        (!ctxt->disableSAX))
7214
41.5k
        ctxt->sax->reference(ctxt->userData, out);
7215
48.4k
      }
7216
553k
  } else {
7217
      /*
7218
       * Just encode the value in UTF-8
7219
       */
7220
97.2k
      COPY_BUF(0 ,out, i, value);
7221
97.2k
      out[i] = 0;
7222
97.2k
      if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7223
97.2k
    (!ctxt->disableSAX))
7224
85.5k
    ctxt->sax->characters(ctxt->userData, out, i);
7225
97.2k
  }
7226
650k
  return;
7227
972k
    }
7228
7229
    /*
7230
     * We are seeing an entity reference
7231
     */
7232
3.83M
    ent = xmlParseEntityRef(ctxt);
7233
3.83M
    if (ent == NULL) return;
7234
2.76M
    if (!ctxt->wellFormed)
7235
1.17M
  return;
7236
1.59M
    was_checked = ent->flags & XML_ENT_PARSED;
7237
7238
    /* special case of predefined entities */
7239
1.59M
    if ((ent->name == NULL) ||
7240
1.59M
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7241
97.9k
  val = ent->content;
7242
97.9k
  if (val == NULL) return;
7243
  /*
7244
   * inline the entity.
7245
   */
7246
97.9k
  if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7247
97.9k
      (!ctxt->disableSAX))
7248
97.9k
      ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7249
97.9k
  return;
7250
97.9k
    }
7251
7252
    /*
7253
     * The first reference to the entity trigger a parsing phase
7254
     * where the ent->children is filled with the result from
7255
     * the parsing.
7256
     * Note: external parsed entities will not be loaded, it is not
7257
     * required for a non-validating parser, unless the parsing option
7258
     * of validating, or substituting entities were given. Doing so is
7259
     * far more secure as the parser will only process data coming from
7260
     * the document entity by default.
7261
     */
7262
1.49M
    if (((ent->flags & XML_ENT_PARSED) == 0) &&
7263
1.49M
        ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
7264
97.7k
         (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
7265
92.2k
  unsigned long oldsizeentcopy = ctxt->sizeentcopy;
7266
7267
  /*
7268
   * This is a bit hackish but this seems the best
7269
   * way to make sure both SAX and DOM entity support
7270
   * behaves okay.
7271
   */
7272
92.2k
  void *user_data;
7273
92.2k
  if (ctxt->userData == ctxt)
7274
92.2k
      user_data = NULL;
7275
0
  else
7276
0
      user_data = ctxt->userData;
7277
7278
        /* Avoid overflow as much as possible */
7279
92.2k
        ctxt->sizeentcopy = 0;
7280
7281
92.2k
        if (ent->flags & XML_ENT_EXPANDING) {
7282
543
            xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7283
543
            xmlHaltParser(ctxt);
7284
543
            return;
7285
543
        }
7286
7287
91.7k
        ent->flags |= XML_ENT_EXPANDING;
7288
7289
  /*
7290
   * Check that this entity is well formed
7291
   * 4.3.2: An internal general parsed entity is well-formed
7292
   * if its replacement text matches the production labeled
7293
   * content.
7294
   */
7295
91.7k
  if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7296
54.0k
      ctxt->depth++;
7297
54.0k
      ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
7298
54.0k
                                                user_data, &list);
7299
54.0k
      ctxt->depth--;
7300
7301
54.0k
  } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7302
37.6k
      ctxt->depth++;
7303
37.6k
      ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
7304
37.6k
                                     user_data, ctxt->depth, ent->URI,
7305
37.6k
             ent->ExternalID, &list);
7306
37.6k
      ctxt->depth--;
7307
37.6k
  } else {
7308
0
      ret = XML_ERR_ENTITY_PE_INTERNAL;
7309
0
      xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7310
0
       "invalid entity type found\n", NULL);
7311
0
  }
7312
7313
91.7k
        ent->flags &= ~XML_ENT_EXPANDING;
7314
91.7k
        ent->flags |= XML_ENT_PARSED | XML_ENT_CHECKED;
7315
91.7k
        ent->expandedSize = ctxt->sizeentcopy;
7316
91.7k
  if (ret == XML_ERR_ENTITY_LOOP) {
7317
3.35k
            xmlHaltParser(ctxt);
7318
3.35k
      xmlFreeNodeList(list);
7319
3.35k
      return;
7320
3.35k
  }
7321
88.3k
  if (xmlParserEntityCheck(ctxt, oldsizeentcopy)) {
7322
0
      xmlFreeNodeList(list);
7323
0
      return;
7324
0
  }
7325
7326
88.3k
  if ((ret == XML_ERR_OK) && (list != NULL)) {
7327
50.5k
            ent->children = list;
7328
            /*
7329
             * Prune it directly in the generated document
7330
             * except for single text nodes.
7331
             */
7332
50.5k
            if ((ctxt->replaceEntities == 0) ||
7333
50.5k
                (ctxt->parseMode == XML_PARSE_READER) ||
7334
50.5k
                ((list->type == XML_TEXT_NODE) &&
7335
41.7k
                 (list->next == NULL))) {
7336
41.7k
                ent->owner = 1;
7337
175k
                while (list != NULL) {
7338
134k
                    list->parent = (xmlNodePtr) ent;
7339
134k
                    if (list->doc != ent->doc)
7340
0
                        xmlSetTreeDoc(list, ent->doc);
7341
134k
                    if (list->next == NULL)
7342
41.7k
                        ent->last = list;
7343
134k
                    list = list->next;
7344
134k
                }
7345
41.7k
                list = NULL;
7346
41.7k
            } else {
7347
8.80k
                ent->owner = 0;
7348
170k
                while (list != NULL) {
7349
162k
                    list->parent = (xmlNodePtr) ctxt->node;
7350
162k
                    list->doc = ctxt->myDoc;
7351
162k
                    if (list->next == NULL)
7352
8.80k
                        ent->last = list;
7353
162k
                    list = list->next;
7354
162k
                }
7355
8.80k
                list = ent->children;
7356
#ifdef LIBXML_LEGACY_ENABLED
7357
                if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7358
                    xmlAddEntityReference(ent, list, NULL);
7359
#endif /* LIBXML_LEGACY_ENABLED */
7360
8.80k
            }
7361
50.5k
  } else if ((ret != XML_ERR_OK) &&
7362
37.8k
       (ret != XML_WAR_UNDECLARED_ENTITY)) {
7363
19.0k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7364
19.0k
         "Entity '%s' failed to parse\n", ent->name);
7365
19.0k
            if (ent->content != NULL)
7366
4.36k
                ent->content[0] = 0;
7367
19.0k
  } else if (list != NULL) {
7368
0
      xmlFreeNodeList(list);
7369
0
      list = NULL;
7370
0
  }
7371
7372
        /* Prevent entity from being parsed and expanded twice (Bug 760367). */
7373
88.3k
        was_checked = 0;
7374
88.3k
    }
7375
7376
    /*
7377
     * Now that the entity content has been gathered
7378
     * provide it to the application, this can take different forms based
7379
     * on the parsing modes.
7380
     */
7381
1.49M
    if (ent->children == NULL) {
7382
  /*
7383
   * Probably running in SAX mode and the callbacks don't
7384
   * build the entity content. So unless we already went
7385
   * though parsing for first checking go though the entity
7386
   * content to generate callbacks associated to the entity
7387
   */
7388
271k
  if (was_checked != 0) {
7389
228k
      void *user_data;
7390
      /*
7391
       * This is a bit hackish but this seems the best
7392
       * way to make sure both SAX and DOM entity support
7393
       * behaves okay.
7394
       */
7395
228k
      if (ctxt->userData == ctxt)
7396
228k
    user_data = NULL;
7397
0
      else
7398
0
    user_data = ctxt->userData;
7399
7400
228k
      if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7401
3.44k
    ctxt->depth++;
7402
3.44k
    ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7403
3.44k
           ent->content, user_data, NULL);
7404
3.44k
    ctxt->depth--;
7405
225k
      } else if (ent->etype ==
7406
225k
           XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7407
225k
          unsigned long oldsizeentities = ctxt->sizeentities;
7408
7409
225k
    ctxt->depth++;
7410
225k
    ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7411
225k
         ctxt->sax, user_data, ctxt->depth,
7412
225k
         ent->URI, ent->ExternalID, NULL);
7413
225k
    ctxt->depth--;
7414
7415
                /* Undo the change to sizeentities */
7416
225k
                ctxt->sizeentities = oldsizeentities;
7417
225k
      } else {
7418
0
    ret = XML_ERR_ENTITY_PE_INTERNAL;
7419
0
    xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7420
0
           "invalid entity type found\n", NULL);
7421
0
      }
7422
228k
      if (ret == XML_ERR_ENTITY_LOOP) {
7423
0
    xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7424
0
    return;
7425
0
      }
7426
228k
            if (xmlParserEntityCheck(ctxt, 0))
7427
0
                return;
7428
228k
  }
7429
271k
  if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7430
271k
      (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7431
      /*
7432
       * Entity reference callback comes second, it's somewhat
7433
       * superfluous but a compatibility to historical behaviour
7434
       */
7435
49.7k
      ctxt->sax->reference(ctxt->userData, ent->name);
7436
49.7k
  }
7437
271k
  return;
7438
271k
    }
7439
7440
    /*
7441
     * We also check for amplification if entities aren't substituted.
7442
     * They might be expanded later.
7443
     */
7444
1.22M
    if ((was_checked != 0) &&
7445
1.22M
        (xmlParserEntityCheck(ctxt, ent->expandedSize)))
7446
127
        return;
7447
7448
    /*
7449
     * If we didn't get any children for the entity being built
7450
     */
7451
1.22M
    if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7452
1.22M
  (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7453
  /*
7454
   * Create a node.
7455
   */
7456
207k
  ctxt->sax->reference(ctxt->userData, ent->name);
7457
207k
  return;
7458
207k
    }
7459
7460
1.01M
    if (ctxt->replaceEntities)  {
7461
  /*
7462
   * There is a problem on the handling of _private for entities
7463
   * (bug 155816): Should we copy the content of the field from
7464
   * the entity (possibly overwriting some value set by the user
7465
   * when a copy is created), should we leave it alone, or should
7466
   * we try to take care of different situations?  The problem
7467
   * is exacerbated by the usage of this field by the xmlReader.
7468
   * To fix this bug, we look at _private on the created node
7469
   * and, if it's NULL, we copy in whatever was in the entity.
7470
   * If it's not NULL we leave it alone.  This is somewhat of a
7471
   * hack - maybe we should have further tests to determine
7472
   * what to do.
7473
   */
7474
1.01M
  if (ctxt->node != NULL) {
7475
      /*
7476
       * Seems we are generating the DOM content, do
7477
       * a simple tree copy for all references except the first
7478
       * In the first occurrence list contains the replacement.
7479
       */
7480
1.01M
      if (((list == NULL) && (ent->owner == 0)) ||
7481
1.01M
    (ctxt->parseMode == XML_PARSE_READER)) {
7482
320k
    xmlNodePtr nw = NULL, cur, firstChild = NULL;
7483
7484
    /*
7485
     * when operating on a reader, the entities definitions
7486
     * are always owning the entities subtree.
7487
    if (ctxt->parseMode == XML_PARSE_READER)
7488
        ent->owner = 1;
7489
     */
7490
7491
320k
    cur = ent->children;
7492
424k
    while (cur != NULL) {
7493
424k
        nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7494
424k
        if (nw != NULL) {
7495
424k
      if (nw->_private == NULL)
7496
424k
          nw->_private = cur->_private;
7497
424k
      if (firstChild == NULL){
7498
320k
          firstChild = nw;
7499
320k
      }
7500
424k
      nw = xmlAddChild(ctxt->node, nw);
7501
424k
        }
7502
424k
        if (cur == ent->last) {
7503
      /*
7504
       * needed to detect some strange empty
7505
       * node cases in the reader tests
7506
       */
7507
320k
      if ((ctxt->parseMode == XML_PARSE_READER) &&
7508
320k
          (nw != NULL) &&
7509
320k
          (nw->type == XML_ELEMENT_NODE) &&
7510
320k
          (nw->children == NULL))
7511
3.52k
          nw->extra = 1;
7512
7513
320k
      break;
7514
320k
        }
7515
103k
        cur = cur->next;
7516
103k
    }
7517
#ifdef LIBXML_LEGACY_ENABLED
7518
    if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7519
      xmlAddEntityReference(ent, firstChild, nw);
7520
#endif /* LIBXML_LEGACY_ENABLED */
7521
692k
      } else if ((list == NULL) || (ctxt->inputNr > 0)) {
7522
692k
    xmlNodePtr nw = NULL, cur, next, last,
7523
692k
         firstChild = NULL;
7524
7525
    /*
7526
     * Copy the entity child list and make it the new
7527
     * entity child list. The goal is to make sure any
7528
     * ID or REF referenced will be the one from the
7529
     * document content and not the entity copy.
7530
     */
7531
692k
    cur = ent->children;
7532
692k
    ent->children = NULL;
7533
692k
    last = ent->last;
7534
692k
    ent->last = NULL;
7535
1.62M
    while (cur != NULL) {
7536
1.62M
        next = cur->next;
7537
1.62M
        cur->next = NULL;
7538
1.62M
        cur->parent = NULL;
7539
1.62M
        nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7540
1.62M
        if (nw != NULL) {
7541
1.62M
      if (nw->_private == NULL)
7542
1.62M
          nw->_private = cur->_private;
7543
1.62M
      if (firstChild == NULL){
7544
692k
          firstChild = cur;
7545
692k
      }
7546
1.62M
      xmlAddChild((xmlNodePtr) ent, nw);
7547
1.62M
        }
7548
1.62M
        xmlAddChild(ctxt->node, cur);
7549
1.62M
        if (cur == last)
7550
692k
      break;
7551
935k
        cur = next;
7552
935k
    }
7553
692k
    if (ent->owner == 0)
7554
8.80k
        ent->owner = 1;
7555
#ifdef LIBXML_LEGACY_ENABLED
7556
    if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7557
      xmlAddEntityReference(ent, firstChild, nw);
7558
#endif /* LIBXML_LEGACY_ENABLED */
7559
692k
      } else {
7560
0
    const xmlChar *nbktext;
7561
7562
    /*
7563
     * the name change is to avoid coalescing of the
7564
     * node with a possible previous text one which
7565
     * would make ent->children a dangling pointer
7566
     */
7567
0
    nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7568
0
          -1);
7569
0
    if (ent->children->type == XML_TEXT_NODE)
7570
0
        ent->children->name = nbktext;
7571
0
    if ((ent->last != ent->children) &&
7572
0
        (ent->last->type == XML_TEXT_NODE))
7573
0
        ent->last->name = nbktext;
7574
0
    xmlAddChildList(ctxt->node, ent->children);
7575
0
      }
7576
7577
      /*
7578
       * This is to avoid a nasty side effect, see
7579
       * characters() in SAX.c
7580
       */
7581
1.01M
      ctxt->nodemem = 0;
7582
1.01M
      ctxt->nodelen = 0;
7583
1.01M
      return;
7584
1.01M
  }
7585
1.01M
    }
7586
1.01M
}
7587
7588
/**
7589
 * xmlParseEntityRef:
7590
 * @ctxt:  an XML parser context
7591
 *
7592
 * DEPRECATED: Internal function, don't use.
7593
 *
7594
 * Parse an entitiy reference. Always consumes '&'.
7595
 *
7596
 * [68] EntityRef ::= '&' Name ';'
7597
 *
7598
 * [ WFC: Entity Declared ]
7599
 * In a document without any DTD, a document with only an internal DTD
7600
 * subset which contains no parameter entity references, or a document
7601
 * with "standalone='yes'", the Name given in the entity reference
7602
 * must match that in an entity declaration, except that well-formed
7603
 * documents need not declare any of the following entities: amp, lt,
7604
 * gt, apos, quot.  The declaration of a parameter entity must precede
7605
 * any reference to it.  Similarly, the declaration of a general entity
7606
 * must precede any reference to it which appears in a default value in an
7607
 * attribute-list declaration. Note that if entities are declared in the
7608
 * external subset or in external parameter entities, a non-validating
7609
 * processor is not obligated to read and process their declarations;
7610
 * for such documents, the rule that an entity must be declared is a
7611
 * well-formedness constraint only if standalone='yes'.
7612
 *
7613
 * [ WFC: Parsed Entity ]
7614
 * An entity reference must not contain the name of an unparsed entity
7615
 *
7616
 * Returns the xmlEntityPtr if found, or NULL otherwise.
7617
 */
7618
xmlEntityPtr
7619
5.72M
xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7620
5.72M
    const xmlChar *name;
7621
5.72M
    xmlEntityPtr ent = NULL;
7622
7623
5.72M
    GROW;
7624
5.72M
    if (ctxt->instate == XML_PARSER_EOF)
7625
0
        return(NULL);
7626
7627
5.72M
    if (RAW != '&')
7628
0
        return(NULL);
7629
5.72M
    NEXT;
7630
5.72M
    name = xmlParseName(ctxt);
7631
5.72M
    if (name == NULL) {
7632
555k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7633
555k
           "xmlParseEntityRef: no name\n");
7634
555k
        return(NULL);
7635
555k
    }
7636
5.16M
    if (RAW != ';') {
7637
390k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7638
390k
  return(NULL);
7639
390k
    }
7640
4.77M
    NEXT;
7641
7642
    /*
7643
     * Predefined entities override any extra definition
7644
     */
7645
4.77M
    if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7646
3.63M
        ent = xmlGetPredefinedEntity(name);
7647
3.63M
        if (ent != NULL)
7648
475k
            return(ent);
7649
3.63M
    }
7650
7651
    /*
7652
     * Ask first SAX for entity resolution, otherwise try the
7653
     * entities which may have stored in the parser context.
7654
     */
7655
4.29M
    if (ctxt->sax != NULL) {
7656
4.29M
  if (ctxt->sax->getEntity != NULL)
7657
4.29M
      ent = ctxt->sax->getEntity(ctxt->userData, name);
7658
4.29M
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7659
4.29M
      (ctxt->options & XML_PARSE_OLDSAX))
7660
16.0k
      ent = xmlGetPredefinedEntity(name);
7661
4.29M
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7662
4.29M
      (ctxt->userData==ctxt)) {
7663
48.4k
      ent = xmlSAX2GetEntity(ctxt, name);
7664
48.4k
  }
7665
4.29M
    }
7666
4.29M
    if (ctxt->instate == XML_PARSER_EOF)
7667
0
  return(NULL);
7668
    /*
7669
     * [ WFC: Entity Declared ]
7670
     * In a document without any DTD, a document with only an
7671
     * internal DTD subset which contains no parameter entity
7672
     * references, or a document with "standalone='yes'", the
7673
     * Name given in the entity reference must match that in an
7674
     * entity declaration, except that well-formed documents
7675
     * need not declare any of the following entities: amp, lt,
7676
     * gt, apos, quot.
7677
     * The declaration of a parameter entity must precede any
7678
     * reference to it.
7679
     * Similarly, the declaration of a general entity must
7680
     * precede any reference to it which appears in a default
7681
     * value in an attribute-list declaration. Note that if
7682
     * entities are declared in the external subset or in
7683
     * external parameter entities, a non-validating processor
7684
     * is not obligated to read and process their declarations;
7685
     * for such documents, the rule that an entity must be
7686
     * declared is a well-formedness constraint only if
7687
     * standalone='yes'.
7688
     */
7689
4.29M
    if (ent == NULL) {
7690
607k
  if ((ctxt->standalone == 1) ||
7691
607k
      ((ctxt->hasExternalSubset == 0) &&
7692
598k
       (ctxt->hasPErefs == 0))) {
7693
419k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7694
419k
         "Entity '%s' not defined\n", name);
7695
419k
  } else {
7696
188k
      xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7697
188k
         "Entity '%s' not defined\n", name);
7698
188k
      if ((ctxt->inSubset == 0) &&
7699
188k
    (ctxt->sax != NULL) &&
7700
188k
    (ctxt->sax->reference != NULL)) {
7701
182k
    ctxt->sax->reference(ctxt->userData, name);
7702
182k
      }
7703
188k
  }
7704
607k
  ctxt->valid = 0;
7705
607k
    }
7706
7707
    /*
7708
     * [ WFC: Parsed Entity ]
7709
     * An entity reference must not contain the name of an
7710
     * unparsed entity
7711
     */
7712
3.69M
    else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7713
3.01k
  xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7714
3.01k
     "Entity reference to unparsed entity %s\n", name);
7715
3.01k
    }
7716
7717
    /*
7718
     * [ WFC: No External Entity References ]
7719
     * Attribute values cannot contain direct or indirect
7720
     * entity references to external entities.
7721
     */
7722
3.68M
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7723
3.68M
       (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7724
25.5k
  xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7725
25.5k
       "Attribute references external entity '%s'\n", name);
7726
25.5k
    }
7727
    /*
7728
     * [ WFC: No < in Attribute Values ]
7729
     * The replacement text of any entity referred to directly or
7730
     * indirectly in an attribute value (other than "&lt;") must
7731
     * not contain a <.
7732
     */
7733
3.66M
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7734
3.66M
       (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7735
1.12M
  if ((ent->flags & XML_ENT_CHECKED_LT) == 0) {
7736
27.3k
            if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7737
3.03k
                ent->flags |= XML_ENT_CONTAINS_LT;
7738
27.3k
            ent->flags |= XML_ENT_CHECKED_LT;
7739
27.3k
        }
7740
1.12M
        if (ent->flags & XML_ENT_CONTAINS_LT)
7741
20.1k
            xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7742
20.1k
                    "'<' in entity '%s' is not allowed in attributes "
7743
20.1k
                    "values\n", name);
7744
1.12M
    }
7745
7746
    /*
7747
     * Internal check, no parameter entities here ...
7748
     */
7749
2.53M
    else {
7750
2.53M
  switch (ent->etype) {
7751
0
      case XML_INTERNAL_PARAMETER_ENTITY:
7752
0
      case XML_EXTERNAL_PARAMETER_ENTITY:
7753
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7754
0
       "Attempt to reference the parameter entity '%s'\n",
7755
0
            name);
7756
0
      break;
7757
2.53M
      default:
7758
2.53M
      break;
7759
2.53M
  }
7760
2.53M
    }
7761
7762
    /*
7763
     * [ WFC: No Recursion ]
7764
     * A parsed entity must not contain a recursive reference
7765
     * to itself, either directly or indirectly.
7766
     * Done somewhere else
7767
     */
7768
4.29M
    return(ent);
7769
4.29M
}
7770
7771
/**
7772
 * xmlParseStringEntityRef:
7773
 * @ctxt:  an XML parser context
7774
 * @str:  a pointer to an index in the string
7775
 *
7776
 * parse ENTITY references declarations, but this version parses it from
7777
 * a string value.
7778
 *
7779
 * [68] EntityRef ::= '&' Name ';'
7780
 *
7781
 * [ WFC: Entity Declared ]
7782
 * In a document without any DTD, a document with only an internal DTD
7783
 * subset which contains no parameter entity references, or a document
7784
 * with "standalone='yes'", the Name given in the entity reference
7785
 * must match that in an entity declaration, except that well-formed
7786
 * documents need not declare any of the following entities: amp, lt,
7787
 * gt, apos, quot.  The declaration of a parameter entity must precede
7788
 * any reference to it.  Similarly, the declaration of a general entity
7789
 * must precede any reference to it which appears in a default value in an
7790
 * attribute-list declaration. Note that if entities are declared in the
7791
 * external subset or in external parameter entities, a non-validating
7792
 * processor is not obligated to read and process their declarations;
7793
 * for such documents, the rule that an entity must be declared is a
7794
 * well-formedness constraint only if standalone='yes'.
7795
 *
7796
 * [ WFC: Parsed Entity ]
7797
 * An entity reference must not contain the name of an unparsed entity
7798
 *
7799
 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7800
 * is updated to the current location in the string.
7801
 */
7802
static xmlEntityPtr
7803
24.3M
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7804
24.3M
    xmlChar *name;
7805
24.3M
    const xmlChar *ptr;
7806
24.3M
    xmlChar cur;
7807
24.3M
    xmlEntityPtr ent = NULL;
7808
7809
24.3M
    if ((str == NULL) || (*str == NULL))
7810
0
        return(NULL);
7811
24.3M
    ptr = *str;
7812
24.3M
    cur = *ptr;
7813
24.3M
    if (cur != '&')
7814
0
  return(NULL);
7815
7816
24.3M
    ptr++;
7817
24.3M
    name = xmlParseStringName(ctxt, &ptr);
7818
24.3M
    if (name == NULL) {
7819
1.81k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7820
1.81k
           "xmlParseStringEntityRef: no name\n");
7821
1.81k
  *str = ptr;
7822
1.81k
  return(NULL);
7823
1.81k
    }
7824
24.3M
    if (*ptr != ';') {
7825
3.63k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7826
3.63k
        xmlFree(name);
7827
3.63k
  *str = ptr;
7828
3.63k
  return(NULL);
7829
3.63k
    }
7830
24.3M
    ptr++;
7831
7832
7833
    /*
7834
     * Predefined entities override any extra definition
7835
     */
7836
24.3M
    if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7837
21.5M
        ent = xmlGetPredefinedEntity(name);
7838
21.5M
        if (ent != NULL) {
7839
14.1k
            xmlFree(name);
7840
14.1k
            *str = ptr;
7841
14.1k
            return(ent);
7842
14.1k
        }
7843
21.5M
    }
7844
7845
    /*
7846
     * Ask first SAX for entity resolution, otherwise try the
7847
     * entities which may have stored in the parser context.
7848
     */
7849
24.3M
    if (ctxt->sax != NULL) {
7850
24.3M
  if (ctxt->sax->getEntity != NULL)
7851
24.3M
      ent = ctxt->sax->getEntity(ctxt->userData, name);
7852
24.3M
  if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7853
557k
      ent = xmlGetPredefinedEntity(name);
7854
24.3M
  if ((ent == NULL) && (ctxt->userData==ctxt)) {
7855
3.12M
      ent = xmlSAX2GetEntity(ctxt, name);
7856
3.12M
  }
7857
24.3M
    }
7858
24.3M
    if (ctxt->instate == XML_PARSER_EOF) {
7859
0
  xmlFree(name);
7860
0
  return(NULL);
7861
0
    }
7862
7863
    /*
7864
     * [ WFC: Entity Declared ]
7865
     * In a document without any DTD, a document with only an
7866
     * internal DTD subset which contains no parameter entity
7867
     * references, or a document with "standalone='yes'", the
7868
     * Name given in the entity reference must match that in an
7869
     * entity declaration, except that well-formed documents
7870
     * need not declare any of the following entities: amp, lt,
7871
     * gt, apos, quot.
7872
     * The declaration of a parameter entity must precede any
7873
     * reference to it.
7874
     * Similarly, the declaration of a general entity must
7875
     * precede any reference to it which appears in a default
7876
     * value in an attribute-list declaration. Note that if
7877
     * entities are declared in the external subset or in
7878
     * external parameter entities, a non-validating processor
7879
     * is not obligated to read and process their declarations;
7880
     * for such documents, the rule that an entity must be
7881
     * declared is a well-formedness constraint only if
7882
     * standalone='yes'.
7883
     */
7884
24.3M
    if (ent == NULL) {
7885
3.12M
  if ((ctxt->standalone == 1) ||
7886
3.12M
      ((ctxt->hasExternalSubset == 0) &&
7887
3.12M
       (ctxt->hasPErefs == 0))) {
7888
3.11M
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7889
3.11M
         "Entity '%s' not defined\n", name);
7890
3.11M
  } else {
7891
7.13k
      xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7892
7.13k
        "Entity '%s' not defined\n",
7893
7.13k
        name);
7894
7.13k
  }
7895
  /* TODO ? check regressions ctxt->valid = 0; */
7896
3.12M
    }
7897
7898
    /*
7899
     * [ WFC: Parsed Entity ]
7900
     * An entity reference must not contain the name of an
7901
     * unparsed entity
7902
     */
7903
21.2M
    else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7904
419
  xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7905
419
     "Entity reference to unparsed entity %s\n", name);
7906
419
    }
7907
7908
    /*
7909
     * [ WFC: No External Entity References ]
7910
     * Attribute values cannot contain direct or indirect
7911
     * entity references to external entities.
7912
     */
7913
21.2M
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7914
21.2M
       (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7915
1.17k
  xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7916
1.17k
   "Attribute references external entity '%s'\n", name);
7917
1.17k
    }
7918
    /*
7919
     * [ WFC: No < in Attribute Values ]
7920
     * The replacement text of any entity referred to directly or
7921
     * indirectly in an attribute value (other than "&lt;") must
7922
     * not contain a <.
7923
     */
7924
21.2M
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7925
21.2M
       (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7926
21.1M
  if ((ent->flags & XML_ENT_CHECKED_LT) == 0) {
7927
9.48k
            if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7928
699
                ent->flags |= XML_ENT_CONTAINS_LT;
7929
9.48k
            ent->flags |= XML_ENT_CHECKED_LT;
7930
9.48k
        }
7931
21.1M
        if (ent->flags & XML_ENT_CONTAINS_LT)
7932
23.1k
            xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7933
23.1k
                    "'<' in entity '%s' is not allowed in attributes "
7934
23.1k
                    "values\n", name);
7935
21.1M
    }
7936
7937
    /*
7938
     * Internal check, no parameter entities here ...
7939
     */
7940
63.4k
    else {
7941
63.4k
  switch (ent->etype) {
7942
0
      case XML_INTERNAL_PARAMETER_ENTITY:
7943
0
      case XML_EXTERNAL_PARAMETER_ENTITY:
7944
0
    xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7945
0
       "Attempt to reference the parameter entity '%s'\n",
7946
0
          name);
7947
0
      break;
7948
63.4k
      default:
7949
63.4k
      break;
7950
63.4k
  }
7951
63.4k
    }
7952
7953
    /*
7954
     * [ WFC: No Recursion ]
7955
     * A parsed entity must not contain a recursive reference
7956
     * to itself, either directly or indirectly.
7957
     * Done somewhere else
7958
     */
7959
7960
24.3M
    xmlFree(name);
7961
24.3M
    *str = ptr;
7962
24.3M
    return(ent);
7963
24.3M
}
7964
7965
/**
7966
 * xmlParsePEReference:
7967
 * @ctxt:  an XML parser context
7968
 *
7969
 * DEPRECATED: Internal function, don't use.
7970
 *
7971
 * Parse a parameter entity reference. Always consumes '%'.
7972
 *
7973
 * The entity content is handled directly by pushing it's content as
7974
 * a new input stream.
7975
 *
7976
 * [69] PEReference ::= '%' Name ';'
7977
 *
7978
 * [ WFC: No Recursion ]
7979
 * A parsed entity must not contain a recursive
7980
 * reference to itself, either directly or indirectly.
7981
 *
7982
 * [ WFC: Entity Declared ]
7983
 * In a document without any DTD, a document with only an internal DTD
7984
 * subset which contains no parameter entity references, or a document
7985
 * with "standalone='yes'", ...  ... The declaration of a parameter
7986
 * entity must precede any reference to it...
7987
 *
7988
 * [ VC: Entity Declared ]
7989
 * In a document with an external subset or external parameter entities
7990
 * with "standalone='no'", ...  ... The declaration of a parameter entity
7991
 * must precede any reference to it...
7992
 *
7993
 * [ WFC: In DTD ]
7994
 * Parameter-entity references may only appear in the DTD.
7995
 * NOTE: misleading but this is handled.
7996
 */
7997
void
7998
xmlParsePEReference(xmlParserCtxtPtr ctxt)
7999
12.0M
{
8000
12.0M
    const xmlChar *name;
8001
12.0M
    xmlEntityPtr entity = NULL;
8002
12.0M
    xmlParserInputPtr input;
8003
8004
12.0M
    if (RAW != '%')
8005
0
        return;
8006
12.0M
    NEXT;
8007
12.0M
    name = xmlParseName(ctxt);
8008
12.0M
    if (name == NULL) {
8009
51.8k
  xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
8010
51.8k
  return;
8011
51.8k
    }
8012
12.0M
    if (xmlParserDebugEntities)
8013
0
  xmlGenericError(xmlGenericErrorContext,
8014
0
    "PEReference: %s\n", name);
8015
12.0M
    if (RAW != ';') {
8016
10.5k
  xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
8017
10.5k
        return;
8018
10.5k
    }
8019
8020
12.0M
    NEXT;
8021
8022
    /*
8023
     * Request the entity from SAX
8024
     */
8025
12.0M
    if ((ctxt->sax != NULL) &&
8026
12.0M
  (ctxt->sax->getParameterEntity != NULL))
8027
12.0M
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8028
12.0M
    if (ctxt->instate == XML_PARSER_EOF)
8029
0
  return;
8030
12.0M
    if (entity == NULL) {
8031
  /*
8032
   * [ WFC: Entity Declared ]
8033
   * In a document without any DTD, a document with only an
8034
   * internal DTD subset which contains no parameter entity
8035
   * references, or a document with "standalone='yes'", ...
8036
   * ... The declaration of a parameter entity must precede
8037
   * any reference to it...
8038
   */
8039
1.21M
  if ((ctxt->standalone == 1) ||
8040
1.21M
      ((ctxt->hasExternalSubset == 0) &&
8041
1.21M
       (ctxt->hasPErefs == 0))) {
8042
2.48k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8043
2.48k
            "PEReference: %%%s; not found\n",
8044
2.48k
            name);
8045
1.21M
  } else {
8046
      /*
8047
       * [ VC: Entity Declared ]
8048
       * In a document with an external subset or external
8049
       * parameter entities with "standalone='no'", ...
8050
       * ... The declaration of a parameter entity must
8051
       * precede any reference to it...
8052
       */
8053
1.21M
            if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
8054
10.7k
                xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
8055
10.7k
                                 "PEReference: %%%s; not found\n",
8056
10.7k
                                 name, NULL);
8057
10.7k
            } else
8058
1.20M
                xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8059
1.20M
                              "PEReference: %%%s; not found\n",
8060
1.20M
                              name, NULL);
8061
1.21M
            ctxt->valid = 0;
8062
1.21M
  }
8063
10.7M
    } else {
8064
  /*
8065
   * Internal checking in case the entity quest barfed
8066
   */
8067
10.7M
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8068
10.7M
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8069
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8070
0
      "Internal: %%%s; is not a parameter entity\n",
8071
0
        name, NULL);
8072
10.7M
  } else {
8073
10.7M
            xmlChar start[4];
8074
10.7M
            xmlCharEncoding enc;
8075
10.7M
            unsigned long parentConsumed;
8076
10.7M
            xmlEntityPtr oldEnt;
8077
8078
10.7M
      if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
8079
10.7M
          ((ctxt->options & XML_PARSE_NOENT) == 0) &&
8080
10.7M
    ((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
8081
10.7M
    ((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
8082
10.7M
    ((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
8083
10.7M
    (ctxt->replaceEntities == 0) &&
8084
10.7M
    (ctxt->validate == 0))
8085
464
    return;
8086
8087
10.7M
            if (entity->flags & XML_ENT_EXPANDING) {
8088
196
                xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
8089
196
                xmlHaltParser(ctxt);
8090
196
                return;
8091
196
            }
8092
8093
            /* Must be computed from old input before pushing new input. */
8094
10.7M
            parentConsumed = ctxt->input->parentConsumed;
8095
10.7M
            oldEnt = ctxt->input->entity;
8096
10.7M
            if ((oldEnt == NULL) ||
8097
10.7M
                ((oldEnt->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
8098
10.3M
                 ((oldEnt->flags & XML_ENT_PARSED) == 0))) {
8099
570k
                xmlSaturatedAdd(&parentConsumed, ctxt->input->consumed);
8100
570k
                xmlSaturatedAddSizeT(&parentConsumed,
8101
570k
                                     ctxt->input->cur - ctxt->input->base);
8102
570k
            }
8103
8104
10.7M
      input = xmlNewEntityInputStream(ctxt, entity);
8105
10.7M
      if (xmlPushInput(ctxt, input) < 0) {
8106
7.31k
                xmlFreeInputStream(input);
8107
7.31k
    return;
8108
7.31k
            }
8109
8110
10.7M
            entity->flags |= XML_ENT_EXPANDING;
8111
8112
10.7M
            input->parentConsumed = parentConsumed;
8113
8114
10.7M
      if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
8115
                /*
8116
                 * Get the 4 first bytes and decode the charset
8117
                 * if enc != XML_CHAR_ENCODING_NONE
8118
                 * plug some encoding conversion routines.
8119
                 * Note that, since we may have some non-UTF8
8120
                 * encoding (like UTF16, bug 135229), the 'length'
8121
                 * is not known, but we can calculate based upon
8122
                 * the amount of data in the buffer.
8123
                 */
8124
5.05k
                GROW
8125
5.05k
                if (ctxt->instate == XML_PARSER_EOF)
8126
0
                    return;
8127
5.05k
                if ((ctxt->input->end - ctxt->input->cur)>=4) {
8128
4.98k
                    start[0] = RAW;
8129
4.98k
                    start[1] = NXT(1);
8130
4.98k
                    start[2] = NXT(2);
8131
4.98k
                    start[3] = NXT(3);
8132
4.98k
                    enc = xmlDetectCharEncoding(start, 4);
8133
4.98k
                    if (enc != XML_CHAR_ENCODING_NONE) {
8134
1.73k
                        xmlSwitchEncoding(ctxt, enc);
8135
1.73k
                    }
8136
4.98k
                }
8137
8138
5.05k
                if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
8139
5.05k
                    (IS_BLANK_CH(NXT(5)))) {
8140
1.33k
                    xmlParseTextDecl(ctxt);
8141
1.33k
                }
8142
5.05k
            }
8143
10.7M
  }
8144
10.7M
    }
8145
12.0M
    ctxt->hasPErefs = 1;
8146
12.0M
}
8147
8148
/**
8149
 * xmlLoadEntityContent:
8150
 * @ctxt:  an XML parser context
8151
 * @entity: an unloaded system entity
8152
 *
8153
 * Load the original content of the given system entity from the
8154
 * ExternalID/SystemID given. This is to be used for Included in Literal
8155
 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
8156
 *
8157
 * Returns 0 in case of success and -1 in case of failure
8158
 */
8159
static int
8160
2.09k
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
8161
2.09k
    xmlParserInputPtr input;
8162
2.09k
    xmlBufferPtr buf;
8163
2.09k
    int l, c;
8164
2.09k
    int count = 0;
8165
8166
2.09k
    if ((ctxt == NULL) || (entity == NULL) ||
8167
2.09k
        ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
8168
2.09k
   (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
8169
2.09k
  (entity->content != NULL)) {
8170
0
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8171
0
              "xmlLoadEntityContent parameter error");
8172
0
        return(-1);
8173
0
    }
8174
8175
2.09k
    if (xmlParserDebugEntities)
8176
0
  xmlGenericError(xmlGenericErrorContext,
8177
0
    "Reading %s entity content input\n", entity->name);
8178
8179
2.09k
    buf = xmlBufferCreate();
8180
2.09k
    if (buf == NULL) {
8181
0
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8182
0
              "xmlLoadEntityContent parameter error");
8183
0
        return(-1);
8184
0
    }
8185
2.09k
    xmlBufferSetAllocationScheme(buf, XML_BUFFER_ALLOC_DOUBLEIT);
8186
8187
2.09k
    input = xmlNewEntityInputStream(ctxt, entity);
8188
2.09k
    if (input == NULL) {
8189
341
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8190
341
              "xmlLoadEntityContent input error");
8191
341
  xmlBufferFree(buf);
8192
341
        return(-1);
8193
341
    }
8194
8195
    /*
8196
     * Push the entity as the current input, read char by char
8197
     * saving to the buffer until the end of the entity or an error
8198
     */
8199
1.75k
    if (xmlPushInput(ctxt, input) < 0) {
8200
0
        xmlBufferFree(buf);
8201
0
  xmlFreeInputStream(input);
8202
0
  return(-1);
8203
0
    }
8204
8205
1.75k
    GROW;
8206
1.75k
    c = CUR_CHAR(l);
8207
3.01M
    while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
8208
3.01M
           (IS_CHAR(c))) {
8209
3.01M
        xmlBufferAdd(buf, ctxt->input->cur, l);
8210
3.01M
  if (count++ > XML_PARSER_CHUNK_SIZE) {
8211
28.7k
      count = 0;
8212
28.7k
      GROW;
8213
28.7k
            if (ctxt->instate == XML_PARSER_EOF) {
8214
0
                xmlBufferFree(buf);
8215
0
                return(-1);
8216
0
            }
8217
28.7k
  }
8218
3.01M
  NEXTL(l);
8219
3.01M
  c = CUR_CHAR(l);
8220
3.01M
  if (c == 0) {
8221
1.53k
      count = 0;
8222
1.53k
      GROW;
8223
1.53k
            if (ctxt->instate == XML_PARSER_EOF) {
8224
0
                xmlBufferFree(buf);
8225
0
                return(-1);
8226
0
            }
8227
1.53k
      c = CUR_CHAR(l);
8228
1.53k
  }
8229
3.01M
    }
8230
8231
1.75k
    if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
8232
1.02k
        xmlSaturatedAdd(&ctxt->sizeentities, ctxt->input->consumed);
8233
1.02k
        xmlPopInput(ctxt);
8234
1.02k
    } else if (!IS_CHAR(c)) {
8235
726
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8236
726
                          "xmlLoadEntityContent: invalid char value %d\n",
8237
726
                    c);
8238
726
  xmlBufferFree(buf);
8239
726
  return(-1);
8240
726
    }
8241
1.02k
    entity->content = buf->content;
8242
1.02k
    entity->length = buf->use;
8243
1.02k
    buf->content = NULL;
8244
1.02k
    xmlBufferFree(buf);
8245
8246
1.02k
    return(0);
8247
1.75k
}
8248
8249
/**
8250
 * xmlParseStringPEReference:
8251
 * @ctxt:  an XML parser context
8252
 * @str:  a pointer to an index in the string
8253
 *
8254
 * parse PEReference declarations
8255
 *
8256
 * [69] PEReference ::= '%' Name ';'
8257
 *
8258
 * [ WFC: No Recursion ]
8259
 * A parsed entity must not contain a recursive
8260
 * reference to itself, either directly or indirectly.
8261
 *
8262
 * [ WFC: Entity Declared ]
8263
 * In a document without any DTD, a document with only an internal DTD
8264
 * subset which contains no parameter entity references, or a document
8265
 * with "standalone='yes'", ...  ... The declaration of a parameter
8266
 * entity must precede any reference to it...
8267
 *
8268
 * [ VC: Entity Declared ]
8269
 * In a document with an external subset or external parameter entities
8270
 * with "standalone='no'", ...  ... The declaration of a parameter entity
8271
 * must precede any reference to it...
8272
 *
8273
 * [ WFC: In DTD ]
8274
 * Parameter-entity references may only appear in the DTD.
8275
 * NOTE: misleading but this is handled.
8276
 *
8277
 * Returns the string of the entity content.
8278
 *         str is updated to the current value of the index
8279
 */
8280
static xmlEntityPtr
8281
400k
xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8282
400k
    const xmlChar *ptr;
8283
400k
    xmlChar cur;
8284
400k
    xmlChar *name;
8285
400k
    xmlEntityPtr entity = NULL;
8286
8287
400k
    if ((str == NULL) || (*str == NULL)) return(NULL);
8288
400k
    ptr = *str;
8289
400k
    cur = *ptr;
8290
400k
    if (cur != '%')
8291
0
        return(NULL);
8292
400k
    ptr++;
8293
400k
    name = xmlParseStringName(ctxt, &ptr);
8294
400k
    if (name == NULL) {
8295
4.24k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8296
4.24k
           "xmlParseStringPEReference: no name\n");
8297
4.24k
  *str = ptr;
8298
4.24k
  return(NULL);
8299
4.24k
    }
8300
395k
    cur = *ptr;
8301
395k
    if (cur != ';') {
8302
1.09k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8303
1.09k
  xmlFree(name);
8304
1.09k
  *str = ptr;
8305
1.09k
  return(NULL);
8306
1.09k
    }
8307
394k
    ptr++;
8308
8309
    /*
8310
     * Request the entity from SAX
8311
     */
8312
394k
    if ((ctxt->sax != NULL) &&
8313
394k
  (ctxt->sax->getParameterEntity != NULL))
8314
394k
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8315
394k
    if (ctxt->instate == XML_PARSER_EOF) {
8316
0
  xmlFree(name);
8317
0
  *str = ptr;
8318
0
  return(NULL);
8319
0
    }
8320
394k
    if (entity == NULL) {
8321
  /*
8322
   * [ WFC: Entity Declared ]
8323
   * In a document without any DTD, a document with only an
8324
   * internal DTD subset which contains no parameter entity
8325
   * references, or a document with "standalone='yes'", ...
8326
   * ... The declaration of a parameter entity must precede
8327
   * any reference to it...
8328
   */
8329
23.9k
  if ((ctxt->standalone == 1) ||
8330
23.9k
      ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8331
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8332
0
     "PEReference: %%%s; not found\n", name);
8333
23.9k
  } else {
8334
      /*
8335
       * [ VC: Entity Declared ]
8336
       * In a document with an external subset or external
8337
       * parameter entities with "standalone='no'", ...
8338
       * ... The declaration of a parameter entity must
8339
       * precede any reference to it...
8340
       */
8341
23.9k
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8342
23.9k
        "PEReference: %%%s; not found\n",
8343
23.9k
        name, NULL);
8344
23.9k
      ctxt->valid = 0;
8345
23.9k
  }
8346
370k
    } else {
8347
  /*
8348
   * Internal checking in case the entity quest barfed
8349
   */
8350
370k
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8351
370k
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8352
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8353
0
        "%%%s; is not a parameter entity\n",
8354
0
        name, NULL);
8355
0
  }
8356
370k
    }
8357
394k
    ctxt->hasPErefs = 1;
8358
394k
    xmlFree(name);
8359
394k
    *str = ptr;
8360
394k
    return(entity);
8361
394k
}
8362
8363
/**
8364
 * xmlParseDocTypeDecl:
8365
 * @ctxt:  an XML parser context
8366
 *
8367
 * DEPRECATED: Internal function, don't use.
8368
 *
8369
 * parse a DOCTYPE declaration
8370
 *
8371
 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8372
 *                      ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8373
 *
8374
 * [ VC: Root Element Type ]
8375
 * The Name in the document type declaration must match the element
8376
 * type of the root element.
8377
 */
8378
8379
void
8380
409k
xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
8381
409k
    const xmlChar *name = NULL;
8382
409k
    xmlChar *ExternalID = NULL;
8383
409k
    xmlChar *URI = NULL;
8384
8385
    /*
8386
     * We know that '<!DOCTYPE' has been detected.
8387
     */
8388
409k
    SKIP(9);
8389
8390
409k
    SKIP_BLANKS;
8391
8392
    /*
8393
     * Parse the DOCTYPE name.
8394
     */
8395
409k
    name = xmlParseName(ctxt);
8396
409k
    if (name == NULL) {
8397
1.83k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8398
1.83k
           "xmlParseDocTypeDecl : no DOCTYPE name !\n");
8399
1.83k
    }
8400
409k
    ctxt->intSubName = name;
8401
8402
409k
    SKIP_BLANKS;
8403
8404
    /*
8405
     * Check for SystemID and ExternalID
8406
     */
8407
409k
    URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8408
8409
409k
    if ((URI != NULL) || (ExternalID != NULL)) {
8410
148k
        ctxt->hasExternalSubset = 1;
8411
148k
    }
8412
409k
    ctxt->extSubURI = URI;
8413
409k
    ctxt->extSubSystem = ExternalID;
8414
8415
409k
    SKIP_BLANKS;
8416
8417
    /*
8418
     * Create and update the internal subset.
8419
     */
8420
409k
    if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8421
409k
  (!ctxt->disableSAX))
8422
386k
  ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8423
409k
    if (ctxt->instate == XML_PARSER_EOF)
8424
0
  return;
8425
8426
    /*
8427
     * Is there any internal subset declarations ?
8428
     * they are handled separately in xmlParseInternalSubset()
8429
     */
8430
409k
    if (RAW == '[')
8431
281k
  return;
8432
8433
    /*
8434
     * We should be at the end of the DOCTYPE declaration.
8435
     */
8436
128k
    if (RAW != '>') {
8437
41.3k
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8438
41.3k
    }
8439
128k
    NEXT;
8440
128k
}
8441
8442
/**
8443
 * xmlParseInternalSubset:
8444
 * @ctxt:  an XML parser context
8445
 *
8446
 * parse the internal subset declaration
8447
 *
8448
 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8449
 */
8450
8451
static void
8452
288k
xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8453
    /*
8454
     * Is there any DTD definition ?
8455
     */
8456
288k
    if (RAW == '[') {
8457
288k
        int baseInputNr = ctxt->inputNr;
8458
288k
        ctxt->instate = XML_PARSER_DTD;
8459
288k
        NEXT;
8460
  /*
8461
   * Parse the succession of Markup declarations and
8462
   * PEReferences.
8463
   * Subsequence (markupdecl | PEReference | S)*
8464
   */
8465
288k
  SKIP_BLANKS;
8466
12.3M
  while (((RAW != ']') || (ctxt->inputNr > baseInputNr)) &&
8467
12.3M
               (ctxt->instate != XML_PARSER_EOF)) {
8468
8469
            /*
8470
             * Conditional sections are allowed from external entities included
8471
             * by PE References in the internal subset.
8472
             */
8473
12.1M
            if ((ctxt->inputNr > 1) && (ctxt->input->filename != NULL) &&
8474
12.1M
                (RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
8475
0
                xmlParseConditionalSections(ctxt);
8476
12.1M
            } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
8477
11.9M
          xmlParseMarkupDecl(ctxt);
8478
11.9M
            } else if (RAW == '%') {
8479
70.0k
          xmlParsePEReference(ctxt);
8480
94.6k
            } else {
8481
94.6k
    xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8482
94.6k
                        "xmlParseInternalSubset: error detected in"
8483
94.6k
                        " Markup declaration\n");
8484
94.6k
                xmlHaltParser(ctxt);
8485
94.6k
                return;
8486
94.6k
            }
8487
12.0M
      SKIP_BLANKS;
8488
12.0M
  }
8489
193k
  if (RAW == ']') {
8490
170k
      NEXT;
8491
170k
      SKIP_BLANKS;
8492
170k
  }
8493
193k
    }
8494
8495
    /*
8496
     * We should be at the end of the DOCTYPE declaration.
8497
     */
8498
193k
    if (RAW != '>') {
8499
25.5k
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8500
25.5k
  return;
8501
25.5k
    }
8502
168k
    NEXT;
8503
168k
}
8504
8505
#ifdef LIBXML_SAX1_ENABLED
8506
/**
8507
 * xmlParseAttribute:
8508
 * @ctxt:  an XML parser context
8509
 * @value:  a xmlChar ** used to store the value of the attribute
8510
 *
8511
 * DEPRECATED: Internal function, don't use.
8512
 *
8513
 * parse an attribute
8514
 *
8515
 * [41] Attribute ::= Name Eq AttValue
8516
 *
8517
 * [ WFC: No External Entity References ]
8518
 * Attribute values cannot contain direct or indirect entity references
8519
 * to external entities.
8520
 *
8521
 * [ WFC: No < in Attribute Values ]
8522
 * The replacement text of any entity referred to directly or indirectly in
8523
 * an attribute value (other than "&lt;") must not contain a <.
8524
 *
8525
 * [ VC: Attribute Value Type ]
8526
 * The attribute must have been declared; the value must be of the type
8527
 * declared for it.
8528
 *
8529
 * [25] Eq ::= S? '=' S?
8530
 *
8531
 * With namespace:
8532
 *
8533
 * [NS 11] Attribute ::= QName Eq AttValue
8534
 *
8535
 * Also the case QName == xmlns:??? is handled independently as a namespace
8536
 * definition.
8537
 *
8538
 * Returns the attribute name, and the value in *value.
8539
 */
8540
8541
const xmlChar *
8542
4.73M
xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8543
4.73M
    const xmlChar *name;
8544
4.73M
    xmlChar *val;
8545
8546
4.73M
    *value = NULL;
8547
4.73M
    GROW;
8548
4.73M
    name = xmlParseName(ctxt);
8549
4.73M
    if (name == NULL) {
8550
1.44M
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8551
1.44M
                 "error parsing attribute name\n");
8552
1.44M
        return(NULL);
8553
1.44M
    }
8554
8555
    /*
8556
     * read the value
8557
     */
8558
3.28M
    SKIP_BLANKS;
8559
3.28M
    if (RAW == '=') {
8560
2.75M
        NEXT;
8561
2.75M
  SKIP_BLANKS;
8562
2.75M
  val = xmlParseAttValue(ctxt);
8563
2.75M
  ctxt->instate = XML_PARSER_CONTENT;
8564
2.75M
    } else {
8565
529k
  xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8566
529k
         "Specification mandates value for attribute %s\n", name);
8567
529k
  return(name);
8568
529k
    }
8569
8570
    /*
8571
     * Check that xml:lang conforms to the specification
8572
     * No more registered as an error, just generate a warning now
8573
     * since this was deprecated in XML second edition
8574
     */
8575
2.75M
    if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8576
62.0k
  if (!xmlCheckLanguageID(val)) {
8577
50.5k
      xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8578
50.5k
              "Malformed value for xml:lang : %s\n",
8579
50.5k
        val, NULL);
8580
50.5k
  }
8581
62.0k
    }
8582
8583
    /*
8584
     * Check that xml:space conforms to the specification
8585
     */
8586
2.75M
    if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8587
5.00k
  if (xmlStrEqual(val, BAD_CAST "default"))
8588
97
      *(ctxt->space) = 0;
8589
4.90k
  else if (xmlStrEqual(val, BAD_CAST "preserve"))
8590
642
      *(ctxt->space) = 1;
8591
4.26k
  else {
8592
4.26k
    xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8593
4.26k
"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8594
4.26k
                                 val, NULL);
8595
4.26k
  }
8596
5.00k
    }
8597
8598
2.75M
    *value = val;
8599
2.75M
    return(name);
8600
3.28M
}
8601
8602
/**
8603
 * xmlParseStartTag:
8604
 * @ctxt:  an XML parser context
8605
 *
8606
 * DEPRECATED: Internal function, don't use.
8607
 *
8608
 * Parse a start tag. Always consumes '<'.
8609
 *
8610
 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8611
 *
8612
 * [ WFC: Unique Att Spec ]
8613
 * No attribute name may appear more than once in the same start-tag or
8614
 * empty-element tag.
8615
 *
8616
 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8617
 *
8618
 * [ WFC: Unique Att Spec ]
8619
 * No attribute name may appear more than once in the same start-tag or
8620
 * empty-element tag.
8621
 *
8622
 * With namespace:
8623
 *
8624
 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8625
 *
8626
 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8627
 *
8628
 * Returns the element name parsed
8629
 */
8630
8631
const xmlChar *
8632
5.00M
xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8633
5.00M
    const xmlChar *name;
8634
5.00M
    const xmlChar *attname;
8635
5.00M
    xmlChar *attvalue;
8636
5.00M
    const xmlChar **atts = ctxt->atts;
8637
5.00M
    int nbatts = 0;
8638
5.00M
    int maxatts = ctxt->maxatts;
8639
5.00M
    int i;
8640
8641
5.00M
    if (RAW != '<') return(NULL);
8642
5.00M
    NEXT1;
8643
8644
5.00M
    name = xmlParseName(ctxt);
8645
5.00M
    if (name == NULL) {
8646
411k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8647
411k
       "xmlParseStartTag: invalid element name\n");
8648
411k
        return(NULL);
8649
411k
    }
8650
8651
    /*
8652
     * Now parse the attributes, it ends up with the ending
8653
     *
8654
     * (S Attribute)* S?
8655
     */
8656
4.59M
    SKIP_BLANKS;
8657
4.59M
    GROW;
8658
8659
6.62M
    while (((RAW != '>') &&
8660
6.62M
     ((RAW != '/') || (NXT(1) != '>')) &&
8661
6.62M
     (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
8662
4.73M
  attname = xmlParseAttribute(ctxt, &attvalue);
8663
4.73M
        if (attname == NULL) {
8664
1.44M
      xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8665
1.44M
         "xmlParseStartTag: problem parsing attributes\n");
8666
1.44M
      break;
8667
1.44M
  }
8668
3.28M
        if (attvalue != NULL) {
8669
      /*
8670
       * [ WFC: Unique Att Spec ]
8671
       * No attribute name may appear more than once in the same
8672
       * start-tag or empty-element tag.
8673
       */
8674
3.85M
      for (i = 0; i < nbatts;i += 2) {
8675
1.18M
          if (xmlStrEqual(atts[i], attname)) {
8676
33.9k
        xmlErrAttributeDup(ctxt, NULL, attname);
8677
33.9k
        xmlFree(attvalue);
8678
33.9k
        goto failed;
8679
33.9k
    }
8680
1.18M
      }
8681
      /*
8682
       * Add the pair to atts
8683
       */
8684
2.66M
      if (atts == NULL) {
8685
109k
          maxatts = 22; /* allow for 10 attrs by default */
8686
109k
          atts = (const xmlChar **)
8687
109k
           xmlMalloc(maxatts * sizeof(xmlChar *));
8688
109k
    if (atts == NULL) {
8689
0
        xmlErrMemory(ctxt, NULL);
8690
0
        if (attvalue != NULL)
8691
0
      xmlFree(attvalue);
8692
0
        goto failed;
8693
0
    }
8694
109k
    ctxt->atts = atts;
8695
109k
    ctxt->maxatts = maxatts;
8696
2.55M
      } else if (nbatts + 4 > maxatts) {
8697
173
          const xmlChar **n;
8698
8699
173
          maxatts *= 2;
8700
173
          n = (const xmlChar **) xmlRealloc((void *) atts,
8701
173
               maxatts * sizeof(const xmlChar *));
8702
173
    if (n == NULL) {
8703
0
        xmlErrMemory(ctxt, NULL);
8704
0
        if (attvalue != NULL)
8705
0
      xmlFree(attvalue);
8706
0
        goto failed;
8707
0
    }
8708
173
    atts = n;
8709
173
    ctxt->atts = atts;
8710
173
    ctxt->maxatts = maxatts;
8711
173
      }
8712
2.66M
      atts[nbatts++] = attname;
8713
2.66M
      atts[nbatts++] = attvalue;
8714
2.66M
      atts[nbatts] = NULL;
8715
2.66M
      atts[nbatts + 1] = NULL;
8716
2.66M
  } else {
8717
586k
      if (attvalue != NULL)
8718
0
    xmlFree(attvalue);
8719
586k
  }
8720
8721
3.28M
failed:
8722
8723
3.28M
  GROW
8724
3.28M
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8725
1.26M
      break;
8726
2.02M
  if (SKIP_BLANKS == 0) {
8727
1.07M
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8728
1.07M
         "attributes construct error\n");
8729
1.07M
  }
8730
2.02M
  SHRINK;
8731
2.02M
        GROW;
8732
2.02M
    }
8733
8734
    /*
8735
     * SAX: Start of Element !
8736
     */
8737
4.59M
    if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8738
4.59M
  (!ctxt->disableSAX)) {
8739
4.22M
  if (nbatts > 0)
8740
1.69M
      ctxt->sax->startElement(ctxt->userData, name, atts);
8741
2.52M
  else
8742
2.52M
      ctxt->sax->startElement(ctxt->userData, name, NULL);
8743
4.22M
    }
8744
8745
4.59M
    if (atts != NULL) {
8746
        /* Free only the content strings */
8747
6.40M
        for (i = 1;i < nbatts;i+=2)
8748
2.66M
      if (atts[i] != NULL)
8749
2.66M
         xmlFree((xmlChar *) atts[i]);
8750
3.73M
    }
8751
4.59M
    return(name);
8752
4.59M
}
8753
8754
/**
8755
 * xmlParseEndTag1:
8756
 * @ctxt:  an XML parser context
8757
 * @line:  line of the start tag
8758
 * @nsNr:  number of namespaces on the start tag
8759
 *
8760
 * Parse an end tag. Always consumes '</'.
8761
 *
8762
 * [42] ETag ::= '</' Name S? '>'
8763
 *
8764
 * With namespace
8765
 *
8766
 * [NS 9] ETag ::= '</' QName S? '>'
8767
 */
8768
8769
static void
8770
1.42M
xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8771
1.42M
    const xmlChar *name;
8772
8773
1.42M
    GROW;
8774
1.42M
    if ((RAW != '<') || (NXT(1) != '/')) {
8775
0
  xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8776
0
           "xmlParseEndTag: '</' not found\n");
8777
0
  return;
8778
0
    }
8779
1.42M
    SKIP(2);
8780
8781
1.42M
    name = xmlParseNameAndCompare(ctxt,ctxt->name);
8782
8783
    /*
8784
     * We should definitely be at the ending "S? '>'" part
8785
     */
8786
1.42M
    GROW;
8787
1.42M
    SKIP_BLANKS;
8788
1.42M
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8789
171k
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8790
171k
    } else
8791
1.25M
  NEXT1;
8792
8793
    /*
8794
     * [ WFC: Element Type Match ]
8795
     * The Name in an element's end-tag must match the element type in the
8796
     * start-tag.
8797
     *
8798
     */
8799
1.42M
    if (name != (xmlChar*)1) {
8800
346k
        if (name == NULL) name = BAD_CAST "unparsable";
8801
346k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8802
346k
         "Opening and ending tag mismatch: %s line %d and %s\n",
8803
346k
                    ctxt->name, line, name);
8804
346k
    }
8805
8806
    /*
8807
     * SAX: End of Tag
8808
     */
8809
1.42M
    if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8810
1.42M
  (!ctxt->disableSAX))
8811
1.28M
        ctxt->sax->endElement(ctxt->userData, ctxt->name);
8812
8813
1.42M
    namePop(ctxt);
8814
1.42M
    spacePop(ctxt);
8815
1.42M
    return;
8816
1.42M
}
8817
8818
/**
8819
 * xmlParseEndTag:
8820
 * @ctxt:  an XML parser context
8821
 *
8822
 * DEPRECATED: Internal function, don't use.
8823
 *
8824
 * parse an end of tag
8825
 *
8826
 * [42] ETag ::= '</' Name S? '>'
8827
 *
8828
 * With namespace
8829
 *
8830
 * [NS 9] ETag ::= '</' QName S? '>'
8831
 */
8832
8833
void
8834
0
xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8835
0
    xmlParseEndTag1(ctxt, 0);
8836
0
}
8837
#endif /* LIBXML_SAX1_ENABLED */
8838
8839
/************************************************************************
8840
 *                  *
8841
 *          SAX 2 specific operations       *
8842
 *                  *
8843
 ************************************************************************/
8844
8845
/*
8846
 * xmlGetNamespace:
8847
 * @ctxt:  an XML parser context
8848
 * @prefix:  the prefix to lookup
8849
 *
8850
 * Lookup the namespace name for the @prefix (which ca be NULL)
8851
 * The prefix must come from the @ctxt->dict dictionary
8852
 *
8853
 * Returns the namespace name or NULL if not bound
8854
 */
8855
static const xmlChar *
8856
9.17M
xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8857
9.17M
    int i;
8858
8859
9.17M
    if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
8860
26.0M
    for (i = ctxt->nsNr - 2;i >= 0;i-=2)
8861
18.8M
        if (ctxt->nsTab[i] == prefix) {
8862
1.57M
      if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8863
26.8k
          return(NULL);
8864
1.54M
      return(ctxt->nsTab[i + 1]);
8865
1.57M
  }
8866
7.25M
    return(NULL);
8867
8.82M
}
8868
8869
/**
8870
 * xmlParseQName:
8871
 * @ctxt:  an XML parser context
8872
 * @prefix:  pointer to store the prefix part
8873
 *
8874
 * parse an XML Namespace QName
8875
 *
8876
 * [6]  QName  ::= (Prefix ':')? LocalPart
8877
 * [7]  Prefix  ::= NCName
8878
 * [8]  LocalPart  ::= NCName
8879
 *
8880
 * Returns the Name parsed or NULL
8881
 */
8882
8883
static const xmlChar *
8884
17.2M
xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8885
17.2M
    const xmlChar *l, *p;
8886
8887
17.2M
    GROW;
8888
8889
17.2M
    l = xmlParseNCName(ctxt);
8890
17.2M
    if (l == NULL) {
8891
1.92M
        if (CUR == ':') {
8892
41.4k
      l = xmlParseName(ctxt);
8893
41.4k
      if (l != NULL) {
8894
41.4k
          xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8895
41.4k
             "Failed to parse QName '%s'\n", l, NULL, NULL);
8896
41.4k
    *prefix = NULL;
8897
41.4k
    return(l);
8898
41.4k
      }
8899
41.4k
  }
8900
1.88M
        return(NULL);
8901
1.92M
    }
8902
15.3M
    if (CUR == ':') {
8903
3.24M
        NEXT;
8904
3.24M
  p = l;
8905
3.24M
  l = xmlParseNCName(ctxt);
8906
3.24M
  if (l == NULL) {
8907
148k
      xmlChar *tmp;
8908
8909
148k
            if (ctxt->instate == XML_PARSER_EOF)
8910
0
                return(NULL);
8911
148k
            xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8912
148k
               "Failed to parse QName '%s:'\n", p, NULL, NULL);
8913
148k
      l = xmlParseNmtoken(ctxt);
8914
148k
      if (l == NULL) {
8915
103k
                if (ctxt->instate == XML_PARSER_EOF)
8916
0
                    return(NULL);
8917
103k
    tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8918
103k
            } else {
8919
44.7k
    tmp = xmlBuildQName(l, p, NULL, 0);
8920
44.7k
    xmlFree((char *)l);
8921
44.7k
      }
8922
148k
      p = xmlDictLookup(ctxt->dict, tmp, -1);
8923
148k
      if (tmp != NULL) xmlFree(tmp);
8924
148k
      *prefix = NULL;
8925
148k
      return(p);
8926
148k
  }
8927
3.09M
  if (CUR == ':') {
8928
97.9k
      xmlChar *tmp;
8929
8930
97.9k
            xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8931
97.9k
               "Failed to parse QName '%s:%s:'\n", p, l, NULL);
8932
97.9k
      NEXT;
8933
97.9k
      tmp = (xmlChar *) xmlParseName(ctxt);
8934
97.9k
      if (tmp != NULL) {
8935
84.8k
          tmp = xmlBuildQName(tmp, l, NULL, 0);
8936
84.8k
    l = xmlDictLookup(ctxt->dict, tmp, -1);
8937
84.8k
    if (tmp != NULL) xmlFree(tmp);
8938
84.8k
    *prefix = p;
8939
84.8k
    return(l);
8940
84.8k
      }
8941
13.0k
            if (ctxt->instate == XML_PARSER_EOF)
8942
0
                return(NULL);
8943
13.0k
      tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8944
13.0k
      l = xmlDictLookup(ctxt->dict, tmp, -1);
8945
13.0k
      if (tmp != NULL) xmlFree(tmp);
8946
13.0k
      *prefix = p;
8947
13.0k
      return(l);
8948
13.0k
  }
8949
3.00M
  *prefix = p;
8950
3.00M
    } else
8951
12.0M
        *prefix = NULL;
8952
15.0M
    return(l);
8953
15.3M
}
8954
8955
/**
8956
 * xmlParseQNameAndCompare:
8957
 * @ctxt:  an XML parser context
8958
 * @name:  the localname
8959
 * @prefix:  the prefix, if any.
8960
 *
8961
 * parse an XML name and compares for match
8962
 * (specialized for endtag parsing)
8963
 *
8964
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8965
 * and the name for mismatch
8966
 */
8967
8968
static const xmlChar *
8969
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8970
523k
                        xmlChar const *prefix) {
8971
523k
    const xmlChar *cmp;
8972
523k
    const xmlChar *in;
8973
523k
    const xmlChar *ret;
8974
523k
    const xmlChar *prefix2;
8975
8976
523k
    if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8977
8978
523k
    GROW;
8979
523k
    in = ctxt->input->cur;
8980
8981
523k
    cmp = prefix;
8982
1.23M
    while (*in != 0 && *in == *cmp) {
8983
707k
  ++in;
8984
707k
  ++cmp;
8985
707k
    }
8986
523k
    if ((*cmp == 0) && (*in == ':')) {
8987
404k
        in++;
8988
404k
  cmp = name;
8989
2.05M
  while (*in != 0 && *in == *cmp) {
8990
1.64M
      ++in;
8991
1.64M
      ++cmp;
8992
1.64M
  }
8993
404k
  if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8994
      /* success */
8995
246k
            ctxt->input->col += in - ctxt->input->cur;
8996
246k
      ctxt->input->cur = in;
8997
246k
      return((const xmlChar*) 1);
8998
246k
  }
8999
404k
    }
9000
    /*
9001
     * all strings coms from the dictionary, equality can be done directly
9002
     */
9003
277k
    ret = xmlParseQName (ctxt, &prefix2);
9004
277k
    if ((ret == name) && (prefix == prefix2))
9005
8.86k
  return((const xmlChar*) 1);
9006
268k
    return ret;
9007
277k
}
9008
9009
/**
9010
 * xmlParseAttValueInternal:
9011
 * @ctxt:  an XML parser context
9012
 * @len:  attribute len result
9013
 * @alloc:  whether the attribute was reallocated as a new string
9014
 * @normalize:  if 1 then further non-CDATA normalization must be done
9015
 *
9016
 * parse a value for an attribute.
9017
 * NOTE: if no normalization is needed, the routine will return pointers
9018
 *       directly from the data buffer.
9019
 *
9020
 * 3.3.3 Attribute-Value Normalization:
9021
 * Before the value of an attribute is passed to the application or
9022
 * checked for validity, the XML processor must normalize it as follows:
9023
 * - a character reference is processed by appending the referenced
9024
 *   character to the attribute value
9025
 * - an entity reference is processed by recursively processing the
9026
 *   replacement text of the entity
9027
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
9028
 *   appending #x20 to the normalized value, except that only a single
9029
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
9030
 *   parsed entity or the literal entity value of an internal parsed entity
9031
 * - other characters are processed by appending them to the normalized value
9032
 * If the declared value is not CDATA, then the XML processor must further
9033
 * process the normalized attribute value by discarding any leading and
9034
 * trailing space (#x20) characters, and by replacing sequences of space
9035
 * (#x20) characters by a single space (#x20) character.
9036
 * All attributes for which no declaration has been read should be treated
9037
 * by a non-validating parser as if declared CDATA.
9038
 *
9039
 * Returns the AttValue parsed or NULL. The value has to be freed by the
9040
 *     caller if it was copied, this can be detected by val[*len] == 0.
9041
 */
9042
9043
#define GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end) \
9044
4.16k
    const xmlChar *oldbase = ctxt->input->base;\
9045
4.16k
    GROW;\
9046
4.16k
    if (ctxt->instate == XML_PARSER_EOF)\
9047
4.16k
        return(NULL);\
9048
4.16k
    if (oldbase != ctxt->input->base) {\
9049
0
        ptrdiff_t delta = ctxt->input->base - oldbase;\
9050
0
        start = start + delta;\
9051
0
        in = in + delta;\
9052
0
    }\
9053
4.16k
    end = ctxt->input->end;
9054
9055
static xmlChar *
9056
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
9057
                         int normalize)
9058
9.08M
{
9059
9.08M
    xmlChar limit = 0;
9060
9.08M
    const xmlChar *in = NULL, *start, *end, *last;
9061
9.08M
    xmlChar *ret = NULL;
9062
9.08M
    int line, col;
9063
9.08M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9064
2.40M
                    XML_MAX_HUGE_LENGTH :
9065
9.08M
                    XML_MAX_TEXT_LENGTH;
9066
9067
9.08M
    GROW;
9068
9.08M
    in = (xmlChar *) CUR_PTR;
9069
9.08M
    line = ctxt->input->line;
9070
9.08M
    col = ctxt->input->col;
9071
9.08M
    if (*in != '"' && *in != '\'') {
9072
137k
        xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
9073
137k
        return (NULL);
9074
137k
    }
9075
8.94M
    ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
9076
9077
    /*
9078
     * try to handle in this routine the most common case where no
9079
     * allocation of a new string is required and where content is
9080
     * pure ASCII.
9081
     */
9082
8.94M
    limit = *in++;
9083
8.94M
    col++;
9084
8.94M
    end = ctxt->input->end;
9085
8.94M
    start = in;
9086
8.94M
    if (in >= end) {
9087
364
        GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9088
364
    }
9089
8.94M
    if (normalize) {
9090
        /*
9091
   * Skip any leading spaces
9092
   */
9093
569k
  while ((in < end) && (*in != limit) &&
9094
569k
         ((*in == 0x20) || (*in == 0x9) ||
9095
561k
          (*in == 0xA) || (*in == 0xD))) {
9096
168k
      if (*in == 0xA) {
9097
57.8k
          line++; col = 1;
9098
110k
      } else {
9099
110k
          col++;
9100
110k
      }
9101
168k
      in++;
9102
168k
      start = in;
9103
168k
      if (in >= end) {
9104
105
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9105
105
                if ((in - start) > maxLength) {
9106
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9107
0
                                   "AttValue length too long\n");
9108
0
                    return(NULL);
9109
0
                }
9110
105
      }
9111
168k
  }
9112
3.24M
  while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9113
3.24M
         (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9114
2.85M
      col++;
9115
2.85M
      if ((*in++ == 0x20) && (*in == 0x20)) break;
9116
2.84M
      if (in >= end) {
9117
236
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9118
236
                if ((in - start) > maxLength) {
9119
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9120
0
                                   "AttValue length too long\n");
9121
0
                    return(NULL);
9122
0
                }
9123
236
      }
9124
2.84M
  }
9125
400k
  last = in;
9126
  /*
9127
   * skip the trailing blanks
9128
   */
9129
417k
  while ((last[-1] == 0x20) && (last > start)) last--;
9130
531k
  while ((in < end) && (*in != limit) &&
9131
531k
         ((*in == 0x20) || (*in == 0x9) ||
9132
241k
          (*in == 0xA) || (*in == 0xD))) {
9133
130k
      if (*in == 0xA) {
9134
62.3k
          line++, col = 1;
9135
68.0k
      } else {
9136
68.0k
          col++;
9137
68.0k
      }
9138
130k
      in++;
9139
130k
      if (in >= end) {
9140
217
    const xmlChar *oldbase = ctxt->input->base;
9141
217
    GROW;
9142
217
                if (ctxt->instate == XML_PARSER_EOF)
9143
0
                    return(NULL);
9144
217
    if (oldbase != ctxt->input->base) {
9145
0
        ptrdiff_t delta = ctxt->input->base - oldbase;
9146
0
        start = start + delta;
9147
0
        in = in + delta;
9148
0
        last = last + delta;
9149
0
    }
9150
217
    end = ctxt->input->end;
9151
217
                if ((in - start) > maxLength) {
9152
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9153
0
                                   "AttValue length too long\n");
9154
0
                    return(NULL);
9155
0
                }
9156
217
      }
9157
130k
  }
9158
400k
        if ((in - start) > maxLength) {
9159
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9160
0
                           "AttValue length too long\n");
9161
0
            return(NULL);
9162
0
        }
9163
400k
  if (*in != limit) goto need_complex;
9164
8.54M
    } else {
9165
130M
  while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9166
130M
         (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9167
121M
      in++;
9168
121M
      col++;
9169
121M
      if (in >= end) {
9170
3.46k
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9171
3.46k
                if ((in - start) > maxLength) {
9172
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9173
0
                                   "AttValue length too long\n");
9174
0
                    return(NULL);
9175
0
                }
9176
3.46k
      }
9177
121M
  }
9178
8.54M
  last = in;
9179
8.54M
        if ((in - start) > maxLength) {
9180
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9181
0
                           "AttValue length too long\n");
9182
0
            return(NULL);
9183
0
        }
9184
8.54M
  if (*in != limit) goto need_complex;
9185
8.54M
    }
9186
7.14M
    in++;
9187
7.14M
    col++;
9188
7.14M
    if (len != NULL) {
9189
4.97M
        if (alloc) *alloc = 0;
9190
4.97M
        *len = last - start;
9191
4.97M
        ret = (xmlChar *) start;
9192
4.97M
    } else {
9193
2.17M
        if (alloc) *alloc = 1;
9194
2.17M
        ret = xmlStrndup(start, last - start);
9195
2.17M
    }
9196
7.14M
    CUR_PTR = in;
9197
7.14M
    ctxt->input->line = line;
9198
7.14M
    ctxt->input->col = col;
9199
7.14M
    return ret;
9200
1.80M
need_complex:
9201
1.80M
    if (alloc) *alloc = 1;
9202
1.80M
    return xmlParseAttValueComplex(ctxt, len, normalize);
9203
8.94M
}
9204
9205
/**
9206
 * xmlParseAttribute2:
9207
 * @ctxt:  an XML parser context
9208
 * @pref:  the element prefix
9209
 * @elem:  the element name
9210
 * @prefix:  a xmlChar ** used to store the value of the attribute prefix
9211
 * @value:  a xmlChar ** used to store the value of the attribute
9212
 * @len:  an int * to save the length of the attribute
9213
 * @alloc:  an int * to indicate if the attribute was allocated
9214
 *
9215
 * parse an attribute in the new SAX2 framework.
9216
 *
9217
 * Returns the attribute name, and the value in *value.
9218
 */
9219
9220
static const xmlChar *
9221
xmlParseAttribute2(xmlParserCtxtPtr ctxt,
9222
                   const xmlChar * pref, const xmlChar * elem,
9223
                   const xmlChar ** prefix, xmlChar ** value,
9224
                   int *len, int *alloc)
9225
7.61M
{
9226
7.61M
    const xmlChar *name;
9227
7.61M
    xmlChar *val, *internal_val = NULL;
9228
7.61M
    int normalize = 0;
9229
9230
7.61M
    *value = NULL;
9231
7.61M
    GROW;
9232
7.61M
    name = xmlParseQName(ctxt, prefix);
9233
7.61M
    if (name == NULL) {
9234
1.09M
        xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9235
1.09M
                       "error parsing attribute name\n");
9236
1.09M
        return (NULL);
9237
1.09M
    }
9238
9239
    /*
9240
     * get the type if needed
9241
     */
9242
6.52M
    if (ctxt->attsSpecial != NULL) {
9243
895k
        int type;
9244
9245
895k
        type = (int) (ptrdiff_t) xmlHashQLookup2(ctxt->attsSpecial,
9246
895k
                                                 pref, elem, *prefix, name);
9247
895k
        if (type != 0)
9248
407k
            normalize = 1;
9249
895k
    }
9250
9251
    /*
9252
     * read the value
9253
     */
9254
6.52M
    SKIP_BLANKS;
9255
6.52M
    if (RAW == '=') {
9256
6.07M
        NEXT;
9257
6.07M
        SKIP_BLANKS;
9258
6.07M
        val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9259
6.07M
        if (val == NULL)
9260
75.0k
            return (NULL);
9261
5.99M
  if (normalize) {
9262
      /*
9263
       * Sometimes a second normalisation pass for spaces is needed
9264
       * but that only happens if charrefs or entities references
9265
       * have been used in the attribute value, i.e. the attribute
9266
       * value have been extracted in an allocated string already.
9267
       */
9268
400k
      if (*alloc) {
9269
111k
          const xmlChar *val2;
9270
9271
111k
          val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
9272
111k
    if ((val2 != NULL) && (val2 != val)) {
9273
12.8k
        xmlFree(val);
9274
12.8k
        val = (xmlChar *) val2;
9275
12.8k
    }
9276
111k
      }
9277
400k
  }
9278
5.99M
        ctxt->instate = XML_PARSER_CONTENT;
9279
5.99M
    } else {
9280
445k
        xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
9281
445k
                          "Specification mandates value for attribute %s\n",
9282
445k
                          name);
9283
445k
        return (name);
9284
445k
    }
9285
9286
5.99M
    if (*prefix == ctxt->str_xml) {
9287
        /*
9288
         * Check that xml:lang conforms to the specification
9289
         * No more registered as an error, just generate a warning now
9290
         * since this was deprecated in XML second edition
9291
         */
9292
306k
        if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
9293
93.6k
            internal_val = xmlStrndup(val, *len);
9294
93.6k
            if (!xmlCheckLanguageID(internal_val)) {
9295
77.3k
                xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9296
77.3k
                              "Malformed value for xml:lang : %s\n",
9297
77.3k
                              internal_val, NULL);
9298
77.3k
            }
9299
93.6k
        }
9300
9301
        /*
9302
         * Check that xml:space conforms to the specification
9303
         */
9304
306k
        if (xmlStrEqual(name, BAD_CAST "space")) {
9305
4.44k
            internal_val = xmlStrndup(val, *len);
9306
4.44k
            if (xmlStrEqual(internal_val, BAD_CAST "default"))
9307
107
                *(ctxt->space) = 0;
9308
4.34k
            else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9309
504
                *(ctxt->space) = 1;
9310
3.83k
            else {
9311
3.83k
                xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9312
3.83k
                              "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9313
3.83k
                              internal_val, NULL);
9314
3.83k
            }
9315
4.44k
        }
9316
306k
        if (internal_val) {
9317
98.0k
            xmlFree(internal_val);
9318
98.0k
        }
9319
306k
    }
9320
9321
5.99M
    *value = val;
9322
5.99M
    return (name);
9323
6.52M
}
9324
/**
9325
 * xmlParseStartTag2:
9326
 * @ctxt:  an XML parser context
9327
 *
9328
 * Parse a start tag. Always consumes '<'.
9329
 *
9330
 * This routine is called when running SAX2 parsing
9331
 *
9332
 * [40] STag ::= '<' Name (S Attribute)* S? '>'
9333
 *
9334
 * [ WFC: Unique Att Spec ]
9335
 * No attribute name may appear more than once in the same start-tag or
9336
 * empty-element tag.
9337
 *
9338
 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9339
 *
9340
 * [ WFC: Unique Att Spec ]
9341
 * No attribute name may appear more than once in the same start-tag or
9342
 * empty-element tag.
9343
 *
9344
 * With namespace:
9345
 *
9346
 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9347
 *
9348
 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9349
 *
9350
 * Returns the element name parsed
9351
 */
9352
9353
static const xmlChar *
9354
xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
9355
9.36M
                  const xmlChar **URI, int *tlen) {
9356
9.36M
    const xmlChar *localname;
9357
9.36M
    const xmlChar *prefix;
9358
9.36M
    const xmlChar *attname;
9359
9.36M
    const xmlChar *aprefix;
9360
9.36M
    const xmlChar *nsname;
9361
9.36M
    xmlChar *attvalue;
9362
9.36M
    const xmlChar **atts = ctxt->atts;
9363
9.36M
    int maxatts = ctxt->maxatts;
9364
9.36M
    int nratts, nbatts, nbdef, inputid;
9365
9.36M
    int i, j, nbNs, attval;
9366
9.36M
    unsigned long cur;
9367
9.36M
    int nsNr = ctxt->nsNr;
9368
9369
9.36M
    if (RAW != '<') return(NULL);
9370
9.36M
    NEXT1;
9371
9372
    /*
9373
     * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
9374
     *       point since the attribute values may be stored as pointers to
9375
     *       the buffer and calling SHRINK would destroy them !
9376
     *       The Shrinking is only possible once the full set of attribute
9377
     *       callbacks have been done.
9378
     */
9379
9.36M
    SHRINK;
9380
9.36M
    cur = ctxt->input->cur - ctxt->input->base;
9381
9.36M
    inputid = ctxt->input->id;
9382
9.36M
    nbatts = 0;
9383
9.36M
    nratts = 0;
9384
9.36M
    nbdef = 0;
9385
9.36M
    nbNs = 0;
9386
9.36M
    attval = 0;
9387
    /* Forget any namespaces added during an earlier parse of this element. */
9388
9.36M
    ctxt->nsNr = nsNr;
9389
9390
9.36M
    localname = xmlParseQName(ctxt, &prefix);
9391
9.36M
    if (localname == NULL) {
9392
770k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9393
770k
           "StartTag: invalid element name\n");
9394
770k
        return(NULL);
9395
770k
    }
9396
8.59M
    *tlen = ctxt->input->cur - ctxt->input->base - cur;
9397
9398
    /*
9399
     * Now parse the attributes, it ends up with the ending
9400
     *
9401
     * (S Attribute)* S?
9402
     */
9403
8.59M
    SKIP_BLANKS;
9404
8.59M
    GROW;
9405
9406
10.8M
    while (((RAW != '>') &&
9407
10.8M
     ((RAW != '/') || (NXT(1) != '>')) &&
9408
10.8M
     (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
9409
7.61M
  int len = -1, alloc = 0;
9410
9411
7.61M
  attname = xmlParseAttribute2(ctxt, prefix, localname,
9412
7.61M
                               &aprefix, &attvalue, &len, &alloc);
9413
7.61M
        if (attname == NULL) {
9414
1.17M
      xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9415
1.17M
           "xmlParseStartTag: problem parsing attributes\n");
9416
1.17M
      break;
9417
1.17M
  }
9418
6.44M
        if (attvalue == NULL)
9419
445k
            goto next_attr;
9420
5.99M
  if (len < 0) len = xmlStrlen(attvalue);
9421
9422
5.99M
        if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9423
313k
            const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9424
313k
            xmlURIPtr uri;
9425
9426
313k
            if (URL == NULL) {
9427
0
                xmlErrMemory(ctxt, "dictionary allocation failure");
9428
0
                if ((attvalue != NULL) && (alloc != 0))
9429
0
                    xmlFree(attvalue);
9430
0
                localname = NULL;
9431
0
                goto done;
9432
0
            }
9433
313k
            if (*URL != 0) {
9434
303k
                uri = xmlParseURI((const char *) URL);
9435
303k
                if (uri == NULL) {
9436
126k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
9437
126k
                             "xmlns: '%s' is not a valid URI\n",
9438
126k
                                       URL, NULL, NULL);
9439
177k
                } else {
9440
177k
                    if (uri->scheme == NULL) {
9441
48.7k
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9442
48.7k
                                  "xmlns: URI %s is not absolute\n",
9443
48.7k
                                  URL, NULL, NULL);
9444
48.7k
                    }
9445
177k
                    xmlFreeURI(uri);
9446
177k
                }
9447
303k
                if (URL == ctxt->str_xml_ns) {
9448
20
                    if (attname != ctxt->str_xml) {
9449
20
                        xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9450
20
                     "xml namespace URI cannot be the default namespace\n",
9451
20
                                 NULL, NULL, NULL);
9452
20
                    }
9453
20
                    goto next_attr;
9454
20
                }
9455
303k
                if ((len == 29) &&
9456
303k
                    (xmlStrEqual(URL,
9457
19.8k
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9458
569
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9459
569
                         "reuse of the xmlns namespace name is forbidden\n",
9460
569
                             NULL, NULL, NULL);
9461
569
                    goto next_attr;
9462
569
                }
9463
303k
            }
9464
            /*
9465
             * check that it's not a defined namespace
9466
             */
9467
376k
            for (j = 1;j <= nbNs;j++)
9468
73.8k
                if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9469
10.1k
                    break;
9470
312k
            if (j <= nbNs)
9471
10.1k
                xmlErrAttributeDup(ctxt, NULL, attname);
9472
302k
            else
9473
302k
                if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
9474
9475
5.68M
        } else if (aprefix == ctxt->str_xmlns) {
9476
506k
            const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9477
506k
            xmlURIPtr uri;
9478
9479
506k
            if (attname == ctxt->str_xml) {
9480
7.55k
                if (URL != ctxt->str_xml_ns) {
9481
7.51k
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9482
7.51k
                             "xml namespace prefix mapped to wrong URI\n",
9483
7.51k
                             NULL, NULL, NULL);
9484
7.51k
                }
9485
                /*
9486
                 * Do not keep a namespace definition node
9487
                 */
9488
7.55k
                goto next_attr;
9489
7.55k
            }
9490
498k
            if (URL == ctxt->str_xml_ns) {
9491
184
                if (attname != ctxt->str_xml) {
9492
184
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9493
184
                             "xml namespace URI mapped to wrong prefix\n",
9494
184
                             NULL, NULL, NULL);
9495
184
                }
9496
184
                goto next_attr;
9497
184
            }
9498
498k
            if (attname == ctxt->str_xmlns) {
9499
8.01k
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9500
8.01k
                         "redefinition of the xmlns prefix is forbidden\n",
9501
8.01k
                         NULL, NULL, NULL);
9502
8.01k
                goto next_attr;
9503
8.01k
            }
9504
490k
            if ((len == 29) &&
9505
490k
                (xmlStrEqual(URL,
9506
15.1k
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9507
3.31k
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9508
3.31k
                         "reuse of the xmlns namespace name is forbidden\n",
9509
3.31k
                         NULL, NULL, NULL);
9510
3.31k
                goto next_attr;
9511
3.31k
            }
9512
487k
            if ((URL == NULL) || (URL[0] == 0)) {
9513
16.6k
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9514
16.6k
                         "xmlns:%s: Empty XML namespace is not allowed\n",
9515
16.6k
                              attname, NULL, NULL);
9516
16.6k
                goto next_attr;
9517
470k
            } else {
9518
470k
                uri = xmlParseURI((const char *) URL);
9519
470k
                if (uri == NULL) {
9520
146k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
9521
146k
                         "xmlns:%s: '%s' is not a valid URI\n",
9522
146k
                                       attname, URL, NULL);
9523
324k
                } else {
9524
324k
                    if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9525
37.8k
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9526
37.8k
                                  "xmlns:%s: URI %s is not absolute\n",
9527
37.8k
                                  attname, URL, NULL);
9528
37.8k
                    }
9529
324k
                    xmlFreeURI(uri);
9530
324k
                }
9531
470k
            }
9532
9533
            /*
9534
             * check that it's not a defined namespace
9535
             */
9536
617k
            for (j = 1;j <= nbNs;j++)
9537
162k
                if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9538
16.0k
                    break;
9539
470k
            if (j <= nbNs)
9540
16.0k
                xmlErrAttributeDup(ctxt, aprefix, attname);
9541
454k
            else
9542
454k
                if (nsPush(ctxt, attname, URL) > 0) nbNs++;
9543
9544
5.18M
        } else {
9545
            /*
9546
             * Add the pair to atts
9547
             */
9548
5.18M
            if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9549
142k
                if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9550
0
                    goto next_attr;
9551
0
                }
9552
142k
                maxatts = ctxt->maxatts;
9553
142k
                atts = ctxt->atts;
9554
142k
            }
9555
5.18M
            ctxt->attallocs[nratts++] = alloc;
9556
5.18M
            atts[nbatts++] = attname;
9557
5.18M
            atts[nbatts++] = aprefix;
9558
            /*
9559
             * The namespace URI field is used temporarily to point at the
9560
             * base of the current input buffer for non-alloced attributes.
9561
             * When the input buffer is reallocated, all the pointers become
9562
             * invalid, but they can be reconstructed later.
9563
             */
9564
5.18M
            if (alloc)
9565
772k
                atts[nbatts++] = NULL;
9566
4.40M
            else
9567
4.40M
                atts[nbatts++] = ctxt->input->base;
9568
5.18M
            atts[nbatts++] = attvalue;
9569
5.18M
            attvalue += len;
9570
5.18M
            atts[nbatts++] = attvalue;
9571
            /*
9572
             * tag if some deallocation is needed
9573
             */
9574
5.18M
            if (alloc != 0) attval = 1;
9575
5.18M
            attvalue = NULL; /* moved into atts */
9576
5.18M
        }
9577
9578
6.44M
next_attr:
9579
6.44M
        if ((attvalue != NULL) && (alloc != 0)) {
9580
253k
            xmlFree(attvalue);
9581
253k
            attvalue = NULL;
9582
253k
        }
9583
9584
6.44M
  GROW
9585
6.44M
        if (ctxt->instate == XML_PARSER_EOF)
9586
0
            break;
9587
6.44M
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9588
2.99M
      break;
9589
3.45M
  if (SKIP_BLANKS == 0) {
9590
1.15M
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9591
1.15M
         "attributes construct error\n");
9592
1.15M
      break;
9593
1.15M
  }
9594
2.29M
        GROW;
9595
2.29M
    }
9596
9597
8.59M
    if (ctxt->input->id != inputid) {
9598
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9599
0
                    "Unexpected change of input\n");
9600
0
        localname = NULL;
9601
0
        goto done;
9602
0
    }
9603
9604
    /* Reconstruct attribute value pointers. */
9605
13.7M
    for (i = 0, j = 0; j < nratts; i += 5, j++) {
9606
5.18M
        if (atts[i+2] != NULL) {
9607
            /*
9608
             * Arithmetic on dangling pointers is technically undefined
9609
             * behavior, but well...
9610
             */
9611
4.40M
            const xmlChar *old = atts[i+2];
9612
4.40M
            atts[i+2]  = NULL;    /* Reset repurposed namespace URI */
9613
4.40M
            atts[i+3] = ctxt->input->base + (atts[i+3] - old);  /* value */
9614
4.40M
            atts[i+4] = ctxt->input->base + (atts[i+4] - old);  /* valuend */
9615
4.40M
        }
9616
5.18M
    }
9617
9618
    /*
9619
     * The attributes defaulting
9620
     */
9621
8.59M
    if (ctxt->attsDefault != NULL) {
9622
898k
        xmlDefAttrsPtr defaults;
9623
9624
898k
  defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9625
898k
  if (defaults != NULL) {
9626
289k
      for (i = 0;i < defaults->nbAttrs;i++) {
9627
182k
          attname = defaults->values[5 * i];
9628
182k
    aprefix = defaults->values[5 * i + 1];
9629
9630
                /*
9631
     * special work for namespaces defaulted defs
9632
     */
9633
182k
    if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9634
        /*
9635
         * check that it's not a defined namespace
9636
         */
9637
13.8k
        for (j = 1;j <= nbNs;j++)
9638
5.66k
            if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9639
2.90k
          break;
9640
11.0k
              if (j <= nbNs) continue;
9641
9642
8.13k
        nsname = xmlGetNamespace(ctxt, NULL);
9643
8.13k
        if (nsname != defaults->values[5 * i + 2]) {
9644
4.40k
      if (nsPush(ctxt, NULL,
9645
4.40k
                 defaults->values[5 * i + 2]) > 0)
9646
3.90k
          nbNs++;
9647
4.40k
        }
9648
171k
    } else if (aprefix == ctxt->str_xmlns) {
9649
        /*
9650
         * check that it's not a defined namespace
9651
         */
9652
15.7k
        for (j = 1;j <= nbNs;j++)
9653
4.75k
            if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9654
1.69k
          break;
9655
12.6k
              if (j <= nbNs) continue;
9656
9657
10.9k
        nsname = xmlGetNamespace(ctxt, attname);
9658
10.9k
        if (nsname != defaults->values[5 * i + 2]) {
9659
5.36k
      if (nsPush(ctxt, attname,
9660
5.36k
                 defaults->values[5 * i + 2]) > 0)
9661
5.02k
          nbNs++;
9662
5.36k
        }
9663
158k
    } else {
9664
        /*
9665
         * check that it's not a defined attribute
9666
         */
9667
384k
        for (j = 0;j < nbatts;j+=5) {
9668
230k
      if ((attname == atts[j]) && (aprefix == atts[j+1]))
9669
4.78k
          break;
9670
230k
        }
9671
158k
        if (j < nbatts) continue;
9672
9673
153k
        if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9674
5.08k
      if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9675
0
                            localname = NULL;
9676
0
                            goto done;
9677
0
      }
9678
5.08k
      maxatts = ctxt->maxatts;
9679
5.08k
      atts = ctxt->atts;
9680
5.08k
        }
9681
153k
        atts[nbatts++] = attname;
9682
153k
        atts[nbatts++] = aprefix;
9683
153k
        if (aprefix == NULL)
9684
123k
      atts[nbatts++] = NULL;
9685
30.7k
        else
9686
30.7k
            atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
9687
153k
        atts[nbatts++] = defaults->values[5 * i + 2];
9688
153k
        atts[nbatts++] = defaults->values[5 * i + 3];
9689
153k
        if ((ctxt->standalone == 1) &&
9690
153k
            (defaults->values[5 * i + 4] != NULL)) {
9691
0
      xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9692
0
    "standalone: attribute %s on %s defaulted from external subset\n",
9693
0
                                   attname, localname);
9694
0
        }
9695
153k
        nbdef++;
9696
153k
    }
9697
182k
      }
9698
106k
  }
9699
898k
    }
9700
9701
    /*
9702
     * The attributes checkings
9703
     */
9704
13.9M
    for (i = 0; i < nbatts;i += 5) {
9705
        /*
9706
  * The default namespace does not apply to attribute names.
9707
  */
9708
5.33M
  if (atts[i + 1] != NULL) {
9709
535k
      nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9710
535k
      if (nsname == NULL) {
9711
158k
    xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9712
158k
        "Namespace prefix %s for %s on %s is not defined\n",
9713
158k
        atts[i + 1], atts[i], localname);
9714
158k
      }
9715
535k
      atts[i + 2] = nsname;
9716
535k
  } else
9717
4.79M
      nsname = NULL;
9718
  /*
9719
   * [ WFC: Unique Att Spec ]
9720
   * No attribute name may appear more than once in the same
9721
   * start-tag or empty-element tag.
9722
   * As extended by the Namespace in XML REC.
9723
   */
9724
7.66M
        for (j = 0; j < i;j += 5) {
9725
2.37M
      if (atts[i] == atts[j]) {
9726
95.5k
          if (atts[i+1] == atts[j+1]) {
9727
46.7k
        xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9728
46.7k
        break;
9729
46.7k
    }
9730
48.8k
    if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9731
48
        xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9732
48
           "Namespaced Attribute %s in '%s' redefined\n",
9733
48
           atts[i], nsname, NULL);
9734
48
        break;
9735
48
    }
9736
48.8k
      }
9737
2.37M
  }
9738
5.33M
    }
9739
9740
8.59M
    nsname = xmlGetNamespace(ctxt, prefix);
9741
8.59M
    if ((prefix != NULL) && (nsname == NULL)) {
9742
1.01M
  xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9743
1.01M
           "Namespace prefix %s on %s is not defined\n",
9744
1.01M
     prefix, localname, NULL);
9745
1.01M
    }
9746
8.59M
    *pref = prefix;
9747
8.59M
    *URI = nsname;
9748
9749
    /*
9750
     * SAX: Start of Element !
9751
     */
9752
8.59M
    if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9753
8.59M
  (!ctxt->disableSAX)) {
9754
7.50M
  if (nbNs > 0)
9755
444k
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9756
444k
        nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9757
444k
        nbatts / 5, nbdef, atts);
9758
7.05M
  else
9759
7.05M
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9760
7.05M
                    nsname, 0, NULL, nbatts / 5, nbdef, atts);
9761
7.50M
    }
9762
9763
8.59M
done:
9764
    /*
9765
     * Free up attribute allocated strings if needed
9766
     */
9767
8.59M
    if (attval != 0) {
9768
1.66M
  for (i = 3,j = 0; j < nratts;i += 5,j++)
9769
951k
      if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9770
772k
          xmlFree((xmlChar *) atts[i]);
9771
715k
    }
9772
9773
8.59M
    return(localname);
9774
8.59M
}
9775
9776
/**
9777
 * xmlParseEndTag2:
9778
 * @ctxt:  an XML parser context
9779
 * @line:  line of the start tag
9780
 * @nsNr:  number of namespaces on the start tag
9781
 *
9782
 * Parse an end tag. Always consumes '</'.
9783
 *
9784
 * [42] ETag ::= '</' Name S? '>'
9785
 *
9786
 * With namespace
9787
 *
9788
 * [NS 9] ETag ::= '</' QName S? '>'
9789
 */
9790
9791
static void
9792
2.93M
xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlStartTag *tag) {
9793
2.93M
    const xmlChar *name;
9794
9795
2.93M
    GROW;
9796
2.93M
    if ((RAW != '<') || (NXT(1) != '/')) {
9797
0
  xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9798
0
  return;
9799
0
    }
9800
2.93M
    SKIP(2);
9801
9802
2.93M
    if (tag->prefix == NULL)
9803
2.40M
        name = xmlParseNameAndCompare(ctxt, ctxt->name);
9804
523k
    else
9805
523k
        name = xmlParseQNameAndCompare(ctxt, ctxt->name, tag->prefix);
9806
9807
    /*
9808
     * We should definitely be at the ending "S? '>'" part
9809
     */
9810
2.93M
    GROW;
9811
2.93M
    if (ctxt->instate == XML_PARSER_EOF)
9812
0
        return;
9813
2.93M
    SKIP_BLANKS;
9814
2.93M
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9815
259k
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9816
259k
    } else
9817
2.67M
  NEXT1;
9818
9819
    /*
9820
     * [ WFC: Element Type Match ]
9821
     * The Name in an element's end-tag must match the element type in the
9822
     * start-tag.
9823
     *
9824
     */
9825
2.93M
    if (name != (xmlChar*)1) {
9826
539k
        if (name == NULL) name = BAD_CAST "unparsable";
9827
539k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9828
539k
         "Opening and ending tag mismatch: %s line %d and %s\n",
9829
539k
                    ctxt->name, tag->line, name);
9830
539k
    }
9831
9832
    /*
9833
     * SAX: End of Tag
9834
     */
9835
2.93M
    if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9836
2.93M
  (!ctxt->disableSAX))
9837
2.49M
  ctxt->sax->endElementNs(ctxt->userData, ctxt->name, tag->prefix,
9838
2.49M
                                tag->URI);
9839
9840
2.93M
    spacePop(ctxt);
9841
2.93M
    if (tag->nsNr != 0)
9842
68.3k
  nsPop(ctxt, tag->nsNr);
9843
2.93M
}
9844
9845
/**
9846
 * xmlParseCDSect:
9847
 * @ctxt:  an XML parser context
9848
 *
9849
 * DEPRECATED: Internal function, don't use.
9850
 *
9851
 * Parse escaped pure raw content. Always consumes '<!['.
9852
 *
9853
 * [18] CDSect ::= CDStart CData CDEnd
9854
 *
9855
 * [19] CDStart ::= '<![CDATA['
9856
 *
9857
 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9858
 *
9859
 * [21] CDEnd ::= ']]>'
9860
 */
9861
void
9862
133k
xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9863
133k
    xmlChar *buf = NULL;
9864
133k
    int len = 0;
9865
133k
    int size = XML_PARSER_BUFFER_SIZE;
9866
133k
    int r, rl;
9867
133k
    int s, sl;
9868
133k
    int cur, l;
9869
133k
    int count = 0;
9870
133k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9871
48.5k
                    XML_MAX_HUGE_LENGTH :
9872
133k
                    XML_MAX_TEXT_LENGTH;
9873
9874
133k
    if ((CUR != '<') || (NXT(1) != '!') || (NXT(2) != '['))
9875
0
        return;
9876
133k
    SKIP(3);
9877
9878
133k
    if (!CMP6(CUR_PTR, 'C', 'D', 'A', 'T', 'A', '['))
9879
0
        return;
9880
133k
    SKIP(6);
9881
9882
133k
    ctxt->instate = XML_PARSER_CDATA_SECTION;
9883
133k
    r = CUR_CHAR(rl);
9884
133k
    if (!IS_CHAR(r)) {
9885
7.90k
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9886
7.90k
        goto out;
9887
7.90k
    }
9888
125k
    NEXTL(rl);
9889
125k
    s = CUR_CHAR(sl);
9890
125k
    if (!IS_CHAR(s)) {
9891
11.4k
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9892
11.4k
        goto out;
9893
11.4k
    }
9894
114k
    NEXTL(sl);
9895
114k
    cur = CUR_CHAR(l);
9896
114k
    buf = (xmlChar *) xmlMallocAtomic(size);
9897
114k
    if (buf == NULL) {
9898
0
  xmlErrMemory(ctxt, NULL);
9899
0
        goto out;
9900
0
    }
9901
19.2M
    while (IS_CHAR(cur) &&
9902
19.2M
           ((r != ']') || (s != ']') || (cur != '>'))) {
9903
19.1M
  if (len + 5 >= size) {
9904
57.3k
      xmlChar *tmp;
9905
9906
57.3k
      tmp = (xmlChar *) xmlRealloc(buf, size * 2);
9907
57.3k
      if (tmp == NULL) {
9908
0
    xmlErrMemory(ctxt, NULL);
9909
0
                goto out;
9910
0
      }
9911
57.3k
      buf = tmp;
9912
57.3k
      size *= 2;
9913
57.3k
  }
9914
19.1M
  COPY_BUF(rl,buf,len,r);
9915
19.1M
  r = s;
9916
19.1M
  rl = sl;
9917
19.1M
  s = cur;
9918
19.1M
  sl = l;
9919
19.1M
  count++;
9920
19.1M
  if (count > 50) {
9921
343k
      SHRINK;
9922
343k
      GROW;
9923
343k
            if (ctxt->instate == XML_PARSER_EOF) {
9924
0
                goto out;
9925
0
            }
9926
343k
      count = 0;
9927
343k
  }
9928
19.1M
  NEXTL(l);
9929
19.1M
  cur = CUR_CHAR(l);
9930
19.1M
        if (len > maxLength) {
9931
0
            xmlFatalErrMsg(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9932
0
                           "CData section too big found\n");
9933
0
            goto out;
9934
0
        }
9935
19.1M
    }
9936
114k
    buf[len] = 0;
9937
114k
    if (cur != '>') {
9938
23.7k
  xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9939
23.7k
                       "CData section not finished\n%.50s\n", buf);
9940
23.7k
        goto out;
9941
23.7k
    }
9942
90.6k
    NEXTL(l);
9943
9944
    /*
9945
     * OK the buffer is to be consumed as cdata.
9946
     */
9947
90.6k
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9948
66.9k
  if (ctxt->sax->cdataBlock != NULL)
9949
41.4k
      ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9950
25.4k
  else if (ctxt->sax->characters != NULL)
9951
25.4k
      ctxt->sax->characters(ctxt->userData, buf, len);
9952
66.9k
    }
9953
9954
133k
out:
9955
133k
    if (ctxt->instate != XML_PARSER_EOF)
9956
133k
        ctxt->instate = XML_PARSER_CONTENT;
9957
133k
    xmlFree(buf);
9958
133k
}
9959
9960
/**
9961
 * xmlParseContentInternal:
9962
 * @ctxt:  an XML parser context
9963
 *
9964
 * Parse a content sequence. Stops at EOF or '</'. Leaves checking of
9965
 * unexpected EOF to the caller.
9966
 */
9967
9968
static void
9969
219k
xmlParseContentInternal(xmlParserCtxtPtr ctxt) {
9970
219k
    int nameNr = ctxt->nameNr;
9971
9972
219k
    GROW;
9973
20.4M
    while ((RAW != 0) &&
9974
20.4M
     (ctxt->instate != XML_PARSER_EOF)) {
9975
20.2M
  const xmlChar *cur = ctxt->input->cur;
9976
9977
  /*
9978
   * First case : a Processing Instruction.
9979
   */
9980
20.2M
  if ((*cur == '<') && (cur[1] == '?')) {
9981
166k
      xmlParsePI(ctxt);
9982
166k
  }
9983
9984
  /*
9985
   * Second case : a CDSection
9986
   */
9987
  /* 2.6.0 test was *cur not RAW */
9988
20.1M
  else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9989
133k
      xmlParseCDSect(ctxt);
9990
133k
  }
9991
9992
  /*
9993
   * Third case :  a comment
9994
   */
9995
19.9M
  else if ((*cur == '<') && (NXT(1) == '!') &&
9996
19.9M
     (NXT(2) == '-') && (NXT(3) == '-')) {
9997
216k
      xmlParseComment(ctxt);
9998
216k
      ctxt->instate = XML_PARSER_CONTENT;
9999
216k
  }
10000
10001
  /*
10002
   * Fourth case :  a sub-element.
10003
   */
10004
19.7M
  else if (*cur == '<') {
10005
7.97M
            if (NXT(1) == '/') {
10006
1.85M
                if (ctxt->nameNr <= nameNr)
10007
32.9k
                    break;
10008
1.82M
          xmlParseElementEnd(ctxt);
10009
6.11M
            } else {
10010
6.11M
          xmlParseElementStart(ctxt);
10011
6.11M
            }
10012
7.97M
  }
10013
10014
  /*
10015
   * Fifth case : a reference. If if has not been resolved,
10016
   *    parsing returns it's Name, create the node
10017
   */
10018
10019
11.7M
  else if (*cur == '&') {
10020
2.20M
      xmlParseReference(ctxt);
10021
2.20M
  }
10022
10023
  /*
10024
   * Last case, text. Note that References are handled directly.
10025
   */
10026
9.58M
  else {
10027
9.58M
      xmlParseCharData(ctxt, 0);
10028
9.58M
  }
10029
10030
20.2M
  GROW;
10031
20.2M
  SHRINK;
10032
20.2M
    }
10033
219k
}
10034
10035
/**
10036
 * xmlParseContent:
10037
 * @ctxt:  an XML parser context
10038
 *
10039
 * Parse a content sequence. Stops at EOF or '</'.
10040
 *
10041
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10042
 */
10043
10044
void
10045
99.2k
xmlParseContent(xmlParserCtxtPtr ctxt) {
10046
99.2k
    int nameNr = ctxt->nameNr;
10047
10048
99.2k
    xmlParseContentInternal(ctxt);
10049
10050
99.2k
    if ((ctxt->instate != XML_PARSER_EOF) && (ctxt->nameNr > nameNr)) {
10051
4.15k
        const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
10052
4.15k
        int line = ctxt->pushTab[ctxt->nameNr - 1].line;
10053
4.15k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
10054
4.15k
                "Premature end of data in tag %s line %d\n",
10055
4.15k
    name, line, NULL);
10056
4.15k
    }
10057
99.2k
}
10058
10059
/**
10060
 * xmlParseElement:
10061
 * @ctxt:  an XML parser context
10062
 *
10063
 * DEPRECATED: Internal function, don't use.
10064
 *
10065
 * parse an XML element
10066
 *
10067
 * [39] element ::= EmptyElemTag | STag content ETag
10068
 *
10069
 * [ WFC: Element Type Match ]
10070
 * The Name in an element's end-tag must match the element type in the
10071
 * start-tag.
10072
 *
10073
 */
10074
10075
void
10076
202k
xmlParseElement(xmlParserCtxtPtr ctxt) {
10077
202k
    if (xmlParseElementStart(ctxt) != 0)
10078
81.7k
        return;
10079
10080
120k
    xmlParseContentInternal(ctxt);
10081
120k
    if (ctxt->instate == XML_PARSER_EOF)
10082
434
  return;
10083
10084
120k
    if (CUR == 0) {
10085
88.5k
        const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
10086
88.5k
        int line = ctxt->pushTab[ctxt->nameNr - 1].line;
10087
88.5k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
10088
88.5k
                "Premature end of data in tag %s line %d\n",
10089
88.5k
    name, line, NULL);
10090
88.5k
        return;
10091
88.5k
    }
10092
10093
31.7k
    xmlParseElementEnd(ctxt);
10094
31.7k
}
10095
10096
/**
10097
 * xmlParseElementStart:
10098
 * @ctxt:  an XML parser context
10099
 *
10100
 * Parse the start of an XML element. Returns -1 in case of error, 0 if an
10101
 * opening tag was parsed, 1 if an empty element was parsed.
10102
 *
10103
 * Always consumes '<'.
10104
 */
10105
static int
10106
6.32M
xmlParseElementStart(xmlParserCtxtPtr ctxt) {
10107
6.32M
    const xmlChar *name;
10108
6.32M
    const xmlChar *prefix = NULL;
10109
6.32M
    const xmlChar *URI = NULL;
10110
6.32M
    xmlParserNodeInfo node_info;
10111
6.32M
    int line, tlen = 0;
10112
6.32M
    xmlNodePtr ret;
10113
6.32M
    int nsNr = ctxt->nsNr;
10114
10115
6.32M
    if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
10116
6.32M
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
10117
180
  xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
10118
180
     "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
10119
180
        xmlParserMaxDepth);
10120
180
  xmlHaltParser(ctxt);
10121
180
  return(-1);
10122
180
    }
10123
10124
    /* Capture start position */
10125
6.32M
    if (ctxt->record_info) {
10126
0
        node_info.begin_pos = ctxt->input->consumed +
10127
0
                          (CUR_PTR - ctxt->input->base);
10128
0
  node_info.begin_line = ctxt->input->line;
10129
0
    }
10130
10131
6.32M
    if (ctxt->spaceNr == 0)
10132
0
  spacePush(ctxt, -1);
10133
6.32M
    else if (*ctxt->space == -2)
10134
1.48M
  spacePush(ctxt, -1);
10135
4.83M
    else
10136
4.83M
  spacePush(ctxt, *ctxt->space);
10137
10138
6.32M
    line = ctxt->input->line;
10139
6.32M
#ifdef LIBXML_SAX1_ENABLED
10140
6.32M
    if (ctxt->sax2)
10141
4.21M
#endif /* LIBXML_SAX1_ENABLED */
10142
4.21M
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
10143
2.11M
#ifdef LIBXML_SAX1_ENABLED
10144
2.11M
    else
10145
2.11M
  name = xmlParseStartTag(ctxt);
10146
6.32M
#endif /* LIBXML_SAX1_ENABLED */
10147
6.32M
    if (ctxt->instate == XML_PARSER_EOF)
10148
205
  return(-1);
10149
6.32M
    if (name == NULL) {
10150
1.14M
  spacePop(ctxt);
10151
1.14M
        return(-1);
10152
1.14M
    }
10153
5.17M
    nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
10154
5.17M
    ret = ctxt->node;
10155
10156
5.17M
#ifdef LIBXML_VALID_ENABLED
10157
    /*
10158
     * [ VC: Root Element Type ]
10159
     * The Name in the document type declaration must match the element
10160
     * type of the root element.
10161
     */
10162
5.17M
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10163
5.17M
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
10164
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
10165
5.17M
#endif /* LIBXML_VALID_ENABLED */
10166
10167
    /*
10168
     * Check for an Empty Element.
10169
     */
10170
5.17M
    if ((RAW == '/') && (NXT(1) == '>')) {
10171
1.04M
        SKIP(2);
10172
1.04M
  if (ctxt->sax2) {
10173
771k
      if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
10174
771k
    (!ctxt->disableSAX))
10175
562k
    ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
10176
771k
#ifdef LIBXML_SAX1_ENABLED
10177
771k
  } else {
10178
274k
      if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
10179
274k
    (!ctxt->disableSAX))
10180
219k
    ctxt->sax->endElement(ctxt->userData, name);
10181
274k
#endif /* LIBXML_SAX1_ENABLED */
10182
274k
  }
10183
1.04M
  namePop(ctxt);
10184
1.04M
  spacePop(ctxt);
10185
1.04M
  if (nsNr != ctxt->nsNr)
10186
19.5k
      nsPop(ctxt, ctxt->nsNr - nsNr);
10187
1.04M
  if ( ret != NULL && ctxt->record_info ) {
10188
0
     node_info.end_pos = ctxt->input->consumed +
10189
0
            (CUR_PTR - ctxt->input->base);
10190
0
     node_info.end_line = ctxt->input->line;
10191
0
     node_info.node = ret;
10192
0
     xmlParserAddNodeInfo(ctxt, &node_info);
10193
0
  }
10194
1.04M
  return(1);
10195
1.04M
    }
10196
4.12M
    if (RAW == '>') {
10197
2.84M
        NEXT1;
10198
2.84M
    } else {
10199
1.28M
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
10200
1.28M
         "Couldn't find end of Start Tag %s line %d\n",
10201
1.28M
                    name, line, NULL);
10202
10203
  /*
10204
   * end of parsing of this node.
10205
   */
10206
1.28M
  nodePop(ctxt);
10207
1.28M
  namePop(ctxt);
10208
1.28M
  spacePop(ctxt);
10209
1.28M
  if (nsNr != ctxt->nsNr)
10210
110k
      nsPop(ctxt, ctxt->nsNr - nsNr);
10211
10212
  /*
10213
   * Capture end position and add node
10214
   */
10215
1.28M
  if ( ret != NULL && ctxt->record_info ) {
10216
0
     node_info.end_pos = ctxt->input->consumed +
10217
0
            (CUR_PTR - ctxt->input->base);
10218
0
     node_info.end_line = ctxt->input->line;
10219
0
     node_info.node = ret;
10220
0
     xmlParserAddNodeInfo(ctxt, &node_info);
10221
0
  }
10222
1.28M
  return(-1);
10223
1.28M
    }
10224
10225
2.84M
    return(0);
10226
4.12M
}
10227
10228
/**
10229
 * xmlParseElementEnd:
10230
 * @ctxt:  an XML parser context
10231
 *
10232
 * Parse the end of an XML element. Always consumes '</'.
10233
 */
10234
static void
10235
1.85M
xmlParseElementEnd(xmlParserCtxtPtr ctxt) {
10236
1.85M
    xmlParserNodeInfo node_info;
10237
1.85M
    xmlNodePtr ret = ctxt->node;
10238
10239
1.85M
    if (ctxt->nameNr <= 0) {
10240
0
        if ((RAW == '<') && (NXT(1) == '/'))
10241
0
            SKIP(2);
10242
0
        return;
10243
0
    }
10244
10245
    /*
10246
     * parse the end of tag: '</' should be here.
10247
     */
10248
1.85M
    if (ctxt->sax2) {
10249
1.26M
  xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
10250
1.26M
  namePop(ctxt);
10251
1.26M
    }
10252
587k
#ifdef LIBXML_SAX1_ENABLED
10253
587k
    else
10254
587k
  xmlParseEndTag1(ctxt, 0);
10255
1.85M
#endif /* LIBXML_SAX1_ENABLED */
10256
10257
    /*
10258
     * Capture end position and add node
10259
     */
10260
1.85M
    if ( ret != NULL && ctxt->record_info ) {
10261
0
       node_info.end_pos = ctxt->input->consumed +
10262
0
                          (CUR_PTR - ctxt->input->base);
10263
0
       node_info.end_line = ctxt->input->line;
10264
0
       node_info.node = ret;
10265
0
       xmlParserAddNodeInfo(ctxt, &node_info);
10266
0
    }
10267
1.85M
}
10268
10269
/**
10270
 * xmlParseVersionNum:
10271
 * @ctxt:  an XML parser context
10272
 *
10273
 * DEPRECATED: Internal function, don't use.
10274
 *
10275
 * parse the XML version value.
10276
 *
10277
 * [26] VersionNum ::= '1.' [0-9]+
10278
 *
10279
 * In practice allow [0-9].[0-9]+ at that level
10280
 *
10281
 * Returns the string giving the XML version number, or NULL
10282
 */
10283
xmlChar *
10284
274k
xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10285
274k
    xmlChar *buf = NULL;
10286
274k
    int len = 0;
10287
274k
    int size = 10;
10288
274k
    xmlChar cur;
10289
10290
274k
    buf = (xmlChar *) xmlMallocAtomic(size);
10291
274k
    if (buf == NULL) {
10292
0
  xmlErrMemory(ctxt, NULL);
10293
0
  return(NULL);
10294
0
    }
10295
274k
    cur = CUR;
10296
274k
    if (!((cur >= '0') && (cur <= '9'))) {
10297
7.45k
  xmlFree(buf);
10298
7.45k
  return(NULL);
10299
7.45k
    }
10300
266k
    buf[len++] = cur;
10301
266k
    NEXT;
10302
266k
    cur=CUR;
10303
266k
    if (cur != '.') {
10304
3.80k
  xmlFree(buf);
10305
3.80k
  return(NULL);
10306
3.80k
    }
10307
262k
    buf[len++] = cur;
10308
262k
    NEXT;
10309
262k
    cur=CUR;
10310
704k
    while ((cur >= '0') && (cur <= '9')) {
10311
441k
  if (len + 1 >= size) {
10312
1.74k
      xmlChar *tmp;
10313
10314
1.74k
      size *= 2;
10315
1.74k
      tmp = (xmlChar *) xmlRealloc(buf, size);
10316
1.74k
      if (tmp == NULL) {
10317
0
          xmlFree(buf);
10318
0
    xmlErrMemory(ctxt, NULL);
10319
0
    return(NULL);
10320
0
      }
10321
1.74k
      buf = tmp;
10322
1.74k
  }
10323
441k
  buf[len++] = cur;
10324
441k
  NEXT;
10325
441k
  cur=CUR;
10326
441k
    }
10327
262k
    buf[len] = 0;
10328
262k
    return(buf);
10329
262k
}
10330
10331
/**
10332
 * xmlParseVersionInfo:
10333
 * @ctxt:  an XML parser context
10334
 *
10335
 * DEPRECATED: Internal function, don't use.
10336
 *
10337
 * parse the XML version.
10338
 *
10339
 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
10340
 *
10341
 * [25] Eq ::= S? '=' S?
10342
 *
10343
 * Returns the version string, e.g. "1.0"
10344
 */
10345
10346
xmlChar *
10347
361k
xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10348
361k
    xmlChar *version = NULL;
10349
10350
361k
    if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
10351
288k
  SKIP(7);
10352
288k
  SKIP_BLANKS;
10353
288k
  if (RAW != '=') {
10354
8.86k
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10355
8.86k
      return(NULL);
10356
8.86k
        }
10357
279k
  NEXT;
10358
279k
  SKIP_BLANKS;
10359
279k
  if (RAW == '"') {
10360
230k
      NEXT;
10361
230k
      version = xmlParseVersionNum(ctxt);
10362
230k
      if (RAW != '"') {
10363
17.7k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10364
17.7k
      } else
10365
212k
          NEXT;
10366
230k
  } else if (RAW == '\''){
10367
43.6k
      NEXT;
10368
43.6k
      version = xmlParseVersionNum(ctxt);
10369
43.6k
      if (RAW != '\'') {
10370
1.95k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10371
1.95k
      } else
10372
41.7k
          NEXT;
10373
43.6k
  } else {
10374
5.71k
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10375
5.71k
  }
10376
279k
    }
10377
352k
    return(version);
10378
361k
}
10379
10380
/**
10381
 * xmlParseEncName:
10382
 * @ctxt:  an XML parser context
10383
 *
10384
 * DEPRECATED: Internal function, don't use.
10385
 *
10386
 * parse the XML encoding name
10387
 *
10388
 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10389
 *
10390
 * Returns the encoding name value or NULL
10391
 */
10392
xmlChar *
10393
96.4k
xmlParseEncName(xmlParserCtxtPtr ctxt) {
10394
96.4k
    xmlChar *buf = NULL;
10395
96.4k
    int len = 0;
10396
96.4k
    int size = 10;
10397
96.4k
    xmlChar cur;
10398
10399
96.4k
    cur = CUR;
10400
96.4k
    if (((cur >= 'a') && (cur <= 'z')) ||
10401
96.4k
        ((cur >= 'A') && (cur <= 'Z'))) {
10402
95.2k
  buf = (xmlChar *) xmlMallocAtomic(size);
10403
95.2k
  if (buf == NULL) {
10404
0
      xmlErrMemory(ctxt, NULL);
10405
0
      return(NULL);
10406
0
  }
10407
10408
95.2k
  buf[len++] = cur;
10409
95.2k
  NEXT;
10410
95.2k
  cur = CUR;
10411
1.05M
  while (((cur >= 'a') && (cur <= 'z')) ||
10412
1.05M
         ((cur >= 'A') && (cur <= 'Z')) ||
10413
1.05M
         ((cur >= '0') && (cur <= '9')) ||
10414
1.05M
         (cur == '.') || (cur == '_') ||
10415
1.05M
         (cur == '-')) {
10416
962k
      if (len + 1 >= size) {
10417
43.0k
          xmlChar *tmp;
10418
10419
43.0k
    size *= 2;
10420
43.0k
    tmp = (xmlChar *) xmlRealloc(buf, size);
10421
43.0k
    if (tmp == NULL) {
10422
0
        xmlErrMemory(ctxt, NULL);
10423
0
        xmlFree(buf);
10424
0
        return(NULL);
10425
0
    }
10426
43.0k
    buf = tmp;
10427
43.0k
      }
10428
962k
      buf[len++] = cur;
10429
962k
      NEXT;
10430
962k
      cur = CUR;
10431
962k
      if (cur == 0) {
10432
532
          SHRINK;
10433
532
    GROW;
10434
532
    cur = CUR;
10435
532
      }
10436
962k
        }
10437
95.2k
  buf[len] = 0;
10438
95.2k
    } else {
10439
1.21k
  xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10440
1.21k
    }
10441
96.4k
    return(buf);
10442
96.4k
}
10443
10444
/**
10445
 * xmlParseEncodingDecl:
10446
 * @ctxt:  an XML parser context
10447
 *
10448
 * DEPRECATED: Internal function, don't use.
10449
 *
10450
 * parse the XML encoding declaration
10451
 *
10452
 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' |  "'" EncName "'")
10453
 *
10454
 * this setups the conversion filters.
10455
 *
10456
 * Returns the encoding value or NULL
10457
 */
10458
10459
const xmlChar *
10460
273k
xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10461
273k
    xmlChar *encoding = NULL;
10462
10463
273k
    SKIP_BLANKS;
10464
273k
    if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
10465
98.1k
  SKIP(8);
10466
98.1k
  SKIP_BLANKS;
10467
98.1k
  if (RAW != '=') {
10468
781
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10469
781
      return(NULL);
10470
781
        }
10471
97.3k
  NEXT;
10472
97.3k
  SKIP_BLANKS;
10473
97.3k
  if (RAW == '"') {
10474
71.5k
      NEXT;
10475
71.5k
      encoding = xmlParseEncName(ctxt);
10476
71.5k
      if (RAW != '"') {
10477
4.74k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10478
4.74k
    xmlFree((xmlChar *) encoding);
10479
4.74k
    return(NULL);
10480
4.74k
      } else
10481
66.7k
          NEXT;
10482
71.5k
  } else if (RAW == '\''){
10483
24.9k
      NEXT;
10484
24.9k
      encoding = xmlParseEncName(ctxt);
10485
24.9k
      if (RAW != '\'') {
10486
1.85k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10487
1.85k
    xmlFree((xmlChar *) encoding);
10488
1.85k
    return(NULL);
10489
1.85k
      } else
10490
23.0k
          NEXT;
10491
24.9k
  } else {
10492
904
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10493
904
  }
10494
10495
        /*
10496
         * Non standard parsing, allowing the user to ignore encoding
10497
         */
10498
90.7k
        if (ctxt->options & XML_PARSE_IGNORE_ENC) {
10499
26.2k
      xmlFree((xmlChar *) encoding);
10500
26.2k
            return(NULL);
10501
26.2k
  }
10502
10503
  /*
10504
   * UTF-16 encoding switch has already taken place at this stage,
10505
   * more over the little-endian/big-endian selection is already done
10506
   */
10507
64.5k
        if ((encoding != NULL) &&
10508
64.5k
      ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10509
63.9k
       (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
10510
      /*
10511
       * If no encoding was passed to the parser, that we are
10512
       * using UTF-16 and no decoder is present i.e. the
10513
       * document is apparently UTF-8 compatible, then raise an
10514
       * encoding mismatch fatal error
10515
       */
10516
2.52k
      if ((ctxt->encoding == NULL) &&
10517
2.52k
          (ctxt->input->buf != NULL) &&
10518
2.52k
          (ctxt->input->buf->encoder == NULL)) {
10519
2.52k
    xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10520
2.52k
      "Document labelled UTF-16 but has UTF-8 content\n");
10521
2.52k
      }
10522
2.52k
      if (ctxt->encoding != NULL)
10523
0
    xmlFree((xmlChar *) ctxt->encoding);
10524
2.52k
      ctxt->encoding = encoding;
10525
2.52k
  }
10526
  /*
10527
   * UTF-8 encoding is handled natively
10528
   */
10529
62.0k
        else if ((encoding != NULL) &&
10530
62.0k
      ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10531
61.4k
       (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
10532
21.6k
      if (ctxt->encoding != NULL)
10533
19
    xmlFree((xmlChar *) ctxt->encoding);
10534
21.6k
      ctxt->encoding = encoding;
10535
21.6k
  }
10536
40.3k
  else if (encoding != NULL) {
10537
39.8k
      xmlCharEncodingHandlerPtr handler;
10538
10539
39.8k
      if (ctxt->input->encoding != NULL)
10540
0
    xmlFree((xmlChar *) ctxt->input->encoding);
10541
39.8k
      ctxt->input->encoding = encoding;
10542
10543
39.8k
            handler = xmlFindCharEncodingHandler((const char *) encoding);
10544
39.8k
      if (handler != NULL) {
10545
38.8k
    if (xmlSwitchToEncoding(ctxt, handler) < 0) {
10546
        /* failed to convert */
10547
210
        ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
10548
210
        return(NULL);
10549
210
    }
10550
38.8k
      } else {
10551
986
    xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
10552
986
      "Unsupported encoding %s\n", encoding);
10553
986
    return(NULL);
10554
986
      }
10555
39.8k
  }
10556
64.5k
    }
10557
238k
    return(encoding);
10558
273k
}
10559
10560
/**
10561
 * xmlParseSDDecl:
10562
 * @ctxt:  an XML parser context
10563
 *
10564
 * DEPRECATED: Internal function, don't use.
10565
 *
10566
 * parse the XML standalone declaration
10567
 *
10568
 * [32] SDDecl ::= S 'standalone' Eq
10569
 *                 (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10570
 *
10571
 * [ VC: Standalone Document Declaration ]
10572
 * TODO The standalone document declaration must have the value "no"
10573
 * if any external markup declarations contain declarations of:
10574
 *  - attributes with default values, if elements to which these
10575
 *    attributes apply appear in the document without specifications
10576
 *    of values for these attributes, or
10577
 *  - entities (other than amp, lt, gt, apos, quot), if references
10578
 *    to those entities appear in the document, or
10579
 *  - attributes with values subject to normalization, where the
10580
 *    attribute appears in the document with a value which will change
10581
 *    as a result of normalization, or
10582
 *  - element types with element content, if white space occurs directly
10583
 *    within any instance of those types.
10584
 *
10585
 * Returns:
10586
 *   1 if standalone="yes"
10587
 *   0 if standalone="no"
10588
 *  -2 if standalone attribute is missing or invalid
10589
 *    (A standalone value of -2 means that the XML declaration was found,
10590
 *     but no value was specified for the standalone attribute).
10591
 */
10592
10593
int
10594
238k
xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10595
238k
    int standalone = -2;
10596
10597
238k
    SKIP_BLANKS;
10598
238k
    if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10599
50.8k
  SKIP(10);
10600
50.8k
        SKIP_BLANKS;
10601
50.8k
  if (RAW != '=') {
10602
1.10k
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10603
1.10k
      return(standalone);
10604
1.10k
        }
10605
49.7k
  NEXT;
10606
49.7k
  SKIP_BLANKS;
10607
49.7k
        if (RAW == '\''){
10608
10.1k
      NEXT;
10609
10.1k
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10610
8.35k
          standalone = 0;
10611
8.35k
                SKIP(2);
10612
8.35k
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10613
1.76k
                 (NXT(2) == 's')) {
10614
1.44k
          standalone = 1;
10615
1.44k
    SKIP(3);
10616
1.44k
            } else {
10617
312
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10618
312
      }
10619
10.1k
      if (RAW != '\'') {
10620
456
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10621
456
      } else
10622
9.65k
          NEXT;
10623
39.6k
  } else if (RAW == '"'){
10624
38.3k
      NEXT;
10625
38.3k
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10626
17.0k
          standalone = 0;
10627
17.0k
    SKIP(2);
10628
21.3k
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10629
21.3k
                 (NXT(2) == 's')) {
10630
20.0k
          standalone = 1;
10631
20.0k
                SKIP(3);
10632
20.0k
            } else {
10633
1.29k
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10634
1.29k
      }
10635
38.3k
      if (RAW != '"') {
10636
1.94k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10637
1.94k
      } else
10638
36.4k
          NEXT;
10639
38.3k
  } else {
10640
1.28k
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10641
1.28k
        }
10642
49.7k
    }
10643
237k
    return(standalone);
10644
238k
}
10645
10646
/**
10647
 * xmlParseXMLDecl:
10648
 * @ctxt:  an XML parser context
10649
 *
10650
 * DEPRECATED: Internal function, don't use.
10651
 *
10652
 * parse an XML declaration header
10653
 *
10654
 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10655
 */
10656
10657
void
10658
352k
xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10659
352k
    xmlChar *version;
10660
10661
    /*
10662
     * This value for standalone indicates that the document has an
10663
     * XML declaration but it does not have a standalone attribute.
10664
     * It will be overwritten later if a standalone attribute is found.
10665
     */
10666
352k
    ctxt->input->standalone = -2;
10667
10668
    /*
10669
     * We know that '<?xml' is here.
10670
     */
10671
352k
    SKIP(5);
10672
10673
352k
    if (!IS_BLANK_CH(RAW)) {
10674
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10675
0
                 "Blank needed after '<?xml'\n");
10676
0
    }
10677
352k
    SKIP_BLANKS;
10678
10679
    /*
10680
     * We must have the VersionInfo here.
10681
     */
10682
352k
    version = xmlParseVersionInfo(ctxt);
10683
352k
    if (version == NULL) {
10684
96.0k
  xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10685
256k
    } else {
10686
256k
  if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10687
      /*
10688
       * Changed here for XML-1.0 5th edition
10689
       */
10690
25.4k
      if (ctxt->options & XML_PARSE_OLD10) {
10691
8.30k
    xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10692
8.30k
                "Unsupported version '%s'\n",
10693
8.30k
                version);
10694
17.1k
      } else {
10695
17.1k
          if ((version[0] == '1') && ((version[1] == '.'))) {
10696
3.29k
        xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10697
3.29k
                      "Unsupported version '%s'\n",
10698
3.29k
          version, NULL);
10699
13.8k
    } else {
10700
13.8k
        xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10701
13.8k
              "Unsupported version '%s'\n",
10702
13.8k
              version);
10703
13.8k
    }
10704
17.1k
      }
10705
25.4k
  }
10706
256k
  if (ctxt->version != NULL)
10707
0
      xmlFree((void *) ctxt->version);
10708
256k
  ctxt->version = version;
10709
256k
    }
10710
10711
    /*
10712
     * We may have the encoding declaration
10713
     */
10714
352k
    if (!IS_BLANK_CH(RAW)) {
10715
197k
        if ((RAW == '?') && (NXT(1) == '>')) {
10716
88.4k
      SKIP(2);
10717
88.4k
      return;
10718
88.4k
  }
10719
109k
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10720
109k
    }
10721
263k
    xmlParseEncodingDecl(ctxt);
10722
263k
    if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10723
263k
         (ctxt->instate == XML_PARSER_EOF)) {
10724
  /*
10725
   * The XML REC instructs us to stop parsing right here
10726
   */
10727
966
        return;
10728
966
    }
10729
10730
    /*
10731
     * We may have the standalone status.
10732
     */
10733
262k
    if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10734
28.8k
        if ((RAW == '?') && (NXT(1) == '>')) {
10735
24.1k
      SKIP(2);
10736
24.1k
      return;
10737
24.1k
  }
10738
4.68k
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10739
4.68k
    }
10740
10741
    /*
10742
     * We can grow the input buffer freely at that point
10743
     */
10744
238k
    GROW;
10745
10746
238k
    SKIP_BLANKS;
10747
238k
    ctxt->input->standalone = xmlParseSDDecl(ctxt);
10748
10749
238k
    SKIP_BLANKS;
10750
238k
    if ((RAW == '?') && (NXT(1) == '>')) {
10751
90.3k
        SKIP(2);
10752
148k
    } else if (RAW == '>') {
10753
        /* Deprecated old WD ... */
10754
2.49k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10755
2.49k
  NEXT;
10756
145k
    } else {
10757
145k
        int c;
10758
10759
145k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10760
6.07M
        while ((c = CUR) != 0) {
10761
6.07M
            NEXT;
10762
6.07M
            if (c == '>')
10763
138k
                break;
10764
6.07M
        }
10765
145k
    }
10766
238k
}
10767
10768
/**
10769
 * xmlParseMisc:
10770
 * @ctxt:  an XML parser context
10771
 *
10772
 * DEPRECATED: Internal function, don't use.
10773
 *
10774
 * parse an XML Misc* optional field.
10775
 *
10776
 * [27] Misc ::= Comment | PI |  S
10777
 */
10778
10779
void
10780
578k
xmlParseMisc(xmlParserCtxtPtr ctxt) {
10781
666k
    while (ctxt->instate != XML_PARSER_EOF) {
10782
666k
        SKIP_BLANKS;
10783
666k
        GROW;
10784
666k
        if ((RAW == '<') && (NXT(1) == '?')) {
10785
63.9k
      xmlParsePI(ctxt);
10786
602k
        } else if (CMP4(CUR_PTR, '<', '!', '-', '-')) {
10787
23.3k
      xmlParseComment(ctxt);
10788
578k
        } else {
10789
578k
            break;
10790
578k
        }
10791
666k
    }
10792
578k
}
10793
10794
/**
10795
 * xmlParseDocument:
10796
 * @ctxt:  an XML parser context
10797
 *
10798
 * parse an XML document (and build a tree if using the standard SAX
10799
 * interface).
10800
 *
10801
 * [1] document ::= prolog element Misc*
10802
 *
10803
 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10804
 *
10805
 * Returns 0, -1 in case of error. the parser context is augmented
10806
 *                as a result of the parsing.
10807
 */
10808
10809
int
10810
293k
xmlParseDocument(xmlParserCtxtPtr ctxt) {
10811
293k
    xmlChar start[4];
10812
293k
    xmlCharEncoding enc;
10813
10814
293k
    xmlInitParser();
10815
10816
293k
    if ((ctxt == NULL) || (ctxt->input == NULL))
10817
0
        return(-1);
10818
10819
293k
    GROW;
10820
10821
    /*
10822
     * SAX: detecting the level.
10823
     */
10824
293k
    xmlDetectSAX2(ctxt);
10825
10826
    /*
10827
     * SAX: beginning of the document processing.
10828
     */
10829
293k
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10830
293k
        ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10831
293k
    if (ctxt->instate == XML_PARSER_EOF)
10832
0
  return(-1);
10833
10834
293k
    if ((ctxt->encoding == NULL) &&
10835
293k
        ((ctxt->input->end - ctxt->input->cur) >= 4)) {
10836
  /*
10837
   * Get the 4 first bytes and decode the charset
10838
   * if enc != XML_CHAR_ENCODING_NONE
10839
   * plug some encoding conversion routines.
10840
   */
10841
288k
  start[0] = RAW;
10842
288k
  start[1] = NXT(1);
10843
288k
  start[2] = NXT(2);
10844
288k
  start[3] = NXT(3);
10845
288k
  enc = xmlDetectCharEncoding(&start[0], 4);
10846
288k
  if (enc != XML_CHAR_ENCODING_NONE) {
10847
145k
      xmlSwitchEncoding(ctxt, enc);
10848
145k
  }
10849
288k
    }
10850
10851
10852
293k
    if (CUR == 0) {
10853
1.40k
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10854
1.40k
  return(-1);
10855
1.40k
    }
10856
10857
    /*
10858
     * Check for the XMLDecl in the Prolog.
10859
     * do not GROW here to avoid the detected encoder to decode more
10860
     * than just the first line, unless the amount of data is really
10861
     * too small to hold "<?xml version="1.0" encoding="foo"
10862
     */
10863
292k
    if ((ctxt->input->end - ctxt->input->cur) < 35) {
10864
17.8k
       GROW;
10865
17.8k
    }
10866
292k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10867
10868
  /*
10869
   * Note that we will switch encoding on the fly.
10870
   */
10871
117k
  xmlParseXMLDecl(ctxt);
10872
117k
  if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10873
117k
      (ctxt->instate == XML_PARSER_EOF)) {
10874
      /*
10875
       * The XML REC instructs us to stop parsing right here
10876
       */
10877
322
      return(-1);
10878
322
  }
10879
117k
  ctxt->standalone = ctxt->input->standalone;
10880
117k
  SKIP_BLANKS;
10881
175k
    } else {
10882
175k
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10883
175k
    }
10884
292k
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10885
273k
        ctxt->sax->startDocument(ctxt->userData);
10886
292k
    if (ctxt->instate == XML_PARSER_EOF)
10887
0
  return(-1);
10888
292k
    if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10889
292k
        (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10890
0
  ctxt->myDoc->compression = ctxt->input->buf->compressed;
10891
0
    }
10892
10893
    /*
10894
     * The Misc part of the Prolog
10895
     */
10896
292k
    xmlParseMisc(ctxt);
10897
10898
    /*
10899
     * Then possibly doc type declaration(s) and more Misc
10900
     * (doctypedecl Misc*)?
10901
     */
10902
292k
    GROW;
10903
292k
    if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10904
10905
142k
  ctxt->inSubset = 1;
10906
142k
  xmlParseDocTypeDecl(ctxt);
10907
142k
  if (RAW == '[') {
10908
108k
      ctxt->instate = XML_PARSER_DTD;
10909
108k
      xmlParseInternalSubset(ctxt);
10910
108k
      if (ctxt->instate == XML_PARSER_EOF)
10911
49.8k
    return(-1);
10912
108k
  }
10913
10914
  /*
10915
   * Create and update the external subset.
10916
   */
10917
93.1k
  ctxt->inSubset = 2;
10918
93.1k
  if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10919
93.1k
      (!ctxt->disableSAX))
10920
84.4k
      ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10921
84.4k
                                ctxt->extSubSystem, ctxt->extSubURI);
10922
93.1k
  if (ctxt->instate == XML_PARSER_EOF)
10923
8.74k
      return(-1);
10924
84.3k
  ctxt->inSubset = 0;
10925
10926
84.3k
        xmlCleanSpecialAttr(ctxt);
10927
10928
84.3k
  ctxt->instate = XML_PARSER_PROLOG;
10929
84.3k
  xmlParseMisc(ctxt);
10930
84.3k
    }
10931
10932
    /*
10933
     * Time to start parsing the tree itself
10934
     */
10935
233k
    GROW;
10936
233k
    if (RAW != '<') {
10937
31.1k
  xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10938
31.1k
           "Start tag expected, '<' not found\n");
10939
202k
    } else {
10940
202k
  ctxt->instate = XML_PARSER_CONTENT;
10941
202k
  xmlParseElement(ctxt);
10942
202k
  ctxt->instate = XML_PARSER_EPILOG;
10943
10944
10945
  /*
10946
   * The Misc part at the end
10947
   */
10948
202k
  xmlParseMisc(ctxt);
10949
10950
202k
  if (RAW != 0) {
10951
80.2k
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10952
80.2k
  }
10953
202k
  ctxt->instate = XML_PARSER_EOF;
10954
202k
    }
10955
10956
    /*
10957
     * SAX: end of the document processing.
10958
     */
10959
233k
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10960
233k
        ctxt->sax->endDocument(ctxt->userData);
10961
10962
    /*
10963
     * Remove locally kept entity definitions if the tree was not built
10964
     */
10965
233k
    if ((ctxt->myDoc != NULL) &&
10966
233k
  (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10967
660
  xmlFreeDoc(ctxt->myDoc);
10968
660
  ctxt->myDoc = NULL;
10969
660
    }
10970
10971
233k
    if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10972
9.76k
        ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10973
9.76k
  if (ctxt->valid)
10974
6.94k
      ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10975
9.76k
  if (ctxt->nsWellFormed)
10976
9.36k
      ctxt->myDoc->properties |= XML_DOC_NSVALID;
10977
9.76k
  if (ctxt->options & XML_PARSE_OLD10)
10978
1.54k
      ctxt->myDoc->properties |= XML_DOC_OLD10;
10979
9.76k
    }
10980
233k
    if (! ctxt->wellFormed) {
10981
223k
  ctxt->valid = 0;
10982
223k
  return(-1);
10983
223k
    }
10984
9.76k
    return(0);
10985
233k
}
10986
10987
/**
10988
 * xmlParseExtParsedEnt:
10989
 * @ctxt:  an XML parser context
10990
 *
10991
 * parse a general parsed entity
10992
 * An external general parsed entity is well-formed if it matches the
10993
 * production labeled extParsedEnt.
10994
 *
10995
 * [78] extParsedEnt ::= TextDecl? content
10996
 *
10997
 * Returns 0, -1 in case of error. the parser context is augmented
10998
 *                as a result of the parsing.
10999
 */
11000
11001
int
11002
0
xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
11003
0
    xmlChar start[4];
11004
0
    xmlCharEncoding enc;
11005
11006
0
    if ((ctxt == NULL) || (ctxt->input == NULL))
11007
0
        return(-1);
11008
11009
0
    xmlDetectSAX2(ctxt);
11010
11011
0
    GROW;
11012
11013
    /*
11014
     * SAX: beginning of the document processing.
11015
     */
11016
0
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11017
0
        ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
11018
11019
    /*
11020
     * Get the 4 first bytes and decode the charset
11021
     * if enc != XML_CHAR_ENCODING_NONE
11022
     * plug some encoding conversion routines.
11023
     */
11024
0
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11025
0
  start[0] = RAW;
11026
0
  start[1] = NXT(1);
11027
0
  start[2] = NXT(2);
11028
0
  start[3] = NXT(3);
11029
0
  enc = xmlDetectCharEncoding(start, 4);
11030
0
  if (enc != XML_CHAR_ENCODING_NONE) {
11031
0
      xmlSwitchEncoding(ctxt, enc);
11032
0
  }
11033
0
    }
11034
11035
11036
0
    if (CUR == 0) {
11037
0
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11038
0
    }
11039
11040
    /*
11041
     * Check for the XMLDecl in the Prolog.
11042
     */
11043
0
    GROW;
11044
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
11045
11046
  /*
11047
   * Note that we will switch encoding on the fly.
11048
   */
11049
0
  xmlParseXMLDecl(ctxt);
11050
0
  if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11051
      /*
11052
       * The XML REC instructs us to stop parsing right here
11053
       */
11054
0
      return(-1);
11055
0
  }
11056
0
  SKIP_BLANKS;
11057
0
    } else {
11058
0
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11059
0
    }
11060
0
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
11061
0
        ctxt->sax->startDocument(ctxt->userData);
11062
0
    if (ctxt->instate == XML_PARSER_EOF)
11063
0
  return(-1);
11064
11065
    /*
11066
     * Doing validity checking on chunk doesn't make sense
11067
     */
11068
0
    ctxt->instate = XML_PARSER_CONTENT;
11069
0
    ctxt->validate = 0;
11070
0
    ctxt->loadsubset = 0;
11071
0
    ctxt->depth = 0;
11072
11073
0
    xmlParseContent(ctxt);
11074
0
    if (ctxt->instate == XML_PARSER_EOF)
11075
0
  return(-1);
11076
11077
0
    if ((RAW == '<') && (NXT(1) == '/')) {
11078
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11079
0
    } else if (RAW != 0) {
11080
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
11081
0
    }
11082
11083
    /*
11084
     * SAX: end of the document processing.
11085
     */
11086
0
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11087
0
        ctxt->sax->endDocument(ctxt->userData);
11088
11089
0
    if (! ctxt->wellFormed) return(-1);
11090
0
    return(0);
11091
0
}
11092
11093
#ifdef LIBXML_PUSH_ENABLED
11094
/************************************************************************
11095
 *                  *
11096
 *    Progressive parsing interfaces        *
11097
 *                  *
11098
 ************************************************************************/
11099
11100
/**
11101
 * xmlParseLookupChar:
11102
 * @ctxt:  an XML parser context
11103
 * @c:  character
11104
 *
11105
 * Check whether the input buffer contains a character.
11106
 */
11107
static int
11108
4.35M
xmlParseLookupChar(xmlParserCtxtPtr ctxt, int c) {
11109
4.35M
    const xmlChar *cur;
11110
11111
4.35M
    if (ctxt->checkIndex == 0) {
11112
4.03M
        cur = ctxt->input->cur + 1;
11113
4.03M
    } else {
11114
325k
        cur = ctxt->input->cur + ctxt->checkIndex;
11115
325k
    }
11116
11117
4.35M
    if (memchr(cur, c, ctxt->input->end - cur) == NULL) {
11118
348k
        ctxt->checkIndex = ctxt->input->end - ctxt->input->cur;
11119
348k
        return(0);
11120
4.00M
    } else {
11121
4.00M
        ctxt->checkIndex = 0;
11122
4.00M
        return(1);
11123
4.00M
    }
11124
4.35M
}
11125
11126
/**
11127
 * xmlParseLookupString:
11128
 * @ctxt:  an XML parser context
11129
 * @startDelta: delta to apply at the start
11130
 * @str:  string
11131
 * @strLen:  length of string
11132
 *
11133
 * Check whether the input buffer contains a string.
11134
 */
11135
static const xmlChar *
11136
xmlParseLookupString(xmlParserCtxtPtr ctxt, size_t startDelta,
11137
2.35M
                     const char *str, size_t strLen) {
11138
2.35M
    const xmlChar *cur, *term;
11139
11140
2.35M
    if (ctxt->checkIndex == 0) {
11141
1.09M
        cur = ctxt->input->cur + startDelta;
11142
1.25M
    } else {
11143
1.25M
        cur = ctxt->input->cur + ctxt->checkIndex;
11144
1.25M
    }
11145
11146
2.35M
    term = BAD_CAST strstr((const char *) cur, str);
11147
2.35M
    if (term == NULL) {
11148
1.46M
        const xmlChar *end = ctxt->input->end;
11149
11150
        /* Rescan (strLen - 1) characters. */
11151
1.46M
        if ((size_t) (end - cur) < strLen)
11152
58.2k
            end = cur;
11153
1.40M
        else
11154
1.40M
            end -= strLen - 1;
11155
1.46M
        ctxt->checkIndex = end - ctxt->input->cur;
11156
1.46M
    } else {
11157
884k
        ctxt->checkIndex = 0;
11158
884k
    }
11159
11160
2.35M
    return(term);
11161
2.35M
}
11162
11163
/**
11164
 * xmlParseLookupCharData:
11165
 * @ctxt:  an XML parser context
11166
 *
11167
 * Check whether the input buffer contains terminated char data.
11168
 */
11169
static int
11170
9.56M
xmlParseLookupCharData(xmlParserCtxtPtr ctxt) {
11171
9.56M
    const xmlChar *cur = ctxt->input->cur + ctxt->checkIndex;
11172
9.56M
    const xmlChar *end = ctxt->input->end;
11173
11174
259M
    while (cur < end) {
11175
258M
        if ((*cur == '<') || (*cur == '&')) {
11176
8.81M
            ctxt->checkIndex = 0;
11177
8.81M
            return(1);
11178
8.81M
        }
11179
249M
        cur++;
11180
249M
    }
11181
11182
741k
    ctxt->checkIndex = cur - ctxt->input->cur;
11183
741k
    return(0);
11184
9.56M
}
11185
11186
/**
11187
 * xmlParseLookupGt:
11188
 * @ctxt:  an XML parser context
11189
 *
11190
 * Check whether there's enough data in the input buffer to finish parsing
11191
 * a start tag. This has to take quotes into account.
11192
 */
11193
static int
11194
8.27M
xmlParseLookupGt(xmlParserCtxtPtr ctxt) {
11195
8.27M
    const xmlChar *cur;
11196
8.27M
    const xmlChar *end = ctxt->input->end;
11197
8.27M
    int state = ctxt->endCheckState;
11198
11199
8.27M
    if (ctxt->checkIndex == 0)
11200
6.12M
        cur = ctxt->input->cur + 1;
11201
2.15M
    else
11202
2.15M
        cur = ctxt->input->cur + ctxt->checkIndex;
11203
11204
621M
    while (cur < end) {
11205
619M
        if (state) {
11206
309M
            if (*cur == state)
11207
7.76M
                state = 0;
11208
309M
        } else if (*cur == '\'' || *cur == '"') {
11209
7.85M
            state = *cur;
11210
301M
        } else if (*cur == '>') {
11211
5.96M
            ctxt->checkIndex = 0;
11212
5.96M
            ctxt->endCheckState = 0;
11213
5.96M
            return(1);
11214
5.96M
        }
11215
613M
        cur++;
11216
613M
    }
11217
11218
2.30M
    ctxt->checkIndex = cur - ctxt->input->cur;
11219
2.30M
    ctxt->endCheckState = state;
11220
2.30M
    return(0);
11221
8.27M
}
11222
11223
/**
11224
 * xmlParseLookupInternalSubset:
11225
 * @ctxt:  an XML parser context
11226
 *
11227
 * Check whether there's enough data in the input buffer to finish parsing
11228
 * the internal subset.
11229
 */
11230
static int
11231
525k
xmlParseLookupInternalSubset(xmlParserCtxtPtr ctxt) {
11232
    /*
11233
     * Sorry, but progressive parsing of the internal subset is not
11234
     * supported. We first check that the full content of the internal
11235
     * subset is available and parsing is launched only at that point.
11236
     * Internal subset ends with "']' S? '>'" in an unescaped section and
11237
     * not in a ']]>' sequence which are conditional sections.
11238
     */
11239
525k
    const xmlChar *cur, *start;
11240
525k
    const xmlChar *end = ctxt->input->end;
11241
525k
    int state = ctxt->endCheckState;
11242
11243
525k
    if (ctxt->checkIndex == 0) {
11244
182k
        cur = ctxt->input->cur + 1;
11245
342k
    } else {
11246
342k
        cur = ctxt->input->cur + ctxt->checkIndex;
11247
342k
    }
11248
525k
    start = cur;
11249
11250
86.3M
    while (cur < end) {
11251
85.9M
        if (state == '-') {
11252
8.57M
            if ((*cur == '-') &&
11253
8.57M
                (cur[1] == '-') &&
11254
8.57M
                (cur[2] == '>')) {
11255
83.2k
                state = 0;
11256
83.2k
                cur += 3;
11257
83.2k
                start = cur;
11258
83.2k
                continue;
11259
83.2k
            }
11260
8.57M
        }
11261
77.3M
        else if (state == ']') {
11262
324k
            if (*cur == '>') {
11263
124k
                ctxt->checkIndex = 0;
11264
124k
                ctxt->endCheckState = 0;
11265
124k
                return(1);
11266
124k
            }
11267
199k
            if (IS_BLANK_CH(*cur)) {
11268
20.7k
                state = ' ';
11269
178k
            } else if (*cur != ']') {
11270
26.4k
                state = 0;
11271
26.4k
                start = cur;
11272
26.4k
                continue;
11273
26.4k
            }
11274
199k
        }
11275
77.0M
        else if (state == ' ') {
11276
204k
            if (*cur == '>') {
11277
822
                ctxt->checkIndex = 0;
11278
822
                ctxt->endCheckState = 0;
11279
822
                return(1);
11280
822
            }
11281
203k
            if (!IS_BLANK_CH(*cur)) {
11282
19.8k
                state = 0;
11283
19.8k
                start = cur;
11284
19.8k
                continue;
11285
19.8k
            }
11286
203k
        }
11287
76.8M
        else if (state != 0) {
11288
31.8M
            if (*cur == state) {
11289
788k
                state = 0;
11290
788k
                start = cur + 1;
11291
788k
            }
11292
31.8M
        }
11293
44.9M
        else if (*cur == '<') {
11294
1.24M
            if ((cur[1] == '!') &&
11295
1.24M
                (cur[2] == '-') &&
11296
1.24M
                (cur[3] == '-')) {
11297
85.3k
                state = '-';
11298
85.3k
                cur += 4;
11299
                /* Don't treat <!--> as comment */
11300
85.3k
                start = cur;
11301
85.3k
                continue;
11302
85.3k
            }
11303
1.24M
        }
11304
43.7M
        else if ((*cur == '"') || (*cur == '\'') || (*cur == ']')) {
11305
979k
            state = *cur;
11306
979k
        }
11307
11308
85.5M
        cur++;
11309
85.5M
    }
11310
11311
    /*
11312
     * Rescan the three last characters to detect "<!--" and "-->"
11313
     * split across chunks.
11314
     */
11315
399k
    if ((state == 0) || (state == '-')) {
11316
237k
        if (cur - start < 3)
11317
16.9k
            cur = start;
11318
220k
        else
11319
220k
            cur -= 3;
11320
237k
    }
11321
399k
    ctxt->checkIndex = cur - ctxt->input->cur;
11322
399k
    ctxt->endCheckState = state;
11323
399k
    return(0);
11324
525k
}
11325
11326
/**
11327
 * xmlCheckCdataPush:
11328
 * @cur: pointer to the block of characters
11329
 * @len: length of the block in bytes
11330
 * @complete: 1 if complete CDATA block is passed in, 0 if partial block
11331
 *
11332
 * Check that the block of characters is okay as SCdata content [20]
11333
 *
11334
 * Returns the number of bytes to pass if okay, a negative index where an
11335
 *         UTF-8 error occurred otherwise
11336
 */
11337
static int
11338
374k
xmlCheckCdataPush(const xmlChar *utf, int len, int complete) {
11339
374k
    int ix;
11340
374k
    unsigned char c;
11341
374k
    int codepoint;
11342
11343
374k
    if ((utf == NULL) || (len <= 0))
11344
21.4k
        return(0);
11345
11346
19.4M
    for (ix = 0; ix < len;) {      /* string is 0-terminated */
11347
19.2M
        c = utf[ix];
11348
19.2M
        if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
11349
16.1M
      if (c >= 0x20)
11350
14.9M
    ix++;
11351
1.24M
      else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11352
1.21M
          ix++;
11353
35.4k
      else
11354
35.4k
          return(-ix);
11355
16.1M
  } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
11356
2.73M
      if (ix + 2 > len) return(complete ? -ix : ix);
11357
2.71M
      if ((utf[ix+1] & 0xc0 ) != 0x80)
11358
39.7k
          return(-ix);
11359
2.67M
      codepoint = (utf[ix] & 0x1f) << 6;
11360
2.67M
      codepoint |= utf[ix+1] & 0x3f;
11361
2.67M
      if (!xmlIsCharQ(codepoint))
11362
8.39k
          return(-ix);
11363
2.67M
      ix += 2;
11364
2.67M
  } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
11365
153k
      if (ix + 3 > len) return(complete ? -ix : ix);
11366
146k
      if (((utf[ix+1] & 0xc0) != 0x80) ||
11367
146k
          ((utf[ix+2] & 0xc0) != 0x80))
11368
14.7k
        return(-ix);
11369
131k
      codepoint = (utf[ix] & 0xf) << 12;
11370
131k
      codepoint |= (utf[ix+1] & 0x3f) << 6;
11371
131k
      codepoint |= utf[ix+2] & 0x3f;
11372
131k
      if (!xmlIsCharQ(codepoint))
11373
12.2k
          return(-ix);
11374
119k
      ix += 3;
11375
259k
  } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
11376
217k
      if (ix + 4 > len) return(complete ? -ix : ix);
11377
212k
      if (((utf[ix+1] & 0xc0) != 0x80) ||
11378
212k
          ((utf[ix+2] & 0xc0) != 0x80) ||
11379
212k
    ((utf[ix+3] & 0xc0) != 0x80))
11380
26.1k
        return(-ix);
11381
186k
      codepoint = (utf[ix] & 0x7) << 18;
11382
186k
      codepoint |= (utf[ix+1] & 0x3f) << 12;
11383
186k
      codepoint |= (utf[ix+2] & 0x3f) << 6;
11384
186k
      codepoint |= utf[ix+3] & 0x3f;
11385
186k
      if (!xmlIsCharQ(codepoint))
11386
8.15k
          return(-ix);
11387
178k
      ix += 4;
11388
178k
  } else       /* unknown encoding */
11389
41.7k
      return(-ix);
11390
19.2M
      }
11391
143k
      return(ix);
11392
353k
}
11393
11394
/**
11395
 * xmlParseTryOrFinish:
11396
 * @ctxt:  an XML parser context
11397
 * @terminate:  last chunk indicator
11398
 *
11399
 * Try to progress on parsing
11400
 *
11401
 * Returns zero if no parsing was possible
11402
 */
11403
static int
11404
6.07M
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11405
6.07M
    int ret = 0;
11406
6.07M
    int avail, tlen;
11407
6.07M
    xmlChar cur, next;
11408
11409
6.07M
    if (ctxt->input == NULL)
11410
0
        return(0);
11411
11412
#ifdef DEBUG_PUSH
11413
    switch (ctxt->instate) {
11414
  case XML_PARSER_EOF:
11415
      xmlGenericError(xmlGenericErrorContext,
11416
        "PP: try EOF\n"); break;
11417
  case XML_PARSER_START:
11418
      xmlGenericError(xmlGenericErrorContext,
11419
        "PP: try START\n"); break;
11420
  case XML_PARSER_MISC:
11421
      xmlGenericError(xmlGenericErrorContext,
11422
        "PP: try MISC\n");break;
11423
  case XML_PARSER_COMMENT:
11424
      xmlGenericError(xmlGenericErrorContext,
11425
        "PP: try COMMENT\n");break;
11426
  case XML_PARSER_PROLOG:
11427
      xmlGenericError(xmlGenericErrorContext,
11428
        "PP: try PROLOG\n");break;
11429
  case XML_PARSER_START_TAG:
11430
      xmlGenericError(xmlGenericErrorContext,
11431
        "PP: try START_TAG\n");break;
11432
  case XML_PARSER_CONTENT:
11433
      xmlGenericError(xmlGenericErrorContext,
11434
        "PP: try CONTENT\n");break;
11435
  case XML_PARSER_CDATA_SECTION:
11436
      xmlGenericError(xmlGenericErrorContext,
11437
        "PP: try CDATA_SECTION\n");break;
11438
  case XML_PARSER_END_TAG:
11439
      xmlGenericError(xmlGenericErrorContext,
11440
        "PP: try END_TAG\n");break;
11441
  case XML_PARSER_ENTITY_DECL:
11442
      xmlGenericError(xmlGenericErrorContext,
11443
        "PP: try ENTITY_DECL\n");break;
11444
  case XML_PARSER_ENTITY_VALUE:
11445
      xmlGenericError(xmlGenericErrorContext,
11446
        "PP: try ENTITY_VALUE\n");break;
11447
  case XML_PARSER_ATTRIBUTE_VALUE:
11448
      xmlGenericError(xmlGenericErrorContext,
11449
        "PP: try ATTRIBUTE_VALUE\n");break;
11450
  case XML_PARSER_DTD:
11451
      xmlGenericError(xmlGenericErrorContext,
11452
        "PP: try DTD\n");break;
11453
  case XML_PARSER_EPILOG:
11454
      xmlGenericError(xmlGenericErrorContext,
11455
        "PP: try EPILOG\n");break;
11456
  case XML_PARSER_PI:
11457
      xmlGenericError(xmlGenericErrorContext,
11458
        "PP: try PI\n");break;
11459
        case XML_PARSER_IGNORE:
11460
            xmlGenericError(xmlGenericErrorContext,
11461
        "PP: try IGNORE\n");break;
11462
    }
11463
#endif
11464
11465
6.07M
    if ((ctxt->input != NULL) &&
11466
6.07M
        (ctxt->input->cur - ctxt->input->base > 4096)) {
11467
71.2k
        xmlParserInputShrink(ctxt->input);
11468
71.2k
    }
11469
11470
56.5M
    while (ctxt->instate != XML_PARSER_EOF) {
11471
56.5M
  if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11472
169k
      return(0);
11473
11474
56.4M
  if (ctxt->input == NULL) break;
11475
56.4M
  if (ctxt->input->buf == NULL)
11476
0
      avail = ctxt->input->length -
11477
0
              (ctxt->input->cur - ctxt->input->base);
11478
56.4M
  else {
11479
      /*
11480
       * If we are operating on converted input, try to flush
11481
       * remaining chars to avoid them stalling in the non-converted
11482
       * buffer. But do not do this in document start where
11483
       * encoding="..." may not have been read and we work on a
11484
       * guessed encoding.
11485
       */
11486
56.4M
      if ((ctxt->instate != XML_PARSER_START) &&
11487
56.4M
          (ctxt->input->buf->raw != NULL) &&
11488
56.4M
    (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
11489
1.18M
                size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11490
1.18M
                                                 ctxt->input);
11491
1.18M
    size_t current = ctxt->input->cur - ctxt->input->base;
11492
11493
1.18M
    xmlParserInputBufferPush(ctxt->input->buf, 0, "");
11494
1.18M
                xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11495
1.18M
                                      base, current);
11496
1.18M
      }
11497
56.4M
      avail = xmlBufUse(ctxt->input->buf->buffer) -
11498
56.4M
        (ctxt->input->cur - ctxt->input->base);
11499
56.4M
  }
11500
56.4M
        if (avail < 1)
11501
238k
      goto done;
11502
56.1M
        switch (ctxt->instate) {
11503
0
            case XML_PARSER_EOF:
11504
          /*
11505
     * Document parsing is done !
11506
     */
11507
0
          goto done;
11508
1.87M
            case XML_PARSER_START:
11509
1.87M
    if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11510
457k
        xmlChar start[4];
11511
457k
        xmlCharEncoding enc;
11512
11513
        /*
11514
         * Very first chars read from the document flow.
11515
         */
11516
457k
        if (avail < 4)
11517
26.2k
      goto done;
11518
11519
        /*
11520
         * Get the 4 first bytes and decode the charset
11521
         * if enc != XML_CHAR_ENCODING_NONE
11522
         * plug some encoding conversion routines,
11523
         * else xmlSwitchEncoding will set to (default)
11524
         * UTF8.
11525
         */
11526
431k
        start[0] = RAW;
11527
431k
        start[1] = NXT(1);
11528
431k
        start[2] = NXT(2);
11529
431k
        start[3] = NXT(3);
11530
431k
        enc = xmlDetectCharEncoding(start, 4);
11531
431k
        xmlSwitchEncoding(ctxt, enc);
11532
431k
        break;
11533
457k
    }
11534
11535
1.41M
    if (avail < 2)
11536
263
        goto done;
11537
1.41M
    cur = ctxt->input->cur[0];
11538
1.41M
    next = ctxt->input->cur[1];
11539
1.41M
    if (cur == 0) {
11540
2.20k
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11541
2.20k
      ctxt->sax->setDocumentLocator(ctxt->userData,
11542
2.20k
                  &xmlDefaultSAXLocator);
11543
2.20k
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11544
2.20k
        xmlHaltParser(ctxt);
11545
#ifdef DEBUG_PUSH
11546
        xmlGenericError(xmlGenericErrorContext,
11547
          "PP: entering EOF\n");
11548
#endif
11549
2.20k
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11550
2.20k
      ctxt->sax->endDocument(ctxt->userData);
11551
2.20k
        goto done;
11552
2.20k
    }
11553
1.41M
          if ((cur == '<') && (next == '?')) {
11554
        /* PI or XML decl */
11555
1.14M
        if (avail < 5) goto done;
11556
1.14M
        if ((!terminate) &&
11557
1.14M
                        (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11558
842k
      goto done;
11559
301k
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11560
301k
      ctxt->sax->setDocumentLocator(ctxt->userData,
11561
301k
                  &xmlDefaultSAXLocator);
11562
301k
        if ((ctxt->input->cur[2] == 'x') &&
11563
301k
      (ctxt->input->cur[3] == 'm') &&
11564
301k
      (ctxt->input->cur[4] == 'l') &&
11565
301k
      (IS_BLANK_CH(ctxt->input->cur[5]))) {
11566
234k
      ret += 5;
11567
#ifdef DEBUG_PUSH
11568
      xmlGenericError(xmlGenericErrorContext,
11569
        "PP: Parsing XML Decl\n");
11570
#endif
11571
234k
      xmlParseXMLDecl(ctxt);
11572
234k
      if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11573
          /*
11574
           * The XML REC instructs us to stop parsing right
11575
           * here
11576
           */
11577
644
          xmlHaltParser(ctxt);
11578
644
          return(0);
11579
644
      }
11580
234k
      ctxt->standalone = ctxt->input->standalone;
11581
234k
      if ((ctxt->encoding == NULL) &&
11582
234k
          (ctxt->input->encoding != NULL))
11583
25.2k
          ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11584
234k
      if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11585
234k
          (!ctxt->disableSAX))
11586
197k
          ctxt->sax->startDocument(ctxt->userData);
11587
234k
      ctxt->instate = XML_PARSER_MISC;
11588
#ifdef DEBUG_PUSH
11589
      xmlGenericError(xmlGenericErrorContext,
11590
        "PP: entering MISC\n");
11591
#endif
11592
234k
        } else {
11593
66.6k
      ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11594
66.6k
      if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11595
66.6k
          (!ctxt->disableSAX))
11596
66.6k
          ctxt->sax->startDocument(ctxt->userData);
11597
66.6k
      ctxt->instate = XML_PARSER_MISC;
11598
#ifdef DEBUG_PUSH
11599
      xmlGenericError(xmlGenericErrorContext,
11600
        "PP: entering MISC\n");
11601
#endif
11602
66.6k
        }
11603
301k
    } else {
11604
271k
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11605
271k
      ctxt->sax->setDocumentLocator(ctxt->userData,
11606
271k
                  &xmlDefaultSAXLocator);
11607
271k
        ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11608
271k
        if (ctxt->version == NULL) {
11609
0
            xmlErrMemory(ctxt, NULL);
11610
0
      break;
11611
0
        }
11612
271k
        if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11613
271k
            (!ctxt->disableSAX))
11614
271k
      ctxt->sax->startDocument(ctxt->userData);
11615
271k
        ctxt->instate = XML_PARSER_MISC;
11616
#ifdef DEBUG_PUSH
11617
        xmlGenericError(xmlGenericErrorContext,
11618
          "PP: entering MISC\n");
11619
#endif
11620
271k
    }
11621
572k
    break;
11622
10.2M
            case XML_PARSER_START_TAG: {
11623
10.2M
          const xmlChar *name;
11624
10.2M
    const xmlChar *prefix = NULL;
11625
10.2M
    const xmlChar *URI = NULL;
11626
10.2M
                int line = ctxt->input->line;
11627
10.2M
    int nsNr = ctxt->nsNr;
11628
11629
10.2M
    if ((avail < 2) && (ctxt->inputNr == 1))
11630
0
        goto done;
11631
10.2M
    cur = ctxt->input->cur[0];
11632
10.2M
          if (cur != '<') {
11633
22.7k
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11634
22.7k
        xmlHaltParser(ctxt);
11635
22.7k
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11636
22.7k
      ctxt->sax->endDocument(ctxt->userData);
11637
22.7k
        goto done;
11638
22.7k
    }
11639
10.1M
    if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11640
2.13M
                    goto done;
11641
8.04M
    if (ctxt->spaceNr == 0)
11642
425k
        spacePush(ctxt, -1);
11643
7.61M
    else if (*ctxt->space == -2)
11644
1.62M
        spacePush(ctxt, -1);
11645
5.99M
    else
11646
5.99M
        spacePush(ctxt, *ctxt->space);
11647
8.04M
#ifdef LIBXML_SAX1_ENABLED
11648
8.04M
    if (ctxt->sax2)
11649
5.14M
#endif /* LIBXML_SAX1_ENABLED */
11650
5.14M
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
11651
2.89M
#ifdef LIBXML_SAX1_ENABLED
11652
2.89M
    else
11653
2.89M
        name = xmlParseStartTag(ctxt);
11654
8.04M
#endif /* LIBXML_SAX1_ENABLED */
11655
8.04M
    if (ctxt->instate == XML_PARSER_EOF)
11656
604
        goto done;
11657
8.04M
    if (name == NULL) {
11658
32.7k
        spacePop(ctxt);
11659
32.7k
        xmlHaltParser(ctxt);
11660
32.7k
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11661
32.7k
      ctxt->sax->endDocument(ctxt->userData);
11662
32.7k
        goto done;
11663
32.7k
    }
11664
8.00M
#ifdef LIBXML_VALID_ENABLED
11665
    /*
11666
     * [ VC: Root Element Type ]
11667
     * The Name in the document type declaration must match
11668
     * the element type of the root element.
11669
     */
11670
8.00M
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11671
8.00M
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
11672
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11673
8.00M
#endif /* LIBXML_VALID_ENABLED */
11674
11675
    /*
11676
     * Check for an Empty Element.
11677
     */
11678
8.00M
    if ((RAW == '/') && (NXT(1) == '>')) {
11679
1.52M
        SKIP(2);
11680
11681
1.52M
        if (ctxt->sax2) {
11682
1.11M
      if ((ctxt->sax != NULL) &&
11683
1.11M
          (ctxt->sax->endElementNs != NULL) &&
11684
1.11M
          (!ctxt->disableSAX))
11685
1.11M
          ctxt->sax->endElementNs(ctxt->userData, name,
11686
1.11M
                                  prefix, URI);
11687
1.11M
      if (ctxt->nsNr - nsNr > 0)
11688
24.7k
          nsPop(ctxt, ctxt->nsNr - nsNr);
11689
1.11M
#ifdef LIBXML_SAX1_ENABLED
11690
1.11M
        } else {
11691
413k
      if ((ctxt->sax != NULL) &&
11692
413k
          (ctxt->sax->endElement != NULL) &&
11693
413k
          (!ctxt->disableSAX))
11694
412k
          ctxt->sax->endElement(ctxt->userData, name);
11695
413k
#endif /* LIBXML_SAX1_ENABLED */
11696
413k
        }
11697
1.52M
        if (ctxt->instate == XML_PARSER_EOF)
11698
0
      goto done;
11699
1.52M
        spacePop(ctxt);
11700
1.52M
        if (ctxt->nameNr == 0) {
11701
4.44k
      ctxt->instate = XML_PARSER_EPILOG;
11702
1.52M
        } else {
11703
1.52M
      ctxt->instate = XML_PARSER_CONTENT;
11704
1.52M
        }
11705
1.52M
        break;
11706
1.52M
    }
11707
6.48M
    if (RAW == '>') {
11708
3.65M
        NEXT;
11709
3.65M
    } else {
11710
2.82M
        xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11711
2.82M
           "Couldn't find end of Start Tag %s\n",
11712
2.82M
           name);
11713
2.82M
        nodePop(ctxt);
11714
2.82M
        spacePop(ctxt);
11715
2.82M
    }
11716
6.48M
                nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
11717
11718
6.48M
    ctxt->instate = XML_PARSER_CONTENT;
11719
6.48M
                break;
11720
8.00M
      }
11721
39.3M
            case XML_PARSER_CONTENT: {
11722
39.3M
    if ((avail < 2) && (ctxt->inputNr == 1))
11723
88.1k
        goto done;
11724
39.2M
    cur = ctxt->input->cur[0];
11725
39.2M
    next = ctxt->input->cur[1];
11726
11727
39.2M
    if ((cur == '<') && (next == '/')) {
11728
2.50M
        ctxt->instate = XML_PARSER_END_TAG;
11729
2.50M
        break;
11730
36.7M
          } else if ((cur == '<') && (next == '?')) {
11731
394k
        if ((!terminate) &&
11732
394k
            (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11733
107k
      goto done;
11734
286k
        xmlParsePI(ctxt);
11735
286k
        ctxt->instate = XML_PARSER_CONTENT;
11736
36.3M
    } else if ((cur == '<') && (next != '!')) {
11737
7.72M
        ctxt->instate = XML_PARSER_START_TAG;
11738
7.72M
        break;
11739
28.6M
    } else if ((cur == '<') && (next == '!') &&
11740
28.6M
               (ctxt->input->cur[2] == '-') &&
11741
28.6M
         (ctxt->input->cur[3] == '-')) {
11742
485k
        if ((!terminate) &&
11743
485k
            (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11744
171k
      goto done;
11745
314k
        xmlParseComment(ctxt);
11746
314k
        ctxt->instate = XML_PARSER_CONTENT;
11747
28.1M
    } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11748
28.1M
        (ctxt->input->cur[2] == '[') &&
11749
28.1M
        (ctxt->input->cur[3] == 'C') &&
11750
28.1M
        (ctxt->input->cur[4] == 'D') &&
11751
28.1M
        (ctxt->input->cur[5] == 'A') &&
11752
28.1M
        (ctxt->input->cur[6] == 'T') &&
11753
28.1M
        (ctxt->input->cur[7] == 'A') &&
11754
28.1M
        (ctxt->input->cur[8] == '[')) {
11755
131k
        SKIP(9);
11756
131k
        ctxt->instate = XML_PARSER_CDATA_SECTION;
11757
131k
        break;
11758
27.9M
    } else if ((cur == '<') && (next == '!') &&
11759
27.9M
               (avail < 9)) {
11760
20.9k
        goto done;
11761
27.9M
    } else if (cur == '<') {
11762
1.13M
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11763
1.13M
                    "detected an error in element content\n");
11764
1.13M
                    SKIP(1);
11765
26.8M
    } else if (cur == '&') {
11766
2.87M
        if ((!terminate) && (!xmlParseLookupChar(ctxt, ';')))
11767
270k
      goto done;
11768
2.60M
        xmlParseReference(ctxt);
11769
23.9M
    } else {
11770
        /* TODO Avoid the extra copy, handle directly !!! */
11771
        /*
11772
         * Goal of the following test is:
11773
         *  - minimize calls to the SAX 'character' callback
11774
         *    when they are mergeable
11775
         *  - handle a problem for isBlank when we only parse
11776
         *    a sequence of blank chars and the next one is
11777
         *    not available to check against '<' presence.
11778
         *  - tries to homogenize the differences in SAX
11779
         *    callbacks between the push and pull versions
11780
         *    of the parser.
11781
         */
11782
23.9M
        if ((ctxt->inputNr == 1) &&
11783
23.9M
            (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11784
11.0M
      if ((!terminate) && (!xmlParseLookupCharData(ctxt)))
11785
741k
          goto done;
11786
11.0M
                    }
11787
23.2M
                    ctxt->checkIndex = 0;
11788
23.2M
        xmlParseCharData(ctxt, 0);
11789
23.2M
    }
11790
27.5M
    break;
11791
39.2M
      }
11792
27.5M
            case XML_PARSER_END_TAG:
11793
2.58M
    if (avail < 2)
11794
0
        goto done;
11795
2.58M
    if ((!terminate) && (!xmlParseLookupChar(ctxt, '>')))
11796
78.1k
        goto done;
11797
2.50M
    if (ctxt->sax2) {
11798
1.66M
              xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
11799
1.66M
        nameNsPop(ctxt);
11800
1.66M
    }
11801
840k
#ifdef LIBXML_SAX1_ENABLED
11802
840k
      else
11803
840k
        xmlParseEndTag1(ctxt, 0);
11804
2.50M
#endif /* LIBXML_SAX1_ENABLED */
11805
2.50M
    if (ctxt->instate == XML_PARSER_EOF) {
11806
        /* Nothing */
11807
2.50M
    } else if (ctxt->nameNr == 0) {
11808
34.5k
        ctxt->instate = XML_PARSER_EPILOG;
11809
2.47M
    } else {
11810
2.47M
        ctxt->instate = XML_PARSER_CONTENT;
11811
2.47M
    }
11812
2.50M
    break;
11813
495k
            case XML_PARSER_CDATA_SECTION: {
11814
          /*
11815
     * The Push mode need to have the SAX callback for
11816
     * cdataBlock merge back contiguous callbacks.
11817
     */
11818
495k
    const xmlChar *term;
11819
11820
495k
                if (terminate) {
11821
                    /*
11822
                     * Don't call xmlParseLookupString. If 'terminate'
11823
                     * is set, checkIndex is invalid.
11824
                     */
11825
45.9k
                    term = BAD_CAST strstr((const char *) ctxt->input->cur,
11826
45.9k
                                           "]]>");
11827
449k
                } else {
11828
449k
        term = xmlParseLookupString(ctxt, 0, "]]>", 3);
11829
449k
                }
11830
11831
495k
    if (term == NULL) {
11832
266k
        int tmp, size;
11833
11834
266k
                    if (terminate) {
11835
                        /* Unfinished CDATA section */
11836
7.09k
                        size = ctxt->input->end - ctxt->input->cur;
11837
259k
                    } else {
11838
259k
                        if (avail < XML_PARSER_BIG_BUFFER_SIZE + 2)
11839
120k
                            goto done;
11840
138k
                        ctxt->checkIndex = 0;
11841
                        /* XXX: Why don't we pass the full buffer? */
11842
138k
                        size = XML_PARSER_BIG_BUFFER_SIZE;
11843
138k
                    }
11844
145k
                    tmp = xmlCheckCdataPush(ctxt->input->cur, size, 0);
11845
145k
                    if (tmp <= 0) {
11846
89.5k
                        tmp = -tmp;
11847
89.5k
                        ctxt->input->cur += tmp;
11848
89.5k
                        goto encoding_error;
11849
89.5k
                    }
11850
56.3k
                    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11851
56.3k
                        if (ctxt->sax->cdataBlock != NULL)
11852
29.6k
                            ctxt->sax->cdataBlock(ctxt->userData,
11853
29.6k
                                                  ctxt->input->cur, tmp);
11854
26.7k
                        else if (ctxt->sax->characters != NULL)
11855
26.7k
                            ctxt->sax->characters(ctxt->userData,
11856
26.7k
                                                  ctxt->input->cur, tmp);
11857
56.3k
                    }
11858
56.3k
                    if (ctxt->instate == XML_PARSER_EOF)
11859
0
                        goto done;
11860
56.3k
                    SKIPL(tmp);
11861
228k
    } else {
11862
228k
                    int base = term - CUR_PTR;
11863
228k
        int tmp;
11864
11865
228k
        tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1);
11866
228k
        if ((tmp < 0) || (tmp != base)) {
11867
113k
      tmp = -tmp;
11868
113k
      ctxt->input->cur += tmp;
11869
113k
      goto encoding_error;
11870
113k
        }
11871
115k
        if ((ctxt->sax != NULL) && (base == 0) &&
11872
115k
            (ctxt->sax->cdataBlock != NULL) &&
11873
115k
            (!ctxt->disableSAX)) {
11874
      /*
11875
       * Special case to provide identical behaviour
11876
       * between pull and push parsers on empty CDATA
11877
       * sections
11878
       */
11879
12.3k
       if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11880
12.3k
           (!strncmp((const char *)&ctxt->input->cur[-9],
11881
12.3k
                     "<![CDATA[", 9)))
11882
12.3k
           ctxt->sax->cdataBlock(ctxt->userData,
11883
12.3k
                                 BAD_CAST "", 0);
11884
102k
        } else if ((ctxt->sax != NULL) && (base > 0) &&
11885
102k
      (!ctxt->disableSAX)) {
11886
93.5k
      if (ctxt->sax->cdataBlock != NULL)
11887
58.7k
          ctxt->sax->cdataBlock(ctxt->userData,
11888
58.7k
              ctxt->input->cur, base);
11889
34.8k
      else if (ctxt->sax->characters != NULL)
11890
34.8k
          ctxt->sax->characters(ctxt->userData,
11891
34.8k
              ctxt->input->cur, base);
11892
93.5k
        }
11893
115k
        if (ctxt->instate == XML_PARSER_EOF)
11894
0
      goto done;
11895
115k
        SKIPL(base + 3);
11896
115k
        ctxt->instate = XML_PARSER_CONTENT;
11897
#ifdef DEBUG_PUSH
11898
        xmlGenericError(xmlGenericErrorContext,
11899
          "PP: entering CONTENT\n");
11900
#endif
11901
115k
    }
11902
171k
    break;
11903
495k
      }
11904
887k
            case XML_PARSER_MISC:
11905
1.07M
            case XML_PARSER_PROLOG:
11906
1.11M
            case XML_PARSER_EPILOG:
11907
1.11M
    SKIP_BLANKS;
11908
1.11M
    if (ctxt->input->buf == NULL)
11909
0
        avail = ctxt->input->length -
11910
0
                (ctxt->input->cur - ctxt->input->base);
11911
1.11M
    else
11912
1.11M
        avail = xmlBufUse(ctxt->input->buf->buffer) -
11913
1.11M
                (ctxt->input->cur - ctxt->input->base);
11914
1.11M
    if (avail < 2)
11915
30.7k
        goto done;
11916
1.08M
    cur = ctxt->input->cur[0];
11917
1.08M
    next = ctxt->input->cur[1];
11918
1.08M
          if ((cur == '<') && (next == '?')) {
11919
132k
        if ((!terminate) &&
11920
132k
                        (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11921
31.9k
      goto done;
11922
#ifdef DEBUG_PUSH
11923
        xmlGenericError(xmlGenericErrorContext,
11924
          "PP: Parsing PI\n");
11925
#endif
11926
100k
        xmlParsePI(ctxt);
11927
100k
        if (ctxt->instate == XML_PARSER_EOF)
11928
0
      goto done;
11929
952k
    } else if ((cur == '<') && (next == '!') &&
11930
952k
        (ctxt->input->cur[2] == '-') &&
11931
952k
        (ctxt->input->cur[3] == '-')) {
11932
86.5k
        if ((!terminate) &&
11933
86.5k
                        (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11934
53.6k
      goto done;
11935
#ifdef DEBUG_PUSH
11936
        xmlGenericError(xmlGenericErrorContext,
11937
          "PP: Parsing Comment\n");
11938
#endif
11939
32.8k
        xmlParseComment(ctxt);
11940
32.8k
        if (ctxt->instate == XML_PARSER_EOF)
11941
0
      goto done;
11942
866k
    } else if ((ctxt->instate == XML_PARSER_MISC) &&
11943
866k
                    (cur == '<') && (next == '!') &&
11944
866k
        (ctxt->input->cur[2] == 'D') &&
11945
866k
        (ctxt->input->cur[3] == 'O') &&
11946
866k
        (ctxt->input->cur[4] == 'C') &&
11947
866k
        (ctxt->input->cur[5] == 'T') &&
11948
866k
        (ctxt->input->cur[6] == 'Y') &&
11949
866k
        (ctxt->input->cur[7] == 'P') &&
11950
866k
        (ctxt->input->cur[8] == 'E')) {
11951
438k
        if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11952
172k
                        goto done;
11953
#ifdef DEBUG_PUSH
11954
        xmlGenericError(xmlGenericErrorContext,
11955
          "PP: Parsing internal subset\n");
11956
#endif
11957
266k
        ctxt->inSubset = 1;
11958
266k
        xmlParseDocTypeDecl(ctxt);
11959
266k
        if (ctxt->instate == XML_PARSER_EOF)
11960
0
      goto done;
11961
266k
        if (RAW == '[') {
11962
203k
      ctxt->instate = XML_PARSER_DTD;
11963
#ifdef DEBUG_PUSH
11964
      xmlGenericError(xmlGenericErrorContext,
11965
        "PP: entering DTD\n");
11966
#endif
11967
203k
        } else {
11968
      /*
11969
       * Create and update the external subset.
11970
       */
11971
63.1k
      ctxt->inSubset = 2;
11972
63.1k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11973
63.1k
          (ctxt->sax->externalSubset != NULL))
11974
57.9k
          ctxt->sax->externalSubset(ctxt->userData,
11975
57.9k
            ctxt->intSubName, ctxt->extSubSystem,
11976
57.9k
            ctxt->extSubURI);
11977
63.1k
      ctxt->inSubset = 0;
11978
63.1k
      xmlCleanSpecialAttr(ctxt);
11979
63.1k
      ctxt->instate = XML_PARSER_PROLOG;
11980
#ifdef DEBUG_PUSH
11981
      xmlGenericError(xmlGenericErrorContext,
11982
        "PP: entering PROLOG\n");
11983
#endif
11984
63.1k
        }
11985
427k
    } else if ((cur == '<') && (next == '!') &&
11986
427k
               (avail <
11987
33.8k
                            (ctxt->instate == XML_PARSER_MISC ? 9 : 4))) {
11988
29.8k
        goto done;
11989
397k
    } else if (ctxt->instate == XML_PARSER_EPILOG) {
11990
10.2k
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11991
10.2k
        xmlHaltParser(ctxt);
11992
#ifdef DEBUG_PUSH
11993
        xmlGenericError(xmlGenericErrorContext,
11994
          "PP: entering EOF\n");
11995
#endif
11996
10.2k
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11997
10.2k
      ctxt->sax->endDocument(ctxt->userData);
11998
10.2k
        goto done;
11999
387k
                } else {
12000
387k
        ctxt->instate = XML_PARSER_START_TAG;
12001
#ifdef DEBUG_PUSH
12002
        xmlGenericError(xmlGenericErrorContext,
12003
          "PP: entering START_TAG\n");
12004
#endif
12005
387k
    }
12006
787k
    break;
12007
787k
            case XML_PARSER_DTD: {
12008
580k
                if ((!terminate) && (!xmlParseLookupInternalSubset(ctxt)))
12009
399k
                    goto done;
12010
180k
    xmlParseInternalSubset(ctxt);
12011
180k
    if (ctxt->instate == XML_PARSER_EOF)
12012
67.8k
        goto done;
12013
112k
    ctxt->inSubset = 2;
12014
112k
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
12015
112k
        (ctxt->sax->externalSubset != NULL))
12016
108k
        ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
12017
108k
          ctxt->extSubSystem, ctxt->extSubURI);
12018
112k
    ctxt->inSubset = 0;
12019
112k
    xmlCleanSpecialAttr(ctxt);
12020
112k
    if (ctxt->instate == XML_PARSER_EOF)
12021
6.97k
        goto done;
12022
105k
    ctxt->instate = XML_PARSER_PROLOG;
12023
#ifdef DEBUG_PUSH
12024
    xmlGenericError(xmlGenericErrorContext,
12025
      "PP: entering PROLOG\n");
12026
#endif
12027
105k
                break;
12028
112k
      }
12029
0
            case XML_PARSER_COMMENT:
12030
0
    xmlGenericError(xmlGenericErrorContext,
12031
0
      "PP: internal error, state == COMMENT\n");
12032
0
    ctxt->instate = XML_PARSER_CONTENT;
12033
#ifdef DEBUG_PUSH
12034
    xmlGenericError(xmlGenericErrorContext,
12035
      "PP: entering CONTENT\n");
12036
#endif
12037
0
    break;
12038
0
            case XML_PARSER_IGNORE:
12039
0
    xmlGenericError(xmlGenericErrorContext,
12040
0
      "PP: internal error, state == IGNORE");
12041
0
          ctxt->instate = XML_PARSER_DTD;
12042
#ifdef DEBUG_PUSH
12043
    xmlGenericError(xmlGenericErrorContext,
12044
      "PP: entering DTD\n");
12045
#endif
12046
0
          break;
12047
0
            case XML_PARSER_PI:
12048
0
    xmlGenericError(xmlGenericErrorContext,
12049
0
      "PP: internal error, state == PI\n");
12050
0
    ctxt->instate = XML_PARSER_CONTENT;
12051
#ifdef DEBUG_PUSH
12052
    xmlGenericError(xmlGenericErrorContext,
12053
      "PP: entering CONTENT\n");
12054
#endif
12055
0
    break;
12056
0
            case XML_PARSER_ENTITY_DECL:
12057
0
    xmlGenericError(xmlGenericErrorContext,
12058
0
      "PP: internal error, state == ENTITY_DECL\n");
12059
0
    ctxt->instate = XML_PARSER_DTD;
12060
#ifdef DEBUG_PUSH
12061
    xmlGenericError(xmlGenericErrorContext,
12062
      "PP: entering DTD\n");
12063
#endif
12064
0
    break;
12065
0
            case XML_PARSER_ENTITY_VALUE:
12066
0
    xmlGenericError(xmlGenericErrorContext,
12067
0
      "PP: internal error, state == ENTITY_VALUE\n");
12068
0
    ctxt->instate = XML_PARSER_CONTENT;
12069
#ifdef DEBUG_PUSH
12070
    xmlGenericError(xmlGenericErrorContext,
12071
      "PP: entering DTD\n");
12072
#endif
12073
0
    break;
12074
0
            case XML_PARSER_ATTRIBUTE_VALUE:
12075
0
    xmlGenericError(xmlGenericErrorContext,
12076
0
      "PP: internal error, state == ATTRIBUTE_VALUE\n");
12077
0
    ctxt->instate = XML_PARSER_START_TAG;
12078
#ifdef DEBUG_PUSH
12079
    xmlGenericError(xmlGenericErrorContext,
12080
      "PP: entering START_TAG\n");
12081
#endif
12082
0
    break;
12083
0
            case XML_PARSER_SYSTEM_LITERAL:
12084
0
    xmlGenericError(xmlGenericErrorContext,
12085
0
      "PP: internal error, state == SYSTEM_LITERAL\n");
12086
0
    ctxt->instate = XML_PARSER_START_TAG;
12087
#ifdef DEBUG_PUSH
12088
    xmlGenericError(xmlGenericErrorContext,
12089
      "PP: entering START_TAG\n");
12090
#endif
12091
0
    break;
12092
0
            case XML_PARSER_PUBLIC_LITERAL:
12093
0
    xmlGenericError(xmlGenericErrorContext,
12094
0
      "PP: internal error, state == PUBLIC_LITERAL\n");
12095
0
    ctxt->instate = XML_PARSER_START_TAG;
12096
#ifdef DEBUG_PUSH
12097
    xmlGenericError(xmlGenericErrorContext,
12098
      "PP: entering START_TAG\n");
12099
#endif
12100
0
    break;
12101
56.1M
  }
12102
56.1M
    }
12103
5.70M
done:
12104
#ifdef DEBUG_PUSH
12105
    xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
12106
#endif
12107
5.70M
    return(ret);
12108
203k
encoding_error:
12109
203k
    {
12110
203k
        char buffer[150];
12111
12112
203k
  snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12113
203k
      ctxt->input->cur[0], ctxt->input->cur[1],
12114
203k
      ctxt->input->cur[2], ctxt->input->cur[3]);
12115
203k
  __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12116
203k
         "Input is not proper UTF-8, indicate encoding !\n%s",
12117
203k
         BAD_CAST buffer, NULL);
12118
203k
    }
12119
203k
    return(0);
12120
6.07M
}
12121
12122
/**
12123
 * xmlParseChunk:
12124
 * @ctxt:  an XML parser context
12125
 * @chunk:  an char array
12126
 * @size:  the size in byte of the chunk
12127
 * @terminate:  last chunk indicator
12128
 *
12129
 * Parse a Chunk of memory
12130
 *
12131
 * Returns zero if no error, the xmlParserErrors otherwise.
12132
 */
12133
int
12134
xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
12135
7.52M
              int terminate) {
12136
7.52M
    int end_in_lf = 0;
12137
7.52M
    int remain = 0;
12138
12139
7.52M
    if (ctxt == NULL)
12140
0
        return(XML_ERR_INTERNAL_ERROR);
12141
7.52M
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12142
1.59M
        return(ctxt->errNo);
12143
5.92M
    if (ctxt->instate == XML_PARSER_EOF)
12144
1.40k
        return(-1);
12145
5.92M
    if (ctxt->input == NULL)
12146
0
        return(-1);
12147
12148
5.92M
    ctxt->progressive = 1;
12149
5.92M
    if (ctxt->instate == XML_PARSER_START)
12150
1.29M
        xmlDetectSAX2(ctxt);
12151
5.92M
    if ((size > 0) && (chunk != NULL) && (!terminate) &&
12152
5.92M
        (chunk[size - 1] == '\r')) {
12153
34.7k
  end_in_lf = 1;
12154
34.7k
  size--;
12155
34.7k
    }
12156
12157
6.08M
xmldecl_done:
12158
12159
6.08M
    if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12160
6.08M
        (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF))  {
12161
5.68M
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12162
5.68M
  size_t cur = ctxt->input->cur - ctxt->input->base;
12163
5.68M
  int res;
12164
12165
        /*
12166
         * Specific handling if we autodetected an encoding, we should not
12167
         * push more than the first line ... which depend on the encoding
12168
         * And only push the rest once the final encoding was detected
12169
         */
12170
5.68M
        if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
12171
5.68M
            (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
12172
250k
            unsigned int len = 45;
12173
12174
250k
            if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12175
250k
                               BAD_CAST "UTF-16")) ||
12176
250k
                (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12177
1.08k
                               BAD_CAST "UTF16")))
12178
249k
                len = 90;
12179
1.08k
            else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12180
1.08k
                                    BAD_CAST "UCS-4")) ||
12181
1.08k
                     (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12182
16
                                    BAD_CAST "UCS4")))
12183
1.06k
                len = 180;
12184
12185
250k
            if (ctxt->input->buf->rawconsumed < len)
12186
12.0k
                len -= ctxt->input->buf->rawconsumed;
12187
12188
            /*
12189
             * Change size for reading the initial declaration only
12190
             * if size is greater than len. Otherwise, memmove in xmlBufferAdd
12191
             * will blindly copy extra bytes from memory.
12192
             */
12193
250k
            if ((unsigned int) size > len) {
12194
158k
                remain = size - len;
12195
158k
                size = len;
12196
158k
            } else {
12197
91.8k
                remain = 0;
12198
91.8k
            }
12199
250k
        }
12200
5.68M
  res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12201
5.68M
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12202
5.68M
  if (res < 0) {
12203
1.48k
      ctxt->errNo = XML_PARSER_EOF;
12204
1.48k
      xmlHaltParser(ctxt);
12205
1.48k
      return (XML_PARSER_EOF);
12206
1.48k
  }
12207
#ifdef DEBUG_PUSH
12208
  xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12209
#endif
12210
12211
5.68M
    } else if (ctxt->instate != XML_PARSER_EOF) {
12212
396k
  if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
12213
396k
      xmlParserInputBufferPtr in = ctxt->input->buf;
12214
396k
      if ((in->encoder != NULL) && (in->buffer != NULL) &&
12215
396k
        (in->raw != NULL)) {
12216
40.8k
    int nbchars;
12217
40.8k
    size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
12218
40.8k
    size_t current = ctxt->input->cur - ctxt->input->base;
12219
12220
40.8k
    nbchars = xmlCharEncInput(in, terminate);
12221
40.8k
    xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
12222
40.8k
    if (nbchars < 0) {
12223
        /* TODO 2.6.0 */
12224
2.47k
        xmlGenericError(xmlGenericErrorContext,
12225
2.47k
            "xmlParseChunk: encoder error\n");
12226
2.47k
                    xmlHaltParser(ctxt);
12227
2.47k
        return(XML_ERR_INVALID_ENCODING);
12228
2.47k
    }
12229
40.8k
      }
12230
396k
  }
12231
396k
    }
12232
12233
6.07M
    if (remain != 0) {
12234
158k
        xmlParseTryOrFinish(ctxt, 0);
12235
5.91M
    } else {
12236
5.91M
        xmlParseTryOrFinish(ctxt, terminate);
12237
5.91M
    }
12238
6.07M
    if (ctxt->instate == XML_PARSER_EOF)
12239
144k
        return(ctxt->errNo);
12240
12241
5.93M
    if ((ctxt->input != NULL) &&
12242
5.93M
         (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
12243
5.93M
         ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
12244
5.93M
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
12245
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
12246
0
        xmlHaltParser(ctxt);
12247
0
    }
12248
5.93M
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12249
172k
        return(ctxt->errNo);
12250
12251
5.76M
    if (remain != 0) {
12252
157k
        chunk += size;
12253
157k
        size = remain;
12254
157k
        remain = 0;
12255
157k
        goto xmldecl_done;
12256
157k
    }
12257
5.60M
    if ((end_in_lf == 1) && (ctxt->input != NULL) &&
12258
5.60M
        (ctxt->input->buf != NULL)) {
12259
34.0k
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
12260
34.0k
           ctxt->input);
12261
34.0k
  size_t current = ctxt->input->cur - ctxt->input->base;
12262
12263
34.0k
  xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
12264
12265
34.0k
  xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
12266
34.0k
            base, current);
12267
34.0k
    }
12268
5.60M
    if (terminate) {
12269
  /*
12270
   * Check for termination
12271
   */
12272
169k
  int cur_avail = 0;
12273
12274
169k
  if (ctxt->input != NULL) {
12275
169k
      if (ctxt->input->buf == NULL)
12276
0
    cur_avail = ctxt->input->length -
12277
0
          (ctxt->input->cur - ctxt->input->base);
12278
169k
      else
12279
169k
    cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
12280
169k
                    (ctxt->input->cur - ctxt->input->base);
12281
169k
  }
12282
12283
169k
  if ((ctxt->instate != XML_PARSER_EOF) &&
12284
169k
      (ctxt->instate != XML_PARSER_EPILOG)) {
12285
146k
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12286
146k
  }
12287
169k
  if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) {
12288
408
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12289
408
  }
12290
169k
  if (ctxt->instate != XML_PARSER_EOF) {
12291
169k
      if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
12292
169k
    ctxt->sax->endDocument(ctxt->userData);
12293
169k
  }
12294
169k
  ctxt->instate = XML_PARSER_EOF;
12295
169k
    }
12296
5.60M
    if (ctxt->wellFormed == 0)
12297
2.76M
  return((xmlParserErrors) ctxt->errNo);
12298
2.83M
    else
12299
2.83M
        return(0);
12300
5.60M
}
12301
12302
/************************************************************************
12303
 *                  *
12304
 *    I/O front end functions to the parser     *
12305
 *                  *
12306
 ************************************************************************/
12307
12308
/**
12309
 * xmlCreatePushParserCtxt:
12310
 * @sax:  a SAX handler
12311
 * @user_data:  The user data returned on SAX callbacks
12312
 * @chunk:  a pointer to an array of chars
12313
 * @size:  number of chars in the array
12314
 * @filename:  an optional file name or URI
12315
 *
12316
 * Create a parser context for using the XML parser in push mode.
12317
 * If @buffer and @size are non-NULL, the data is used to detect
12318
 * the encoding.  The remaining characters will be parsed so they
12319
 * don't need to be fed in again through xmlParseChunk.
12320
 * To allow content encoding detection, @size should be >= 4
12321
 * The value of @filename is used for fetching external entities
12322
 * and error/warning reports.
12323
 *
12324
 * Returns the new parser context or NULL
12325
 */
12326
12327
xmlParserCtxtPtr
12328
xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12329
588k
                        const char *chunk, int size, const char *filename) {
12330
588k
    xmlParserCtxtPtr ctxt;
12331
588k
    xmlParserInputPtr inputStream;
12332
588k
    xmlParserInputBufferPtr buf;
12333
588k
    xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12334
12335
    /*
12336
     * plug some encoding conversion routines
12337
     */
12338
588k
    if ((chunk != NULL) && (size >= 4))
12339
288k
  enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12340
12341
588k
    buf = xmlAllocParserInputBuffer(enc);
12342
588k
    if (buf == NULL) return(NULL);
12343
12344
588k
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
12345
588k
    if (ctxt == NULL) {
12346
0
        xmlErrMemory(NULL, "creating parser: out of memory\n");
12347
0
  xmlFreeParserInputBuffer(buf);
12348
0
  return(NULL);
12349
0
    }
12350
588k
    ctxt->dictNames = 1;
12351
588k
    if (filename == NULL) {
12352
294k
  ctxt->directory = NULL;
12353
294k
    } else {
12354
294k
        ctxt->directory = xmlParserGetDirectory(filename);
12355
294k
    }
12356
12357
588k
    inputStream = xmlNewInputStream(ctxt);
12358
588k
    if (inputStream == NULL) {
12359
0
  xmlFreeParserCtxt(ctxt);
12360
0
  xmlFreeParserInputBuffer(buf);
12361
0
  return(NULL);
12362
0
    }
12363
12364
588k
    if (filename == NULL)
12365
294k
  inputStream->filename = NULL;
12366
294k
    else {
12367
294k
  inputStream->filename = (char *)
12368
294k
      xmlCanonicPath((const xmlChar *) filename);
12369
294k
  if (inputStream->filename == NULL) {
12370
0
            xmlFreeInputStream(inputStream);
12371
0
      xmlFreeParserCtxt(ctxt);
12372
0
      xmlFreeParserInputBuffer(buf);
12373
0
      return(NULL);
12374
0
  }
12375
294k
    }
12376
588k
    inputStream->buf = buf;
12377
588k
    xmlBufResetInput(inputStream->buf->buffer, inputStream);
12378
588k
    inputPush(ctxt, inputStream);
12379
12380
    /*
12381
     * If the caller didn't provide an initial 'chunk' for determining
12382
     * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12383
     * that it can be automatically determined later
12384
     */
12385
588k
    ctxt->charset = XML_CHAR_ENCODING_NONE;
12386
12387
588k
    if ((size != 0) && (chunk != NULL) &&
12388
588k
        (ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
12389
288k
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12390
288k
  size_t cur = ctxt->input->cur - ctxt->input->base;
12391
12392
288k
  xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12393
12394
288k
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12395
#ifdef DEBUG_PUSH
12396
  xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12397
#endif
12398
288k
    }
12399
12400
588k
    if (enc != XML_CHAR_ENCODING_NONE) {
12401
145k
        xmlSwitchEncoding(ctxt, enc);
12402
145k
    }
12403
12404
588k
    return(ctxt);
12405
588k
}
12406
#endif /* LIBXML_PUSH_ENABLED */
12407
12408
/**
12409
 * xmlHaltParser:
12410
 * @ctxt:  an XML parser context
12411
 *
12412
 * Blocks further parser processing don't override error
12413
 * for internal use
12414
 */
12415
static void
12416
516k
xmlHaltParser(xmlParserCtxtPtr ctxt) {
12417
516k
    if (ctxt == NULL)
12418
0
        return;
12419
516k
    ctxt->instate = XML_PARSER_EOF;
12420
516k
    ctxt->disableSAX = 1;
12421
522k
    while (ctxt->inputNr > 1)
12422
6.24k
        xmlFreeInputStream(inputPop(ctxt));
12423
516k
    if (ctxt->input != NULL) {
12424
        /*
12425
   * in case there was a specific allocation deallocate before
12426
   * overriding base
12427
   */
12428
516k
        if (ctxt->input->free != NULL) {
12429
0
      ctxt->input->free((xmlChar *) ctxt->input->base);
12430
0
      ctxt->input->free = NULL;
12431
0
  }
12432
516k
        if (ctxt->input->buf != NULL) {
12433
459k
            xmlFreeParserInputBuffer(ctxt->input->buf);
12434
459k
            ctxt->input->buf = NULL;
12435
459k
        }
12436
516k
  ctxt->input->cur = BAD_CAST"";
12437
516k
        ctxt->input->length = 0;
12438
516k
  ctxt->input->base = ctxt->input->cur;
12439
516k
        ctxt->input->end = ctxt->input->cur;
12440
516k
    }
12441
516k
}
12442
12443
/**
12444
 * xmlStopParser:
12445
 * @ctxt:  an XML parser context
12446
 *
12447
 * Blocks further parser processing
12448
 */
12449
void
12450
294k
xmlStopParser(xmlParserCtxtPtr ctxt) {
12451
294k
    if (ctxt == NULL)
12452
0
        return;
12453
294k
    xmlHaltParser(ctxt);
12454
294k
    ctxt->errNo = XML_ERR_USER_STOP;
12455
294k
}
12456
12457
/**
12458
 * xmlCreateIOParserCtxt:
12459
 * @sax:  a SAX handler
12460
 * @user_data:  The user data returned on SAX callbacks
12461
 * @ioread:  an I/O read function
12462
 * @ioclose:  an I/O close function
12463
 * @ioctx:  an I/O handler
12464
 * @enc:  the charset encoding if known
12465
 *
12466
 * Create a parser context for using the XML parser with an existing
12467
 * I/O stream
12468
 *
12469
 * Returns the new parser context or NULL
12470
 */
12471
xmlParserCtxtPtr
12472
xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12473
  xmlInputReadCallback   ioread, xmlInputCloseCallback  ioclose,
12474
0
  void *ioctx, xmlCharEncoding enc) {
12475
0
    xmlParserCtxtPtr ctxt;
12476
0
    xmlParserInputPtr inputStream;
12477
0
    xmlParserInputBufferPtr buf;
12478
12479
0
    if (ioread == NULL) return(NULL);
12480
12481
0
    buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
12482
0
    if (buf == NULL) {
12483
0
        if (ioclose != NULL)
12484
0
            ioclose(ioctx);
12485
0
        return (NULL);
12486
0
    }
12487
12488
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
12489
0
    if (ctxt == NULL) {
12490
0
  xmlFreeParserInputBuffer(buf);
12491
0
  return(NULL);
12492
0
    }
12493
12494
0
    inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12495
0
    if (inputStream == NULL) {
12496
0
  xmlFreeParserCtxt(ctxt);
12497
0
  return(NULL);
12498
0
    }
12499
0
    inputPush(ctxt, inputStream);
12500
12501
0
    return(ctxt);
12502
0
}
12503
12504
#ifdef LIBXML_VALID_ENABLED
12505
/************************************************************************
12506
 *                  *
12507
 *    Front ends when parsing a DTD       *
12508
 *                  *
12509
 ************************************************************************/
12510
12511
/**
12512
 * xmlIOParseDTD:
12513
 * @sax:  the SAX handler block or NULL
12514
 * @input:  an Input Buffer
12515
 * @enc:  the charset encoding if known
12516
 *
12517
 * Load and parse a DTD
12518
 *
12519
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12520
 * @input will be freed by the function in any case.
12521
 */
12522
12523
xmlDtdPtr
12524
xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12525
0
        xmlCharEncoding enc) {
12526
0
    xmlDtdPtr ret = NULL;
12527
0
    xmlParserCtxtPtr ctxt;
12528
0
    xmlParserInputPtr pinput = NULL;
12529
0
    xmlChar start[4];
12530
12531
0
    if (input == NULL)
12532
0
  return(NULL);
12533
12534
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12535
0
    if (ctxt == NULL) {
12536
0
        xmlFreeParserInputBuffer(input);
12537
0
  return(NULL);
12538
0
    }
12539
12540
    /* We are loading a DTD */
12541
0
    ctxt->options |= XML_PARSE_DTDLOAD;
12542
12543
0
    xmlDetectSAX2(ctxt);
12544
12545
    /*
12546
     * generate a parser input from the I/O handler
12547
     */
12548
12549
0
    pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12550
0
    if (pinput == NULL) {
12551
0
        xmlFreeParserInputBuffer(input);
12552
0
  xmlFreeParserCtxt(ctxt);
12553
0
  return(NULL);
12554
0
    }
12555
12556
    /*
12557
     * plug some encoding conversion routines here.
12558
     */
12559
0
    if (xmlPushInput(ctxt, pinput) < 0) {
12560
0
  xmlFreeParserCtxt(ctxt);
12561
0
  return(NULL);
12562
0
    }
12563
0
    if (enc != XML_CHAR_ENCODING_NONE) {
12564
0
        xmlSwitchEncoding(ctxt, enc);
12565
0
    }
12566
12567
0
    pinput->filename = NULL;
12568
0
    pinput->line = 1;
12569
0
    pinput->col = 1;
12570
0
    pinput->base = ctxt->input->cur;
12571
0
    pinput->cur = ctxt->input->cur;
12572
0
    pinput->free = NULL;
12573
12574
    /*
12575
     * let's parse that entity knowing it's an external subset.
12576
     */
12577
0
    ctxt->inSubset = 2;
12578
0
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12579
0
    if (ctxt->myDoc == NULL) {
12580
0
  xmlErrMemory(ctxt, "New Doc failed");
12581
0
  return(NULL);
12582
0
    }
12583
0
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
12584
0
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12585
0
                                 BAD_CAST "none", BAD_CAST "none");
12586
12587
0
    if ((enc == XML_CHAR_ENCODING_NONE) &&
12588
0
        ((ctxt->input->end - ctxt->input->cur) >= 4)) {
12589
  /*
12590
   * Get the 4 first bytes and decode the charset
12591
   * if enc != XML_CHAR_ENCODING_NONE
12592
   * plug some encoding conversion routines.
12593
   */
12594
0
  start[0] = RAW;
12595
0
  start[1] = NXT(1);
12596
0
  start[2] = NXT(2);
12597
0
  start[3] = NXT(3);
12598
0
  enc = xmlDetectCharEncoding(start, 4);
12599
0
  if (enc != XML_CHAR_ENCODING_NONE) {
12600
0
      xmlSwitchEncoding(ctxt, enc);
12601
0
  }
12602
0
    }
12603
12604
0
    xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12605
12606
0
    if (ctxt->myDoc != NULL) {
12607
0
  if (ctxt->wellFormed) {
12608
0
      ret = ctxt->myDoc->extSubset;
12609
0
      ctxt->myDoc->extSubset = NULL;
12610
0
      if (ret != NULL) {
12611
0
    xmlNodePtr tmp;
12612
12613
0
    ret->doc = NULL;
12614
0
    tmp = ret->children;
12615
0
    while (tmp != NULL) {
12616
0
        tmp->doc = NULL;
12617
0
        tmp = tmp->next;
12618
0
    }
12619
0
      }
12620
0
  } else {
12621
0
      ret = NULL;
12622
0
  }
12623
0
        xmlFreeDoc(ctxt->myDoc);
12624
0
        ctxt->myDoc = NULL;
12625
0
    }
12626
0
    xmlFreeParserCtxt(ctxt);
12627
12628
0
    return(ret);
12629
0
}
12630
12631
/**
12632
 * xmlSAXParseDTD:
12633
 * @sax:  the SAX handler block
12634
 * @ExternalID:  a NAME* containing the External ID of the DTD
12635
 * @SystemID:  a NAME* containing the URL to the DTD
12636
 *
12637
 * DEPRECATED: Don't use.
12638
 *
12639
 * Load and parse an external subset.
12640
 *
12641
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12642
 */
12643
12644
xmlDtdPtr
12645
xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12646
0
                          const xmlChar *SystemID) {
12647
0
    xmlDtdPtr ret = NULL;
12648
0
    xmlParserCtxtPtr ctxt;
12649
0
    xmlParserInputPtr input = NULL;
12650
0
    xmlCharEncoding enc;
12651
0
    xmlChar* systemIdCanonic;
12652
12653
0
    if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12654
12655
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12656
0
    if (ctxt == NULL) {
12657
0
  return(NULL);
12658
0
    }
12659
12660
    /* We are loading a DTD */
12661
0
    ctxt->options |= XML_PARSE_DTDLOAD;
12662
12663
    /*
12664
     * Canonicalise the system ID
12665
     */
12666
0
    systemIdCanonic = xmlCanonicPath(SystemID);
12667
0
    if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
12668
0
  xmlFreeParserCtxt(ctxt);
12669
0
  return(NULL);
12670
0
    }
12671
12672
    /*
12673
     * Ask the Entity resolver to load the damn thing
12674
     */
12675
12676
0
    if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
12677
0
  input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12678
0
                                   systemIdCanonic);
12679
0
    if (input == NULL) {
12680
0
  xmlFreeParserCtxt(ctxt);
12681
0
  if (systemIdCanonic != NULL)
12682
0
      xmlFree(systemIdCanonic);
12683
0
  return(NULL);
12684
0
    }
12685
12686
    /*
12687
     * plug some encoding conversion routines here.
12688
     */
12689
0
    if (xmlPushInput(ctxt, input) < 0) {
12690
0
  xmlFreeParserCtxt(ctxt);
12691
0
  if (systemIdCanonic != NULL)
12692
0
      xmlFree(systemIdCanonic);
12693
0
  return(NULL);
12694
0
    }
12695
0
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12696
0
  enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12697
0
  xmlSwitchEncoding(ctxt, enc);
12698
0
    }
12699
12700
0
    if (input->filename == NULL)
12701
0
  input->filename = (char *) systemIdCanonic;
12702
0
    else
12703
0
  xmlFree(systemIdCanonic);
12704
0
    input->line = 1;
12705
0
    input->col = 1;
12706
0
    input->base = ctxt->input->cur;
12707
0
    input->cur = ctxt->input->cur;
12708
0
    input->free = NULL;
12709
12710
    /*
12711
     * let's parse that entity knowing it's an external subset.
12712
     */
12713
0
    ctxt->inSubset = 2;
12714
0
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12715
0
    if (ctxt->myDoc == NULL) {
12716
0
  xmlErrMemory(ctxt, "New Doc failed");
12717
0
  xmlFreeParserCtxt(ctxt);
12718
0
  return(NULL);
12719
0
    }
12720
0
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
12721
0
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12722
0
                                 ExternalID, SystemID);
12723
0
    xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12724
12725
0
    if (ctxt->myDoc != NULL) {
12726
0
  if (ctxt->wellFormed) {
12727
0
      ret = ctxt->myDoc->extSubset;
12728
0
      ctxt->myDoc->extSubset = NULL;
12729
0
      if (ret != NULL) {
12730
0
    xmlNodePtr tmp;
12731
12732
0
    ret->doc = NULL;
12733
0
    tmp = ret->children;
12734
0
    while (tmp != NULL) {
12735
0
        tmp->doc = NULL;
12736
0
        tmp = tmp->next;
12737
0
    }
12738
0
      }
12739
0
  } else {
12740
0
      ret = NULL;
12741
0
  }
12742
0
        xmlFreeDoc(ctxt->myDoc);
12743
0
        ctxt->myDoc = NULL;
12744
0
    }
12745
0
    xmlFreeParserCtxt(ctxt);
12746
12747
0
    return(ret);
12748
0
}
12749
12750
12751
/**
12752
 * xmlParseDTD:
12753
 * @ExternalID:  a NAME* containing the External ID of the DTD
12754
 * @SystemID:  a NAME* containing the URL to the DTD
12755
 *
12756
 * Load and parse an external subset.
12757
 *
12758
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12759
 */
12760
12761
xmlDtdPtr
12762
0
xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12763
0
    return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12764
0
}
12765
#endif /* LIBXML_VALID_ENABLED */
12766
12767
/************************************************************************
12768
 *                  *
12769
 *    Front ends when parsing an Entity     *
12770
 *                  *
12771
 ************************************************************************/
12772
12773
/**
12774
 * xmlParseCtxtExternalEntity:
12775
 * @ctx:  the existing parsing context
12776
 * @URL:  the URL for the entity to load
12777
 * @ID:  the System ID for the entity to load
12778
 * @lst:  the return value for the set of parsed nodes
12779
 *
12780
 * Parse an external general entity within an existing parsing context
12781
 * An external general parsed entity is well-formed if it matches the
12782
 * production labeled extParsedEnt.
12783
 *
12784
 * [78] extParsedEnt ::= TextDecl? content
12785
 *
12786
 * Returns 0 if the entity is well formed, -1 in case of args problem and
12787
 *    the parser error code otherwise
12788
 */
12789
12790
int
12791
xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
12792
0
                 const xmlChar *ID, xmlNodePtr *lst) {
12793
0
    void *userData;
12794
12795
0
    if (ctx == NULL) return(-1);
12796
    /*
12797
     * If the user provided their own SAX callbacks, then reuse the
12798
     * userData callback field, otherwise the expected setup in a
12799
     * DOM builder is to have userData == ctxt
12800
     */
12801
0
    if (ctx->userData == ctx)
12802
0
        userData = NULL;
12803
0
    else
12804
0
        userData = ctx->userData;
12805
0
    return xmlParseExternalEntityPrivate(ctx->myDoc, ctx, ctx->sax,
12806
0
                                         userData, ctx->depth + 1,
12807
0
                                         URL, ID, lst);
12808
0
}
12809
12810
/**
12811
 * xmlParseExternalEntityPrivate:
12812
 * @doc:  the document the chunk pertains to
12813
 * @oldctxt:  the previous parser context if available
12814
 * @sax:  the SAX handler block (possibly NULL)
12815
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12816
 * @depth:  Used for loop detection, use 0
12817
 * @URL:  the URL for the entity to load
12818
 * @ID:  the System ID for the entity to load
12819
 * @list:  the return value for the set of parsed nodes
12820
 *
12821
 * Private version of xmlParseExternalEntity()
12822
 *
12823
 * Returns 0 if the entity is well formed, -1 in case of args problem and
12824
 *    the parser error code otherwise
12825
 */
12826
12827
static xmlParserErrors
12828
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
12829
                xmlSAXHandlerPtr sax,
12830
          void *user_data, int depth, const xmlChar *URL,
12831
262k
          const xmlChar *ID, xmlNodePtr *list) {
12832
262k
    xmlParserCtxtPtr ctxt;
12833
262k
    xmlDocPtr newDoc;
12834
262k
    xmlNodePtr newRoot;
12835
262k
    xmlParserErrors ret = XML_ERR_OK;
12836
262k
    xmlChar start[4];
12837
262k
    xmlCharEncoding enc;
12838
12839
262k
    if (((depth > 40) &&
12840
262k
  ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12841
262k
  (depth > 100)) {
12842
0
  xmlFatalErrMsg(oldctxt, XML_ERR_ENTITY_LOOP,
12843
0
                       "Maximum entity nesting depth exceeded");
12844
0
        return(XML_ERR_ENTITY_LOOP);
12845
0
    }
12846
12847
262k
    if (list != NULL)
12848
37.6k
        *list = NULL;
12849
262k
    if ((URL == NULL) && (ID == NULL))
12850
412
  return(XML_ERR_INTERNAL_ERROR);
12851
262k
    if (doc == NULL)
12852
0
  return(XML_ERR_INTERNAL_ERROR);
12853
12854
262k
    ctxt = xmlCreateEntityParserCtxtInternal(sax, user_data, URL, ID, NULL,
12855
262k
                                             oldctxt);
12856
262k
    if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
12857
44.5k
    if (oldctxt != NULL) {
12858
44.5k
        ctxt->nbErrors = oldctxt->nbErrors;
12859
44.5k
        ctxt->nbWarnings = oldctxt->nbWarnings;
12860
44.5k
    }
12861
44.5k
    xmlDetectSAX2(ctxt);
12862
12863
44.5k
    newDoc = xmlNewDoc(BAD_CAST "1.0");
12864
44.5k
    if (newDoc == NULL) {
12865
0
  xmlFreeParserCtxt(ctxt);
12866
0
  return(XML_ERR_INTERNAL_ERROR);
12867
0
    }
12868
44.5k
    newDoc->properties = XML_DOC_INTERNAL;
12869
44.5k
    if (doc) {
12870
44.5k
        newDoc->intSubset = doc->intSubset;
12871
44.5k
        newDoc->extSubset = doc->extSubset;
12872
44.5k
        if (doc->dict) {
12873
25.8k
            newDoc->dict = doc->dict;
12874
25.8k
            xmlDictReference(newDoc->dict);
12875
25.8k
        }
12876
44.5k
        if (doc->URL != NULL) {
12877
27.6k
            newDoc->URL = xmlStrdup(doc->URL);
12878
27.6k
        }
12879
44.5k
    }
12880
44.5k
    newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12881
44.5k
    if (newRoot == NULL) {
12882
0
  if (sax != NULL)
12883
0
  xmlFreeParserCtxt(ctxt);
12884
0
  newDoc->intSubset = NULL;
12885
0
  newDoc->extSubset = NULL;
12886
0
        xmlFreeDoc(newDoc);
12887
0
  return(XML_ERR_INTERNAL_ERROR);
12888
0
    }
12889
44.5k
    xmlAddChild((xmlNodePtr) newDoc, newRoot);
12890
44.5k
    nodePush(ctxt, newDoc->children);
12891
44.5k
    if (doc == NULL) {
12892
0
        ctxt->myDoc = newDoc;
12893
44.5k
    } else {
12894
44.5k
        ctxt->myDoc = doc;
12895
44.5k
        newRoot->doc = doc;
12896
44.5k
    }
12897
12898
    /*
12899
     * Get the 4 first bytes and decode the charset
12900
     * if enc != XML_CHAR_ENCODING_NONE
12901
     * plug some encoding conversion routines.
12902
     */
12903
44.5k
    GROW;
12904
44.5k
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12905
40.1k
  start[0] = RAW;
12906
40.1k
  start[1] = NXT(1);
12907
40.1k
  start[2] = NXT(2);
12908
40.1k
  start[3] = NXT(3);
12909
40.1k
  enc = xmlDetectCharEncoding(start, 4);
12910
40.1k
  if (enc != XML_CHAR_ENCODING_NONE) {
12911
2.19k
      xmlSwitchEncoding(ctxt, enc);
12912
2.19k
  }
12913
40.1k
    }
12914
12915
    /*
12916
     * Parse a possible text declaration first
12917
     */
12918
44.5k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
12919
695
  xmlParseTextDecl(ctxt);
12920
        /*
12921
         * An XML-1.0 document can't reference an entity not XML-1.0
12922
         */
12923
695
        if ((xmlStrEqual(oldctxt->version, BAD_CAST "1.0")) &&
12924
695
            (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
12925
113
            xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
12926
113
                           "Version mismatch between document and entity\n");
12927
113
        }
12928
695
    }
12929
12930
44.5k
    ctxt->instate = XML_PARSER_CONTENT;
12931
44.5k
    ctxt->depth = depth;
12932
44.5k
    if (oldctxt != NULL) {
12933
44.5k
  ctxt->_private = oldctxt->_private;
12934
44.5k
  ctxt->loadsubset = oldctxt->loadsubset;
12935
44.5k
  ctxt->validate = oldctxt->validate;
12936
44.5k
  ctxt->valid = oldctxt->valid;
12937
44.5k
  ctxt->replaceEntities = oldctxt->replaceEntities;
12938
44.5k
        if (oldctxt->validate) {
12939
15.3k
            ctxt->vctxt.error = oldctxt->vctxt.error;
12940
15.3k
            ctxt->vctxt.warning = oldctxt->vctxt.warning;
12941
15.3k
            ctxt->vctxt.userData = oldctxt->vctxt.userData;
12942
15.3k
            ctxt->vctxt.flags = oldctxt->vctxt.flags;
12943
15.3k
        }
12944
44.5k
  ctxt->external = oldctxt->external;
12945
44.5k
        if (ctxt->dict) xmlDictFree(ctxt->dict);
12946
44.5k
        ctxt->dict = oldctxt->dict;
12947
44.5k
        ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12948
44.5k
        ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12949
44.5k
        ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12950
44.5k
        ctxt->dictNames = oldctxt->dictNames;
12951
44.5k
        ctxt->attsDefault = oldctxt->attsDefault;
12952
44.5k
        ctxt->attsSpecial = oldctxt->attsSpecial;
12953
44.5k
        ctxt->linenumbers = oldctxt->linenumbers;
12954
44.5k
  ctxt->record_info = oldctxt->record_info;
12955
44.5k
  ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
12956
44.5k
  ctxt->node_seq.length = oldctxt->node_seq.length;
12957
44.5k
  ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
12958
44.5k
    } else {
12959
  /*
12960
   * Doing validity checking on chunk without context
12961
   * doesn't make sense
12962
   */
12963
0
  ctxt->_private = NULL;
12964
0
  ctxt->validate = 0;
12965
0
  ctxt->external = 2;
12966
0
  ctxt->loadsubset = 0;
12967
0
    }
12968
12969
44.5k
    xmlParseContent(ctxt);
12970
12971
44.5k
    if ((RAW == '<') && (NXT(1) == '/')) {
12972
953
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12973
43.5k
    } else if (RAW != 0) {
12974
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12975
0
    }
12976
44.5k
    if (ctxt->node != newDoc->children) {
12977
5.95k
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12978
5.95k
    }
12979
12980
44.5k
    if (!ctxt->wellFormed) {
12981
14.9k
  ret = (xmlParserErrors)ctxt->errNo;
12982
14.9k
        if (oldctxt != NULL) {
12983
14.9k
            oldctxt->errNo = ctxt->errNo;
12984
14.9k
            oldctxt->wellFormed = 0;
12985
14.9k
            xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12986
14.9k
        }
12987
29.5k
    } else {
12988
29.5k
  if (list != NULL) {
12989
5.28k
      xmlNodePtr cur;
12990
12991
      /*
12992
       * Return the newly created nodeset after unlinking it from
12993
       * they pseudo parent.
12994
       */
12995
5.28k
      cur = newDoc->children->children;
12996
5.28k
      *list = cur;
12997
177k
      while (cur != NULL) {
12998
172k
    cur->parent = NULL;
12999
172k
    cur = cur->next;
13000
172k
      }
13001
5.28k
            newDoc->children->children = NULL;
13002
5.28k
  }
13003
29.5k
  ret = XML_ERR_OK;
13004
29.5k
    }
13005
13006
    /*
13007
     * Also record the size of the entity parsed
13008
     */
13009
44.5k
    if (ctxt->input != NULL && oldctxt != NULL) {
13010
44.5k
        unsigned long consumed = ctxt->input->consumed;
13011
13012
44.5k
        xmlSaturatedAddSizeT(&consumed, ctxt->input->cur - ctxt->input->base);
13013
13014
44.5k
        xmlSaturatedAdd(&oldctxt->sizeentities, consumed);
13015
44.5k
        xmlSaturatedAdd(&oldctxt->sizeentities, ctxt->sizeentities);
13016
13017
44.5k
        xmlSaturatedAdd(&oldctxt->sizeentcopy, consumed);
13018
44.5k
        xmlSaturatedAdd(&oldctxt->sizeentcopy, ctxt->sizeentcopy);
13019
44.5k
    }
13020
13021
44.5k
    if (oldctxt != NULL) {
13022
44.5k
        ctxt->dict = NULL;
13023
44.5k
        ctxt->attsDefault = NULL;
13024
44.5k
        ctxt->attsSpecial = NULL;
13025
44.5k
        oldctxt->nbErrors = ctxt->nbErrors;
13026
44.5k
        oldctxt->nbWarnings = ctxt->nbWarnings;
13027
44.5k
        oldctxt->validate = ctxt->validate;
13028
44.5k
        oldctxt->valid = ctxt->valid;
13029
44.5k
        oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
13030
44.5k
        oldctxt->node_seq.length = ctxt->node_seq.length;
13031
44.5k
        oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
13032
44.5k
    }
13033
44.5k
    ctxt->node_seq.maximum = 0;
13034
44.5k
    ctxt->node_seq.length = 0;
13035
44.5k
    ctxt->node_seq.buffer = NULL;
13036
44.5k
    xmlFreeParserCtxt(ctxt);
13037
44.5k
    newDoc->intSubset = NULL;
13038
44.5k
    newDoc->extSubset = NULL;
13039
44.5k
    xmlFreeDoc(newDoc);
13040
13041
44.5k
    return(ret);
13042
44.5k
}
13043
13044
#ifdef LIBXML_SAX1_ENABLED
13045
/**
13046
 * xmlParseExternalEntity:
13047
 * @doc:  the document the chunk pertains to
13048
 * @sax:  the SAX handler block (possibly NULL)
13049
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13050
 * @depth:  Used for loop detection, use 0
13051
 * @URL:  the URL for the entity to load
13052
 * @ID:  the System ID for the entity to load
13053
 * @lst:  the return value for the set of parsed nodes
13054
 *
13055
 * Parse an external general entity
13056
 * An external general parsed entity is well-formed if it matches the
13057
 * production labeled extParsedEnt.
13058
 *
13059
 * [78] extParsedEnt ::= TextDecl? content
13060
 *
13061
 * Returns 0 if the entity is well formed, -1 in case of args problem and
13062
 *    the parser error code otherwise
13063
 */
13064
13065
int
13066
xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
13067
0
    int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
13068
0
    return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
13069
0
                           ID, lst));
13070
0
}
13071
13072
/**
13073
 * xmlParseBalancedChunkMemory:
13074
 * @doc:  the document the chunk pertains to (must not be NULL)
13075
 * @sax:  the SAX handler block (possibly NULL)
13076
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13077
 * @depth:  Used for loop detection, use 0
13078
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13079
 * @lst:  the return value for the set of parsed nodes
13080
 *
13081
 * Parse a well-balanced chunk of an XML document
13082
 * called by the parser
13083
 * The allowed sequence for the Well Balanced Chunk is the one defined by
13084
 * the content production in the XML grammar:
13085
 *
13086
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13087
 *
13088
 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13089
 *    the parser error code otherwise
13090
 */
13091
13092
int
13093
xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13094
0
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
13095
0
    return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
13096
0
                                                depth, string, lst, 0 );
13097
0
}
13098
#endif /* LIBXML_SAX1_ENABLED */
13099
13100
/**
13101
 * xmlParseBalancedChunkMemoryInternal:
13102
 * @oldctxt:  the existing parsing context
13103
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13104
 * @user_data:  the user data field for the parser context
13105
 * @lst:  the return value for the set of parsed nodes
13106
 *
13107
 *
13108
 * Parse a well-balanced chunk of an XML document
13109
 * called by the parser
13110
 * The allowed sequence for the Well Balanced Chunk is the one defined by
13111
 * the content production in the XML grammar:
13112
 *
13113
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13114
 *
13115
 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13116
 * error code otherwise
13117
 *
13118
 * In case recover is set to 1, the nodelist will not be empty even if
13119
 * the parsed chunk is not well balanced.
13120
 */
13121
static xmlParserErrors
13122
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
13123
57.5k
  const xmlChar *string, void *user_data, xmlNodePtr *lst) {
13124
57.5k
    xmlParserCtxtPtr ctxt;
13125
57.5k
    xmlDocPtr newDoc = NULL;
13126
57.5k
    xmlNodePtr newRoot;
13127
57.5k
    xmlSAXHandlerPtr oldsax = NULL;
13128
57.5k
    xmlNodePtr content = NULL;
13129
57.5k
    xmlNodePtr last = NULL;
13130
57.5k
    int size;
13131
57.5k
    xmlParserErrors ret = XML_ERR_OK;
13132
57.5k
#ifdef SAX2
13133
57.5k
    int i;
13134
57.5k
#endif
13135
13136
57.5k
    if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13137
57.5k
        (oldctxt->depth >  100)) {
13138
51
  xmlFatalErrMsg(oldctxt, XML_ERR_ENTITY_LOOP,
13139
51
                       "Maximum entity nesting depth exceeded");
13140
51
  return(XML_ERR_ENTITY_LOOP);
13141
51
    }
13142
13143
13144
57.4k
    if (lst != NULL)
13145
54.0k
        *lst = NULL;
13146
57.4k
    if (string == NULL)
13147
21
        return(XML_ERR_INTERNAL_ERROR);
13148
13149
57.4k
    size = xmlStrlen(string);
13150
13151
57.4k
    ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13152
57.4k
    if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13153
54.7k
    ctxt->nbErrors = oldctxt->nbErrors;
13154
54.7k
    ctxt->nbWarnings = oldctxt->nbWarnings;
13155
54.7k
    if (user_data != NULL)
13156
0
  ctxt->userData = user_data;
13157
54.7k
    else
13158
54.7k
  ctxt->userData = ctxt;
13159
54.7k
    if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13160
54.7k
    ctxt->dict = oldctxt->dict;
13161
54.7k
    ctxt->input_id = oldctxt->input_id;
13162
54.7k
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13163
54.7k
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13164
54.7k
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13165
13166
54.7k
#ifdef SAX2
13167
    /* propagate namespaces down the entity */
13168
55.0k
    for (i = 0;i < oldctxt->nsNr;i += 2) {
13169
339
        nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
13170
339
    }
13171
54.7k
#endif
13172
13173
54.7k
    oldsax = ctxt->sax;
13174
54.7k
    ctxt->sax = oldctxt->sax;
13175
54.7k
    xmlDetectSAX2(ctxt);
13176
54.7k
    ctxt->replaceEntities = oldctxt->replaceEntities;
13177
54.7k
    ctxt->options = oldctxt->options;
13178
13179
54.7k
    ctxt->_private = oldctxt->_private;
13180
54.7k
    if (oldctxt->myDoc == NULL) {
13181
0
  newDoc = xmlNewDoc(BAD_CAST "1.0");
13182
0
  if (newDoc == NULL) {
13183
0
      ctxt->sax = oldsax;
13184
0
      ctxt->dict = NULL;
13185
0
      xmlFreeParserCtxt(ctxt);
13186
0
      return(XML_ERR_INTERNAL_ERROR);
13187
0
  }
13188
0
  newDoc->properties = XML_DOC_INTERNAL;
13189
0
  newDoc->dict = ctxt->dict;
13190
0
  xmlDictReference(newDoc->dict);
13191
0
  ctxt->myDoc = newDoc;
13192
54.7k
    } else {
13193
54.7k
  ctxt->myDoc = oldctxt->myDoc;
13194
54.7k
        content = ctxt->myDoc->children;
13195
54.7k
  last = ctxt->myDoc->last;
13196
54.7k
    }
13197
54.7k
    newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
13198
54.7k
    if (newRoot == NULL) {
13199
0
  ctxt->sax = oldsax;
13200
0
  ctxt->dict = NULL;
13201
0
  xmlFreeParserCtxt(ctxt);
13202
0
  if (newDoc != NULL) {
13203
0
      xmlFreeDoc(newDoc);
13204
0
  }
13205
0
  return(XML_ERR_INTERNAL_ERROR);
13206
0
    }
13207
54.7k
    ctxt->myDoc->children = NULL;
13208
54.7k
    ctxt->myDoc->last = NULL;
13209
54.7k
    xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
13210
54.7k
    nodePush(ctxt, ctxt->myDoc->children);
13211
54.7k
    ctxt->instate = XML_PARSER_CONTENT;
13212
54.7k
    ctxt->depth = oldctxt->depth;
13213
13214
54.7k
    ctxt->validate = 0;
13215
54.7k
    ctxt->loadsubset = oldctxt->loadsubset;
13216
54.7k
    if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
13217
  /*
13218
   * ID/IDREF registration will be done in xmlValidateElement below
13219
   */
13220
45.7k
  ctxt->loadsubset |= XML_SKIP_IDS;
13221
45.7k
    }
13222
54.7k
    ctxt->dictNames = oldctxt->dictNames;
13223
54.7k
    ctxt->attsDefault = oldctxt->attsDefault;
13224
54.7k
    ctxt->attsSpecial = oldctxt->attsSpecial;
13225
13226
54.7k
    xmlParseContent(ctxt);
13227
54.7k
    if ((RAW == '<') && (NXT(1) == '/')) {
13228
321
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13229
54.4k
    } else if (RAW != 0) {
13230
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13231
0
    }
13232
54.7k
    if (ctxt->node != ctxt->myDoc->children) {
13233
1.48k
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13234
1.48k
    }
13235
13236
54.7k
    if (!ctxt->wellFormed) {
13237
6.97k
  ret = (xmlParserErrors)ctxt->errNo;
13238
6.97k
        oldctxt->errNo = ctxt->errNo;
13239
6.97k
        oldctxt->wellFormed = 0;
13240
6.97k
        xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13241
47.7k
    } else {
13242
47.7k
        ret = XML_ERR_OK;
13243
47.7k
    }
13244
13245
54.7k
    if ((lst != NULL) && (ret == XML_ERR_OK)) {
13246
46.6k
  xmlNodePtr cur;
13247
13248
  /*
13249
   * Return the newly created nodeset after unlinking it from
13250
   * they pseudo parent.
13251
   */
13252
46.6k
  cur = ctxt->myDoc->children->children;
13253
46.6k
  *lst = cur;
13254
170k
  while (cur != NULL) {
13255
123k
#ifdef LIBXML_VALID_ENABLED
13256
123k
      if ((oldctxt->validate) && (oldctxt->wellFormed) &&
13257
123k
    (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
13258
123k
    (cur->type == XML_ELEMENT_NODE)) {
13259
20.9k
    oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
13260
20.9k
      oldctxt->myDoc, cur);
13261
20.9k
      }
13262
123k
#endif /* LIBXML_VALID_ENABLED */
13263
123k
      cur->parent = NULL;
13264
123k
      cur = cur->next;
13265
123k
  }
13266
46.6k
  ctxt->myDoc->children->children = NULL;
13267
46.6k
    }
13268
54.7k
    if (ctxt->myDoc != NULL) {
13269
54.7k
  xmlFreeNode(ctxt->myDoc->children);
13270
54.7k
        ctxt->myDoc->children = content;
13271
54.7k
        ctxt->myDoc->last = last;
13272
54.7k
    }
13273
13274
    /*
13275
     * Also record the size of the entity parsed
13276
     */
13277
54.7k
    if (ctxt->input != NULL && oldctxt != NULL) {
13278
54.7k
        unsigned long consumed = ctxt->input->consumed;
13279
13280
54.7k
        xmlSaturatedAddSizeT(&consumed, ctxt->input->cur - ctxt->input->base);
13281
13282
54.7k
        xmlSaturatedAdd(&oldctxt->sizeentcopy, consumed);
13283
54.7k
        xmlSaturatedAdd(&oldctxt->sizeentcopy, ctxt->sizeentcopy);
13284
54.7k
    }
13285
13286
54.7k
    oldctxt->nbErrors = ctxt->nbErrors;
13287
54.7k
    oldctxt->nbWarnings = ctxt->nbWarnings;
13288
54.7k
    ctxt->sax = oldsax;
13289
54.7k
    ctxt->dict = NULL;
13290
54.7k
    ctxt->attsDefault = NULL;
13291
54.7k
    ctxt->attsSpecial = NULL;
13292
54.7k
    xmlFreeParserCtxt(ctxt);
13293
54.7k
    if (newDoc != NULL) {
13294
0
  xmlFreeDoc(newDoc);
13295
0
    }
13296
13297
54.7k
    return(ret);
13298
54.7k
}
13299
13300
/**
13301
 * xmlParseInNodeContext:
13302
 * @node:  the context node
13303
 * @data:  the input string
13304
 * @datalen:  the input string length in bytes
13305
 * @options:  a combination of xmlParserOption
13306
 * @lst:  the return value for the set of parsed nodes
13307
 *
13308
 * Parse a well-balanced chunk of an XML document
13309
 * within the context (DTD, namespaces, etc ...) of the given node.
13310
 *
13311
 * The allowed sequence for the data is a Well Balanced Chunk defined by
13312
 * the content production in the XML grammar:
13313
 *
13314
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13315
 *
13316
 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13317
 * error code otherwise
13318
 */
13319
xmlParserErrors
13320
xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13321
0
                      int options, xmlNodePtr *lst) {
13322
0
#ifdef SAX2
13323
0
    xmlParserCtxtPtr ctxt;
13324
0
    xmlDocPtr doc = NULL;
13325
0
    xmlNodePtr fake, cur;
13326
0
    int nsnr = 0;
13327
13328
0
    xmlParserErrors ret = XML_ERR_OK;
13329
13330
    /*
13331
     * check all input parameters, grab the document
13332
     */
13333
0
    if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13334
0
        return(XML_ERR_INTERNAL_ERROR);
13335
0
    switch (node->type) {
13336
0
        case XML_ELEMENT_NODE:
13337
0
        case XML_ATTRIBUTE_NODE:
13338
0
        case XML_TEXT_NODE:
13339
0
        case XML_CDATA_SECTION_NODE:
13340
0
        case XML_ENTITY_REF_NODE:
13341
0
        case XML_PI_NODE:
13342
0
        case XML_COMMENT_NODE:
13343
0
        case XML_DOCUMENT_NODE:
13344
0
        case XML_HTML_DOCUMENT_NODE:
13345
0
      break;
13346
0
  default:
13347
0
      return(XML_ERR_INTERNAL_ERROR);
13348
13349
0
    }
13350
0
    while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13351
0
           (node->type != XML_DOCUMENT_NODE) &&
13352
0
     (node->type != XML_HTML_DOCUMENT_NODE))
13353
0
  node = node->parent;
13354
0
    if (node == NULL)
13355
0
  return(XML_ERR_INTERNAL_ERROR);
13356
0
    if (node->type == XML_ELEMENT_NODE)
13357
0
  doc = node->doc;
13358
0
    else
13359
0
        doc = (xmlDocPtr) node;
13360
0
    if (doc == NULL)
13361
0
  return(XML_ERR_INTERNAL_ERROR);
13362
13363
    /*
13364
     * allocate a context and set-up everything not related to the
13365
     * node position in the tree
13366
     */
13367
0
    if (doc->type == XML_DOCUMENT_NODE)
13368
0
  ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13369
0
#ifdef LIBXML_HTML_ENABLED
13370
0
    else if (doc->type == XML_HTML_DOCUMENT_NODE) {
13371
0
  ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
13372
        /*
13373
         * When parsing in context, it makes no sense to add implied
13374
         * elements like html/body/etc...
13375
         */
13376
0
        options |= HTML_PARSE_NOIMPLIED;
13377
0
    }
13378
0
#endif
13379
0
    else
13380
0
        return(XML_ERR_INTERNAL_ERROR);
13381
13382
0
    if (ctxt == NULL)
13383
0
        return(XML_ERR_NO_MEMORY);
13384
13385
    /*
13386
     * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13387
     * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13388
     * we must wait until the last moment to free the original one.
13389
     */
13390
0
    if (doc->dict != NULL) {
13391
0
        if (ctxt->dict != NULL)
13392
0
      xmlDictFree(ctxt->dict);
13393
0
  ctxt->dict = doc->dict;
13394
0
    } else
13395
0
        options |= XML_PARSE_NODICT;
13396
13397
0
    if (doc->encoding != NULL) {
13398
0
        xmlCharEncodingHandlerPtr hdlr;
13399
13400
0
        if (ctxt->encoding != NULL)
13401
0
      xmlFree((xmlChar *) ctxt->encoding);
13402
0
        ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13403
13404
0
        hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding);
13405
0
        if (hdlr != NULL) {
13406
0
            xmlSwitchToEncoding(ctxt, hdlr);
13407
0
  } else {
13408
0
            return(XML_ERR_UNSUPPORTED_ENCODING);
13409
0
        }
13410
0
    }
13411
13412
0
    xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13413
0
    xmlDetectSAX2(ctxt);
13414
0
    ctxt->myDoc = doc;
13415
    /* parsing in context, i.e. as within existing content */
13416
0
    ctxt->input_id = 2;
13417
0
    ctxt->instate = XML_PARSER_CONTENT;
13418
13419
0
    fake = xmlNewDocComment(node->doc, NULL);
13420
0
    if (fake == NULL) {
13421
0
        xmlFreeParserCtxt(ctxt);
13422
0
  return(XML_ERR_NO_MEMORY);
13423
0
    }
13424
0
    xmlAddChild(node, fake);
13425
13426
0
    if (node->type == XML_ELEMENT_NODE) {
13427
0
  nodePush(ctxt, node);
13428
  /*
13429
   * initialize the SAX2 namespaces stack
13430
   */
13431
0
  cur = node;
13432
0
  while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13433
0
      xmlNsPtr ns = cur->nsDef;
13434
0
      const xmlChar *iprefix, *ihref;
13435
13436
0
      while (ns != NULL) {
13437
0
    if (ctxt->dict) {
13438
0
        iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13439
0
        ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13440
0
    } else {
13441
0
        iprefix = ns->prefix;
13442
0
        ihref = ns->href;
13443
0
    }
13444
13445
0
          if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13446
0
        nsPush(ctxt, iprefix, ihref);
13447
0
        nsnr++;
13448
0
    }
13449
0
    ns = ns->next;
13450
0
      }
13451
0
      cur = cur->parent;
13452
0
  }
13453
0
    }
13454
13455
0
    if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13456
  /*
13457
   * ID/IDREF registration will be done in xmlValidateElement below
13458
   */
13459
0
  ctxt->loadsubset |= XML_SKIP_IDS;
13460
0
    }
13461
13462
0
#ifdef LIBXML_HTML_ENABLED
13463
0
    if (doc->type == XML_HTML_DOCUMENT_NODE)
13464
0
        __htmlParseContent(ctxt);
13465
0
    else
13466
0
#endif
13467
0
  xmlParseContent(ctxt);
13468
13469
0
    nsPop(ctxt, nsnr);
13470
0
    if ((RAW == '<') && (NXT(1) == '/')) {
13471
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13472
0
    } else if (RAW != 0) {
13473
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13474
0
    }
13475
0
    if ((ctxt->node != NULL) && (ctxt->node != node)) {
13476
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13477
0
  ctxt->wellFormed = 0;
13478
0
    }
13479
13480
0
    if (!ctxt->wellFormed) {
13481
0
        if (ctxt->errNo == 0)
13482
0
      ret = XML_ERR_INTERNAL_ERROR;
13483
0
  else
13484
0
      ret = (xmlParserErrors)ctxt->errNo;
13485
0
    } else {
13486
0
        ret = XML_ERR_OK;
13487
0
    }
13488
13489
    /*
13490
     * Return the newly created nodeset after unlinking it from
13491
     * the pseudo sibling.
13492
     */
13493
13494
0
    cur = fake->next;
13495
0
    fake->next = NULL;
13496
0
    node->last = fake;
13497
13498
0
    if (cur != NULL) {
13499
0
  cur->prev = NULL;
13500
0
    }
13501
13502
0
    *lst = cur;
13503
13504
0
    while (cur != NULL) {
13505
0
  cur->parent = NULL;
13506
0
  cur = cur->next;
13507
0
    }
13508
13509
0
    xmlUnlinkNode(fake);
13510
0
    xmlFreeNode(fake);
13511
13512
13513
0
    if (ret != XML_ERR_OK) {
13514
0
        xmlFreeNodeList(*lst);
13515
0
  *lst = NULL;
13516
0
    }
13517
13518
0
    if (doc->dict != NULL)
13519
0
        ctxt->dict = NULL;
13520
0
    xmlFreeParserCtxt(ctxt);
13521
13522
0
    return(ret);
13523
#else /* !SAX2 */
13524
    return(XML_ERR_INTERNAL_ERROR);
13525
#endif
13526
0
}
13527
13528
#ifdef LIBXML_SAX1_ENABLED
13529
/**
13530
 * xmlParseBalancedChunkMemoryRecover:
13531
 * @doc:  the document the chunk pertains to (must not be NULL)
13532
 * @sax:  the SAX handler block (possibly NULL)
13533
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13534
 * @depth:  Used for loop detection, use 0
13535
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13536
 * @lst:  the return value for the set of parsed nodes
13537
 * @recover: return nodes even if the data is broken (use 0)
13538
 *
13539
 *
13540
 * Parse a well-balanced chunk of an XML document
13541
 * called by the parser
13542
 * The allowed sequence for the Well Balanced Chunk is the one defined by
13543
 * the content production in the XML grammar:
13544
 *
13545
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13546
 *
13547
 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13548
 *    the parser error code otherwise
13549
 *
13550
 * In case recover is set to 1, the nodelist will not be empty even if
13551
 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13552
 * some extent.
13553
 */
13554
int
13555
xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13556
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
13557
0
     int recover) {
13558
0
    xmlParserCtxtPtr ctxt;
13559
0
    xmlDocPtr newDoc;
13560
0
    xmlSAXHandlerPtr oldsax = NULL;
13561
0
    xmlNodePtr content, newRoot;
13562
0
    int size;
13563
0
    int ret = 0;
13564
13565
0
    if (depth > 40) {
13566
0
  return(XML_ERR_ENTITY_LOOP);
13567
0
    }
13568
13569
13570
0
    if (lst != NULL)
13571
0
        *lst = NULL;
13572
0
    if (string == NULL)
13573
0
        return(-1);
13574
13575
0
    size = xmlStrlen(string);
13576
13577
0
    ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13578
0
    if (ctxt == NULL) return(-1);
13579
0
    ctxt->userData = ctxt;
13580
0
    if (sax != NULL) {
13581
0
  oldsax = ctxt->sax;
13582
0
        ctxt->sax = sax;
13583
0
  if (user_data != NULL)
13584
0
      ctxt->userData = user_data;
13585
0
    }
13586
0
    newDoc = xmlNewDoc(BAD_CAST "1.0");
13587
0
    if (newDoc == NULL) {
13588
0
  xmlFreeParserCtxt(ctxt);
13589
0
  return(-1);
13590
0
    }
13591
0
    newDoc->properties = XML_DOC_INTERNAL;
13592
0
    if ((doc != NULL) && (doc->dict != NULL)) {
13593
0
        xmlDictFree(ctxt->dict);
13594
0
  ctxt->dict = doc->dict;
13595
0
  xmlDictReference(ctxt->dict);
13596
0
  ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13597
0
  ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13598
0
  ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13599
0
  ctxt->dictNames = 1;
13600
0
    } else {
13601
0
  xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
13602
0
    }
13603
    /* doc == NULL is only supported for historic reasons */
13604
0
    if (doc != NULL) {
13605
0
  newDoc->intSubset = doc->intSubset;
13606
0
  newDoc->extSubset = doc->extSubset;
13607
0
    }
13608
0
    newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13609
0
    if (newRoot == NULL) {
13610
0
  if (sax != NULL)
13611
0
      ctxt->sax = oldsax;
13612
0
  xmlFreeParserCtxt(ctxt);
13613
0
  newDoc->intSubset = NULL;
13614
0
  newDoc->extSubset = NULL;
13615
0
        xmlFreeDoc(newDoc);
13616
0
  return(-1);
13617
0
    }
13618
0
    xmlAddChild((xmlNodePtr) newDoc, newRoot);
13619
0
    nodePush(ctxt, newRoot);
13620
    /* doc == NULL is only supported for historic reasons */
13621
0
    if (doc == NULL) {
13622
0
  ctxt->myDoc = newDoc;
13623
0
    } else {
13624
0
  ctxt->myDoc = newDoc;
13625
0
  newDoc->children->doc = doc;
13626
  /* Ensure that doc has XML spec namespace */
13627
0
  xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13628
0
  newDoc->oldNs = doc->oldNs;
13629
0
    }
13630
0
    ctxt->instate = XML_PARSER_CONTENT;
13631
0
    ctxt->input_id = 2;
13632
0
    ctxt->depth = depth;
13633
13634
    /*
13635
     * Doing validity checking on chunk doesn't make sense
13636
     */
13637
0
    ctxt->validate = 0;
13638
0
    ctxt->loadsubset = 0;
13639
0
    xmlDetectSAX2(ctxt);
13640
13641
0
    if ( doc != NULL ){
13642
0
        content = doc->children;
13643
0
        doc->children = NULL;
13644
0
        xmlParseContent(ctxt);
13645
0
        doc->children = content;
13646
0
    }
13647
0
    else {
13648
0
        xmlParseContent(ctxt);
13649
0
    }
13650
0
    if ((RAW == '<') && (NXT(1) == '/')) {
13651
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13652
0
    } else if (RAW != 0) {
13653
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13654
0
    }
13655
0
    if (ctxt->node != newDoc->children) {
13656
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13657
0
    }
13658
13659
0
    if (!ctxt->wellFormed) {
13660
0
        if (ctxt->errNo == 0)
13661
0
      ret = 1;
13662
0
  else
13663
0
      ret = ctxt->errNo;
13664
0
    } else {
13665
0
      ret = 0;
13666
0
    }
13667
13668
0
    if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13669
0
  xmlNodePtr cur;
13670
13671
  /*
13672
   * Return the newly created nodeset after unlinking it from
13673
   * they pseudo parent.
13674
   */
13675
0
  cur = newDoc->children->children;
13676
0
  *lst = cur;
13677
0
  while (cur != NULL) {
13678
0
      xmlSetTreeDoc(cur, doc);
13679
0
      cur->parent = NULL;
13680
0
      cur = cur->next;
13681
0
  }
13682
0
  newDoc->children->children = NULL;
13683
0
    }
13684
13685
0
    if (sax != NULL)
13686
0
  ctxt->sax = oldsax;
13687
0
    xmlFreeParserCtxt(ctxt);
13688
0
    newDoc->intSubset = NULL;
13689
0
    newDoc->extSubset = NULL;
13690
    /* This leaks the namespace list if doc == NULL */
13691
0
    newDoc->oldNs = NULL;
13692
0
    xmlFreeDoc(newDoc);
13693
13694
0
    return(ret);
13695
0
}
13696
13697
/**
13698
 * xmlSAXParseEntity:
13699
 * @sax:  the SAX handler block
13700
 * @filename:  the filename
13701
 *
13702
 * DEPRECATED: Don't use.
13703
 *
13704
 * parse an XML external entity out of context and build a tree.
13705
 * It use the given SAX function block to handle the parsing callback.
13706
 * If sax is NULL, fallback to the default DOM tree building routines.
13707
 *
13708
 * [78] extParsedEnt ::= TextDecl? content
13709
 *
13710
 * This correspond to a "Well Balanced" chunk
13711
 *
13712
 * Returns the resulting document tree
13713
 */
13714
13715
xmlDocPtr
13716
0
xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13717
0
    xmlDocPtr ret;
13718
0
    xmlParserCtxtPtr ctxt;
13719
13720
0
    ctxt = xmlCreateFileParserCtxt(filename);
13721
0
    if (ctxt == NULL) {
13722
0
  return(NULL);
13723
0
    }
13724
0
    if (sax != NULL) {
13725
0
  if (ctxt->sax != NULL)
13726
0
      xmlFree(ctxt->sax);
13727
0
        ctxt->sax = sax;
13728
0
        ctxt->userData = NULL;
13729
0
    }
13730
13731
0
    xmlParseExtParsedEnt(ctxt);
13732
13733
0
    if (ctxt->wellFormed)
13734
0
  ret = ctxt->myDoc;
13735
0
    else {
13736
0
        ret = NULL;
13737
0
        xmlFreeDoc(ctxt->myDoc);
13738
0
        ctxt->myDoc = NULL;
13739
0
    }
13740
0
    if (sax != NULL)
13741
0
        ctxt->sax = NULL;
13742
0
    xmlFreeParserCtxt(ctxt);
13743
13744
0
    return(ret);
13745
0
}
13746
13747
/**
13748
 * xmlParseEntity:
13749
 * @filename:  the filename
13750
 *
13751
 * parse an XML external entity out of context and build a tree.
13752
 *
13753
 * [78] extParsedEnt ::= TextDecl? content
13754
 *
13755
 * This correspond to a "Well Balanced" chunk
13756
 *
13757
 * Returns the resulting document tree
13758
 */
13759
13760
xmlDocPtr
13761
0
xmlParseEntity(const char *filename) {
13762
0
    return(xmlSAXParseEntity(NULL, filename));
13763
0
}
13764
#endif /* LIBXML_SAX1_ENABLED */
13765
13766
/**
13767
 * xmlCreateEntityParserCtxtInternal:
13768
 * @URL:  the entity URL
13769
 * @ID:  the entity PUBLIC ID
13770
 * @base:  a possible base for the target URI
13771
 * @pctx:  parser context used to set options on new context
13772
 *
13773
 * Create a parser context for an external entity
13774
 * Automatic support for ZLIB/Compress compressed document is provided
13775
 * by default if found at compile-time.
13776
 *
13777
 * Returns the new parser context or NULL
13778
 */
13779
static xmlParserCtxtPtr
13780
xmlCreateEntityParserCtxtInternal(xmlSAXHandlerPtr sax, void *userData,
13781
        const xmlChar *URL, const xmlChar *ID, const xmlChar *base,
13782
262k
        xmlParserCtxtPtr pctx) {
13783
262k
    xmlParserCtxtPtr ctxt;
13784
262k
    xmlParserInputPtr inputStream;
13785
262k
    char *directory = NULL;
13786
262k
    xmlChar *uri;
13787
13788
262k
    ctxt = xmlNewSAXParserCtxt(sax, userData);
13789
262k
    if (ctxt == NULL) {
13790
0
  return(NULL);
13791
0
    }
13792
13793
262k
    if (pctx != NULL) {
13794
262k
        ctxt->options = pctx->options;
13795
262k
        ctxt->_private = pctx->_private;
13796
262k
  ctxt->input_id = pctx->input_id;
13797
262k
    }
13798
13799
    /* Don't read from stdin. */
13800
262k
    if (xmlStrcmp(URL, BAD_CAST "-") == 0)
13801
18
        URL = BAD_CAST "./-";
13802
13803
262k
    uri = xmlBuildURI(URL, base);
13804
13805
262k
    if (uri == NULL) {
13806
6.93k
  inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
13807
6.93k
  if (inputStream == NULL) {
13808
6.59k
      xmlFreeParserCtxt(ctxt);
13809
6.59k
      return(NULL);
13810
6.59k
  }
13811
13812
340
  inputPush(ctxt, inputStream);
13813
13814
340
  if ((ctxt->directory == NULL) && (directory == NULL))
13815
340
      directory = xmlParserGetDirectory((char *)URL);
13816
340
  if ((ctxt->directory == NULL) && (directory != NULL))
13817
340
      ctxt->directory = directory;
13818
255k
    } else {
13819
255k
  inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
13820
255k
  if (inputStream == NULL) {
13821
211k
      xmlFree(uri);
13822
211k
      xmlFreeParserCtxt(ctxt);
13823
211k
      return(NULL);
13824
211k
  }
13825
13826
44.1k
  inputPush(ctxt, inputStream);
13827
13828
44.1k
  if ((ctxt->directory == NULL) && (directory == NULL))
13829
44.1k
      directory = xmlParserGetDirectory((char *)uri);
13830
44.1k
  if ((ctxt->directory == NULL) && (directory != NULL))
13831
44.1k
      ctxt->directory = directory;
13832
44.1k
  xmlFree(uri);
13833
44.1k
    }
13834
44.5k
    return(ctxt);
13835
262k
}
13836
13837
/**
13838
 * xmlCreateEntityParserCtxt:
13839
 * @URL:  the entity URL
13840
 * @ID:  the entity PUBLIC ID
13841
 * @base:  a possible base for the target URI
13842
 *
13843
 * Create a parser context for an external entity
13844
 * Automatic support for ZLIB/Compress compressed document is provided
13845
 * by default if found at compile-time.
13846
 *
13847
 * Returns the new parser context or NULL
13848
 */
13849
xmlParserCtxtPtr
13850
xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
13851
0
                    const xmlChar *base) {
13852
0
    return xmlCreateEntityParserCtxtInternal(NULL, NULL, URL, ID, base, NULL);
13853
13854
0
}
13855
13856
/************************************************************************
13857
 *                  *
13858
 *    Front ends when parsing from a file     *
13859
 *                  *
13860
 ************************************************************************/
13861
13862
/**
13863
 * xmlCreateURLParserCtxt:
13864
 * @filename:  the filename or URL
13865
 * @options:  a combination of xmlParserOption
13866
 *
13867
 * Create a parser context for a file or URL content.
13868
 * Automatic support for ZLIB/Compress compressed document is provided
13869
 * by default if found at compile-time and for file accesses
13870
 *
13871
 * Returns the new parser context or NULL
13872
 */
13873
xmlParserCtxtPtr
13874
xmlCreateURLParserCtxt(const char *filename, int options)
13875
0
{
13876
0
    xmlParserCtxtPtr ctxt;
13877
0
    xmlParserInputPtr inputStream;
13878
0
    char *directory = NULL;
13879
13880
0
    ctxt = xmlNewParserCtxt();
13881
0
    if (ctxt == NULL) {
13882
0
  xmlErrMemory(NULL, "cannot allocate parser context");
13883
0
  return(NULL);
13884
0
    }
13885
13886
0
    if (options)
13887
0
  xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13888
0
    ctxt->linenumbers = 1;
13889
13890
0
    inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
13891
0
    if (inputStream == NULL) {
13892
0
  xmlFreeParserCtxt(ctxt);
13893
0
  return(NULL);
13894
0
    }
13895
13896
0
    inputPush(ctxt, inputStream);
13897
0
    if ((ctxt->directory == NULL) && (directory == NULL))
13898
0
        directory = xmlParserGetDirectory(filename);
13899
0
    if ((ctxt->directory == NULL) && (directory != NULL))
13900
0
        ctxt->directory = directory;
13901
13902
0
    return(ctxt);
13903
0
}
13904
13905
/**
13906
 * xmlCreateFileParserCtxt:
13907
 * @filename:  the filename
13908
 *
13909
 * Create a parser context for a file content.
13910
 * Automatic support for ZLIB/Compress compressed document is provided
13911
 * by default if found at compile-time.
13912
 *
13913
 * Returns the new parser context or NULL
13914
 */
13915
xmlParserCtxtPtr
13916
xmlCreateFileParserCtxt(const char *filename)
13917
0
{
13918
0
    return(xmlCreateURLParserCtxt(filename, 0));
13919
0
}
13920
13921
#ifdef LIBXML_SAX1_ENABLED
13922
/**
13923
 * xmlSAXParseFileWithData:
13924
 * @sax:  the SAX handler block
13925
 * @filename:  the filename
13926
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
13927
 *             documents
13928
 * @data:  the userdata
13929
 *
13930
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
13931
 *
13932
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13933
 * compressed document is provided by default if found at compile-time.
13934
 * It use the given SAX function block to handle the parsing callback.
13935
 * If sax is NULL, fallback to the default DOM tree building routines.
13936
 *
13937
 * User data (void *) is stored within the parser context in the
13938
 * context's _private member, so it is available nearly everywhere in libxml
13939
 *
13940
 * Returns the resulting document tree
13941
 */
13942
13943
xmlDocPtr
13944
xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
13945
0
                        int recovery, void *data) {
13946
0
    xmlDocPtr ret;
13947
0
    xmlParserCtxtPtr ctxt;
13948
13949
0
    xmlInitParser();
13950
13951
0
    ctxt = xmlCreateFileParserCtxt(filename);
13952
0
    if (ctxt == NULL) {
13953
0
  return(NULL);
13954
0
    }
13955
0
    if (sax != NULL) {
13956
0
  if (ctxt->sax != NULL)
13957
0
      xmlFree(ctxt->sax);
13958
0
        ctxt->sax = sax;
13959
0
    }
13960
0
    xmlDetectSAX2(ctxt);
13961
0
    if (data!=NULL) {
13962
0
  ctxt->_private = data;
13963
0
    }
13964
13965
0
    if (ctxt->directory == NULL)
13966
0
        ctxt->directory = xmlParserGetDirectory(filename);
13967
13968
0
    ctxt->recovery = recovery;
13969
13970
0
    xmlParseDocument(ctxt);
13971
13972
0
    if ((ctxt->wellFormed) || recovery) {
13973
0
        ret = ctxt->myDoc;
13974
0
  if ((ret != NULL) && (ctxt->input->buf != NULL)) {
13975
0
      if (ctxt->input->buf->compressed > 0)
13976
0
    ret->compression = 9;
13977
0
      else
13978
0
    ret->compression = ctxt->input->buf->compressed;
13979
0
  }
13980
0
    }
13981
0
    else {
13982
0
       ret = NULL;
13983
0
       xmlFreeDoc(ctxt->myDoc);
13984
0
       ctxt->myDoc = NULL;
13985
0
    }
13986
0
    if (sax != NULL)
13987
0
        ctxt->sax = NULL;
13988
0
    xmlFreeParserCtxt(ctxt);
13989
13990
0
    return(ret);
13991
0
}
13992
13993
/**
13994
 * xmlSAXParseFile:
13995
 * @sax:  the SAX handler block
13996
 * @filename:  the filename
13997
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
13998
 *             documents
13999
 *
14000
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
14001
 *
14002
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14003
 * compressed document is provided by default if found at compile-time.
14004
 * It use the given SAX function block to handle the parsing callback.
14005
 * If sax is NULL, fallback to the default DOM tree building routines.
14006
 *
14007
 * Returns the resulting document tree
14008
 */
14009
14010
xmlDocPtr
14011
xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
14012
0
                          int recovery) {
14013
0
    return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
14014
0
}
14015
14016
/**
14017
 * xmlRecoverDoc:
14018
 * @cur:  a pointer to an array of xmlChar
14019
 *
14020
 * DEPRECATED: Use xmlReadDoc with XML_PARSE_RECOVER.
14021
 *
14022
 * parse an XML in-memory document and build a tree.
14023
 * In the case the document is not Well Formed, a attempt to build a
14024
 * tree is tried anyway
14025
 *
14026
 * Returns the resulting document tree or NULL in case of failure
14027
 */
14028
14029
xmlDocPtr
14030
0
xmlRecoverDoc(const xmlChar *cur) {
14031
0
    return(xmlSAXParseDoc(NULL, cur, 1));
14032
0
}
14033
14034
/**
14035
 * xmlParseFile:
14036
 * @filename:  the filename
14037
 *
14038
 * DEPRECATED: Use xmlReadFile.
14039
 *
14040
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14041
 * compressed document is provided by default if found at compile-time.
14042
 *
14043
 * Returns the resulting document tree if the file was wellformed,
14044
 * NULL otherwise.
14045
 */
14046
14047
xmlDocPtr
14048
0
xmlParseFile(const char *filename) {
14049
0
    return(xmlSAXParseFile(NULL, filename, 0));
14050
0
}
14051
14052
/**
14053
 * xmlRecoverFile:
14054
 * @filename:  the filename
14055
 *
14056
 * DEPRECATED: Use xmlReadFile with XML_PARSE_RECOVER.
14057
 *
14058
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14059
 * compressed document is provided by default if found at compile-time.
14060
 * In the case the document is not Well Formed, it attempts to build
14061
 * a tree anyway
14062
 *
14063
 * Returns the resulting document tree or NULL in case of failure
14064
 */
14065
14066
xmlDocPtr
14067
0
xmlRecoverFile(const char *filename) {
14068
0
    return(xmlSAXParseFile(NULL, filename, 1));
14069
0
}
14070
14071
14072
/**
14073
 * xmlSetupParserForBuffer:
14074
 * @ctxt:  an XML parser context
14075
 * @buffer:  a xmlChar * buffer
14076
 * @filename:  a file name
14077
 *
14078
 * DEPRECATED: Don't use.
14079
 *
14080
 * Setup the parser context to parse a new buffer; Clears any prior
14081
 * contents from the parser context. The buffer parameter must not be
14082
 * NULL, but the filename parameter can be
14083
 */
14084
void
14085
xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
14086
                             const char* filename)
14087
0
{
14088
0
    xmlParserInputPtr input;
14089
14090
0
    if ((ctxt == NULL) || (buffer == NULL))
14091
0
        return;
14092
14093
0
    input = xmlNewInputStream(ctxt);
14094
0
    if (input == NULL) {
14095
0
        xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
14096
0
        xmlClearParserCtxt(ctxt);
14097
0
        return;
14098
0
    }
14099
14100
0
    xmlClearParserCtxt(ctxt);
14101
0
    if (filename != NULL)
14102
0
        input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
14103
0
    input->base = buffer;
14104
0
    input->cur = buffer;
14105
0
    input->end = &buffer[xmlStrlen(buffer)];
14106
0
    inputPush(ctxt, input);
14107
0
}
14108
14109
/**
14110
 * xmlSAXUserParseFile:
14111
 * @sax:  a SAX handler
14112
 * @user_data:  The user data returned on SAX callbacks
14113
 * @filename:  a file name
14114
 *
14115
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
14116
 *
14117
 * parse an XML file and call the given SAX handler routines.
14118
 * Automatic support for ZLIB/Compress compressed document is provided
14119
 *
14120
 * Returns 0 in case of success or a error number otherwise
14121
 */
14122
int
14123
xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
14124
0
                    const char *filename) {
14125
0
    int ret = 0;
14126
0
    xmlParserCtxtPtr ctxt;
14127
14128
0
    ctxt = xmlCreateFileParserCtxt(filename);
14129
0
    if (ctxt == NULL) return -1;
14130
0
    if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14131
0
  xmlFree(ctxt->sax);
14132
0
    ctxt->sax = sax;
14133
0
    xmlDetectSAX2(ctxt);
14134
14135
0
    if (user_data != NULL)
14136
0
  ctxt->userData = user_data;
14137
14138
0
    xmlParseDocument(ctxt);
14139
14140
0
    if (ctxt->wellFormed)
14141
0
  ret = 0;
14142
0
    else {
14143
0
        if (ctxt->errNo != 0)
14144
0
      ret = ctxt->errNo;
14145
0
  else
14146
0
      ret = -1;
14147
0
    }
14148
0
    if (sax != NULL)
14149
0
  ctxt->sax = NULL;
14150
0
    if (ctxt->myDoc != NULL) {
14151
0
        xmlFreeDoc(ctxt->myDoc);
14152
0
  ctxt->myDoc = NULL;
14153
0
    }
14154
0
    xmlFreeParserCtxt(ctxt);
14155
14156
0
    return ret;
14157
0
}
14158
#endif /* LIBXML_SAX1_ENABLED */
14159
14160
/************************************************************************
14161
 *                  *
14162
 *    Front ends when parsing from memory     *
14163
 *                  *
14164
 ************************************************************************/
14165
14166
/**
14167
 * xmlCreateMemoryParserCtxt:
14168
 * @buffer:  a pointer to a char array
14169
 * @size:  the size of the array
14170
 *
14171
 * Create a parser context for an XML in-memory document.
14172
 *
14173
 * Returns the new parser context or NULL
14174
 */
14175
xmlParserCtxtPtr
14176
351k
xmlCreateMemoryParserCtxt(const char *buffer, int size) {
14177
351k
    xmlParserCtxtPtr ctxt;
14178
351k
    xmlParserInputPtr input;
14179
351k
    xmlParserInputBufferPtr buf;
14180
14181
351k
    if (buffer == NULL)
14182
0
  return(NULL);
14183
351k
    if (size <= 0)
14184
3.13k
  return(NULL);
14185
14186
348k
    ctxt = xmlNewParserCtxt();
14187
348k
    if (ctxt == NULL)
14188
0
  return(NULL);
14189
14190
348k
    buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14191
348k
    if (buf == NULL) {
14192
0
  xmlFreeParserCtxt(ctxt);
14193
0
  return(NULL);
14194
0
    }
14195
14196
348k
    input = xmlNewInputStream(ctxt);
14197
348k
    if (input == NULL) {
14198
0
  xmlFreeParserInputBuffer(buf);
14199
0
  xmlFreeParserCtxt(ctxt);
14200
0
  return(NULL);
14201
0
    }
14202
14203
348k
    input->filename = NULL;
14204
348k
    input->buf = buf;
14205
348k
    xmlBufResetInput(input->buf->buffer, input);
14206
14207
348k
    inputPush(ctxt, input);
14208
348k
    return(ctxt);
14209
348k
}
14210
14211
#ifdef LIBXML_SAX1_ENABLED
14212
/**
14213
 * xmlSAXParseMemoryWithData:
14214
 * @sax:  the SAX handler block
14215
 * @buffer:  an pointer to a char array
14216
 * @size:  the size of the array
14217
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14218
 *             documents
14219
 * @data:  the userdata
14220
 *
14221
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14222
 *
14223
 * parse an XML in-memory block and use the given SAX function block
14224
 * to handle the parsing callback. If sax is NULL, fallback to the default
14225
 * DOM tree building routines.
14226
 *
14227
 * User data (void *) is stored within the parser context in the
14228
 * context's _private member, so it is available nearly everywhere in libxml
14229
 *
14230
 * Returns the resulting document tree
14231
 */
14232
14233
xmlDocPtr
14234
xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14235
0
            int size, int recovery, void *data) {
14236
0
    xmlDocPtr ret;
14237
0
    xmlParserCtxtPtr ctxt;
14238
14239
0
    xmlInitParser();
14240
14241
0
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14242
0
    if (ctxt == NULL) return(NULL);
14243
0
    if (sax != NULL) {
14244
0
  if (ctxt->sax != NULL)
14245
0
      xmlFree(ctxt->sax);
14246
0
        ctxt->sax = sax;
14247
0
    }
14248
0
    xmlDetectSAX2(ctxt);
14249
0
    if (data!=NULL) {
14250
0
  ctxt->_private=data;
14251
0
    }
14252
14253
0
    ctxt->recovery = recovery;
14254
14255
0
    xmlParseDocument(ctxt);
14256
14257
0
    if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14258
0
    else {
14259
0
       ret = NULL;
14260
0
       xmlFreeDoc(ctxt->myDoc);
14261
0
       ctxt->myDoc = NULL;
14262
0
    }
14263
0
    if (sax != NULL)
14264
0
  ctxt->sax = NULL;
14265
0
    xmlFreeParserCtxt(ctxt);
14266
14267
0
    return(ret);
14268
0
}
14269
14270
/**
14271
 * xmlSAXParseMemory:
14272
 * @sax:  the SAX handler block
14273
 * @buffer:  an pointer to a char array
14274
 * @size:  the size of the array
14275
 * @recovery:  work in recovery mode, i.e. tries to read not Well Formed
14276
 *             documents
14277
 *
14278
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14279
 *
14280
 * parse an XML in-memory block and use the given SAX function block
14281
 * to handle the parsing callback. If sax is NULL, fallback to the default
14282
 * DOM tree building routines.
14283
 *
14284
 * Returns the resulting document tree
14285
 */
14286
xmlDocPtr
14287
xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14288
0
            int size, int recovery) {
14289
0
    return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
14290
0
}
14291
14292
/**
14293
 * xmlParseMemory:
14294
 * @buffer:  an pointer to a char array
14295
 * @size:  the size of the array
14296
 *
14297
 * DEPRECATED: Use xmlReadMemory.
14298
 *
14299
 * parse an XML in-memory block and build a tree.
14300
 *
14301
 * Returns the resulting document tree
14302
 */
14303
14304
0
xmlDocPtr xmlParseMemory(const char *buffer, int size) {
14305
0
   return(xmlSAXParseMemory(NULL, buffer, size, 0));
14306
0
}
14307
14308
/**
14309
 * xmlRecoverMemory:
14310
 * @buffer:  an pointer to a char array
14311
 * @size:  the size of the array
14312
 *
14313
 * DEPRECATED: Use xmlReadMemory with XML_PARSE_RECOVER.
14314
 *
14315
 * parse an XML in-memory block and build a tree.
14316
 * In the case the document is not Well Formed, an attempt to
14317
 * build a tree is tried anyway
14318
 *
14319
 * Returns the resulting document tree or NULL in case of error
14320
 */
14321
14322
0
xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
14323
0
   return(xmlSAXParseMemory(NULL, buffer, size, 1));
14324
0
}
14325
14326
/**
14327
 * xmlSAXUserParseMemory:
14328
 * @sax:  a SAX handler
14329
 * @user_data:  The user data returned on SAX callbacks
14330
 * @buffer:  an in-memory XML document input
14331
 * @size:  the length of the XML document in bytes
14332
 *
14333
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14334
 *
14335
 * parse an XML in-memory buffer and call the given SAX handler routines.
14336
 *
14337
 * Returns 0 in case of success or a error number otherwise
14338
 */
14339
int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
14340
0
        const char *buffer, int size) {
14341
0
    int ret = 0;
14342
0
    xmlParserCtxtPtr ctxt;
14343
14344
0
    xmlInitParser();
14345
14346
0
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14347
0
    if (ctxt == NULL) return -1;
14348
0
    if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14349
0
        xmlFree(ctxt->sax);
14350
0
    ctxt->sax = sax;
14351
0
    xmlDetectSAX2(ctxt);
14352
14353
0
    if (user_data != NULL)
14354
0
  ctxt->userData = user_data;
14355
14356
0
    xmlParseDocument(ctxt);
14357
14358
0
    if (ctxt->wellFormed)
14359
0
  ret = 0;
14360
0
    else {
14361
0
        if (ctxt->errNo != 0)
14362
0
      ret = ctxt->errNo;
14363
0
  else
14364
0
      ret = -1;
14365
0
    }
14366
0
    if (sax != NULL)
14367
0
        ctxt->sax = NULL;
14368
0
    if (ctxt->myDoc != NULL) {
14369
0
        xmlFreeDoc(ctxt->myDoc);
14370
0
  ctxt->myDoc = NULL;
14371
0
    }
14372
0
    xmlFreeParserCtxt(ctxt);
14373
14374
0
    return ret;
14375
0
}
14376
#endif /* LIBXML_SAX1_ENABLED */
14377
14378
/**
14379
 * xmlCreateDocParserCtxt:
14380
 * @cur:  a pointer to an array of xmlChar
14381
 *
14382
 * Creates a parser context for an XML in-memory document.
14383
 *
14384
 * Returns the new parser context or NULL
14385
 */
14386
xmlParserCtxtPtr
14387
0
xmlCreateDocParserCtxt(const xmlChar *cur) {
14388
0
    int len;
14389
14390
0
    if (cur == NULL)
14391
0
  return(NULL);
14392
0
    len = xmlStrlen(cur);
14393
0
    return(xmlCreateMemoryParserCtxt((const char *)cur, len));
14394
0
}
14395
14396
#ifdef LIBXML_SAX1_ENABLED
14397
/**
14398
 * xmlSAXParseDoc:
14399
 * @sax:  the SAX handler block
14400
 * @cur:  a pointer to an array of xmlChar
14401
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14402
 *             documents
14403
 *
14404
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadDoc.
14405
 *
14406
 * parse an XML in-memory document and build a tree.
14407
 * It use the given SAX function block to handle the parsing callback.
14408
 * If sax is NULL, fallback to the default DOM tree building routines.
14409
 *
14410
 * Returns the resulting document tree
14411
 */
14412
14413
xmlDocPtr
14414
0
xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
14415
0
    xmlDocPtr ret;
14416
0
    xmlParserCtxtPtr ctxt;
14417
0
    xmlSAXHandlerPtr oldsax = NULL;
14418
14419
0
    if (cur == NULL) return(NULL);
14420
14421
14422
0
    ctxt = xmlCreateDocParserCtxt(cur);
14423
0
    if (ctxt == NULL) return(NULL);
14424
0
    if (sax != NULL) {
14425
0
        oldsax = ctxt->sax;
14426
0
        ctxt->sax = sax;
14427
0
        ctxt->userData = NULL;
14428
0
    }
14429
0
    xmlDetectSAX2(ctxt);
14430
14431
0
    xmlParseDocument(ctxt);
14432
0
    if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14433
0
    else {
14434
0
       ret = NULL;
14435
0
       xmlFreeDoc(ctxt->myDoc);
14436
0
       ctxt->myDoc = NULL;
14437
0
    }
14438
0
    if (sax != NULL)
14439
0
  ctxt->sax = oldsax;
14440
0
    xmlFreeParserCtxt(ctxt);
14441
14442
0
    return(ret);
14443
0
}
14444
14445
/**
14446
 * xmlParseDoc:
14447
 * @cur:  a pointer to an array of xmlChar
14448
 *
14449
 * DEPRECATED: Use xmlReadDoc.
14450
 *
14451
 * parse an XML in-memory document and build a tree.
14452
 *
14453
 * Returns the resulting document tree
14454
 */
14455
14456
xmlDocPtr
14457
0
xmlParseDoc(const xmlChar *cur) {
14458
0
    return(xmlSAXParseDoc(NULL, cur, 0));
14459
0
}
14460
#endif /* LIBXML_SAX1_ENABLED */
14461
14462
#ifdef LIBXML_LEGACY_ENABLED
14463
/************************************************************************
14464
 *                  *
14465
 *  Specific function to keep track of entities references    *
14466
 *  and used by the XSLT debugger         *
14467
 *                  *
14468
 ************************************************************************/
14469
14470
static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14471
14472
/**
14473
 * xmlAddEntityReference:
14474
 * @ent : A valid entity
14475
 * @firstNode : A valid first node for children of entity
14476
 * @lastNode : A valid last node of children entity
14477
 *
14478
 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14479
 */
14480
static void
14481
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14482
                      xmlNodePtr lastNode)
14483
{
14484
    if (xmlEntityRefFunc != NULL) {
14485
        (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14486
    }
14487
}
14488
14489
14490
/**
14491
 * xmlSetEntityReferenceFunc:
14492
 * @func: A valid function
14493
 *
14494
 * Set the function to call call back when a xml reference has been made
14495
 */
14496
void
14497
xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14498
{
14499
    xmlEntityRefFunc = func;
14500
}
14501
#endif /* LIBXML_LEGACY_ENABLED */
14502
14503
/************************************************************************
14504
 *                  *
14505
 *        Miscellaneous       *
14506
 *                  *
14507
 ************************************************************************/
14508
14509
/* Non-zero once xmlInitParser() has completed; cleared by xmlCleanupParser(). */
static int xmlParserInitialized = 0;

/**
 * xmlInitParser:
 *
 * Initialization function for the XML parser.
 * This is not reentrant. Call once before processing in case of
 * use in multithreaded programs.
 *
 * Once the library is initialized, further calls return immediately.
 * When LIBXML_THREAD_ENABLED is defined, the one-time setup runs with
 * the global init mutex held and the flag is checked again under it.
 */

void
xmlInitParser(void) {
    /*
     * Note that the initialization code must not make memory allocations.
     */
    if (xmlParserInitialized)
        return;

#ifdef LIBXML_THREAD_ENABLED
    __xmlGlobalInitMutexLock();
    if (xmlParserInitialized == 0) {
#endif
#if defined(_WIN32) && (!defined(LIBXML_STATIC) || defined(LIBXML_STATIC_FOR_DLL))
        /* Only register the exit hook when the default deallocator is in use. */
        if (xmlFree == free)
            atexit(xmlCleanupParser);
#endif

        xmlInitThreadsInternal();
        xmlInitGlobalsInternal();
        xmlInitMemoryInternal();
        __xmlInitializeDict();
        xmlInitEncodingInternal();
        xmlRegisterDefaultInputCallbacks();
#ifdef LIBXML_OUTPUT_ENABLED
        xmlRegisterDefaultOutputCallbacks();
#endif /* LIBXML_OUTPUT_ENABLED */
#if defined(LIBXML_XPATH_ENABLED) || defined(LIBXML_SCHEMAS_ENABLED)
        xmlInitXPathInternal();
#endif
        xmlParserInitialized = 1;
#ifdef LIBXML_THREAD_ENABLED
    }
    __xmlGlobalInitMutexUnlock();
#endif
}
14554
14555
/**
14556
 * xmlCleanupParser:
14557
 *
14558
 * This function name is somewhat misleading. It does not clean up
14559
 * parser state, it cleans up memory allocated by the library itself.
14560
 * It is a cleanup function for the XML library. It tries to reclaim all
14561
 * related global memory allocated for the library processing.
14562
 * It doesn't deallocate any document related memory. One should
14563
 * call xmlCleanupParser() only when the process has finished using
14564
 * the library and all XML/HTML documents built with it.
14565
 * See also xmlInitParser() which has the opposite function of preparing
14566
 * the library for operations.
14567
 *
14568
 * WARNING: if your application is multithreaded or has plugin support
14569
 *          calling this may crash the application if another thread or
14570
 *          a plugin is still using libxml2. It's sometimes very hard to
14571
 *          guess if libxml2 is in use in the application, some libraries
14572
 *          or plugins may use it without notice. In case of doubt abstain
14573
 *          from calling this function or do it just before calling exit()
14574
 *          to avoid leak reports from valgrind !
14575
 */
14576
14577
void
14578
0
xmlCleanupParser(void) {
14579
0
    if (!xmlParserInitialized)
14580
0
  return;
14581
14582
0
    xmlCleanupCharEncodingHandlers();
14583
0
#ifdef LIBXML_CATALOG_ENABLED
14584
0
    xmlCatalogCleanup();
14585
0
#endif
14586
0
    xmlCleanupDictInternal();
14587
0
    xmlCleanupInputCallbacks();
14588
0
#ifdef LIBXML_OUTPUT_ENABLED
14589
0
    xmlCleanupOutputCallbacks();
14590
0
#endif
14591
0
#ifdef LIBXML_SCHEMAS_ENABLED
14592
0
    xmlSchemaCleanupTypes();
14593
0
    xmlRelaxNGCleanupTypes();
14594
0
#endif
14595
0
    xmlCleanupGlobalsInternal();
14596
0
    xmlCleanupThreadsInternal();
14597
0
    xmlCleanupMemoryInternal();
14598
0
    xmlParserInitialized = 0;
14599
0
}
14600
14601
#if defined(HAVE_ATTRIBUTE_DESTRUCTOR) && !defined(LIBXML_STATIC) && \
    !defined(_WIN32)
/*
 * xmlDestructor:
 *
 * Function tagged with ATTRIBUTE_DESTRUCTOR that runs xmlCleanupParser(),
 * but only while xmlFree still points at the C library free().
 */
static void
ATTRIBUTE_DESTRUCTOR
xmlDestructor(void) {
    /*
     * Calling custom deallocation functions in a destructor can cause
     * problems, for example with Nokogiri.
     */
    if (xmlFree != free)
        return;

    xmlCleanupParser();
}
#endif
14614
14615
/************************************************************************
14616
 *                  *
14617
 *  New set (2.6.0) of simpler and more flexible APIs   *
14618
 *                  *
14619
 ************************************************************************/
14620
14621
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A NULL @str is ignored; when "dict" is NULL the
 * string is always freed.
 *
 * The expansion is wrapped in do { } while (0) so that it behaves as a
 * single statement (safe inside unbraced if/else); the call site must
 * supply the terminating semicolon.
 */
#define DICT_FREE(str)                                              \
    do {                                                            \
        if ((str) && ((!dict) ||                                    \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))      \
            xmlFree((char *)(str));                                 \
    } while (0)
14632
14633
/**
14634
 * xmlCtxtReset:
14635
 * @ctxt: an XML parser context
14636
 *
14637
 * Reset a parser context
14638
 */
14639
void
14640
xmlCtxtReset(xmlParserCtxtPtr ctxt)
14641
0
{
14642
0
    xmlParserInputPtr input;
14643
0
    xmlDictPtr dict;
14644
14645
0
    if (ctxt == NULL)
14646
0
        return;
14647
14648
0
    dict = ctxt->dict;
14649
14650
0
    while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14651
0
        xmlFreeInputStream(input);
14652
0
    }
14653
0
    ctxt->inputNr = 0;
14654
0
    ctxt->input = NULL;
14655
14656
0
    ctxt->spaceNr = 0;
14657
0
    if (ctxt->spaceTab != NULL) {
14658
0
  ctxt->spaceTab[0] = -1;
14659
0
  ctxt->space = &ctxt->spaceTab[0];
14660
0
    } else {
14661
0
        ctxt->space = NULL;
14662
0
    }
14663
14664
14665
0
    ctxt->nodeNr = 0;
14666
0
    ctxt->node = NULL;
14667
14668
0
    ctxt->nameNr = 0;
14669
0
    ctxt->name = NULL;
14670
14671
0
    ctxt->nsNr = 0;
14672
14673
0
    DICT_FREE(ctxt->version);
14674
0
    ctxt->version = NULL;
14675
0
    DICT_FREE(ctxt->encoding);
14676
0
    ctxt->encoding = NULL;
14677
0
    DICT_FREE(ctxt->directory);
14678
0
    ctxt->directory = NULL;
14679
0
    DICT_FREE(ctxt->extSubURI);
14680
0
    ctxt->extSubURI = NULL;
14681
0
    DICT_FREE(ctxt->extSubSystem);
14682
0
    ctxt->extSubSystem = NULL;
14683
0
    if (ctxt->myDoc != NULL)
14684
0
        xmlFreeDoc(ctxt->myDoc);
14685
0
    ctxt->myDoc = NULL;
14686
14687
0
    ctxt->standalone = -1;
14688
0
    ctxt->hasExternalSubset = 0;
14689
0
    ctxt->hasPErefs = 0;
14690
0
    ctxt->html = 0;
14691
0
    ctxt->external = 0;
14692
0
    ctxt->instate = XML_PARSER_START;
14693
0
    ctxt->token = 0;
14694
14695
0
    ctxt->wellFormed = 1;
14696
0
    ctxt->nsWellFormed = 1;
14697
0
    ctxt->disableSAX = 0;
14698
0
    ctxt->valid = 1;
14699
#if 0
14700
    ctxt->vctxt.userData = ctxt;
14701
    ctxt->vctxt.error = xmlParserValidityError;
14702
    ctxt->vctxt.warning = xmlParserValidityWarning;
14703
#endif
14704
0
    ctxt->record_info = 0;
14705
0
    ctxt->checkIndex = 0;
14706
0
    ctxt->endCheckState = 0;
14707
0
    ctxt->inSubset = 0;
14708
0
    ctxt->errNo = XML_ERR_OK;
14709
0
    ctxt->depth = 0;
14710
0
    ctxt->charset = XML_CHAR_ENCODING_UTF8;
14711
0
    ctxt->catalogs = NULL;
14712
0
    ctxt->sizeentities = 0;
14713
0
    ctxt->sizeentcopy = 0;
14714
0
    xmlInitNodeInfoSeq(&ctxt->node_seq);
14715
14716
0
    if (ctxt->attsDefault != NULL) {
14717
0
        xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
14718
0
        ctxt->attsDefault = NULL;
14719
0
    }
14720
0
    if (ctxt->attsSpecial != NULL) {
14721
0
        xmlHashFree(ctxt->attsSpecial, NULL);
14722
0
        ctxt->attsSpecial = NULL;
14723
0
    }
14724
14725
0
#ifdef LIBXML_CATALOG_ENABLED
14726
0
    if (ctxt->catalogs != NULL)
14727
0
  xmlCatalogFreeLocal(ctxt->catalogs);
14728
0
#endif
14729
0
    ctxt->nbErrors = 0;
14730
0
    ctxt->nbWarnings = 0;
14731
0
    if (ctxt->lastError.code != XML_ERR_OK)
14732
0
        xmlResetError(&ctxt->lastError);
14733
0
}
14734
14735
/**
14736
 * xmlCtxtResetPush:
14737
 * @ctxt: an XML parser context
14738
 * @chunk:  a pointer to an array of chars
14739
 * @size:  number of chars in the array
14740
 * @filename:  an optional file name or URI
14741
 * @encoding:  the document encoding, or NULL
14742
 *
14743
 * Reset a push parser context
14744
 *
14745
 * Returns 0 in case of success and 1 in case of error
14746
 */
14747
int
14748
xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14749
                 int size, const char *filename, const char *encoding)
14750
0
{
14751
0
    xmlParserInputPtr inputStream;
14752
0
    xmlParserInputBufferPtr buf;
14753
0
    xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14754
14755
0
    if (ctxt == NULL)
14756
0
        return(1);
14757
14758
0
    if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14759
0
        enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14760
14761
0
    buf = xmlAllocParserInputBuffer(enc);
14762
0
    if (buf == NULL)
14763
0
        return(1);
14764
14765
0
    if (ctxt == NULL) {
14766
0
        xmlFreeParserInputBuffer(buf);
14767
0
        return(1);
14768
0
    }
14769
14770
0
    xmlCtxtReset(ctxt);
14771
14772
0
    if (filename == NULL) {
14773
0
        ctxt->directory = NULL;
14774
0
    } else {
14775
0
        ctxt->directory = xmlParserGetDirectory(filename);
14776
0
    }
14777
14778
0
    inputStream = xmlNewInputStream(ctxt);
14779
0
    if (inputStream == NULL) {
14780
0
        xmlFreeParserInputBuffer(buf);
14781
0
        return(1);
14782
0
    }
14783
14784
0
    if (filename == NULL)
14785
0
        inputStream->filename = NULL;
14786
0
    else
14787
0
        inputStream->filename = (char *)
14788
0
            xmlCanonicPath((const xmlChar *) filename);
14789
0
    inputStream->buf = buf;
14790
0
    xmlBufResetInput(buf->buffer, inputStream);
14791
14792
0
    inputPush(ctxt, inputStream);
14793
14794
0
    if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14795
0
        (ctxt->input->buf != NULL)) {
14796
0
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
14797
0
        size_t cur = ctxt->input->cur - ctxt->input->base;
14798
14799
0
        xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14800
14801
0
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
14802
#ifdef DEBUG_PUSH
14803
        xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14804
#endif
14805
0
    }
14806
14807
0
    if (encoding != NULL) {
14808
0
        xmlCharEncodingHandlerPtr hdlr;
14809
14810
0
        if (ctxt->encoding != NULL)
14811
0
      xmlFree((xmlChar *) ctxt->encoding);
14812
0
        ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14813
14814
0
        hdlr = xmlFindCharEncodingHandler(encoding);
14815
0
        if (hdlr != NULL) {
14816
0
            xmlSwitchToEncoding(ctxt, hdlr);
14817
0
  } else {
14818
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14819
0
            "Unsupported encoding %s\n", BAD_CAST encoding);
14820
0
        }
14821
0
    } else if (enc != XML_CHAR_ENCODING_NONE) {
14822
0
        xmlSwitchEncoding(ctxt, enc);
14823
0
    }
14824
14825
0
    return(0);
14826
0
}
14827
14828
14829
/**
14830
 * xmlCtxtUseOptionsInternal:
14831
 * @ctxt: an XML parser context
14832
 * @options:  a combination of xmlParserOption
14833
 * @encoding:  the user provided encoding to use
14834
 *
14835
 * Applies the options to the parser context
14836
 *
14837
 * Returns 0 in case of success, the set of unknown or unimplemented options
14838
 *         in case of error.
14839
 */
14840
static int
14841
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
14842
882k
{
14843
882k
    if (ctxt == NULL)
14844
0
        return(-1);
14845
882k
    if (encoding != NULL) {
14846
0
        if (ctxt->encoding != NULL)
14847
0
      xmlFree((xmlChar *) ctxt->encoding);
14848
0
        ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14849
0
    }
14850
882k
    if (options & XML_PARSE_RECOVER) {
14851
497k
        ctxt->recovery = 1;
14852
497k
        options -= XML_PARSE_RECOVER;
14853
497k
  ctxt->options |= XML_PARSE_RECOVER;
14854
497k
    } else
14855
384k
        ctxt->recovery = 0;
14856
882k
    if (options & XML_PARSE_DTDLOAD) {
14857
628k
        ctxt->loadsubset = XML_DETECT_IDS;
14858
628k
        options -= XML_PARSE_DTDLOAD;
14859
628k
  ctxt->options |= XML_PARSE_DTDLOAD;
14860
628k
    } else
14861
254k
        ctxt->loadsubset = 0;
14862
882k
    if (options & XML_PARSE_DTDATTR) {
14863
345k
        ctxt->loadsubset |= XML_COMPLETE_ATTRS;
14864
345k
        options -= XML_PARSE_DTDATTR;
14865
345k
  ctxt->options |= XML_PARSE_DTDATTR;
14866
345k
    }
14867
882k
    if (options & XML_PARSE_NOENT) {
14868
560k
        ctxt->replaceEntities = 1;
14869
        /* ctxt->loadsubset |= XML_DETECT_IDS; */
14870
560k
        options -= XML_PARSE_NOENT;
14871
560k
  ctxt->options |= XML_PARSE_NOENT;
14872
560k
    } else
14873
321k
        ctxt->replaceEntities = 0;
14874
882k
    if (options & XML_PARSE_PEDANTIC) {
14875
187k
        ctxt->pedantic = 1;
14876
187k
        options -= XML_PARSE_PEDANTIC;
14877
187k
  ctxt->options |= XML_PARSE_PEDANTIC;
14878
187k
    } else
14879
694k
        ctxt->pedantic = 0;
14880
882k
    if (options & XML_PARSE_NOBLANKS) {
14881
354k
        ctxt->keepBlanks = 0;
14882
354k
        ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
14883
354k
        options -= XML_PARSE_NOBLANKS;
14884
354k
  ctxt->options |= XML_PARSE_NOBLANKS;
14885
354k
    } else
14886
527k
        ctxt->keepBlanks = 1;
14887
882k
    if (options & XML_PARSE_DTDVALID) {
14888
343k
        ctxt->validate = 1;
14889
343k
        if (options & XML_PARSE_NOWARNING)
14890
244k
            ctxt->vctxt.warning = NULL;
14891
343k
        if (options & XML_PARSE_NOERROR)
14892
257k
            ctxt->vctxt.error = NULL;
14893
343k
        options -= XML_PARSE_DTDVALID;
14894
343k
  ctxt->options |= XML_PARSE_DTDVALID;
14895
343k
    } else
14896
538k
        ctxt->validate = 0;
14897
882k
    if (options & XML_PARSE_NOWARNING) {
14898
360k
        ctxt->sax->warning = NULL;
14899
360k
        options -= XML_PARSE_NOWARNING;
14900
360k
    }
14901
882k
    if (options & XML_PARSE_NOERROR) {
14902
434k
        ctxt->sax->error = NULL;
14903
434k
        ctxt->sax->fatalError = NULL;
14904
434k
        options -= XML_PARSE_NOERROR;
14905
434k
    }
14906
882k
#ifdef LIBXML_SAX1_ENABLED
14907
882k
    if (options & XML_PARSE_SAX1) {
14908
303k
        ctxt->sax->startElement = xmlSAX2StartElement;
14909
303k
        ctxt->sax->endElement = xmlSAX2EndElement;
14910
303k
        ctxt->sax->startElementNs = NULL;
14911
303k
        ctxt->sax->endElementNs = NULL;
14912
303k
        ctxt->sax->initialized = 1;
14913
303k
        options -= XML_PARSE_SAX1;
14914
303k
  ctxt->options |= XML_PARSE_SAX1;
14915
303k
    }
14916
882k
#endif /* LIBXML_SAX1_ENABLED */
14917
882k
    if (options & XML_PARSE_NODICT) {
14918
329k
        ctxt->dictNames = 0;
14919
329k
        options -= XML_PARSE_NODICT;
14920
329k
  ctxt->options |= XML_PARSE_NODICT;
14921
552k
    } else {
14922
552k
        ctxt->dictNames = 1;
14923
552k
    }
14924
882k
    if (options & XML_PARSE_NOCDATA) {
14925
354k
        ctxt->sax->cdataBlock = NULL;
14926
354k
        options -= XML_PARSE_NOCDATA;
14927
354k
  ctxt->options |= XML_PARSE_NOCDATA;
14928
354k
    }
14929
882k
    if (options & XML_PARSE_NSCLEAN) {
14930
457k
  ctxt->options |= XML_PARSE_NSCLEAN;
14931
457k
        options -= XML_PARSE_NSCLEAN;
14932
457k
    }
14933
882k
    if (options & XML_PARSE_NONET) {
14934
385k
  ctxt->options |= XML_PARSE_NONET;
14935
385k
        options -= XML_PARSE_NONET;
14936
385k
    }
14937
882k
    if (options & XML_PARSE_COMPACT) {
14938
536k
  ctxt->options |= XML_PARSE_COMPACT;
14939
536k
        options -= XML_PARSE_COMPACT;
14940
536k
    }
14941
882k
    if (options & XML_PARSE_OLD10) {
14942
279k
  ctxt->options |= XML_PARSE_OLD10;
14943
279k
        options -= XML_PARSE_OLD10;
14944
279k
    }
14945
882k
    if (options & XML_PARSE_NOBASEFIX) {
14946
334k
  ctxt->options |= XML_PARSE_NOBASEFIX;
14947
334k
        options -= XML_PARSE_NOBASEFIX;
14948
334k
    }
14949
882k
    if (options & XML_PARSE_HUGE) {
14950
267k
  ctxt->options |= XML_PARSE_HUGE;
14951
267k
        options -= XML_PARSE_HUGE;
14952
267k
        if (ctxt->dict != NULL)
14953
267k
            xmlDictSetLimit(ctxt->dict, 0);
14954
267k
    }
14955
882k
    if (options & XML_PARSE_OLDSAX) {
14956
282k
  ctxt->options |= XML_PARSE_OLDSAX;
14957
282k
        options -= XML_PARSE_OLDSAX;
14958
282k
    }
14959
882k
    if (options & XML_PARSE_IGNORE_ENC) {
14960
426k
  ctxt->options |= XML_PARSE_IGNORE_ENC;
14961
426k
        options -= XML_PARSE_IGNORE_ENC;
14962
426k
    }
14963
882k
    if (options & XML_PARSE_BIG_LINES) {
14964
357k
  ctxt->options |= XML_PARSE_BIG_LINES;
14965
357k
        options -= XML_PARSE_BIG_LINES;
14966
357k
    }
14967
882k
    ctxt->linenumbers = 1;
14968
882k
    return (options);
14969
882k
}
14970
14971
/**
14972
 * xmlCtxtUseOptions:
14973
 * @ctxt: an XML parser context
14974
 * @options:  a combination of xmlParserOption
14975
 *
14976
 * Applies the options to the parser context
14977
 *
14978
 * Returns 0 in case of success, the set of unknown or unimplemented options
14979
 *         in case of error.
14980
 */
14981
int
14982
xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
14983
588k
{
14984
588k
   return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
14985
588k
}
14986
14987
/**
14988
 * xmlDoRead:
14989
 * @ctxt:  an XML parser context
14990
 * @URL:  the base URL to use for the document
14991
 * @encoding:  the document encoding, or NULL
14992
 * @options:  a combination of xmlParserOption
14993
 * @reuse:  keep the context for reuse
14994
 *
14995
 * Common front-end for the xmlRead functions
14996
 *
14997
 * Returns the resulting document tree or NULL
14998
 */
14999
static xmlDocPtr
15000
xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
15001
          int options, int reuse)
15002
293k
{
15003
293k
    xmlDocPtr ret;
15004
15005
293k
    xmlCtxtUseOptionsInternal(ctxt, options, encoding);
15006
293k
    if (encoding != NULL) {
15007
0
        xmlCharEncodingHandlerPtr hdlr;
15008
15009
0
  hdlr = xmlFindCharEncodingHandler(encoding);
15010
0
  if (hdlr != NULL)
15011
0
      xmlSwitchToEncoding(ctxt, hdlr);
15012
0
    }
15013
293k
    if ((URL != NULL) && (ctxt->input != NULL) &&
15014
293k
        (ctxt->input->filename == NULL))
15015
293k
        ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
15016
293k
    xmlParseDocument(ctxt);
15017
293k
    if ((ctxt->wellFormed) || ctxt->recovery)
15018
173k
        ret = ctxt->myDoc;
15019
119k
    else {
15020
119k
        ret = NULL;
15021
119k
  if (ctxt->myDoc != NULL) {
15022
101k
      xmlFreeDoc(ctxt->myDoc);
15023
101k
  }
15024
119k
    }
15025
293k
    ctxt->myDoc = NULL;
15026
293k
    if (!reuse) {
15027
293k
  xmlFreeParserCtxt(ctxt);
15028
293k
    }
15029
15030
293k
    return (ret);
15031
293k
}
15032
15033
/**
15034
 * xmlReadDoc:
15035
 * @cur:  a pointer to a zero terminated string
15036
 * @URL:  the base URL to use for the document
15037
 * @encoding:  the document encoding, or NULL
15038
 * @options:  a combination of xmlParserOption
15039
 *
15040
 * parse an XML in-memory document and build a tree.
15041
 *
15042
 * Returns the resulting document tree
15043
 */
15044
xmlDocPtr
15045
xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
15046
0
{
15047
0
    xmlParserCtxtPtr ctxt;
15048
15049
0
    if (cur == NULL)
15050
0
        return (NULL);
15051
0
    xmlInitParser();
15052
15053
0
    ctxt = xmlCreateDocParserCtxt(cur);
15054
0
    if (ctxt == NULL)
15055
0
        return (NULL);
15056
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15057
0
}
15058
15059
/**
15060
 * xmlReadFile:
15061
 * @filename:  a file or URL
15062
 * @encoding:  the document encoding, or NULL
15063
 * @options:  a combination of xmlParserOption
15064
 *
15065
 * parse an XML file from the filesystem or the network.
15066
 *
15067
 * Returns the resulting document tree
15068
 */
15069
xmlDocPtr
15070
xmlReadFile(const char *filename, const char *encoding, int options)
15071
0
{
15072
0
    xmlParserCtxtPtr ctxt;
15073
15074
0
    xmlInitParser();
15075
0
    ctxt = xmlCreateURLParserCtxt(filename, options);
15076
0
    if (ctxt == NULL)
15077
0
        return (NULL);
15078
0
    return (xmlDoRead(ctxt, NULL, encoding, options, 0));
15079
0
}
15080
15081
/**
15082
 * xmlReadMemory:
15083
 * @buffer:  a pointer to a char array
15084
 * @size:  the size of the array
15085
 * @URL:  the base URL to use for the document
15086
 * @encoding:  the document encoding, or NULL
15087
 * @options:  a combination of xmlParserOption
15088
 *
15089
 * parse an XML in-memory document and build a tree.
15090
 *
15091
 * Returns the resulting document tree
15092
 */
15093
xmlDocPtr
15094
xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
15095
294k
{
15096
294k
    xmlParserCtxtPtr ctxt;
15097
15098
294k
    xmlInitParser();
15099
294k
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
15100
294k
    if (ctxt == NULL)
15101
429
        return (NULL);
15102
293k
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15103
294k
}
15104
15105
/**
15106
 * xmlReadFd:
15107
 * @fd:  an open file descriptor
15108
 * @URL:  the base URL to use for the document
15109
 * @encoding:  the document encoding, or NULL
15110
 * @options:  a combination of xmlParserOption
15111
 *
15112
 * parse an XML from a file descriptor and build a tree.
15113
 * NOTE that the file descriptor will not be closed when the
15114
 *      reader is closed or reset.
15115
 *
15116
 * Returns the resulting document tree
15117
 */
15118
xmlDocPtr
15119
xmlReadFd(int fd, const char *URL, const char *encoding, int options)
15120
0
{
15121
0
    xmlParserCtxtPtr ctxt;
15122
0
    xmlParserInputBufferPtr input;
15123
0
    xmlParserInputPtr stream;
15124
15125
0
    if (fd < 0)
15126
0
        return (NULL);
15127
0
    xmlInitParser();
15128
15129
0
    input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15130
0
    if (input == NULL)
15131
0
        return (NULL);
15132
0
    input->closecallback = NULL;
15133
0
    ctxt = xmlNewParserCtxt();
15134
0
    if (ctxt == NULL) {
15135
0
        xmlFreeParserInputBuffer(input);
15136
0
        return (NULL);
15137
0
    }
15138
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15139
0
    if (stream == NULL) {
15140
0
        xmlFreeParserInputBuffer(input);
15141
0
  xmlFreeParserCtxt(ctxt);
15142
0
        return (NULL);
15143
0
    }
15144
0
    inputPush(ctxt, stream);
15145
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15146
0
}
15147
15148
/**
15149
 * xmlReadIO:
15150
 * @ioread:  an I/O read function
15151
 * @ioclose:  an I/O close function
15152
 * @ioctx:  an I/O handler
15153
 * @URL:  the base URL to use for the document
15154
 * @encoding:  the document encoding, or NULL
15155
 * @options:  a combination of xmlParserOption
15156
 *
15157
 * parse an XML document from I/O functions and source and build a tree.
15158
 *
15159
 * Returns the resulting document tree
15160
 */
15161
xmlDocPtr
15162
xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
15163
          void *ioctx, const char *URL, const char *encoding, int options)
15164
0
{
15165
0
    xmlParserCtxtPtr ctxt;
15166
0
    xmlParserInputBufferPtr input;
15167
0
    xmlParserInputPtr stream;
15168
15169
0
    if (ioread == NULL)
15170
0
        return (NULL);
15171
0
    xmlInitParser();
15172
15173
0
    input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15174
0
                                         XML_CHAR_ENCODING_NONE);
15175
0
    if (input == NULL) {
15176
0
        if (ioclose != NULL)
15177
0
            ioclose(ioctx);
15178
0
        return (NULL);
15179
0
    }
15180
0
    ctxt = xmlNewParserCtxt();
15181
0
    if (ctxt == NULL) {
15182
0
        xmlFreeParserInputBuffer(input);
15183
0
        return (NULL);
15184
0
    }
15185
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15186
0
    if (stream == NULL) {
15187
0
        xmlFreeParserInputBuffer(input);
15188
0
  xmlFreeParserCtxt(ctxt);
15189
0
        return (NULL);
15190
0
    }
15191
0
    inputPush(ctxt, stream);
15192
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15193
0
}
15194
15195
/**
15196
 * xmlCtxtReadDoc:
15197
 * @ctxt:  an XML parser context
15198
 * @cur:  a pointer to a zero terminated string
15199
 * @URL:  the base URL to use for the document
15200
 * @encoding:  the document encoding, or NULL
15201
 * @options:  a combination of xmlParserOption
15202
 *
15203
 * parse an XML in-memory document and build a tree.
15204
 * This reuses the existing @ctxt parser context
15205
 *
15206
 * Returns the resulting document tree
15207
 */
15208
xmlDocPtr
15209
xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
15210
               const char *URL, const char *encoding, int options)
15211
0
{
15212
0
    if (cur == NULL)
15213
0
        return (NULL);
15214
0
    return (xmlCtxtReadMemory(ctxt, (const char *) cur, xmlStrlen(cur), URL,
15215
0
                              encoding, options));
15216
0
}
15217
15218
/**
15219
 * xmlCtxtReadFile:
15220
 * @ctxt:  an XML parser context
15221
 * @filename:  a file or URL
15222
 * @encoding:  the document encoding, or NULL
15223
 * @options:  a combination of xmlParserOption
15224
 *
15225
 * parse an XML file from the filesystem or the network.
15226
 * This reuses the existing @ctxt parser context
15227
 *
15228
 * Returns the resulting document tree
15229
 */
15230
xmlDocPtr
15231
xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15232
                const char *encoding, int options)
15233
0
{
15234
0
    xmlParserInputPtr stream;
15235
15236
0
    if (filename == NULL)
15237
0
        return (NULL);
15238
0
    if (ctxt == NULL)
15239
0
        return (NULL);
15240
0
    xmlInitParser();
15241
15242
0
    xmlCtxtReset(ctxt);
15243
15244
0
    stream = xmlLoadExternalEntity(filename, NULL, ctxt);
15245
0
    if (stream == NULL) {
15246
0
        return (NULL);
15247
0
    }
15248
0
    inputPush(ctxt, stream);
15249
0
    return (xmlDoRead(ctxt, NULL, encoding, options, 1));
15250
0
}
15251
15252
/**
15253
 * xmlCtxtReadMemory:
15254
 * @ctxt:  an XML parser context
15255
 * @buffer:  a pointer to a char array
15256
 * @size:  the size of the array
15257
 * @URL:  the base URL to use for the document
15258
 * @encoding:  the document encoding, or NULL
15259
 * @options:  a combination of xmlParserOption
15260
 *
15261
 * parse an XML in-memory document and build a tree.
15262
 * This reuses the existing @ctxt parser context
15263
 *
15264
 * Returns the resulting document tree
15265
 */
15266
xmlDocPtr
15267
xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
15268
                  const char *URL, const char *encoding, int options)
15269
0
{
15270
0
    xmlParserInputBufferPtr input;
15271
0
    xmlParserInputPtr stream;
15272
15273
0
    if (ctxt == NULL)
15274
0
        return (NULL);
15275
0
    if (buffer == NULL)
15276
0
        return (NULL);
15277
0
    xmlInitParser();
15278
15279
0
    xmlCtxtReset(ctxt);
15280
15281
0
    input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
15282
0
    if (input == NULL) {
15283
0
  return(NULL);
15284
0
    }
15285
15286
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15287
0
    if (stream == NULL) {
15288
0
  xmlFreeParserInputBuffer(input);
15289
0
  return(NULL);
15290
0
    }
15291
15292
0
    inputPush(ctxt, stream);
15293
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15294
0
}
15295
15296
/**
15297
 * xmlCtxtReadFd:
15298
 * @ctxt:  an XML parser context
15299
 * @fd:  an open file descriptor
15300
 * @URL:  the base URL to use for the document
15301
 * @encoding:  the document encoding, or NULL
15302
 * @options:  a combination of xmlParserOption
15303
 *
15304
 * parse an XML from a file descriptor and build a tree.
15305
 * This reuses the existing @ctxt parser context
15306
 * NOTE that the file descriptor will not be closed when the
15307
 *      reader is closed or reset.
15308
 *
15309
 * Returns the resulting document tree
15310
 */
15311
xmlDocPtr
15312
xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15313
              const char *URL, const char *encoding, int options)
15314
0
{
15315
0
    xmlParserInputBufferPtr input;
15316
0
    xmlParserInputPtr stream;
15317
15318
0
    if (fd < 0)
15319
0
        return (NULL);
15320
0
    if (ctxt == NULL)
15321
0
        return (NULL);
15322
0
    xmlInitParser();
15323
15324
0
    xmlCtxtReset(ctxt);
15325
15326
15327
0
    input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15328
0
    if (input == NULL)
15329
0
        return (NULL);
15330
0
    input->closecallback = NULL;
15331
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15332
0
    if (stream == NULL) {
15333
0
        xmlFreeParserInputBuffer(input);
15334
0
        return (NULL);
15335
0
    }
15336
0
    inputPush(ctxt, stream);
15337
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15338
0
}
15339
15340
/**
15341
 * xmlCtxtReadIO:
15342
 * @ctxt:  an XML parser context
15343
 * @ioread:  an I/O read function
15344
 * @ioclose:  an I/O close function
15345
 * @ioctx:  an I/O handler
15346
 * @URL:  the base URL to use for the document
15347
 * @encoding:  the document encoding, or NULL
15348
 * @options:  a combination of xmlParserOption
15349
 *
15350
 * parse an XML document from I/O functions and source and build a tree.
15351
 * This reuses the existing @ctxt parser context
15352
 *
15353
 * Returns the resulting document tree
15354
 */
15355
xmlDocPtr
15356
xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15357
              xmlInputCloseCallback ioclose, void *ioctx,
15358
        const char *URL,
15359
              const char *encoding, int options)
15360
0
{
15361
0
    xmlParserInputBufferPtr input;
15362
0
    xmlParserInputPtr stream;
15363
15364
0
    if (ioread == NULL)
15365
0
        return (NULL);
15366
0
    if (ctxt == NULL)
15367
0
        return (NULL);
15368
0
    xmlInitParser();
15369
15370
0
    xmlCtxtReset(ctxt);
15371
15372
0
    input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15373
0
                                         XML_CHAR_ENCODING_NONE);
15374
0
    if (input == NULL) {
15375
0
        if (ioclose != NULL)
15376
0
            ioclose(ioctx);
15377
0
        return (NULL);
15378
0
    }
15379
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15380
0
    if (stream == NULL) {
15381
0
        xmlFreeParserInputBuffer(input);
15382
0
        return (NULL);
15383
0
    }
15384
0
    inputPush(ctxt, stream);
15385
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15386
0
}
15387