Coverage Report

Created: 2023-09-24 16:02

/src/libxml2/parser.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3
 *            implemented on top of the SAX interfaces
4
 *
5
 * References:
6
 *   The XML specification:
7
 *     http://www.w3.org/TR/REC-xml
8
 *   Original 1.0 version:
9
 *     http://www.w3.org/TR/1998/REC-xml-19980210
10
 *   XML second edition working draft
11
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
12
 *
13
 * Okay this is a big file, the parser core is around 7000 lines, then it
14
 * is followed by the progressive parser top routines, then the various
15
 * high level APIs to call the parser and a few miscellaneous functions.
16
 * A number of helper functions and deprecated ones have been moved to
17
 * parserInternals.c to reduce this file size.
18
 * As much as possible the functions are associated with their relative
19
 * production in the XML specification. A few productions defining the
20
 * different ranges of character are actually implemented either in
21
 * parserInternals.h or parserInternals.c
22
 * The DOM tree build is realized from the default SAX callbacks in
23
 * the module SAX.c.
24
 * The routines doing the validation checks are in valid.c and called either
25
 * from the SAX callbacks or as standalone functions using a preparsed
26
 * document.
27
 *
28
 * See Copyright for the status of this software.
29
 *
30
 * daniel@veillard.com
31
 */
32
33
/* To avoid EBCDIC trouble when parsing on zOS */
34
#if defined(__MVS__)
35
#pragma convert("ISO8859-1")
36
#endif
37
38
#define IN_LIBXML
39
#include "libxml.h"
40
41
#if defined(_WIN32)
42
#define XML_DIR_SEP '\\'
43
#else
44
#define XML_DIR_SEP '/'
45
#endif
46
47
#include <stdlib.h>
48
#include <limits.h>
49
#include <string.h>
50
#include <stdarg.h>
51
#include <stddef.h>
52
#include <ctype.h>
53
#include <stdlib.h>
54
#include <libxml/xmlmemory.h>
55
#include <libxml/threads.h>
56
#include <libxml/globals.h>
57
#include <libxml/tree.h>
58
#include <libxml/parser.h>
59
#include <libxml/parserInternals.h>
60
#include <libxml/HTMLparser.h>
61
#include <libxml/valid.h>
62
#include <libxml/entities.h>
63
#include <libxml/xmlerror.h>
64
#include <libxml/encoding.h>
65
#include <libxml/xmlIO.h>
66
#include <libxml/uri.h>
67
#ifdef LIBXML_CATALOG_ENABLED
68
#include <libxml/catalog.h>
69
#endif
70
#ifdef LIBXML_SCHEMAS_ENABLED
71
#include <libxml/xmlschemastypes.h>
72
#include <libxml/relaxng.h>
73
#endif
74
#if defined(LIBXML_XPATH_ENABLED) || defined(LIBXML_SCHEMAS_ENABLED)
75
#include <libxml/xpath.h>
76
#endif
77
78
#include "private/buf.h"
79
#include "private/dict.h"
80
#include "private/enc.h"
81
#include "private/entities.h"
82
#include "private/error.h"
83
#include "private/globals.h"
84
#include "private/html.h"
85
#include "private/io.h"
86
#include "private/memory.h"
87
#include "private/parser.h"
88
#include "private/threads.h"
89
#include "private/xpath.h"
90
91
/*
 * Record holding four values associated with a start tag: two strings
 * and two integers.
 * NOTE(review): field meanings below are inferred from the names only;
 * confirm against the code that fills this structure.
 */
struct _xmlStartTag {
92
    const xmlChar *prefix;      /* presumably the namespace prefix of the element */
93
    const xmlChar *URI;         /* presumably the namespace URI of the element */
94
    int line;                   /* presumably the input line of the start tag */
95
    int nsNr;                   /* presumably a count of namespace entries - TODO confirm */
96
};
97
98
static xmlParserCtxtPtr
99
xmlCreateEntityParserCtxtInternal(xmlSAXHandlerPtr sax, void *userData,
100
        const xmlChar *URL, const xmlChar *ID, const xmlChar *base,
101
        xmlParserCtxtPtr pctx);
102
103
static void xmlHaltParser(xmlParserCtxtPtr ctxt);
104
105
static int
106
xmlParseElementStart(xmlParserCtxtPtr ctxt);
107
108
static void
109
xmlParseElementEnd(xmlParserCtxtPtr ctxt);
110
111
/************************************************************************
112
 *                  *
113
 *  Arbitrary limits set in the parser. See XML_PARSE_HUGE    *
114
 *                  *
115
 ************************************************************************/
116
117
9.98M
#define XML_MAX_HUGE_LENGTH 1000000000
118
119
#define XML_PARSER_BIG_ENTITY 1000
120
#define XML_PARSER_LOT_ENTITY 5000
121
122
/*
123
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
124
 *    replacement over the size in byte of the input indicates that you have
125
 *    an exponential behaviour. A value of 10 corresponds to at least 3 entity
126
 *    replacement per byte of input.
127
 */
128
1.31k
#define XML_PARSER_NON_LINEAR 10
129
130
63.0M
#define XML_ENT_FIXED_COST 50
131
132
/**
133
 * xmlParserMaxDepth:
134
 *
135
 * arbitrary depth limit for the XML documents that we allow to
136
 * process. This is not a limitation of the parser but a safety
137
 * boundary feature. It can be disabled with the XML_PARSE_HUGE
138
 * parser option.
139
 */
140
unsigned int xmlParserMaxDepth = 256;
141
142
143
144
#define SAX2 1
145
1.99G
#define XML_PARSER_BIG_BUFFER_SIZE 300
146
10.1G
#define XML_PARSER_BUFFER_SIZE 100
147
622k
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
148
149
/**
150
 * XML_PARSER_CHUNK_SIZE
151
 *
152
 * When calling GROW that's the minimal amount of data
153
 * the parser expects to have received. It is not a hard
154
 * limit but an optimization when reading strings like Names
155
 * It is not strictly needed as long as inputs available characters
156
 * are followed by 0, which should be provided by the I/O level
157
 */
158
1.32G
#define XML_PARSER_CHUNK_SIZE 100
159
160
/*
161
 * List of XML prefixed PI allowed by W3C specs
162
 */
163
164
static const char* const xmlW3CPIs[] = {
165
    "xml-stylesheet",
166
    "xml-model",
167
    NULL
168
};
169
170
171
/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
172
static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
173
                                              const xmlChar **str);
174
175
static xmlParserErrors
176
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
177
                xmlSAXHandlerPtr sax,
178
          void *user_data, int depth, const xmlChar *URL,
179
          const xmlChar *ID, xmlNodePtr *list);
180
181
static int
182
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
183
                          const char *encoding);
184
#ifdef LIBXML_LEGACY_ENABLED
185
static void
186
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
187
                      xmlNodePtr lastNode);
188
#endif /* LIBXML_LEGACY_ENABLED */
189
190
static xmlParserErrors
191
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
192
          const xmlChar *string, void *user_data, xmlNodePtr *lst);
193
194
static int
195
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
196
197
/************************************************************************
198
 *                  *
199
 *    Some factorized error routines        *
200
 *                  *
201
 ************************************************************************/
202
203
/**
204
 * xmlErrAttributeDup:
205
 * @ctxt:  an XML parser context
206
 * @prefix:  the attribute prefix
207
 * @localname:  the attribute localname
208
 *
209
 * Handle a redefinition of attribute error
210
 */
211
static void
212
xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
213
                   const xmlChar * localname)
214
81.8k
{
215
81.8k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
216
81.8k
        (ctxt->instate == XML_PARSER_EOF))
217
0
  return;
218
81.8k
    if (ctxt != NULL)
219
81.8k
  ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
220
221
81.8k
    if (prefix == NULL)
222
44.0k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
223
44.0k
                        XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
224
44.0k
                        (const char *) localname, NULL, NULL, 0, 0,
225
44.0k
                        "Attribute %s redefined\n", localname);
226
37.8k
    else
227
37.8k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
228
37.8k
                        XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
229
37.8k
                        (const char *) prefix, (const char *) localname,
230
37.8k
                        NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
231
37.8k
                        localname);
232
81.8k
    if (ctxt != NULL) {
233
81.8k
  ctxt->wellFormed = 0;
234
81.8k
  if (ctxt->recovery == 0)
235
13.7k
      ctxt->disableSAX = 1;
236
81.8k
    }
237
81.8k
}
238
239
/**
240
 * xmlFatalErr:
241
 * @ctxt:  an XML parser context
242
 * @error:  the error number
243
 * @extra:  extra information string
244
 *
245
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
246
 */
247
static void
248
xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
249
4.54M
{
250
4.54M
    const char *errmsg;
251
252
4.54M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
253
4.54M
        (ctxt->instate == XML_PARSER_EOF))
254
25.3k
  return;
255
4.51M
    switch (error) {
256
91.3k
        case XML_ERR_INVALID_HEX_CHARREF:
257
91.3k
            errmsg = "CharRef: invalid hexadecimal value";
258
91.3k
            break;
259
89.4k
        case XML_ERR_INVALID_DEC_CHARREF:
260
89.4k
            errmsg = "CharRef: invalid decimal value";
261
89.4k
            break;
262
0
        case XML_ERR_INVALID_CHARREF:
263
0
            errmsg = "CharRef: invalid value";
264
0
            break;
265
1.93M
        case XML_ERR_INTERNAL_ERROR:
266
1.93M
            errmsg = "internal error";
267
1.93M
            break;
268
0
        case XML_ERR_PEREF_AT_EOF:
269
0
            errmsg = "PEReference at end of document";
270
0
            break;
271
0
        case XML_ERR_PEREF_IN_PROLOG:
272
0
            errmsg = "PEReference in prolog";
273
0
            break;
274
0
        case XML_ERR_PEREF_IN_EPILOG:
275
0
            errmsg = "PEReference in epilog";
276
0
            break;
277
0
        case XML_ERR_PEREF_NO_NAME:
278
0
            errmsg = "PEReference: no name";
279
0
            break;
280
36.9k
        case XML_ERR_PEREF_SEMICOL_MISSING:
281
36.9k
            errmsg = "PEReference: expecting ';'";
282
36.9k
            break;
283
1.60k
        case XML_ERR_ENTITY_LOOP:
284
1.60k
            errmsg = "Detected an entity reference loop";
285
1.60k
            break;
286
0
        case XML_ERR_ENTITY_NOT_STARTED:
287
0
            errmsg = "EntityValue: \" or ' expected";
288
0
            break;
289
5.90k
        case XML_ERR_ENTITY_PE_INTERNAL:
290
5.90k
            errmsg = "PEReferences forbidden in internal subset";
291
5.90k
            break;
292
6.46k
        case XML_ERR_ENTITY_NOT_FINISHED:
293
6.46k
            errmsg = "EntityValue: \" or ' expected";
294
6.46k
            break;
295
116k
        case XML_ERR_ATTRIBUTE_NOT_STARTED:
296
116k
            errmsg = "AttValue: \" or ' expected";
297
116k
            break;
298
616k
        case XML_ERR_LT_IN_ATTRIBUTE:
299
616k
            errmsg = "Unescaped '<' not allowed in attributes values";
300
616k
            break;
301
13.2k
        case XML_ERR_LITERAL_NOT_STARTED:
302
13.2k
            errmsg = "SystemLiteral \" or ' expected";
303
13.2k
            break;
304
14.9k
        case XML_ERR_LITERAL_NOT_FINISHED:
305
14.9k
            errmsg = "Unfinished System or Public ID \" or ' expected";
306
14.9k
            break;
307
62.2k
        case XML_ERR_MISPLACED_CDATA_END:
308
62.2k
            errmsg = "Sequence ']]>' not allowed in content";
309
62.2k
            break;
310
12.4k
        case XML_ERR_URI_REQUIRED:
311
12.4k
            errmsg = "SYSTEM or PUBLIC, the URI is missing";
312
12.4k
            break;
313
1.21k
        case XML_ERR_PUBID_REQUIRED:
314
1.21k
            errmsg = "PUBLIC, the Public Identifier is missing";
315
1.21k
            break;
316
259k
        case XML_ERR_HYPHEN_IN_COMMENT:
317
259k
            errmsg = "Comment must not contain '--' (double-hyphen)";
318
259k
            break;
319
25.0k
        case XML_ERR_PI_NOT_STARTED:
320
25.0k
            errmsg = "xmlParsePI : no target name";
321
25.0k
            break;
322
5.12k
        case XML_ERR_RESERVED_XML_NAME:
323
5.12k
            errmsg = "Invalid PI name";
324
5.12k
            break;
325
2.01k
        case XML_ERR_NOTATION_NOT_STARTED:
326
2.01k
            errmsg = "NOTATION: Name expected here";
327
2.01k
            break;
328
8.03k
        case XML_ERR_NOTATION_NOT_FINISHED:
329
8.03k
            errmsg = "'>' required to close NOTATION declaration";
330
8.03k
            break;
331
10.2k
        case XML_ERR_VALUE_REQUIRED:
332
10.2k
            errmsg = "Entity value required";
333
10.2k
            break;
334
1.33k
        case XML_ERR_URI_FRAGMENT:
335
1.33k
            errmsg = "Fragment not allowed";
336
1.33k
            break;
337
14.6k
        case XML_ERR_ATTLIST_NOT_STARTED:
338
14.6k
            errmsg = "'(' required to start ATTLIST enumeration";
339
14.6k
            break;
340
450
        case XML_ERR_NMTOKEN_REQUIRED:
341
450
            errmsg = "NmToken expected in ATTLIST enumeration";
342
450
            break;
343
4.88k
        case XML_ERR_ATTLIST_NOT_FINISHED:
344
4.88k
            errmsg = "')' required to finish ATTLIST enumeration";
345
4.88k
            break;
346
1.60k
        case XML_ERR_MIXED_NOT_STARTED:
347
1.60k
            errmsg = "MixedContentDecl : '|' or ')*' expected";
348
1.60k
            break;
349
0
        case XML_ERR_PCDATA_REQUIRED:
350
0
            errmsg = "MixedContentDecl : '#PCDATA' expected";
351
0
            break;
352
4.53k
        case XML_ERR_ELEMCONTENT_NOT_STARTED:
353
4.53k
            errmsg = "ContentDecl : Name or '(' expected";
354
4.53k
            break;
355
5.71k
        case XML_ERR_ELEMCONTENT_NOT_FINISHED:
356
5.71k
            errmsg = "ContentDecl : ',' '|' or ')' expected";
357
5.71k
            break;
358
0
        case XML_ERR_PEREF_IN_INT_SUBSET:
359
0
            errmsg =
360
0
                "PEReference: forbidden within markup decl in internal subset";
361
0
            break;
362
481k
        case XML_ERR_GT_REQUIRED:
363
481k
            errmsg = "expected '>'";
364
481k
            break;
365
178
        case XML_ERR_CONDSEC_INVALID:
366
178
            errmsg = "XML conditional section '[' expected";
367
178
            break;
368
11.3k
        case XML_ERR_EXT_SUBSET_NOT_FINISHED:
369
11.3k
            errmsg = "Content error in the external subset";
370
11.3k
            break;
371
1.30k
        case XML_ERR_CONDSEC_INVALID_KEYWORD:
372
1.30k
            errmsg =
373
1.30k
                "conditional section INCLUDE or IGNORE keyword expected";
374
1.30k
            break;
375
1.23k
        case XML_ERR_CONDSEC_NOT_FINISHED:
376
1.23k
            errmsg = "XML conditional section not closed";
377
1.23k
            break;
378
499
        case XML_ERR_XMLDECL_NOT_STARTED:
379
499
            errmsg = "Text declaration '<?xml' required";
380
499
            break;
381
94.2k
        case XML_ERR_XMLDECL_NOT_FINISHED:
382
94.2k
            errmsg = "parsing XML declaration: '?>' expected";
383
94.2k
            break;
384
0
        case XML_ERR_EXT_ENTITY_STANDALONE:
385
0
            errmsg = "external parsed entities cannot be standalone";
386
0
            break;
387
259k
        case XML_ERR_ENTITYREF_SEMICOL_MISSING:
388
259k
            errmsg = "EntityRef: expecting ';'";
389
259k
            break;
390
15.7k
        case XML_ERR_DOCTYPE_NOT_FINISHED:
391
15.7k
            errmsg = "DOCTYPE improperly terminated";
392
15.7k
            break;
393
0
        case XML_ERR_LTSLASH_REQUIRED:
394
0
            errmsg = "EndTag: '</' not found";
395
0
            break;
396
5.87k
        case XML_ERR_EQUAL_REQUIRED:
397
5.87k
            errmsg = "expected '='";
398
5.87k
            break;
399
26.6k
        case XML_ERR_STRING_NOT_CLOSED:
400
26.6k
            errmsg = "String not closed expecting \" or '";
401
26.6k
            break;
402
5.74k
        case XML_ERR_STRING_NOT_STARTED:
403
5.74k
            errmsg = "String not started expecting ' or \"";
404
5.74k
            break;
405
735
        case XML_ERR_ENCODING_NAME:
406
735
            errmsg = "Invalid XML encoding name";
407
735
            break;
408
1.26k
        case XML_ERR_STANDALONE_VALUE:
409
1.26k
            errmsg = "standalone accepts only 'yes' or 'no'";
410
1.26k
            break;
411
23.5k
        case XML_ERR_DOCUMENT_EMPTY:
412
23.5k
            errmsg = "Document is empty";
413
23.5k
            break;
414
166k
        case XML_ERR_DOCUMENT_END:
415
166k
            errmsg = "Extra content at the end of the document";
416
166k
            break;
417
8.20k
        case XML_ERR_NOT_WELL_BALANCED:
418
8.20k
            errmsg = "chunk is not well balanced";
419
8.20k
            break;
420
0
        case XML_ERR_EXTRA_CONTENT:
421
0
            errmsg = "extra content at the end of well balanced chunk";
422
0
            break;
423
55.3k
        case XML_ERR_VERSION_MISSING:
424
55.3k
            errmsg = "Malformed declaration expecting version";
425
55.3k
            break;
426
3.30k
        case XML_ERR_NAME_TOO_LONG:
427
3.30k
            errmsg = "Name too long";
428
3.30k
            break;
429
#if 0
430
        case:
431
            errmsg = "";
432
            break;
433
#endif
434
12.0k
        default:
435
12.0k
            errmsg = "Unregistered error message";
436
4.51M
    }
437
4.51M
    if (ctxt != NULL)
438
4.51M
  ctxt->errNo = error;
439
4.51M
    if (info == NULL) {
440
2.58M
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
441
2.58M
                        XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s\n",
442
2.58M
                        errmsg);
443
2.58M
    } else {
444
1.93M
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
445
1.93M
                        XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s: %s\n",
446
1.93M
                        errmsg, info);
447
1.93M
    }
448
4.51M
    if (ctxt != NULL) {
449
4.51M
  ctxt->wellFormed = 0;
450
4.51M
  if (ctxt->recovery == 0)
451
628k
      ctxt->disableSAX = 1;
452
4.51M
    }
453
4.51M
}
454
455
/**
456
 * xmlFatalErrMsg:
457
 * @ctxt:  an XML parser context
458
 * @error:  the error number
459
 * @msg:  the error message
460
 *
461
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
462
 */
463
static void LIBXML_ATTR_FORMAT(3,0)
464
xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
465
               const char *msg)
466
16.3M
{
467
16.3M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
468
16.3M
        (ctxt->instate == XML_PARSER_EOF))
469
6
  return;
470
16.3M
    if (ctxt != NULL)
471
16.3M
  ctxt->errNo = error;
472
16.3M
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
473
16.3M
                    XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
474
16.3M
    if (ctxt != NULL) {
475
16.3M
  ctxt->wellFormed = 0;
476
16.3M
  if (ctxt->recovery == 0)
477
4.94M
      ctxt->disableSAX = 1;
478
16.3M
    }
479
16.3M
}
480
481
/**
482
 * xmlWarningMsg:
483
 * @ctxt:  an XML parser context
484
 * @error:  the error number
485
 * @msg:  the error message
486
 * @str1:  extra data
487
 * @str2:  extra data
488
 *
489
 * Handle a warning.
490
 */
491
static void LIBXML_ATTR_FORMAT(3,0)
492
xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
493
              const char *msg, const xmlChar *str1, const xmlChar *str2)
494
2.75M
{
495
2.75M
    xmlStructuredErrorFunc schannel = NULL;
496
497
2.75M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
498
2.75M
        (ctxt->instate == XML_PARSER_EOF))
499
0
  return;
500
2.75M
    if ((ctxt != NULL) && (ctxt->sax != NULL) &&
501
2.75M
        (ctxt->sax->initialized == XML_SAX2_MAGIC))
502
1.60M
        schannel = ctxt->sax->serror;
503
2.75M
    if (ctxt != NULL) {
504
2.75M
        __xmlRaiseError(schannel,
505
2.75M
                    (ctxt->sax) ? ctxt->sax->warning : NULL,
506
2.75M
                    ctxt->userData,
507
2.75M
                    ctxt, NULL, XML_FROM_PARSER, error,
508
2.75M
                    XML_ERR_WARNING, NULL, 0,
509
2.75M
        (const char *) str1, (const char *) str2, NULL, 0, 0,
510
2.75M
        msg, (const char *) str1, (const char *) str2);
511
2.75M
    } else {
512
0
        __xmlRaiseError(schannel, NULL, NULL,
513
0
                    ctxt, NULL, XML_FROM_PARSER, error,
514
0
                    XML_ERR_WARNING, NULL, 0,
515
0
        (const char *) str1, (const char *) str2, NULL, 0, 0,
516
0
        msg, (const char *) str1, (const char *) str2);
517
0
    }
518
2.75M
}
519
520
/**
521
 * xmlValidityError:
522
 * @ctxt:  an XML parser context
523
 * @error:  the error number
524
 * @msg:  the error message
525
 * @str1:  extra data
526
 *
527
 * Handle a validity error.
528
 */
529
static void LIBXML_ATTR_FORMAT(3,0)
530
xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
531
              const char *msg, const xmlChar *str1, const xmlChar *str2)
532
3.60M
{
533
3.60M
    xmlStructuredErrorFunc schannel = NULL;
534
535
3.60M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
536
3.60M
        (ctxt->instate == XML_PARSER_EOF))
537
0
  return;
538
3.60M
    if (ctxt != NULL) {
539
3.60M
  ctxt->errNo = error;
540
3.60M
  if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
541
306k
      schannel = ctxt->sax->serror;
542
3.60M
    }
543
3.60M
    if (ctxt != NULL) {
544
3.60M
        __xmlRaiseError(schannel,
545
3.60M
                    ctxt->vctxt.error, ctxt->vctxt.userData,
546
3.60M
                    ctxt, NULL, XML_FROM_DTD, error,
547
3.60M
                    XML_ERR_ERROR, NULL, 0, (const char *) str1,
548
3.60M
        (const char *) str2, NULL, 0, 0,
549
3.60M
        msg, (const char *) str1, (const char *) str2);
550
3.60M
  ctxt->valid = 0;
551
3.60M
    } else {
552
0
        __xmlRaiseError(schannel, NULL, NULL,
553
0
                    ctxt, NULL, XML_FROM_DTD, error,
554
0
                    XML_ERR_ERROR, NULL, 0, (const char *) str1,
555
0
        (const char *) str2, NULL, 0, 0,
556
0
        msg, (const char *) str1, (const char *) str2);
557
0
    }
558
3.60M
}
559
560
/**
561
 * xmlFatalErrMsgInt:
562
 * @ctxt:  an XML parser context
563
 * @error:  the error number
564
 * @msg:  the error message
565
 * @val:  an integer value
566
 *
567
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
568
 */
569
static void LIBXML_ATTR_FORMAT(3,0)
570
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
571
                  const char *msg, int val)
572
1.13G
{
573
1.13G
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
574
1.13G
        (ctxt->instate == XML_PARSER_EOF))
575
0
  return;
576
1.13G
    if (ctxt != NULL)
577
1.13G
  ctxt->errNo = error;
578
1.13G
    __xmlRaiseError(NULL, NULL, NULL,
579
1.13G
                    ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
580
1.13G
                    NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
581
1.13G
    if (ctxt != NULL) {
582
1.13G
  ctxt->wellFormed = 0;
583
1.13G
  if (ctxt->recovery == 0)
584
3.36M
      ctxt->disableSAX = 1;
585
1.13G
    }
586
1.13G
}
587
588
/**
589
 * xmlFatalErrMsgStrIntStr:
590
 * @ctxt:  an XML parser context
591
 * @error:  the error number
592
 * @msg:  the error message
593
 * @str1:  an string info
594
 * @val:  an integer value
595
 * @str2:  an string info
596
 *
597
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
598
 */
599
static void LIBXML_ATTR_FORMAT(3,0)
600
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
601
                  const char *msg, const xmlChar *str1, int val,
602
      const xmlChar *str2)
603
1.86M
{
604
1.86M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
605
1.86M
        (ctxt->instate == XML_PARSER_EOF))
606
0
  return;
607
1.86M
    if (ctxt != NULL)
608
1.86M
  ctxt->errNo = error;
609
1.86M
    __xmlRaiseError(NULL, NULL, NULL,
610
1.86M
                    ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
611
1.86M
                    NULL, 0, (const char *) str1, (const char *) str2,
612
1.86M
        NULL, val, 0, msg, str1, val, str2);
613
1.86M
    if (ctxt != NULL) {
614
1.86M
  ctxt->wellFormed = 0;
615
1.86M
  if (ctxt->recovery == 0)
616
313k
      ctxt->disableSAX = 1;
617
1.86M
    }
618
1.86M
}
619
620
/**
621
 * xmlFatalErrMsgStr:
622
 * @ctxt:  an XML parser context
623
 * @error:  the error number
624
 * @msg:  the error message
625
 * @val:  a string value
626
 *
627
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
628
 */
629
static void LIBXML_ATTR_FORMAT(3,0)
630
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
631
                  const char *msg, const xmlChar * val)
632
13.0M
{
633
13.0M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
634
13.0M
        (ctxt->instate == XML_PARSER_EOF))
635
0
  return;
636
13.0M
    if (ctxt != NULL)
637
13.0M
  ctxt->errNo = error;
638
13.0M
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
639
13.0M
                    XML_FROM_PARSER, error, XML_ERR_FATAL,
640
13.0M
                    NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
641
13.0M
                    val);
642
13.0M
    if (ctxt != NULL) {
643
13.0M
  ctxt->wellFormed = 0;
644
13.0M
  if (ctxt->recovery == 0)
645
3.00M
      ctxt->disableSAX = 1;
646
13.0M
    }
647
13.0M
}
648
649
/**
650
 * xmlErrMsgStr:
651
 * @ctxt:  an XML parser context
652
 * @error:  the error number
653
 * @msg:  the error message
654
 * @val:  a string value
655
 *
656
 * Handle a non fatal parser error
657
 */
658
static void LIBXML_ATTR_FORMAT(3,0)
659
xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
660
                  const char *msg, const xmlChar * val)
661
787k
{
662
787k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
663
787k
        (ctxt->instate == XML_PARSER_EOF))
664
0
  return;
665
787k
    if (ctxt != NULL)
666
787k
  ctxt->errNo = error;
667
787k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
668
787k
                    XML_FROM_PARSER, error, XML_ERR_ERROR,
669
787k
                    NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
670
787k
                    val);
671
787k
}
672
673
/**
674
 * xmlNsErr:
675
 * @ctxt:  an XML parser context
676
 * @error:  the error number
677
 * @msg:  the message
678
 * @info1:  extra information string
679
 * @info2:  extra information string
680
 *
681
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
682
 */
683
static void LIBXML_ATTR_FORMAT(3,0)
684
xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
685
         const char *msg,
686
         const xmlChar * info1, const xmlChar * info2,
687
         const xmlChar * info3)
688
2.40M
{
689
2.40M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
690
2.40M
        (ctxt->instate == XML_PARSER_EOF))
691
31
  return;
692
2.40M
    if (ctxt != NULL)
693
2.40M
  ctxt->errNo = error;
694
2.40M
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
695
2.40M
                    XML_ERR_ERROR, NULL, 0, (const char *) info1,
696
2.40M
                    (const char *) info2, (const char *) info3, 0, 0, msg,
697
2.40M
                    info1, info2, info3);
698
2.40M
    if (ctxt != NULL)
699
2.40M
  ctxt->nsWellFormed = 0;
700
2.40M
}
701
702
/**
703
 * xmlNsWarn
704
 * @ctxt:  an XML parser context
705
 * @error:  the error number
706
 * @msg:  the message
707
 * @info1:  extra information string
708
 * @info2:  extra information string
709
 *
710
 * Handle a namespace warning error
711
 */
712
static void LIBXML_ATTR_FORMAT(3,0)
713
xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
714
         const char *msg,
715
         const xmlChar * info1, const xmlChar * info2,
716
         const xmlChar * info3)
717
158k
{
718
158k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
719
158k
        (ctxt->instate == XML_PARSER_EOF))
720
0
  return;
721
158k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
722
158k
                    XML_ERR_WARNING, NULL, 0, (const char *) info1,
723
158k
                    (const char *) info2, (const char *) info3, 0, 0, msg,
724
158k
                    info1, info2, info3);
725
158k
}
726
727
/*
 * Add @val to *@dst, clamping the result to ULONG_MAX instead of
 * wrapping around on overflow.
 */
static void
xmlSaturatedAdd(unsigned long *dst, unsigned long val) {
    /* Largest increment that still fits without wrapping. */
    unsigned long room = ULONG_MAX - *dst;

    *dst = (val > room) ? ULONG_MAX : (*dst + val);
}
734
735
/*
 * Add the size_t quantity @val to *@dst, clamping the result to
 * ULONG_MAX instead of wrapping around on overflow.
 *
 * @val is taken as a size_t so that, where size_t is wider than
 * unsigned long, a large value is not truncated at the call before the
 * saturation check. The comparison is done in the wider of the two
 * types, so any @val that does not fit saturates.
 */
static void
xmlSaturatedAddSizeT(unsigned long *dst, size_t val) {
    if (val > ULONG_MAX - *dst)
        *dst = ULONG_MAX;
    else
        *dst += (unsigned long) val;    /* fits: val <= ULONG_MAX - *dst */
}
742
743
/**
744
 * xmlParserEntityCheck:
745
 * @ctxt:  parser context
746
 * @extra:  sum of unexpanded entity sizes
747
 *
748
 * Check for non-linear entity expansion behaviour.
749
 *
750
 * In some cases like xmlStringDecodeEntities, this function is called
751
 * for each, possibly nested entity and its unexpanded content length.
752
 *
753
 * In other cases like xmlParseReference, it's only called for each
754
 * top-level entity with its unexpanded content length plus the sum of
755
 * the unexpanded content lengths (plus fixed cost) of all nested
756
 * entities.
757
 *
758
 * Summing the unexpanded lengths also adds the length of the reference.
759
 * This is by design. Taking the length of the entity name into account
760
 * discourages attacks that try to waste CPU time with abusively long
761
 * entity names. See test/recurse/lol6.xml for example. Each call also
762
 * adds some fixed cost XML_ENT_FIXED_COST to discourage attacks with
763
 * short entities.
764
 *
765
 * Returns 1 on error, 0 on success.
766
 */
767
static int
768
xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long extra)
769
63.0M
{
770
63.0M
    unsigned long consumed;
771
63.0M
    xmlParserInputPtr input = ctxt->input;
772
63.0M
    xmlEntityPtr entity = input->entity;
773
774
    /*
775
     * Compute total consumed bytes so far, including input streams of
776
     * external entities.
777
     */
778
63.0M
    consumed = input->parentConsumed;
779
63.0M
    if ((entity == NULL) ||
780
63.0M
        ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
781
32.8M
         ((entity->flags & XML_ENT_PARSED) == 0))) {
782
32.8M
        xmlSaturatedAdd(&consumed, input->consumed);
783
32.8M
        xmlSaturatedAddSizeT(&consumed, input->cur - input->base);
784
32.8M
    }
785
63.0M
    xmlSaturatedAdd(&consumed, ctxt->sizeentities);
786
787
    /*
788
     * Add extra cost and some fixed cost.
789
     */
790
63.0M
    xmlSaturatedAdd(&ctxt->sizeentcopy, extra);
791
63.0M
    xmlSaturatedAdd(&ctxt->sizeentcopy, XML_ENT_FIXED_COST);
792
793
    /*
794
     * It's important to always use saturation arithmetic when tracking
795
     * entity sizes to make the size checks reliable. If "sizeentcopy"
796
     * overflows, we have to abort.
797
     */
798
63.0M
    if ((ctxt->sizeentcopy > XML_MAX_TEXT_LENGTH) &&
799
63.0M
        ((ctxt->sizeentcopy >= ULONG_MAX) ||
800
1.31k
         (ctxt->sizeentcopy / XML_PARSER_NON_LINEAR > consumed))) {
801
1.31k
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_LOOP,
802
1.31k
                       "Maximum entity amplification factor exceeded");
803
1.31k
        xmlHaltParser(ctxt);
804
1.31k
        return(1);
805
1.31k
    }
806
807
63.0M
    return(0);
808
63.0M
}
809
810
/************************************************************************
811
 *                  *
812
 *    Library wide options          *
813
 *                  *
814
 ************************************************************************/
815
816
/**
817
  * xmlHasFeature:
818
  * @feature: the feature to be examined
819
  *
820
  * Examines if the library has been compiled with a given feature.
821
  *
822
  * Returns a non-zero value if the feature exists, otherwise zero.
823
  * Returns zero (0) if the feature does not exist or an unknown
824
  * feature is requested, non-zero otherwise.
825
  */
826
int
827
xmlHasFeature(xmlFeature feature)
828
0
{
829
0
    switch (feature) {
830
0
  case XML_WITH_THREAD:
831
0
#ifdef LIBXML_THREAD_ENABLED
832
0
      return(1);
833
#else
834
      return(0);
835
#endif
836
0
        case XML_WITH_TREE:
837
0
#ifdef LIBXML_TREE_ENABLED
838
0
            return(1);
839
#else
840
            return(0);
841
#endif
842
0
        case XML_WITH_OUTPUT:
843
0
#ifdef LIBXML_OUTPUT_ENABLED
844
0
            return(1);
845
#else
846
            return(0);
847
#endif
848
0
        case XML_WITH_PUSH:
849
0
#ifdef LIBXML_PUSH_ENABLED
850
0
            return(1);
851
#else
852
            return(0);
853
#endif
854
0
        case XML_WITH_READER:
855
0
#ifdef LIBXML_READER_ENABLED
856
0
            return(1);
857
#else
858
            return(0);
859
#endif
860
0
        case XML_WITH_PATTERN:
861
0
#ifdef LIBXML_PATTERN_ENABLED
862
0
            return(1);
863
#else
864
            return(0);
865
#endif
866
0
        case XML_WITH_WRITER:
867
0
#ifdef LIBXML_WRITER_ENABLED
868
0
            return(1);
869
#else
870
            return(0);
871
#endif
872
0
        case XML_WITH_SAX1:
873
0
#ifdef LIBXML_SAX1_ENABLED
874
0
            return(1);
875
#else
876
            return(0);
877
#endif
878
0
        case XML_WITH_FTP:
879
#ifdef LIBXML_FTP_ENABLED
880
            return(1);
881
#else
882
0
            return(0);
883
0
#endif
884
0
        case XML_WITH_HTTP:
885
#ifdef LIBXML_HTTP_ENABLED
886
            return(1);
887
#else
888
0
            return(0);
889
0
#endif
890
0
        case XML_WITH_VALID:
891
0
#ifdef LIBXML_VALID_ENABLED
892
0
            return(1);
893
#else
894
            return(0);
895
#endif
896
0
        case XML_WITH_HTML:
897
0
#ifdef LIBXML_HTML_ENABLED
898
0
            return(1);
899
#else
900
            return(0);
901
#endif
902
0
        case XML_WITH_LEGACY:
903
#ifdef LIBXML_LEGACY_ENABLED
904
            return(1);
905
#else
906
0
            return(0);
907
0
#endif
908
0
        case XML_WITH_C14N:
909
0
#ifdef LIBXML_C14N_ENABLED
910
0
            return(1);
911
#else
912
            return(0);
913
#endif
914
0
        case XML_WITH_CATALOG:
915
0
#ifdef LIBXML_CATALOG_ENABLED
916
0
            return(1);
917
#else
918
            return(0);
919
#endif
920
0
        case XML_WITH_XPATH:
921
0
#ifdef LIBXML_XPATH_ENABLED
922
0
            return(1);
923
#else
924
            return(0);
925
#endif
926
0
        case XML_WITH_XPTR:
927
0
#ifdef LIBXML_XPTR_ENABLED
928
0
            return(1);
929
#else
930
            return(0);
931
#endif
932
0
        case XML_WITH_XINCLUDE:
933
0
#ifdef LIBXML_XINCLUDE_ENABLED
934
0
            return(1);
935
#else
936
            return(0);
937
#endif
938
0
        case XML_WITH_ICONV:
939
0
#ifdef LIBXML_ICONV_ENABLED
940
0
            return(1);
941
#else
942
            return(0);
943
#endif
944
0
        case XML_WITH_ISO8859X:
945
0
#ifdef LIBXML_ISO8859X_ENABLED
946
0
            return(1);
947
#else
948
            return(0);
949
#endif
950
0
        case XML_WITH_UNICODE:
951
0
#ifdef LIBXML_UNICODE_ENABLED
952
0
            return(1);
953
#else
954
            return(0);
955
#endif
956
0
        case XML_WITH_REGEXP:
957
0
#ifdef LIBXML_REGEXP_ENABLED
958
0
            return(1);
959
#else
960
            return(0);
961
#endif
962
0
        case XML_WITH_AUTOMATA:
963
0
#ifdef LIBXML_AUTOMATA_ENABLED
964
0
            return(1);
965
#else
966
            return(0);
967
#endif
968
0
        case XML_WITH_EXPR:
969
#ifdef LIBXML_EXPR_ENABLED
970
            return(1);
971
#else
972
0
            return(0);
973
0
#endif
974
0
        case XML_WITH_SCHEMAS:
975
0
#ifdef LIBXML_SCHEMAS_ENABLED
976
0
            return(1);
977
#else
978
            return(0);
979
#endif
980
0
        case XML_WITH_SCHEMATRON:
981
0
#ifdef LIBXML_SCHEMATRON_ENABLED
982
0
            return(1);
983
#else
984
            return(0);
985
#endif
986
0
        case XML_WITH_MODULES:
987
0
#ifdef LIBXML_MODULES_ENABLED
988
0
            return(1);
989
#else
990
            return(0);
991
#endif
992
0
        case XML_WITH_DEBUG:
993
#ifdef LIBXML_DEBUG_ENABLED
994
            return(1);
995
#else
996
0
            return(0);
997
0
#endif
998
0
        case XML_WITH_DEBUG_MEM:
999
#ifdef DEBUG_MEMORY_LOCATION
1000
            return(1);
1001
#else
1002
0
            return(0);
1003
0
#endif
1004
0
        case XML_WITH_DEBUG_RUN:
1005
0
            return(0);
1006
0
        case XML_WITH_ZLIB:
1007
0
#ifdef LIBXML_ZLIB_ENABLED
1008
0
            return(1);
1009
#else
1010
            return(0);
1011
#endif
1012
0
        case XML_WITH_LZMA:
1013
0
#ifdef LIBXML_LZMA_ENABLED
1014
0
            return(1);
1015
#else
1016
            return(0);
1017
#endif
1018
0
        case XML_WITH_ICU:
1019
#ifdef LIBXML_ICU_ENABLED
1020
            return(1);
1021
#else
1022
0
            return(0);
1023
0
#endif
1024
0
        default:
1025
0
      break;
1026
0
     }
1027
0
     return(0);
1028
0
}
1029
1030
/************************************************************************
1031
 *                  *
1032
 *    SAX2 defaulted attributes handling      *
1033
 *                  *
1034
 ************************************************************************/
1035
1036
/**
1037
 * xmlDetectSAX2:
1038
 * @ctxt:  an XML parser context
1039
 *
1040
 * Do the SAX2 detection and specific initialization
1041
 */
1042
static void
1043
3.72M
xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1044
3.72M
    xmlSAXHandlerPtr sax;
1045
1046
    /* Avoid unused variable warning if features are disabled. */
1047
3.72M
    (void) sax;
1048
1049
3.72M
    if (ctxt == NULL) return;
1050
3.72M
    sax = ctxt->sax;
1051
3.72M
#ifdef LIBXML_SAX1_ENABLED
1052
3.72M
    if ((sax) &&  (sax->initialized == XML_SAX2_MAGIC) &&
1053
3.72M
        ((sax->startElementNs != NULL) ||
1054
2.21M
         (sax->endElementNs != NULL) ||
1055
2.21M
         ((sax->startElement == NULL) && (sax->endElement == NULL))))
1056
2.21M
        ctxt->sax2 = 1;
1057
#else
1058
    ctxt->sax2 = 1;
1059
#endif /* LIBXML_SAX1_ENABLED */
1060
1061
3.72M
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1062
3.72M
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1063
3.72M
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
1064
3.72M
    if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1065
3.72M
    (ctxt->str_xml_ns == NULL)) {
1066
0
        xmlErrMemory(ctxt, NULL);
1067
0
    }
1068
3.72M
}
1069
1070
typedef struct _xmlDefAttrs xmlDefAttrs;
1071
typedef xmlDefAttrs *xmlDefAttrsPtr;
1072
struct _xmlDefAttrs {
1073
    int nbAttrs;  /* number of defaulted attributes on that element */
1074
    int maxAttrs;       /* the size of the array */
1075
#if __STDC_VERSION__ >= 199901L
1076
    /* Using a C99 flexible array member avoids UBSan errors. */
1077
    const xmlChar *values[]; /* array of localname/prefix/values/external */
1078
#else
1079
    const xmlChar *values[5];
1080
#endif
1081
};
1082
1083
/**
1084
 * xmlAttrNormalizeSpace:
1085
 * @src: the source string
1086
 * @dst: the target string
1087
 *
1088
 * Normalize the space in non CDATA attribute values:
1089
 * If the attribute type is not CDATA, then the XML processor MUST further
1090
 * process the normalized attribute value by discarding any leading and
1091
 * trailing space (#x20) characters, and by replacing sequences of space
1092
 * (#x20) characters by a single space (#x20) character.
1093
 * Note that the size of dst need to be at least src, and if one doesn't need
1094
 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1095
 * passing src as dst is just fine.
1096
 *
1097
 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1098
 *         is needed.
1099
 */
1100
static xmlChar *
1101
xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1102
353k
{
1103
353k
    if ((src == NULL) || (dst == NULL))
1104
0
        return(NULL);
1105
1106
838k
    while (*src == 0x20) src++;
1107
570M
    while (*src != 0) {
1108
570M
  if (*src == 0x20) {
1109
7.67M
      while (*src == 0x20) src++;
1110
346k
      if (*src != 0)
1111
294k
    *dst++ = 0x20;
1112
570M
  } else {
1113
570M
      *dst++ = *src++;
1114
570M
  }
1115
570M
    }
1116
353k
    *dst = 0;
1117
353k
    if (dst == src)
1118
281k
       return(NULL);
1119
71.6k
    return(dst);
1120
353k
}
1121
1122
/**
1123
 * xmlAttrNormalizeSpace2:
1124
 * @src: the source string
1125
 *
1126
 * Normalize the space in non CDATA attribute values, a slightly more complex
1127
 * front end to avoid allocation problems when running on attribute values
1128
 * coming from the input.
1129
 *
1130
 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1131
 *         is needed.
1132
 */
1133
static const xmlChar *
1134
xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
1135
144k
{
1136
144k
    int i;
1137
144k
    int remove_head = 0;
1138
144k
    int need_realloc = 0;
1139
144k
    const xmlChar *cur;
1140
1141
144k
    if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1142
0
        return(NULL);
1143
144k
    i = *len;
1144
144k
    if (i <= 0)
1145
5.42k
        return(NULL);
1146
1147
138k
    cur = src;
1148
525k
    while (*cur == 0x20) {
1149
386k
        cur++;
1150
386k
  remove_head++;
1151
386k
    }
1152
42.0M
    while (*cur != 0) {
1153
41.9M
  if (*cur == 0x20) {
1154
117k
      cur++;
1155
117k
      if ((*cur == 0x20) || (*cur == 0)) {
1156
33.8k
          need_realloc = 1;
1157
33.8k
    break;
1158
33.8k
      }
1159
117k
  } else
1160
41.8M
      cur++;
1161
41.9M
    }
1162
138k
    if (need_realloc) {
1163
33.8k
        xmlChar *ret;
1164
1165
33.8k
  ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1166
33.8k
  if (ret == NULL) {
1167
0
      xmlErrMemory(ctxt, NULL);
1168
0
      return(NULL);
1169
0
  }
1170
33.8k
  xmlAttrNormalizeSpace(ret, ret);
1171
33.8k
  *len = strlen((const char *)ret);
1172
33.8k
        return(ret);
1173
104k
    } else if (remove_head) {
1174
5.33k
        *len -= remove_head;
1175
5.33k
        memmove(src, src + remove_head, 1 + *len);
1176
5.33k
  return(src);
1177
5.33k
    }
1178
99.4k
    return(NULL);
1179
138k
}
1180
1181
/**
1182
 * xmlAddDefAttrs:
1183
 * @ctxt:  an XML parser context
1184
 * @fullname:  the element fullname
1185
 * @fullattr:  the attribute fullname
1186
 * @value:  the attribute value
1187
 *
1188
 * Add a defaulted attribute for an element
1189
 */
1190
static void
1191
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1192
               const xmlChar *fullname,
1193
               const xmlChar *fullattr,
1194
268k
               const xmlChar *value) {
1195
268k
    xmlDefAttrsPtr defaults;
1196
268k
    int len;
1197
268k
    const xmlChar *name;
1198
268k
    const xmlChar *prefix;
1199
1200
    /*
1201
     * Allows to detect attribute redefinitions
1202
     */
1203
268k
    if (ctxt->attsSpecial != NULL) {
1204
191k
        if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1205
35.1k
      return;
1206
191k
    }
1207
1208
233k
    if (ctxt->attsDefault == NULL) {
1209
82.3k
        ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1210
82.3k
  if (ctxt->attsDefault == NULL)
1211
0
      goto mem_error;
1212
82.3k
    }
1213
1214
    /*
1215
     * split the element name into prefix:localname , the string found
1216
     * are within the DTD and then not associated to namespace names.
1217
     */
1218
233k
    name = xmlSplitQName3(fullname, &len);
1219
233k
    if (name == NULL) {
1220
215k
        name = xmlDictLookup(ctxt->dict, fullname, -1);
1221
215k
  prefix = NULL;
1222
215k
    } else {
1223
18.1k
        name = xmlDictLookup(ctxt->dict, name, -1);
1224
18.1k
  prefix = xmlDictLookup(ctxt->dict, fullname, len);
1225
18.1k
    }
1226
1227
    /*
1228
     * make sure there is some storage
1229
     */
1230
233k
    defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1231
233k
    if (defaults == NULL) {
1232
112k
        defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
1233
112k
                     (4 * 5) * sizeof(const xmlChar *));
1234
112k
  if (defaults == NULL)
1235
0
      goto mem_error;
1236
112k
  defaults->nbAttrs = 0;
1237
112k
  defaults->maxAttrs = 4;
1238
112k
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1239
112k
                          defaults, NULL) < 0) {
1240
0
      xmlFree(defaults);
1241
0
      goto mem_error;
1242
0
  }
1243
120k
    } else if (defaults->nbAttrs >= defaults->maxAttrs) {
1244
11.7k
        xmlDefAttrsPtr temp;
1245
1246
11.7k
        temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
1247
11.7k
           (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1248
11.7k
  if (temp == NULL)
1249
0
      goto mem_error;
1250
11.7k
  defaults = temp;
1251
11.7k
  defaults->maxAttrs *= 2;
1252
11.7k
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1253
11.7k
                          defaults, NULL) < 0) {
1254
0
      xmlFree(defaults);
1255
0
      goto mem_error;
1256
0
  }
1257
11.7k
    }
1258
1259
    /*
1260
     * Split the element name into prefix:localname , the string found
1261
     * are within the DTD and hen not associated to namespace names.
1262
     */
1263
233k
    name = xmlSplitQName3(fullattr, &len);
1264
233k
    if (name == NULL) {
1265
180k
        name = xmlDictLookup(ctxt->dict, fullattr, -1);
1266
180k
  prefix = NULL;
1267
180k
    } else {
1268
53.3k
        name = xmlDictLookup(ctxt->dict, name, -1);
1269
53.3k
  prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1270
53.3k
    }
1271
1272
233k
    defaults->values[5 * defaults->nbAttrs] = name;
1273
233k
    defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1274
    /* intern the string and precompute the end */
1275
233k
    len = xmlStrlen(value);
1276
233k
    value = xmlDictLookup(ctxt->dict, value, len);
1277
233k
    if (value == NULL)
1278
0
        goto mem_error;
1279
233k
    defaults->values[5 * defaults->nbAttrs + 2] = value;
1280
233k
    defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1281
233k
    if (ctxt->external)
1282
33.7k
        defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1283
199k
    else
1284
199k
        defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1285
233k
    defaults->nbAttrs++;
1286
1287
233k
    return;
1288
1289
0
mem_error:
1290
0
    xmlErrMemory(ctxt, NULL);
1291
0
    return;
1292
233k
}
1293
1294
/**
1295
 * xmlAddSpecialAttr:
1296
 * @ctxt:  an XML parser context
1297
 * @fullname:  the element fullname
1298
 * @fullattr:  the attribute fullname
1299
 * @type:  the attribute type
1300
 *
1301
 * Register this attribute type
1302
 */
1303
static void
1304
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1305
      const xmlChar *fullname,
1306
      const xmlChar *fullattr,
1307
      int type)
1308
1.49M
{
1309
1.49M
    if (ctxt->attsSpecial == NULL) {
1310
121k
        ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1311
121k
  if (ctxt->attsSpecial == NULL)
1312
0
      goto mem_error;
1313
121k
    }
1314
1315
1.49M
    if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1316
229k
        return;
1317
1318
1.26M
    xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1319
1.26M
                     (void *) (ptrdiff_t) type);
1320
1.26M
    return;
1321
1322
0
mem_error:
1323
0
    xmlErrMemory(ctxt, NULL);
1324
0
    return;
1325
1.49M
}
1326
1327
/**
1328
 * xmlCleanSpecialAttrCallback:
1329
 *
1330
 * Removes CDATA attributes from the special attribute table
1331
 */
1332
static void
1333
xmlCleanSpecialAttrCallback(void *payload, void *data,
1334
                            const xmlChar *fullname, const xmlChar *fullattr,
1335
928k
                            const xmlChar *unused ATTRIBUTE_UNUSED) {
1336
928k
    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1337
1338
928k
    if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) {
1339
421k
        xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1340
421k
    }
1341
928k
}
1342
1343
/**
1344
 * xmlCleanSpecialAttr:
1345
 * @ctxt:  an XML parser context
1346
 *
1347
 * Trim the list of attributes defined to remove all those of type
1348
 * CDATA as they are not special. This call should be done when finishing
1349
 * to parse the DTD and before starting to parse the document root.
1350
 */
1351
static void
1352
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1353
320k
{
1354
320k
    if (ctxt->attsSpecial == NULL)
1355
233k
        return;
1356
1357
87.3k
    xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1358
1359
87.3k
    if (xmlHashSize(ctxt->attsSpecial) == 0) {
1360
30.4k
        xmlHashFree(ctxt->attsSpecial, NULL);
1361
30.4k
        ctxt->attsSpecial = NULL;
1362
30.4k
    }
1363
87.3k
    return;
1364
320k
}
1365
1366
/**
1367
 * xmlCheckLanguageID:
1368
 * @lang:  pointer to the string value
1369
 *
1370
 * Checks that the value conforms to the LanguageID production:
1371
 *
1372
 * NOTE: this is somewhat deprecated, those productions were removed from
1373
 *       the XML Second edition.
1374
 *
1375
 * [33] LanguageID ::= Langcode ('-' Subcode)*
1376
 * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
1377
 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1378
 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1379
 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1380
 * [38] Subcode ::= ([a-z] | [A-Z])+
1381
 *
1382
 * The current REC reference the successors of RFC 1766, currently 5646
1383
 *
1384
 * http://www.rfc-editor.org/rfc/rfc5646.txt
1385
 * langtag       = language
1386
 *                 ["-" script]
1387
 *                 ["-" region]
1388
 *                 *("-" variant)
1389
 *                 *("-" extension)
1390
 *                 ["-" privateuse]
1391
 * language      = 2*3ALPHA            ; shortest ISO 639 code
1392
 *                 ["-" extlang]       ; sometimes followed by
1393
 *                                     ; extended language subtags
1394
 *               / 4ALPHA              ; or reserved for future use
1395
 *               / 5*8ALPHA            ; or registered language subtag
1396
 *
1397
 * extlang       = 3ALPHA              ; selected ISO 639 codes
1398
 *                 *2("-" 3ALPHA)      ; permanently reserved
1399
 *
1400
 * script        = 4ALPHA              ; ISO 15924 code
1401
 *
1402
 * region        = 2ALPHA              ; ISO 3166-1 code
1403
 *               / 3DIGIT              ; UN M.49 code
1404
 *
1405
 * variant       = 5*8alphanum         ; registered variants
1406
 *               / (DIGIT 3alphanum)
1407
 *
1408
 * extension     = singleton 1*("-" (2*8alphanum))
1409
 *
1410
 *                                     ; Single alphanumerics
1411
 *                                     ; "x" reserved for private use
1412
 * singleton     = DIGIT               ; 0 - 9
1413
 *               / %x41-57             ; A - W
1414
 *               / %x59-5A             ; Y - Z
1415
 *               / %x61-77             ; a - w
1416
 *               / %x79-7A             ; y - z
1417
 *
1418
 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1419
 * The parser below doesn't try to cope with extension or privateuse
1420
 * that could be added but that's not interoperable anyway
1421
 *
1422
 * Returns 1 if correct 0 otherwise
1423
 **/
1424
int
1425
xmlCheckLanguageID(const xmlChar * lang)
1426
305k
{
1427
305k
    const xmlChar *cur = lang, *nxt;
1428
1429
305k
    if (cur == NULL)
1430
4.43k
        return (0);
1431
300k
    if (((cur[0] == 'i') && (cur[1] == '-')) ||
1432
300k
        ((cur[0] == 'I') && (cur[1] == '-')) ||
1433
300k
        ((cur[0] == 'x') && (cur[1] == '-')) ||
1434
300k
        ((cur[0] == 'X') && (cur[1] == '-'))) {
1435
        /*
1436
         * Still allow IANA code and user code which were coming
1437
         * from the previous version of the XML-1.0 specification
1438
         * it's deprecated but we should not fail
1439
         */
1440
10.3k
        cur += 2;
1441
54.5k
        while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1442
54.5k
               ((cur[0] >= 'a') && (cur[0] <= 'z')))
1443
44.2k
            cur++;
1444
10.3k
        return(cur[0] == 0);
1445
10.3k
    }
1446
290k
    nxt = cur;
1447
1.24M
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1448
1.24M
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1449
955k
           nxt++;
1450
290k
    if (nxt - cur >= 4) {
1451
        /*
1452
         * Reserved
1453
         */
1454
28.0k
        if ((nxt - cur > 8) || (nxt[0] != 0))
1455
22.0k
            return(0);
1456
5.95k
        return(1);
1457
28.0k
    }
1458
262k
    if (nxt - cur < 2)
1459
32.5k
        return(0);
1460
    /* we got an ISO 639 code */
1461
229k
    if (nxt[0] == 0)
1462
37.5k
        return(1);
1463
192k
    if (nxt[0] != '-')
1464
9.46k
        return(0);
1465
1466
182k
    nxt++;
1467
182k
    cur = nxt;
1468
    /* now we can have extlang or script or region or variant */
1469
182k
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1470
17.2k
        goto region_m49;
1471
1472
1.50M
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1473
1.50M
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1474
1.33M
           nxt++;
1475
165k
    if (nxt - cur == 4)
1476
35.8k
        goto script;
1477
129k
    if (nxt - cur == 2)
1478
27.4k
        goto region;
1479
102k
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1480
11.4k
        goto variant;
1481
90.8k
    if (nxt - cur != 3)
1482
31.6k
        return(0);
1483
    /* we parsed an extlang */
1484
59.1k
    if (nxt[0] == 0)
1485
4.31k
        return(1);
1486
54.8k
    if (nxt[0] != '-')
1487
3.72k
        return(0);
1488
1489
51.1k
    nxt++;
1490
51.1k
    cur = nxt;
1491
    /* now we can have script or region or variant */
1492
51.1k
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1493
11.9k
        goto region_m49;
1494
1495
216k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1496
216k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1497
177k
           nxt++;
1498
39.2k
    if (nxt - cur == 2)
1499
4.61k
        goto region;
1500
34.6k
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1501
10.6k
        goto variant;
1502
24.0k
    if (nxt - cur != 4)
1503
12.1k
        return(0);
1504
    /* we parsed a script */
1505
47.7k
script:
1506
47.7k
    if (nxt[0] == 0)
1507
9.08k
        return(1);
1508
38.6k
    if (nxt[0] != '-')
1509
12.7k
        return(0);
1510
1511
25.9k
    nxt++;
1512
25.9k
    cur = nxt;
1513
    /* now we can have region or variant */
1514
25.9k
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1515
9.31k
        goto region_m49;
1516
1517
136k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1518
136k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1519
119k
           nxt++;
1520
1521
16.6k
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1522
2.81k
        goto variant;
1523
13.8k
    if (nxt - cur != 2)
1524
9.61k
        return(0);
1525
    /* we parsed a region */
1526
55.5k
region:
1527
55.5k
    if (nxt[0] == 0)
1528
9.18k
        return(1);
1529
46.3k
    if (nxt[0] != '-')
1530
21.7k
        return(0);
1531
1532
24.5k
    nxt++;
1533
24.5k
    cur = nxt;
1534
    /* now we can just have a variant */
1535
168k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1536
168k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1537
143k
           nxt++;
1538
1539
24.5k
    if ((nxt - cur < 5) || (nxt - cur > 8))
1540
7.08k
        return(0);
1541
1542
    /* we parsed a variant */
1543
42.3k
variant:
1544
42.3k
    if (nxt[0] == 0)
1545
28.3k
        return(1);
1546
14.0k
    if (nxt[0] != '-')
1547
7.10k
        return(0);
1548
    /* extensions and private use subtags not checked */
1549
6.95k
    return (1);
1550
1551
38.5k
region_m49:
1552
38.5k
    if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1553
38.5k
        ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1554
19.2k
        nxt += 3;
1555
19.2k
        goto region;
1556
19.2k
    }
1557
19.2k
    return(0);
1558
38.5k
}
1559
1560
/************************************************************************
1561
 *                  *
1562
 *    Parser stacks related functions and macros    *
1563
 *                  *
1564
 ************************************************************************/
1565
1566
static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1567
                                            const xmlChar ** str);
1568
1569
#ifdef SAX2
1570
/**
1571
 * nsPush:
1572
 * @ctxt:  an XML parser context
1573
 * @prefix:  the namespace prefix or NULL
1574
 * @URL:  the namespace name
1575
 *
1576
 * Pushes a new parser namespace on top of the ns stack
1577
 *
1578
 * Returns -1 in case of error, -2 if the namespace should be discarded
1579
 *     and the index in the stack otherwise.
1580
 */
1581
static int
1582
nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1583
1.06M
{
1584
1.06M
    if (ctxt->options & XML_PARSE_NSCLEAN) {
1585
377k
        int i;
1586
1.92M
  for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
1587
1.87M
      if (ctxt->nsTab[i] == prefix) {
1588
    /* in scope */
1589
328k
          if (ctxt->nsTab[i + 1] == URL)
1590
70.1k
        return(-2);
1591
    /* out of scope keep it */
1592
258k
    break;
1593
328k
      }
1594
1.87M
  }
1595
377k
    }
1596
996k
    if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1597
81.3k
  ctxt->nsMax = 10;
1598
81.3k
  ctxt->nsNr = 0;
1599
81.3k
  ctxt->nsTab = (const xmlChar **)
1600
81.3k
                xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1601
81.3k
  if (ctxt->nsTab == NULL) {
1602
0
      xmlErrMemory(ctxt, NULL);
1603
0
      ctxt->nsMax = 0;
1604
0
            return (-1);
1605
0
  }
1606
915k
    } else if (ctxt->nsNr >= ctxt->nsMax) {
1607
24.4k
        const xmlChar ** tmp;
1608
24.4k
        ctxt->nsMax *= 2;
1609
24.4k
        tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1610
24.4k
            ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1611
24.4k
        if (tmp == NULL) {
1612
0
            xmlErrMemory(ctxt, NULL);
1613
0
      ctxt->nsMax /= 2;
1614
0
            return (-1);
1615
0
        }
1616
24.4k
  ctxt->nsTab = tmp;
1617
24.4k
    }
1618
996k
    ctxt->nsTab[ctxt->nsNr++] = prefix;
1619
996k
    ctxt->nsTab[ctxt->nsNr++] = URL;
1620
996k
    return (ctxt->nsNr);
1621
996k
}
1622
/**
1623
 * nsPop:
1624
 * @ctxt: an XML parser context
1625
 * @nr:  the number to pop
1626
 *
1627
 * Pops the top @nr parser prefix/namespace from the ns stack
1628
 *
1629
 * Returns the number of namespaces removed
1630
 */
1631
static int
1632
nsPop(xmlParserCtxtPtr ctxt, int nr)
1633
266k
{
1634
266k
    int i;
1635
1636
266k
    if (ctxt->nsTab == NULL) return(0);
1637
266k
    if (ctxt->nsNr < nr) {
1638
0
        xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1639
0
        nr = ctxt->nsNr;
1640
0
    }
1641
266k
    if (ctxt->nsNr <= 0)
1642
0
        return (0);
1643
1644
874k
    for (i = 0;i < nr;i++) {
1645
608k
         ctxt->nsNr--;
1646
608k
   ctxt->nsTab[ctxt->nsNr] = NULL;
1647
608k
    }
1648
266k
    return(nr);
1649
266k
}
1650
#endif
1651
1652
static int
1653
163k
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1654
163k
    const xmlChar **atts;
1655
163k
    int *attallocs;
1656
163k
    int maxatts;
1657
1658
163k
    if (nr + 5 > ctxt->maxatts) {
1659
163k
  maxatts = ctxt->maxatts == 0 ? 55 : (nr + 5) * 2;
1660
163k
  atts = (const xmlChar **) xmlMalloc(
1661
163k
             maxatts * sizeof(const xmlChar *));
1662
163k
  if (atts == NULL) goto mem_error;
1663
163k
  attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1664
163k
                               (maxatts / 5) * sizeof(int));
1665
163k
  if (attallocs == NULL) {
1666
0
            xmlFree(atts);
1667
0
            goto mem_error;
1668
0
        }
1669
163k
        if (ctxt->maxatts > 0)
1670
647
            memcpy(atts, ctxt->atts, ctxt->maxatts * sizeof(const xmlChar *));
1671
163k
        xmlFree(ctxt->atts);
1672
163k
  ctxt->atts = atts;
1673
163k
  ctxt->attallocs = attallocs;
1674
163k
  ctxt->maxatts = maxatts;
1675
163k
    }
1676
163k
    return(ctxt->maxatts);
1677
0
mem_error:
1678
0
    xmlErrMemory(ctxt, NULL);
1679
0
    return(-1);
1680
163k
}
1681
1682
/**
1683
 * inputPush:
1684
 * @ctxt:  an XML parser context
1685
 * @value:  the parser input
1686
 *
1687
 * Pushes a new parser input on top of the input stack
1688
 *
1689
 * Returns -1 in case of error, the index in the stack otherwise
1690
 */
1691
int
1692
inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1693
31.2M
{
1694
31.2M
    if ((ctxt == NULL) || (value == NULL))
1695
0
        return(-1);
1696
31.2M
    if (ctxt->inputNr >= ctxt->inputMax) {
1697
825
        size_t newSize = ctxt->inputMax * 2;
1698
825
        xmlParserInputPtr *tmp;
1699
1700
825
        tmp = (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1701
825
                                               newSize * sizeof(*tmp));
1702
825
        if (tmp == NULL) {
1703
0
            xmlErrMemory(ctxt, NULL);
1704
0
            return (-1);
1705
0
        }
1706
825
        ctxt->inputTab = tmp;
1707
825
        ctxt->inputMax = newSize;
1708
825
    }
1709
31.2M
    ctxt->inputTab[ctxt->inputNr] = value;
1710
31.2M
    ctxt->input = value;
1711
31.2M
    return (ctxt->inputNr++);
1712
31.2M
}
1713
/**
1714
 * inputPop:
1715
 * @ctxt: an XML parser context
1716
 *
1717
 * Pops the top parser input from the input stack
1718
 *
1719
 * Returns the input just removed
1720
 */
1721
xmlParserInputPtr
1722
inputPop(xmlParserCtxtPtr ctxt)
1723
33.9M
{
1724
33.9M
    xmlParserInputPtr ret;
1725
1726
33.9M
    if (ctxt == NULL)
1727
0
        return(NULL);
1728
33.9M
    if (ctxt->inputNr <= 0)
1729
2.75M
        return (NULL);
1730
31.1M
    ctxt->inputNr--;
1731
31.1M
    if (ctxt->inputNr > 0)
1732
30.2M
        ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1733
939k
    else
1734
939k
        ctxt->input = NULL;
1735
31.1M
    ret = ctxt->inputTab[ctxt->inputNr];
1736
31.1M
    ctxt->inputTab[ctxt->inputNr] = NULL;
1737
31.1M
    return (ret);
1738
33.9M
}
1739
/**
1740
 * nodePush:
1741
 * @ctxt:  an XML parser context
1742
 * @value:  the element node
1743
 *
1744
 * Pushes a new element node on top of the node stack
1745
 *
1746
 * Returns -1 in case of error, the index in the stack otherwise
1747
 */
1748
int
1749
nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1750
17.1M
{
1751
17.1M
    if (ctxt == NULL) return(0);
1752
17.1M
    if (ctxt->nodeNr >= ctxt->nodeMax) {
1753
42.3k
        xmlNodePtr *tmp;
1754
1755
42.3k
  tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1756
42.3k
                                      ctxt->nodeMax * 2 *
1757
42.3k
                                      sizeof(ctxt->nodeTab[0]));
1758
42.3k
        if (tmp == NULL) {
1759
0
            xmlErrMemory(ctxt, NULL);
1760
0
            return (-1);
1761
0
        }
1762
42.3k
        ctxt->nodeTab = tmp;
1763
42.3k
  ctxt->nodeMax *= 2;
1764
42.3k
    }
1765
17.1M
    if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1766
17.1M
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
1767
381
  xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1768
381
     "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1769
381
        xmlParserMaxDepth);
1770
381
  xmlHaltParser(ctxt);
1771
381
  return(-1);
1772
381
    }
1773
17.1M
    ctxt->nodeTab[ctxt->nodeNr] = value;
1774
17.1M
    ctxt->node = value;
1775
17.1M
    return (ctxt->nodeNr++);
1776
17.1M
}
1777
1778
/**
1779
 * nodePop:
1780
 * @ctxt: an XML parser context
1781
 *
1782
 * Pops the top element node from the node stack
1783
 *
1784
 * Returns the node just removed
1785
 */
1786
xmlNodePtr
1787
nodePop(xmlParserCtxtPtr ctxt)
1788
15.9M
{
1789
15.9M
    xmlNodePtr ret;
1790
1791
15.9M
    if (ctxt == NULL) return(NULL);
1792
15.9M
    if (ctxt->nodeNr <= 0)
1793
469k
        return (NULL);
1794
15.4M
    ctxt->nodeNr--;
1795
15.4M
    if (ctxt->nodeNr > 0)
1796
13.8M
        ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1797
1.58M
    else
1798
1.58M
        ctxt->node = NULL;
1799
15.4M
    ret = ctxt->nodeTab[ctxt->nodeNr];
1800
15.4M
    ctxt->nodeTab[ctxt->nodeNr] = NULL;
1801
15.4M
    return (ret);
1802
15.9M
}
1803
1804
/**
1805
 * nameNsPush:
1806
 * @ctxt:  an XML parser context
1807
 * @value:  the element name
1808
 * @prefix:  the element prefix
1809
 * @URI:  the element namespace name
1810
 * @line:  the current line number for error messages
1811
 * @nsNr:  the number of namespaces pushed on the namespace table
1812
 *
1813
 * Pushes a new element name/prefix/URL on top of the name stack
1814
 *
1815
 * Returns -1 in case of error, the index in the stack otherwise
1816
 */
1817
static int
1818
nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1819
           const xmlChar *prefix, const xmlChar *URI, int line, int nsNr)
1820
12.7M
{
1821
12.7M
    xmlStartTag *tag;
1822
1823
12.7M
    if (ctxt->nameNr >= ctxt->nameMax) {
1824
97.7k
        const xmlChar * *tmp;
1825
97.7k
        xmlStartTag *tmp2;
1826
97.7k
        ctxt->nameMax *= 2;
1827
97.7k
        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1828
97.7k
                                    ctxt->nameMax *
1829
97.7k
                                    sizeof(ctxt->nameTab[0]));
1830
97.7k
        if (tmp == NULL) {
1831
0
      ctxt->nameMax /= 2;
1832
0
      goto mem_error;
1833
0
        }
1834
97.7k
  ctxt->nameTab = tmp;
1835
97.7k
        tmp2 = (xmlStartTag *) xmlRealloc((void * *)ctxt->pushTab,
1836
97.7k
                                    ctxt->nameMax *
1837
97.7k
                                    sizeof(ctxt->pushTab[0]));
1838
97.7k
        if (tmp2 == NULL) {
1839
0
      ctxt->nameMax /= 2;
1840
0
      goto mem_error;
1841
0
        }
1842
97.7k
  ctxt->pushTab = tmp2;
1843
12.6M
    } else if (ctxt->pushTab == NULL) {
1844
521k
        ctxt->pushTab = (xmlStartTag *) xmlMalloc(ctxt->nameMax *
1845
521k
                                            sizeof(ctxt->pushTab[0]));
1846
521k
        if (ctxt->pushTab == NULL)
1847
0
            goto mem_error;
1848
521k
    }
1849
12.7M
    ctxt->nameTab[ctxt->nameNr] = value;
1850
12.7M
    ctxt->name = value;
1851
12.7M
    tag = &ctxt->pushTab[ctxt->nameNr];
1852
12.7M
    tag->prefix = prefix;
1853
12.7M
    tag->URI = URI;
1854
12.7M
    tag->line = line;
1855
12.7M
    tag->nsNr = nsNr;
1856
12.7M
    return (ctxt->nameNr++);
1857
0
mem_error:
1858
0
    xmlErrMemory(ctxt, NULL);
1859
0
    return (-1);
1860
12.7M
}
1861
#ifdef LIBXML_PUSH_ENABLED
/**
 * nameNsPop:
 * @ctxt: an XML parser context
 *
 * Removes the entry on top of the name stack. The name below it, if any,
 * becomes the current name; otherwise the current name is NULL.
 *
 * Returns the name just removed, or NULL if the stack is empty
 */
static const xmlChar *
nameNsPop(xmlParserCtxtPtr ctxt)
{
    const xmlChar *top;

    if (ctxt->nameNr <= 0)
        return (NULL);

    ctxt->nameNr--;
    ctxt->name = (ctxt->nameNr > 0) ? ctxt->nameTab[ctxt->nameNr - 1] : NULL;

    top = ctxt->nameTab[ctxt->nameNr];
    ctxt->nameTab[ctxt->nameNr] = NULL;
    return (top);
}
#endif /* LIBXML_PUSH_ENABLED */
1887
1888
/**
1889
 * namePush:
1890
 * @ctxt:  an XML parser context
1891
 * @value:  the element name
1892
 *
1893
 * Pushes a new element name on top of the name stack
1894
 *
1895
 * Returns -1 in case of error, the index in the stack otherwise
1896
 */
1897
int
1898
namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1899
0
{
1900
0
    if (ctxt == NULL) return (-1);
1901
1902
0
    if (ctxt->nameNr >= ctxt->nameMax) {
1903
0
        const xmlChar * *tmp;
1904
0
        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1905
0
                                    ctxt->nameMax * 2 *
1906
0
                                    sizeof(ctxt->nameTab[0]));
1907
0
        if (tmp == NULL) {
1908
0
      goto mem_error;
1909
0
        }
1910
0
  ctxt->nameTab = tmp;
1911
0
        ctxt->nameMax *= 2;
1912
0
    }
1913
0
    ctxt->nameTab[ctxt->nameNr] = value;
1914
0
    ctxt->name = value;
1915
0
    return (ctxt->nameNr++);
1916
0
mem_error:
1917
0
    xmlErrMemory(ctxt, NULL);
1918
0
    return (-1);
1919
0
}
1920
/**
1921
 * namePop:
1922
 * @ctxt: an XML parser context
1923
 *
1924
 * Pops the top element name from the name stack
1925
 *
1926
 * Returns the name just removed
1927
 */
1928
const xmlChar *
1929
namePop(xmlParserCtxtPtr ctxt)
1930
5.59M
{
1931
5.59M
    const xmlChar *ret;
1932
1933
5.59M
    if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1934
0
        return (NULL);
1935
5.59M
    ctxt->nameNr--;
1936
5.59M
    if (ctxt->nameNr > 0)
1937
5.36M
        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1938
231k
    else
1939
231k
        ctxt->name = NULL;
1940
5.59M
    ret = ctxt->nameTab[ctxt->nameNr];
1941
5.59M
    ctxt->nameTab[ctxt->nameNr] = NULL;
1942
5.59M
    return (ret);
1943
5.59M
}
1944
1945
20.9M
/*
 * spacePush: push @val on the space stack, doubling the stack when it is
 * full, and make ctxt->space point at the new top entry.
 *
 * Returns -1 if the stack could not be grown, the index of the pushed
 * entry otherwise.
 */
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
    if (ctxt->spaceNr >= ctxt->spaceMax) {
        int *grown;

        ctxt->spaceMax *= 2;
        grown = (int *) xmlRealloc(ctxt->spaceTab,
                                   ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
        if (grown == NULL) {
            xmlErrMemory(ctxt, NULL);
            /* the old table is still in use: restore its capacity */
            ctxt->spaceMax /=2;
            return(-1);
        }
        ctxt->spaceTab = grown;
    }

    ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
    *ctxt->space = val;
    return(ctxt->spaceNr++);
}
1963
1964
19.5M
/*
 * spacePop: pop the top entry of the space stack and mark its slot with -1.
 * ctxt->space is moved to the new top entry, or to the first slot of the
 * table when the stack becomes empty.
 *
 * Returns the popped value, or 0 if the stack was already empty.
 */
static int spacePop(xmlParserCtxtPtr ctxt) {
    int popped;

    if (ctxt->spaceNr <= 0)
        return(0);

    ctxt->spaceNr--;
    ctxt->space = &ctxt->spaceTab[(ctxt->spaceNr > 0) ? ctxt->spaceNr - 1 : 0];

    popped = ctxt->spaceTab[ctxt->spaceNr];
    ctxt->spaceTab[ctxt->spaceNr] = -1;
    return(popped);
}
1976
1977
/*
1978
 * Macros for accessing the content. Those should be used only by the parser,
1979
 * and not exported.
1980
 *
1981
 * Dirty macros, i.e. one often needs to make assumptions about the context to
1982
 * use them
1983
 *
1984
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
1985
 *           To be used with extreme caution since operations consuming
1986
 *           characters may move the input buffer to a different location !
1987
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
1988
 *           This should be used internally by the parser
1989
 *           only to compare to ASCII values otherwise it would break when
1990
 *           running with UTF-8 encoding.
1991
 *   RAW     same as CUR but in the input buffer, bypass any token
1992
 *           extraction that may have been done
1993
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used only
1994
 *           to compare on ASCII based substring.
1995
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
1996
 *           strings without newlines within the parser.
1997
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
1998
 *           defined char within the parser.
1999
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
2000
 *
2001
 *   NEXT    Skip to the next character, this does the proper decoding
2002
 *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
2003
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
2004
 *   CUR_CHAR(l) returns the current unicode character (int), set l
2005
 *           to the number of xmlChars used for the encoding [0-5].
2006
 *   CUR_SCHAR  same but operate on a string instead of the context
2007
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
2008
 *            the index
2009
 *   GROW, SHRINK  handling of input buffers
2010
 */
2011
2012
510M
/*
 * Raw accessors on the current input. They expand to expressions on
 * ctxt->input, so a variable named ctxt must be in scope at the point of
 * use. The expansions are parenthesized so they compose safely with
 * surrounding operators.
 */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) (ctxt->input->cur[(val)])
#define CUR_PTR (ctxt->input->cur)
#define BASE_PTR (ctxt->input->base)

/*
 * CMPn(s, c1, ..., cn): true when the first n bytes at s equal c1..cn.
 * Comparison stops at the first mismatch. The pointer argument is
 * parenthesized before the cast so that an expression such as "p + 1"
 * is cast as a whole.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )
2035
2036
121M
/*
 * SKIP(val): advance the input cursor and the column counter by val, then
 * ask for more input if the cursor landed on a NUL byte. The line counter
 * is not touched, so the skipped text must not contain newlines.
 */
#define SKIP(val) do { \
    ctxt->input->cur += (val); \
    ctxt->input->col += (val); \
    if (*ctxt->input->cur == 0) \
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
  } while (0)

/*
 * SKIPL(val): advance the input cursor by val bytes one at a time, keeping
 * the line and column counters up to date across newlines, then ask for
 * more input if the cursor landed on a NUL byte. The argument is
 * parenthesized in the loop bound so that any expression can be passed.
 */
#define SKIPL(val) do { \
    int skipl; \
    for (skipl = 0; skipl < (val); skipl++) { \
        if (*(ctxt->input->cur) == '\n') { \
            ctxt->input->line++; ctxt->input->col = 1; \
        } else ctxt->input->col++; \
        ctxt->input->cur++; \
    } \
    if (*ctxt->input->cur == 0) \
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
  } while (0)
2053
2054
3.56G
#define SHRINK if ((ctxt->progressive == 0) &&       \
2055
3.56G
       (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
2056
3.56G
       (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
2057
3.56G
  xmlSHRINK (ctxt);
2058
2059
1.78M
static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
2060
    /* Don't shrink memory buffers. */
2061
1.78M
    if ((ctxt->input->buf) &&
2062
1.78M
        ((ctxt->input->buf->encoder) || (ctxt->input->buf->readcallback)))
2063
5.76k
        xmlParserInputShrink(ctxt->input);
2064
1.78M
    if (*ctxt->input->cur == 0)
2065
55.3k
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2066
1.78M
}
2067
2068
6.66G
#define GROW if ((ctxt->progressive == 0) &&       \
2069
6.66G
     (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
2070
6.66G
  xmlGROW (ctxt);
2071
2072
79.8M
static void xmlGROW (xmlParserCtxtPtr ctxt) {
2073
79.8M
    ptrdiff_t curEnd = ctxt->input->end - ctxt->input->cur;
2074
79.8M
    ptrdiff_t curBase = ctxt->input->cur - ctxt->input->base;
2075
2076
79.8M
    if (((curEnd > XML_MAX_LOOKUP_LIMIT) ||
2077
79.8M
         (curBase > XML_MAX_LOOKUP_LIMIT)) &&
2078
79.8M
         ((ctxt->input->buf) &&
2079
0
          (ctxt->input->buf->readcallback != NULL)) &&
2080
79.8M
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
2081
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
2082
0
        xmlHaltParser(ctxt);
2083
0
  return;
2084
0
    }
2085
79.8M
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2086
79.8M
    if ((ctxt->input->cur > ctxt->input->end) ||
2087
79.8M
        (ctxt->input->cur < ctxt->input->base)) {
2088
0
        xmlHaltParser(ctxt);
2089
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "cur index out of bound");
2090
0
  return;
2091
0
    }
2092
79.8M
    if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0))
2093
734k
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2094
79.8M
}
2095
2096
137M
/* SKIP_BLANKS / NEXT: shorthands for the corresponding parser functions. */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: advance the cursor and the column counter by one byte, then ask
 * for more input if the cursor landed on a NUL byte. The line counter is
 * not touched, so the skipped byte must not be a newline.
 * The expansion is a plain brace block, not a do/while(0) statement.
 */
#define NEXT1 { \
    ctxt->input->col++; \
    ctxt->input->cur++; \
    if (*ctxt->input->cur == 0) \
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
  }

/*
 * NEXTL(l): step over the current character, which is l bytes long,
 * updating the line and column counters (a newline starts a new line,
 * anything else counts as one column).
 */
#define NEXTL(l) do { \
    if (*(ctxt->input->cur) == '\n') { \
        ctxt->input->line++; ctxt->input->col = 1; \
    } else ctxt->input->col++; \
    ctxt->input->cur += (l); \
  } while (0)

/* CUR_CHAR / CUR_SCHAR: l must be an lvalue, its address is passed on. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l,b,i,v): append character v to buffer b at index i. When l is
 * 1 the value is stored as a single byte; otherwise it is encoded through
 * xmlCopyCharMultiByte() and i advances by the number of bytes written.
 * The expansion is an if/else statement without a trailing semicolon; the
 * arguments are parenthesized so expressions can be passed safely.
 */
#define COPY_BUF(l,b,i,v) \
    if ((l) == 1) (b)[(i)++] = (v); \
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
2120
2121
/**
2122
 * xmlSkipBlankChars:
2123
 * @ctxt:  the XML parser context
2124
 *
2125
 * skip all blanks character found at that point in the input streams.
2126
 * It pops up finished entities in the process if allowable at that point.
2127
 *
2128
 * Returns the number of space chars skipped
2129
 */
2130
2131
int
2132
137M
xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2133
137M
    int res = 0;
2134
2135
    /*
2136
     * It's Okay to use CUR/NEXT here since all the blanks are on
2137
     * the ASCII range.
2138
     */
2139
137M
    if (((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) ||
2140
137M
        (ctxt->instate == XML_PARSER_START)) {
2141
84.3M
  const xmlChar *cur;
2142
  /*
2143
   * if we are in the document content, go really fast
2144
   */
2145
84.3M
  cur = ctxt->input->cur;
2146
89.2M
  while (IS_BLANK_CH(*cur)) {
2147
89.2M
      if (*cur == '\n') {
2148
34.6M
    ctxt->input->line++; ctxt->input->col = 1;
2149
54.5M
      } else {
2150
54.5M
    ctxt->input->col++;
2151
54.5M
      }
2152
89.2M
      cur++;
2153
89.2M
      if (res < INT_MAX)
2154
89.2M
    res++;
2155
89.2M
      if (*cur == 0) {
2156
151k
    ctxt->input->cur = cur;
2157
151k
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2158
151k
    cur = ctxt->input->cur;
2159
151k
      }
2160
89.2M
  }
2161
84.3M
  ctxt->input->cur = cur;
2162
84.3M
    } else {
2163
52.9M
        int expandPE = ((ctxt->external != 0) || (ctxt->inputNr != 1));
2164
2165
207M
  while (ctxt->instate != XML_PARSER_EOF) {
2166
207M
            if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2167
86.6M
    NEXT;
2168
120M
      } else if (CUR == '%') {
2169
                /*
2170
                 * Need to handle support of entities branching here
2171
                 */
2172
37.8M
          if ((expandPE == 0) || (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2173
356k
                    break;
2174
37.5M
          xmlParsePEReference(ctxt);
2175
82.8M
            } else if (CUR == 0) {
2176
30.2M
                unsigned long consumed;
2177
30.2M
                xmlEntityPtr ent;
2178
2179
30.2M
                if (ctxt->inputNr <= 1)
2180
98.4k
                    break;
2181
2182
30.1M
                consumed = ctxt->input->consumed;
2183
30.1M
                xmlSaturatedAddSizeT(&consumed,
2184
30.1M
                                     ctxt->input->cur - ctxt->input->base);
2185
2186
                /*
2187
                 * Add to sizeentities when parsing an external entity
2188
                 * for the first time.
2189
                 */
2190
30.1M
                ent = ctxt->input->entity;
2191
30.1M
                if ((ent->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
2192
30.1M
                    ((ent->flags & XML_ENT_PARSED) == 0)) {
2193
4.43k
                    ent->flags |= XML_ENT_PARSED;
2194
2195
4.43k
                    xmlSaturatedAdd(&ctxt->sizeentities, consumed);
2196
4.43k
                }
2197
2198
30.1M
                xmlParserEntityCheck(ctxt, consumed);
2199
2200
30.1M
                xmlPopInput(ctxt);
2201
52.5M
            } else {
2202
52.5M
                break;
2203
52.5M
            }
2204
2205
            /*
2206
             * Also increase the counter when entering or exiting a PERef.
2207
             * The spec says: "When a parameter-entity reference is recognized
2208
             * in the DTD and included, its replacement text MUST be enlarged
2209
             * by the attachment of one leading and one following space (#x20)
2210
             * character."
2211
             */
2212
154M
      if (res < INT_MAX)
2213
154M
    res++;
2214
154M
        }
2215
52.9M
    }
2216
137M
    return(res);
2217
137M
}
2218
2219
/************************************************************************
2220
 *                  *
2221
 *    Commodity functions to handle entities      *
2222
 *                  *
2223
 ************************************************************************/
2224
2225
/**
2226
 * xmlPopInput:
2227
 * @ctxt:  an XML parser context
2228
 *
2229
 * xmlPopInput: the current input pointed by ctxt->input came to an end
2230
 *          pop it and return the next char.
2231
 *
2232
 * Returns the current xmlChar in the parser context
2233
 */
2234
xmlChar
2235
30.1M
xmlPopInput(xmlParserCtxtPtr ctxt) {
2236
30.1M
    xmlParserInputPtr input;
2237
2238
30.1M
    if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2239
30.1M
    if (xmlParserDebugEntities)
2240
0
  xmlGenericError(xmlGenericErrorContext,
2241
0
    "Popping input %d\n", ctxt->inputNr);
2242
30.1M
    if ((ctxt->inputNr > 1) && (ctxt->inSubset == 0) &&
2243
30.1M
        (ctxt->instate != XML_PARSER_EOF))
2244
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2245
0
                    "Unfinished entity outside the DTD");
2246
30.1M
    input = inputPop(ctxt);
2247
30.1M
    if (input->entity != NULL)
2248
30.1M
        input->entity->flags &= ~XML_ENT_EXPANDING;
2249
30.1M
    xmlFreeInputStream(input);
2250
30.1M
    if (*ctxt->input->cur == 0)
2251
14.6M
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2252
30.1M
    return(CUR);
2253
30.1M
}
2254
2255
/**
2256
 * xmlPushInput:
2257
 * @ctxt:  an XML parser context
2258
 * @input:  an XML parser input fragment (entity, XML fragment ...).
2259
 *
2260
 * xmlPushInput: switch to a new input stream which is stacked on top
2261
 *               of the previous one(s).
2262
 * Returns -1 in case of error or the index in the input stack
2263
 */
2264
int
2265
30.2M
xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2266
30.2M
    int ret;
2267
30.2M
    if (input == NULL) return(-1);
2268
2269
30.2M
    if (xmlParserDebugEntities) {
2270
0
  if ((ctxt->input != NULL) && (ctxt->input->filename))
2271
0
      xmlGenericError(xmlGenericErrorContext,
2272
0
        "%s(%d): ", ctxt->input->filename,
2273
0
        ctxt->input->line);
2274
0
  xmlGenericError(xmlGenericErrorContext,
2275
0
    "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2276
0
    }
2277
30.2M
    if (((ctxt->inputNr > 40) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2278
30.2M
        (ctxt->inputNr > 100)) {
2279
0
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2280
0
        while (ctxt->inputNr > 1)
2281
0
            xmlFreeInputStream(inputPop(ctxt));
2282
0
  return(-1);
2283
0
    }
2284
30.2M
    ret = inputPush(ctxt, input);
2285
30.2M
    if (ctxt->instate == XML_PARSER_EOF)
2286
0
        return(-1);
2287
30.2M
    GROW;
2288
30.2M
    return(ret);
2289
30.2M
}
2290
2291
/**
2292
 * xmlParseCharRef:
2293
 * @ctxt:  an XML parser context
2294
 *
2295
 * DEPRECATED: Internal function, don't use.
2296
 *
2297
 * Parse a numeric character reference. Always consumes '&'.
2298
 *
2299
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2300
 *                  '&#x' [0-9a-fA-F]+ ';'
2301
 *
2302
 * [ WFC: Legal Character ]
2303
 * Characters referred to using character references must match the
2304
 * production for Char.
2305
 *
2306
 * Returns the value parsed (as an int), 0 in case of error
2307
 */
2308
int
2309
2.24M
xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2310
2.24M
    int val = 0;
2311
2.24M
    int count = 0;
2312
2313
    /*
2314
     * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2315
     */
2316
2.24M
    if ((RAW == '&') && (NXT(1) == '#') &&
2317
2.24M
        (NXT(2) == 'x')) {
2318
1.48M
  SKIP(3);
2319
1.48M
  GROW;
2320
3.84M
  while (RAW != ';') { /* loop blocked by count */
2321
2.44M
      if (count++ > 20) {
2322
45.8k
    count = 0;
2323
45.8k
    GROW;
2324
45.8k
                if (ctxt->instate == XML_PARSER_EOF)
2325
0
                    return(0);
2326
45.8k
      }
2327
2.44M
      if ((RAW >= '0') && (RAW <= '9'))
2328
1.13M
          val = val * 16 + (CUR - '0');
2329
1.31M
      else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2330
852k
          val = val * 16 + (CUR - 'a') + 10;
2331
460k
      else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2332
372k
          val = val * 16 + (CUR - 'A') + 10;
2333
87.6k
      else {
2334
87.6k
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2335
87.6k
    val = 0;
2336
87.6k
    break;
2337
87.6k
      }
2338
2.36M
      if (val > 0x110000)
2339
529k
          val = 0x110000;
2340
2341
2.36M
      NEXT;
2342
2.36M
      count++;
2343
2.36M
  }
2344
1.48M
  if (RAW == ';') {
2345
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2346
1.39M
      ctxt->input->col++;
2347
1.39M
      ctxt->input->cur++;
2348
1.39M
  }
2349
1.48M
    } else if  ((RAW == '&') && (NXT(1) == '#')) {
2350
755k
  SKIP(2);
2351
755k
  GROW;
2352
3.14M
  while (RAW != ';') { /* loop blocked by count */
2353
2.47M
      if (count++ > 20) {
2354
48.4k
    count = 0;
2355
48.4k
    GROW;
2356
48.4k
                if (ctxt->instate == XML_PARSER_EOF)
2357
0
                    return(0);
2358
48.4k
      }
2359
2.47M
      if ((RAW >= '0') && (RAW <= '9'))
2360
2.38M
          val = val * 10 + (CUR - '0');
2361
87.9k
      else {
2362
87.9k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2363
87.9k
    val = 0;
2364
87.9k
    break;
2365
87.9k
      }
2366
2.38M
      if (val > 0x110000)
2367
511k
          val = 0x110000;
2368
2369
2.38M
      NEXT;
2370
2.38M
      count++;
2371
2.38M
  }
2372
755k
  if (RAW == ';') {
2373
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2374
667k
      ctxt->input->col++;
2375
667k
      ctxt->input->cur++;
2376
667k
  }
2377
755k
    } else {
2378
0
        if (RAW == '&')
2379
0
            SKIP(1);
2380
0
        xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2381
0
    }
2382
2383
    /*
2384
     * [ WFC: Legal Character ]
2385
     * Characters referred to using character references must match the
2386
     * production for Char.
2387
     */
2388
2.24M
    if (val >= 0x110000) {
2389
5.24k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2390
5.24k
                "xmlParseCharRef: character reference out of bounds\n",
2391
5.24k
          val);
2392
2.23M
    } else if (IS_CHAR(val)) {
2393
2.04M
        return(val);
2394
2.04M
    } else {
2395
193k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2396
193k
                          "xmlParseCharRef: invalid xmlChar value %d\n",
2397
193k
                    val);
2398
193k
    }
2399
198k
    return(0);
2400
2.24M
}
2401
2402
/**
2403
 * xmlParseStringCharRef:
2404
 * @ctxt:  an XML parser context
2405
 * @str:  a pointer to an index in the string
2406
 *
2407
 * parse Reference declarations, variant parsing from a string rather
2408
 * than an an input flow.
2409
 *
2410
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2411
 *                  '&#x' [0-9a-fA-F]+ ';'
2412
 *
2413
 * [ WFC: Legal Character ]
2414
 * Characters referred to using character references must match the
2415
 * production for Char.
2416
 *
2417
 * Returns the value parsed (as an int), 0 in case of error, str will be
2418
 *         updated to the current value of the index
2419
 */
2420
static int
2421
179k
xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2422
179k
    const xmlChar *ptr;
2423
179k
    xmlChar cur;
2424
179k
    int val = 0;
2425
2426
179k
    if ((str == NULL) || (*str == NULL)) return(0);
2427
179k
    ptr = *str;
2428
179k
    cur = *ptr;
2429
179k
    if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2430
52.6k
  ptr += 3;
2431
52.6k
  cur = *ptr;
2432
673k
  while (cur != ';') { /* Non input consuming loop */
2433
624k
      if ((cur >= '0') && (cur <= '9'))
2434
102k
          val = val * 16 + (cur - '0');
2435
522k
      else if ((cur >= 'a') && (cur <= 'f'))
2436
120k
          val = val * 16 + (cur - 'a') + 10;
2437
401k
      else if ((cur >= 'A') && (cur <= 'F'))
2438
398k
          val = val * 16 + (cur - 'A') + 10;
2439
3.70k
      else {
2440
3.70k
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2441
3.70k
    val = 0;
2442
3.70k
    break;
2443
3.70k
      }
2444
621k
      if (val > 0x110000)
2445
550k
          val = 0x110000;
2446
2447
621k
      ptr++;
2448
621k
      cur = *ptr;
2449
621k
  }
2450
52.6k
  if (cur == ';')
2451
48.9k
      ptr++;
2452
127k
    } else if  ((cur == '&') && (ptr[1] == '#')){
2453
127k
  ptr += 2;
2454
127k
  cur = *ptr;
2455
429k
  while (cur != ';') { /* Non input consuming loops */
2456
303k
      if ((cur >= '0') && (cur <= '9'))
2457
302k
          val = val * 10 + (cur - '0');
2458
1.42k
      else {
2459
1.42k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2460
1.42k
    val = 0;
2461
1.42k
    break;
2462
1.42k
      }
2463
302k
      if (val > 0x110000)
2464
1.20k
          val = 0x110000;
2465
2466
302k
      ptr++;
2467
302k
      cur = *ptr;
2468
302k
  }
2469
127k
  if (cur == ';')
2470
125k
      ptr++;
2471
127k
    } else {
2472
0
  xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2473
0
  return(0);
2474
0
    }
2475
179k
    *str = ptr;
2476
2477
    /*
2478
     * [ WFC: Legal Character ]
2479
     * Characters referred to using character references must match the
2480
     * production for Char.
2481
     */
2482
179k
    if (val >= 0x110000) {
2483
853
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2484
853
                "xmlParseStringCharRef: character reference out of bounds\n",
2485
853
                val);
2486
178k
    } else if (IS_CHAR(val)) {
2487
172k
        return(val);
2488
172k
    } else {
2489
6.53k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2490
6.53k
        "xmlParseStringCharRef: invalid xmlChar value %d\n",
2491
6.53k
        val);
2492
6.53k
    }
2493
7.38k
    return(0);
2494
179k
}
2495
2496
/**
2497
 * xmlParserHandlePEReference:
2498
 * @ctxt:  the parser context
2499
 *
2500
 * [69] PEReference ::= '%' Name ';'
2501
 *
2502
 * [ WFC: No Recursion ]
2503
 * A parsed entity must not contain a recursive
2504
 * reference to itself, either directly or indirectly.
2505
 *
2506
 * [ WFC: Entity Declared ]
2507
 * In a document without any DTD, a document with only an internal DTD
2508
 * subset which contains no parameter entity references, or a document
2509
 * with "standalone='yes'", ...  ... The declaration of a parameter
2510
 * entity must precede any reference to it...
2511
 *
2512
 * [ VC: Entity Declared ]
2513
 * In a document with an external subset or external parameter entities
2514
 * with "standalone='no'", ...  ... The declaration of a parameter entity
2515
 * must precede any reference to it...
2516
 *
2517
 * [ WFC: In DTD ]
2518
 * Parameter-entity references may only appear in the DTD.
2519
 * NOTE: misleading but this is handled.
2520
 *
2521
 * A PEReference may have been detected in the current input stream
2522
 * the handling is done accordingly to
2523
 *      http://www.w3.org/TR/REC-xml#entproc
2524
 * i.e.
2525
 *   - Included in literal in entity values
2526
 *   - Included as Parameter Entity reference within DTDs
2527
 */
2528
void
2529
0
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2530
0
    switch(ctxt->instate) {
2531
0
  case XML_PARSER_CDATA_SECTION:
2532
0
      return;
2533
0
        case XML_PARSER_COMMENT:
2534
0
      return;
2535
0
  case XML_PARSER_START_TAG:
2536
0
      return;
2537
0
  case XML_PARSER_END_TAG:
2538
0
      return;
2539
0
        case XML_PARSER_EOF:
2540
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2541
0
      return;
2542
0
        case XML_PARSER_PROLOG:
2543
0
  case XML_PARSER_START:
2544
0
  case XML_PARSER_MISC:
2545
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2546
0
      return;
2547
0
  case XML_PARSER_ENTITY_DECL:
2548
0
        case XML_PARSER_CONTENT:
2549
0
        case XML_PARSER_ATTRIBUTE_VALUE:
2550
0
        case XML_PARSER_PI:
2551
0
  case XML_PARSER_SYSTEM_LITERAL:
2552
0
  case XML_PARSER_PUBLIC_LITERAL:
2553
      /* we just ignore it there */
2554
0
      return;
2555
0
        case XML_PARSER_EPILOG:
2556
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2557
0
      return;
2558
0
  case XML_PARSER_ENTITY_VALUE:
2559
      /*
2560
       * NOTE: in the case of entity values, we don't do the
2561
       *       substitution here since we need the literal
2562
       *       entity value to be able to save the internal
2563
       *       subset of the document.
2564
       *       This will be handled by xmlStringDecodeEntities
2565
       */
2566
0
      return;
2567
0
        case XML_PARSER_DTD:
2568
      /*
2569
       * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2570
       * In the internal DTD subset, parameter-entity references
2571
       * can occur only where markup declarations can occur, not
2572
       * within markup declarations.
2573
       * In that case this is handled in xmlParseMarkupDecl
2574
       */
2575
0
      if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2576
0
    return;
2577
0
      if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2578
0
    return;
2579
0
            break;
2580
0
        case XML_PARSER_IGNORE:
2581
0
            return;
2582
0
    }
2583
2584
0
    xmlParsePEReference(ctxt);
2585
0
}
2586
2587
/*
 * Macro used to grow the current buffer to twice its size plus n bytes.
 * buffer##_size is expected to be a size_t
 * mem_error: is expected to handle memory allocation failures; it is also
 * the target when the new size computation wraps around.
 * On failure the buffer and its size are left unchanged.
 * The expansion is a plain brace block, not a do/while(0) statement.
 */
#define growBuffer(buffer, n) { \
    xmlChar *tmp; \
    size_t new_size = buffer##_size * 2 + (n); \
    if (new_size < buffer##_size) goto mem_error; \
    tmp = (xmlChar *) xmlRealloc(buffer, new_size); \
    if (tmp == NULL) goto mem_error; \
    buffer = tmp; \
    buffer##_size = new_size; \
}
2601
2602
/**
2603
 * xmlStringDecodeEntitiesInt:
2604
 * @ctxt:  the parser context
2605
 * @str:  the input string
2606
 * @len: the string length
2607
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2608
 * @end:  an end marker xmlChar, 0 if none
2609
 * @end2:  an end marker xmlChar, 0 if none
2610
 * @end3:  an end marker xmlChar, 0 if none
2611
 * @check:  whether to perform entity checks
2612
 */
2613
static xmlChar *
2614
xmlStringDecodeEntitiesInt(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2615
               int what, xmlChar end, xmlChar  end2, xmlChar end3,
2616
31.0M
                           int check) {
2617
31.0M
    xmlChar *buffer = NULL;
2618
31.0M
    size_t buffer_size = 0;
2619
31.0M
    size_t nbchars = 0;
2620
2621
31.0M
    xmlChar *current = NULL;
2622
31.0M
    xmlChar *rep = NULL;
2623
31.0M
    const xmlChar *last;
2624
31.0M
    xmlEntityPtr ent;
2625
31.0M
    int c,l;
2626
2627
31.0M
    if (str == NULL)
2628
18.4k
        return(NULL);
2629
31.0M
    last = str + len;
2630
2631
31.0M
    if (((ctxt->depth > 40) &&
2632
31.0M
         ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2633
31.0M
  (ctxt->depth > 100)) {
2634
0
  xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_LOOP,
2635
0
                       "Maximum entity nesting depth exceeded");
2636
0
  return(NULL);
2637
0
    }
2638
2639
    /*
2640
     * allocate a translation buffer.
2641
     */
2642
31.0M
    buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2643
31.0M
    buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
2644
31.0M
    if (buffer == NULL) goto mem_error;
2645
2646
    /*
2647
     * OK loop until we reach one of the ending char or a size limit.
2648
     * we are operating on already parsed values.
2649
     */
2650
31.0M
    if (str < last)
2651
30.9M
  c = CUR_SCHAR(str, l);
2652
45.6k
    else
2653
45.6k
        c = 0;
2654
3.68G
    while ((c != 0) && (c != end) && /* non input consuming loop */
2655
3.68G
           (c != end2) && (c != end3) &&
2656
3.68G
           (ctxt->instate != XML_PARSER_EOF)) {
2657
2658
3.65G
  if (c == 0) break;
2659
3.65G
        if ((c == '&') && (str[1] == '#')) {
2660
179k
      int val = xmlParseStringCharRef(ctxt, &str);
2661
179k
      if (val == 0)
2662
7.38k
                goto int_error;
2663
172k
      COPY_BUF(0,buffer,nbchars,val);
2664
172k
      if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2665
174
          growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2666
174
      }
2667
3.65G
  } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2668
30.4M
      if (xmlParserDebugEntities)
2669
0
    xmlGenericError(xmlGenericErrorContext,
2670
0
      "String decoding Entity Reference: %.30s\n",
2671
0
      str);
2672
30.4M
      ent = xmlParseStringEntityRef(ctxt, &str);
2673
30.4M
      if ((ent != NULL) &&
2674
30.4M
    (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2675
39.0k
    if (ent->content != NULL) {
2676
39.0k
        COPY_BUF(0,buffer,nbchars,ent->content[0]);
2677
39.0k
        if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2678
1.42k
      growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2679
1.42k
        }
2680
39.0k
    } else {
2681
0
        xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2682
0
          "predefined entity has no content\n");
2683
0
                    goto int_error;
2684
0
    }
2685
30.4M
      } else if ((ent != NULL) && (ent->content != NULL)) {
2686
28.9M
          if ((check) && (xmlParserEntityCheck(ctxt, ent->length)))
2687
312
                    goto int_error;
2688
2689
28.9M
                if (ent->flags & XML_ENT_EXPANDING) {
2690
427
              xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2691
427
                    xmlHaltParser(ctxt);
2692
427
                    ent->content[0] = 0;
2693
427
                    goto int_error;
2694
427
                }
2695
2696
28.9M
                ent->flags |= XML_ENT_EXPANDING;
2697
28.9M
    ctxt->depth++;
2698
28.9M
    rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
2699
28.9M
                        ent->length, what, 0, 0, 0, check);
2700
28.9M
    ctxt->depth--;
2701
28.9M
                ent->flags &= ~XML_ENT_EXPANDING;
2702
2703
28.9M
    if (rep == NULL) {
2704
5.32k
                    ent->content[0] = 0;
2705
5.32k
                    goto int_error;
2706
5.32k
                }
2707
2708
28.9M
                current = rep;
2709
4.85G
                while (*current != 0) { /* non input consuming loop */
2710
4.82G
                    buffer[nbchars++] = *current++;
2711
4.82G
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2712
1.84M
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2713
1.84M
                    }
2714
4.82G
                }
2715
28.9M
                xmlFree(rep);
2716
28.9M
                rep = NULL;
2717
28.9M
      } else if (ent != NULL) {
2718
25.1k
    int i = xmlStrlen(ent->name);
2719
25.1k
    const xmlChar *cur = ent->name;
2720
2721
25.1k
    buffer[nbchars++] = '&';
2722
25.1k
    if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
2723
450
        growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
2724
450
    }
2725
82.3k
    for (;i > 0;i--)
2726
57.1k
        buffer[nbchars++] = *cur++;
2727
25.1k
    buffer[nbchars++] = ';';
2728
25.1k
      }
2729
3.62G
  } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2730
1.62M
      if (xmlParserDebugEntities)
2731
0
    xmlGenericError(xmlGenericErrorContext,
2732
0
      "String decoding PE Reference: %.30s\n", str);
2733
1.62M
      ent = xmlParseStringPEReference(ctxt, &str);
2734
1.62M
      if (ent != NULL) {
2735
144k
                if (ent->content == NULL) {
2736
        /*
2737
         * Note: external parsed entities will not be loaded,
2738
         * it is not required for a non-validating parser to
2739
         * complete external PEReferences coming from the
2740
         * internal subset
2741
         */
2742
5.38k
        if (((ctxt->options & XML_PARSE_NOENT) != 0) ||
2743
5.38k
      ((ctxt->options & XML_PARSE_DTDVALID) != 0) ||
2744
5.38k
      (ctxt->validate != 0)) {
2745
5.23k
      xmlLoadEntityContent(ctxt, ent);
2746
5.23k
        } else {
2747
153
      xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
2748
153
      "not validating will not read content for PE entity %s\n",
2749
153
                          ent->name, NULL);
2750
153
        }
2751
5.38k
    }
2752
2753
144k
          if ((check) && (xmlParserEntityCheck(ctxt, ent->length)))
2754
180
                    goto int_error;
2755
2756
144k
                if (ent->flags & XML_ENT_EXPANDING) {
2757
303
              xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2758
303
                    xmlHaltParser(ctxt);
2759
303
                    if (ent->content != NULL)
2760
300
                        ent->content[0] = 0;
2761
303
                    goto int_error;
2762
303
                }
2763
2764
144k
                ent->flags |= XML_ENT_EXPANDING;
2765
144k
    ctxt->depth++;
2766
144k
    rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
2767
144k
                        ent->length, what, 0, 0, 0, check);
2768
144k
    ctxt->depth--;
2769
144k
                ent->flags &= ~XML_ENT_EXPANDING;
2770
2771
144k
    if (rep == NULL) {
2772
3.61k
                    if (ent->content != NULL)
2773
306
                        ent->content[0] = 0;
2774
3.61k
                    goto int_error;
2775
3.61k
                }
2776
140k
                current = rep;
2777
1.62G
                while (*current != 0) { /* non input consuming loop */
2778
1.62G
                    buffer[nbchars++] = *current++;
2779
1.62G
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2780
55.0k
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2781
55.0k
                    }
2782
1.62G
                }
2783
140k
                xmlFree(rep);
2784
140k
                rep = NULL;
2785
140k
      }
2786
3.61G
  } else {
2787
3.61G
      COPY_BUF(l,buffer,nbchars,c);
2788
3.61G
      str += l;
2789
3.61G
      if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2790
801k
          growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2791
801k
      }
2792
3.61G
  }
2793
3.65G
  if (str < last)
2794
3.62G
      c = CUR_SCHAR(str, l);
2795
30.9M
  else
2796
30.9M
      c = 0;
2797
3.65G
    }
2798
30.9M
    buffer[nbchars] = 0;
2799
30.9M
    return(buffer);
2800
2801
0
mem_error:
2802
0
    xmlErrMemory(ctxt, NULL);
2803
17.5k
int_error:
2804
17.5k
    if (rep != NULL)
2805
0
        xmlFree(rep);
2806
17.5k
    if (buffer != NULL)
2807
17.5k
        xmlFree(buffer);
2808
17.5k
    return(NULL);
2809
0
}
2810
2811
/**
2812
 * xmlStringLenDecodeEntities:
2813
 * @ctxt:  the parser context
2814
 * @str:  the input string
2815
 * @len: the string length
2816
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2817
 * @end:  an end marker xmlChar, 0 if none
2818
 * @end2:  an end marker xmlChar, 0 if none
2819
 * @end3:  an end marker xmlChar, 0 if none
2820
 *
2821
 * DEPRECATED: Internal function, don't use.
2822
 *
2823
 * Takes an entity string content and processes it to do the adequate substitutions.
2824
 *
2825
 * [67] Reference ::= EntityRef | CharRef
2826
 *
2827
 * [69] PEReference ::= '%' Name ';'
2828
 *
2829
 * Returns A newly allocated string with the substitution done. The caller
2830
 *      must deallocate it !
2831
 */
2832
xmlChar *
2833
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2834
                           int what, xmlChar end, xmlChar  end2,
2835
10.2k
                           xmlChar end3) {
2836
10.2k
    if ((ctxt == NULL) || (str == NULL) || (len < 0))
2837
0
        return(NULL);
2838
10.2k
    return(xmlStringDecodeEntitiesInt(ctxt, str, len, what,
2839
10.2k
                                      end, end2, end3, 0));
2840
10.2k
}
2841
2842
/**
2843
 * xmlStringDecodeEntities:
2844
 * @ctxt:  the parser context
2845
 * @str:  the input string
2846
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2847
 * @end:  an end marker xmlChar, 0 if none
2848
 * @end2:  an end marker xmlChar, 0 if none
2849
 * @end3:  an end marker xmlChar, 0 if none
2850
 *
2851
 * DEPRECATED: Internal function, don't use.
2852
 *
2853
 * Takes an entity string content and processes it to do the adequate substitutions.
2854
 *
2855
 * [67] Reference ::= EntityRef | CharRef
2856
 *
2857
 * [69] PEReference ::= '%' Name ';'
2858
 *
2859
 * Returns A newly allocated string with the substitution done. The caller
2860
 *      must deallocate it !
2861
 */
2862
xmlChar *
2863
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2864
525k
            xmlChar end, xmlChar  end2, xmlChar end3) {
2865
525k
    if ((ctxt == NULL) || (str == NULL)) return(NULL);
2866
525k
    return(xmlStringDecodeEntitiesInt(ctxt, str, xmlStrlen(str), what,
2867
525k
                                      end, end2, end3, 0));
2868
525k
}
2869
2870
/************************************************************************
2871
 *                  *
2872
 *    Commodity functions, cleanup needed ?     *
2873
 *                  *
2874
 ************************************************************************/
2875
2876
/**
2877
 * areBlanks:
2878
 * @ctxt:  an XML parser context
2879
 * @str:  a xmlChar *
2880
 * @len:  the size of @str
2881
 * @blank_chars: we know the chars are blanks
2882
 *
2883
 * Is this a sequence of blank chars that one can ignore ?
2884
 *
2885
 * Returns 1 if ignorable 0 otherwise.
2886
 */
2887
2888
static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2889
15.8M
                     int blank_chars) {
2890
15.8M
    int i, ret;
2891
15.8M
    xmlNodePtr lastChild;
2892
2893
    /*
2894
     * Don't spend time trying to differentiate them, the same callback is
2895
     * used !
2896
     */
2897
15.8M
    if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2898
5.07M
  return(0);
2899
2900
    /*
2901
     * Check for xml:space value.
2902
     */
2903
10.7M
    if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2904
10.7M
        (*(ctxt->space) == -2))
2905
7.06M
  return(0);
2906
2907
    /*
2908
     * Check that the string is made of blanks
2909
     */
2910
3.72M
    if (blank_chars == 0) {
2911
24.9M
  for (i = 0;i < len;i++)
2912
23.7M
      if (!(IS_BLANK_CH(str[i]))) return(0);
2913
1.87M
    }
2914
2915
    /*
2916
     * Look if the element is mixed content in the DTD if available
2917
     */
2918
3.05M
    if (ctxt->node == NULL) return(0);
2919
2.81M
    if (ctxt->myDoc != NULL) {
2920
2.81M
  ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2921
2.81M
        if (ret == 0) return(1);
2922
2.67M
        if (ret == 1) return(0);
2923
2.67M
    }
2924
2925
    /*
2926
     * Otherwise, heuristic :-\
2927
     */
2928
2.65M
    if ((RAW != '<') && (RAW != 0xD)) return(0);
2929
2.56M
    if ((ctxt->node->children == NULL) &&
2930
2.56M
  (RAW == '<') && (NXT(1) == '/')) return(0);
2931
2932
2.55M
    lastChild = xmlGetLastChild(ctxt->node);
2933
2.55M
    if (lastChild == NULL) {
2934
803k
        if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2935
803k
            (ctxt->node->content != NULL)) return(0);
2936
1.74M
    } else if (xmlNodeIsText(lastChild))
2937
64.3k
        return(0);
2938
1.68M
    else if ((ctxt->node->children != NULL) &&
2939
1.68M
             (xmlNodeIsText(ctxt->node->children)))
2940
44.5k
        return(0);
2941
2.44M
    return(1);
2942
2.55M
}
2943
2944
/************************************************************************
2945
 *                  *
2946
 *    Extra stuff for namespace support     *
2947
 *  Relates to http://www.w3.org/TR/WD-xml-names      *
2948
 *                  *
2949
 ************************************************************************/
2950
2951
/**
2952
 * xmlSplitQName:
2953
 * @ctxt:  an XML parser context
2954
 * @name:  the UTF-8 encoded XML qualified name string to split
2955
 * @prefix:  a xmlChar **
2956
 *
2957
 * parse a UTF-8 encoded XML qualified name string
2958
 *
2959
 * [NS 5] QName ::= (Prefix ':')? LocalPart
2960
 *
2961
 * [NS 6] Prefix ::= NCName
2962
 *
2963
 * [NS 7] LocalPart ::= NCName
2964
 *
2965
 * Returns the local part, and prefix is updated
2966
 *   to get the Prefix if any.
2967
 */
2968
2969
xmlChar *
2970
14.4M
xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2971
14.4M
    xmlChar buf[XML_MAX_NAMELEN + 5];
2972
14.4M
    xmlChar *buffer = NULL;
2973
14.4M
    int len = 0;
2974
14.4M
    int max = XML_MAX_NAMELEN;
2975
14.4M
    xmlChar *ret = NULL;
2976
14.4M
    const xmlChar *cur = name;
2977
14.4M
    int c;
2978
2979
14.4M
    if (prefix == NULL) return(NULL);
2980
14.4M
    *prefix = NULL;
2981
2982
14.4M
    if (cur == NULL) return(NULL);
2983
2984
#ifndef XML_XML_NAMESPACE
2985
    /* xml: prefix is not really a namespace */
2986
    if ((cur[0] == 'x') && (cur[1] == 'm') &&
2987
        (cur[2] == 'l') && (cur[3] == ':'))
2988
  return(xmlStrdup(name));
2989
#endif
2990
2991
    /* nasty but well-formed */
2992
14.4M
    if (cur[0] == ':')
2993
13.5k
  return(xmlStrdup(name));
2994
2995
14.3M
    c = *cur++;
2996
65.5M
    while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2997
51.1M
  buf[len++] = c;
2998
51.1M
  c = *cur++;
2999
51.1M
    }
3000
14.3M
    if (len >= max) {
3001
  /*
3002
   * Okay someone managed to make a huge name, so he's ready to pay
3003
   * for the processing speed.
3004
   */
3005
63.1k
  max = len * 2;
3006
3007
63.1k
  buffer = (xmlChar *) xmlMallocAtomic(max);
3008
63.1k
  if (buffer == NULL) {
3009
0
      xmlErrMemory(ctxt, NULL);
3010
0
      return(NULL);
3011
0
  }
3012
63.1k
  memcpy(buffer, buf, len);
3013
1.37G
  while ((c != 0) && (c != ':')) { /* tested bigname.xml */
3014
1.37G
      if (len + 10 > max) {
3015
224k
          xmlChar *tmp;
3016
3017
224k
    max *= 2;
3018
224k
    tmp = (xmlChar *) xmlRealloc(buffer, max);
3019
224k
    if (tmp == NULL) {
3020
0
        xmlFree(buffer);
3021
0
        xmlErrMemory(ctxt, NULL);
3022
0
        return(NULL);
3023
0
    }
3024
224k
    buffer = tmp;
3025
224k
      }
3026
1.37G
      buffer[len++] = c;
3027
1.37G
      c = *cur++;
3028
1.37G
  }
3029
63.1k
  buffer[len] = 0;
3030
63.1k
    }
3031
3032
14.3M
    if ((c == ':') && (*cur == 0)) {
3033
48.9k
        if (buffer != NULL)
3034
261
      xmlFree(buffer);
3035
48.9k
  *prefix = NULL;
3036
48.9k
  return(xmlStrdup(name));
3037
48.9k
    }
3038
3039
14.3M
    if (buffer == NULL)
3040
14.2M
  ret = xmlStrndup(buf, len);
3041
62.8k
    else {
3042
62.8k
  ret = buffer;
3043
62.8k
  buffer = NULL;
3044
62.8k
  max = XML_MAX_NAMELEN;
3045
62.8k
    }
3046
3047
3048
14.3M
    if (c == ':') {
3049
2.23M
  c = *cur;
3050
2.23M
        *prefix = ret;
3051
2.23M
  if (c == 0) {
3052
0
      return(xmlStrndup(BAD_CAST "", 0));
3053
0
  }
3054
2.23M
  len = 0;
3055
3056
  /*
3057
   * Check that the first character is proper to start
3058
   * a new name
3059
   */
3060
2.23M
  if (!(((c >= 0x61) && (c <= 0x7A)) ||
3061
2.23M
        ((c >= 0x41) && (c <= 0x5A)) ||
3062
2.23M
        (c == '_') || (c == ':'))) {
3063
24.7k
      int l;
3064
24.7k
      int first = CUR_SCHAR(cur, l);
3065
3066
24.7k
      if (!IS_LETTER(first) && (first != '_')) {
3067
8.57k
    xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3068
8.57k
          "Name %s is not XML Namespace compliant\n",
3069
8.57k
          name);
3070
8.57k
      }
3071
24.7k
  }
3072
2.23M
  cur++;
3073
3074
19.6M
  while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3075
17.4M
      buf[len++] = c;
3076
17.4M
      c = *cur++;
3077
17.4M
  }
3078
2.23M
  if (len >= max) {
3079
      /*
3080
       * Okay someone managed to make a huge name, so he's ready to pay
3081
       * for the processing speed.
3082
       */
3083
59.8k
      max = len * 2;
3084
3085
59.8k
      buffer = (xmlChar *) xmlMallocAtomic(max);
3086
59.8k
      if (buffer == NULL) {
3087
0
          xmlErrMemory(ctxt, NULL);
3088
0
    return(NULL);
3089
0
      }
3090
59.8k
      memcpy(buffer, buf, len);
3091
875M
      while (c != 0) { /* tested bigname2.xml */
3092
875M
    if (len + 10 > max) {
3093
173k
        xmlChar *tmp;
3094
3095
173k
        max *= 2;
3096
173k
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3097
173k
        if (tmp == NULL) {
3098
0
      xmlErrMemory(ctxt, NULL);
3099
0
      xmlFree(buffer);
3100
0
      return(NULL);
3101
0
        }
3102
173k
        buffer = tmp;
3103
173k
    }
3104
875M
    buffer[len++] = c;
3105
875M
    c = *cur++;
3106
875M
      }
3107
59.8k
      buffer[len] = 0;
3108
59.8k
  }
3109
3110
2.23M
  if (buffer == NULL)
3111
2.18M
      ret = xmlStrndup(buf, len);
3112
59.8k
  else {
3113
59.8k
      ret = buffer;
3114
59.8k
  }
3115
2.23M
    }
3116
3117
14.3M
    return(ret);
3118
14.3M
}
3119
3120
/************************************************************************
3121
 *                  *
3122
 *      The parser itself       *
3123
 *  Relates to http://www.w3.org/TR/REC-xml       *
3124
 *                  *
3125
 ************************************************************************/
3126
3127
/************************************************************************
3128
 *                  *
3129
 *  Routines to parse Name, NCName and NmToken      *
3130
 *                  *
3131
 ************************************************************************/
3132
#ifdef DEBUG
3133
static unsigned long nbParseName = 0;
3134
static unsigned long nbParseNmToken = 0;
3135
static unsigned long nbParseNCName = 0;
3136
static unsigned long nbParseNCNameComplex = 0;
3137
static unsigned long nbParseNameComplex = 0;
3138
static unsigned long nbParseStringName = 0;
3139
#endif
3140
3141
/*
3142
 * The two following functions are related to the change of accepted
3143
 * characters for Name and NmToken in the Revision 5 of XML-1.0
3144
 * They correspond to the modified production [4] and the new production [4a]
3145
 * changes in that revision. Also note that the macros used for the
3146
 * productions Letter, Digit, CombiningChar and Extender are not needed
3147
 * anymore.
3148
 * We still keep compatibility with pre-revision5 parsing semantics if the
3149
 * new XML_PARSE_OLD10 option is given to the parser.
3150
 */
3151
static int
3152
36.0M
xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3153
36.0M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3154
        /*
3155
   * Use the new checks of productions [4], [4a] and [5] of the
3156
   * Update 5 of XML-1.0
3157
   */
3158
34.1M
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3159
34.1M
      (((c >= 'a') && (c <= 'z')) ||
3160
34.1M
       ((c >= 'A') && (c <= 'Z')) ||
3161
34.1M
       (c == '_') || (c == ':') ||
3162
34.1M
       ((c >= 0xC0) && (c <= 0xD6)) ||
3163
34.1M
       ((c >= 0xD8) && (c <= 0xF6)) ||
3164
34.1M
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3165
34.1M
       ((c >= 0x370) && (c <= 0x37D)) ||
3166
34.1M
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3167
34.1M
       ((c >= 0x200C) && (c <= 0x200D)) ||
3168
34.1M
       ((c >= 0x2070) && (c <= 0x218F)) ||
3169
34.1M
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3170
34.1M
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3171
34.1M
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3172
34.1M
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3173
34.1M
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3174
30.6M
      return(1);
3175
34.1M
    } else {
3176
1.92M
        if (IS_LETTER(c) || (c == '_') || (c == ':'))
3177
1.24M
      return(1);
3178
1.92M
    }
3179
4.18M
    return(0);
3180
36.0M
}
3181
3182
static int
3183
1.42G
xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3184
1.42G
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3185
        /*
3186
   * Use the new checks of productions [4], [4a] and [5] of the
3187
   * Update 5 of XML-1.0
3188
   */
3189
1.35G
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3190
1.35G
      (((c >= 'a') && (c <= 'z')) ||
3191
1.35G
       ((c >= 'A') && (c <= 'Z')) ||
3192
1.35G
       ((c >= '0') && (c <= '9')) || /* !start */
3193
1.35G
       (c == '_') || (c == ':') ||
3194
1.35G
       (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3195
1.35G
       ((c >= 0xC0) && (c <= 0xD6)) ||
3196
1.35G
       ((c >= 0xD8) && (c <= 0xF6)) ||
3197
1.35G
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3198
1.35G
       ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3199
1.35G
       ((c >= 0x370) && (c <= 0x37D)) ||
3200
1.35G
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3201
1.35G
       ((c >= 0x200C) && (c <= 0x200D)) ||
3202
1.35G
       ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3203
1.35G
       ((c >= 0x2070) && (c <= 0x218F)) ||
3204
1.35G
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3205
1.35G
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3206
1.35G
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3207
1.35G
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3208
1.35G
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3209
1.32G
       return(1);
3210
1.35G
    } else {
3211
68.1M
        if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3212
68.1M
            (c == '.') || (c == '-') ||
3213
68.1M
      (c == '_') || (c == ':') ||
3214
68.1M
      (IS_COMBINING(c)) ||
3215
68.1M
      (IS_EXTENDER(c)))
3216
66.6M
      return(1);
3217
68.1M
    }
3218
32.4M
    return(0);
3219
1.42G
}
3220
3221
static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
3222
                                          int *len, int *alloc, int normalize);
3223
3224
static const xmlChar *
3225
8.49M
xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3226
8.49M
    int len = 0, l;
3227
8.49M
    int c;
3228
8.49M
    int count = 0;
3229
8.49M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3230
2.82M
                    XML_MAX_TEXT_LENGTH :
3231
8.49M
                    XML_MAX_NAME_LENGTH;
3232
3233
#ifdef DEBUG
3234
    nbParseNameComplex++;
3235
#endif
3236
3237
    /*
3238
     * Handler for more complex cases
3239
     */
3240
8.49M
    GROW;
3241
8.49M
    if (ctxt->instate == XML_PARSER_EOF)
3242
0
        return(NULL);
3243
8.49M
    c = CUR_CHAR(l);
3244
8.49M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3245
        /*
3246
   * Use the new checks of productions [4], [4a] and [5] of the
3247
   * Update 5 of XML-1.0
3248
   */
3249
5.57M
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3250
5.57M
      (!(((c >= 'a') && (c <= 'z')) ||
3251
5.47M
         ((c >= 'A') && (c <= 'Z')) ||
3252
5.47M
         (c == '_') || (c == ':') ||
3253
5.47M
         ((c >= 0xC0) && (c <= 0xD6)) ||
3254
5.47M
         ((c >= 0xD8) && (c <= 0xF6)) ||
3255
5.47M
         ((c >= 0xF8) && (c <= 0x2FF)) ||
3256
5.47M
         ((c >= 0x370) && (c <= 0x37D)) ||
3257
5.47M
         ((c >= 0x37F) && (c <= 0x1FFF)) ||
3258
5.47M
         ((c >= 0x200C) && (c <= 0x200D)) ||
3259
5.47M
         ((c >= 0x2070) && (c <= 0x218F)) ||
3260
5.47M
         ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3261
5.47M
         ((c >= 0x3001) && (c <= 0xD7FF)) ||
3262
5.47M
         ((c >= 0xF900) && (c <= 0xFDCF)) ||
3263
5.47M
         ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3264
5.47M
         ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3265
4.96M
      return(NULL);
3266
4.96M
  }
3267
618k
  len += l;
3268
618k
  NEXTL(l);
3269
618k
  c = CUR_CHAR(l);
3270
388M
  while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3271
388M
         (((c >= 'a') && (c <= 'z')) ||
3272
388M
          ((c >= 'A') && (c <= 'Z')) ||
3273
388M
          ((c >= '0') && (c <= '9')) || /* !start */
3274
388M
          (c == '_') || (c == ':') ||
3275
388M
          (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3276
388M
          ((c >= 0xC0) && (c <= 0xD6)) ||
3277
388M
          ((c >= 0xD8) && (c <= 0xF6)) ||
3278
388M
          ((c >= 0xF8) && (c <= 0x2FF)) ||
3279
388M
          ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3280
388M
          ((c >= 0x370) && (c <= 0x37D)) ||
3281
388M
          ((c >= 0x37F) && (c <= 0x1FFF)) ||
3282
388M
          ((c >= 0x200C) && (c <= 0x200D)) ||
3283
388M
          ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3284
388M
          ((c >= 0x2070) && (c <= 0x218F)) ||
3285
388M
          ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3286
388M
          ((c >= 0x3001) && (c <= 0xD7FF)) ||
3287
388M
          ((c >= 0xF900) && (c <= 0xFDCF)) ||
3288
388M
          ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3289
388M
          ((c >= 0x10000) && (c <= 0xEFFFF))
3290
388M
    )) {
3291
387M
      if (count++ > XML_PARSER_CHUNK_SIZE) {
3292
3.75M
    count = 0;
3293
3.75M
    GROW;
3294
3.75M
                if (ctxt->instate == XML_PARSER_EOF)
3295
0
                    return(NULL);
3296
3.75M
      }
3297
387M
            if (len <= INT_MAX - l)
3298
387M
          len += l;
3299
387M
      NEXTL(l);
3300
387M
      c = CUR_CHAR(l);
3301
387M
  }
3302
2.91M
    } else {
3303
2.91M
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3304
2.91M
      (!IS_LETTER(c) && (c != '_') &&
3305
2.81M
       (c != ':'))) {
3306
2.24M
      return(NULL);
3307
2.24M
  }
3308
664k
  len += l;
3309
664k
  NEXTL(l);
3310
664k
  c = CUR_CHAR(l);
3311
3312
678M
  while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3313
678M
         ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3314
678M
    (c == '.') || (c == '-') ||
3315
678M
    (c == '_') || (c == ':') ||
3316
678M
    (IS_COMBINING(c)) ||
3317
678M
    (IS_EXTENDER(c)))) {
3318
677M
      if (count++ > XML_PARSER_CHUNK_SIZE) {
3319
6.57M
    count = 0;
3320
6.57M
    GROW;
3321
6.57M
                if (ctxt->instate == XML_PARSER_EOF)
3322
0
                    return(NULL);
3323
6.57M
      }
3324
677M
            if (len <= INT_MAX - l)
3325
677M
          len += l;
3326
677M
      NEXTL(l);
3327
677M
      c = CUR_CHAR(l);
3328
677M
  }
3329
664k
    }
3330
1.28M
    if (len > maxLength) {
3331
615
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3332
615
        return(NULL);
3333
615
    }
3334
1.28M
    if (ctxt->input->cur - ctxt->input->base < len) {
3335
        /*
3336
         * There were a couple of bugs where PERefs led to a change
3337
         * of the buffer. Check the buffer size to avoid passing an invalid
3338
         * pointer to xmlDictLookup.
3339
         */
3340
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3341
0
                    "unexpected change of input buffer");
3342
0
        return (NULL);
3343
0
    }
3344
1.28M
    if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3345
2.21k
        return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3346
1.28M
    return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3347
1.28M
}
3348
3349
/**
3350
 * xmlParseName:
3351
 * @ctxt:  an XML parser context
3352
 *
3353
 * DEPRECATED: Internal function, don't use.
3354
 *
3355
 * parse an XML name.
3356
 *
3357
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3358
 *                  CombiningChar | Extender
3359
 *
3360
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3361
 *
3362
 * [6] Names ::= Name (#x20 Name)*
3363
 *
3364
 * Returns the Name parsed or NULL
3365
 */
3366
3367
const xmlChar *
3368
72.3M
xmlParseName(xmlParserCtxtPtr ctxt) {
3369
72.3M
    const xmlChar *in;
3370
72.3M
    const xmlChar *ret;
3371
72.3M
    size_t count = 0;
3372
72.3M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3373
17.2M
                       XML_MAX_TEXT_LENGTH :
3374
72.3M
                       XML_MAX_NAME_LENGTH;
3375
3376
72.3M
    GROW;
3377
3378
#ifdef DEBUG
3379
    nbParseName++;
3380
#endif
3381
3382
    /*
3383
     * Accelerator for simple ASCII names
3384
     */
3385
72.3M
    in = ctxt->input->cur;
3386
72.3M
    if (((*in >= 0x61) && (*in <= 0x7A)) ||
3387
72.3M
  ((*in >= 0x41) && (*in <= 0x5A)) ||
3388
72.3M
  (*in == '_') || (*in == ':')) {
3389
64.9M
  in++;
3390
3.12G
  while (((*in >= 0x61) && (*in <= 0x7A)) ||
3391
3.12G
         ((*in >= 0x41) && (*in <= 0x5A)) ||
3392
3.12G
         ((*in >= 0x30) && (*in <= 0x39)) ||
3393
3.12G
         (*in == '_') || (*in == '-') ||
3394
3.12G
         (*in == ':') || (*in == '.'))
3395
3.05G
      in++;
3396
64.9M
  if ((*in > 0) && (*in < 0x80)) {
3397
63.8M
      count = in - ctxt->input->cur;
3398
63.8M
            if (count > maxLength) {
3399
588
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3400
588
                return(NULL);
3401
588
            }
3402
63.8M
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3403
63.8M
      ctxt->input->cur = in;
3404
63.8M
      ctxt->input->col += count;
3405
63.8M
      if (ret == NULL)
3406
0
          xmlErrMemory(ctxt, NULL);
3407
63.8M
      return(ret);
3408
63.8M
  }
3409
64.9M
    }
3410
    /* fast path did not apply: fall back to the handler for complex cases */
3411
8.49M
    return(xmlParseNameComplex(ctxt));
3412
72.3M
}
3413
3414
static const xmlChar *
3415
3.64M
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3416
3.64M
    int len = 0, l;
3417
3.64M
    int c;
3418
3.64M
    int count = 0;
3419
3.64M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3420
599k
                    XML_MAX_TEXT_LENGTH :
3421
3.64M
                    XML_MAX_NAME_LENGTH;
3422
3.64M
    size_t startPosition = 0;
3423
3424
#ifdef DEBUG
3425
    nbParseNCNameComplex++;
3426
#endif
3427
3428
    /*
3429
     * Handler for more complex cases
3430
     */
3431
3.64M
    GROW;
3432
3.64M
    startPosition = CUR_PTR - BASE_PTR;
3433
3.64M
    c = CUR_CHAR(l);
3434
3.64M
    if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3435
3.64M
  (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3436
2.93M
  return(NULL);
3437
2.93M
    }
3438
3439
111M
    while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3440
111M
     (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3441
111M
  if (count++ > XML_PARSER_CHUNK_SIZE) {
3442
1.03M
      count = 0;
3443
1.03M
      GROW;
3444
1.03M
            if (ctxt->instate == XML_PARSER_EOF)
3445
0
                return(NULL);
3446
1.03M
  }
3447
111M
        if (len <= INT_MAX - l)
3448
111M
      len += l;
3449
111M
  NEXTL(l);
3450
111M
  c = CUR_CHAR(l);
3451
111M
  if (c == 0) {
3452
283k
      count = 0;
3453
      /*
3454
       * when shrinking to extend the buffer we really need to preserve
3455
       * the part of the name we already parsed. Hence rolling back
3456
       * by current length.
3457
       */
3458
283k
      ctxt->input->cur -= l;
3459
283k
      GROW;
3460
283k
            if (ctxt->instate == XML_PARSER_EOF)
3461
0
                return(NULL);
3462
283k
      ctxt->input->cur += l;
3463
283k
      c = CUR_CHAR(l);
3464
283k
  }
3465
111M
    }
3466
715k
    if (len > maxLength) {
3467
526
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3468
526
        return(NULL);
3469
526
    }
3470
714k
    return(xmlDictLookup(ctxt->dict, (BASE_PTR + startPosition), len));
3471
715k
}
3472
3473
/**
3474
 * xmlParseNCName:
3475
 * @ctxt:  an XML parser context
3476
 * @len:  length of the string parsed
3477
 *
3478
 * parse an XML name.
3479
 *
3480
 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3481
 *                      CombiningChar | Extender
3482
 *
3483
 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3484
 *
3485
 * Returns the Name parsed or NULL
3486
 */
3487
3488
static const xmlChar *
3489
32.5M
xmlParseNCName(xmlParserCtxtPtr ctxt) {
3490
32.5M
    const xmlChar *in, *e;
3491
32.5M
    const xmlChar *ret;
3492
32.5M
    size_t count = 0;
3493
32.5M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3494
5.72M
                       XML_MAX_TEXT_LENGTH :
3495
32.5M
                       XML_MAX_NAME_LENGTH;
3496
3497
#ifdef DEBUG
3498
    nbParseNCName++;
3499
#endif
3500
3501
    /*
3502
     * Accelerator for simple ASCII names
3503
     */
3504
32.5M
    in = ctxt->input->cur;
3505
32.5M
    e = ctxt->input->end;
3506
32.5M
    if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3507
32.5M
   ((*in >= 0x41) && (*in <= 0x5A)) ||
3508
32.5M
   (*in == '_')) && (in < e)) {
3509
29.5M
  in++;
3510
221M
  while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3511
221M
          ((*in >= 0x41) && (*in <= 0x5A)) ||
3512
221M
          ((*in >= 0x30) && (*in <= 0x39)) ||
3513
221M
          (*in == '_') || (*in == '-') ||
3514
221M
          (*in == '.')) && (in < e))
3515
191M
      in++;
3516
29.5M
  if (in >= e)
3517
7.33k
      goto complex;
3518
29.5M
  if ((*in > 0) && (*in < 0x80)) {
3519
28.9M
      count = in - ctxt->input->cur;
3520
28.9M
            if (count > maxLength) {
3521
553
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3522
553
                return(NULL);
3523
553
            }
3524
28.9M
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3525
28.9M
      ctxt->input->cur = in;
3526
28.9M
      ctxt->input->col += count;
3527
28.9M
      if (ret == NULL) {
3528
0
          xmlErrMemory(ctxt, NULL);
3529
0
      }
3530
28.9M
      return(ret);
3531
28.9M
  }
3532
29.5M
    }
3533
3.64M
complex:
3534
3.64M
    return(xmlParseNCNameComplex(ctxt));
3535
32.5M
}
3536
3537
/**
3538
 * xmlParseNameAndCompare:
3539
 * @ctxt:  an XML parser context
3540
 *
3541
 * parse an XML name and compares for match
3542
 * (specialized for endtag parsing)
3543
 *
3544
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3545
 * and the name for mismatch
3546
 */
3547
3548
static const xmlChar *
3549
4.34M
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3550
4.34M
    register const xmlChar *cmp = other;
3551
4.34M
    register const xmlChar *in;
3552
4.34M
    const xmlChar *ret;
3553
3554
4.34M
    GROW;
3555
4.34M
    if (ctxt->instate == XML_PARSER_EOF)
3556
0
        return(NULL);
3557
3558
4.34M
    in = ctxt->input->cur;
3559
20.6M
    while (*in != 0 && *in == *cmp) {
3560
16.3M
  ++in;
3561
16.3M
  ++cmp;
3562
16.3M
    }
3563
4.34M
    if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3564
  /* success */
3565
3.53M
  ctxt->input->col += in - ctxt->input->cur;
3566
3.53M
  ctxt->input->cur = in;
3567
3.53M
  return (const xmlChar*) 1;
3568
3.53M
    }
3569
    /* failure (or end of input buffer), check with full function */
3570
809k
    ret = xmlParseName (ctxt);
3571
    /* strings coming from the dictionary direct compare possible */
3572
809k
    if (ret == other) {
3573
60.7k
  return (const xmlChar*) 1;
3574
60.7k
    }
3575
748k
    return ret;
3576
809k
}
3577
3578
/**
3579
 * xmlParseStringName:
3580
 * @ctxt:  an XML parser context
3581
 * @str:  a pointer to the string pointer (IN/OUT)
3582
 *
3583
 * parse an XML name.
3584
 *
3585
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3586
 *                  CombiningChar | Extender
3587
 *
3588
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3589
 *
3590
 * [6] Names ::= Name (#x20 Name)*
3591
 *
3592
 * Returns the Name parsed or NULL. The @str pointer
3593
 * is updated to the current location in the string.
3594
 */
3595
3596
static xmlChar *
3597
32.5M
xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3598
32.5M
    xmlChar buf[XML_MAX_NAMELEN + 5];
3599
32.5M
    const xmlChar *cur = *str;
3600
32.5M
    int len = 0, l;
3601
32.5M
    int c;
3602
32.5M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3603
193k
                    XML_MAX_TEXT_LENGTH :
3604
32.5M
                    XML_MAX_NAME_LENGTH;
3605
3606
#ifdef DEBUG
3607
    nbParseStringName++;
3608
#endif
3609
3610
32.5M
    c = CUR_SCHAR(cur, l);
3611
32.5M
    if (!xmlIsNameStartChar(ctxt, c)) {
3612
1.44M
  return(NULL);
3613
1.44M
    }
3614
3615
31.0M
    COPY_BUF(l,buf,len,c);
3616
31.0M
    cur += l;
3617
31.0M
    c = CUR_SCHAR(cur, l);
3618
468M
    while (xmlIsNameChar(ctxt, c)) {
3619
440M
  COPY_BUF(l,buf,len,c);
3620
440M
  cur += l;
3621
440M
  c = CUR_SCHAR(cur, l);
3622
440M
  if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3623
      /*
3624
       * Okay someone managed to make a huge name, so he's ready to pay
3625
       * for the processing speed.
3626
       */
3627
3.19M
      xmlChar *buffer;
3628
3.19M
      int max = len * 2;
3629
3630
3.19M
      buffer = (xmlChar *) xmlMallocAtomic(max);
3631
3.19M
      if (buffer == NULL) {
3632
0
          xmlErrMemory(ctxt, NULL);
3633
0
    return(NULL);
3634
0
      }
3635
3.19M
      memcpy(buffer, buf, len);
3636
744M
      while (xmlIsNameChar(ctxt, c)) {
3637
741M
    if (len + 10 > max) {
3638
3.20M
        xmlChar *tmp;
3639
3640
3.20M
        max *= 2;
3641
3.20M
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3642
3.20M
        if (tmp == NULL) {
3643
0
      xmlErrMemory(ctxt, NULL);
3644
0
      xmlFree(buffer);
3645
0
      return(NULL);
3646
0
        }
3647
3.20M
        buffer = tmp;
3648
3.20M
    }
3649
741M
    COPY_BUF(l,buffer,len,c);
3650
741M
    cur += l;
3651
741M
    c = CUR_SCHAR(cur, l);
3652
741M
                if (len > maxLength) {
3653
209
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3654
209
                    xmlFree(buffer);
3655
209
                    return(NULL);
3656
209
                }
3657
741M
      }
3658
3.19M
      buffer[len] = 0;
3659
3.19M
      *str = cur;
3660
3.19M
      return(buffer);
3661
3.19M
  }
3662
440M
    }
3663
27.8M
    if (len > maxLength) {
3664
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3665
0
        return(NULL);
3666
0
    }
3667
27.8M
    *str = cur;
3668
27.8M
    return(xmlStrndup(buf, len));
3669
27.8M
}
3670
3671
/**
3672
 * xmlParseNmtoken:
3673
 * @ctxt:  an XML parser context
3674
 *
3675
 * DEPRECATED: Internal function, don't use.
3676
 *
3677
 * parse an XML Nmtoken.
3678
 *
3679
 * [7] Nmtoken ::= (NameChar)+
3680
 *
3681
 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3682
 *
3683
 * Returns the Nmtoken parsed or NULL
3684
 */
3685
3686
xmlChar *
3687
731k
xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3688
731k
    xmlChar buf[XML_MAX_NAMELEN + 5];
3689
731k
    int len = 0, l;
3690
731k
    int c;
3691
731k
    int count = 0;
3692
731k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3693
240k
                    XML_MAX_TEXT_LENGTH :
3694
731k
                    XML_MAX_NAME_LENGTH;
3695
3696
#ifdef DEBUG
3697
    nbParseNmToken++;
3698
#endif
3699
3700
731k
    GROW;
3701
731k
    if (ctxt->instate == XML_PARSER_EOF)
3702
0
        return(NULL);
3703
731k
    c = CUR_CHAR(l);
3704
3705
5.71M
    while (xmlIsNameChar(ctxt, c)) {
3706
5.00M
  if (count++ > XML_PARSER_CHUNK_SIZE) {
3707
0
      count = 0;
3708
0
      GROW;
3709
0
  }
3710
5.00M
  COPY_BUF(l,buf,len,c);
3711
5.00M
  NEXTL(l);
3712
5.00M
  c = CUR_CHAR(l);
3713
5.00M
  if (c == 0) {
3714
5.99k
      count = 0;
3715
5.99k
      GROW;
3716
5.99k
      if (ctxt->instate == XML_PARSER_EOF)
3717
0
    return(NULL);
3718
5.99k
            c = CUR_CHAR(l);
3719
5.99k
  }
3720
5.00M
  if (len >= XML_MAX_NAMELEN) {
3721
      /*
3722
       * Okay someone managed to make a huge token, so he's ready to pay
3723
       * for the processing speed.
3724
       */
3725
21.1k
      xmlChar *buffer;
3726
21.1k
      int max = len * 2;
3727
3728
21.1k
      buffer = (xmlChar *) xmlMallocAtomic(max);
3729
21.1k
      if (buffer == NULL) {
3730
0
          xmlErrMemory(ctxt, NULL);
3731
0
    return(NULL);
3732
0
      }
3733
21.1k
      memcpy(buffer, buf, len);
3734
96.5M
      while (xmlIsNameChar(ctxt, c)) {
3735
96.5M
    if (count++ > XML_PARSER_CHUNK_SIZE) {
3736
957k
        count = 0;
3737
957k
        GROW;
3738
957k
                    if (ctxt->instate == XML_PARSER_EOF) {
3739
0
                        xmlFree(buffer);
3740
0
                        return(NULL);
3741
0
                    }
3742
957k
    }
3743
96.5M
    if (len + 10 > max) {
3744
51.6k
        xmlChar *tmp;
3745
3746
51.6k
        max *= 2;
3747
51.6k
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3748
51.6k
        if (tmp == NULL) {
3749
0
      xmlErrMemory(ctxt, NULL);
3750
0
      xmlFree(buffer);
3751
0
      return(NULL);
3752
0
        }
3753
51.6k
        buffer = tmp;
3754
51.6k
    }
3755
96.5M
    COPY_BUF(l,buffer,len,c);
3756
96.5M
    NEXTL(l);
3757
96.5M
    c = CUR_CHAR(l);
3758
96.5M
                if (len > maxLength) {
3759
419
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3760
419
                    xmlFree(buffer);
3761
419
                    return(NULL);
3762
419
                }
3763
96.5M
      }
3764
20.7k
      buffer[len] = 0;
3765
20.7k
      return(buffer);
3766
21.1k
  }
3767
5.00M
    }
3768
710k
    if (len == 0)
3769
106k
        return(NULL);
3770
604k
    if (len > maxLength) {
3771
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3772
0
        return(NULL);
3773
0
    }
3774
604k
    return(xmlStrndup(buf, len));
3775
604k
}
3776
3777
/**
3778
 * xmlParseEntityValue:
3779
 * @ctxt:  an XML parser context
3780
 * @orig:  if non-NULL store a copy of the original entity value
3781
 *
3782
 * DEPRECATED: Internal function, don't use.
3783
 *
3784
 * parse a value for ENTITY declarations
3785
 *
3786
 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3787
 *                 "'" ([^%&'] | PEReference | Reference)* "'"
3788
 *
3789
 * Returns the EntityValue parsed with reference substituted or NULL
3790
 */
3791
3792
xmlChar *
3793
538k
xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3794
538k
    xmlChar *buf = NULL;
3795
538k
    int len = 0;
3796
538k
    int size = XML_PARSER_BUFFER_SIZE;
3797
538k
    int c, l;
3798
538k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3799
87.0k
                    XML_MAX_HUGE_LENGTH :
3800
538k
                    XML_MAX_TEXT_LENGTH;
3801
538k
    xmlChar stop;
3802
538k
    xmlChar *ret = NULL;
3803
538k
    const xmlChar *cur = NULL;
3804
538k
    xmlParserInputPtr input;
3805
3806
538k
    if (RAW == '"') stop = '"';
3807
100k
    else if (RAW == '\'') stop = '\'';
3808
0
    else {
3809
0
  xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
3810
0
  return(NULL);
3811
0
    }
3812
538k
    buf = (xmlChar *) xmlMallocAtomic(size);
3813
538k
    if (buf == NULL) {
3814
0
  xmlErrMemory(ctxt, NULL);
3815
0
  return(NULL);
3816
0
    }
3817
3818
    /*
3819
     * The content of the entity definition is copied in a buffer.
3820
     */
3821
3822
538k
    ctxt->instate = XML_PARSER_ENTITY_VALUE;
3823
538k
    input = ctxt->input;
3824
538k
    GROW;
3825
538k
    if (ctxt->instate == XML_PARSER_EOF)
3826
0
        goto error;
3827
538k
    NEXT;
3828
538k
    c = CUR_CHAR(l);
3829
    /*
3830
     * NOTE: 4.4.5 Included in Literal
3831
     * When a parameter entity reference appears in a literal entity
3832
     * value, ... a single or double quote character in the replacement
3833
     * text is always treated as a normal data character and will not
3834
     * terminate the literal.
3835
     * In practice it means we stop the loop only when back at parsing
3836
     * the initial entity and the quote is found
3837
     */
3838
207M
    while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3839
207M
      (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
3840
206M
  if (len + 5 >= size) {
3841
134k
      xmlChar *tmp;
3842
3843
134k
      size *= 2;
3844
134k
      tmp = (xmlChar *) xmlRealloc(buf, size);
3845
134k
      if (tmp == NULL) {
3846
0
    xmlErrMemory(ctxt, NULL);
3847
0
                goto error;
3848
0
      }
3849
134k
      buf = tmp;
3850
134k
  }
3851
206M
  COPY_BUF(l,buf,len,c);
3852
206M
  NEXTL(l);
3853
3854
206M
  GROW;
3855
206M
  c = CUR_CHAR(l);
3856
206M
  if (c == 0) {
3857
4.39k
      GROW;
3858
4.39k
      c = CUR_CHAR(l);
3859
4.39k
  }
3860
3861
206M
        if (len > maxLength) {
3862
0
            xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
3863
0
                           "entity value too long\n");
3864
0
            goto error;
3865
0
        }
3866
206M
    }
3867
538k
    buf[len] = 0;
3868
538k
    if (ctxt->instate == XML_PARSER_EOF)
3869
0
        goto error;
3870
538k
    if (c != stop) {
3871
6.46k
        xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3872
6.46k
        goto error;
3873
6.46k
    }
3874
531k
    NEXT;
3875
3876
    /*
3877
     * Raise problem w.r.t. '&' and '%' being used in non-entities
3878
     * reference constructs. Note Charref will be handled in
3879
     * xmlStringDecodeEntities()
3880
     */
3881
531k
    cur = buf;
3882
161M
    while (*cur != 0) { /* non input consuming */
3883
160M
  if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3884
422k
      xmlChar *name;
3885
422k
      xmlChar tmp = *cur;
3886
422k
            int nameOk = 0;
3887
3888
422k
      cur++;
3889
422k
      name = xmlParseStringName(ctxt, &cur);
3890
422k
            if (name != NULL) {
3891
418k
                nameOk = 1;
3892
418k
                xmlFree(name);
3893
418k
            }
3894
422k
            if ((nameOk == 0) || (*cur != ';')) {
3895
13.5k
    xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3896
13.5k
      "EntityValue: '%c' forbidden except for entities references\n",
3897
13.5k
                            tmp);
3898
13.5k
                goto error;
3899
13.5k
      }
3900
409k
      if ((tmp == '%') && (ctxt->inSubset == 1) &&
3901
409k
    (ctxt->inputNr == 1)) {
3902
5.90k
    xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3903
5.90k
                goto error;
3904
5.90k
      }
3905
403k
      if (*cur == 0)
3906
0
          break;
3907
403k
  }
3908
160M
  cur++;
3909
160M
    }
3910
3911
    /*
3912
     * Then PEReference entities are substituted.
3913
     *
3914
     * NOTE: 4.4.7 Bypassed
3915
     * When a general entity reference appears in the EntityValue in
3916
     * an entity declaration, it is bypassed and left as is.
3917
     * so XML_SUBSTITUTE_REF is not set here.
3918
     */
3919
512k
    ++ctxt->depth;
3920
512k
    ret = xmlStringDecodeEntitiesInt(ctxt, buf, len, XML_SUBSTITUTE_PEREF,
3921
512k
                                     0, 0, 0, /* check */ 1);
3922
512k
    --ctxt->depth;
3923
3924
512k
    if (orig != NULL) {
3925
512k
        *orig = buf;
3926
512k
        buf = NULL;
3927
512k
    }
3928
3929
538k
error:
3930
538k
    if (buf != NULL)
3931
25.9k
        xmlFree(buf);
3932
538k
    return(ret);
3933
512k
}
3934
3935
/**
3936
 * xmlParseAttValueComplex:
3937
 * @ctxt:  an XML parser context
3938
 * @len:   the resulting attribute len
3939
 * @normalize:  whether to apply the inner normalization
3940
 *
3941
 * parse a value for an attribute, this is the fallback function
3942
 * of xmlParseAttValue() when the attribute parsing requires handling
3943
 * of non-ASCII characters, or normalization compaction.
3944
 *
3945
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3946
 */
3947
static xmlChar *
3948
2.56M
xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
3949
2.56M
    xmlChar limit = 0;
3950
2.56M
    xmlChar *buf = NULL;
3951
2.56M
    xmlChar *rep = NULL;
3952
2.56M
    size_t len = 0;
3953
2.56M
    size_t buf_size = 0;
3954
2.56M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3955
865k
                       XML_MAX_HUGE_LENGTH :
3956
2.56M
                       XML_MAX_TEXT_LENGTH;
3957
2.56M
    int c, l, in_space = 0;
3958
2.56M
    xmlChar *current = NULL;
3959
2.56M
    xmlEntityPtr ent;
3960
3961
2.56M
    if (NXT(0) == '"') {
3962
1.96M
  ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3963
1.96M
  limit = '"';
3964
1.96M
        NEXT;
3965
1.96M
    } else if (NXT(0) == '\'') {
3966
602k
  limit = '\'';
3967
602k
  ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3968
602k
        NEXT;
3969
602k
    } else {
3970
0
  xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3971
0
  return(NULL);
3972
0
    }
3973
3974
    /*
3975
     * allocate a translation buffer.
3976
     */
3977
2.56M
    buf_size = XML_PARSER_BUFFER_SIZE;
3978
2.56M
    buf = (xmlChar *) xmlMallocAtomic(buf_size);
3979
2.56M
    if (buf == NULL) goto mem_error;
3980
3981
    /*
3982
     * OK loop until we reach one of the ending char or a size limit.
3983
     */
3984
2.56M
    c = CUR_CHAR(l);
3985
2.58G
    while (((NXT(0) != limit) && /* checked */
3986
2.58G
            (IS_CHAR(c)) && (c != '<')) &&
3987
2.58G
            (ctxt->instate != XML_PARSER_EOF)) {
3988
2.58G
  if (c == '&') {
3989
4.79M
      in_space = 0;
3990
4.79M
      if (NXT(1) == '#') {
3991
1.27M
    int val = xmlParseCharRef(ctxt);
3992
3993
1.27M
    if (val == '&') {
3994
13.4k
        if (ctxt->replaceEntities) {
3995
7.86k
      if (len + 10 > buf_size) {
3996
116
          growBuffer(buf, 10);
3997
116
      }
3998
7.86k
      buf[len++] = '&';
3999
7.86k
        } else {
4000
      /*
4001
       * The reparsing will be done in xmlStringGetNodeList()
4002
       * called by the attribute() function in SAX.c
4003
       */
4004
5.54k
      if (len + 10 > buf_size) {
4005
134
          growBuffer(buf, 10);
4006
134
      }
4007
5.54k
      buf[len++] = '&';
4008
5.54k
      buf[len++] = '#';
4009
5.54k
      buf[len++] = '3';
4010
5.54k
      buf[len++] = '8';
4011
5.54k
      buf[len++] = ';';
4012
5.54k
        }
4013
1.25M
    } else if (val != 0) {
4014
1.17M
        if (len + 10 > buf_size) {
4015
5.31k
      growBuffer(buf, 10);
4016
5.31k
        }
4017
1.17M
        len += xmlCopyChar(0, &buf[len], val);
4018
1.17M
    }
4019
3.52M
      } else {
4020
3.52M
    ent = xmlParseEntityRef(ctxt);
4021
3.52M
    if ((ent != NULL) &&
4022
3.52M
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
4023
220k
        if (len + 10 > buf_size) {
4024
176
      growBuffer(buf, 10);
4025
176
        }
4026
220k
        if ((ctxt->replaceEntities == 0) &&
4027
220k
            (ent->content[0] == '&')) {
4028
51.9k
      buf[len++] = '&';
4029
51.9k
      buf[len++] = '#';
4030
51.9k
      buf[len++] = '3';
4031
51.9k
      buf[len++] = '8';
4032
51.9k
      buf[len++] = ';';
4033
168k
        } else {
4034
168k
      buf[len++] = ent->content[0];
4035
168k
        }
4036
3.30M
    } else if ((ent != NULL) &&
4037
3.30M
               (ctxt->replaceEntities != 0)) {
4038
810k
        if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
4039
810k
                        if (xmlParserEntityCheck(ctxt, ent->length))
4040
0
                            goto error;
4041
4042
810k
      ++ctxt->depth;
4043
810k
      rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
4044
810k
                                ent->length, XML_SUBSTITUTE_REF, 0, 0, 0,
4045
810k
                                /* check */ 1);
4046
810k
      --ctxt->depth;
4047
810k
      if (rep != NULL) {
4048
794k
          current = rep;
4049
140M
          while (*current != 0) { /* non input consuming */
4050
139M
                                if ((*current == 0xD) || (*current == 0xA) ||
4051
139M
                                    (*current == 0x9)) {
4052
7.70M
                                    buf[len++] = 0x20;
4053
7.70M
                                    current++;
4054
7.70M
                                } else
4055
132M
                                    buf[len++] = *current++;
4056
139M
        if (len + 10 > buf_size) {
4057
57.2k
            growBuffer(buf, 10);
4058
57.2k
        }
4059
139M
          }
4060
794k
          xmlFree(rep);
4061
794k
          rep = NULL;
4062
794k
      }
4063
810k
        } else {
4064
0
      if (len + 10 > buf_size) {
4065
0
          growBuffer(buf, 10);
4066
0
      }
4067
0
      if (ent->content != NULL)
4068
0
          buf[len++] = ent->content[0];
4069
0
        }
4070
2.49M
    } else if (ent != NULL) {
4071
403k
        int i = xmlStrlen(ent->name);
4072
403k
        const xmlChar *cur = ent->name;
4073
4074
        /*
4075
                     * We also check for recursion and amplification
4076
                     * when entities are not substituted. They're
4077
                     * often expanded later.
4078
         */
4079
403k
        if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
4080
403k
      (ent->content != NULL)) {
4081
342k
                        if ((ent->flags & XML_ENT_CHECKED) == 0) {
4082
18.6k
                            unsigned long oldCopy = ctxt->sizeentcopy;
4083
4084
18.6k
                            ctxt->sizeentcopy = ent->length;
4085
4086
18.6k
                            ++ctxt->depth;
4087
18.6k
                            rep = xmlStringDecodeEntitiesInt(ctxt,
4088
18.6k
                                    ent->content, ent->length,
4089
18.6k
                                    XML_SUBSTITUTE_REF, 0, 0, 0,
4090
18.6k
                                    /* check */ 1);
4091
18.6k
                            --ctxt->depth;
4092
4093
                            /*
4094
                             * If we're parsing DTD content, the entity
4095
                             * might reference other entities which
4096
                             * weren't defined yet, so the check isn't
4097
                             * reliable.
4098
                             */
4099
18.6k
                            if (ctxt->inSubset == 0) {
4100
14.0k
                                ent->flags |= XML_ENT_CHECKED;
4101
14.0k
                                ent->expandedSize = ctxt->sizeentcopy;
4102
14.0k
                            }
4103
4104
18.6k
                            if (rep != NULL) {
4105
18.5k
                                xmlFree(rep);
4106
18.5k
                                rep = NULL;
4107
18.5k
                            } else {
4108
179
                                ent->content[0] = 0;
4109
179
                            }
4110
4111
18.6k
                            if (xmlParserEntityCheck(ctxt, oldCopy))
4112
3
                                goto error;
4113
323k
                        } else {
4114
323k
                            if (xmlParserEntityCheck(ctxt, ent->expandedSize))
4115
2
                                goto error;
4116
323k
                        }
4117
342k
        }
4118
4119
        /*
4120
         * Just output the reference
4121
         */
4122
403k
        buf[len++] = '&';
4123
405k
        while (len + i + 10 > buf_size) {
4124
2.97k
      growBuffer(buf, i + 10);
4125
2.97k
        }
4126
916k
        for (;i > 0;i--)
4127
513k
      buf[len++] = *cur++;
4128
403k
        buf[len++] = ';';
4129
403k
    }
4130
3.52M
      }
4131
2.57G
  } else {
4132
2.57G
      if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
4133
181M
          if ((len != 0) || (!normalize)) {
4134
180M
        if ((!normalize) || (!in_space)) {
4135
179M
      COPY_BUF(l,buf,len,0x20);
4136
179M
      while (len + 10 > buf_size) {
4137
102k
          growBuffer(buf, 10);
4138
102k
      }
4139
179M
        }
4140
180M
        in_space = 1;
4141
180M
    }
4142
2.39G
      } else {
4143
2.39G
          in_space = 0;
4144
2.39G
    COPY_BUF(l,buf,len,c);
4145
2.39G
    if (len + 10 > buf_size) {
4146
1.06M
        growBuffer(buf, 10);
4147
1.06M
    }
4148
2.39G
      }
4149
2.57G
      NEXTL(l);
4150
2.57G
  }
4151
2.58G
  GROW;
4152
2.58G
  c = CUR_CHAR(l);
4153
2.58G
        if (len > maxLength) {
4154
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4155
0
                           "AttValue length too long\n");
4156
0
            goto mem_error;
4157
0
        }
4158
2.58G
    }
4159
2.56M
    if (ctxt->instate == XML_PARSER_EOF)
4160
736
        goto error;
4161
4162
2.56M
    if ((in_space) && (normalize)) {
4163
95.2k
        while ((len > 0) && (buf[len - 1] == 0x20)) len--;
4164
44.9k
    }
4165
2.56M
    buf[len] = 0;
4166
2.56M
    if (RAW == '<') {
4167
616k
  xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4168
1.95M
    } else if (RAW != limit) {
4169
777k
  if ((c != 0) && (!IS_CHAR(c))) {
4170
287k
      xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4171
287k
         "invalid character in attribute value\n");
4172
490k
  } else {
4173
490k
      xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4174
490k
         "AttValue: ' expected\n");
4175
490k
        }
4176
777k
    } else
4177
1.17M
  NEXT;
4178
4179
2.56M
    if (attlen != NULL) *attlen = len;
4180
2.56M
    return(buf);
4181
4182
0
mem_error:
4183
0
    xmlErrMemory(ctxt, NULL);
4184
741
error:
4185
741
    if (buf != NULL)
4186
741
        xmlFree(buf);
4187
741
    if (rep != NULL)
4188
0
        xmlFree(rep);
4189
741
    return(NULL);
4190
0
}
4191
4192
/**
4193
 * xmlParseAttValue:
4194
 * @ctxt:  an XML parser context
4195
 *
4196
 * DEPRECATED: Internal function, don't use.
4197
 *
4198
 * parse a value for an attribute
4199
 * Note: the parser won't do substitution of entities here, this
4200
 * will be handled later in xmlStringGetNodeList
4201
 *
4202
 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4203
 *                   "'" ([^<&'] | Reference)* "'"
4204
 *
4205
 * 3.3.3 Attribute-Value Normalization:
4206
 * Before the value of an attribute is passed to the application or
4207
 * checked for validity, the XML processor must normalize it as follows:
4208
 * - a character reference is processed by appending the referenced
4209
 *   character to the attribute value
4210
 * - an entity reference is processed by recursively processing the
4211
 *   replacement text of the entity
4212
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4213
 *   appending #x20 to the normalized value, except that only a single
4214
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
4215
 *   parsed entity or the literal entity value of an internal parsed entity
4216
 * - other characters are processed by appending them to the normalized value
4217
 * If the declared value is not CDATA, then the XML processor must further
4218
 * process the normalized attribute value by discarding any leading and
4219
 * trailing space (#x20) characters, and by replacing sequences of space
4220
 * (#x20) characters by a single space (#x20) character.
4221
 * All attributes for which no declaration has been read should be treated
4222
 * by a non-validating parser as if declared CDATA.
4223
 *
4224
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4225
 */
4226
4227
4228
xmlChar *
4229
6.95M
xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4230
6.95M
    if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4231
6.95M
    return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
4232
6.95M
}
4233
4234
/**
4235
 * xmlParseSystemLiteral:
4236
 * @ctxt:  an XML parser context
4237
 *
4238
 * DEPRECATED: Internal function, don't use.
4239
 *
4240
 * parse an XML Literal
4241
 *
4242
 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4243
 *
4244
 * Returns the SystemLiteral parsed or NULL
4245
 */
4246
4247
xmlChar *
4248
304k
xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4249
304k
    xmlChar *buf = NULL;
4250
304k
    int len = 0;
4251
304k
    int size = XML_PARSER_BUFFER_SIZE;
4252
304k
    int cur, l;
4253
304k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4254
71.2k
                    XML_MAX_TEXT_LENGTH :
4255
304k
                    XML_MAX_NAME_LENGTH;
4256
304k
    xmlChar stop;
4257
304k
    int state = ctxt->instate;
4258
304k
    int count = 0;
4259
4260
304k
    SHRINK;
4261
304k
    if (RAW == '"') {
4262
279k
        NEXT;
4263
279k
  stop = '"';
4264
279k
    } else if (RAW == '\'') {
4265
13.5k
        NEXT;
4266
13.5k
  stop = '\'';
4267
13.5k
    } else {
4268
12.2k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4269
12.2k
  return(NULL);
4270
12.2k
    }
4271
4272
292k
    buf = (xmlChar *) xmlMallocAtomic(size);
4273
292k
    if (buf == NULL) {
4274
0
        xmlErrMemory(ctxt, NULL);
4275
0
  return(NULL);
4276
0
    }
4277
292k
    ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4278
292k
    cur = CUR_CHAR(l);
4279
41.5M
    while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4280
41.3M
  if (len + 5 >= size) {
4281
23.5k
      xmlChar *tmp;
4282
4283
23.5k
      size *= 2;
4284
23.5k
      tmp = (xmlChar *) xmlRealloc(buf, size);
4285
23.5k
      if (tmp == NULL) {
4286
0
          xmlFree(buf);
4287
0
    xmlErrMemory(ctxt, NULL);
4288
0
    ctxt->instate = (xmlParserInputState) state;
4289
0
    return(NULL);
4290
0
      }
4291
23.5k
      buf = tmp;
4292
23.5k
  }
4293
41.3M
  count++;
4294
41.3M
  if (count > 50) {
4295
722k
      SHRINK;
4296
722k
      GROW;
4297
722k
      count = 0;
4298
722k
            if (ctxt->instate == XML_PARSER_EOF) {
4299
0
          xmlFree(buf);
4300
0
    return(NULL);
4301
0
            }
4302
722k
  }
4303
41.3M
  COPY_BUF(l,buf,len,cur);
4304
41.3M
  NEXTL(l);
4305
41.3M
  cur = CUR_CHAR(l);
4306
41.3M
  if (cur == 0) {
4307
5.92k
      GROW;
4308
5.92k
      SHRINK;
4309
5.92k
      cur = CUR_CHAR(l);
4310
5.92k
  }
4311
41.3M
        if (len > maxLength) {
4312
203
            xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4313
203
            xmlFree(buf);
4314
203
            ctxt->instate = (xmlParserInputState) state;
4315
203
            return(NULL);
4316
203
        }
4317
41.3M
    }
4318
292k
    buf[len] = 0;
4319
292k
    ctxt->instate = (xmlParserInputState) state;
4320
292k
    if (!IS_CHAR(cur)) {
4321
9.13k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4322
283k
    } else {
4323
283k
  NEXT;
4324
283k
    }
4325
292k
    return(buf);
4326
292k
}
4327
4328
/**
4329
 * xmlParsePubidLiteral:
4330
 * @ctxt:  an XML parser context
4331
 *
4332
 * DEPRECATED: Internal function, don't use.
4333
 *
4334
 * parse an XML public literal
4335
 *
4336
 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4337
 *
4338
 * Returns the PubidLiteral parsed or NULL.
4339
 */
4340
4341
xmlChar *
4342
71.9k
xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4343
71.9k
    xmlChar *buf = NULL;
4344
71.9k
    int len = 0;
4345
71.9k
    int size = XML_PARSER_BUFFER_SIZE;
4346
71.9k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4347
14.6k
                    XML_MAX_TEXT_LENGTH :
4348
71.9k
                    XML_MAX_NAME_LENGTH;
4349
71.9k
    xmlChar cur;
4350
71.9k
    xmlChar stop;
4351
71.9k
    int count = 0;
4352
71.9k
    xmlParserInputState oldstate = ctxt->instate;
4353
4354
71.9k
    SHRINK;
4355
71.9k
    if (RAW == '"') {
4356
57.9k
        NEXT;
4357
57.9k
  stop = '"';
4358
57.9k
    } else if (RAW == '\'') {
4359
12.9k
        NEXT;
4360
12.9k
  stop = '\'';
4361
12.9k
    } else {
4362
1.02k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4363
1.02k
  return(NULL);
4364
1.02k
    }
4365
70.9k
    buf = (xmlChar *) xmlMallocAtomic(size);
4366
70.9k
    if (buf == NULL) {
4367
0
  xmlErrMemory(ctxt, NULL);
4368
0
  return(NULL);
4369
0
    }
4370
70.9k
    ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
4371
70.9k
    cur = CUR;
4372
20.0M
    while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
4373
19.9M
  if (len + 1 >= size) {
4374
10.2k
      xmlChar *tmp;
4375
4376
10.2k
      size *= 2;
4377
10.2k
      tmp = (xmlChar *) xmlRealloc(buf, size);
4378
10.2k
      if (tmp == NULL) {
4379
0
    xmlErrMemory(ctxt, NULL);
4380
0
    xmlFree(buf);
4381
0
    return(NULL);
4382
0
      }
4383
10.2k
      buf = tmp;
4384
10.2k
  }
4385
19.9M
  buf[len++] = cur;
4386
19.9M
  count++;
4387
19.9M
  if (count > 50) {
4388
360k
      SHRINK;
4389
360k
      GROW;
4390
360k
      count = 0;
4391
360k
            if (ctxt->instate == XML_PARSER_EOF) {
4392
0
    xmlFree(buf);
4393
0
    return(NULL);
4394
0
            }
4395
360k
  }
4396
19.9M
  NEXT;
4397
19.9M
  cur = CUR;
4398
19.9M
  if (cur == 0) {
4399
1.80k
      GROW;
4400
1.80k
      SHRINK;
4401
1.80k
      cur = CUR;
4402
1.80k
  }
4403
19.9M
        if (len > maxLength) {
4404
187
            xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4405
187
            xmlFree(buf);
4406
187
            return(NULL);
4407
187
        }
4408
19.9M
    }
4409
70.7k
    buf[len] = 0;
4410
70.7k
    if (cur != stop) {
4411
5.80k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4412
64.9k
    } else {
4413
64.9k
  NEXT;
4414
64.9k
    }
4415
70.7k
    ctxt->instate = oldstate;
4416
70.7k
    return(buf);
4417
70.9k
}
4418
4419
static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt);
4420
4421
/*
4422
 * Lookup table used by the inner scanning loop of xmlParseCharData().
 * It is indexed by a raw input byte: a non-zero entry means the byte can
 * be consumed as plain character data without any further inspection.
 * Zero entries stop the scan: control characters other than TAB
 * (including LF and CR, which need line/column bookkeeping), '&', '<',
 * ']' (possible start of "]]>") and every byte >= 0x80 (non-ASCII).
4423
 */
4424
static const unsigned char test_char_data[256] = {
4425
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4426
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* TAB (0x9) accepted; LF/CR are handled separately */
4427
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4428
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4429
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* '&' (0x26) stops the scan */
4430
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
4431
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
4432
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* '<' (0x3C) stops the scan */
4433
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
4434
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
4435
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
4436
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ']' (0x5D) stops the scan */
4437
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
4438
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
4439
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
4440
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
4441
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x80 and above: non-ASCII, never accepted here */
4442
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4443
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4444
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4445
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4446
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4447
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4448
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4449
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4450
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4451
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4452
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4453
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4454
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4455
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4456
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
4457
};
4458
4459
/**
4460
 * xmlParseCharData:
4461
 * @ctxt:  an XML parser context
4462
 * @cdata:  unused
4463
 *
4464
 * DEPRECATED: Internal function, don't use.
4465
 *
4466
 * Parse character data. Always makes progress if the first char isn't
4467
 * '<' or '&'.
4468
 *
4469
 * if we are within a CDATA section ']]>' marks an end of section.
4470
 *
4471
 * The right angle bracket (>) may be represented using the string "&gt;",
4472
 * and must, for compatibility, be escaped using "&gt;" or a character
4473
 * reference when it appears in the string "]]>" in content, when that
4474
 * string is not marking the end of a CDATA section.
4475
 *
4476
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4477
 */
4478
4479
void
4480
1.15G
xmlParseCharData(xmlParserCtxtPtr ctxt, ATTRIBUTE_UNUSED int cdata) {
4481
1.15G
    const xmlChar *in;
4482
1.15G
    int nbchar = 0;
4483
1.15G
    int line = ctxt->input->line;
4484
1.15G
    int col = ctxt->input->col;
4485
1.15G
    int ccol;
4486
4487
1.15G
    SHRINK;
4488
1.15G
    GROW;
4489
    /*
4490
     * Accelerated common case where input don't need to be
4491
     * modified before passing it to the handler.
4492
     */
4493
1.15G
    in = ctxt->input->cur;
4494
1.16G
    do {
4495
1.17G
get_more_space:
4496
1.20G
        while (*in == 0x20) { in++; ctxt->input->col++; }
4497
1.17G
        if (*in == 0xA) {
4498
84.0M
            do {
4499
84.0M
                ctxt->input->line++; ctxt->input->col = 1;
4500
84.0M
                in++;
4501
84.0M
            } while (*in == 0xA);
4502
7.43M
            goto get_more_space;
4503
7.43M
        }
4504
1.16G
        if (*in == '<') {
4505
7.11M
            nbchar = in - ctxt->input->cur;
4506
7.11M
            if (nbchar > 0) {
4507
7.11M
                const xmlChar *tmp = ctxt->input->cur;
4508
7.11M
                ctxt->input->cur = in;
4509
4510
7.11M
                if ((ctxt->sax != NULL) &&
4511
7.11M
                    (ctxt->sax->ignorableWhitespace !=
4512
7.11M
                     ctxt->sax->characters)) {
4513
2.81M
                    if (areBlanks(ctxt, tmp, nbchar, 1)) {
4514
1.59M
                        if (ctxt->sax->ignorableWhitespace != NULL)
4515
1.59M
                            ctxt->sax->ignorableWhitespace(ctxt->userData,
4516
1.59M
                                                   tmp, nbchar);
4517
1.59M
                    } else {
4518
1.22M
                        if (ctxt->sax->characters != NULL)
4519
1.22M
                            ctxt->sax->characters(ctxt->userData,
4520
1.22M
                                                  tmp, nbchar);
4521
1.22M
                        if (*ctxt->space == -1)
4522
262k
                            *ctxt->space = -2;
4523
1.22M
                    }
4524
4.30M
                } else if ((ctxt->sax != NULL) &&
4525
4.30M
                           (ctxt->sax->characters != NULL)) {
4526
4.30M
                    ctxt->sax->characters(ctxt->userData,
4527
4.30M
                                          tmp, nbchar);
4528
4.30M
                }
4529
7.11M
            }
4530
7.11M
            return;
4531
7.11M
        }
4532
4533
1.16G
get_more:
4534
1.16G
        ccol = ctxt->input->col;
4535
2.04G
        while (test_char_data[*in]) {
4536
872M
            in++;
4537
872M
            ccol++;
4538
872M
        }
4539
1.16G
        ctxt->input->col = ccol;
4540
1.16G
        if (*in == 0xA) {
4541
39.7M
            do {
4542
39.7M
                ctxt->input->line++; ctxt->input->col = 1;
4543
39.7M
                in++;
4544
39.7M
            } while (*in == 0xA);
4545
7.18M
            goto get_more;
4546
7.18M
        }
4547
1.16G
        if (*in == ']') {
4548
2.53M
            if ((in[1] == ']') && (in[2] == '>')) {
4549
41.3k
                xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4550
41.3k
                ctxt->input->cur = in + 1;
4551
41.3k
                return;
4552
41.3k
            }
4553
2.49M
            in++;
4554
2.49M
            ctxt->input->col++;
4555
2.49M
            goto get_more;
4556
2.53M
        }
4557
1.15G
        nbchar = in - ctxt->input->cur;
4558
1.15G
        if (nbchar > 0) {
4559
23.2M
            if ((ctxt->sax != NULL) &&
4560
23.2M
                (ctxt->sax->ignorableWhitespace !=
4561
23.2M
                 ctxt->sax->characters) &&
4562
23.2M
                (IS_BLANK_CH(*ctxt->input->cur))) {
4563
3.52M
                const xmlChar *tmp = ctxt->input->cur;
4564
3.52M
                ctxt->input->cur = in;
4565
4566
3.52M
                if (areBlanks(ctxt, tmp, nbchar, 0)) {
4567
954k
                    if (ctxt->sax->ignorableWhitespace != NULL)
4568
954k
                        ctxt->sax->ignorableWhitespace(ctxt->userData,
4569
954k
                                                       tmp, nbchar);
4570
2.57M
                } else {
4571
2.57M
                    if (ctxt->sax->characters != NULL)
4572
2.57M
                        ctxt->sax->characters(ctxt->userData,
4573
2.57M
                                              tmp, nbchar);
4574
2.57M
                    if (*ctxt->space == -1)
4575
476k
                        *ctxt->space = -2;
4576
2.57M
                }
4577
3.52M
                line = ctxt->input->line;
4578
3.52M
                col = ctxt->input->col;
4579
19.7M
            } else if (ctxt->sax != NULL) {
4580
19.7M
                if (ctxt->sax->characters != NULL)
4581
19.7M
                    ctxt->sax->characters(ctxt->userData,
4582
19.7M
                                          ctxt->input->cur, nbchar);
4583
19.7M
                line = ctxt->input->line;
4584
19.7M
                col = ctxt->input->col;
4585
19.7M
            }
4586
23.2M
        }
4587
1.15G
        ctxt->input->cur = in;
4588
1.15G
        if (*in == 0xD) {
4589
8.36M
            in++;
4590
8.36M
            if (*in == 0xA) {
4591
8.07M
                ctxt->input->cur = in;
4592
8.07M
                in++;
4593
8.07M
                ctxt->input->line++; ctxt->input->col = 1;
4594
8.07M
                continue; /* while */
4595
8.07M
            }
4596
287k
            in--;
4597
287k
        }
4598
1.15G
        if (*in == '<') {
4599
14.5M
            return;
4600
14.5M
        }
4601
1.13G
        if (*in == '&') {
4602
1.54M
            return;
4603
1.54M
        }
4604
1.13G
        SHRINK;
4605
1.13G
        GROW;
4606
1.13G
        if (ctxt->instate == XML_PARSER_EOF)
4607
0
            return;
4608
1.13G
        in = ctxt->input->cur;
4609
1.14G
    } while (((*in >= 0x20) && (*in <= 0x7F)) ||
4610
1.14G
             (*in == 0x09) || (*in == 0x0a));
4611
1.13G
    ctxt->input->line = line;
4612
1.13G
    ctxt->input->col = col;
4613
1.13G
    xmlParseCharDataComplex(ctxt);
4614
1.13G
}
4615
4616
/**
4617
 * xmlParseCharDataComplex:
4618
 * @ctxt:  an XML parser context
4619
 * @cdata:  int indicating whether we are within a CDATA section
4620
 *
4621
 * Always makes progress if the first char isn't '<' or '&'.
4622
 *
4623
 * parse a CharData section.this is the fallback function
4624
 * of xmlParseCharData() when the parsing requires handling
4625
 * of non-ASCII characters.
4626
 */
4627
static void
4628
1.13G
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt) {
4629
1.13G
    xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4630
1.13G
    int nbchar = 0;
4631
1.13G
    int cur, l;
4632
1.13G
    int count = 0;
4633
4634
1.13G
    SHRINK;
4635
1.13G
    GROW;
4636
1.13G
    cur = CUR_CHAR(l);
4637
1.95G
    while ((cur != '<') && /* checked */
4638
1.95G
           (cur != '&') &&
4639
1.95G
     (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
4640
816M
  if ((cur == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4641
20.8k
      xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4642
20.8k
  }
4643
816M
  COPY_BUF(l,buf,nbchar,cur);
4644
  /* move current position before possible calling of ctxt->sax->characters */
4645
816M
  NEXTL(l);
4646
816M
  cur = CUR_CHAR(l);
4647
816M
  if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4648
2.55M
      buf[nbchar] = 0;
4649
4650
      /*
4651
       * OK the segment is to be consumed as chars.
4652
       */
4653
2.55M
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4654
2.42M
    if (areBlanks(ctxt, buf, nbchar, 0)) {
4655
24.2k
        if (ctxt->sax->ignorableWhitespace != NULL)
4656
24.2k
      ctxt->sax->ignorableWhitespace(ctxt->userData,
4657
24.2k
                                     buf, nbchar);
4658
2.40M
    } else {
4659
2.40M
        if (ctxt->sax->characters != NULL)
4660
2.40M
      ctxt->sax->characters(ctxt->userData, buf, nbchar);
4661
2.40M
        if ((ctxt->sax->characters !=
4662
2.40M
             ctxt->sax->ignorableWhitespace) &&
4663
2.40M
      (*ctxt->space == -1))
4664
21.3k
      *ctxt->space = -2;
4665
2.40M
    }
4666
2.42M
      }
4667
2.55M
      nbchar = 0;
4668
            /* something really bad happened in the SAX callback */
4669
2.55M
            if (ctxt->instate != XML_PARSER_CONTENT)
4670
0
                return;
4671
2.55M
  }
4672
816M
  count++;
4673
816M
  if (count > 50) {
4674
14.3M
      SHRINK;
4675
14.3M
      GROW;
4676
14.3M
      count = 0;
4677
14.3M
            if (ctxt->instate == XML_PARSER_EOF)
4678
0
    return;
4679
14.3M
  }
4680
816M
    }
4681
1.13G
    if (nbchar != 0) {
4682
8.10M
        buf[nbchar] = 0;
4683
  /*
4684
   * OK the segment is to be consumed as chars.
4685
   */
4686
8.10M
  if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4687
7.09M
      if (areBlanks(ctxt, buf, nbchar, 0)) {
4688
12.0k
    if (ctxt->sax->ignorableWhitespace != NULL)
4689
12.0k
        ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4690
7.08M
      } else {
4691
7.08M
    if (ctxt->sax->characters != NULL)
4692
7.08M
        ctxt->sax->characters(ctxt->userData, buf, nbchar);
4693
7.08M
    if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4694
7.08M
        (*ctxt->space == -1))
4695
383k
        *ctxt->space = -2;
4696
7.08M
      }
4697
7.09M
  }
4698
8.10M
    }
4699
1.13G
    if ((ctxt->input->cur < ctxt->input->end) && (!IS_CHAR(cur))) {
4700
  /* Generate the error and skip the offending character */
4701
1.13G
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4702
1.13G
                          "PCDATA invalid Char value %d\n",
4703
1.13G
                    cur ? cur : CUR);
4704
1.13G
  NEXT;
4705
1.13G
    }
4706
1.13G
}
4707
4708
/**
4709
 * xmlParseExternalID:
4710
 * @ctxt:  an XML parser context
4711
 * @publicID:  a xmlChar** receiving PubidLiteral
4712
 * @strict: indicate whether we should restrict parsing to only
4713
 *          production [75], see NOTE below
4714
 *
4715
 * DEPRECATED: Internal function, don't use.
4716
 *
4717
 * Parse an External ID or a Public ID
4718
 *
4719
 * NOTE: Productions [75] and [83] interact badly since [75] can generate
4720
 *       'PUBLIC' S PubidLiteral S SystemLiteral
4721
 *
4722
 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4723
 *                   | 'PUBLIC' S PubidLiteral S SystemLiteral
4724
 *
4725
 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4726
 *
4727
 * Returns the function returns SystemLiteral and in the second
4728
 *                case publicID receives PubidLiteral, is strict is off
4729
 *                it is possible to return NULL and have publicID set.
4730
 */
4731
4732
xmlChar *
4733
621k
xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4734
621k
    xmlChar *URI = NULL;
4735
4736
621k
    SHRINK;
4737
4738
621k
    *publicID = NULL;
4739
621k
    if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4740
241k
        SKIP(6);
4741
241k
  if (SKIP_BLANKS == 0) {
4742
370
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4743
370
                     "Space required after 'SYSTEM'\n");
4744
370
  }
4745
241k
  URI = xmlParseSystemLiteral(ctxt);
4746
241k
  if (URI == NULL) {
4747
1.00k
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4748
1.00k
        }
4749
379k
    } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4750
71.9k
        SKIP(6);
4751
71.9k
  if (SKIP_BLANKS == 0) {
4752
463
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4753
463
        "Space required after 'PUBLIC'\n");
4754
463
  }
4755
71.9k
  *publicID = xmlParsePubidLiteral(ctxt);
4756
71.9k
  if (*publicID == NULL) {
4757
1.21k
      xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4758
1.21k
  }
4759
71.9k
  if (strict) {
4760
      /*
4761
       * We don't handle [83] so "S SystemLiteral" is required.
4762
       */
4763
63.0k
      if (SKIP_BLANKS == 0) {
4764
11.0k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4765
11.0k
      "Space required after the Public Identifier\n");
4766
11.0k
      }
4767
63.0k
  } else {
4768
      /*
4769
       * We handle [83] so we return immediately, if
4770
       * "S SystemLiteral" is not detected. We skip blanks if no
4771
             * system literal was found, but this is harmless since we must
4772
             * be at the end of a NotationDecl.
4773
       */
4774
8.92k
      if (SKIP_BLANKS == 0) return(NULL);
4775
393
      if ((CUR != '\'') && (CUR != '"')) return(NULL);
4776
393
  }
4777
63.1k
  URI = xmlParseSystemLiteral(ctxt);
4778
63.1k
  if (URI == NULL) {
4779
11.4k
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4780
11.4k
        }
4781
63.1k
    }
4782
612k
    return(URI);
4783
621k
}
4784
4785
/**
4786
 * xmlParseCommentComplex:
4787
 * @ctxt:  an XML parser context
4788
 * @buf:  the already parsed part of the buffer
4789
 * @len:  number of bytes in the buffer
4790
 * @size:  allocated size of the buffer
4791
 *
4792
 * Skip an XML (SGML) comment <!-- .... -->
4793
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4794
 *  must not occur within comments. "
4795
 * This is the slow routine in case the accelerator for ascii didn't work
4796
 *
4797
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4798
 */
4799
static void
4800
xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4801
226k
                       size_t len, size_t size) {
4802
226k
    int q, ql;
4803
226k
    int r, rl;
4804
226k
    int cur, l;
4805
226k
    size_t count = 0;
4806
226k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4807
70.0k
                       XML_MAX_HUGE_LENGTH :
4808
226k
                       XML_MAX_TEXT_LENGTH;
4809
226k
    int inputid;
4810
4811
226k
    inputid = ctxt->input->id;
4812
4813
226k
    if (buf == NULL) {
4814
12.0k
        len = 0;
4815
12.0k
  size = XML_PARSER_BUFFER_SIZE;
4816
12.0k
  buf = (xmlChar *) xmlMallocAtomic(size);
4817
12.0k
  if (buf == NULL) {
4818
0
      xmlErrMemory(ctxt, NULL);
4819
0
      return;
4820
0
  }
4821
12.0k
    }
4822
226k
    GROW; /* Assure there's enough input data */
4823
226k
    q = CUR_CHAR(ql);
4824
226k
    if (q == 0)
4825
73.9k
        goto not_terminated;
4826
152k
    if (!IS_CHAR(q)) {
4827
23.0k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4828
23.0k
                          "xmlParseComment: invalid xmlChar value %d\n",
4829
23.0k
                    q);
4830
23.0k
  xmlFree (buf);
4831
23.0k
  return;
4832
23.0k
    }
4833
129k
    NEXTL(ql);
4834
129k
    r = CUR_CHAR(rl);
4835
129k
    if (r == 0)
4836
5.77k
        goto not_terminated;
4837
123k
    if (!IS_CHAR(r)) {
4838
7.55k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4839
7.55k
                          "xmlParseComment: invalid xmlChar value %d\n",
4840
7.55k
                    r);
4841
7.55k
  xmlFree (buf);
4842
7.55k
  return;
4843
7.55k
    }
4844
116k
    NEXTL(rl);
4845
116k
    cur = CUR_CHAR(l);
4846
116k
    if (cur == 0)
4847
3.12k
        goto not_terminated;
4848
75.8M
    while (IS_CHAR(cur) && /* checked */
4849
75.8M
           ((cur != '>') ||
4850
75.7M
      (r != '-') || (q != '-'))) {
4851
75.7M
  if ((r == '-') && (q == '-')) {
4852
259k
      xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4853
259k
  }
4854
75.7M
  if (len + 5 >= size) {
4855
108k
      xmlChar *new_buf;
4856
108k
            size_t new_size;
4857
4858
108k
      new_size = size * 2;
4859
108k
      new_buf = (xmlChar *) xmlRealloc(buf, new_size);
4860
108k
      if (new_buf == NULL) {
4861
0
    xmlFree (buf);
4862
0
    xmlErrMemory(ctxt, NULL);
4863
0
    return;
4864
0
      }
4865
108k
      buf = new_buf;
4866
108k
            size = new_size;
4867
108k
  }
4868
75.7M
  COPY_BUF(ql,buf,len,q);
4869
75.7M
  q = r;
4870
75.7M
  ql = rl;
4871
75.7M
  r = cur;
4872
75.7M
  rl = l;
4873
4874
75.7M
  count++;
4875
75.7M
  if (count > 50) {
4876
1.45M
      SHRINK;
4877
1.45M
      GROW;
4878
1.45M
      count = 0;
4879
1.45M
            if (ctxt->instate == XML_PARSER_EOF) {
4880
0
    xmlFree(buf);
4881
0
    return;
4882
0
            }
4883
1.45M
  }
4884
75.7M
  NEXTL(l);
4885
75.7M
  cur = CUR_CHAR(l);
4886
75.7M
  if (cur == 0) {
4887
43.0k
      SHRINK;
4888
43.0k
      GROW;
4889
43.0k
      cur = CUR_CHAR(l);
4890
43.0k
  }
4891
4892
75.7M
        if (len > maxLength) {
4893
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4894
0
                         "Comment too big found", NULL);
4895
0
            xmlFree (buf);
4896
0
            return;
4897
0
        }
4898
75.7M
    }
4899
113k
    buf[len] = 0;
4900
113k
    if (cur == 0) {
4901
43.0k
  xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4902
43.0k
                       "Comment not terminated \n<!--%.50s\n", buf);
4903
70.2k
    } else if (!IS_CHAR(cur)) {
4904
20.7k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4905
20.7k
                          "xmlParseComment: invalid xmlChar value %d\n",
4906
20.7k
                    cur);
4907
49.5k
    } else {
4908
49.5k
  if (inputid != ctxt->input->id) {
4909
0
      xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4910
0
               "Comment doesn't start and stop in the same"
4911
0
                           " entity\n");
4912
0
  }
4913
49.5k
        NEXT;
4914
49.5k
  if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4915
49.5k
      (!ctxt->disableSAX))
4916
39.8k
      ctxt->sax->comment(ctxt->userData, buf);
4917
49.5k
    }
4918
113k
    xmlFree(buf);
4919
113k
    return;
4920
82.8k
not_terminated:
4921
82.8k
    xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4922
82.8k
       "Comment not terminated\n", NULL);
4923
82.8k
    xmlFree(buf);
4924
82.8k
    return;
4925
113k
}
4926
4927
/**
4928
 * xmlParseComment:
4929
 * @ctxt:  an XML parser context
4930
 *
4931
 * DEPRECATED: Internal function, don't use.
4932
 *
4933
 * Parse an XML (SGML) comment. Always consumes '<!'.
4934
 *
4935
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4936
 *  must not occur within comments. "
4937
 *
4938
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4939
 */
4940
void
4941
30.9M
xmlParseComment(xmlParserCtxtPtr ctxt) {
4942
30.9M
    xmlChar *buf = NULL;
4943
30.9M
    size_t size = XML_PARSER_BUFFER_SIZE;
4944
30.9M
    size_t len = 0;
4945
30.9M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4946
3.48M
                       XML_MAX_HUGE_LENGTH :
4947
30.9M
                       XML_MAX_TEXT_LENGTH;
4948
30.9M
    xmlParserInputState state;
4949
30.9M
    const xmlChar *in;
4950
30.9M
    size_t nbchar = 0;
4951
30.9M
    int ccol;
4952
30.9M
    int inputid;
4953
4954
    /*
4955
     * Check that there is a comment right here.
4956
     */
4957
30.9M
    if ((RAW != '<') || (NXT(1) != '!'))
4958
0
        return;
4959
30.9M
    SKIP(2);
4960
30.9M
    if ((RAW != '-') || (NXT(1) != '-'))
4961
516
        return;
4962
30.9M
    state = ctxt->instate;
4963
30.9M
    ctxt->instate = XML_PARSER_COMMENT;
4964
30.9M
    inputid = ctxt->input->id;
4965
30.9M
    SKIP(2);
4966
30.9M
    SHRINK;
4967
30.9M
    GROW;
4968
4969
    /*
4970
     * Accelerated common case where input don't need to be
4971
     * modified before passing it to the handler.
4972
     */
4973
30.9M
    in = ctxt->input->cur;
4974
30.9M
    do {
4975
30.9M
  if (*in == 0xA) {
4976
82.7k
      do {
4977
82.7k
    ctxt->input->line++; ctxt->input->col = 1;
4978
82.7k
    in++;
4979
82.7k
      } while (*in == 0xA);
4980
65.5k
  }
4981
40.0M
get_more:
4982
40.0M
        ccol = ctxt->input->col;
4983
244M
  while (((*in > '-') && (*in <= 0x7F)) ||
4984
244M
         ((*in >= 0x20) && (*in < '-')) ||
4985
244M
         (*in == 0x09)) {
4986
204M
        in++;
4987
204M
        ccol++;
4988
204M
  }
4989
40.0M
  ctxt->input->col = ccol;
4990
40.0M
  if (*in == 0xA) {
4991
4.29M
      do {
4992
4.29M
    ctxt->input->line++; ctxt->input->col = 1;
4993
4.29M
    in++;
4994
4.29M
      } while (*in == 0xA);
4995
784k
      goto get_more;
4996
784k
  }
4997
39.3M
  nbchar = in - ctxt->input->cur;
4998
  /*
4999
   * save current set of data
5000
   */
5001
39.3M
  if (nbchar > 0) {
5002
9.55M
      if ((ctxt->sax != NULL) &&
5003
9.55M
    (ctxt->sax->comment != NULL)) {
5004
9.55M
    if (buf == NULL) {
5005
1.32M
        if ((*in == '-') && (in[1] == '-'))
5006
776k
            size = nbchar + 1;
5007
544k
        else
5008
544k
            size = XML_PARSER_BUFFER_SIZE + nbchar;
5009
1.32M
        buf = (xmlChar *) xmlMallocAtomic(size);
5010
1.32M
        if (buf == NULL) {
5011
0
            xmlErrMemory(ctxt, NULL);
5012
0
      ctxt->instate = state;
5013
0
      return;
5014
0
        }
5015
1.32M
        len = 0;
5016
8.23M
    } else if (len + nbchar + 1 >= size) {
5017
477k
        xmlChar *new_buf;
5018
477k
        size  += len + nbchar + XML_PARSER_BUFFER_SIZE;
5019
477k
        new_buf = (xmlChar *) xmlRealloc(buf, size);
5020
477k
        if (new_buf == NULL) {
5021
0
            xmlFree (buf);
5022
0
      xmlErrMemory(ctxt, NULL);
5023
0
      ctxt->instate = state;
5024
0
      return;
5025
0
        }
5026
477k
        buf = new_buf;
5027
477k
    }
5028
9.55M
    memcpy(&buf[len], ctxt->input->cur, nbchar);
5029
9.55M
    len += nbchar;
5030
9.55M
    buf[len] = 0;
5031
9.55M
      }
5032
9.55M
  }
5033
39.3M
        if (len > maxLength) {
5034
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5035
0
                         "Comment too big found", NULL);
5036
0
            xmlFree (buf);
5037
0
            return;
5038
0
        }
5039
39.3M
  ctxt->input->cur = in;
5040
39.3M
  if (*in == 0xA) {
5041
0
      in++;
5042
0
      ctxt->input->line++; ctxt->input->col = 1;
5043
0
  }
5044
39.3M
  if (*in == 0xD) {
5045
2.01M
      in++;
5046
2.01M
      if (*in == 0xA) {
5047
2.01M
    ctxt->input->cur = in;
5048
2.01M
    in++;
5049
2.01M
    ctxt->input->line++; ctxt->input->col = 1;
5050
2.01M
    goto get_more;
5051
2.01M
      }
5052
6.20k
      in--;
5053
6.20k
  }
5054
37.3M
  SHRINK;
5055
37.3M
  GROW;
5056
37.3M
        if (ctxt->instate == XML_PARSER_EOF) {
5057
0
            xmlFree(buf);
5058
0
            return;
5059
0
        }
5060
37.3M
  in = ctxt->input->cur;
5061
37.3M
  if (*in == '-') {
5062
37.0M
      if (in[1] == '-') {
5063
36.3M
          if (in[2] == '>') {
5064
30.7M
        if (ctxt->input->id != inputid) {
5065
0
      xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5066
0
                     "comment doesn't start and stop in the"
5067
0
                                       " same entity\n");
5068
0
        }
5069
30.7M
        SKIP(3);
5070
30.7M
        if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5071
30.7M
            (!ctxt->disableSAX)) {
5072
28.6M
      if (buf != NULL)
5073
991k
          ctxt->sax->comment(ctxt->userData, buf);
5074
27.6M
      else
5075
27.6M
          ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5076
28.6M
        }
5077
30.7M
        if (buf != NULL)
5078
1.10M
            xmlFree(buf);
5079
30.7M
        if (ctxt->instate != XML_PARSER_EOF)
5080
30.7M
      ctxt->instate = state;
5081
30.7M
        return;
5082
30.7M
    }
5083
5.59M
    if (buf != NULL) {
5084
5.53M
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5085
5.53M
                          "Double hyphen within comment: "
5086
5.53M
                                      "<!--%.50s\n",
5087
5.53M
              buf);
5088
5.53M
    } else
5089
58.5k
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5090
58.5k
                          "Double hyphen within comment\n", NULL);
5091
5.59M
                if (ctxt->instate == XML_PARSER_EOF) {
5092
0
                    xmlFree(buf);
5093
0
                    return;
5094
0
                }
5095
5.59M
    in++;
5096
5.59M
    ctxt->input->col++;
5097
5.59M
      }
5098
6.31M
      in++;
5099
6.31M
      ctxt->input->col++;
5100
6.31M
      goto get_more;
5101
37.0M
  }
5102
37.3M
    } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
5103
226k
    xmlParseCommentComplex(ctxt, buf, len, size);
5104
226k
    ctxt->instate = state;
5105
226k
    return;
5106
30.9M
}
5107
5108
5109
/**
5110
 * xmlParsePITarget:
5111
 * @ctxt:  an XML parser context
5112
 *
5113
 * DEPRECATED: Internal function, don't use.
5114
 *
5115
 * parse the name of a PI
5116
 *
5117
 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5118
 *
5119
 * Returns the PITarget name or NULL
5120
 */
5121
5122
const xmlChar *
5123
367k
xmlParsePITarget(xmlParserCtxtPtr ctxt) {
5124
367k
    const xmlChar *name;
5125
5126
367k
    name = xmlParseName(ctxt);
5127
367k
    if ((name != NULL) &&
5128
367k
        ((name[0] == 'x') || (name[0] == 'X')) &&
5129
367k
        ((name[1] == 'm') || (name[1] == 'M')) &&
5130
367k
        ((name[2] == 'l') || (name[2] == 'L'))) {
5131
106k
  int i;
5132
106k
  if ((name[0] == 'x') && (name[1] == 'm') &&
5133
106k
      (name[2] == 'l') && (name[3] == 0)) {
5134
52.3k
      xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5135
52.3k
     "XML declaration allowed only at the start of the document\n");
5136
52.3k
      return(name);
5137
53.9k
  } else if (name[3] == 0) {
5138
5.12k
      xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5139
5.12k
      return(name);
5140
5.12k
  }
5141
117k
  for (i = 0;;i++) {
5142
117k
      if (xmlW3CPIs[i] == NULL) break;
5143
83.1k
      if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5144
14.5k
          return(name);
5145
83.1k
  }
5146
34.2k
  xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5147
34.2k
          "xmlParsePITarget: invalid name prefix 'xml'\n",
5148
34.2k
          NULL, NULL);
5149
34.2k
    }
5150
295k
    if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5151
9.45k
  xmlNsErr(ctxt, XML_NS_ERR_COLON,
5152
9.45k
     "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5153
9.45k
    }
5154
295k
    return(name);
5155
367k
}
5156
5157
#ifdef LIBXML_CATALOG_ENABLED
5158
/**
5159
 * xmlParseCatalogPI:
5160
 * @ctxt:  an XML parser context
5161
 * @catalog:  the PI value string
5162
 *
5163
 * parse an XML Catalog Processing Instruction.
5164
 *
5165
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
5166
 *
5167
 * Occurs only if allowed by the user and if happening in the Misc
5168
 * part of the document before any doctype information
5169
 * This will add the given catalog to the parsing context in order
5170
 * to be used if there is a resolution need further down in the document
5171
 */
5172
5173
static void
5174
1.80k
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
5175
1.80k
    xmlChar *URL = NULL;
5176
1.80k
    const xmlChar *tmp, *base;
5177
1.80k
    xmlChar marker;
5178
5179
1.80k
    tmp = catalog;
5180
1.80k
    while (IS_BLANK_CH(*tmp)) tmp++;
5181
1.80k
    if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
5182
819
  goto error;
5183
987
    tmp += 7;
5184
69.6k
    while (IS_BLANK_CH(*tmp)) tmp++;
5185
987
    if (*tmp != '=') {
5186
345
  return;
5187
345
    }
5188
642
    tmp++;
5189
10.2k
    while (IS_BLANK_CH(*tmp)) tmp++;
5190
642
    marker = *tmp;
5191
642
    if ((marker != '\'') && (marker != '"'))
5192
279
  goto error;
5193
363
    tmp++;
5194
363
    base = tmp;
5195
7.97k
    while ((*tmp != 0) && (*tmp != marker)) tmp++;
5196
363
    if (*tmp == 0)
5197
39
  goto error;
5198
324
    URL = xmlStrndup(base, tmp - base);
5199
324
    tmp++;
5200
24.5k
    while (IS_BLANK_CH(*tmp)) tmp++;
5201
324
    if (*tmp != 0)
5202
276
  goto error;
5203
5204
48
    if (URL != NULL) {
5205
48
  ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
5206
48
  xmlFree(URL);
5207
48
    }
5208
48
    return;
5209
5210
1.41k
error:
5211
1.41k
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
5212
1.41k
            "Catalog PI syntax error: %s\n",
5213
1.41k
      catalog, NULL);
5214
1.41k
    if (URL != NULL)
5215
276
  xmlFree(URL);
5216
1.41k
}
5217
#endif
5218
5219
/**
5220
 * xmlParsePI:
5221
 * @ctxt:  an XML parser context
5222
 *
5223
 * DEPRECATED: Internal function, don't use.
5224
 *
5225
 * parse an XML Processing Instruction.
5226
 *
5227
 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5228
 *
5229
 * The processing is transferred to SAX once parsed.
5230
 */
5231
5232
void
5233
367k
xmlParsePI(xmlParserCtxtPtr ctxt) {
5234
367k
    xmlChar *buf = NULL;
5235
367k
    size_t len = 0;
5236
367k
    size_t size = XML_PARSER_BUFFER_SIZE;
5237
367k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5238
124k
                       XML_MAX_HUGE_LENGTH :
5239
367k
                       XML_MAX_TEXT_LENGTH;
5240
367k
    int cur, l;
5241
367k
    const xmlChar *target;
5242
367k
    xmlParserInputState state;
5243
367k
    int count = 0;
5244
5245
367k
    if ((RAW == '<') && (NXT(1) == '?')) {
5246
367k
  int inputid = ctxt->input->id;
5247
367k
  state = ctxt->instate;
5248
367k
        ctxt->instate = XML_PARSER_PI;
5249
  /*
5250
   * this is a Processing Instruction.
5251
   */
5252
367k
  SKIP(2);
5253
367k
  SHRINK;
5254
5255
  /*
5256
   * Parse the target name and check for special support like
5257
   * namespace.
5258
   */
5259
367k
        target = xmlParsePITarget(ctxt);
5260
367k
  if (target != NULL) {
5261
342k
      if ((RAW == '?') && (NXT(1) == '>')) {
5262
70.6k
    if (inputid != ctxt->input->id) {
5263
0
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5264
0
                             "PI declaration doesn't start and stop in"
5265
0
                                   " the same entity\n");
5266
0
    }
5267
70.6k
    SKIP(2);
5268
5269
    /*
5270
     * SAX: PI detected.
5271
     */
5272
70.6k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5273
70.6k
        (ctxt->sax->processingInstruction != NULL))
5274
58.6k
        ctxt->sax->processingInstruction(ctxt->userData,
5275
58.6k
                                         target, NULL);
5276
70.6k
    if (ctxt->instate != XML_PARSER_EOF)
5277
70.6k
        ctxt->instate = state;
5278
70.6k
    return;
5279
70.6k
      }
5280
271k
      buf = (xmlChar *) xmlMallocAtomic(size);
5281
271k
      if (buf == NULL) {
5282
0
    xmlErrMemory(ctxt, NULL);
5283
0
    ctxt->instate = state;
5284
0
    return;
5285
0
      }
5286
271k
      if (SKIP_BLANKS == 0) {
5287
80.2k
    xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5288
80.2k
        "ParsePI: PI %s space expected\n", target);
5289
80.2k
      }
5290
271k
      cur = CUR_CHAR(l);
5291
50.1M
      while (IS_CHAR(cur) && /* checked */
5292
50.1M
       ((cur != '?') || (NXT(1) != '>'))) {
5293
49.8M
    if (len + 5 >= size) {
5294
49.4k
        xmlChar *tmp;
5295
49.4k
                    size_t new_size = size * 2;
5296
49.4k
        tmp = (xmlChar *) xmlRealloc(buf, new_size);
5297
49.4k
        if (tmp == NULL) {
5298
0
      xmlErrMemory(ctxt, NULL);
5299
0
      xmlFree(buf);
5300
0
      ctxt->instate = state;
5301
0
      return;
5302
0
        }
5303
49.4k
        buf = tmp;
5304
49.4k
                    size = new_size;
5305
49.4k
    }
5306
49.8M
    count++;
5307
49.8M
    if (count > 50) {
5308
895k
        SHRINK;
5309
895k
        GROW;
5310
895k
                    if (ctxt->instate == XML_PARSER_EOF) {
5311
0
                        xmlFree(buf);
5312
0
                        return;
5313
0
                    }
5314
895k
        count = 0;
5315
895k
    }
5316
49.8M
    COPY_BUF(l,buf,len,cur);
5317
49.8M
    NEXTL(l);
5318
49.8M
    cur = CUR_CHAR(l);
5319
49.8M
    if (cur == 0) {
5320
31.5k
        SHRINK;
5321
31.5k
        GROW;
5322
31.5k
        cur = CUR_CHAR(l);
5323
31.5k
    }
5324
49.8M
                if (len > maxLength) {
5325
0
                    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5326
0
                                      "PI %s too big found", target);
5327
0
                    xmlFree(buf);
5328
0
                    ctxt->instate = state;
5329
0
                    return;
5330
0
                }
5331
49.8M
      }
5332
271k
      buf[len] = 0;
5333
271k
      if (cur != '?') {
5334
52.6k
    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5335
52.6k
          "ParsePI: PI %s never end ...\n", target);
5336
218k
      } else {
5337
218k
    if (inputid != ctxt->input->id) {
5338
7
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5339
7
                             "PI declaration doesn't start and stop in"
5340
7
                                   " the same entity\n");
5341
7
    }
5342
218k
    SKIP(2);
5343
5344
218k
#ifdef LIBXML_CATALOG_ENABLED
5345
218k
    if (((state == XML_PARSER_MISC) ||
5346
218k
               (state == XML_PARSER_START)) &&
5347
218k
        (xmlStrEqual(target, XML_CATALOG_PI))) {
5348
1.80k
        xmlCatalogAllow allow = xmlCatalogGetDefaults();
5349
1.80k
        if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5350
1.80k
      (allow == XML_CATA_ALLOW_ALL))
5351
1.80k
      xmlParseCatalogPI(ctxt, buf);
5352
1.80k
    }
5353
218k
#endif
5354
5355
5356
    /*
5357
     * SAX: PI detected.
5358
     */
5359
218k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5360
218k
        (ctxt->sax->processingInstruction != NULL))
5361
189k
        ctxt->sax->processingInstruction(ctxt->userData,
5362
189k
                                         target, buf);
5363
218k
      }
5364
271k
      xmlFree(buf);
5365
271k
  } else {
5366
25.0k
      xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5367
25.0k
  }
5368
296k
  if (ctxt->instate != XML_PARSER_EOF)
5369
296k
      ctxt->instate = state;
5370
296k
    }
5371
367k
}
5372
5373
/**
5374
 * xmlParseNotationDecl:
5375
 * @ctxt:  an XML parser context
5376
 *
5377
 * DEPRECATED: Internal function, don't use.
5378
 *
5379
 * Parse a notation declaration. Always consumes '<!'.
5380
 *
5381
 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID |  PublicID) S? '>'
5382
 *
5383
 * Hence there is actually 3 choices:
5384
 *     'PUBLIC' S PubidLiteral
5385
 *     'PUBLIC' S PubidLiteral S SystemLiteral
5386
 * and 'SYSTEM' S SystemLiteral
5387
 *
5388
 * See the NOTE on xmlParseExternalID().
5389
 */
5390
5391
void
5392
19.0k
xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5393
19.0k
    const xmlChar *name;
5394
19.0k
    xmlChar *Pubid;
5395
19.0k
    xmlChar *Systemid;
5396
5397
19.0k
    if ((CUR != '<') || (NXT(1) != '!'))
5398
0
        return;
5399
19.0k
    SKIP(2);
5400
5401
19.0k
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5402
18.4k
  int inputid = ctxt->input->id;
5403
18.4k
  SHRINK;
5404
18.4k
  SKIP(8);
5405
18.4k
  if (SKIP_BLANKS == 0) {
5406
310
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5407
310
         "Space required after '<!NOTATION'\n");
5408
310
      return;
5409
310
  }
5410
5411
18.1k
        name = xmlParseName(ctxt);
5412
18.1k
  if (name == NULL) {
5413
1.83k
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5414
1.83k
      return;
5415
1.83k
  }
5416
16.3k
  if (xmlStrchr(name, ':') != NULL) {
5417
309
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5418
309
         "colons are forbidden from notation names '%s'\n",
5419
309
         name, NULL, NULL);
5420
309
  }
5421
16.3k
  if (SKIP_BLANKS == 0) {
5422
1.09k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5423
1.09k
         "Space required after the NOTATION name'\n");
5424
1.09k
      return;
5425
1.09k
  }
5426
5427
  /*
5428
   * Parse the IDs.
5429
   */
5430
15.2k
  Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5431
15.2k
  SKIP_BLANKS;
5432
5433
15.2k
  if (RAW == '>') {
5434
8.82k
      if (inputid != ctxt->input->id) {
5435
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5436
0
                         "Notation declaration doesn't start and stop"
5437
0
                               " in the same entity\n");
5438
0
      }
5439
8.82k
      NEXT;
5440
8.82k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5441
8.82k
    (ctxt->sax->notationDecl != NULL))
5442
6.21k
    ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5443
8.82k
  } else {
5444
6.38k
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5445
6.38k
  }
5446
15.2k
  if (Systemid != NULL) xmlFree(Systemid);
5447
15.2k
  if (Pubid != NULL) xmlFree(Pubid);
5448
15.2k
    }
5449
19.0k
}
5450
5451
/**
5452
 * xmlParseEntityDecl:
5453
 * @ctxt:  an XML parser context
5454
 *
5455
 * DEPRECATED: Internal function, don't use.
5456
 *
5457
 * Parse an entity declaration. Always consumes '<!'.
5458
 *
5459
 * [70] EntityDecl ::= GEDecl | PEDecl
5460
 *
5461
 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5462
 *
5463
 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5464
 *
5465
 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5466
 *
5467
 * [74] PEDef ::= EntityValue | ExternalID
5468
 *
5469
 * [76] NDataDecl ::= S 'NDATA' S Name
5470
 *
5471
 * [ VC: Notation Declared ]
5472
 * The Name must match the declared name of a notation.
5473
 */
5474
5475
void
5476
700k
xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5477
700k
    const xmlChar *name = NULL;
5478
700k
    xmlChar *value = NULL;
5479
700k
    xmlChar *URI = NULL, *literal = NULL;
5480
700k
    const xmlChar *ndata = NULL;
5481
700k
    int isParameter = 0;
5482
700k
    xmlChar *orig = NULL;
5483
5484
700k
    if ((CUR != '<') || (NXT(1) != '!'))
5485
0
        return;
5486
700k
    SKIP(2);
5487
5488
    /* GROW; done in the caller */
5489
700k
    if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5490
696k
  int inputid = ctxt->input->id;
5491
696k
  SHRINK;
5492
696k
  SKIP(6);
5493
696k
  if (SKIP_BLANKS == 0) {
5494
3.19k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5495
3.19k
         "Space required after '<!ENTITY'\n");
5496
3.19k
  }
5497
5498
696k
  if (RAW == '%') {
5499
233k
      NEXT;
5500
233k
      if (SKIP_BLANKS == 0) {
5501
409
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5502
409
             "Space required after '%%'\n");
5503
409
      }
5504
233k
      isParameter = 1;
5505
233k
  }
5506
5507
696k
        name = xmlParseName(ctxt);
5508
696k
  if (name == NULL) {
5509
3.27k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5510
3.27k
                     "xmlParseEntityDecl: no name\n");
5511
3.27k
            return;
5512
3.27k
  }
5513
693k
  if (xmlStrchr(name, ':') != NULL) {
5514
446
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5515
446
         "colons are forbidden from entities names '%s'\n",
5516
446
         name, NULL, NULL);
5517
446
  }
5518
693k
  if (SKIP_BLANKS == 0) {
5519
7.55k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5520
7.55k
         "Space required after the entity name\n");
5521
7.55k
  }
5522
5523
693k
  ctxt->instate = XML_PARSER_ENTITY_DECL;
5524
  /*
5525
   * handle the various case of definitions...
5526
   */
5527
693k
  if (isParameter) {
5528
233k
      if ((RAW == '"') || (RAW == '\'')) {
5529
213k
          value = xmlParseEntityValue(ctxt, &orig);
5530
213k
    if (value) {
5531
200k
        if ((ctxt->sax != NULL) &&
5532
200k
      (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5533
181k
      ctxt->sax->entityDecl(ctxt->userData, name,
5534
181k
                        XML_INTERNAL_PARAMETER_ENTITY,
5535
181k
            NULL, NULL, value);
5536
200k
    }
5537
213k
      } else {
5538
20.0k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5539
20.0k
    if ((URI == NULL) && (literal == NULL)) {
5540
1.19k
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5541
1.19k
    }
5542
20.0k
    if (URI) {
5543
18.7k
        xmlURIPtr uri;
5544
5545
18.7k
        uri = xmlParseURI((const char *) URI);
5546
18.7k
        if (uri == NULL) {
5547
561
            xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5548
561
             "Invalid URI: %s\n", URI);
5549
      /*
5550
       * This really ought to be a well formedness error
5551
       * but the XML Core WG decided otherwise c.f. issue
5552
       * E26 of the XML erratas.
5553
       */
5554
18.2k
        } else {
5555
18.2k
      if (uri->fragment != NULL) {
5556
          /*
5557
           * Okay this is foolish to block those but not
5558
           * invalid URIs.
5559
           */
5560
66
          xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5561
18.1k
      } else {
5562
18.1k
          if ((ctxt->sax != NULL) &&
5563
18.1k
        (!ctxt->disableSAX) &&
5564
18.1k
        (ctxt->sax->entityDecl != NULL))
5565
17.6k
        ctxt->sax->entityDecl(ctxt->userData, name,
5566
17.6k
              XML_EXTERNAL_PARAMETER_ENTITY,
5567
17.6k
              literal, URI, NULL);
5568
18.1k
      }
5569
18.2k
      xmlFreeURI(uri);
5570
18.2k
        }
5571
18.7k
    }
5572
20.0k
      }
5573
460k
  } else {
5574
460k
      if ((RAW == '"') || (RAW == '\'')) {
5575
325k
          value = xmlParseEntityValue(ctxt, &orig);
5576
325k
    if ((ctxt->sax != NULL) &&
5577
325k
        (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5578
288k
        ctxt->sax->entityDecl(ctxt->userData, name,
5579
288k
        XML_INTERNAL_GENERAL_ENTITY,
5580
288k
        NULL, NULL, value);
5581
    /*
5582
     * For expat compatibility in SAX mode.
5583
     */
5584
325k
    if ((ctxt->myDoc == NULL) ||
5585
325k
        (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5586
5.32k
        if (ctxt->myDoc == NULL) {
5587
732
      ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5588
732
      if (ctxt->myDoc == NULL) {
5589
0
          xmlErrMemory(ctxt, "New Doc failed");
5590
0
          return;
5591
0
      }
5592
732
      ctxt->myDoc->properties = XML_DOC_INTERNAL;
5593
732
        }
5594
5.32k
        if (ctxt->myDoc->intSubset == NULL)
5595
732
      ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5596
732
              BAD_CAST "fake", NULL, NULL);
5597
5598
5.32k
        xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5599
5.32k
                    NULL, NULL, value);
5600
5.32k
    }
5601
325k
      } else {
5602
135k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5603
135k
    if ((URI == NULL) && (literal == NULL)) {
5604
9.00k
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5605
9.00k
    }
5606
135k
    if (URI) {
5607
119k
        xmlURIPtr uri;
5608
5609
119k
        uri = xmlParseURI((const char *)URI);
5610
119k
        if (uri == NULL) {
5611
6.41k
            xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5612
6.41k
             "Invalid URI: %s\n", URI);
5613
      /*
5614
       * This really ought to be a well formedness error
5615
       * but the XML Core WG decided otherwise c.f. issue
5616
       * E26 of the XML erratas.
5617
       */
5618
112k
        } else {
5619
112k
      if (uri->fragment != NULL) {
5620
          /*
5621
           * Okay this is foolish to block those but not
5622
           * invalid URIs.
5623
           */
5624
1.26k
          xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5625
1.26k
      }
5626
112k
      xmlFreeURI(uri);
5627
112k
        }
5628
119k
    }
5629
135k
    if ((RAW != '>') && (SKIP_BLANKS == 0)) {
5630
12.7k
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5631
12.7k
           "Space required before 'NDATA'\n");
5632
12.7k
    }
5633
135k
    if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5634
10.4k
        SKIP(5);
5635
10.4k
        if (SKIP_BLANKS == 0) {
5636
370
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5637
370
               "Space required after 'NDATA'\n");
5638
370
        }
5639
10.4k
        ndata = xmlParseName(ctxt);
5640
10.4k
        if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5641
10.4k
            (ctxt->sax->unparsedEntityDecl != NULL))
5642
9.68k
      ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5643
9.68k
            literal, URI, ndata);
5644
124k
    } else {
5645
124k
        if ((ctxt->sax != NULL) &&
5646
124k
            (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5647
110k
      ctxt->sax->entityDecl(ctxt->userData, name,
5648
110k
            XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5649
110k
            literal, URI, NULL);
5650
        /*
5651
         * For expat compatibility in SAX mode.
5652
         * assuming the entity replacement was asked for
5653
         */
5654
124k
        if ((ctxt->replaceEntities != 0) &&
5655
124k
      ((ctxt->myDoc == NULL) ||
5656
87.7k
      (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5657
1.18k
      if (ctxt->myDoc == NULL) {
5658
177
          ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5659
177
          if (ctxt->myDoc == NULL) {
5660
0
              xmlErrMemory(ctxt, "New Doc failed");
5661
0
        return;
5662
0
          }
5663
177
          ctxt->myDoc->properties = XML_DOC_INTERNAL;
5664
177
      }
5665
5666
1.18k
      if (ctxt->myDoc->intSubset == NULL)
5667
177
          ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5668
177
            BAD_CAST "fake", NULL, NULL);
5669
1.18k
      xmlSAX2EntityDecl(ctxt, name,
5670
1.18k
                  XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5671
1.18k
                  literal, URI, NULL);
5672
1.18k
        }
5673
124k
    }
5674
135k
      }
5675
460k
  }
5676
693k
  if (ctxt->instate == XML_PARSER_EOF)
5677
483
      goto done;
5678
693k
  SKIP_BLANKS;
5679
693k
  if (RAW != '>') {
5680
29.8k
      xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5681
29.8k
              "xmlParseEntityDecl: entity %s not terminated\n", name);
5682
29.8k
      xmlHaltParser(ctxt);
5683
663k
  } else {
5684
663k
      if (inputid != ctxt->input->id) {
5685
105
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5686
105
                         "Entity declaration doesn't start and stop in"
5687
105
                               " the same entity\n");
5688
105
      }
5689
663k
      NEXT;
5690
663k
  }
5691
693k
  if (orig != NULL) {
5692
      /*
5693
       * Ugly mechanism to save the raw entity value.
5694
       */
5695
511k
      xmlEntityPtr cur = NULL;
5696
5697
511k
      if (isParameter) {
5698
201k
          if ((ctxt->sax != NULL) &&
5699
201k
        (ctxt->sax->getParameterEntity != NULL))
5700
201k
        cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5701
309k
      } else {
5702
309k
          if ((ctxt->sax != NULL) &&
5703
309k
        (ctxt->sax->getEntity != NULL))
5704
309k
        cur = ctxt->sax->getEntity(ctxt->userData, name);
5705
309k
    if ((cur == NULL) && (ctxt->userData==ctxt)) {
5706
17.5k
        cur = xmlSAX2GetEntity(ctxt, name);
5707
17.5k
    }
5708
309k
      }
5709
511k
            if ((cur != NULL) && (cur->orig == NULL)) {
5710
437k
    cur->orig = orig;
5711
437k
                orig = NULL;
5712
437k
      }
5713
511k
  }
5714
5715
693k
done:
5716
693k
  if (value != NULL) xmlFree(value);
5717
693k
  if (URI != NULL) xmlFree(URI);
5718
693k
  if (literal != NULL) xmlFree(literal);
5719
693k
        if (orig != NULL) xmlFree(orig);
5720
693k
    }
5721
700k
}
5722
5723
/**
5724
 * xmlParseDefaultDecl:
5725
 * @ctxt:  an XML parser context
5726
 * @value:  Receive a possible fixed default value for the attribute
5727
 *
5728
 * DEPRECATED: Internal function, don't use.
5729
 *
5730
 * Parse an attribute default declaration
5731
 *
5732
 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5733
 *
5734
 * [ VC: Required Attribute ]
5735
 * if the default declaration is the keyword #REQUIRED, then the
5736
 * attribute must be specified for all elements of the type in the
5737
 * attribute-list declaration.
5738
 *
5739
 * [ VC: Attribute Default Legal ]
5740
 * The declared default value must meet the lexical constraints of
5741
 * the declared attribute type c.f. xmlValidateAttributeDecl()
5742
 *
5743
 * [ VC: Fixed Attribute Default ]
5744
 * if an attribute has a default value declared with the #FIXED
5745
 * keyword, instances of that attribute must match the default value.
5746
 *
5747
 * [ WFC: No < in Attribute Values ]
5748
 * handled in xmlParseAttValue()
5749
 *
5750
 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5751
 *          or XML_ATTRIBUTE_FIXED.
5752
 */
5753
5754
int
5755
1.99M
xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5756
1.99M
    int val;
5757
1.99M
    xmlChar *ret;
5758
5759
1.99M
    *value = NULL;
5760
1.99M
    if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5761
140k
  SKIP(9);
5762
140k
  return(XML_ATTRIBUTE_REQUIRED);
5763
140k
    }
5764
1.84M
    if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5765
1.37M
  SKIP(8);
5766
1.37M
  return(XML_ATTRIBUTE_IMPLIED);
5767
1.37M
    }
5768
478k
    val = XML_ATTRIBUTE_NONE;
5769
478k
    if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5770
153k
  SKIP(6);
5771
153k
  val = XML_ATTRIBUTE_FIXED;
5772
153k
  if (SKIP_BLANKS == 0) {
5773
192
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5774
192
         "Space required after '#FIXED'\n");
5775
192
  }
5776
153k
    }
5777
478k
    ret = xmlParseAttValue(ctxt);
5778
478k
    ctxt->instate = XML_PARSER_DTD;
5779
478k
    if (ret == NULL) {
5780
5.14k
  xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5781
5.14k
           "Attribute default value declaration error\n");
5782
5.14k
    } else
5783
473k
        *value = ret;
5784
478k
    return(val);
5785
1.84M
}
5786
5787
/**
5788
 * xmlParseNotationType:
5789
 * @ctxt:  an XML parser context
5790
 *
5791
 * DEPRECATED: Internal function, don't use.
5792
 *
5793
 * parse an Notation attribute type.
5794
 *
5795
 * Note: the leading 'NOTATION' S part has already being parsed...
5796
 *
5797
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5798
 *
5799
 * [ VC: Notation Attributes ]
5800
 * Values of this type must match one of the notation names included
5801
 * in the declaration; all notation names in the declaration must be declared.
5802
 *
5803
 * Returns: the notation attribute tree built while parsing
5804
 */
5805
5806
xmlEnumerationPtr
5807
3.80k
xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5808
3.80k
    const xmlChar *name;
5809
3.80k
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5810
5811
3.80k
    if (RAW != '(') {
5812
182
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5813
182
  return(NULL);
5814
182
    }
5815
3.62k
    SHRINK;
5816
10.4k
    do {
5817
10.4k
        NEXT;
5818
10.4k
  SKIP_BLANKS;
5819
10.4k
        name = xmlParseName(ctxt);
5820
10.4k
  if (name == NULL) {
5821
389
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5822
389
         "Name expected in NOTATION declaration\n");
5823
389
            xmlFreeEnumeration(ret);
5824
389
      return(NULL);
5825
389
  }
5826
10.0k
  tmp = ret;
5827
28.5k
  while (tmp != NULL) {
5828
21.3k
      if (xmlStrEqual(name, tmp->name)) {
5829
2.90k
    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5830
2.90k
    "standalone: attribute notation value token %s duplicated\n",
5831
2.90k
         name, NULL);
5832
2.90k
    if (!xmlDictOwns(ctxt->dict, name))
5833
0
        xmlFree((xmlChar *) name);
5834
2.90k
    break;
5835
2.90k
      }
5836
18.4k
      tmp = tmp->next;
5837
18.4k
  }
5838
10.0k
  if (tmp == NULL) {
5839
7.11k
      cur = xmlCreateEnumeration(name);
5840
7.11k
      if (cur == NULL) {
5841
0
                xmlFreeEnumeration(ret);
5842
0
                return(NULL);
5843
0
            }
5844
7.11k
      if (last == NULL) ret = last = cur;
5845
3.71k
      else {
5846
3.71k
    last->next = cur;
5847
3.71k
    last = cur;
5848
3.71k
      }
5849
7.11k
  }
5850
10.0k
  SKIP_BLANKS;
5851
10.0k
    } while (RAW == '|');
5852
3.23k
    if (RAW != ')') {
5853
1.65k
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5854
1.65k
        xmlFreeEnumeration(ret);
5855
1.65k
  return(NULL);
5856
1.65k
    }
5857
1.58k
    NEXT;
5858
1.58k
    return(ret);
5859
3.23k
}
5860
5861
/**
5862
 * xmlParseEnumerationType:
5863
 * @ctxt:  an XML parser context
5864
 *
5865
 * DEPRECATED: Internal function, don't use.
5866
 *
5867
 * parse an Enumeration attribute type.
5868
 *
5869
 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5870
 *
5871
 * [ VC: Enumeration ]
5872
 * Values of this type must match one of the Nmtoken tokens in
5873
 * the declaration
5874
 *
5875
 * Returns: the enumeration attribute tree built while parsing
5876
 */
5877
5878
xmlEnumerationPtr
5879
234k
xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5880
234k
    xmlChar *name;
5881
234k
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5882
5883
234k
    if (RAW != '(') {
5884
14.6k
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5885
14.6k
  return(NULL);
5886
14.6k
    }
5887
220k
    SHRINK;
5888
586k
    do {
5889
586k
        NEXT;
5890
586k
  SKIP_BLANKS;
5891
586k
        name = xmlParseNmtoken(ctxt);
5892
586k
  if (name == NULL) {
5893
450
      xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5894
450
      return(ret);
5895
450
  }
5896
585k
  tmp = ret;
5897
1.46M
  while (tmp != NULL) {
5898
891k
      if (xmlStrEqual(name, tmp->name)) {
5899
14.8k
    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5900
14.8k
    "standalone: attribute enumeration value token %s duplicated\n",
5901
14.8k
         name, NULL);
5902
14.8k
    if (!xmlDictOwns(ctxt->dict, name))
5903
14.8k
        xmlFree(name);
5904
14.8k
    break;
5905
14.8k
      }
5906
877k
      tmp = tmp->next;
5907
877k
  }
5908
585k
  if (tmp == NULL) {
5909
571k
      cur = xmlCreateEnumeration(name);
5910
571k
      if (!xmlDictOwns(ctxt->dict, name))
5911
571k
    xmlFree(name);
5912
571k
      if (cur == NULL) {
5913
0
                xmlFreeEnumeration(ret);
5914
0
                return(NULL);
5915
0
            }
5916
571k
      if (last == NULL) ret = last = cur;
5917
351k
      else {
5918
351k
    last->next = cur;
5919
351k
    last = cur;
5920
351k
      }
5921
571k
  }
5922
585k
  SKIP_BLANKS;
5923
585k
    } while (RAW == '|');
5924
219k
    if (RAW != ')') {
5925
4.88k
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5926
4.88k
  return(ret);
5927
4.88k
    }
5928
214k
    NEXT;
5929
214k
    return(ret);
5930
219k
}
5931
5932
/**
5933
 * xmlParseEnumeratedType:
5934
 * @ctxt:  an XML parser context
5935
 * @tree:  the enumeration tree built while parsing
5936
 *
5937
 * DEPRECATED: Internal function, don't use.
5938
 *
5939
 * parse an Enumerated attribute type.
5940
 *
5941
 * [57] EnumeratedType ::= NotationType | Enumeration
5942
 *
5943
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5944
 *
5945
 *
5946
 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5947
 */
5948
5949
int
5950
238k
xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5951
238k
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5952
3.95k
  SKIP(8);
5953
3.95k
  if (SKIP_BLANKS == 0) {
5954
144
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5955
144
         "Space required after 'NOTATION'\n");
5956
144
      return(0);
5957
144
  }
5958
3.80k
  *tree = xmlParseNotationType(ctxt);
5959
3.80k
  if (*tree == NULL) return(0);
5960
1.58k
  return(XML_ATTRIBUTE_NOTATION);
5961
3.80k
    }
5962
234k
    *tree = xmlParseEnumerationType(ctxt);
5963
234k
    if (*tree == NULL) return(0);
5964
219k
    return(XML_ATTRIBUTE_ENUMERATION);
5965
234k
}
5966
5967
/**
5968
 * xmlParseAttributeType:
5969
 * @ctxt:  an XML parser context
5970
 * @tree:  the enumeration tree built while parsing
5971
 *
5972
 * DEPRECATED: Internal function, don't use.
5973
 *
5974
 * parse the Attribute list def for an element
5975
 *
5976
 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5977
 *
5978
 * [55] StringType ::= 'CDATA'
5979
 *
5980
 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5981
 *                        'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5982
 *
5983
 * Validity constraints for attribute values syntax are checked in
5984
 * xmlValidateAttributeValue()
5985
 *
5986
 * [ VC: ID ]
5987
 * Values of type ID must match the Name production. A name must not
5988
 * appear more than once in an XML document as a value of this type;
5989
 * i.e., ID values must uniquely identify the elements which bear them.
5990
 *
5991
 * [ VC: One ID per Element Type ]
5992
 * No element type may have more than one ID attribute specified.
5993
 *
5994
 * [ VC: ID Attribute Default ]
5995
 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5996
 *
5997
 * [ VC: IDREF ]
5998
 * Values of type IDREF must match the Name production, and values
5999
 * of type IDREFS must match Names; each IDREF Name must match the value
6000
 * of an ID attribute on some element in the XML document; i.e. IDREF
6001
 * values must match the value of some ID attribute.
6002
 *
6003
 * [ VC: Entity Name ]
6004
 * Values of type ENTITY must match the Name production, values
6005
 * of type ENTITIES must match Names; each Entity Name must match the
6006
 * name of an unparsed entity declared in the DTD.
6007
 *
6008
 * [ VC: Name Token ]
6009
 * Values of type NMTOKEN must match the Nmtoken production; values
6010
 * of type NMTOKENS must match Nmtokens.
6011
 *
6012
 * Returns the attribute type
6013
 */
6014
int
6015
2.01M
xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
6016
2.01M
    SHRINK;
6017
2.01M
    if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
6018
881k
  SKIP(5);
6019
881k
  return(XML_ATTRIBUTE_CDATA);
6020
1.13M
     } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
6021
47.0k
  SKIP(6);
6022
47.0k
  return(XML_ATTRIBUTE_IDREFS);
6023
1.08M
     } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
6024
84.9k
  SKIP(5);
6025
84.9k
  return(XML_ATTRIBUTE_IDREF);
6026
1.00M
     } else if ((RAW == 'I') && (NXT(1) == 'D')) {
6027
376k
        SKIP(2);
6028
376k
  return(XML_ATTRIBUTE_ID);
6029
626k
     } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
6030
5.30k
  SKIP(6);
6031
5.30k
  return(XML_ATTRIBUTE_ENTITY);
6032
620k
     } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
6033
707
  SKIP(8);
6034
707
  return(XML_ATTRIBUTE_ENTITIES);
6035
620k
     } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
6036
117k
  SKIP(8);
6037
117k
  return(XML_ATTRIBUTE_NMTOKENS);
6038
502k
     } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
6039
264k
  SKIP(7);
6040
264k
  return(XML_ATTRIBUTE_NMTOKEN);
6041
264k
     }
6042
238k
     return(xmlParseEnumeratedType(ctxt, tree));
6043
2.01M
}
6044
6045
/**
6046
 * xmlParseAttributeListDecl:
6047
 * @ctxt:  an XML parser context
6048
 *
6049
 * DEPRECATED: Internal function, don't use.
6050
 *
6051
 * Parse an attribute list declaration for an element. Always consumes '<!'.
6052
 *
6053
 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
6054
 *
6055
 * [53] AttDef ::= S Name S AttType S DefaultDecl
6056
 *
6057
 */
6058
/*
 * Parses one '<!ATTLIST ...>' declaration. Each attribute definition found
 * is reported through the SAX attributeDecl callback when one is installed
 * and SAX is not disabled.
 */
void
6059
868k
xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
6060
868k
    const xmlChar *elemName;
6061
868k
    const xmlChar *attrName;
6062
868k
    xmlEnumerationPtr tree;
6063
6064
868k
    /* Nothing is consumed unless the input starts with '<!'. */
    if ((CUR != '<') || (NXT(1) != '!'))
6065
0
        return;
6066
868k
    SKIP(2);
6067
6068
868k
    /* If 'ATTLIST' does not follow, only the '<!' has been consumed. */
    if (CMP7(CUR_PTR, 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
6069
861k
  /* Input id at the start, compared again at the closing '>'. */
  int inputid = ctxt->input->id;
6070
6071
861k
  SKIP(7);
6072
861k
  /* A missing blank is reported but parsing continues. */
  if (SKIP_BLANKS == 0) {
6073
4.30k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6074
4.30k
                     "Space required after '<!ATTLIST'\n");
6075
4.30k
  }
6076
861k
        elemName = xmlParseName(ctxt);
6077
861k
  if (elemName == NULL) {
6078
1.63k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6079
1.63k
         "ATTLIST: no name for Element\n");
6080
1.63k
      return;
6081
1.63k
  }
6082
860k
  SKIP_BLANKS;
6083
860k
  GROW;
6084
2.82M
  /*
   * One iteration per attribute definition. Every error inside the loop
   * breaks out after releasing whatever this iteration still owns.
   */
  while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
6085
2.03M
      int type;
6086
2.03M
      int def;
6087
2.03M
      xmlChar *defaultValue = NULL;
6088
6089
2.03M
      GROW;
6090
2.03M
            tree = NULL;
6091
2.03M
      attrName = xmlParseName(ctxt);
6092
2.03M
      if (attrName == NULL) {
6093
12.2k
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6094
12.2k
             "ATTLIST: no name for Attribute\n");
6095
12.2k
    break;
6096
12.2k
      }
6097
2.02M
      GROW;
6098
2.02M
      if (SKIP_BLANKS == 0) {
6099
9.60k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6100
9.60k
            "Space required after the attribute name\n");
6101
9.60k
    break;
6102
9.60k
      }
6103
6104
2.01M
      type = xmlParseAttributeType(ctxt, &tree);
6105
2.01M
      /*
       * NOTE(review): tree is not freed on this path; presumably
       * xmlParseAttributeType leaves it NULL on failure - confirm.
       */
      if (type <= 0) {
6106
17.1k
          break;
6107
17.1k
      }
6108
6109
1.99M
      GROW;
6110
1.99M
      if (SKIP_BLANKS == 0) {
6111
8.39k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6112
8.39k
             "Space required after the attribute type\n");
6113
8.39k
          if (tree != NULL)
6114
5.50k
        xmlFreeEnumeration(tree);
6115
8.39k
    break;
6116
8.39k
      }
6117
6118
1.99M
      def = xmlParseDefaultDecl(ctxt, &defaultValue);
6119
1.99M
      if (def <= 0) {
6120
0
                if (defaultValue != NULL)
6121
0
        xmlFree(defaultValue);
6122
0
          if (tree != NULL)
6123
0
        xmlFreeEnumeration(tree);
6124
0
          break;
6125
0
      }
6126
1.99M
      /*
       * Non-CDATA defaults are passed to xmlAttrNormalizeSpace with the
       * same buffer as source and destination.
       */
      if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6127
319k
          xmlAttrNormalizeSpace(defaultValue, defaultValue);
6128
6129
1.99M
      GROW;
6130
1.99M
            /* A blank is required unless '>' follows immediately. */
            if (RAW != '>') {
6131
1.61M
    if (SKIP_BLANKS == 0) {
6132
26.2k
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6133
26.2k
      "Space required after the attribute default value\n");
6134
26.2k
        if (defaultValue != NULL)
6135
20.9k
      xmlFree(defaultValue);
6136
26.2k
        if (tree != NULL)
6137
2.15k
      xmlFreeEnumeration(tree);
6138
26.2k
        break;
6139
26.2k
    }
6140
1.61M
      }
6141
1.96M
      /*
       * NOTE(review): tree is freed here only when the callback is not
       * invoked, so the callback presumably takes ownership - confirm.
       */
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6142
1.96M
    (ctxt->sax->attributeDecl != NULL))
6143
1.79M
    ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6144
1.79M
                          type, def, defaultValue, tree);
6145
164k
      else if (tree != NULL)
6146
20.1k
    xmlFreeEnumeration(tree);
6147
6148
1.96M
      /*
       * With ctxt->sax2 set, a default value is registered through
       * xmlAddDefAttrs unless the declaration is #IMPLIED or #REQUIRED,
       * and the attribute type is always passed to xmlAddSpecialAttr.
       */
      if ((ctxt->sax2) && (defaultValue != NULL) &&
6149
1.96M
          (def != XML_ATTRIBUTE_IMPLIED) &&
6150
1.96M
    (def != XML_ATTRIBUTE_REQUIRED)) {
6151
268k
    xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6152
268k
      }
6153
1.96M
      if (ctxt->sax2) {
6154
1.49M
    xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6155
1.49M
      }
6156
1.96M
      /* defaultValue is released here once the calls above are done. */
      if (defaultValue != NULL)
6157
452k
          xmlFree(defaultValue);
6158
1.96M
      GROW;
6159
1.96M
  }
6160
860k
  /*
   * The closing '>' is consumed only if present; an input id different
   * from the one recorded at the start is reported as an entity
   * boundary error.
   */
  if (RAW == '>') {
6161
789k
      if (inputid != ctxt->input->id) {
6162
5.51k
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6163
5.51k
                               "Attribute list declaration doesn't start and"
6164
5.51k
                               " stop in the same entity\n");
6165
5.51k
      }
6166
789k
      NEXT;
6167
789k
  }
6168
860k
    }
6169
868k
}
6170
6171
/**
6172
 * xmlParseElementMixedContentDecl:
6173
 * @ctxt:  an XML parser context
6174
 * @inputchk:  the input used for the current entity, needed for boundary checks
6175
 *
6176
 * DEPRECATED: Internal function, don't use.
6177
 *
6178
 * parse the declaration for a Mixed Element content
6179
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6180
 *
6181
 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6182
 *                '(' S? '#PCDATA' S? ')'
6183
 *
6184
 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6185
 *
6186
 * [ VC: No Duplicate Types ]
6187
 * The same name must not appear more than once in a single
6188
 * mixed-content declaration.
6189
 *
6190
 * returns: the list of the xmlElementContentPtr describing the element choices
6191
 */
6192
/*
 * Parses a Mixed content model starting at '#PCDATA' and returns the
 * resulting content tree, or NULL on error.
 */
xmlElementContentPtr
6193
356k
xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6194
356k
    xmlElementContentPtr ret = NULL, cur = NULL, n;
6195
356k
    /* Name parsed in the previous loop iteration, not yet attached. */
    const xmlChar *elem = NULL;
6196
6197
356k
    GROW;
6198
356k
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6199
356k
  SKIP(7);
6200
356k
  SKIP_BLANKS;
6201
356k
  SHRINK;
6202
356k
  /* '(#PCDATA)' form: a lone PCDATA node, optionally followed by '*'. */
  if (RAW == ')') {
6203
254k
      if (ctxt->input->id != inputchk) {
6204
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6205
0
                               "Element content declaration doesn't start and"
6206
0
                               " stop in the same entity\n");
6207
0
      }
6208
254k
      NEXT;
6209
254k
      ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6210
254k
      if (ret == NULL)
6211
0
          return(NULL);
6212
254k
      if (RAW == '*') {
6213
66
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6214
66
    NEXT;
6215
66
      }
6216
254k
      return(ret);
6217
254k
  }
6218
102k
  /* The PCDATA leaf becomes c1 of the first OR node created below. */
  if ((RAW == '(') || (RAW == '|')) {
6219
101k
      ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6220
101k
      if (ret == NULL) return(NULL);
6221
101k
  }
6222
945k
  /*
   * Each '|' adds one OR node. The name parsed at the end of an iteration
   * is attached as c1 of the OR node created by the next iteration, or as
   * c2 of the last node once the loop is over.
   */
  while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
6223
843k
      NEXT;
6224
843k
      /* First '|': the new OR node becomes the root of the tree. */
      if (elem == NULL) {
6225
101k
          ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6226
101k
    if (ret == NULL) {
6227
0
        xmlFreeDocElementContent(ctxt->myDoc, cur);
6228
0
                    return(NULL);
6229
0
                }
6230
101k
    ret->c1 = cur;
6231
101k
    if (cur != NULL)
6232
101k
        cur->parent = ret;
6233
101k
    cur = ret;
6234
741k
      } else {
6235
741k
          n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6236
741k
    if (n == NULL) {
6237
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6238
0
                    return(NULL);
6239
0
                }
6240
741k
    /* A failed allocation of the element leaf is not reported here. */
    n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6241
741k
    if (n->c1 != NULL)
6242
741k
        n->c1->parent = n;
6243
741k
          cur->c2 = n;
6244
741k
    /* n was already checked and dereferenced above. */
    if (n != NULL)
6245
741k
        n->parent = cur;
6246
741k
    cur = n;
6247
741k
      }
6248
843k
      SKIP_BLANKS;
6249
843k
      elem = xmlParseName(ctxt);
6250
843k
      if (elem == NULL) {
6251
292
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6252
292
      "xmlParseElementMixedContentDecl : Name expected\n");
6253
292
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6254
292
    return(NULL);
6255
292
      }
6256
842k
      SKIP_BLANKS;
6257
842k
      GROW;
6258
842k
  }
6259
102k
  /* Past this point only ')*' is accepted as the end of the group. */
  if ((RAW == ')') && (NXT(1) == '*')) {
6260
100k
      if (elem != NULL) {
6261
100k
    cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6262
100k
                                   XML_ELEMENT_CONTENT_ELEMENT);
6263
100k
    if (cur->c2 != NULL)
6264
100k
        cur->c2->parent = cur;
6265
100k
            }
6266
100k
            if (ret != NULL)
6267
100k
                ret->ocur = XML_ELEMENT_CONTENT_MULT;
6268
100k
      if (ctxt->input->id != inputchk) {
6269
6
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6270
6
                               "Element content declaration doesn't start and"
6271
6
                               " stop in the same entity\n");
6272
6
      }
6273
100k
      SKIP(2);
6274
100k
  } else {
6275
1.60k
      /* The partial tree is released before the error is reported. */
      xmlFreeDocElementContent(ctxt->myDoc, ret);
6276
1.60k
      xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6277
1.60k
      return(NULL);
6278
1.60k
  }
6279
6280
102k
    } else {
6281
0
  /* ret is still NULL here, so NULL is returned below. */
  xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6282
0
    }
6283
100k
    return(ret);
6284
356k
}
6285
6286
/**
6287
 * xmlParseElementChildrenContentDeclPriv:
6288
 * @ctxt:  an XML parser context
6289
 * @inputchk:  the input used for the current entity, needed for boundary checks
6290
 * @depth: the level of recursion
6291
 *
6292
 * Parse the declaration of an element-children content model
6293
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6294
 *
6295
 *
6296
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6297
 *
6298
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6299
 *
6300
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6301
 *
6302
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6303
 *
6304
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6305
 * TODO Parameter-entity replacement text must be properly nested
6306
 *  with parenthesized groups. That is to say, if either of the
6307
 *  opening or closing parentheses in a choice, seq, or Mixed
6308
 *  construct is contained in the replacement text for a parameter
6309
 *  entity, both must be contained in the same replacement text. For
6310
 *  interoperability, if a parameter-entity reference appears in a
6311
 *  choice, seq, or Mixed construct, its replacement text should not
6312
 *  be empty, and neither the first nor last non-blank character of
6313
 *  the replacement text should be a connector (| or ,).
6314
 *
6315
 * Returns the tree of xmlElementContentPtr describing the element
6316
 *          hierarchy.
6317
 */
6318
/*
 * Recursive worker for the children content model: parses the content
 * particles of one group up to its closing ')' plus an optional occurrence
 * indicator, and returns the group's tree or NULL on error.
 */
static xmlElementContentPtr
6319
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6320
567k
                                       int depth) {
6321
567k
    /*
     * ret: root of the tree built so far; cur: most recent operator node
     * (or the first particle); last: operand parsed after the latest
     * separator, not yet attached; op: operator node being created.
     */
    xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6322
567k
    const xmlChar *elem;
6323
567k
    /* First separator seen in this group (',' or '|'), 0 if none yet. */
    xmlChar type = 0;
6324
6325
567k
    /* Nesting limit: 128 levels, or 2048 when XML_PARSE_HUGE is set. */
    if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6326
567k
        (depth >  2048)) {
6327
256
        xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6328
256
"xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6329
256
                          depth);
6330
256
  return(NULL);
6331
256
    }
6332
567k
    SKIP_BLANKS;
6333
567k
    GROW;
6334
567k
    /* First content particle: either a nested group or a name. */
    if (RAW == '(') {
6335
221k
  int inputid = ctxt->input->id;
6336
6337
        /* Recurse on first child */
6338
221k
  NEXT;
6339
221k
  SKIP_BLANKS;
6340
221k
        cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6341
221k
                                                           depth + 1);
6342
221k
        if (cur == NULL)
6343
197k
            return(NULL);
6344
24.2k
  SKIP_BLANKS;
6345
24.2k
  GROW;
6346
345k
    } else {
6347
345k
  elem = xmlParseName(ctxt);
6348
345k
  if (elem == NULL) {
6349
3.82k
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6350
3.82k
      return(NULL);
6351
3.82k
  }
6352
341k
        cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6353
341k
  if (cur == NULL) {
6354
0
      xmlErrMemory(ctxt, NULL);
6355
0
      return(NULL);
6356
0
  }
6357
341k
  GROW;
6358
341k
  if (RAW == '?') {
6359
32.7k
      cur->ocur = XML_ELEMENT_CONTENT_OPT;
6360
32.7k
      NEXT;
6361
309k
  } else if (RAW == '*') {
6362
71.0k
      cur->ocur = XML_ELEMENT_CONTENT_MULT;
6363
71.0k
      NEXT;
6364
238k
  } else if (RAW == '+') {
6365
40.5k
      cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6366
40.5k
      NEXT;
6367
197k
  } else {
6368
197k
      cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6369
197k
  }
6370
341k
  GROW;
6371
341k
    }
6372
366k
    SKIP_BLANKS;
6373
366k
    SHRINK;
6374
1.24M
    /*
     * The tree is built right-leaning: the first operator node takes the
     * first particle as c1 and becomes the root; each later operator node
     * becomes c2 of the previous one and takes the pending operand (last)
     * as its c1. The final operand is attached after the loop.
     */
    while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
6375
        /*
6376
   * Each loop we parse one separator and one element.
6377
   */
6378
883k
        if (RAW == ',') {
6379
342k
      if (type == 0) type = CUR;
6380
6381
      /*
6382
       * Detect "Name | Name , Name" error
6383
       */
6384
212k
      else if (type != CUR) {
6385
78
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6386
78
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6387
78
                      type);
6388
78
    /* last is freed separately because it is not linked into ret yet. */
    if ((last != NULL) && (last != ret))
6389
78
        xmlFreeDocElementContent(ctxt->myDoc, last);
6390
78
    if (ret != NULL)
6391
78
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6392
78
    return(NULL);
6393
78
      }
6394
341k
      NEXT;
6395
6396
341k
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6397
341k
      if (op == NULL) {
6398
0
    if ((last != NULL) && (last != ret))
6399
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6400
0
          xmlFreeDocElementContent(ctxt->myDoc, ret);
6401
0
    return(NULL);
6402
0
      }
6403
341k
      if (last == NULL) {
6404
129k
    op->c1 = ret;
6405
129k
    if (ret != NULL)
6406
129k
        ret->parent = op;
6407
129k
    ret = cur = op;
6408
212k
      } else {
6409
212k
          cur->c2 = op;
6410
212k
    if (op != NULL)
6411
212k
        op->parent = cur;
6412
212k
    op->c1 = last;
6413
212k
    if (last != NULL)
6414
212k
        last->parent = op;
6415
212k
    cur =op;
6416
212k
    last = NULL;
6417
212k
      }
6418
541k
  } else if (RAW == '|') {
6419
535k
      if (type == 0) type = CUR;
6420
6421
      /*
6422
       * Detect "Name , Name | Name" error
6423
       */
6424
455k
      else if (type != CUR) {
6425
63
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6426
63
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6427
63
          type);
6428
63
    if ((last != NULL) && (last != ret))
6429
63
        xmlFreeDocElementContent(ctxt->myDoc, last);
6430
63
    if (ret != NULL)
6431
63
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6432
63
    return(NULL);
6433
63
      }
6434
535k
      NEXT;
6435
6436
535k
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6437
535k
      if (op == NULL) {
6438
0
    if ((last != NULL) && (last != ret))
6439
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6440
0
    if (ret != NULL)
6441
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6442
0
    return(NULL);
6443
0
      }
6444
535k
      if (last == NULL) {
6445
80.5k
    op->c1 = ret;
6446
80.5k
    if (ret != NULL)
6447
80.5k
        ret->parent = op;
6448
80.5k
    ret = cur = op;
6449
455k
      } else {
6450
455k
          cur->c2 = op;
6451
455k
    if (op != NULL)
6452
455k
        op->parent = cur;
6453
455k
    op->c1 = last;
6454
455k
    if (last != NULL)
6455
455k
        last->parent = op;
6456
455k
    cur =op;
6457
455k
    last = NULL;
6458
455k
      }
6459
535k
  } else {
6460
5.71k
      /* Neither ',' nor '|' nor ')': the group is malformed. */
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6461
5.71k
      if ((last != NULL) && (last != ret))
6462
1.99k
          xmlFreeDocElementContent(ctxt->myDoc, last);
6463
5.71k
      if (ret != NULL)
6464
5.71k
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6465
5.71k
      return(NULL);
6466
5.71k
  }
6467
877k
  GROW;
6468
877k
  SKIP_BLANKS;
6469
877k
  GROW;
6470
877k
  /* Operand following the separator: nested group or name. */
  if (RAW == '(') {
6471
51.2k
      int inputid = ctxt->input->id;
6472
      /* Recurse on second child */
6473
51.2k
      NEXT;
6474
51.2k
      SKIP_BLANKS;
6475
51.2k
      last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6476
51.2k
                                                          depth + 1);
6477
51.2k
            if (last == NULL) {
6478
1.17k
    if (ret != NULL)
6479
1.17k
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6480
1.17k
    return(NULL);
6481
1.17k
            }
6482
50.1k
      SKIP_BLANKS;
6483
826k
  } else {
6484
826k
      elem = xmlParseName(ctxt);
6485
826k
      if (elem == NULL) {
6486
712
    xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6487
712
    if (ret != NULL)
6488
712
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6489
712
    return(NULL);
6490
712
      }
6491
825k
      last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6492
825k
      if (last == NULL) {
6493
0
    if (ret != NULL)
6494
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6495
0
    return(NULL);
6496
0
      }
6497
825k
      if (RAW == '?') {
6498
124k
    last->ocur = XML_ELEMENT_CONTENT_OPT;
6499
124k
    NEXT;
6500
701k
      } else if (RAW == '*') {
6501
90.3k
    last->ocur = XML_ELEMENT_CONTENT_MULT;
6502
90.3k
    NEXT;
6503
611k
      } else if (RAW == '+') {
6504
19.0k
    last->ocur = XML_ELEMENT_CONTENT_PLUS;
6505
19.0k
    NEXT;
6506
592k
      } else {
6507
592k
    last->ocur = XML_ELEMENT_CONTENT_ONCE;
6508
592k
      }
6509
825k
  }
6510
875k
  SKIP_BLANKS;
6511
875k
  GROW;
6512
875k
    }
6513
358k
    /* Attach the final pending operand as c2 of the last operator node. */
    if ((cur != NULL) && (last != NULL)) {
6514
205k
        cur->c2 = last;
6515
205k
  if (last != NULL)
6516
205k
      last->parent = cur;
6517
205k
    }
6518
358k
    if (ctxt->input->id != inputchk) {
6519
18
  xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6520
18
                       "Element content declaration doesn't start and stop in"
6521
18
                       " the same entity\n");
6522
18
    }
6523
358k
    /*
     * Step past the character that ended the loop: normally ')', but the
     * loop can also end because the parser reached the EOF state.
     */
    NEXT;
6524
358k
    /* Occurrence indicator applying to the whole group. */
    if (RAW == '?') {
6525
11.6k
  if (ret != NULL) {
6526
11.6k
      /* A root already marked '+' or '*' becomes '*', otherwise '?'. */
      if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6527
11.6k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6528
159
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6529
11.5k
      else
6530
11.5k
          ret->ocur = XML_ELEMENT_CONTENT_OPT;
6531
11.6k
  }
6532
11.6k
  NEXT;
6533
346k
    } else if (RAW == '*') {
6534
75.0k
  if (ret != NULL) {
6535
75.0k
      ret->ocur = XML_ELEMENT_CONTENT_MULT;
6536
75.0k
      cur = ret;
6537
      /*
6538
       * Some normalization:
6539
       * (a | b* | c?)* == (a | b | c)*
6540
       */
6541
393k
      /* Walks the chain of OR nodes from the root along the c2 links. */
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6542
318k
    if ((cur->c1 != NULL) &&
6543
318k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6544
318k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6545
14.0k
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6546
318k
    if ((cur->c2 != NULL) &&
6547
318k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6548
318k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6549
2.32k
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6550
318k
    cur = cur->c2;
6551
318k
      }
6552
75.0k
  }
6553
75.0k
  NEXT;
6554
271k
    } else if (RAW == '+') {
6555
30.2k
  if (ret != NULL) {
6556
30.2k
      int found = 0;
6557
6558
30.2k
      if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6559
30.2k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6560
73
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6561
30.1k
      else
6562
30.1k
          ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6563
      /*
6564
       * Some normalization:
6565
       * (a | b*)+ == (a | b)*
6566
       * (a | b?)+ == (a | b)*
6567
       */
6568
46.8k
      /*
       * NOTE(review): unlike the '*' branch above, cur is not reset to
       * ret before this walk, so it starts at the last operator node
       * left by the parsing loop rather than at the root - confirm
       * this is intended.
       */
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6569
16.5k
    if ((cur->c1 != NULL) &&
6570
16.5k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6571
16.5k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6572
60
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6573
60
        found = 1;
6574
60
    }
6575
16.5k
    if ((cur->c2 != NULL) &&
6576
16.5k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6577
16.5k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6578
101
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6579
101
        found = 1;
6580
101
    }
6581
16.5k
    cur = cur->c2;
6582
16.5k
      }
6583
30.2k
      /* Any optional alternative turns the whole group into '*'. */
      if (found)
6584
132
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6585
30.2k
  }
6586
30.2k
  NEXT;
6587
30.2k
    }
6588
358k
    return(ret);
6589
366k
}
6590
6591
/**
6592
 * xmlParseElementChildrenContentDecl:
6593
 * @ctxt:  an XML parser context
6594
 * @inputchk:  the input used for the current entity, needed for boundary checks
6595
 *
6596
 * DEPRECATED: Internal function, don't use.
6597
 *
6598
 * Parse the declaration of an element-children content model
6599
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6600
 *
6601
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6602
 *
6603
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6604
 *
6605
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6606
 *
6607
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6608
 *
6609
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6610
 * TODO Parameter-entity replacement text must be properly nested
6611
 *  with parenthesized groups. That is to say, if either of the
6612
 *  opening or closing parentheses in a choice, seq, or Mixed
6613
 *  construct is contained in the replacement text for a parameter
6614
 *  entity, both must be contained in the same replacement text. For
6615
 *  interoperability, if a parameter-entity reference appears in a
6616
 *  choice, seq, or Mixed construct, its replacement text should not
6617
 *  be empty, and neither the first nor last non-blank character of
6618
 *  the replacement text should be a connector (| or ,).
6619
 *
6620
 * Returns the tree of xmlElementContentPtr describing the element
6621
 *          hierarchy.
6622
 */
6623
/*
 * Public entry point kept for compatibility: forwards to the private
 * recursive parser with an initial depth of 1.
 */
xmlElementContentPtr
6624
0
xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6625
    /* stub left for API/ABI compat */
6626
0
    return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6627
0
}
6628
6629
/**
6630
 * xmlParseElementContentDecl:
6631
 * @ctxt:  an XML parser context
6632
 * @name:  the name of the element being defined.
6633
 * @result:  the Element Content pointer will be stored here if any
6634
 *
6635
 * DEPRECATED: Internal function, don't use.
6636
 *
6637
 * parse the declaration for an Element content either Mixed or Children,
6638
 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6639
 *
6640
 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6641
 *
6642
 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6643
 */
6644
6645
/*
 * Parses a parenthesized content specification, dispatching to the Mixed
 * or the children parser depending on whether '#PCDATA' follows the '('.
 * The parsed tree (possibly NULL) is stored in *result.
 */
int
6646
xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6647
651k
                           xmlElementContentPtr *result) {
6648
6649
651k
    xmlElementContentPtr tree = NULL;
6650
651k
    /* Captured before '(' is consumed; used for the boundary checks. */
    int inputid = ctxt->input->id;
6651
651k
    int res;
6652
6653
651k
    *result = NULL;
6654
6655
651k
    if (RAW != '(') {
6656
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6657
0
    "xmlParseElementContentDecl : %s '(' expected\n", name);
6658
0
  return(-1);
6659
0
    }
6660
651k
    NEXT;
6661
651k
    GROW;
6662
651k
    if (ctxt->instate == XML_PARSER_EOF)
6663
0
        return(-1);
6664
651k
    SKIP_BLANKS;
6665
651k
    /*
     * res only records which sub-parser was selected; it is returned even
     * when that parser failed and tree is NULL.
     */
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6666
356k
        tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6667
356k
  res = XML_ELEMENT_TYPE_MIXED;
6668
356k
    } else {
6669
294k
        tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6670
294k
  res = XML_ELEMENT_TYPE_ELEMENT;
6671
294k
    }
6672
651k
    SKIP_BLANKS;
6673
651k
    *result = tree;
6674
651k
    return(res);
6675
651k
}
6676
6677
/**
6678
 * xmlParseElementDecl:
6679
 * @ctxt:  an XML parser context
6680
 *
6681
 * DEPRECATED: Internal function, don't use.
6682
 *
6683
 * Parse an element declaration. Always consumes '<!'.
6684
 *
6685
 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6686
 *
6687
 * [ VC: Unique Element Type Declaration ]
6688
 * No element type may be declared more than once
6689
 *
6690
 * Returns the type of the element, or -1 in case of error
6691
 */
6692
/*
 * Parses one '<!ELEMENT ...>' declaration and reports it through the SAX
 * elementDecl callback when one is installed and SAX is not disabled.
 */
int
6693
923k
xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6694
923k
    const xmlChar *name;
6695
923k
    int ret = -1;
6696
923k
    /* Set only for the parenthesized (Mixed / children) content specs. */
    xmlElementContentPtr content  = NULL;
6697
6698
923k
    if ((CUR != '<') || (NXT(1) != '!'))
6699
0
        return(ret);
6700
923k
    SKIP(2);
6701
6702
    /* GROW; done in the caller */
6703
923k
    /* If 'ELEMENT' does not follow, ret stays -1 and is returned below. */
    if (CMP7(CUR_PTR, 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6704
917k
  int inputid = ctxt->input->id;
6705
6706
917k
  SKIP(7);
6707
917k
  if (SKIP_BLANKS == 0) {
6708
1.65k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6709
1.65k
               "Space required after 'ELEMENT'\n");
6710
1.65k
      return(-1);
6711
1.65k
  }
6712
915k
        name = xmlParseName(ctxt);
6713
915k
  if (name == NULL) {
6714
1.13k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6715
1.13k
         "xmlParseElementDecl: no name for Element\n");
6716
1.13k
      return(-1);
6717
1.13k
  }
6718
914k
  /* Unlike the check after 'ELEMENT', this error does not abort. */
  if (SKIP_BLANKS == 0) {
6719
4.69k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6720
4.69k
         "Space required after the element name\n");
6721
4.69k
  }
6722
914k
  if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6723
250k
      SKIP(5);
6724
      /*
6725
       * Element must always be empty.
6726
       */
6727
250k
      ret = XML_ELEMENT_TYPE_EMPTY;
6728
664k
  } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6729
664k
             (NXT(2) == 'Y')) {
6730
5.33k
      SKIP(3);
6731
      /*
6732
       * Element is a generic container.
6733
       */
6734
5.33k
      ret = XML_ELEMENT_TYPE_ANY;
6735
658k
  } else if (RAW == '(') {
6736
651k
      ret = xmlParseElementContentDecl(ctxt, name, &content);
6737
651k
  } else {
6738
      /*
6739
       * [ WFC: PEs in Internal Subset ] error handling.
6740
       */
6741
7.43k
      if ((RAW == '%') && (ctxt->external == 0) &&
6742
7.43k
          (ctxt->inputNr == 1)) {
6743
316
    xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
6744
316
    "PEReference: forbidden within markup decl in internal subset\n");
6745
7.12k
      } else {
6746
7.12k
    xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6747
7.12k
          "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6748
7.12k
            }
6749
7.43k
      return(-1);
6750
7.43k
  }
6751
6752
907k
  SKIP_BLANKS;
6753
6754
907k
  /*
   * NOTE(review): when '>' is missing the content tree is freed and the
   * callback is skipped, but ret keeps the parsed type instead of -1 -
   * confirm callers expect this.
   */
  if (RAW != '>') {
6755
12.6k
      xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6756
12.6k
      if (content != NULL) {
6757
1.12k
    xmlFreeDocElementContent(ctxt->myDoc, content);
6758
1.12k
      }
6759
894k
  } else {
6760
894k
      if (inputid != ctxt->input->id) {
6761
48
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6762
48
                               "Element declaration doesn't start and stop in"
6763
48
                               " the same entity\n");
6764
48
      }
6765
6766
894k
      NEXT;
6767
894k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6768
894k
    (ctxt->sax->elementDecl != NULL)) {
6769
824k
    /*
     * content->parent is cleared before the callback and inspected
     * afterwards: if it is still NULL the tree is freed here.
     */
    if (content != NULL)
6770
584k
        content->parent = NULL;
6771
824k
          ctxt->sax->elementDecl(ctxt->userData, name, ret,
6772
824k
                           content);
6773
824k
    if ((content != NULL) && (content->parent == NULL)) {
6774
        /*
6775
         * this is a trick: if xmlAddElementDecl is called,
6776
         * instead of copying the full tree it is plugged directly
6777
         * if called from the parser. Avoid duplicating the
6778
         * interfaces or change the API/ABI
6779
         */
6780
49.6k
        xmlFreeDocElementContent(ctxt->myDoc, content);
6781
49.6k
    }
6782
824k
      } else if (content != NULL) {
6783
53.3k
    xmlFreeDocElementContent(ctxt->myDoc, content);
6784
53.3k
      }
6785
894k
  }
6786
907k
    }
6787
912k
    return(ret);
6788
923k
}
6789
6790
/**
6791
 * xmlParseConditionalSections:
6792
 * @ctxt:  an XML parser context
6793
 *
6794
 * Parse a conditional section. Always consumes '<!['.
6795
 *
6796
 * [61] conditionalSect ::= includeSect | ignoreSect
6797
 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6798
 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6799
 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6800
 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6801
 */
6802
6803
static void
6804
8.24k
xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6805
8.24k
    int *inputIds = NULL;
6806
8.24k
    size_t inputIdsSize = 0;
6807
8.24k
    size_t depth = 0;
6808
6809
67.7k
    while (ctxt->instate != XML_PARSER_EOF) {
6810
67.4k
        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6811
29.0k
            int id = ctxt->input->id;
6812
6813
29.0k
            SKIP(3);
6814
29.0k
            SKIP_BLANKS;
6815
6816
29.0k
            if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6817
25.4k
                SKIP(7);
6818
25.4k
                SKIP_BLANKS;
6819
25.4k
                if (RAW != '[') {
6820
106
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6821
106
                    xmlHaltParser(ctxt);
6822
106
                    goto error;
6823
106
                }
6824
25.3k
                if (ctxt->input->id != id) {
6825
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6826
0
                                   "All markup of the conditional section is"
6827
0
                                   " not in the same entity\n");
6828
0
                }
6829
25.3k
                NEXT;
6830
6831
25.3k
                if (inputIdsSize <= depth) {
6832
7.50k
                    int *tmp;
6833
6834
7.50k
                    inputIdsSize = (inputIdsSize == 0 ? 4 : inputIdsSize * 2);
6835
7.50k
                    tmp = (int *) xmlRealloc(inputIds,
6836
7.50k
                            inputIdsSize * sizeof(int));
6837
7.50k
                    if (tmp == NULL) {
6838
0
                        xmlErrMemory(ctxt, NULL);
6839
0
                        goto error;
6840
0
                    }
6841
7.50k
                    inputIds = tmp;
6842
7.50k
                }
6843
25.3k
                inputIds[depth] = id;
6844
25.3k
                depth++;
6845
25.3k
            } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6846
2.32k
                size_t ignoreDepth = 0;
6847
6848
2.32k
                SKIP(6);
6849
2.32k
                SKIP_BLANKS;
6850
2.32k
                if (RAW != '[') {
6851
72
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6852
72
                    xmlHaltParser(ctxt);
6853
72
                    goto error;
6854
72
                }
6855
2.25k
                if (ctxt->input->id != id) {
6856
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6857
0
                                   "All markup of the conditional section is"
6858
0
                                   " not in the same entity\n");
6859
0
                }
6860
2.25k
                NEXT;
6861
6862
5.11M
                while (RAW != 0) {
6863
5.11M
                    if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6864
8.40k
                        SKIP(3);
6865
8.40k
                        ignoreDepth++;
6866
                        /* Check for integer overflow */
6867
8.40k
                        if (ignoreDepth == 0) {
6868
0
                            xmlErrMemory(ctxt, NULL);
6869
0
                            goto error;
6870
0
                        }
6871
5.10M
                    } else if ((RAW == ']') && (NXT(1) == ']') &&
6872
5.10M
                               (NXT(2) == '>')) {
6873
6.51k
                        if (ignoreDepth == 0)
6874
1.02k
                            break;
6875
5.49k
                        SKIP(3);
6876
5.49k
                        ignoreDepth--;
6877
5.10M
                    } else {
6878
5.10M
                        NEXT;
6879
5.10M
                    }
6880
5.11M
                }
6881
6882
2.25k
    if (RAW == 0) {
6883
1.23k
        xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6884
1.23k
                    goto error;
6885
1.23k
    }
6886
1.02k
                if (ctxt->input->id != id) {
6887
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6888
0
                                   "All markup of the conditional section is"
6889
0
                                   " not in the same entity\n");
6890
0
                }
6891
1.02k
                SKIP(3);
6892
1.30k
            } else {
6893
1.30k
                xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6894
1.30k
                xmlHaltParser(ctxt);
6895
1.30k
                goto error;
6896
1.30k
            }
6897
38.3k
        } else if ((depth > 0) &&
6898
38.3k
                   (RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6899
12.3k
            depth--;
6900
12.3k
            if (ctxt->input->id != inputIds[depth]) {
6901
564
                xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6902
564
                               "All markup of the conditional section is not"
6903
564
                               " in the same entity\n");
6904
564
            }
6905
12.3k
            SKIP(3);
6906
25.9k
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
6907
23.1k
            xmlParseMarkupDecl(ctxt);
6908
23.1k
        } else {
6909
2.87k
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6910
2.87k
            xmlHaltParser(ctxt);
6911
2.87k
            goto error;
6912
2.87k
        }
6913
6914
61.8k
        if (depth == 0)
6915
2.40k
            break;
6916
6917
59.4k
        SKIP_BLANKS;
6918
59.4k
        GROW;
6919
59.4k
    }
6920
6921
8.24k
error:
6922
8.24k
    xmlFree(inputIds);
6923
8.24k
}
6924
6925
/**
6926
 * xmlParseMarkupDecl:
6927
 * @ctxt:  an XML parser context
6928
 *
6929
 * DEPRECATED: Internal function, don't use.
6930
 *
6931
 * Parse markup declarations. Always consumes '<!' or '<?'.
6932
 *
6933
 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6934
 *                     NotationDecl | PI | Comment
6935
 *
6936
 * [ VC: Proper Declaration/PE Nesting ]
6937
 * Parameter-entity replacement text must be properly nested with
6938
 * markup declarations. That is to say, if either the first character
6939
 * or the last character of a markup declaration (markupdecl above) is
6940
 * contained in the replacement text for a parameter-entity reference,
6941
 * both must be contained in the same replacement text.
6942
 *
6943
 * [ WFC: PEs in Internal Subset ]
6944
 * In the internal DTD subset, parameter-entity references can occur
6945
 * only where markup declarations can occur, not within markup declarations.
6946
 * (This does not apply to references that occur in external parameter
6947
 * entities or to the external subset.)
6948
 */
6949
void
6950
32.6M
xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6951
32.6M
    GROW;
6952
32.6M
    if (CUR == '<') {
6953
32.6M
        if (NXT(1) == '!') {
6954
32.5M
      switch (NXT(2)) {
6955
1.62M
          case 'E':
6956
1.62M
        if (NXT(3) == 'L')
6957
923k
      xmlParseElementDecl(ctxt);
6958
702k
        else if (NXT(3) == 'N')
6959
700k
      xmlParseEntityDecl(ctxt);
6960
2.17k
                    else
6961
2.17k
                        SKIP(2);
6962
1.62M
        break;
6963
868k
          case 'A':
6964
868k
        xmlParseAttributeListDecl(ctxt);
6965
868k
        break;
6966
19.0k
          case 'N':
6967
19.0k
        xmlParseNotationDecl(ctxt);
6968
19.0k
        break;
6969
30.0M
          case '-':
6970
30.0M
        xmlParseComment(ctxt);
6971
30.0M
        break;
6972
7.99k
    default:
6973
        /* there is an error but it will be detected later */
6974
7.99k
                    SKIP(2);
6975
7.99k
        break;
6976
32.5M
      }
6977
32.5M
  } else if (NXT(1) == '?') {
6978
31.3k
      xmlParsePI(ctxt);
6979
31.3k
  }
6980
32.6M
    }
6981
6982
    /*
6983
     * detect requirement to exit there and act accordingly
6984
     * and avoid having instate overridden later on
6985
     */
6986
32.6M
    if (ctxt->instate == XML_PARSER_EOF)
6987
30.3k
        return;
6988
6989
32.5M
    ctxt->instate = XML_PARSER_DTD;
6990
32.5M
}
6991
6992
/**
6993
 * xmlParseTextDecl:
6994
 * @ctxt:  an XML parser context
6995
 *
6996
 * DEPRECATED: Internal function, don't use.
6997
 *
6998
 * parse an XML declaration header for external entities
6999
 *
7000
 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
7001
 */
7002
7003
void
7004
8.64k
xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
7005
8.64k
    xmlChar *version;
7006
8.64k
    const xmlChar *encoding;
7007
8.64k
    int oldstate;
7008
7009
    /*
7010
     * We know that '<?xml' is here.
7011
     */
7012
8.64k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
7013
8.15k
  SKIP(5);
7014
8.15k
    } else {
7015
499
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
7016
499
  return;
7017
499
    }
7018
7019
    /* Avoid expansion of parameter entities when skipping blanks. */
7020
8.15k
    oldstate = ctxt->instate;
7021
8.15k
    ctxt->instate = XML_PARSER_START;
7022
7023
8.15k
    if (SKIP_BLANKS == 0) {
7024
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7025
0
           "Space needed after '<?xml'\n");
7026
0
    }
7027
7028
    /*
7029
     * We may have the VersionInfo here.
7030
     */
7031
8.15k
    version = xmlParseVersionInfo(ctxt);
7032
8.15k
    if (version == NULL)
7033
1.91k
  version = xmlCharStrdup(XML_DEFAULT_VERSION);
7034
6.23k
    else {
7035
6.23k
  if (SKIP_BLANKS == 0) {
7036
314
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7037
314
               "Space needed here\n");
7038
314
  }
7039
6.23k
    }
7040
8.15k
    ctxt->input->version = version;
7041
7042
    /*
7043
     * We must have the encoding declaration
7044
     */
7045
8.15k
    encoding = xmlParseEncodingDecl(ctxt);
7046
8.15k
    if (ctxt->instate == XML_PARSER_EOF)
7047
0
        return;
7048
8.15k
    if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7049
  /*
7050
   * The XML REC instructs us to stop parsing right here
7051
   */
7052
135
        ctxt->instate = oldstate;
7053
135
        return;
7054
135
    }
7055
8.01k
    if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
7056
1.74k
  xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
7057
1.74k
           "Missing encoding in text declaration\n");
7058
1.74k
    }
7059
7060
8.01k
    SKIP_BLANKS;
7061
8.01k
    if ((RAW == '?') && (NXT(1) == '>')) {
7062
4.93k
        SKIP(2);
7063
4.93k
    } else if (RAW == '>') {
7064
        /* Deprecated old WD ... */
7065
79
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7066
79
  NEXT;
7067
3.00k
    } else {
7068
3.00k
        int c;
7069
7070
3.00k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7071
751k
        while ((c = CUR) != 0) {
7072
749k
            NEXT;
7073
749k
            if (c == '>')
7074
1.24k
                break;
7075
749k
        }
7076
3.00k
    }
7077
7078
8.01k
    ctxt->instate = oldstate;
7079
8.01k
}
7080
7081
/**
7082
 * xmlParseExternalSubset:
7083
 * @ctxt:  an XML parser context
7084
 * @ExternalID: the external identifier
7085
 * @SystemID: the system identifier (or URL)
7086
 *
7087
 * parse Markup declarations from an external subset
7088
 *
7089
 * [30] extSubset ::= textDecl? extSubsetDecl
7090
 *
7091
 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
7092
 */
7093
void
7094
xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
7095
60.1k
                       const xmlChar *SystemID) {
7096
60.1k
    xmlDetectSAX2(ctxt);
7097
60.1k
    GROW;
7098
7099
60.1k
    if ((ctxt->encoding == NULL) &&
7100
60.1k
        (ctxt->input->end - ctxt->input->cur >= 4)) {
7101
59.9k
        xmlChar start[4];
7102
59.9k
  xmlCharEncoding enc;
7103
7104
59.9k
  start[0] = RAW;
7105
59.9k
  start[1] = NXT(1);
7106
59.9k
  start[2] = NXT(2);
7107
59.9k
  start[3] = NXT(3);
7108
59.9k
  enc = xmlDetectCharEncoding(start, 4);
7109
59.9k
  if (enc != XML_CHAR_ENCODING_NONE)
7110
7.14k
      xmlSwitchEncoding(ctxt, enc);
7111
59.9k
    }
7112
7113
60.1k
    if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
7114
6.51k
  xmlParseTextDecl(ctxt);
7115
6.51k
  if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7116
      /*
7117
       * The XML REC instructs us to stop parsing right here
7118
       */
7119
102
      xmlHaltParser(ctxt);
7120
102
      return;
7121
102
  }
7122
6.51k
    }
7123
60.0k
    if (ctxt->myDoc == NULL) {
7124
0
        ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
7125
0
  if (ctxt->myDoc == NULL) {
7126
0
      xmlErrMemory(ctxt, "New Doc failed");
7127
0
      return;
7128
0
  }
7129
0
  ctxt->myDoc->properties = XML_DOC_INTERNAL;
7130
0
    }
7131
60.0k
    if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
7132
0
        xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
7133
7134
60.0k
    ctxt->instate = XML_PARSER_DTD;
7135
60.0k
    ctxt->external = 1;
7136
60.0k
    SKIP_BLANKS;
7137
2.28M
    while ((ctxt->instate != XML_PARSER_EOF) && (RAW != 0)) {
7138
2.23M
  GROW;
7139
2.23M
        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7140
8.24k
            xmlParseConditionalSections(ctxt);
7141
2.22M
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
7142
2.21M
            xmlParseMarkupDecl(ctxt);
7143
2.21M
        } else {
7144
8.48k
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7145
8.48k
            xmlHaltParser(ctxt);
7146
8.48k
            return;
7147
8.48k
        }
7148
2.22M
        SKIP_BLANKS;
7149
2.22M
    }
7150
7151
51.5k
    if (RAW != 0) {
7152
0
  xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7153
0
    }
7154
7155
51.5k
}
7156
7157
/**
7158
 * xmlParseReference:
7159
 * @ctxt:  an XML parser context
7160
 *
7161
 * DEPRECATED: Internal function, don't use.
7162
 *
7163
 * parse and handle entity references in content, depending on the SAX
7164
 * interface, this may end-up in a call to character() if this is a
7165
 * CharRef, a predefined entity, if there is no reference() callback.
7166
 * or if the parser was asked to switch to that mode.
7167
 *
7168
 * Always consumes '&'.
7169
 *
7170
 * [67] Reference ::= EntityRef | CharRef
7171
 */
7172
void
7173
8.79M
xmlParseReference(xmlParserCtxtPtr ctxt) {
7174
8.79M
    xmlEntityPtr ent;
7175
8.79M
    xmlChar *val;
7176
8.79M
    int was_checked;
7177
8.79M
    xmlNodePtr list = NULL;
7178
8.79M
    xmlParserErrors ret = XML_ERR_OK;
7179
7180
7181
8.79M
    if (RAW != '&')
7182
0
        return;
7183
7184
    /*
7185
     * Simple case of a CharRef
7186
     */
7187
8.79M
    if (NXT(1) == '#') {
7188
968k
  int i = 0;
7189
968k
  xmlChar out[16];
7190
968k
  int hex = NXT(2);
7191
968k
  int value = xmlParseCharRef(ctxt);
7192
7193
968k
  if (value == 0)
7194
111k
      return;
7195
857k
  if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
7196
      /*
7197
       * So we are using non-UTF-8 buffers
7198
       * Check that the char fit on 8bits, if not
7199
       * generate a CharRef.
7200
       */
7201
721k
      if (value <= 0xFF) {
7202
701k
    out[0] = value;
7203
701k
    out[1] = 0;
7204
701k
    if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7205
701k
        (!ctxt->disableSAX))
7206
648k
        ctxt->sax->characters(ctxt->userData, out, 1);
7207
701k
      } else {
7208
20.1k
    if ((hex == 'x') || (hex == 'X'))
7209
9.32k
        snprintf((char *)out, sizeof(out), "#x%X", value);
7210
10.8k
    else
7211
10.8k
        snprintf((char *)out, sizeof(out), "#%d", value);
7212
20.1k
    if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7213
20.1k
        (!ctxt->disableSAX))
7214
18.5k
        ctxt->sax->reference(ctxt->userData, out);
7215
20.1k
      }
7216
721k
  } else {
7217
      /*
7218
       * Just encode the value in UTF-8
7219
       */
7220
135k
      COPY_BUF(0 ,out, i, value);
7221
135k
      out[i] = 0;
7222
135k
      if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7223
135k
    (!ctxt->disableSAX))
7224
122k
    ctxt->sax->characters(ctxt->userData, out, i);
7225
135k
  }
7226
857k
  return;
7227
968k
    }
7228
7229
    /*
7230
     * We are seeing an entity reference
7231
     */
7232
7.82M
    ent = xmlParseEntityRef(ctxt);
7233
7.82M
    if (ent == NULL) return;
7234
3.58M
    if (!ctxt->wellFormed)
7235
958k
  return;
7236
2.62M
    was_checked = ent->flags & XML_ENT_PARSED;
7237
7238
    /* special case of predefined entities */
7239
2.62M
    if ((ent->name == NULL) ||
7240
2.62M
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7241
60.1k
  val = ent->content;
7242
60.1k
  if (val == NULL) return;
7243
  /*
7244
   * inline the entity.
7245
   */
7246
60.1k
  if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7247
60.1k
      (!ctxt->disableSAX))
7248
60.1k
      ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7249
60.1k
  return;
7250
60.1k
    }
7251
7252
    /*
7253
     * The first reference to the entity trigger a parsing phase
7254
     * where the ent->children is filled with the result from
7255
     * the parsing.
7256
     * Note: external parsed entities will not be loaded, it is not
7257
     * required for a non-validating parser, unless the parsing option
7258
     * of validating, or substituting entities were given. Doing so is
7259
     * far more secure as the parser will only process data coming from
7260
     * the document entity by default.
7261
     */
7262
2.56M
    if (((ent->flags & XML_ENT_PARSED) == 0) &&
7263
2.56M
        ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
7264
108k
         (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
7265
103k
  unsigned long oldsizeentcopy = ctxt->sizeentcopy;
7266
7267
  /*
7268
   * This is a bit hackish but this seems the best
7269
   * way to make sure both SAX and DOM entity support
7270
   * behaves okay.
7271
   */
7272
103k
  void *user_data;
7273
103k
  if (ctxt->userData == ctxt)
7274
103k
      user_data = NULL;
7275
0
  else
7276
0
      user_data = ctxt->userData;
7277
7278
        /* Avoid overflow as much as possible */
7279
103k
        ctxt->sizeentcopy = 0;
7280
7281
103k
        if (ent->flags & XML_ENT_EXPANDING) {
7282
658
            xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7283
658
            xmlHaltParser(ctxt);
7284
658
            return;
7285
658
        }
7286
7287
102k
        ent->flags |= XML_ENT_EXPANDING;
7288
7289
  /*
7290
   * Check that this entity is well formed
7291
   * 4.3.2: An internal general parsed entity is well-formed
7292
   * if its replacement text matches the production labeled
7293
   * content.
7294
   */
7295
102k
  if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7296
53.3k
      ctxt->depth++;
7297
53.3k
      ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
7298
53.3k
                                                user_data, &list);
7299
53.3k
      ctxt->depth--;
7300
7301
53.3k
  } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7302
49.4k
      ctxt->depth++;
7303
49.4k
      ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
7304
49.4k
                                     user_data, ctxt->depth, ent->URI,
7305
49.4k
             ent->ExternalID, &list);
7306
49.4k
      ctxt->depth--;
7307
49.4k
  } else {
7308
0
      ret = XML_ERR_ENTITY_PE_INTERNAL;
7309
0
      xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7310
0
       "invalid entity type found\n", NULL);
7311
0
  }
7312
7313
102k
        ent->flags &= ~XML_ENT_EXPANDING;
7314
102k
        ent->flags |= XML_ENT_PARSED | XML_ENT_CHECKED;
7315
102k
        ent->expandedSize = ctxt->sizeentcopy;
7316
102k
  if (ret == XML_ERR_ENTITY_LOOP) {
7317
7.36k
            xmlHaltParser(ctxt);
7318
7.36k
      xmlFreeNodeList(list);
7319
7.36k
      return;
7320
7.36k
  }
7321
95.4k
  if (xmlParserEntityCheck(ctxt, oldsizeentcopy)) {
7322
0
      xmlFreeNodeList(list);
7323
0
      return;
7324
0
  }
7325
7326
95.4k
  if ((ret == XML_ERR_OK) && (list != NULL)) {
7327
45.8k
            ent->children = list;
7328
            /*
7329
             * Prune it directly in the generated document
7330
             * except for single text nodes.
7331
             */
7332
45.8k
            if ((ctxt->replaceEntities == 0) ||
7333
45.8k
                (ctxt->parseMode == XML_PARSE_READER) ||
7334
45.8k
                ((list->type == XML_TEXT_NODE) &&
7335
37.4k
                 (list->next == NULL))) {
7336
37.4k
                ent->owner = 1;
7337
154k
                while (list != NULL) {
7338
117k
                    list->parent = (xmlNodePtr) ent;
7339
117k
                    if (list->doc != ent->doc)
7340
0
                        xmlSetTreeDoc(list, ent->doc);
7341
117k
                    if (list->next == NULL)
7342
37.4k
                        ent->last = list;
7343
117k
                    list = list->next;
7344
117k
                }
7345
37.4k
                list = NULL;
7346
37.4k
            } else {
7347
8.40k
                ent->owner = 0;
7348
144k
                while (list != NULL) {
7349
136k
                    list->parent = (xmlNodePtr) ctxt->node;
7350
136k
                    list->doc = ctxt->myDoc;
7351
136k
                    if (list->next == NULL)
7352
8.40k
                        ent->last = list;
7353
136k
                    list = list->next;
7354
136k
                }
7355
8.40k
                list = ent->children;
7356
#ifdef LIBXML_LEGACY_ENABLED
7357
                if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7358
                    xmlAddEntityReference(ent, list, NULL);
7359
#endif /* LIBXML_LEGACY_ENABLED */
7360
8.40k
            }
7361
49.6k
  } else if ((ret != XML_ERR_OK) &&
7362
49.6k
       (ret != XML_WAR_UNDECLARED_ENTITY)) {
7363
25.0k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7364
25.0k
         "Entity '%s' failed to parse\n", ent->name);
7365
25.0k
            if (ent->content != NULL)
7366
4.93k
                ent->content[0] = 0;
7367
25.0k
  } else if (list != NULL) {
7368
0
      xmlFreeNodeList(list);
7369
0
      list = NULL;
7370
0
  }
7371
7372
        /* Prevent entity from being parsed and expanded twice (Bug 760367). */
7373
95.4k
        was_checked = 0;
7374
95.4k
    }
7375
7376
    /*
7377
     * Now that the entity content has been gathered
7378
     * provide it to the application, this can take different forms based
7379
     * on the parsing modes.
7380
     */
7381
2.55M
    if (ent->children == NULL) {
7382
  /*
7383
   * Probably running in SAX mode and the callbacks don't
7384
   * build the entity content. So unless we already went
7385
   * though parsing for first checking go though the entity
7386
   * content to generate callbacks associated to the entity
7387
   */
7388
531k
  if (was_checked != 0) {
7389
477k
      void *user_data;
7390
      /*
7391
       * This is a bit hackish but this seems the best
7392
       * way to make sure both SAX and DOM entity support
7393
       * behaves okay.
7394
       */
7395
477k
      if (ctxt->userData == ctxt)
7396
477k
    user_data = NULL;
7397
0
      else
7398
0
    user_data = ctxt->userData;
7399
7400
477k
      if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7401
10.1k
    ctxt->depth++;
7402
10.1k
    ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7403
10.1k
           ent->content, user_data, NULL);
7404
10.1k
    ctxt->depth--;
7405
467k
      } else if (ent->etype ==
7406
467k
           XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7407
467k
          unsigned long oldsizeentities = ctxt->sizeentities;
7408
7409
467k
    ctxt->depth++;
7410
467k
    ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7411
467k
         ctxt->sax, user_data, ctxt->depth,
7412
467k
         ent->URI, ent->ExternalID, NULL);
7413
467k
    ctxt->depth--;
7414
7415
                /* Undo the change to sizeentities */
7416
467k
                ctxt->sizeentities = oldsizeentities;
7417
467k
      } else {
7418
0
    ret = XML_ERR_ENTITY_PE_INTERNAL;
7419
0
    xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7420
0
           "invalid entity type found\n", NULL);
7421
0
      }
7422
477k
      if (ret == XML_ERR_ENTITY_LOOP) {
7423
0
    xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7424
0
    return;
7425
0
      }
7426
477k
            if (xmlParserEntityCheck(ctxt, 0))
7427
0
                return;
7428
477k
  }
7429
531k
  if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7430
531k
      (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7431
      /*
7432
       * Entity reference callback comes second, it's somewhat
7433
       * superfluous but a compatibility to historical behaviour
7434
       */
7435
48.6k
      ctxt->sax->reference(ctxt->userData, ent->name);
7436
48.6k
  }
7437
531k
  return;
7438
531k
    }
7439
7440
    /*
7441
     * We also check for amplification if entities aren't substituted.
7442
     * They might be expanded later.
7443
     */
7444
2.02M
    if ((was_checked != 0) &&
7445
2.02M
        (xmlParserEntityCheck(ctxt, ent->expandedSize)))
7446
564
        return;
7447
7448
    /*
7449
     * If we didn't get any children for the entity being built
7450
     */
7451
2.02M
    if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7452
2.02M
  (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7453
  /*
7454
   * Create a node.
7455
   */
7456
81.6k
  ctxt->sax->reference(ctxt->userData, ent->name);
7457
81.6k
  return;
7458
81.6k
    }
7459
7460
1.94M
    if (ctxt->replaceEntities)  {
7461
  /*
7462
   * There is a problem on the handling of _private for entities
7463
   * (bug 155816): Should we copy the content of the field from
7464
   * the entity (possibly overwriting some value set by the user
7465
   * when a copy is created), should we leave it alone, or should
7466
   * we try to take care of different situations?  The problem
7467
   * is exacerbated by the usage of this field by the xmlReader.
7468
   * To fix this bug, we look at _private on the created node
7469
   * and, if it's NULL, we copy in whatever was in the entity.
7470
   * If it's not NULL we leave it alone.  This is somewhat of a
7471
   * hack - maybe we should have further tests to determine
7472
   * what to do.
7473
   */
7474
1.94M
  if (ctxt->node != NULL) {
7475
      /*
7476
       * Seems we are generating the DOM content, do
7477
       * a simple tree copy for all references except the first
7478
       * In the first occurrence list contains the replacement.
7479
       */
7480
1.94M
      if (((list == NULL) && (ent->owner == 0)) ||
7481
1.94M
    (ctxt->parseMode == XML_PARSE_READER)) {
7482
622k
    xmlNodePtr nw = NULL, cur, firstChild = NULL;
7483
7484
    /*
7485
     * when operating on a reader, the entities definitions
7486
     * are always owning the entities subtree.
7487
    if (ctxt->parseMode == XML_PARSE_READER)
7488
        ent->owner = 1;
7489
     */
7490
7491
622k
    cur = ent->children;
7492
924k
    while (cur != NULL) {
7493
924k
        nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7494
924k
        if (nw != NULL) {
7495
924k
      if (nw->_private == NULL)
7496
924k
          nw->_private = cur->_private;
7497
924k
      if (firstChild == NULL){
7498
622k
          firstChild = nw;
7499
622k
      }
7500
924k
      nw = xmlAddChild(ctxt->node, nw);
7501
924k
        }
7502
924k
        if (cur == ent->last) {
7503
      /*
7504
       * needed to detect some strange empty
7505
       * node cases in the reader tests
7506
       */
7507
622k
      if ((ctxt->parseMode == XML_PARSE_READER) &&
7508
622k
          (nw != NULL) &&
7509
622k
          (nw->type == XML_ELEMENT_NODE) &&
7510
622k
          (nw->children == NULL))
7511
1.83k
          nw->extra = 1;
7512
7513
622k
      break;
7514
622k
        }
7515
302k
        cur = cur->next;
7516
302k
    }
7517
#ifdef LIBXML_LEGACY_ENABLED
7518
    if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7519
      xmlAddEntityReference(ent, firstChild, nw);
7520
#endif /* LIBXML_LEGACY_ENABLED */
7521
1.32M
      } else if ((list == NULL) || (ctxt->inputNr > 0)) {
7522
1.32M
    xmlNodePtr nw = NULL, cur, next, last,
7523
1.32M
         firstChild = NULL;
7524
7525
    /*
7526
     * Copy the entity child list and make it the new
7527
     * entity child list. The goal is to make sure any
7528
     * ID or REF referenced will be the one from the
7529
     * document content and not the entity copy.
7530
     */
7531
1.32M
    cur = ent->children;
7532
1.32M
    ent->children = NULL;
7533
1.32M
    last = ent->last;
7534
1.32M
    ent->last = NULL;
7535
2.08M
    while (cur != NULL) {
7536
2.08M
        next = cur->next;
7537
2.08M
        cur->next = NULL;
7538
2.08M
        cur->parent = NULL;
7539
2.08M
        nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7540
2.08M
        if (nw != NULL) {
7541
2.08M
      if (nw->_private == NULL)
7542
2.08M
          nw->_private = cur->_private;
7543
2.08M
      if (firstChild == NULL){
7544
1.32M
          firstChild = cur;
7545
1.32M
      }
7546
2.08M
      xmlAddChild((xmlNodePtr) ent, nw);
7547
2.08M
        }
7548
2.08M
        xmlAddChild(ctxt->node, cur);
7549
2.08M
        if (cur == last)
7550
1.32M
      break;
7551
760k
        cur = next;
7552
760k
    }
7553
1.32M
    if (ent->owner == 0)
7554
8.40k
        ent->owner = 1;
7555
#ifdef LIBXML_LEGACY_ENABLED
7556
    if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7557
      xmlAddEntityReference(ent, firstChild, nw);
7558
#endif /* LIBXML_LEGACY_ENABLED */
7559
1.32M
      } else {
7560
0
    const xmlChar *nbktext;
7561
7562
    /*
7563
     * the name change is to avoid coalescing of the
7564
     * node with a possible previous text one which
7565
     * would make ent->children a dangling pointer
7566
     */
7567
0
    nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7568
0
          -1);
7569
0
    if (ent->children->type == XML_TEXT_NODE)
7570
0
        ent->children->name = nbktext;
7571
0
    if ((ent->last != ent->children) &&
7572
0
        (ent->last->type == XML_TEXT_NODE))
7573
0
        ent->last->name = nbktext;
7574
0
    xmlAddChildList(ctxt->node, ent->children);
7575
0
      }
7576
7577
      /*
7578
       * This is to avoid a nasty side effect, see
7579
       * characters() in SAX.c
7580
       */
7581
1.94M
      ctxt->nodemem = 0;
7582
1.94M
      ctxt->nodelen = 0;
7583
1.94M
      return;
7584
1.94M
  }
7585
1.94M
    }
7586
1.94M
}
7587
7588
/**
7589
 * xmlParseEntityRef:
7590
 * @ctxt:  an XML parser context
7591
 *
7592
 * DEPRECATED: Internal function, don't use.
7593
 *
7594
 * Parse an entitiy reference. Always consumes '&'.
7595
 *
7596
 * [68] EntityRef ::= '&' Name ';'
7597
 *
7598
 * [ WFC: Entity Declared ]
7599
 * In a document without any DTD, a document with only an internal DTD
7600
 * subset which contains no parameter entity references, or a document
7601
 * with "standalone='yes'", the Name given in the entity reference
7602
 * must match that in an entity declaration, except that well-formed
7603
 * documents need not declare any of the following entities: amp, lt,
7604
 * gt, apos, quot.  The declaration of a parameter entity must precede
7605
 * any reference to it.  Similarly, the declaration of a general entity
7606
 * must precede any reference to it which appears in a default value in an
7607
 * attribute-list declaration. Note that if entities are declared in the
7608
 * external subset or in external parameter entities, a non-validating
7609
 * processor is not obligated to read and process their declarations;
7610
 * for such documents, the rule that an entity must be declared is a
7611
 * well-formedness constraint only if standalone='yes'.
7612
 *
7613
 * [ WFC: Parsed Entity ]
7614
 * An entity reference must not contain the name of an unparsed entity
7615
 *
7616
 * Returns the xmlEntityPtr if found, or NULL otherwise.
7617
 */
7618
xmlEntityPtr
7619
11.3M
xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7620
11.3M
    const xmlChar *name;
7621
11.3M
    xmlEntityPtr ent = NULL;
7622
7623
11.3M
    GROW;
7624
11.3M
    if (ctxt->instate == XML_PARSER_EOF)
7625
0
        return(NULL);
7626
7627
11.3M
    if (RAW != '&')
7628
0
        return(NULL);
7629
11.3M
    NEXT;
7630
11.3M
    name = xmlParseName(ctxt);
7631
11.3M
    if (name == NULL) {
7632
4.28M
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7633
4.28M
           "xmlParseEntityRef: no name\n");
7634
4.28M
        return(NULL);
7635
4.28M
    }
7636
7.07M
    if (RAW != ';') {
7637
253k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7638
253k
  return(NULL);
7639
253k
    }
7640
6.81M
    NEXT;
7641
7642
    /*
7643
     * Predefined entities override any extra definition
7644
     */
7645
6.81M
    if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7646
5.71M
        ent = xmlGetPredefinedEntity(name);
7647
5.71M
        if (ent != NULL)
7648
436k
            return(ent);
7649
5.71M
    }
7650
7651
    /*
7652
     * Ask first SAX for entity resolution, otherwise try the
7653
     * entities which may have stored in the parser context.
7654
     */
7655
6.38M
    if (ctxt->sax != NULL) {
7656
6.38M
  if (ctxt->sax->getEntity != NULL)
7657
6.38M
      ent = ctxt->sax->getEntity(ctxt->userData, name);
7658
6.38M
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7659
6.38M
      (ctxt->options & XML_PARSE_OLDSAX))
7660
15.6k
      ent = xmlGetPredefinedEntity(name);
7661
6.38M
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7662
6.38M
      (ctxt->userData==ctxt)) {
7663
63.8k
      ent = xmlSAX2GetEntity(ctxt, name);
7664
63.8k
  }
7665
6.38M
    }
7666
6.38M
    if (ctxt->instate == XML_PARSER_EOF)
7667
0
  return(NULL);
7668
    /*
7669
     * [ WFC: Entity Declared ]
7670
     * In a document without any DTD, a document with only an
7671
     * internal DTD subset which contains no parameter entity
7672
     * references, or a document with "standalone='yes'", the
7673
     * Name given in the entity reference must match that in an
7674
     * entity declaration, except that well-formed documents
7675
     * need not declare any of the following entities: amp, lt,
7676
     * gt, apos, quot.
7677
     * The declaration of a parameter entity must precede any
7678
     * reference to it.
7679
     * Similarly, the declaration of a general entity must
7680
     * precede any reference to it which appears in a default
7681
     * value in an attribute-list declaration. Note that if
7682
     * entities are declared in the external subset or in
7683
     * external parameter entities, a non-validating processor
7684
     * is not obligated to read and process their declarations;
7685
     * for such documents, the rule that an entity must be
7686
     * declared is a well-formedness constraint only if
7687
     * standalone='yes'.
7688
     */
7689
6.38M
    if (ent == NULL) {
7690
1.79M
  if ((ctxt->standalone == 1) ||
7691
1.79M
      ((ctxt->hasExternalSubset == 0) &&
7692
1.77M
       (ctxt->hasPErefs == 0))) {
7693
1.02M
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7694
1.02M
         "Entity '%s' not defined\n", name);
7695
1.02M
  } else {
7696
772k
      xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7697
772k
         "Entity '%s' not defined\n", name);
7698
772k
      if ((ctxt->inSubset == 0) &&
7699
772k
    (ctxt->sax != NULL) &&
7700
772k
    (ctxt->sax->reference != NULL)) {
7701
704k
    ctxt->sax->reference(ctxt->userData, name);
7702
704k
      }
7703
772k
  }
7704
1.79M
  ctxt->valid = 0;
7705
1.79M
    }
7706
7707
    /*
7708
     * [ WFC: Parsed Entity ]
7709
     * An entity reference must not contain the name of an
7710
     * unparsed entity
7711
     */
7712
4.58M
    else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7713
4.88k
  xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7714
4.88k
     "Entity reference to unparsed entity %s\n", name);
7715
4.88k
    }
7716
7717
    /*
7718
     * [ WFC: No External Entity References ]
7719
     * Attribute values cannot contain direct or indirect
7720
     * entity references to external entities.
7721
     */
7722
4.57M
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7723
4.57M
       (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7724
20.1k
  xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7725
20.1k
       "Attribute references external entity '%s'\n", name);
7726
20.1k
    }
7727
    /*
7728
     * [ WFC: No < in Attribute Values ]
7729
     * The replacement text of any entity referred to directly or
7730
     * indirectly in an attribute value (other than "&lt;") must
7731
     * not contain a <.
7732
     */
7733
4.55M
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7734
4.55M
       (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7735
1.19M
  if ((ent->flags & XML_ENT_CHECKED_LT) == 0) {
7736
32.5k
            if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7737
902
                ent->flags |= XML_ENT_CONTAINS_LT;
7738
32.5k
            ent->flags |= XML_ENT_CHECKED_LT;
7739
32.5k
        }
7740
1.19M
        if (ent->flags & XML_ENT_CONTAINS_LT)
7741
8.34k
            xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7742
8.34k
                    "'<' in entity '%s' is not allowed in attributes "
7743
8.34k
                    "values\n", name);
7744
1.19M
    }
7745
7746
    /*
7747
     * Internal check, no parameter entities here ...
7748
     */
7749
3.36M
    else {
7750
3.36M
  switch (ent->etype) {
7751
0
      case XML_INTERNAL_PARAMETER_ENTITY:
7752
0
      case XML_EXTERNAL_PARAMETER_ENTITY:
7753
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7754
0
       "Attempt to reference the parameter entity '%s'\n",
7755
0
            name);
7756
0
      break;
7757
3.36M
      default:
7758
3.36M
      break;
7759
3.36M
  }
7760
3.36M
    }
7761
7762
    /*
7763
     * [ WFC: No Recursion ]
7764
     * A parsed entity must not contain a recursive reference
7765
     * to itself, either directly or indirectly.
7766
     * Done somewhere else
7767
     */
7768
6.38M
    return(ent);
7769
6.38M
}
7770
7771
/**
7772
 * xmlParseStringEntityRef:
7773
 * @ctxt:  an XML parser context
7774
 * @str:  a pointer to an index in the string
7775
 *
7776
 * parse ENTITY references declarations, but this version parses it from
7777
 * a string value.
7778
 *
7779
 * [68] EntityRef ::= '&' Name ';'
7780
 *
7781
 * [ WFC: Entity Declared ]
7782
 * In a document without any DTD, a document with only an internal DTD
7783
 * subset which contains no parameter entity references, or a document
7784
 * with "standalone='yes'", the Name given in the entity reference
7785
 * must match that in an entity declaration, except that well-formed
7786
 * documents need not declare any of the following entities: amp, lt,
7787
 * gt, apos, quot.  The declaration of a parameter entity must precede
7788
 * any reference to it.  Similarly, the declaration of a general entity
7789
 * must precede any reference to it which appears in a default value in an
7790
 * attribute-list declaration. Note that if entities are declared in the
7791
 * external subset or in external parameter entities, a non-validating
7792
 * processor is not obligated to read and process their declarations;
7793
 * for such documents, the rule that an entity must be declared is a
7794
 * well-formedness constraint only if standalone='yes'.
7795
 *
7796
 * [ WFC: Parsed Entity ]
7797
 * An entity reference must not contain the name of an unparsed entity
7798
 *
7799
 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7800
 * is updated to the current location in the string.
7801
 */
7802
static xmlEntityPtr
7803
30.4M
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7804
30.4M
    xmlChar *name;
7805
30.4M
    const xmlChar *ptr;
7806
30.4M
    xmlChar cur;
7807
30.4M
    xmlEntityPtr ent = NULL;
7808
7809
30.4M
    if ((str == NULL) || (*str == NULL))
7810
0
        return(NULL);
7811
30.4M
    ptr = *str;
7812
30.4M
    cur = *ptr;
7813
30.4M
    if (cur != '&')
7814
0
  return(NULL);
7815
7816
30.4M
    ptr++;
7817
30.4M
    name = xmlParseStringName(ctxt, &ptr);
7818
30.4M
    if (name == NULL) {
7819
6.58k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7820
6.58k
           "xmlParseStringEntityRef: no name\n");
7821
6.58k
  *str = ptr;
7822
6.58k
  return(NULL);
7823
6.58k
    }
7824
30.4M
    if (*ptr != ';') {
7825
3.02k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7826
3.02k
        xmlFree(name);
7827
3.02k
  *str = ptr;
7828
3.02k
  return(NULL);
7829
3.02k
    }
7830
30.4M
    ptr++;
7831
7832
7833
    /*
7834
     * Predefined entities override any extra definition
7835
     */
7836
30.4M
    if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7837
30.3M
        ent = xmlGetPredefinedEntity(name);
7838
30.3M
        if (ent != NULL) {
7839
24.0k
            xmlFree(name);
7840
24.0k
            *str = ptr;
7841
24.0k
            return(ent);
7842
24.0k
        }
7843
30.3M
    }
7844
7845
    /*
7846
     * Ask first SAX for entity resolution, otherwise try the
7847
     * entities which may have stored in the parser context.
7848
     */
7849
30.4M
    if (ctxt->sax != NULL) {
7850
30.4M
  if (ctxt->sax->getEntity != NULL)
7851
30.4M
      ent = ctxt->sax->getEntity(ctxt->userData, name);
7852
30.4M
  if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7853
21.9k
      ent = xmlGetPredefinedEntity(name);
7854
30.4M
  if ((ent == NULL) && (ctxt->userData==ctxt)) {
7855
1.42M
      ent = xmlSAX2GetEntity(ctxt, name);
7856
1.42M
  }
7857
30.4M
    }
7858
30.4M
    if (ctxt->instate == XML_PARSER_EOF) {
7859
0
  xmlFree(name);
7860
0
  return(NULL);
7861
0
    }
7862
7863
    /*
7864
     * [ WFC: Entity Declared ]
7865
     * In a document without any DTD, a document with only an
7866
     * internal DTD subset which contains no parameter entity
7867
     * references, or a document with "standalone='yes'", the
7868
     * Name given in the entity reference must match that in an
7869
     * entity declaration, except that well-formed documents
7870
     * need not declare any of the following entities: amp, lt,
7871
     * gt, apos, quot.
7872
     * The declaration of a parameter entity must precede any
7873
     * reference to it.
7874
     * Similarly, the declaration of a general entity must
7875
     * precede any reference to it which appears in a default
7876
     * value in an attribute-list declaration. Note that if
7877
     * entities are declared in the external subset or in
7878
     * external parameter entities, a non-validating processor
7879
     * is not obligated to read and process their declarations;
7880
     * for such documents, the rule that an entity must be
7881
     * declared is a well-formedness constraint only if
7882
     * standalone='yes'.
7883
     */
7884
30.4M
    if (ent == NULL) {
7885
1.42M
  if ((ctxt->standalone == 1) ||
7886
1.42M
      ((ctxt->hasExternalSubset == 0) &&
7887
1.42M
       (ctxt->hasPErefs == 0))) {
7888
1.41M
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7889
1.41M
         "Entity '%s' not defined\n", name);
7890
1.41M
  } else {
7891
7.98k
      xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7892
7.98k
        "Entity '%s' not defined\n",
7893
7.98k
        name);
7894
7.98k
  }
7895
  /* TODO ? check regressions ctxt->valid = 0; */
7896
1.42M
    }
7897
7898
    /*
7899
     * [ WFC: Parsed Entity ]
7900
     * An entity reference must not contain the name of an
7901
     * unparsed entity
7902
     */
7903
29.0M
    else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7904
96
  xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7905
96
     "Entity reference to unparsed entity %s\n", name);
7906
96
    }
7907
7908
    /*
7909
     * [ WFC: No External Entity References ]
7910
     * Attribute values cannot contain direct or indirect
7911
     * entity references to external entities.
7912
     */
7913
29.0M
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7914
29.0M
       (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7915
5.77k
  xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7916
5.77k
   "Attribute references external entity '%s'\n", name);
7917
5.77k
    }
7918
    /*
7919
     * [ WFC: No < in Attribute Values ]
7920
     * The replacement text of any entity referred to directly or
7921
     * indirectly in an attribute value (other than "&lt;") must
7922
     * not contain a <.
7923
     */
7924
29.0M
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7925
29.0M
       (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7926
28.9M
  if ((ent->flags & XML_ENT_CHECKED_LT) == 0) {
7927
11.5k
            if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7928
833
                ent->flags |= XML_ENT_CONTAINS_LT;
7929
11.5k
            ent->flags |= XML_ENT_CHECKED_LT;
7930
11.5k
        }
7931
28.9M
        if (ent->flags & XML_ENT_CONTAINS_LT)
7932
43.3k
            xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7933
43.3k
                    "'<' in entity '%s' is not allowed in attributes "
7934
43.3k
                    "values\n", name);
7935
28.9M
    }
7936
7937
    /*
7938
     * Internal check, no parameter entities here ...
7939
     */
7940
46.6k
    else {
7941
46.6k
  switch (ent->etype) {
7942
0
      case XML_INTERNAL_PARAMETER_ENTITY:
7943
0
      case XML_EXTERNAL_PARAMETER_ENTITY:
7944
0
    xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7945
0
       "Attempt to reference the parameter entity '%s'\n",
7946
0
          name);
7947
0
      break;
7948
46.6k
      default:
7949
46.6k
      break;
7950
46.6k
  }
7951
46.6k
    }
7952
7953
    /*
7954
     * [ WFC: No Recursion ]
7955
     * A parsed entity must not contain a recursive reference
7956
     * to itself, either directly or indirectly.
7957
     * Done somewhere else
7958
     */
7959
7960
30.4M
    xmlFree(name);
7961
30.4M
    *str = ptr;
7962
30.4M
    return(ent);
7963
30.4M
}
7964
7965
/**
7966
 * xmlParsePEReference:
7967
 * @ctxt:  an XML parser context
7968
 *
7969
 * DEPRECATED: Internal function, don't use.
7970
 *
7971
 * Parse a parameter entity reference. Always consumes '%'.
7972
 *
7973
 * The entity content is handled directly by pushing it's content as
7974
 * a new input stream.
7975
 *
7976
 * [69] PEReference ::= '%' Name ';'
7977
 *
7978
 * [ WFC: No Recursion ]
7979
 * A parsed entity must not contain a recursive
7980
 * reference to itself, either directly or indirectly.
7981
 *
7982
 * [ WFC: Entity Declared ]
7983
 * In a document without any DTD, a document with only an internal DTD
7984
 * subset which contains no parameter entity references, or a document
7985
 * with "standalone='yes'", ...  ... The declaration of a parameter
7986
 * entity must precede any reference to it...
7987
 *
7988
 * [ VC: Entity Declared ]
7989
 * In a document with an external subset or external parameter entities
7990
 * with "standalone='no'", ...  ... The declaration of a parameter entity
7991
 * must precede any reference to it...
7992
 *
7993
 * [ WFC: In DTD ]
7994
 * Parameter-entity references may only appear in the DTD.
7995
 * NOTE: misleading but this is handled.
7996
 */
7997
void
7998
xmlParsePEReference(xmlParserCtxtPtr ctxt)
7999
37.6M
{
8000
37.6M
    const xmlChar *name;
8001
37.6M
    xmlEntityPtr entity = NULL;
8002
37.6M
    xmlParserInputPtr input;
8003
8004
37.6M
    if (RAW != '%')
8005
0
        return;
8006
37.6M
    NEXT;
8007
37.6M
    name = xmlParseName(ctxt);
8008
37.6M
    if (name == NULL) {
8009
1.34M
  xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
8010
1.34M
  return;
8011
1.34M
    }
8012
36.3M
    if (xmlParserDebugEntities)
8013
0
  xmlGenericError(xmlGenericErrorContext,
8014
0
    "PEReference: %s\n", name);
8015
36.3M
    if (RAW != ';') {
8016
36.9k
  xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
8017
36.9k
        return;
8018
36.9k
    }
8019
8020
36.2M
    NEXT;
8021
8022
    /*
8023
     * Request the entity from SAX
8024
     */
8025
36.2M
    if ((ctxt->sax != NULL) &&
8026
36.2M
  (ctxt->sax->getParameterEntity != NULL))
8027
36.2M
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8028
36.2M
    if (ctxt->instate == XML_PARSER_EOF)
8029
0
  return;
8030
36.2M
    if (entity == NULL) {
8031
  /*
8032
   * [ WFC: Entity Declared ]
8033
   * In a document without any DTD, a document with only an
8034
   * internal DTD subset which contains no parameter entity
8035
   * references, or a document with "standalone='yes'", ...
8036
   * ... The declaration of a parameter entity must precede
8037
   * any reference to it...
8038
   */
8039
6.06M
  if ((ctxt->standalone == 1) ||
8040
6.06M
      ((ctxt->hasExternalSubset == 0) &&
8041
6.06M
       (ctxt->hasPErefs == 0))) {
8042
2.65k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8043
2.65k
            "PEReference: %%%s; not found\n",
8044
2.65k
            name);
8045
6.06M
  } else {
8046
      /*
8047
       * [ VC: Entity Declared ]
8048
       * In a document with an external subset or external
8049
       * parameter entities with "standalone='no'", ...
8050
       * ... The declaration of a parameter entity must
8051
       * precede any reference to it...
8052
       */
8053
6.06M
            if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
8054
3.58M
                xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
8055
3.58M
                                 "PEReference: %%%s; not found\n",
8056
3.58M
                                 name, NULL);
8057
3.58M
            } else
8058
2.47M
                xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8059
2.47M
                              "PEReference: %%%s; not found\n",
8060
2.47M
                              name, NULL);
8061
6.06M
            ctxt->valid = 0;
8062
6.06M
  }
8063
30.2M
    } else {
8064
  /*
8065
   * Internal checking in case the entity quest barfed
8066
   */
8067
30.2M
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8068
30.2M
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8069
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8070
0
      "Internal: %%%s; is not a parameter entity\n",
8071
0
        name, NULL);
8072
30.2M
  } else {
8073
30.2M
            xmlChar start[4];
8074
30.2M
            xmlCharEncoding enc;
8075
30.2M
            unsigned long parentConsumed;
8076
30.2M
            xmlEntityPtr oldEnt;
8077
8078
30.2M
      if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
8079
30.2M
          ((ctxt->options & XML_PARSE_NOENT) == 0) &&
8080
30.2M
    ((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
8081
30.2M
    ((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
8082
30.2M
    ((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
8083
30.2M
    (ctxt->replaceEntities == 0) &&
8084
30.2M
    (ctxt->validate == 0))
8085
159
    return;
8086
8087
30.2M
            if (entity->flags & XML_ENT_EXPANDING) {
8088
221
                xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
8089
221
                xmlHaltParser(ctxt);
8090
221
                return;
8091
221
            }
8092
8093
            /* Must be computed from old input before pushing new input. */
8094
30.2M
            parentConsumed = ctxt->input->parentConsumed;
8095
30.2M
            oldEnt = ctxt->input->entity;
8096
30.2M
            if ((oldEnt == NULL) ||
8097
30.2M
                ((oldEnt->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
8098
29.9M
                 ((oldEnt->flags & XML_ENT_PARSED) == 0))) {
8099
532k
                xmlSaturatedAdd(&parentConsumed, ctxt->input->consumed);
8100
532k
                xmlSaturatedAddSizeT(&parentConsumed,
8101
532k
                                     ctxt->input->cur - ctxt->input->base);
8102
532k
            }
8103
8104
30.2M
      input = xmlNewEntityInputStream(ctxt, entity);
8105
30.2M
      if (xmlPushInput(ctxt, input) < 0) {
8106
2.10k
                xmlFreeInputStream(input);
8107
2.10k
    return;
8108
2.10k
            }
8109
8110
30.1M
            entity->flags |= XML_ENT_EXPANDING;
8111
8112
30.1M
            input->parentConsumed = parentConsumed;
8113
8114
30.1M
      if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
8115
                /*
8116
                 * Get the 4 first bytes and decode the charset
8117
                 * if enc != XML_CHAR_ENCODING_NONE
8118
                 * plug some encoding conversion routines.
8119
                 * Note that, since we may have some non-UTF8
8120
                 * encoding (like UTF16, bug 135229), the 'length'
8121
                 * is not known, but we can calculate based upon
8122
                 * the amount of data in the buffer.
8123
                 */
8124
8.28k
                GROW
8125
8.28k
                if (ctxt->instate == XML_PARSER_EOF)
8126
0
                    return;
8127
8.28k
                if ((ctxt->input->end - ctxt->input->cur)>=4) {
8128
8.25k
                    start[0] = RAW;
8129
8.25k
                    start[1] = NXT(1);
8130
8.25k
                    start[2] = NXT(2);
8131
8.25k
                    start[3] = NXT(3);
8132
8.25k
                    enc = xmlDetectCharEncoding(start, 4);
8133
8.25k
                    if (enc != XML_CHAR_ENCODING_NONE) {
8134
539
                        xmlSwitchEncoding(ctxt, enc);
8135
539
                    }
8136
8.25k
                }
8137
8138
8.28k
                if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
8139
8.28k
                    (IS_BLANK_CH(NXT(5)))) {
8140
358
                    xmlParseTextDecl(ctxt);
8141
358
                }
8142
8.28k
            }
8143
30.1M
  }
8144
30.2M
    }
8145
36.2M
    ctxt->hasPErefs = 1;
8146
36.2M
}
8147
8148
/**
8149
 * xmlLoadEntityContent:
8150
 * @ctxt:  an XML parser context
8151
 * @entity: an unloaded system entity
8152
 *
8153
 * Load the original content of the given system entity from the
8154
 * ExternalID/SystemID given. This is to be used for Included in Literal
8155
 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
8156
 *
8157
 * Returns 0 in case of success and -1 in case of failure
8158
 */
8159
static int
8160
5.23k
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
8161
5.23k
    xmlParserInputPtr input;
8162
5.23k
    xmlBufferPtr buf;
8163
5.23k
    int l, c;
8164
5.23k
    int count = 0;
8165
8166
5.23k
    if ((ctxt == NULL) || (entity == NULL) ||
8167
5.23k
        ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
8168
5.23k
   (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
8169
5.23k
  (entity->content != NULL)) {
8170
0
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8171
0
              "xmlLoadEntityContent parameter error");
8172
0
        return(-1);
8173
0
    }
8174
8175
5.23k
    if (xmlParserDebugEntities)
8176
0
  xmlGenericError(xmlGenericErrorContext,
8177
0
    "Reading %s entity content input\n", entity->name);
8178
8179
5.23k
    buf = xmlBufferCreate();
8180
5.23k
    if (buf == NULL) {
8181
0
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8182
0
              "xmlLoadEntityContent parameter error");
8183
0
        return(-1);
8184
0
    }
8185
5.23k
    xmlBufferSetAllocationScheme(buf, XML_BUFFER_ALLOC_DOUBLEIT);
8186
8187
5.23k
    input = xmlNewEntityInputStream(ctxt, entity);
8188
5.23k
    if (input == NULL) {
8189
425
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8190
425
              "xmlLoadEntityContent input error");
8191
425
  xmlBufferFree(buf);
8192
425
        return(-1);
8193
425
    }
8194
8195
    /*
8196
     * Push the entity as the current input, read char by char
8197
     * saving to the buffer until the end of the entity or an error
8198
     */
8199
4.80k
    if (xmlPushInput(ctxt, input) < 0) {
8200
0
        xmlBufferFree(buf);
8201
0
  xmlFreeInputStream(input);
8202
0
  return(-1);
8203
0
    }
8204
8205
4.80k
    GROW;
8206
4.80k
    c = CUR_CHAR(l);
8207
46.4M
    while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
8208
46.4M
           (IS_CHAR(c))) {
8209
46.4M
        xmlBufferAdd(buf, ctxt->input->cur, l);
8210
46.4M
  if (count++ > XML_PARSER_CHUNK_SIZE) {
8211
453k
      count = 0;
8212
453k
      GROW;
8213
453k
            if (ctxt->instate == XML_PARSER_EOF) {
8214
0
                xmlBufferFree(buf);
8215
0
                return(-1);
8216
0
            }
8217
453k
  }
8218
46.4M
  NEXTL(l);
8219
46.4M
  c = CUR_CHAR(l);
8220
46.4M
  if (c == 0) {
8221
3.31k
      count = 0;
8222
3.31k
      GROW;
8223
3.31k
            if (ctxt->instate == XML_PARSER_EOF) {
8224
0
                xmlBufferFree(buf);
8225
0
                return(-1);
8226
0
            }
8227
3.31k
      c = CUR_CHAR(l);
8228
3.31k
  }
8229
46.4M
    }
8230
8231
4.80k
    if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
8232
2.06k
        xmlSaturatedAdd(&ctxt->sizeentities, ctxt->input->consumed);
8233
2.06k
        xmlPopInput(ctxt);
8234
2.73k
    } else if (!IS_CHAR(c)) {
8235
2.73k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8236
2.73k
                          "xmlLoadEntityContent: invalid char value %d\n",
8237
2.73k
                    c);
8238
2.73k
  xmlBufferFree(buf);
8239
2.73k
  return(-1);
8240
2.73k
    }
8241
2.06k
    entity->content = buf->content;
8242
2.06k
    entity->length = buf->use;
8243
2.06k
    buf->content = NULL;
8244
2.06k
    xmlBufferFree(buf);
8245
8246
2.06k
    return(0);
8247
4.80k
}
8248
8249
/**
8250
 * xmlParseStringPEReference:
8251
 * @ctxt:  an XML parser context
8252
 * @str:  a pointer to an index in the string
8253
 *
8254
 * parse PEReference declarations
8255
 *
8256
 * [69] PEReference ::= '%' Name ';'
8257
 *
8258
 * [ WFC: No Recursion ]
8259
 * A parsed entity must not contain a recursive
8260
 * reference to itself, either directly or indirectly.
8261
 *
8262
 * [ WFC: Entity Declared ]
8263
 * In a document without any DTD, a document with only an internal DTD
8264
 * subset which contains no parameter entity references, or a document
8265
 * with "standalone='yes'", ...  ... The declaration of a parameter
8266
 * entity must precede any reference to it...
8267
 *
8268
 * [ VC: Entity Declared ]
8269
 * In a document with an external subset or external parameter entities
8270
 * with "standalone='no'", ...  ... The declaration of a parameter entity
8271
 * must precede any reference to it...
8272
 *
8273
 * [ WFC: In DTD ]
8274
 * Parameter-entity references may only appear in the DTD.
8275
 * NOTE: misleading but this is handled.
8276
 *
8277
 * Returns the string of the entity content.
8278
 *         str is updated to the current value of the index
8279
 */
8280
static xmlEntityPtr
8281
1.62M
xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8282
1.62M
    const xmlChar *ptr;
8283
1.62M
    xmlChar cur;
8284
1.62M
    xmlChar *name;
8285
1.62M
    xmlEntityPtr entity = NULL;
8286
8287
1.62M
    if ((str == NULL) || (*str == NULL)) return(NULL);
8288
1.62M
    ptr = *str;
8289
1.62M
    cur = *ptr;
8290
1.62M
    if (cur != '%')
8291
0
        return(NULL);
8292
1.62M
    ptr++;
8293
1.62M
    name = xmlParseStringName(ctxt, &ptr);
8294
1.62M
    if (name == NULL) {
8295
1.43M
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8296
1.43M
           "xmlParseStringPEReference: no name\n");
8297
1.43M
  *str = ptr;
8298
1.43M
  return(NULL);
8299
1.43M
    }
8300
181k
    cur = *ptr;
8301
181k
    if (cur != ';') {
8302
3.32k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8303
3.32k
  xmlFree(name);
8304
3.32k
  *str = ptr;
8305
3.32k
  return(NULL);
8306
3.32k
    }
8307
178k
    ptr++;
8308
8309
    /*
8310
     * Request the entity from SAX
8311
     */
8312
178k
    if ((ctxt->sax != NULL) &&
8313
178k
  (ctxt->sax->getParameterEntity != NULL))
8314
178k
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8315
178k
    if (ctxt->instate == XML_PARSER_EOF) {
8316
0
  xmlFree(name);
8317
0
  *str = ptr;
8318
0
  return(NULL);
8319
0
    }
8320
178k
    if (entity == NULL) {
8321
  /*
8322
   * [ WFC: Entity Declared ]
8323
   * In a document without any DTD, a document with only an
8324
   * internal DTD subset which contains no parameter entity
8325
   * references, or a document with "standalone='yes'", ...
8326
   * ... The declaration of a parameter entity must precede
8327
   * any reference to it...
8328
   */
8329
33.3k
  if ((ctxt->standalone == 1) ||
8330
33.3k
      ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8331
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8332
0
     "PEReference: %%%s; not found\n", name);
8333
33.3k
  } else {
8334
      /*
8335
       * [ VC: Entity Declared ]
8336
       * In a document with an external subset or external
8337
       * parameter entities with "standalone='no'", ...
8338
       * ... The declaration of a parameter entity must
8339
       * precede any reference to it...
8340
       */
8341
33.3k
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8342
33.3k
        "PEReference: %%%s; not found\n",
8343
33.3k
        name, NULL);
8344
33.3k
      ctxt->valid = 0;
8345
33.3k
  }
8346
144k
    } else {
8347
  /*
8348
   * Sanity check in case the lookup returned an entity of the wrong type
8349
   */
8350
144k
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8351
144k
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8352
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8353
0
        "%%%s; is not a parameter entity\n",
8354
0
        name, NULL);
8355
0
  }
8356
144k
    }
8357
178k
    ctxt->hasPErefs = 1;
8358
178k
    xmlFree(name);
8359
178k
    *str = ptr;
8360
178k
    return(entity);
8361
178k
}
8362
8363
/**
8364
 * xmlParseDocTypeDecl:
8365
 * @ctxt:  an XML parser context
8366
 *
8367
 * DEPRECATED: Internal function, don't use.
8368
 *
8369
 * parse a DOCTYPE declaration
8370
 *
8371
 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8372
 *                      ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8373
 *
8374
 * [ VC: Root Element Type ]
8375
 * The Name in the document type declaration must match the element
8376
 * type of the root element.
8377
 */
8378
8379
void
8380
450k
xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
8381
450k
    const xmlChar *name = NULL;
8382
450k
    xmlChar *ExternalID = NULL;
8383
450k
    xmlChar *URI = NULL;
8384
8385
    /*
8386
     * We know that '<!DOCTYPE' has been detected.
8387
     */
8388
450k
    SKIP(9);
8389
8390
450k
    SKIP_BLANKS;
8391
8392
    /*
8393
     * Parse the DOCTYPE name.
8394
     */
8395
450k
    name = xmlParseName(ctxt);
8396
450k
    if (name == NULL) {
8397
1.38k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8398
1.38k
           "xmlParseDocTypeDecl : no DOCTYPE name !\n");
8399
1.38k
    }
8400
450k
    ctxt->intSubName = name;
8401
8402
450k
    SKIP_BLANKS;
8403
8404
    /*
8405
     * Check for SystemID and ExternalID
8406
     */
8407
450k
    URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8408
8409
450k
    if ((URI != NULL) || (ExternalID != NULL)) {
8410
154k
        ctxt->hasExternalSubset = 1;
8411
154k
    }
8412
450k
    ctxt->extSubURI = URI;
8413
450k
    ctxt->extSubSystem = ExternalID;
8414
8415
450k
    SKIP_BLANKS;
8416
8417
    /*
8418
     * Create and update the internal subset.
8419
     */
8420
450k
    if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8421
450k
  (!ctxt->disableSAX))
8422
439k
  ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8423
450k
    if (ctxt->instate == XML_PARSER_EOF)
8424
0
  return;
8425
8426
    /*
8427
     * Are there any internal subset declarations?
8428
     * they are handled separately in xmlParseInternalSubset()
8429
     */
8430
450k
    if (RAW == '[')
8431
327k
  return;
8432
8433
    /*
8434
     * We should be at the end of the DOCTYPE declaration.
8435
     */
8436
123k
    if (RAW != '>') {
8437
14.2k
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8438
14.2k
    }
8439
123k
    NEXT;
8440
123k
}
8441
8442
/**
8443
 * xmlParseInternalSubset:
8444
 * @ctxt:  an XML parser context
8445
 *
8446
 * parse the internal subset declaration
8447
 *
8448
 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8449
 */
8450
8451
static void
8452
326k
xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8453
    /*
8454
     * Is there any DTD definition ?
8455
     */
8456
326k
    if (RAW == '[') {
8457
326k
        int baseInputNr = ctxt->inputNr;
8458
326k
        ctxt->instate = XML_PARSER_DTD;
8459
326k
        NEXT;
8460
  /*
8461
   * Parse the succession of Markup declarations and
8462
   * PEReferences.
8463
   * Subsequence (markupdecl | PEReference | S)*
8464
   */
8465
326k
  SKIP_BLANKS;
8466
30.8M
  while (((RAW != ']') || (ctxt->inputNr > baseInputNr)) &&
8467
30.8M
               (ctxt->instate != XML_PARSER_EOF)) {
8468
8469
            /*
8470
             * Conditional sections are allowed from external entities included
8471
             * by PE References in the internal subset.
8472
             */
8473
30.5M
            if ((ctxt->inputNr > 1) && (ctxt->input->filename != NULL) &&
8474
30.5M
                (RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
8475
0
                xmlParseConditionalSections(ctxt);
8476
30.5M
            } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
8477
30.3M
          xmlParseMarkupDecl(ctxt);
8478
30.3M
            } else if (RAW == '%') {
8479
119k
          xmlParsePEReference(ctxt);
8480
119k
            } else {
8481
96.1k
    xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8482
96.1k
                        "xmlParseInternalSubset: error detected in"
8483
96.1k
                        " Markup declaration\n");
8484
96.1k
                xmlHaltParser(ctxt);
8485
96.1k
                return;
8486
96.1k
            }
8487
30.4M
      SKIP_BLANKS;
8488
30.4M
  }
8489
230k
  if (RAW == ']') {
8490
205k
      NEXT;
8491
205k
      SKIP_BLANKS;
8492
205k
  }
8493
230k
    }
8494
8495
    /*
8496
     * We should be at the end of the DOCTYPE declaration.
8497
     */
8498
230k
    if (RAW != '>') {
8499
25.6k
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8500
25.6k
  return;
8501
25.6k
    }
8502
204k
    NEXT;
8503
204k
}
8504
8505
#ifdef LIBXML_SAX1_ENABLED
8506
/**
8507
 * xmlParseAttribute:
8508
 * @ctxt:  an XML parser context
8509
 * @value:  a xmlChar ** used to store the value of the attribute
8510
 *
8511
 * DEPRECATED: Internal function, don't use.
8512
 *
8513
 * parse an attribute
8514
 *
8515
 * [41] Attribute ::= Name Eq AttValue
8516
 *
8517
 * [ WFC: No External Entity References ]
8518
 * Attribute values cannot contain direct or indirect entity references
8519
 * to external entities.
8520
 *
8521
 * [ WFC: No < in Attribute Values ]
8522
 * The replacement text of any entity referred to directly or indirectly in
8523
 * an attribute value (other than "&lt;") must not contain a <.
8524
 *
8525
 * [ VC: Attribute Value Type ]
8526
 * The attribute must have been declared; the value must be of the type
8527
 * declared for it.
8528
 *
8529
 * [25] Eq ::= S? '=' S?
8530
 *
8531
 * With namespace:
8532
 *
8533
 * [NS 11] Attribute ::= QName Eq AttValue
8534
 *
8535
 * Also the case QName == xmlns:??? is handled independently as a namespace
8536
 * definition.
8537
 *
8538
 * Returns the attribute name, and the value in *value.
8539
 */
8540
8541
const xmlChar *
8542
8.24M
xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8543
8.24M
    const xmlChar *name;
8544
8.24M
    xmlChar *val;
8545
8546
8.24M
    *value = NULL;
8547
8.24M
    GROW;
8548
8.24M
    name = xmlParseName(ctxt);
8549
8.24M
    if (name == NULL) {
8550
1.16M
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8551
1.16M
                 "error parsing attribute name\n");
8552
1.16M
        return(NULL);
8553
1.16M
    }
8554
8555
    /*
8556
     * read the value
8557
     */
8558
7.08M
    SKIP_BLANKS;
8559
7.08M
    if (RAW == '=') {
8560
6.47M
        NEXT;
8561
6.47M
  SKIP_BLANKS;
8562
6.47M
  val = xmlParseAttValue(ctxt);
8563
6.47M
  ctxt->instate = XML_PARSER_CONTENT;
8564
6.47M
    } else {
8565
605k
  xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8566
605k
         "Specification mandates value for attribute %s\n", name);
8567
605k
  return(name);
8568
605k
    }
8569
8570
    /*
8571
     * Check that xml:lang conforms to the specification
8572
     * No longer reported as an error, only a warning is generated now
8573
     * since this was deprecated in XML second edition
8574
     */
8575
6.47M
    if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8576
159k
  if (!xmlCheckLanguageID(val)) {
8577
106k
      xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8578
106k
              "Malformed value for xml:lang : %s\n",
8579
106k
        val, NULL);
8580
106k
  }
8581
159k
    }
8582
8583
    /*
8584
     * Check that xml:space conforms to the specification
8585
     */
8586
6.47M
    if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8587
1.46k
  if (xmlStrEqual(val, BAD_CAST "default"))
8588
0
      *(ctxt->space) = 0;
8589
1.46k
  else if (xmlStrEqual(val, BAD_CAST "preserve"))
8590
397
      *(ctxt->space) = 1;
8591
1.06k
  else {
8592
1.06k
    xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8593
1.06k
"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8594
1.06k
                                 val, NULL);
8595
1.06k
  }
8596
1.46k
    }
8597
8598
6.47M
    *value = val;
8599
6.47M
    return(name);
8600
7.08M
}
8601
8602
/**
8603
 * xmlParseStartTag:
8604
 * @ctxt:  an XML parser context
8605
 *
8606
 * DEPRECATED: Internal function, don't use.
8607
 *
8608
 * Parse a start tag. Always consumes '<'.
8609
 *
8610
 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8611
 *
8612
 * [ WFC: Unique Att Spec ]
8613
 * No attribute name may appear more than once in the same start-tag or
8614
 * empty-element tag.
8615
 *
8616
 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8617
 *
8618
 * [ WFC: Unique Att Spec ]
8619
 * No attribute name may appear more than once in the same start-tag or
8620
 * empty-element tag.
8621
 *
8622
 * With namespace:
8623
 *
8624
 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8625
 *
8626
 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8627
 *
8628
 * Returns the element name parsed
8629
 */
8630
8631
const xmlChar *
8632
6.83M
xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8633
6.83M
    const xmlChar *name;
8634
6.83M
    const xmlChar *attname;
8635
6.83M
    xmlChar *attvalue;
8636
6.83M
    const xmlChar **atts = ctxt->atts;
8637
6.83M
    int nbatts = 0;
8638
6.83M
    int maxatts = ctxt->maxatts;
8639
6.83M
    int i;
8640
8641
6.83M
    if (RAW != '<') return(NULL);
8642
6.83M
    NEXT1;
8643
8644
6.83M
    name = xmlParseName(ctxt);
8645
6.83M
    if (name == NULL) {
8646
323k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8647
323k
       "xmlParseStartTag: invalid element name\n");
8648
323k
        return(NULL);
8649
323k
    }
8650
8651
    /*
8652
     * Now parse the attributes, up to the end of the tag:
8653
     *
8654
     * (S Attribute)* S?
8655
     */
8656
6.50M
    SKIP_BLANKS;
8657
6.50M
    GROW;
8658
8659
10.5M
    while (((RAW != '>') &&
8660
10.5M
     ((RAW != '/') || (NXT(1) != '>')) &&
8661
10.5M
     (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
8662
8.24M
  attname = xmlParseAttribute(ctxt, &attvalue);
8663
8.24M
        if (attname == NULL) {
8664
1.16M
      xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8665
1.16M
         "xmlParseStartTag: problem parsing attributes\n");
8666
1.16M
      break;
8667
1.16M
  }
8668
7.08M
        if (attvalue != NULL) {
8669
      /*
8670
       * [ WFC: Unique Att Spec ]
8671
       * No attribute name may appear more than once in the same
8672
       * start-tag or empty-element tag.
8673
       */
8674
9.02M
      for (i = 0; i < nbatts;i += 2) {
8675
2.62M
          if (xmlStrEqual(atts[i], attname)) {
8676
31.4k
        xmlErrAttributeDup(ctxt, NULL, attname);
8677
31.4k
        xmlFree(attvalue);
8678
31.4k
        goto failed;
8679
31.4k
    }
8680
2.62M
      }
8681
      /*
8682
       * Add the pair to atts
8683
       */
8684
6.39M
      if (atts == NULL) {
8685
113k
          maxatts = 22; /* allow for 10 attrs by default */
8686
113k
          atts = (const xmlChar **)
8687
113k
           xmlMalloc(maxatts * sizeof(xmlChar *));
8688
113k
    if (atts == NULL) {
8689
0
        xmlErrMemory(ctxt, NULL);
8690
0
        if (attvalue != NULL)
8691
0
      xmlFree(attvalue);
8692
0
        goto failed;
8693
0
    }
8694
113k
    ctxt->atts = atts;
8695
113k
    ctxt->maxatts = maxatts;
8696
6.28M
      } else if (nbatts + 4 > maxatts) {
8697
163
          const xmlChar **n;
8698
8699
163
          maxatts *= 2;
8700
163
          n = (const xmlChar **) xmlRealloc((void *) atts,
8701
163
               maxatts * sizeof(const xmlChar *));
8702
163
    if (n == NULL) {
8703
0
        xmlErrMemory(ctxt, NULL);
8704
0
        if (attvalue != NULL)
8705
0
      xmlFree(attvalue);
8706
0
        goto failed;
8707
0
    }
8708
163
    atts = n;
8709
163
    ctxt->atts = atts;
8710
163
    ctxt->maxatts = maxatts;
8711
163
      }
8712
6.39M
      atts[nbatts++] = attname;
8713
6.39M
      atts[nbatts++] = attvalue;
8714
6.39M
      atts[nbatts] = NULL;
8715
6.39M
      atts[nbatts + 1] = NULL;
8716
6.39M
  } else {
8717
651k
      if (attvalue != NULL)
8718
0
    xmlFree(attvalue);
8719
651k
  }
8720
8721
7.08M
failed:
8722
8723
7.08M
  GROW
8724
7.08M
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8725
3.07M
      break;
8726
4.00M
  if (SKIP_BLANKS == 0) {
8727
1.45M
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8728
1.45M
         "attributes construct error\n");
8729
1.45M
  }
8730
4.00M
  SHRINK;
8731
4.00M
        GROW;
8732
4.00M
    }
8733
8734
    /*
8735
     * SAX: Start of Element !
8736
     */
8737
6.50M
    if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8738
6.50M
  (!ctxt->disableSAX)) {
8739
6.25M
  if (nbatts > 0)
8740
3.85M
      ctxt->sax->startElement(ctxt->userData, name, atts);
8741
2.40M
  else
8742
2.40M
      ctxt->sax->startElement(ctxt->userData, name, NULL);
8743
6.25M
    }
8744
8745
6.50M
    if (atts != NULL) {
8746
        /* Free only the content strings */
8747
12.5M
        for (i = 1;i < nbatts;i+=2)
8748
6.39M
      if (atts[i] != NULL)
8749
6.39M
         xmlFree((xmlChar *) atts[i]);
8750
6.14M
    }
8751
6.50M
    return(name);
8752
6.50M
}
8753
8754
/**
8755
 * xmlParseEndTag1:
8756
 * @ctxt:  an XML parser context
8757
 * @line:  line of the start tag
8758
 * @nsNr:  number of namespaces on the start tag
8759
 *
8760
 * Parse an end tag. Always consumes '</'.
8761
 *
8762
 * [42] ETag ::= '</' Name S? '>'
8763
 *
8764
 * With namespace
8765
 *
8766
 * [NS 9] ETag ::= '</' QName S? '>'
8767
 */
8768
8769
static void
8770
1.72M
xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8771
1.72M
    const xmlChar *name;
8772
8773
1.72M
    GROW;
8774
1.72M
    if ((RAW != '<') || (NXT(1) != '/')) {
8775
0
  xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8776
0
           "xmlParseEndTag: '</' not found\n");
8777
0
  return;
8778
0
    }
8779
1.72M
    SKIP(2);
8780
8781
1.72M
    name = xmlParseNameAndCompare(ctxt,ctxt->name);
8782
8783
    /*
8784
     * We should definitely be at the ending "S? '>'" part
8785
     */
8786
1.72M
    GROW;
8787
1.72M
    SKIP_BLANKS;
8788
1.72M
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8789
172k
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8790
172k
    } else
8791
1.55M
  NEXT1;
8792
8793
    /*
8794
     * [ WFC: Element Type Match ]
8795
     * The Name in an element's end-tag must match the element type in the
8796
     * start-tag.
8797
     *
8798
     */
8799
1.72M
    if (name != (xmlChar*)1) {
8800
417k
        if (name == NULL) name = BAD_CAST "unparsable";
8801
417k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8802
417k
         "Opening and ending tag mismatch: %s line %d and %s\n",
8803
417k
                    ctxt->name, line, name);
8804
417k
    }
8805
8806
    /*
8807
     * SAX: End of Tag
8808
     */
8809
1.72M
    if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8810
1.72M
  (!ctxt->disableSAX))
8811
1.59M
        ctxt->sax->endElement(ctxt->userData, ctxt->name);
8812
8813
1.72M
    namePop(ctxt);
8814
1.72M
    spacePop(ctxt);
8815
1.72M
    return;
8816
1.72M
}
8817
8818
/**
8819
 * xmlParseEndTag:
8820
 * @ctxt:  an XML parser context
8821
 *
8822
 * DEPRECATED: Internal function, don't use.
8823
 *
8824
 * parse an end of tag
8825
 *
8826
 * [42] ETag ::= '</' Name S? '>'
8827
 *
8828
 * With namespace
8829
 *
8830
 * [NS 9] ETag ::= '</' QName S? '>'
8831
 */
8832
8833
void
8834
0
xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8835
0
    xmlParseEndTag1(ctxt, 0);
8836
0
}
8837
#endif /* LIBXML_SAX1_ENABLED */
8838
8839
/************************************************************************
8840
 *                  *
8841
 *          SAX 2 specific operations       *
8842
 *                  *
8843
 ************************************************************************/
8844
8845
/*
8846
 * xmlGetNamespace:
8847
 * @ctxt:  an XML parser context
8848
 * @prefix:  the prefix to lookup
8849
 *
8850
 * Lookup the namespace name for the @prefix (which can be NULL)
8851
 * The prefix must come from the @ctxt->dict dictionary
8852
 *
8853
 * Returns the namespace name or NULL if not bound
8854
 */
8855
static const xmlChar *
8856
13.0M
xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8857
13.0M
    int i;
8858
8859
13.0M
    if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
8860
126M
    for (i = ctxt->nsNr - 2;i >= 0;i-=2)
8861
116M
        if (ctxt->nsTab[i] == prefix) {
8862
2.77M
      if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8863
82.1k
          return(NULL);
8864
2.69M
      return(ctxt->nsTab[i + 1]);
8865
2.77M
  }
8866
9.96M
    return(NULL);
8867
12.7M
}
8868
8869
/**
8870
 * xmlParseQName:
8871
 * @ctxt:  an XML parser context
8872
 * @prefix:  pointer to store the prefix part
8873
 *
8874
 * parse an XML Namespace QName
8875
 *
8876
 * [6]  QName  ::= (Prefix ':')? LocalPart
8877
 * [7]  Prefix  ::= NCName
8878
 * [8]  LocalPart  ::= NCName
8879
 *
8880
 * Returns the Name parsed or NULL
8881
 */
8882
8883
static const xmlChar *
8884
28.7M
xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8885
28.7M
    const xmlChar *l, *p;
8886
8887
28.7M
    GROW;
8888
8889
28.7M
    l = xmlParseNCName(ctxt);
8890
28.7M
    if (l == NULL) {
8891
2.78M
        if (CUR == ':') {
8892
39.0k
      l = xmlParseName(ctxt);
8893
39.0k
      if (l != NULL) {
8894
38.9k
          xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8895
38.9k
             "Failed to parse QName '%s'\n", l, NULL, NULL);
8896
38.9k
    *prefix = NULL;
8897
38.9k
    return(l);
8898
38.9k
      }
8899
39.0k
  }
8900
2.74M
        return(NULL);
8901
2.78M
    }
8902
25.9M
    if (CUR == ':') {
8903
3.83M
        NEXT;
8904
3.83M
  p = l;
8905
3.83M
  l = xmlParseNCName(ctxt);
8906
3.83M
  if (l == NULL) {
8907
145k
      xmlChar *tmp;
8908
8909
145k
            if (ctxt->instate == XML_PARSER_EOF)
8910
0
                return(NULL);
8911
145k
            xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8912
145k
               "Failed to parse QName '%s:'\n", p, NULL, NULL);
8913
145k
      l = xmlParseNmtoken(ctxt);
8914
145k
      if (l == NULL) {
8915
106k
                if (ctxt->instate == XML_PARSER_EOF)
8916
0
                    return(NULL);
8917
106k
    tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8918
106k
            } else {
8919
38.8k
    tmp = xmlBuildQName(l, p, NULL, 0);
8920
38.8k
    xmlFree((char *)l);
8921
38.8k
      }
8922
145k
      p = xmlDictLookup(ctxt->dict, tmp, -1);
8923
145k
      if (tmp != NULL) xmlFree(tmp);
8924
145k
      *prefix = NULL;
8925
145k
      return(p);
8926
145k
  }
8927
3.69M
  if (CUR == ':') {
8928
69.8k
      xmlChar *tmp;
8929
8930
69.8k
            xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8931
69.8k
               "Failed to parse QName '%s:%s:'\n", p, l, NULL);
8932
69.8k
      NEXT;
8933
69.8k
      tmp = (xmlChar *) xmlParseName(ctxt);
8934
69.8k
      if (tmp != NULL) {
8935
55.4k
          tmp = xmlBuildQName(tmp, l, NULL, 0);
8936
55.4k
    l = xmlDictLookup(ctxt->dict, tmp, -1);
8937
55.4k
    if (tmp != NULL) xmlFree(tmp);
8938
55.4k
    *prefix = p;
8939
55.4k
    return(l);
8940
55.4k
      }
8941
14.3k
            if (ctxt->instate == XML_PARSER_EOF)
8942
0
                return(NULL);
8943
14.3k
      tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8944
14.3k
      l = xmlDictLookup(ctxt->dict, tmp, -1);
8945
14.3k
      if (tmp != NULL) xmlFree(tmp);
8946
14.3k
      *prefix = p;
8947
14.3k
      return(l);
8948
14.3k
  }
8949
3.62M
  *prefix = p;
8950
3.62M
    } else
8951
22.0M
        *prefix = NULL;
8952
25.7M
    return(l);
8953
25.9M
}
8954
8955
/**
8956
 * xmlParseQNameAndCompare:
8957
 * @ctxt:  an XML parser context
8958
 * @name:  the localname
8959
 * @prefix:  the prefix, if any.
8960
 *
8961
 * parse an XML name and compare it for a match
8962
 * (specialized for endtag parsing)
8963
 *
8964
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8965
 * and the name for mismatch
8966
 */
8967
8968
static const xmlChar *
8969
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8970
760k
                        xmlChar const *prefix) {
8971
760k
    const xmlChar *cmp;
8972
760k
    const xmlChar *in;
8973
760k
    const xmlChar *ret;
8974
760k
    const xmlChar *prefix2;
8975
8976
760k
    if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8977
8978
760k
    GROW;
8979
760k
    in = ctxt->input->cur;
8980
8981
760k
    cmp = prefix;
8982
2.36M
    while (*in != 0 && *in == *cmp) {
8983
1.60M
  ++in;
8984
1.60M
  ++cmp;
8985
1.60M
    }
8986
760k
    if ((*cmp == 0) && (*in == ':')) {
8987
608k
        in++;
8988
608k
  cmp = name;
8989
3.61M
  while (*in != 0 && *in == *cmp) {
8990
3.00M
      ++in;
8991
3.00M
      ++cmp;
8992
3.00M
  }
8993
608k
  if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8994
      /* success */
8995
373k
            ctxt->input->col += in - ctxt->input->cur;
8996
373k
      ctxt->input->cur = in;
8997
373k
      return((const xmlChar*) 1);
8998
373k
  }
8999
608k
    }
9000
    /*
9001
     * all strings come from the dictionary, so pointers can be compared directly
9002
     */
9003
387k
    ret = xmlParseQName (ctxt, &prefix2);
9004
387k
    if ((ret == name) && (prefix == prefix2))
9005
2.30k
  return((const xmlChar*) 1);
9006
384k
    return ret;
9007
387k
}
9008
9009
/**
9010
 * xmlParseAttValueInternal:
9011
 * @ctxt:  an XML parser context
9012
 * @len:  attribute len result
9013
 * @alloc:  whether the attribute was reallocated as a new string
9014
 * @normalize:  if 1 then further non-CDATA normalization must be done
9015
 *
9016
 * parse a value for an attribute.
9017
 * NOTE: if no normalization is needed, the routine will return pointers
9018
 *       directly from the data buffer.
9019
 *
9020
 * 3.3.3 Attribute-Value Normalization:
9021
 * Before the value of an attribute is passed to the application or
9022
 * checked for validity, the XML processor must normalize it as follows:
9023
 * - a character reference is processed by appending the referenced
9024
 *   character to the attribute value
9025
 * - an entity reference is processed by recursively processing the
9026
 *   replacement text of the entity
9027
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
9028
 *   appending #x20 to the normalized value, except that only a single
9029
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
9030
 *   parsed entity or the literal entity value of an internal parsed entity
9031
 * - other characters are processed by appending them to the normalized value
9032
 * If the declared value is not CDATA, then the XML processor must further
9033
 * process the normalized attribute value by discarding any leading and
9034
 * trailing space (#x20) characters, and by replacing sequences of space
9035
 * (#x20) characters by a single space (#x20) character.
9036
 * All attributes for which no declaration has been read should be treated
9037
 * by a non-validating parser as if declared CDATA.
9038
 *
9039
 * Returns the AttValue parsed or NULL. The value has to be freed by the
9040
 *     caller if it was copied, this can be detected by val[*len] == 0.
9041
 */
9042
9043
#define GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end) \
9044
10.6k
    const xmlChar *oldbase = ctxt->input->base;\
9045
10.6k
    GROW;\
9046
10.6k
    if (ctxt->instate == XML_PARSER_EOF)\
9047
10.6k
        return(NULL);\
9048
10.6k
    if (oldbase != ctxt->input->base) {\
9049
0
        ptrdiff_t delta = ctxt->input->base - oldbase;\
9050
0
        start = start + delta;\
9051
0
        in = in + delta;\
9052
0
    }\
9053
10.6k
    end = ctxt->input->end;
9054
9055
static xmlChar *
9056
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
9057
                         int normalize)
9058
20.0M
{
9059
20.0M
    xmlChar limit = 0;
9060
20.0M
    const xmlChar *in = NULL, *start, *end, *last;
9061
20.0M
    xmlChar *ret = NULL;
9062
20.0M
    int line, col;
9063
20.0M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9064
5.32M
                    XML_MAX_HUGE_LENGTH :
9065
20.0M
                    XML_MAX_TEXT_LENGTH;
9066
9067
20.0M
    GROW;
9068
20.0M
    in = (xmlChar *) CUR_PTR;
9069
20.0M
    line = ctxt->input->line;
9070
20.0M
    col = ctxt->input->col;
9071
20.0M
    if (*in != '"' && *in != '\'') {
9072
116k
        xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
9073
116k
        return (NULL);
9074
116k
    }
9075
19.9M
    ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
9076
9077
    /*
9078
     * try to handle in this routine the most common case where no
9079
     * allocation of a new string is required and where content is
9080
     * pure ASCII.
9081
     */
9082
19.9M
    limit = *in++;
9083
19.9M
    col++;
9084
19.9M
    end = ctxt->input->end;
9085
19.9M
    start = in;
9086
19.9M
    if (in >= end) {
9087
997
        GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9088
997
    }
9089
19.9M
    if (normalize) {
9090
        /*
9091
   * Skip any leading spaces
9092
   */
9093
1.45M
  while ((in < end) && (*in != limit) &&
9094
1.45M
         ((*in == 0x20) || (*in == 0x9) ||
9095
1.44M
          (*in == 0xA) || (*in == 0xD))) {
9096
1.05M
      if (*in == 0xA) {
9097
343k
          line++; col = 1;
9098
715k
      } else {
9099
715k
          col++;
9100
715k
      }
9101
1.05M
      in++;
9102
1.05M
      start = in;
9103
1.05M
      if (in >= end) {
9104
267
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9105
267
                if ((in - start) > maxLength) {
9106
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9107
0
                                   "AttValue length too long\n");
9108
0
                    return(NULL);
9109
0
                }
9110
267
      }
9111
1.05M
  }
9112
34.5M
  while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9113
34.5M
         (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9114
34.1M
      col++;
9115
34.1M
      if ((*in++ == 0x20) && (*in == 0x20)) break;
9116
34.1M
      if (in >= end) {
9117
571
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9118
571
                if ((in - start) > maxLength) {
9119
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9120
0
                                   "AttValue length too long\n");
9121
0
                    return(NULL);
9122
0
                }
9123
571
      }
9124
34.1M
  }
9125
399k
  last = in;
9126
  /*
9127
   * skip the trailing blanks
9128
   */
9129
419k
  while ((last[-1] == 0x20) && (last > start)) last--;
9130
804k
  while ((in < end) && (*in != limit) &&
9131
804k
         ((*in == 0x20) || (*in == 0x9) ||
9132
547k
          (*in == 0xA) || (*in == 0xD))) {
9133
405k
      if (*in == 0xA) {
9134
106k
          line++, col = 1;
9135
298k
      } else {
9136
298k
          col++;
9137
298k
      }
9138
405k
      in++;
9139
405k
      if (in >= end) {
9140
498
    const xmlChar *oldbase = ctxt->input->base;
9141
498
    GROW;
9142
498
                if (ctxt->instate == XML_PARSER_EOF)
9143
0
                    return(NULL);
9144
498
    if (oldbase != ctxt->input->base) {
9145
0
        ptrdiff_t delta = ctxt->input->base - oldbase;
9146
0
        start = start + delta;
9147
0
        in = in + delta;
9148
0
        last = last + delta;
9149
0
    }
9150
498
    end = ctxt->input->end;
9151
498
                if ((in - start) > maxLength) {
9152
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9153
0
                                   "AttValue length too long\n");
9154
0
                    return(NULL);
9155
0
                }
9156
498
      }
9157
405k
  }
9158
399k
        if ((in - start) > maxLength) {
9159
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9160
0
                           "AttValue length too long\n");
9161
0
            return(NULL);
9162
0
        }
9163
399k
  if (*in != limit) goto need_complex;
9164
19.5M
    } else {
9165
1.14G
  while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9166
1.14G
         (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9167
1.13G
      in++;
9168
1.13G
      col++;
9169
1.13G
      if (in >= end) {
9170
8.78k
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9171
8.78k
                if ((in - start) > maxLength) {
9172
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9173
0
                                   "AttValue length too long\n");
9174
0
                    return(NULL);
9175
0
                }
9176
8.78k
      }
9177
1.13G
  }
9178
19.5M
  last = in;
9179
19.5M
        if ((in - start) > maxLength) {
9180
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9181
0
                           "AttValue length too long\n");
9182
0
            return(NULL);
9183
0
        }
9184
19.5M
  if (*in != limit) goto need_complex;
9185
19.5M
    }
9186
17.3M
    in++;
9187
17.3M
    col++;
9188
17.3M
    if (len != NULL) {
9189
11.6M
        if (alloc) *alloc = 0;
9190
11.6M
        *len = last - start;
9191
11.6M
        ret = (xmlChar *) start;
9192
11.6M
    } else {
9193
5.71M
        if (alloc) *alloc = 1;
9194
5.71M
        ret = xmlStrndup(start, last - start);
9195
5.71M
    }
9196
17.3M
    CUR_PTR = in;
9197
17.3M
    ctxt->input->line = line;
9198
17.3M
    ctxt->input->col = col;
9199
17.3M
    return ret;
9200
2.56M
need_complex:
9201
2.56M
    if (alloc) *alloc = 1;
9202
2.56M
    return xmlParseAttValueComplex(ctxt, len, normalize);
9203
19.9M
}
9204
9205
/**
9206
 * xmlParseAttribute2:
9207
 * @ctxt:  an XML parser context
9208
 * @pref:  the element prefix
9209
 * @elem:  the element name
9210
 * @prefix:  a xmlChar ** used to store the value of the attribute prefix
9211
 * @value:  a xmlChar ** used to store the value of the attribute
9212
 * @len:  an int * to save the length of the attribute
9213
 * @alloc:  an int * to indicate if the attribute was allocated
9214
 *
9215
 * parse an attribute in the new SAX2 framework.
9216
 *
9217
 * Returns the attribute name, and the value in *value.
9218
 */
9219
9220
static const xmlChar *
9221
xmlParseAttribute2(xmlParserCtxtPtr ctxt,
9222
                   const xmlChar * pref, const xmlChar * elem,
9223
                   const xmlChar ** prefix, xmlChar ** value,
9224
                   int *len, int *alloc)
9225
14.1M
{
9226
14.1M
    const xmlChar *name;
9227
14.1M
    xmlChar *val, *internal_val = NULL;
9228
14.1M
    int normalize = 0;
9229
9230
14.1M
    *value = NULL;
9231
14.1M
    GROW;
9232
14.1M
    name = xmlParseQName(ctxt, prefix);
9233
14.1M
    if (name == NULL) {
9234
617k
        xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9235
617k
                       "error parsing attribute name\n");
9236
617k
        return (NULL);
9237
617k
    }
9238
9239
    /*
9240
     * get the type if needed
9241
     */
9242
13.5M
    if (ctxt->attsSpecial != NULL) {
9243
1.60M
        int type;
9244
9245
1.60M
        type = (int) (ptrdiff_t) xmlHashQLookup2(ctxt->attsSpecial,
9246
1.60M
                                                 pref, elem, *prefix, name);
9247
1.60M
        if (type != 0)
9248
406k
            normalize = 1;
9249
1.60M
    }
9250
9251
    /*
9252
     * read the value
9253
     */
9254
13.5M
    SKIP_BLANKS;
9255
13.5M
    if (RAW == '=') {
9256
13.0M
        NEXT;
9257
13.0M
        SKIP_BLANKS;
9258
13.0M
        val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9259
13.0M
        if (val == NULL)
9260
65.9k
            return (NULL);
9261
13.0M
  if (normalize) {
9262
      /*
9263
       * Sometimes a second normalisation pass for spaces is needed
9264
       * but that only happens if charrefs or entity references
9265
       * have been used in the attribute value, i.e. the attribute
9266
       * value has been extracted in an allocated string already.
9267
       */
9268
399k
      if (*alloc) {
9269
144k
          const xmlChar *val2;
9270
9271
144k
          val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
9272
144k
    if ((val2 != NULL) && (val2 != val)) {
9273
33.8k
        xmlFree(val);
9274
33.8k
        val = (xmlChar *) val2;
9275
33.8k
    }
9276
144k
      }
9277
399k
  }
9278
13.0M
        ctxt->instate = XML_PARSER_CONTENT;
9279
13.0M
    } else {
9280
487k
        xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
9281
487k
                          "Specification mandates value for attribute %s\n",
9282
487k
                          name);
9283
487k
        return (name);
9284
487k
    }
9285
9286
13.0M
    if (*prefix == ctxt->str_xml) {
9287
        /*
9288
         * Check that xml:lang conforms to the specification
9289
         * No longer registered as an error, just generate a warning now
9290
         * since this was deprecated in XML second edition
9291
         */
9292
304k
        if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
9293
146k
            internal_val = xmlStrndup(val, *len);
9294
146k
            if (!xmlCheckLanguageID(internal_val)) {
9295
95.6k
                xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9296
95.6k
                              "Malformed value for xml:lang : %s\n",
9297
95.6k
                              internal_val, NULL);
9298
95.6k
            }
9299
146k
        }
9300
9301
        /*
9302
         * Check that xml:space conforms to the specification
9303
         */
9304
304k
        if (xmlStrEqual(name, BAD_CAST "space")) {
9305
747
            internal_val = xmlStrndup(val, *len);
9306
747
            if (xmlStrEqual(internal_val, BAD_CAST "default"))
9307
0
                *(ctxt->space) = 0;
9308
747
            else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9309
245
                *(ctxt->space) = 1;
9310
502
            else {
9311
502
                xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9312
502
                              "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9313
502
                              internal_val, NULL);
9314
502
            }
9315
747
        }
9316
304k
        if (internal_val) {
9317
146k
            xmlFree(internal_val);
9318
146k
        }
9319
304k
    }
9320
9321
13.0M
    *value = val;
9322
13.0M
    return (name);
9323
13.5M
}
9324
/**
9325
 * xmlParseStartTag2:
9326
 * @ctxt:  an XML parser context
9327
 *
9328
 * Parse a start tag. Always consumes '<'.
9329
 *
9330
 * This routine is called when running SAX2 parsing
9331
 *
9332
 * [40] STag ::= '<' Name (S Attribute)* S? '>'
9333
 *
9334
 * [ WFC: Unique Att Spec ]
9335
 * No attribute name may appear more than once in the same start-tag or
9336
 * empty-element tag.
9337
 *
9338
 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9339
 *
9340
 * [ WFC: Unique Att Spec ]
9341
 * No attribute name may appear more than once in the same start-tag or
9342
 * empty-element tag.
9343
 *
9344
 * With namespace:
9345
 *
9346
 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9347
 *
9348
 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9349
 *
9350
 * Returns the element name parsed
9351
 */
9352
9353
static const xmlChar *
9354
xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
9355
14.1M
                  const xmlChar **URI, int *tlen) {
9356
14.1M
    const xmlChar *localname;
9357
14.1M
    const xmlChar *prefix;
9358
14.1M
    const xmlChar *attname;
9359
14.1M
    const xmlChar *aprefix;
9360
14.1M
    const xmlChar *nsname;
9361
14.1M
    xmlChar *attvalue;
9362
14.1M
    const xmlChar **atts = ctxt->atts;
9363
14.1M
    int maxatts = ctxt->maxatts;
9364
14.1M
    int nratts, nbatts, nbdef, inputid;
9365
14.1M
    int i, j, nbNs, attval;
9366
14.1M
    unsigned long cur;
9367
14.1M
    int nsNr = ctxt->nsNr;
9368
9369
14.1M
    if (RAW != '<') return(NULL);
9370
14.1M
    NEXT1;
9371
9372
    /*
9373
     * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
9374
     *       point since the attribute values may be stored as pointers to
9375
     *       the buffer and calling SHRINK would destroy them !
9376
     *       The Shrinking is only possible once the full set of attribute
9377
     *       callbacks have been done.
9378
     */
9379
14.1M
    SHRINK;
9380
14.1M
    cur = ctxt->input->cur - ctxt->input->base;
9381
14.1M
    inputid = ctxt->input->id;
9382
14.1M
    nbatts = 0;
9383
14.1M
    nratts = 0;
9384
14.1M
    nbdef = 0;
9385
14.1M
    nbNs = 0;
9386
14.1M
    attval = 0;
9387
    /* Forget any namespaces added during an earlier parse of this element. */
9388
14.1M
    ctxt->nsNr = nsNr;
9389
9390
14.1M
    localname = xmlParseQName(ctxt, &prefix);
9391
14.1M
    if (localname == NULL) {
9392
2.12M
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9393
2.12M
           "StartTag: invalid element name\n");
9394
2.12M
        return(NULL);
9395
2.12M
    }
9396
12.0M
    *tlen = ctxt->input->cur - ctxt->input->base - cur;
9397
9398
    /*
9399
     * Now parse the attributes, it ends up with the ending
9400
     *
9401
     * (S Attribute)* S?
9402
     */
9403
12.0M
    SKIP_BLANKS;
9404
12.0M
    GROW;
9405
9406
17.5M
    while (((RAW != '>') &&
9407
17.5M
     ((RAW != '/') || (NXT(1) != '>')) &&
9408
17.5M
     (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
9409
14.1M
  int len = -1, alloc = 0;
9410
9411
14.1M
  attname = xmlParseAttribute2(ctxt, prefix, localname,
9412
14.1M
                               &aprefix, &attvalue, &len, &alloc);
9413
14.1M
        if (attname == NULL) {
9414
683k
      xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9415
683k
           "xmlParseStartTag: problem parsing attributes\n");
9416
683k
      break;
9417
683k
  }
9418
13.4M
        if (attvalue == NULL)
9419
487k
            goto next_attr;
9420
13.0M
  if (len < 0) len = xmlStrlen(attvalue);
9421
9422
13.0M
        if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9423
561k
            const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9424
561k
            xmlURIPtr uri;
9425
9426
561k
            if (URL == NULL) {
9427
0
                xmlErrMemory(ctxt, "dictionary allocation failure");
9428
0
                if ((attvalue != NULL) && (alloc != 0))
9429
0
                    xmlFree(attvalue);
9430
0
                localname = NULL;
9431
0
                goto done;
9432
0
            }
9433
561k
            if (*URL != 0) {
9434
542k
                uri = xmlParseURI((const char *) URL);
9435
542k
                if (uri == NULL) {
9436
245k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
9437
245k
                             "xmlns: '%s' is not a valid URI\n",
9438
245k
                                       URL, NULL, NULL);
9439
297k
                } else {
9440
297k
                    if (uri->scheme == NULL) {
9441
104k
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9442
104k
                                  "xmlns: URI %s is not absolute\n",
9443
104k
                                  URL, NULL, NULL);
9444
104k
                    }
9445
297k
                    xmlFreeURI(uri);
9446
297k
                }
9447
542k
                if (URL == ctxt->str_xml_ns) {
9448
0
                    if (attname != ctxt->str_xml) {
9449
0
                        xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9450
0
                     "xml namespace URI cannot be the default namespace\n",
9451
0
                                 NULL, NULL, NULL);
9452
0
                    }
9453
0
                    goto next_attr;
9454
0
                }
9455
542k
                if ((len == 29) &&
9456
542k
                    (xmlStrEqual(URL,
9457
6.96k
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9458
0
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9459
0
                         "reuse of the xmlns namespace name is forbidden\n",
9460
0
                             NULL, NULL, NULL);
9461
0
                    goto next_attr;
9462
0
                }
9463
542k
            }
9464
            /*
9465
             * check that it's not a defined namespace
9466
             */
9467
583k
            for (j = 1;j <= nbNs;j++)
9468
23.8k
                if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9469
1.75k
                    break;
9470
561k
            if (j <= nbNs)
9471
1.75k
                xmlErrAttributeDup(ctxt, NULL, attname);
9472
559k
            else
9473
559k
                if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
9474
9475
12.4M
        } else if (aprefix == ctxt->str_xmlns) {
9476
472k
            const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9477
472k
            xmlURIPtr uri;
9478
9479
472k
            if (attname == ctxt->str_xml) {
9480
608
                if (URL != ctxt->str_xml_ns) {
9481
608
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9482
608
                             "xml namespace prefix mapped to wrong URI\n",
9483
608
                             NULL, NULL, NULL);
9484
608
                }
9485
                /*
9486
                 * Do not keep a namespace definition node
9487
                 */
9488
608
                goto next_attr;
9489
608
            }
9490
472k
            if (URL == ctxt->str_xml_ns) {
9491
0
                if (attname != ctxt->str_xml) {
9492
0
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9493
0
                             "xml namespace URI mapped to wrong prefix\n",
9494
0
                             NULL, NULL, NULL);
9495
0
                }
9496
0
                goto next_attr;
9497
0
            }
9498
472k
            if (attname == ctxt->str_xmlns) {
9499
132
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9500
132
                         "redefinition of the xmlns prefix is forbidden\n",
9501
132
                         NULL, NULL, NULL);
9502
132
                goto next_attr;
9503
132
            }
9504
472k
            if ((len == 29) &&
9505
472k
                (xmlStrEqual(URL,
9506
19.9k
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9507
0
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9508
0
                         "reuse of the xmlns namespace name is forbidden\n",
9509
0
                         NULL, NULL, NULL);
9510
0
                goto next_attr;
9511
0
            }
9512
472k
            if ((URL == NULL) || (URL[0] == 0)) {
9513
1.86k
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9514
1.86k
                         "xmlns:%s: Empty XML namespace is not allowed\n",
9515
1.86k
                              attname, NULL, NULL);
9516
1.86k
                goto next_attr;
9517
470k
            } else {
9518
470k
                uri = xmlParseURI((const char *) URL);
9519
470k
                if (uri == NULL) {
9520
151k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
9521
151k
                         "xmlns:%s: '%s' is not a valid URI\n",
9522
151k
                                       attname, URL, NULL);
9523
318k
                } else {
9524
318k
                    if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9525
54.6k
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9526
54.6k
                                  "xmlns:%s: URI %s is not absolute\n",
9527
54.6k
                                  attname, URL, NULL);
9528
54.6k
                    }
9529
318k
                    xmlFreeURI(uri);
9530
318k
                }
9531
470k
            }
9532
9533
            /*
9534
             * check that it's not a defined namespace
9535
             */
9536
665k
            for (j = 1;j <= nbNs;j++)
9537
206k
                if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9538
11.3k
                    break;
9539
470k
            if (j <= nbNs)
9540
11.3k
                xmlErrAttributeDup(ctxt, aprefix, attname);
9541
458k
            else
9542
458k
                if (nsPush(ctxt, attname, URL) > 0) nbNs++;
9543
9544
11.9M
        } else {
9545
            /*
9546
             * Add the pair to atts
9547
             */
9548
11.9M
            if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9549
148k
                if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9550
0
                    goto next_attr;
9551
0
                }
9552
148k
                maxatts = ctxt->maxatts;
9553
148k
                atts = ctxt->atts;
9554
148k
            }
9555
11.9M
            ctxt->attallocs[nratts++] = alloc;
9556
11.9M
            atts[nbatts++] = attname;
9557
11.9M
            atts[nbatts++] = aprefix;
9558
            /*
9559
             * The namespace URI field is used temporarily to point at the
9560
             * base of the current input buffer for non-alloced attributes.
9561
             * When the input buffer is reallocated, all the pointers become
9562
             * invalid, but they can be reconstructed later.
9563
             */
9564
11.9M
            if (alloc)
9565
898k
                atts[nbatts++] = NULL;
9566
11.0M
            else
9567
11.0M
                atts[nbatts++] = ctxt->input->base;
9568
11.9M
            atts[nbatts++] = attvalue;
9569
11.9M
            attvalue += len;
9570
11.9M
            atts[nbatts++] = attvalue;
9571
            /*
9572
             * tag if some deallocation is needed
9573
             */
9574
11.9M
            if (alloc != 0) attval = 1;
9575
11.9M
            attvalue = NULL; /* moved into atts */
9576
11.9M
        }
9577
9578
13.4M
next_attr:
9579
13.4M
        if ((attvalue != NULL) && (alloc != 0)) {
9580
482k
            xmlFree(attvalue);
9581
482k
            attvalue = NULL;
9582
482k
        }
9583
9584
13.4M
  GROW
9585
13.4M
        if (ctxt->instate == XML_PARSER_EOF)
9586
0
            break;
9587
13.4M
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9588
6.59M
      break;
9589
6.90M
  if (SKIP_BLANKS == 0) {
9590
1.40M
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9591
1.40M
         "attributes construct error\n");
9592
1.40M
      break;
9593
1.40M
  }
9594
5.49M
        GROW;
9595
5.49M
    }
9596
9597
12.0M
    if (ctxt->input->id != inputid) {
9598
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9599
0
                    "Unexpected change of input\n");
9600
0
        localname = NULL;
9601
0
        goto done;
9602
0
    }
9603
9604
    /* Reconstruct attribute value pointers. */
9605
24.0M
    for (i = 0, j = 0; j < nratts; i += 5, j++) {
9606
11.9M
        if (atts[i+2] != NULL) {
9607
            /*
9608
             * Arithmetic on dangling pointers is technically undefined
9609
             * behavior, but well...
9610
             */
9611
11.0M
            const xmlChar *old = atts[i+2];
9612
11.0M
            atts[i+2]  = NULL;    /* Reset repurposed namespace URI */
9613
11.0M
            atts[i+3] = ctxt->input->base + (atts[i+3] - old);  /* value */
9614
11.0M
            atts[i+4] = ctxt->input->base + (atts[i+4] - old);  /* valuend */
9615
11.0M
        }
9616
11.9M
    }
9617
9618
    /*
9619
     * Attribute defaulting
9620
     */
9621
12.0M
    if (ctxt->attsDefault != NULL) {
9622
1.30M
        xmlDefAttrsPtr defaults;
9623
9624
1.30M
  defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9625
1.30M
  if (defaults != NULL) {
9626
579k
      for (i = 0;i < defaults->nbAttrs;i++) {
9627
361k
          attname = defaults->values[5 * i];
9628
361k
    aprefix = defaults->values[5 * i + 1];
9629
9630
                /*
9631
     * special work for namespaces defaulted defs
9632
     */
9633
361k
    if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9634
        /*
9635
         * check that it's not a defined namespace
9636
         */
9637
133k
        for (j = 1;j <= nbNs;j++)
9638
40.3k
            if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9639
24.7k
          break;
9640
117k
              if (j <= nbNs) continue;
9641
9642
93.1k
        nsname = xmlGetNamespace(ctxt, NULL);
9643
93.1k
        if (nsname != defaults->values[5 * i + 2]) {
9644
25.2k
      if (nsPush(ctxt, NULL,
9645
25.2k
                 defaults->values[5 * i + 2]) > 0)
9646
25.2k
          nbNs++;
9647
25.2k
        }
9648
244k
    } else if (aprefix == ctxt->str_xmlns) {
9649
        /*
9650
         * check that it's not a defined namespace
9651
         */
9652
54.5k
        for (j = 1;j <= nbNs;j++)
9653
19.7k
            if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9654
1.90k
          break;
9655
36.7k
              if (j <= nbNs) continue;
9656
9657
34.7k
        nsname = xmlGetNamespace(ctxt, attname);
9658
34.7k
        if (nsname != defaults->values[5 * i + 2]) {
9659
23.2k
      if (nsPush(ctxt, attname,
9660
23.2k
                 defaults->values[5 * i + 2]) > 0)
9661
23.2k
          nbNs++;
9662
23.2k
        }
9663
207k
    } else {
9664
        /*
9665
         * check that it's not a defined attribute
9666
         */
9667
589k
        for (j = 0;j < nbatts;j+=5) {
9668
383k
      if ((attname == atts[j]) && (aprefix == atts[j+1]))
9669
1.62k
          break;
9670
383k
        }
9671
207k
        if (j < nbatts) continue;
9672
9673
205k
        if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9674
14.9k
      if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9675
0
                            localname = NULL;
9676
0
                            goto done;
9677
0
      }
9678
14.9k
      maxatts = ctxt->maxatts;
9679
14.9k
      atts = ctxt->atts;
9680
14.9k
        }
9681
205k
        atts[nbatts++] = attname;
9682
205k
        atts[nbatts++] = aprefix;
9683
205k
        if (aprefix == NULL)
9684
139k
      atts[nbatts++] = NULL;
9685
66.0k
        else
9686
66.0k
            atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
9687
205k
        atts[nbatts++] = defaults->values[5 * i + 2];
9688
205k
        atts[nbatts++] = defaults->values[5 * i + 3];
9689
205k
        if ((ctxt->standalone == 1) &&
9690
205k
            (defaults->values[5 * i + 4] != NULL)) {
9691
0
      xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9692
0
    "standalone: attribute %s on %s defaulted from external subset\n",
9693
0
                                   attname, localname);
9694
0
        }
9695
205k
        nbdef++;
9696
205k
    }
9697
361k
      }
9698
217k
  }
9699
1.30M
    }
9700
9701
    /*
9702
     * Attribute checks
9703
     */
9704
24.2M
    for (i = 0; i < nbatts;i += 5) {
9705
        /*
9706
  * The default namespace does not apply to attribute names.
9707
  */
9708
12.1M
  if (atts[i + 1] != NULL) {
9709
840k
      nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9710
840k
      if (nsname == NULL) {
9711
373k
    xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9712
373k
        "Namespace prefix %s for %s on %s is not defined\n",
9713
373k
        atts[i + 1], atts[i], localname);
9714
373k
      }
9715
840k
      atts[i + 2] = nsname;
9716
840k
  } else
9717
11.3M
      nsname = NULL;
9718
  /*
9719
   * [ WFC: Unique Att Spec ]
9720
   * No attribute name may appear more than once in the same
9721
   * start-tag or empty-element tag.
9722
   * As extended by the Namespace in XML REC.
9723
   */
9724
17.7M
        for (j = 0; j < i;j += 5) {
9725
5.56M
      if (atts[i] == atts[j]) {
9726
115k
          if (atts[i+1] == atts[j+1]) {
9727
37.3k
        xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9728
37.3k
        break;
9729
37.3k
    }
9730
78.6k
    if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9731
337
        xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9732
337
           "Namespaced Attribute %s in '%s' redefined\n",
9733
337
           atts[i], nsname, NULL);
9734
337
        break;
9735
337
    }
9736
78.6k
      }
9737
5.56M
  }
9738
12.1M
    }
9739
9740
12.0M
    nsname = xmlGetNamespace(ctxt, prefix);
9741
12.0M
    if ((prefix != NULL) && (nsname == NULL)) {
9742
1.36M
  xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9743
1.36M
           "Namespace prefix %s on %s is not defined\n",
9744
1.36M
     prefix, localname, NULL);
9745
1.36M
    }
9746
12.0M
    *pref = prefix;
9747
12.0M
    *URI = nsname;
9748
9749
    /*
9750
     * SAX: Start of Element !
9751
     */
9752
12.0M
    if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9753
12.0M
  (!ctxt->disableSAX)) {
9754
10.7M
  if (nbNs > 0)
9755
755k
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9756
755k
        nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9757
755k
        nbatts / 5, nbdef, atts);
9758
10.0M
  else
9759
10.0M
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9760
10.0M
                    nsname, 0, NULL, nbatts / 5, nbdef, atts);
9761
10.7M
    }
9762
9763
12.0M
done:
9764
    /*
9765
     * Free up attribute allocated strings if needed
9766
     */
9767
12.0M
    if (attval != 0) {
9768
1.81M
  for (i = 3,j = 0; j < nratts;i += 5,j++)
9769
1.02M
      if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9770
898k
          xmlFree((xmlChar *) atts[i]);
9771
784k
    }
9772
9773
12.0M
    return(localname);
9774
12.0M
}
9775
9776
/**
9777
 * xmlParseEndTag2:
9778
 * @ctxt:  an XML parser context
9779
 * @tag:  start tag information for the element being closed; its
9780
 *        prefix, URI, line and nsNr fields are read here
9781
 *
9782
 * Parse an end tag. Always consumes '</'.
9783
 *
9784
 * [42] ETag ::= '</' Name S? '>'
9785
 *
9786
 * With namespace
9787
 *
9788
 * [NS 9] ETag ::= '</' QName S? '>'
9789
 */
9790
9791
static void
9792
3.37M
xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlStartTag *tag) {
9793
3.37M
    const xmlChar *name;
9794
9795
3.37M
    GROW;
9796
3.37M
    if ((RAW != '<') || (NXT(1) != '/')) {
9797
0
  xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9798
0
  return;
9799
0
    }
9800
3.37M
    SKIP(2);
9801
9802
3.37M
    if (tag->prefix == NULL)
9803
2.61M
        name = xmlParseNameAndCompare(ctxt, ctxt->name);
9804
760k
    else
9805
760k
        name = xmlParseQNameAndCompare(ctxt, ctxt->name, tag->prefix);
9806
9807
    /*
9808
     * We should definitely be at the ending "S? '>'" part
9809
     */
9810
3.37M
    GROW;
9811
3.37M
    if (ctxt->instate == XML_PARSER_EOF)
9812
0
        return;
9813
3.37M
    SKIP_BLANKS;
9814
3.37M
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9815
295k
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9816
295k
    } else
9817
3.07M
  NEXT1;
9818
9819
    /*
9820
     * [ WFC: Element Type Match ]
9821
     * The Name in an element's end-tag must match the element type in the
9822
     * start-tag.
9823
     *
9824
     */
9825
3.37M
    if (name != (xmlChar*)1) {
9826
716k
        if (name == NULL) name = BAD_CAST "unparsable";
9827
716k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9828
716k
         "Opening and ending tag mismatch: %s line %d and %s\n",
9829
716k
                    ctxt->name, tag->line, name);
9830
716k
    }
9831
9832
    /*
9833
     * SAX: End of Tag
9834
     */
9835
3.37M
    if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9836
3.37M
  (!ctxt->disableSAX))
9837
2.90M
  ctxt->sax->endElementNs(ctxt->userData, ctxt->name, tag->prefix,
9838
2.90M
                                tag->URI);
9839
9840
3.37M
    spacePop(ctxt);
9841
3.37M
    if (tag->nsNr != 0)
9842
80.9k
  nsPop(ctxt, tag->nsNr);
9843
3.37M
}
9844
9845
/**
9846
 * xmlParseCDSect:
9847
 * @ctxt:  an XML parser context
9848
 *
9849
 * DEPRECATED: Internal function, don't use.
9850
 *
9851
 * Parse escaped pure raw content. Always consumes '<!['.
9852
 *
9853
 * [18] CDSect ::= CDStart CData CDEnd
9854
 *
9855
 * [19] CDStart ::= '<![CDATA['
9856
 *
9857
 * [20] CData ::= (Char* - (Char* ']]>' Char*))
9858
 *
9859
 * [21] CDEnd ::= ']]>'
9860
 */
9861
void
9862
65.6k
xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9863
65.6k
    xmlChar *buf = NULL;
9864
65.6k
    int len = 0;
9865
65.6k
    int size = XML_PARSER_BUFFER_SIZE;
9866
65.6k
    int r, rl;
9867
65.6k
    int s, sl;
9868
65.6k
    int cur, l;
9869
65.6k
    int count = 0;
9870
65.6k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9871
18.7k
                    XML_MAX_HUGE_LENGTH :
9872
65.6k
                    XML_MAX_TEXT_LENGTH;
9873
9874
65.6k
    if ((CUR != '<') || (NXT(1) != '!') || (NXT(2) != '['))
9875
0
        return;
9876
65.6k
    SKIP(3);
9877
9878
65.6k
    if (!CMP6(CUR_PTR, 'C', 'D', 'A', 'T', 'A', '['))
9879
0
        return;
9880
65.6k
    SKIP(6);
9881
9882
65.6k
    ctxt->instate = XML_PARSER_CDATA_SECTION;
9883
65.6k
    r = CUR_CHAR(rl);
9884
65.6k
    if (!IS_CHAR(r)) {
9885
11.3k
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9886
11.3k
        goto out;
9887
11.3k
    }
9888
54.2k
    NEXTL(rl);
9889
54.2k
    s = CUR_CHAR(sl);
9890
54.2k
    if (!IS_CHAR(s)) {
9891
727
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9892
727
        goto out;
9893
727
    }
9894
53.5k
    NEXTL(sl);
9895
53.5k
    cur = CUR_CHAR(l);
9896
53.5k
    buf = (xmlChar *) xmlMallocAtomic(size);
9897
53.5k
    if (buf == NULL) {
9898
0
  xmlErrMemory(ctxt, NULL);
9899
0
        goto out;
9900
0
    }
9901
18.9M
    while (IS_CHAR(cur) &&
9902
18.9M
           ((r != ']') || (s != ']') || (cur != '>'))) {
9903
18.9M
  if (len + 5 >= size) {
9904
23.1k
      xmlChar *tmp;
9905
9906
23.1k
      tmp = (xmlChar *) xmlRealloc(buf, size * 2);
9907
23.1k
      if (tmp == NULL) {
9908
0
    xmlErrMemory(ctxt, NULL);
9909
0
                goto out;
9910
0
      }
9911
23.1k
      buf = tmp;
9912
23.1k
      size *= 2;
9913
23.1k
  }
9914
18.9M
  COPY_BUF(rl,buf,len,r);
9915
18.9M
  r = s;
9916
18.9M
  rl = sl;
9917
18.9M
  s = cur;
9918
18.9M
  sl = l;
9919
18.9M
  count++;
9920
18.9M
  if (count > 50) {
9921
354k
      SHRINK;
9922
354k
      GROW;
9923
354k
            if (ctxt->instate == XML_PARSER_EOF) {
9924
0
                goto out;
9925
0
            }
9926
354k
      count = 0;
9927
354k
  }
9928
18.9M
  NEXTL(l);
9929
18.9M
  cur = CUR_CHAR(l);
9930
18.9M
        if (len > maxLength) {
9931
0
            xmlFatalErrMsg(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9932
0
                           "CData section too big found\n");
9933
0
            goto out;
9934
0
        }
9935
18.9M
    }
9936
53.5k
    buf[len] = 0;
9937
53.5k
    if (cur != '>') {
9938
12.6k
  xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9939
12.6k
                       "CData section not finished\n%.50s\n", buf);
9940
12.6k
        goto out;
9941
12.6k
    }
9942
40.8k
    NEXTL(l);
9943
9944
    /*
9945
     * OK the buffer is to be consumed as cdata.
9946
     */
9947
40.8k
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9948
24.6k
  if (ctxt->sax->cdataBlock != NULL)
9949
16.8k
      ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9950
7.83k
  else if (ctxt->sax->characters != NULL)
9951
7.83k
      ctxt->sax->characters(ctxt->userData, buf, len);
9952
24.6k
    }
9953
9954
65.6k
out:
9955
65.6k
    if (ctxt->instate != XML_PARSER_EOF)
9956
65.6k
        ctxt->instate = XML_PARSER_CONTENT;
9957
65.6k
    xmlFree(buf);
9958
65.6k
}
9959
9960
/**
9961
 * xmlParseContentInternal:
9962
 * @ctxt:  an XML parser context
9963
 *
9964
 * Parse a content sequence. Stops at EOF or '</'. Leaves checking of
9965
 * unexpected EOF to the caller.
9966
 */
9967
9968
static void
9969
247k
xmlParseContentInternal(xmlParserCtxtPtr ctxt) {
9970
247k
    int nameNr = ctxt->nameNr;
9971
9972
247k
    GROW;
9973
27.9M
    while ((RAW != 0) &&
9974
27.9M
     (ctxt->instate != XML_PARSER_EOF)) {
9975
27.7M
  const xmlChar *cur = ctxt->input->cur;
9976
9977
  /*
9978
   * First case : a Processing Instruction.
9979
   */
9980
27.7M
  if ((*cur == '<') && (cur[1] == '?')) {
9981
64.2k
      xmlParsePI(ctxt);
9982
64.2k
  }
9983
9984
  /*
9985
   * Second case : a CDSection
9986
   */
9987
  /* 2.6.0 test was *cur not RAW */
9988
27.6M
  else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9989
65.6k
      xmlParseCDSect(ctxt);
9990
65.6k
  }
9991
9992
  /*
9993
   * Third case :  a comment
9994
   */
9995
27.6M
  else if ((*cur == '<') && (NXT(1) == '!') &&
9996
27.6M
     (NXT(2) == '-') && (NXT(3) == '-')) {
9997
260k
      xmlParseComment(ctxt);
9998
260k
      ctxt->instate = XML_PARSER_CONTENT;
9999
260k
  }
10000
10001
  /*
10002
   * Fourth case :  a sub-element.
10003
   */
10004
27.3M
  else if (*cur == '<') {
10005
8.97M
            if (NXT(1) == '/') {
10006
1.75M
                if (ctxt->nameNr <= nameNr)
10007
35.4k
                    break;
10008
1.72M
          xmlParseElementEnd(ctxt);
10009
7.21M
            } else {
10010
7.21M
          xmlParseElementStart(ctxt);
10011
7.21M
            }
10012
8.97M
  }
10013
10014
  /*
10015
   * Fifth case : a reference. If it has not been resolved,
10016
   *    parsing returns its Name, create the node
10017
   */
10018
10019
18.3M
  else if (*cur == '&') {
10020
3.21M
      xmlParseReference(ctxt);
10021
3.21M
  }
10022
10023
  /*
10024
   * Last case, text. Note that References are handled directly.
10025
   */
10026
15.1M
  else {
10027
15.1M
      xmlParseCharData(ctxt, 0);
10028
15.1M
  }
10029
10030
27.7M
  GROW;
10031
27.7M
  SHRINK;
10032
27.7M
    }
10033
247k
}
10034
10035
/**
10036
 * xmlParseContent:
10037
 * @ctxt:  an XML parser context
10038
 *
10039
 * Parse a content sequence. Stops at EOF or '</'.
10040
 *
10041
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10042
 */
10043
10044
void
10045
138k
xmlParseContent(xmlParserCtxtPtr ctxt) {
10046
138k
    int nameNr = ctxt->nameNr;
10047
10048
138k
    xmlParseContentInternal(ctxt);
10049
10050
138k
    if ((ctxt->instate != XML_PARSER_EOF) && (ctxt->nameNr > nameNr)) {
10051
4.28k
        const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
10052
4.28k
        int line = ctxt->pushTab[ctxt->nameNr - 1].line;
10053
4.28k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
10054
4.28k
                "Premature end of data in tag %s line %d\n",
10055
4.28k
    name, line, NULL);
10056
4.28k
    }
10057
138k
}
10058
10059
/**
10060
 * xmlParseElement:
10061
 * @ctxt:  an XML parser context
10062
 *
10063
 * DEPRECATED: Internal function, don't use.
10064
 *
10065
 * parse an XML element
10066
 *
10067
 * [39] element ::= EmptyElemTag | STag content ETag
10068
 *
10069
 * [ WFC: Element Type Match ]
10070
 * The Name in an element's end-tag must match the element type in the
10071
 * start-tag.
10072
 *
10073
 */
10074
10075
void
10076
192k
xmlParseElement(xmlParserCtxtPtr ctxt) {
10077
192k
    if (xmlParseElementStart(ctxt) != 0)
10078
83.2k
        return;
10079
10080
109k
    xmlParseContentInternal(ctxt);
10081
109k
    if (ctxt->instate == XML_PARSER_EOF)
10082
545
  return;
10083
10084
108k
    if (CUR == 0) {
10085
74.2k
        const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
10086
74.2k
        int line = ctxt->pushTab[ctxt->nameNr - 1].line;
10087
74.2k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
10088
74.2k
                "Premature end of data in tag %s line %d\n",
10089
74.2k
    name, line, NULL);
10090
74.2k
        return;
10091
74.2k
    }
10092
10093
34.5k
    xmlParseElementEnd(ctxt);
10094
34.5k
}
10095
10096
/**
10097
 * xmlParseElementStart:
10098
 * @ctxt:  an XML parser context
10099
 *
10100
 * Parse the start of an XML element. Returns -1 in case of error, 0 if an
10101
 * opening tag was parsed, 1 if an empty element was parsed.
10102
 *
10103
 * Always consumes '<'.
10104
 */
10105
static int
10106
7.41M
xmlParseElementStart(xmlParserCtxtPtr ctxt) {
10107
7.41M
    const xmlChar *name;
10108
7.41M
    const xmlChar *prefix = NULL;
10109
7.41M
    const xmlChar *URI = NULL;
10110
7.41M
    xmlParserNodeInfo node_info;
10111
7.41M
    int line, tlen = 0;
10112
7.41M
    xmlNodePtr ret;
10113
7.41M
    int nsNr = ctxt->nsNr;
10114
10115
7.41M
    if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
10116
7.41M
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
10117
128
  xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
10118
128
     "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
10119
128
        xmlParserMaxDepth);
10120
128
  xmlHaltParser(ctxt);
10121
128
  return(-1);
10122
128
    }
10123
10124
    /* Capture start position */
10125
7.41M
    if (ctxt->record_info) {
10126
0
        node_info.begin_pos = ctxt->input->consumed +
10127
0
                          (CUR_PTR - ctxt->input->base);
10128
0
  node_info.begin_line = ctxt->input->line;
10129
0
    }
10130
10131
7.41M
    if (ctxt->spaceNr == 0)
10132
0
  spacePush(ctxt, -1);
10133
7.41M
    else if (*ctxt->space == -2)
10134
962k
  spacePush(ctxt, -1);
10135
6.44M
    else
10136
6.44M
  spacePush(ctxt, *ctxt->space);
10137
10138
7.41M
    line = ctxt->input->line;
10139
7.41M
#ifdef LIBXML_SAX1_ENABLED
10140
7.41M
    if (ctxt->sax2)
10141
5.74M
#endif /* LIBXML_SAX1_ENABLED */
10142
5.74M
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
10143
1.66M
#ifdef LIBXML_SAX1_ENABLED
10144
1.66M
    else
10145
1.66M
  name = xmlParseStartTag(ctxt);
10146
7.41M
#endif /* LIBXML_SAX1_ENABLED */
10147
7.41M
    if (ctxt->instate == XML_PARSER_EOF)
10148
211
  return(-1);
10149
7.41M
    if (name == NULL) {
10150
2.41M
  spacePop(ctxt);
10151
2.41M
        return(-1);
10152
2.41M
    }
10153
4.99M
    nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
10154
4.99M
    ret = ctxt->node;
10155
10156
4.99M
#ifdef LIBXML_VALID_ENABLED
10157
    /*
10158
     * [ VC: Root Element Type ]
10159
     * The Name in the document type declaration must match the element
10160
     * type of the root element.
10161
     */
10162
4.99M
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10163
4.99M
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
10164
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
10165
4.99M
#endif /* LIBXML_VALID_ENABLED */
10166
10167
    /*
10168
     * Check for an Empty Element.
10169
     */
10170
4.99M
    if ((RAW == '/') && (NXT(1) == '>')) {
10171
2.03M
        SKIP(2);
10172
2.03M
  if (ctxt->sax2) {
10173
1.75M
      if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
10174
1.75M
    (!ctxt->disableSAX))
10175
1.27M
    ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
10176
1.75M
#ifdef LIBXML_SAX1_ENABLED
10177
1.75M
  } else {
10178
286k
      if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
10179
286k
    (!ctxt->disableSAX))
10180
227k
    ctxt->sax->endElement(ctxt->userData, name);
10181
286k
#endif /* LIBXML_SAX1_ENABLED */
10182
286k
  }
10183
2.03M
  namePop(ctxt);
10184
2.03M
  spacePop(ctxt);
10185
2.03M
  if (nsNr != ctxt->nsNr)
10186
16.2k
      nsPop(ctxt, ctxt->nsNr - nsNr);
10187
2.03M
  if ( ret != NULL && ctxt->record_info ) {
10188
0
     node_info.end_pos = ctxt->input->consumed +
10189
0
            (CUR_PTR - ctxt->input->base);
10190
0
     node_info.end_line = ctxt->input->line;
10191
0
     node_info.node = ret;
10192
0
     xmlParserAddNodeInfo(ctxt, &node_info);
10193
0
  }
10194
2.03M
  return(1);
10195
2.03M
    }
10196
2.95M
    if (RAW == '>') {
10197
2.29M
        NEXT1;
10198
2.29M
    } else {
10199
657k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
10200
657k
         "Couldn't find end of Start Tag %s line %d\n",
10201
657k
                    name, line, NULL);
10202
10203
  /*
10204
   * end of parsing of this node.
10205
   */
10206
657k
  nodePop(ctxt);
10207
657k
  namePop(ctxt);
10208
657k
  spacePop(ctxt);
10209
657k
  if (nsNr != ctxt->nsNr)
10210
112k
      nsPop(ctxt, ctxt->nsNr - nsNr);
10211
10212
  /*
10213
   * Capture end position and add node
10214
   */
10215
657k
  if ( ret != NULL && ctxt->record_info ) {
10216
0
     node_info.end_pos = ctxt->input->consumed +
10217
0
            (CUR_PTR - ctxt->input->base);
10218
0
     node_info.end_line = ctxt->input->line;
10219
0
     node_info.node = ret;
10220
0
     xmlParserAddNodeInfo(ctxt, &node_info);
10221
0
  }
10222
657k
  return(-1);
10223
657k
    }
10224
10225
2.29M
    return(0);
10226
2.95M
}
10227
10228
/**
10229
 * xmlParseElementEnd:
10230
 * @ctxt:  an XML parser context
10231
 *
10232
 * Parse the end of an XML element. Always consumes '</'.
10233
 */
10234
static void
10235
1.75M
xmlParseElementEnd(xmlParserCtxtPtr ctxt) {
10236
1.75M
    xmlParserNodeInfo node_info;
10237
1.75M
    xmlNodePtr ret = ctxt->node;
10238
10239
1.75M
    if (ctxt->nameNr <= 0) {
10240
0
        if ((RAW == '<') && (NXT(1) == '/'))
10241
0
            SKIP(2);
10242
0
        return;
10243
0
    }
10244
10245
    /*
10246
     * parse the end of tag: '</' should be here.
10247
     */
10248
1.75M
    if (ctxt->sax2) {
10249
1.16M
  xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
10250
1.16M
  namePop(ctxt);
10251
1.16M
    }
10252
587k
#ifdef LIBXML_SAX1_ENABLED
10253
587k
    else
10254
587k
  xmlParseEndTag1(ctxt, 0);
10255
1.75M
#endif /* LIBXML_SAX1_ENABLED */
10256
10257
    /*
10258
     * Capture end position and add node
10259
     */
10260
1.75M
    if ( ret != NULL && ctxt->record_info ) {
10261
0
       node_info.end_pos = ctxt->input->consumed +
10262
0
                          (CUR_PTR - ctxt->input->base);
10263
0
       node_info.end_line = ctxt->input->line;
10264
0
       node_info.node = ret;
10265
0
       xmlParserAddNodeInfo(ctxt, &node_info);
10266
0
    }
10267
1.75M
}
10268
10269
/**
10270
 * xmlParseVersionNum:
10271
 * @ctxt:  an XML parser context
10272
 *
10273
 * DEPRECATED: Internal function, don't use.
10274
 *
10275
 * parse the XML version value.
10276
 *
10277
 * [26] VersionNum ::= '1.' [0-9]+
10278
 *
10279
 * In practice allow [0-9].[0-9]+ at that level
10280
 *
10281
 * Returns the string giving the XML version number, or NULL
10282
 */
10283
xmlChar *
10284
316k
xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10285
316k
    xmlChar *buf = NULL;
10286
316k
    int len = 0;
10287
316k
    int size = 10;
10288
316k
    xmlChar cur;
10289
10290
316k
    buf = (xmlChar *) xmlMallocAtomic(size);
10291
316k
    if (buf == NULL) {
10292
0
  xmlErrMemory(ctxt, NULL);
10293
0
  return(NULL);
10294
0
    }
10295
316k
    cur = CUR;
10296
316k
    if (!((cur >= '0') && (cur <= '9'))) {
10297
3.71k
  xmlFree(buf);
10298
3.71k
  return(NULL);
10299
3.71k
    }
10300
312k
    buf[len++] = cur;
10301
312k
    NEXT;
10302
312k
    cur=CUR;
10303
312k
    if (cur != '.') {
10304
7.00k
  xmlFree(buf);
10305
7.00k
  return(NULL);
10306
7.00k
    }
10307
305k
    buf[len++] = cur;
10308
305k
    NEXT;
10309
305k
    cur=CUR;
10310
2.81M
    while ((cur >= '0') && (cur <= '9')) {
10311
2.51M
  if (len + 1 >= size) {
10312
2.54k
      xmlChar *tmp;
10313
10314
2.54k
      size *= 2;
10315
2.54k
      tmp = (xmlChar *) xmlRealloc(buf, size);
10316
2.54k
      if (tmp == NULL) {
10317
0
          xmlFree(buf);
10318
0
    xmlErrMemory(ctxt, NULL);
10319
0
    return(NULL);
10320
0
      }
10321
2.54k
      buf = tmp;
10322
2.54k
  }
10323
2.51M
  buf[len++] = cur;
10324
2.51M
  NEXT;
10325
2.51M
  cur=CUR;
10326
2.51M
    }
10327
305k
    buf[len] = 0;
10328
305k
    return(buf);
10329
305k
}
10330
10331
/**
10332
 * xmlParseVersionInfo:
10333
 * @ctxt:  an XML parser context
10334
 *
10335
 * DEPRECATED: Internal function, don't use.
10336
 *
10337
 * parse the XML version.
10338
 *
10339
 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
10340
 *
10341
 * [25] Eq ::= S? '=' S?
10342
 *
10343
 * Returns the version string, e.g. "1.0"
10344
 */
10345
10346
xmlChar *
10347
362k
xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10348
362k
    xmlChar *version = NULL;
10349
10350
362k
    if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
10351
325k
  SKIP(7);
10352
325k
  SKIP_BLANKS;
10353
325k
  if (RAW != '=') {
10354
4.67k
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10355
4.67k
      return(NULL);
10356
4.67k
        }
10357
321k
  NEXT;
10358
321k
  SKIP_BLANKS;
10359
321k
  if (RAW == '"') {
10360
289k
      NEXT;
10361
289k
      version = xmlParseVersionNum(ctxt);
10362
289k
      if (RAW != '"') {
10363
16.8k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10364
16.8k
      } else
10365
272k
          NEXT;
10366
289k
  } else if (RAW == '\''){
10367
27.1k
      NEXT;
10368
27.1k
      version = xmlParseVersionNum(ctxt);
10369
27.1k
      if (RAW != '\'') {
10370
1.05k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10371
1.05k
      } else
10372
26.1k
          NEXT;
10373
27.1k
  } else {
10374
4.88k
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10375
4.88k
  }
10376
321k
    }
10377
358k
    return(version);
10378
362k
}
10379
10380
/**
10381
 * xmlParseEncName:
10382
 * @ctxt:  an XML parser context
10383
 *
10384
 * DEPRECATED: Internal function, don't use.
10385
 *
10386
 * parse the XML encoding name
10387
 *
10388
 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10389
 *
10390
 * Returns the encoding name value or NULL
10391
 */
10392
xmlChar *
10393
104k
xmlParseEncName(xmlParserCtxtPtr ctxt) {
10394
104k
    xmlChar *buf = NULL;
10395
104k
    int len = 0;
10396
104k
    int size = 10;
10397
104k
    xmlChar cur;
10398
10399
104k
    cur = CUR;
10400
104k
    if (((cur >= 'a') && (cur <= 'z')) ||
10401
104k
        ((cur >= 'A') && (cur <= 'Z'))) {
10402
103k
  buf = (xmlChar *) xmlMallocAtomic(size);
10403
103k
  if (buf == NULL) {
10404
0
      xmlErrMemory(ctxt, NULL);
10405
0
      return(NULL);
10406
0
  }
10407
10408
103k
  buf[len++] = cur;
10409
103k
  NEXT;
10410
103k
  cur = CUR;
10411
4.35M
  while (((cur >= 'a') && (cur <= 'z')) ||
10412
4.35M
         ((cur >= 'A') && (cur <= 'Z')) ||
10413
4.35M
         ((cur >= '0') && (cur <= '9')) ||
10414
4.35M
         (cur == '.') || (cur == '_') ||
10415
4.35M
         (cur == '-')) {
10416
4.25M
      if (len + 1 >= size) {
10417
42.6k
          xmlChar *tmp;
10418
10419
42.6k
    size *= 2;
10420
42.6k
    tmp = (xmlChar *) xmlRealloc(buf, size);
10421
42.6k
    if (tmp == NULL) {
10422
0
        xmlErrMemory(ctxt, NULL);
10423
0
        xmlFree(buf);
10424
0
        return(NULL);
10425
0
    }
10426
42.6k
    buf = tmp;
10427
42.6k
      }
10428
4.25M
      buf[len++] = cur;
10429
4.25M
      NEXT;
10430
4.25M
      cur = CUR;
10431
4.25M
      if (cur == 0) {
10432
1.43k
          SHRINK;
10433
1.43k
    GROW;
10434
1.43k
    cur = CUR;
10435
1.43k
      }
10436
4.25M
        }
10437
103k
  buf[len] = 0;
10438
103k
    } else {
10439
735
  xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10440
735
    }
10441
104k
    return(buf);
10442
104k
}
10443
10444
/**
10445
 * xmlParseEncodingDecl:
10446
 * @ctxt:  an XML parser context
10447
 *
10448
 * DEPRECATED: Internal function, don't use.
10449
 *
10450
 * parse the XML encoding declaration
10451
 *
10452
 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' |  "'" EncName "'")
10453
 *
10454
 * this setups the conversion filters.
10455
 *
10456
 * Returns the encoding value or NULL
10457
 */
10458
10459
const xmlChar *
10460
215k
xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10461
215k
    xmlChar *encoding = NULL;
10462
10463
215k
    SKIP_BLANKS;
10464
215k
    if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
10465
105k
  SKIP(8);
10466
105k
  SKIP_BLANKS;
10467
105k
  if (RAW != '=') {
10468
943
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10469
943
      return(NULL);
10470
943
        }
10471
105k
  NEXT;
10472
105k
  SKIP_BLANKS;
10473
105k
  if (RAW == '"') {
10474
91.6k
      NEXT;
10475
91.6k
      encoding = xmlParseEncName(ctxt);
10476
91.6k
      if (RAW != '"') {
10477
6.04k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10478
6.04k
    xmlFree((xmlChar *) encoding);
10479
6.04k
    return(NULL);
10480
6.04k
      } else
10481
85.5k
          NEXT;
10482
91.6k
  } else if (RAW == '\''){
10483
12.7k
      NEXT;
10484
12.7k
      encoding = xmlParseEncName(ctxt);
10485
12.7k
      if (RAW != '\'') {
10486
684
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10487
684
    xmlFree((xmlChar *) encoding);
10488
684
    return(NULL);
10489
684
      } else
10490
12.0k
          NEXT;
10491
12.7k
  } else {
10492
631
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10493
631
  }
10494
10495
        /*
10496
         * Non standard parsing, allowing the user to ignore encoding
10497
         */
10498
98.2k
        if (ctxt->options & XML_PARSE_IGNORE_ENC) {
10499
16.6k
      xmlFree((xmlChar *) encoding);
10500
16.6k
            return(NULL);
10501
16.6k
  }
10502
10503
  /*
10504
   * UTF-16 encoding switch has already taken place at this stage,
10505
   * more over the little-endian/big-endian selection is already done
10506
   */
10507
81.6k
        if ((encoding != NULL) &&
10508
81.6k
      ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10509
81.1k
       (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
10510
      /*
10511
       * If no encoding was passed to the parser, that we are
10512
       * using UTF-16 and no decoder is present i.e. the
10513
       * document is apparently UTF-8 compatible, then raise an
10514
       * encoding mismatch fatal error
10515
       */
10516
1.93k
      if ((ctxt->encoding == NULL) &&
10517
1.93k
          (ctxt->input->buf != NULL) &&
10518
1.93k
          (ctxt->input->buf->encoder == NULL)) {
10519
1.92k
    xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10520
1.92k
      "Document labelled UTF-16 but has UTF-8 content\n");
10521
1.92k
      }
10522
1.93k
      if (ctxt->encoding != NULL)
10523
7
    xmlFree((xmlChar *) ctxt->encoding);
10524
1.93k
      ctxt->encoding = encoding;
10525
1.93k
  }
10526
  /*
10527
   * UTF-8 encoding is handled natively
10528
   */
10529
79.7k
        else if ((encoding != NULL) &&
10530
79.7k
      ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10531
79.1k
       (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
10532
39.8k
      if (ctxt->encoding != NULL)
10533
10
    xmlFree((xmlChar *) ctxt->encoding);
10534
39.8k
      ctxt->encoding = encoding;
10535
39.8k
  }
10536
39.8k
  else if (encoding != NULL) {
10537
39.3k
      xmlCharEncodingHandlerPtr handler;
10538
10539
39.3k
      if (ctxt->input->encoding != NULL)
10540
0
    xmlFree((xmlChar *) ctxt->input->encoding);
10541
39.3k
      ctxt->input->encoding = encoding;
10542
10543
39.3k
            handler = xmlFindCharEncodingHandler((const char *) encoding);
10544
39.3k
      if (handler != NULL) {
10545
36.9k
    if (xmlSwitchToEncoding(ctxt, handler) < 0) {
10546
        /* failed to convert */
10547
213
        ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
10548
213
        return(NULL);
10549
213
    }
10550
36.9k
      } else {
10551
2.39k
    xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
10552
2.39k
      "Unsupported encoding %s\n", encoding);
10553
2.39k
    return(NULL);
10554
2.39k
      }
10555
39.3k
  }
10556
81.6k
    }
10557
188k
    return(encoding);
10558
215k
}
10559
10560
/**
10561
 * xmlParseSDDecl:
10562
 * @ctxt:  an XML parser context
10563
 *
10564
 * DEPRECATED: Internal function, don't use.
10565
 *
10566
 * parse the XML standalone declaration
10567
 *
10568
 * [32] SDDecl ::= S 'standalone' Eq
10569
 *                 (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10570
 *
10571
 * [ VC: Standalone Document Declaration ]
10572
 * TODO The standalone document declaration must have the value "no"
10573
 * if any external markup declarations contain declarations of:
10574
 *  - attributes with default values, if elements to which these
10575
 *    attributes apply appear in the document without specifications
10576
 *    of values for these attributes, or
10577
 *  - entities (other than amp, lt, gt, apos, quot), if references
10578
 *    to those entities appear in the document, or
10579
 *  - attributes with values subject to normalization, where the
10580
 *    attribute appears in the document with a value which will change
10581
 *    as a result of normalization, or
10582
 *  - element types with element content, if white space occurs directly
10583
 *    within any instance of those types.
10584
 *
10585
 * Returns:
10586
 *   1 if standalone="yes"
10587
 *   0 if standalone="no"
10588
 *  -2 if standalone attribute is missing or invalid
10589
 *    (A standalone value of -2 means that the XML declaration was found,
10590
 *     but no value was specified for the standalone attribute).
10591
 */
10592
10593
int
10594
177k
xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10595
177k
    int standalone = -2;
10596
10597
177k
    SKIP_BLANKS;
10598
177k
    if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10599
30.2k
  SKIP(10);
10600
30.2k
        SKIP_BLANKS;
10601
30.2k
  if (RAW != '=') {
10602
264
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10603
264
      return(standalone);
10604
264
        }
10605
29.9k
  NEXT;
10606
29.9k
  SKIP_BLANKS;
10607
29.9k
        if (RAW == '\''){
10608
9.94k
      NEXT;
10609
9.94k
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10610
6.41k
          standalone = 0;
10611
6.41k
                SKIP(2);
10612
6.41k
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10613
3.52k
                 (NXT(2) == 's')) {
10614
3.13k
          standalone = 1;
10615
3.13k
    SKIP(3);
10616
3.13k
            } else {
10617
393
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10618
393
      }
10619
9.94k
      if (RAW != '\'') {
10620
633
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10621
633
      } else
10622
9.31k
          NEXT;
10623
20.0k
  } else if (RAW == '"'){
10624
19.7k
      NEXT;
10625
19.7k
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10626
9.99k
          standalone = 0;
10627
9.99k
    SKIP(2);
10628
9.99k
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10629
9.79k
                 (NXT(2) == 's')) {
10630
8.92k
          standalone = 1;
10631
8.92k
                SKIP(3);
10632
8.92k
            } else {
10633
873
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10634
873
      }
10635
19.7k
      if (RAW != '"') {
10636
1.32k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10637
1.32k
      } else
10638
18.4k
          NEXT;
10639
19.7k
  } else {
10640
228
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10641
228
        }
10642
29.9k
    }
10643
177k
    return(standalone);
10644
177k
}
10645
10646
/**
10647
 * xmlParseXMLDecl:
10648
 * @ctxt:  an XML parser context
10649
 *
10650
 * DEPRECATED: Internal function, don't use.
10651
 *
10652
 * parse an XML declaration header
10653
 *
10654
 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10655
 */
10656
10657
void
10658
354k
xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10659
354k
    xmlChar *version;
10660
10661
    /*
10662
     * This value for standalone indicates that the document has an
10663
     * XML declaration but it does not have a standalone attribute.
10664
     * It will be overwritten later if a standalone attribute is found.
10665
     */
10666
354k
    ctxt->input->standalone = -2;
10667
10668
    /*
10669
     * We know that '<?xml' is here.
10670
     */
10671
354k
    SKIP(5);
10672
10673
354k
    if (!IS_BLANK_CH(RAW)) {
10674
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10675
0
                 "Blank needed after '<?xml'\n");
10676
0
    }
10677
354k
    SKIP_BLANKS;
10678
10679
    /*
10680
     * We must have the VersionInfo here.
10681
     */
10682
354k
    version = xmlParseVersionInfo(ctxt);
10683
354k
    if (version == NULL) {
10684
55.3k
  xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10685
299k
    } else {
10686
299k
  if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10687
      /*
10688
       * Changed here for XML-1.0 5th edition
10689
       */
10690
9.83k
      if (ctxt->options & XML_PARSE_OLD10) {
10691
4.44k
    xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10692
4.44k
                "Unsupported version '%s'\n",
10693
4.44k
                version);
10694
5.39k
      } else {
10695
5.39k
          if ((version[0] == '1') && ((version[1] == '.'))) {
10696
3.52k
        xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10697
3.52k
                      "Unsupported version '%s'\n",
10698
3.52k
          version, NULL);
10699
3.52k
    } else {
10700
1.86k
        xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10701
1.86k
              "Unsupported version '%s'\n",
10702
1.86k
              version);
10703
1.86k
    }
10704
5.39k
      }
10705
9.83k
  }
10706
299k
  if (ctxt->version != NULL)
10707
0
      xmlFree((void *) ctxt->version);
10708
299k
  ctxt->version = version;
10709
299k
    }
10710
10711
    /*
10712
     * We may have the encoding declaration
10713
     */
10714
354k
    if (!IS_BLANK_CH(RAW)) {
10715
212k
        if ((RAW == '?') && (NXT(1) == '>')) {
10716
147k
      SKIP(2);
10717
147k
      return;
10718
147k
  }
10719
65.2k
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10720
65.2k
    }
10721
207k
    xmlParseEncodingDecl(ctxt);
10722
207k
    if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10723
207k
         (ctxt->instate == XML_PARSER_EOF)) {
10724
  /*
10725
   * The XML REC instructs us to stop parsing right here
10726
   */
10727
2.47k
        return;
10728
2.47k
    }
10729
10730
    /*
10731
     * We may have the standalone status.
10732
     */
10733
204k
    if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10734
28.8k
        if ((RAW == '?') && (NXT(1) == '>')) {
10735
27.2k
      SKIP(2);
10736
27.2k
      return;
10737
27.2k
  }
10738
1.57k
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10739
1.57k
    }
10740
10741
    /*
10742
     * We can grow the input buffer freely at that point
10743
     */
10744
177k
    GROW;
10745
10746
177k
    SKIP_BLANKS;
10747
177k
    ctxt->input->standalone = xmlParseSDDecl(ctxt);
10748
10749
177k
    SKIP_BLANKS;
10750
177k
    if ((RAW == '?') && (NXT(1) == '>')) {
10751
86.4k
        SKIP(2);
10752
91.1k
    } else if (RAW == '>') {
10753
        /* Deprecated old WD ... */
10754
493
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10755
493
  NEXT;
10756
90.6k
    } else {
10757
90.6k
        int c;
10758
10759
90.6k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10760
7.84M
        while ((c = CUR) != 0) {
10761
7.83M
            NEXT;
10762
7.83M
            if (c == '>')
10763
81.2k
                break;
10764
7.83M
        }
10765
90.6k
    }
10766
177k
}
10767
10768
/**
10769
 * xmlParseMisc:
10770
 * @ctxt:  an XML parser context
10771
 *
10772
 * DEPRECATED: Internal function, don't use.
10773
 *
10774
 * parse an XML Misc* optional field.
10775
 *
10776
 * [27] Misc ::= Comment | PI |  S
10777
 */
10778
10779
void
10780
562k
xmlParseMisc(xmlParserCtxtPtr ctxt) {
10781
629k
    while (ctxt->instate != XML_PARSER_EOF) {
10782
629k
        SKIP_BLANKS;
10783
629k
        GROW;
10784
629k
        if ((RAW == '<') && (NXT(1) == '?')) {
10785
45.5k
      xmlParsePI(ctxt);
10786
584k
        } else if (CMP4(CUR_PTR, '<', '!', '-', '-')) {
10787
21.2k
      xmlParseComment(ctxt);
10788
562k
        } else {
10789
562k
            break;
10790
562k
        }
10791
629k
    }
10792
562k
}
10793
10794
/**
10795
 * xmlParseDocument:
10796
 * @ctxt:  an XML parser context
10797
 *
10798
 * parse an XML document (and build a tree if using the standard SAX
10799
 * interface).
10800
 *
10801
 * [1] document ::= prolog element Misc*
10802
 *
10803
 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10804
 *
10805
 * Returns 0, -1 in case of error. the parser context is augmented
10806
 *                as a result of the parsing.
10807
 */
10808
10809
int
10810
267k
xmlParseDocument(xmlParserCtxtPtr ctxt) {
10811
267k
    xmlChar start[4];
10812
267k
    xmlCharEncoding enc;
10813
10814
267k
    xmlInitParser();
10815
10816
267k
    if ((ctxt == NULL) || (ctxt->input == NULL))
10817
0
        return(-1);
10818
10819
267k
    GROW;
10820
10821
    /*
10822
     * SAX: detecting the level.
10823
     */
10824
267k
    xmlDetectSAX2(ctxt);
10825
10826
    /*
10827
     * SAX: beginning of the document processing.
10828
     */
10829
267k
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10830
267k
        ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10831
267k
    if (ctxt->instate == XML_PARSER_EOF)
10832
0
  return(-1);
10833
10834
267k
    if ((ctxt->encoding == NULL) &&
10835
267k
        ((ctxt->input->end - ctxt->input->cur) >= 4)) {
10836
  /*
10837
   * Get the 4 first bytes and decode the charset
10838
   * if enc != XML_CHAR_ENCODING_NONE
10839
   * plug some encoding conversion routines.
10840
   */
10841
266k
  start[0] = RAW;
10842
266k
  start[1] = NXT(1);
10843
266k
  start[2] = NXT(2);
10844
266k
  start[3] = NXT(3);
10845
266k
  enc = xmlDetectCharEncoding(&start[0], 4);
10846
266k
  if (enc != XML_CHAR_ENCODING_NONE) {
10847
131k
      xmlSwitchEncoding(ctxt, enc);
10848
131k
  }
10849
266k
    }
10850
10851
10852
267k
    if (CUR == 0) {
10853
632
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10854
632
  return(-1);
10855
632
    }
10856
10857
    /*
10858
     * Check for the XMLDecl in the Prolog.
10859
     * do not GROW here to avoid the detected encoder to decode more
10860
     * than just the first line, unless the amount of data is really
10861
     * too small to hold "<?xml version="1.0" encoding="foo"
10862
     */
10863
266k
    if ((ctxt->input->end - ctxt->input->cur) < 35) {
10864
13.6k
       GROW;
10865
13.6k
    }
10866
266k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10867
10868
  /*
10869
   * Note that we will switch encoding on the fly.
10870
   */
10871
118k
  xmlParseXMLDecl(ctxt);
10872
118k
  if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10873
118k
      (ctxt->instate == XML_PARSER_EOF)) {
10874
      /*
10875
       * The XML REC instructs us to stop parsing right here
10876
       */
10877
826
      return(-1);
10878
826
  }
10879
117k
  ctxt->standalone = ctxt->input->standalone;
10880
117k
  SKIP_BLANKS;
10881
148k
    } else {
10882
148k
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10883
148k
    }
10884
265k
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10885
256k
        ctxt->sax->startDocument(ctxt->userData);
10886
265k
    if (ctxt->instate == XML_PARSER_EOF)
10887
14
  return(-1);
10888
265k
    if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10889
265k
        (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10890
0
  ctxt->myDoc->compression = ctxt->input->buf->compressed;
10891
0
    }
10892
10893
    /*
10894
     * The Misc part of the Prolog
10895
     */
10896
265k
    xmlParseMisc(ctxt);
10897
10898
    /*
10899
     * Then possibly doc type declaration(s) and more Misc
10900
     * (doctypedecl Misc*)?
10901
     */
10902
265k
    GROW;
10903
265k
    if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10904
10905
153k
  ctxt->inSubset = 1;
10906
153k
  xmlParseDocTypeDecl(ctxt);
10907
153k
  if (RAW == '[') {
10908
111k
      ctxt->instate = XML_PARSER_DTD;
10909
111k
      xmlParseInternalSubset(ctxt);
10910
111k
      if (ctxt->instate == XML_PARSER_EOF)
10911
41.8k
    return(-1);
10912
111k
  }
10913
10914
  /*
10915
   * Create and update the external subset.
10916
   */
10917
111k
  ctxt->inSubset = 2;
10918
111k
  if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10919
111k
      (!ctxt->disableSAX))
10920
103k
      ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10921
103k
                                ctxt->extSubSystem, ctxt->extSubURI);
10922
111k
  if (ctxt->instate == XML_PARSER_EOF)
10923
6.57k
      return(-1);
10924
104k
  ctxt->inSubset = 0;
10925
10926
104k
        xmlCleanSpecialAttr(ctxt);
10927
10928
104k
  ctxt->instate = XML_PARSER_PROLOG;
10929
104k
  xmlParseMisc(ctxt);
10930
104k
    }
10931
10932
    /*
10933
     * Time to start parsing the tree itself
10934
     */
10935
217k
    GROW;
10936
217k
    if (RAW != '<') {
10937
24.6k
  xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10938
24.6k
           "Start tag expected, '<' not found\n");
10939
192k
    } else {
10940
192k
  ctxt->instate = XML_PARSER_CONTENT;
10941
192k
  xmlParseElement(ctxt);
10942
192k
  ctxt->instate = XML_PARSER_EPILOG;
10943
10944
10945
  /*
10946
   * The Misc part at the end
10947
   */
10948
192k
  xmlParseMisc(ctxt);
10949
10950
192k
  if (RAW != 0) {
10951
45.5k
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10952
45.5k
  }
10953
192k
  ctxt->instate = XML_PARSER_EOF;
10954
192k
    }
10955
10956
    /*
10957
     * SAX: end of the document processing.
10958
     */
10959
217k
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10960
217k
        ctxt->sax->endDocument(ctxt->userData);
10961
10962
    /*
10963
     * Remove locally kept entity definitions if the tree was not built
10964
     */
10965
217k
    if ((ctxt->myDoc != NULL) &&
10966
217k
  (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10967
390
  xmlFreeDoc(ctxt->myDoc);
10968
390
  ctxt->myDoc = NULL;
10969
390
    }
10970
10971
217k
    if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10972
29.4k
        ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10973
29.4k
  if (ctxt->valid)
10974
16.7k
      ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10975
29.4k
  if (ctxt->nsWellFormed)
10976
26.1k
      ctxt->myDoc->properties |= XML_DOC_NSVALID;
10977
29.4k
  if (ctxt->options & XML_PARSE_OLD10)
10978
8.29k
      ctxt->myDoc->properties |= XML_DOC_OLD10;
10979
29.4k
    }
10980
217k
    if (! ctxt->wellFormed) {
10981
187k
  ctxt->valid = 0;
10982
187k
  return(-1);
10983
187k
    }
10984
29.4k
    return(0);
10985
217k
}
10986
10987
/**
10988
 * xmlParseExtParsedEnt:
10989
 * @ctxt:  an XML parser context
10990
 *
10991
 * parse a general parsed entity
10992
 * An external general parsed entity is well-formed if it matches the
10993
 * production labeled extParsedEnt.
10994
 *
10995
 * [78] extParsedEnt ::= TextDecl? content
10996
 *
10997
 * Returns 0, -1 in case of error. the parser context is augmented
10998
 *                as a result of the parsing.
10999
 */
11000
11001
int
11002
0
xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
11003
0
    xmlChar start[4];
11004
0
    xmlCharEncoding enc;
11005
11006
0
    if ((ctxt == NULL) || (ctxt->input == NULL))
11007
0
        return(-1);
11008
11009
0
    xmlDetectSAX2(ctxt);
11010
11011
0
    GROW;
11012
11013
    /*
11014
     * SAX: beginning of the document processing.
11015
     */
11016
0
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11017
0
        ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
11018
11019
    /*
11020
     * Get the 4 first bytes and decode the charset
11021
     * if enc != XML_CHAR_ENCODING_NONE
11022
     * plug some encoding conversion routines.
11023
     */
11024
0
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11025
0
  start[0] = RAW;
11026
0
  start[1] = NXT(1);
11027
0
  start[2] = NXT(2);
11028
0
  start[3] = NXT(3);
11029
0
  enc = xmlDetectCharEncoding(start, 4);
11030
0
  if (enc != XML_CHAR_ENCODING_NONE) {
11031
0
      xmlSwitchEncoding(ctxt, enc);
11032
0
  }
11033
0
    }
11034
11035
11036
0
    if (CUR == 0) {
11037
0
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11038
0
    }
11039
11040
    /*
11041
     * Check for the XMLDecl in the Prolog.
11042
     */
11043
0
    GROW;
11044
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
11045
11046
  /*
11047
   * Note that we will switch encoding on the fly.
11048
   */
11049
0
  xmlParseXMLDecl(ctxt);
11050
0
  if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11051
      /*
11052
       * The XML REC instructs us to stop parsing right here
11053
       */
11054
0
      return(-1);
11055
0
  }
11056
0
  SKIP_BLANKS;
11057
0
    } else {
11058
0
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11059
0
    }
11060
0
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
11061
0
        ctxt->sax->startDocument(ctxt->userData);
11062
0
    if (ctxt->instate == XML_PARSER_EOF)
11063
0
  return(-1);
11064
11065
    /*
11066
     * Doing validity checking on chunk doesn't make sense
11067
     */
11068
0
    ctxt->instate = XML_PARSER_CONTENT;
11069
0
    ctxt->validate = 0;
11070
0
    ctxt->loadsubset = 0;
11071
0
    ctxt->depth = 0;
11072
11073
0
    xmlParseContent(ctxt);
11074
0
    if (ctxt->instate == XML_PARSER_EOF)
11075
0
  return(-1);
11076
11077
0
    if ((RAW == '<') && (NXT(1) == '/')) {
11078
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11079
0
    } else if (RAW != 0) {
11080
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
11081
0
    }
11082
11083
    /*
11084
     * SAX: end of the document processing.
11085
     */
11086
0
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11087
0
        ctxt->sax->endDocument(ctxt->userData);
11088
11089
0
    if (! ctxt->wellFormed) return(-1);
11090
0
    return(0);
11091
0
}
11092
11093
#ifdef LIBXML_PUSH_ENABLED
11094
/************************************************************************
11095
 *                  *
11096
 *    Progressive parsing interfaces        *
11097
 *                  *
11098
 ************************************************************************/
11099
11100
/**
11101
 * xmlParseLookupChar:
11102
 * @ctxt:  an XML parser context
11103
 * @c:  character
11104
 *
11105
 * Check whether the input buffer contains a character.
11106
 */
11107
static int
11108
8.81M
xmlParseLookupChar(xmlParserCtxtPtr ctxt, int c) {
11109
8.81M
    const xmlChar *cur;
11110
11111
8.81M
    if (ctxt->checkIndex == 0) {
11112
7.35M
        cur = ctxt->input->cur + 1;
11113
7.35M
    } else {
11114
1.45M
        cur = ctxt->input->cur + ctxt->checkIndex;
11115
1.45M
    }
11116
11117
8.81M
    if (memchr(cur, c, ctxt->input->end - cur) == NULL) {
11118
1.47M
        ctxt->checkIndex = ctxt->input->end - ctxt->input->cur;
11119
1.47M
        return(0);
11120
7.34M
    } else {
11121
7.34M
        ctxt->checkIndex = 0;
11122
7.34M
        return(1);
11123
7.34M
    }
11124
8.81M
}
11125
11126
/**
11127
 * xmlParseLookupString:
11128
 * @ctxt:  an XML parser context
11129
 * @startDelta: delta to apply at the start
11130
 * @str:  string
11131
 * @strLen:  length of string
11132
 *
11133
 * Check whether the input buffer contains a string.
11134
 */
11135
static const xmlChar *
11136
xmlParseLookupString(xmlParserCtxtPtr ctxt, size_t startDelta,
11137
6.23M
                     const char *str, size_t strLen) {
11138
6.23M
    const xmlChar *cur, *term;
11139
11140
6.23M
    if (ctxt->checkIndex == 0) {
11141
1.61M
        cur = ctxt->input->cur + startDelta;
11142
4.62M
    } else {
11143
4.62M
        cur = ctxt->input->cur + ctxt->checkIndex;
11144
4.62M
    }
11145
11146
6.23M
    term = BAD_CAST strstr((const char *) cur, str);
11147
6.23M
    if (term == NULL) {
11148
5.01M
        const xmlChar *end = ctxt->input->end;
11149
11150
        /* Rescan (strLen - 1) characters. */
11151
5.01M
        if ((size_t) (end - cur) < strLen)
11152
27.0k
            end = cur;
11153
4.99M
        else
11154
4.99M
            end -= strLen - 1;
11155
5.01M
        ctxt->checkIndex = end - ctxt->input->cur;
11156
5.01M
    } else {
11157
1.21M
        ctxt->checkIndex = 0;
11158
1.21M
    }
11159
11160
6.23M
    return(term);
11161
6.23M
}
11162
11163
/**
11164
 * xmlParseLookupCharData:
11165
 * @ctxt:  an XML parser context
11166
 *
11167
 * Check whether the input buffer contains terminated char data.
11168
 */
11169
static int
11170
19.2M
xmlParseLookupCharData(xmlParserCtxtPtr ctxt) {
11171
19.2M
    const xmlChar *cur = ctxt->input->cur + ctxt->checkIndex;
11172
19.2M
    const xmlChar *end = ctxt->input->end;
11173
11174
1.21G
    while (cur < end) {
11175
1.21G
        if ((*cur == '<') || (*cur == '&')) {
11176
16.8M
            ctxt->checkIndex = 0;
11177
16.8M
            return(1);
11178
16.8M
        }
11179
1.19G
        cur++;
11180
1.19G
    }
11181
11182
2.37M
    ctxt->checkIndex = cur - ctxt->input->cur;
11183
2.37M
    return(0);
11184
19.2M
}
11185
11186
/**
11187
 * xmlParseLookupGt:
11188
 * @ctxt:  an XML parser context
11189
 *
11190
 * Check whether there's enough data in the input buffer to finish parsing
11191
 * a start tag. This has to take quotes into account.
11192
 */
11193
static int
11194
29.0M
xmlParseLookupGt(xmlParserCtxtPtr ctxt) {
11195
29.0M
    const xmlChar *cur;
11196
29.0M
    const xmlChar *end = ctxt->input->end;
11197
29.0M
    int state = ctxt->endCheckState;
11198
11199
29.0M
    if (ctxt->checkIndex == 0)
11200
10.4M
        cur = ctxt->input->cur + 1;
11201
18.6M
    else
11202
18.6M
        cur = ctxt->input->cur + ctxt->checkIndex;
11203
11204
4.53G
    while (cur < end) {
11205
4.51G
        if (state) {
11206
2.75G
            if (*cur == state)
11207
40.7M
                state = 0;
11208
2.75G
        } else if (*cur == '\'' || *cur == '"') {
11209
40.7M
            state = *cur;
11210
1.72G
        } else if (*cur == '>') {
11211
10.3M
            ctxt->checkIndex = 0;
11212
10.3M
            ctxt->endCheckState = 0;
11213
10.3M
            return(1);
11214
10.3M
        }
11215
4.50G
        cur++;
11216
4.50G
    }
11217
11218
18.7M
    ctxt->checkIndex = cur - ctxt->input->cur;
11219
18.7M
    ctxt->endCheckState = state;
11220
18.7M
    return(0);
11221
29.0M
}
11222
11223
/**
 * xmlParseLookupInternalSubset:
 * @ctxt:  an XML parser context
 *
 * Check whether there's enough data in the input buffer to finish parsing
 * the internal subset.
 *
 * The scan is resumable: the offset reached (checkIndex) and the scanner
 * state (endCheckState) are stored in the context when the end of the
 * available data is hit, and picked up again on the next call.
 *
 * Scanner states, as used below:
 *   0     outside of any special construct
 *   '-'   inside a comment, i.e. after "<!--"
 *   ']'   a ']' was seen, possibly followed by more ']'
 *   ' '   a ']' followed by one or more blanks was seen
 *   other the quote character ('"' or '\'') of an open quoted string
 *
 * Returns 1 if the end of the internal subset was found (checkIndex and
 *         endCheckState are reset to 0), 0 if more data is needed.
 */
static int
xmlParseLookupInternalSubset(xmlParserCtxtPtr ctxt) {
    /*
     * Sorry, but progressive parsing of the internal subset is not
     * supported. We first check that the full content of the internal
     * subset is available and parsing is launched only at that point.
     * Internal subset ends with "']' S? '>'" in an unescaped section and
     * not in a ']]>' sequence which are conditional sections.
     */
    const xmlChar *cur, *start;
    const xmlChar *end = ctxt->input->end;
    int state = ctxt->endCheckState;

    /* Skip the current byte on a fresh search, else resume. */
    if (ctxt->checkIndex == 0) {
        cur = ctxt->input->cur + 1;
    } else {
        cur = ctxt->input->cur + ctxt->checkIndex;
    }
    /*
     * 'start' is the lower bound for the rescan done after the loop:
     * the saved position is never moved back before it.
     */
    start = cur;

    /*
     * NOTE(review): cur[1], cur[2] and cur[3] are read below without
     * being compared to 'end'. This presumably relies on the buffer
     * being zero terminated at 'end' - confirm.
     */
    while (cur < end) {
        if (state == '-') {
            /* Inside a comment, only "-->" is significant. */
            if ((*cur == '-') &&
                (cur[1] == '-') &&
                (cur[2] == '>')) {
                state = 0;
                cur += 3;
                start = cur;
                continue;
            }
        }
        else if (state == ']') {
            if (*cur == '>') {
                /* "]>": end of the internal subset. */
                ctxt->checkIndex = 0;
                ctxt->endCheckState = 0;
                return(1);
            }
            if (IS_BLANK_CH(*cur)) {
                state = ' ';
            } else if (*cur != ']') {
                /*
                 * Neither '>', a blank nor another ']': go back to the
                 * default state and examine the same byte again.
                 */
                state = 0;
                start = cur;
                continue;
            }
        }
        else if (state == ' ') {
            if (*cur == '>') {
                /* "]" S ">": end of the internal subset. */
                ctxt->checkIndex = 0;
                ctxt->endCheckState = 0;
                return(1);
            }
            if (!IS_BLANK_CH(*cur)) {
                /* Examine the same byte again in the default state. */
                state = 0;
                start = cur;
                continue;
            }
        }
        else if (state != 0) {
            /* Inside a quoted string: wait for the matching quote. */
            if (*cur == state) {
                state = 0;
                start = cur + 1;
            }
        }
        else if (*cur == '<') {
            if ((cur[1] == '!') &&
                (cur[2] == '-') &&
                (cur[3] == '-')) {
                state = '-';
                cur += 4;
                /* Don't treat <!--> as comment */
                start = cur;
                continue;
            }
        }
        else if ((*cur == '"') || (*cur == '\'') || (*cur == ']')) {
            /* The byte itself is used as the new state. */
            state = *cur;
        }

        cur++;
    }

    /*
     * Rescan the three last characters to detect "<!--" and "-->"
     * split across chunks.
     */
    if ((state == 0) || (state == '-')) {
        if (cur - start < 3)
            cur = start;
        else
            cur -= 3;
    }
    ctxt->checkIndex = cur - ctxt->input->cur;
    ctxt->endCheckState = state;
    return(0);
}
11325
11326
/**
11327
 * xmlCheckCdataPush:
11328
 * @cur: pointer to the block of characters
11329
 * @len: length of the block in bytes
11330
 * @complete: 1 if complete CDATA block is passed in, 0 if partial block
11331
 *
11332
 * Check that the block of characters is okay as SCdata content [20]
11333
 *
11334
 * Returns the number of bytes to pass if okay, a negative index where an
11335
 *         UTF-8 error occurred otherwise
11336
 */
11337
static int
11338
818k
xmlCheckCdataPush(const xmlChar *utf, int len, int complete) {
11339
818k
    int ix;
11340
818k
    unsigned char c;
11341
818k
    int codepoint;
11342
11343
818k
    if ((utf == NULL) || (len <= 0))
11344
5.14k
        return(0);
11345
11346
21.4M
    for (ix = 0; ix < len;) {      /* string is 0-terminated */
11347
21.3M
        c = utf[ix];
11348
21.3M
        if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
11349
15.5M
      if (c >= 0x20)
11350
13.0M
    ix++;
11351
2.55M
      else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11352
2.44M
          ix++;
11353
111k
      else
11354
111k
          return(-ix);
11355
15.5M
  } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
11356
4.76M
      if (ix + 2 > len) return(complete ? -ix : ix);
11357
4.74M
      if ((utf[ix+1] & 0xc0 ) != 0x80)
11358
133k
          return(-ix);
11359
4.60M
      codepoint = (utf[ix] & 0x1f) << 6;
11360
4.60M
      codepoint |= utf[ix+1] & 0x3f;
11361
4.60M
      if (!xmlIsCharQ(codepoint))
11362
22.3k
          return(-ix);
11363
4.58M
      ix += 2;
11364
4.58M
  } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
11365
375k
      if (ix + 3 > len) return(complete ? -ix : ix);
11366
333k
      if (((utf[ix+1] & 0xc0) != 0x80) ||
11367
333k
          ((utf[ix+2] & 0xc0) != 0x80))
11368
78.8k
        return(-ix);
11369
254k
      codepoint = (utf[ix] & 0xf) << 12;
11370
254k
      codepoint |= (utf[ix+1] & 0x3f) << 6;
11371
254k
      codepoint |= utf[ix+2] & 0x3f;
11372
254k
      if (!xmlIsCharQ(codepoint))
11373
41.9k
          return(-ix);
11374
212k
      ix += 3;
11375
603k
  } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
11376
534k
      if (ix + 4 > len) return(complete ? -ix : ix);
11377
512k
      if (((utf[ix+1] & 0xc0) != 0x80) ||
11378
512k
          ((utf[ix+2] & 0xc0) != 0x80) ||
11379
512k
    ((utf[ix+3] & 0xc0) != 0x80))
11380
112k
        return(-ix);
11381
400k
      codepoint = (utf[ix] & 0x7) << 18;
11382
400k
      codepoint |= (utf[ix+1] & 0x3f) << 12;
11383
400k
      codepoint |= (utf[ix+2] & 0x3f) << 6;
11384
400k
      codepoint |= utf[ix+3] & 0x3f;
11385
400k
      if (!xmlIsCharQ(codepoint))
11386
51.3k
          return(-ix);
11387
348k
      ix += 4;
11388
348k
  } else       /* unknown encoding */
11389
68.8k
      return(-ix);
11390
21.3M
      }
11391
106k
      return(ix);
11392
813k
}
11393
11394
/**
11395
 * xmlParseTryOrFinish:
11396
 * @ctxt:  an XML parser context
11397
 * @terminate:  last chunk indicator
11398
 *
11399
 * Try to progress on parsing
11400
 *
11401
 * Returns zero if no parsing was possible
11402
 */
11403
static int
11404
38.7M
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11405
38.7M
    int ret = 0;
11406
38.7M
    int avail, tlen;
11407
38.7M
    xmlChar cur, next;
11408
11409
38.7M
    if (ctxt->input == NULL)
11410
0
        return(0);
11411
11412
#ifdef DEBUG_PUSH
11413
    switch (ctxt->instate) {
11414
  case XML_PARSER_EOF:
11415
      xmlGenericError(xmlGenericErrorContext,
11416
        "PP: try EOF\n"); break;
11417
  case XML_PARSER_START:
11418
      xmlGenericError(xmlGenericErrorContext,
11419
        "PP: try START\n"); break;
11420
  case XML_PARSER_MISC:
11421
      xmlGenericError(xmlGenericErrorContext,
11422
        "PP: try MISC\n");break;
11423
  case XML_PARSER_COMMENT:
11424
      xmlGenericError(xmlGenericErrorContext,
11425
        "PP: try COMMENT\n");break;
11426
  case XML_PARSER_PROLOG:
11427
      xmlGenericError(xmlGenericErrorContext,
11428
        "PP: try PROLOG\n");break;
11429
  case XML_PARSER_START_TAG:
11430
      xmlGenericError(xmlGenericErrorContext,
11431
        "PP: try START_TAG\n");break;
11432
  case XML_PARSER_CONTENT:
11433
      xmlGenericError(xmlGenericErrorContext,
11434
        "PP: try CONTENT\n");break;
11435
  case XML_PARSER_CDATA_SECTION:
11436
      xmlGenericError(xmlGenericErrorContext,
11437
        "PP: try CDATA_SECTION\n");break;
11438
  case XML_PARSER_END_TAG:
11439
      xmlGenericError(xmlGenericErrorContext,
11440
        "PP: try END_TAG\n");break;
11441
  case XML_PARSER_ENTITY_DECL:
11442
      xmlGenericError(xmlGenericErrorContext,
11443
        "PP: try ENTITY_DECL\n");break;
11444
  case XML_PARSER_ENTITY_VALUE:
11445
      xmlGenericError(xmlGenericErrorContext,
11446
        "PP: try ENTITY_VALUE\n");break;
11447
  case XML_PARSER_ATTRIBUTE_VALUE:
11448
      xmlGenericError(xmlGenericErrorContext,
11449
        "PP: try ATTRIBUTE_VALUE\n");break;
11450
  case XML_PARSER_DTD:
11451
      xmlGenericError(xmlGenericErrorContext,
11452
        "PP: try DTD\n");break;
11453
  case XML_PARSER_EPILOG:
11454
      xmlGenericError(xmlGenericErrorContext,
11455
        "PP: try EPILOG\n");break;
11456
  case XML_PARSER_PI:
11457
      xmlGenericError(xmlGenericErrorContext,
11458
        "PP: try PI\n");break;
11459
        case XML_PARSER_IGNORE:
11460
            xmlGenericError(xmlGenericErrorContext,
11461
        "PP: try IGNORE\n");break;
11462
    }
11463
#endif
11464
11465
38.7M
    if ((ctxt->input != NULL) &&
11466
38.7M
        (ctxt->input->cur - ctxt->input->base > 4096)) {
11467
226k
        xmlParserInputShrink(ctxt->input);
11468
226k
    }
11469
11470
1.22G
    while (ctxt->instate != XML_PARSER_EOF) {
11471
1.22G
  if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11472
135k
      return(0);
11473
11474
1.22G
  if (ctxt->input == NULL) break;
11475
1.22G
  if (ctxt->input->buf == NULL)
11476
0
      avail = ctxt->input->length -
11477
0
              (ctxt->input->cur - ctxt->input->base);
11478
1.22G
  else {
11479
      /*
11480
       * If we are operating on converted input, try to flush
11481
       * remaining chars to avoid them stalling in the non-converted
11482
       * buffer. But do not do this in document start where
11483
       * encoding="..." may not have been read and we work on a
11484
       * guessed encoding.
11485
       */
11486
1.22G
      if ((ctxt->instate != XML_PARSER_START) &&
11487
1.22G
          (ctxt->input->buf->raw != NULL) &&
11488
1.22G
    (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
11489
3.85M
                size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11490
3.85M
                                                 ctxt->input);
11491
3.85M
    size_t current = ctxt->input->cur - ctxt->input->base;
11492
11493
3.85M
    xmlParserInputBufferPush(ctxt->input->buf, 0, "");
11494
3.85M
                xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11495
3.85M
                                      base, current);
11496
3.85M
      }
11497
1.22G
      avail = xmlBufUse(ctxt->input->buf->buffer) -
11498
1.22G
        (ctxt->input->cur - ctxt->input->base);
11499
1.22G
  }
11500
1.22G
        if (avail < 1)
11501
860k
      goto done;
11502
1.22G
        switch (ctxt->instate) {
11503
0
            case XML_PARSER_EOF:
11504
          /*
11505
     * Document parsing is done !
11506
     */
11507
0
          goto done;
11508
3.89M
            case XML_PARSER_START:
11509
3.89M
    if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11510
405k
        xmlChar start[4];
11511
405k
        xmlCharEncoding enc;
11512
11513
        /*
11514
         * Very first chars read from the document flow.
11515
         */
11516
405k
        if (avail < 4)
11517
3.13k
      goto done;
11518
11519
        /*
11520
         * Get the 4 first bytes and decode the charset
11521
         * if enc != XML_CHAR_ENCODING_NONE
11522
         * plug some encoding conversion routines,
11523
         * else xmlSwitchEncoding will set to (default)
11524
         * UTF8.
11525
         */
11526
401k
        start[0] = RAW;
11527
401k
        start[1] = NXT(1);
11528
401k
        start[2] = NXT(2);
11529
401k
        start[3] = NXT(3);
11530
401k
        enc = xmlDetectCharEncoding(start, 4);
11531
401k
        xmlSwitchEncoding(ctxt, enc);
11532
401k
        break;
11533
405k
    }
11534
11535
3.48M
    if (avail < 2)
11536
188
        goto done;
11537
3.48M
    cur = ctxt->input->cur[0];
11538
3.48M
    next = ctxt->input->cur[1];
11539
3.48M
    if (cur == 0) {
11540
1.02k
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11541
1.02k
      ctxt->sax->setDocumentLocator(ctxt->userData,
11542
1.02k
                  &xmlDefaultSAXLocator);
11543
1.02k
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11544
1.02k
        xmlHaltParser(ctxt);
11545
#ifdef DEBUG_PUSH
11546
        xmlGenericError(xmlGenericErrorContext,
11547
          "PP: entering EOF\n");
11548
#endif
11549
1.02k
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11550
1.02k
      ctxt->sax->endDocument(ctxt->userData);
11551
1.02k
        goto done;
11552
1.02k
    }
11553
3.48M
          if ((cur == '<') && (next == '?')) {
11554
        /* PI or XML decl */
11555
3.22M
        if (avail < 5) goto done;
11556
3.22M
        if ((!terminate) &&
11557
3.22M
                        (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11558
2.95M
      goto done;
11559
269k
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11560
269k
      ctxt->sax->setDocumentLocator(ctxt->userData,
11561
269k
                  &xmlDefaultSAXLocator);
11562
269k
        if ((ctxt->input->cur[2] == 'x') &&
11563
269k
      (ctxt->input->cur[3] == 'm') &&
11564
269k
      (ctxt->input->cur[4] == 'l') &&
11565
269k
      (IS_BLANK_CH(ctxt->input->cur[5]))) {
11566
236k
      ret += 5;
11567
#ifdef DEBUG_PUSH
11568
      xmlGenericError(xmlGenericErrorContext,
11569
        "PP: Parsing XML Decl\n");
11570
#endif
11571
236k
      xmlParseXMLDecl(ctxt);
11572
236k
      if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11573
          /*
11574
           * The XML REC instructs us to stop parsing right
11575
           * here
11576
           */
11577
1.65k
          xmlHaltParser(ctxt);
11578
1.65k
          return(0);
11579
1.65k
      }
11580
234k
      ctxt->standalone = ctxt->input->standalone;
11581
234k
      if ((ctxt->encoding == NULL) &&
11582
234k
          (ctxt->input->encoding != NULL))
11583
24.3k
          ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11584
234k
      if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11585
234k
          (!ctxt->disableSAX))
11586
217k
          ctxt->sax->startDocument(ctxt->userData);
11587
234k
      ctxt->instate = XML_PARSER_MISC;
11588
#ifdef DEBUG_PUSH
11589
      xmlGenericError(xmlGenericErrorContext,
11590
        "PP: entering MISC\n");
11591
#endif
11592
234k
        } else {
11593
33.2k
      ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11594
33.2k
      if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11595
33.2k
          (!ctxt->disableSAX))
11596
33.2k
          ctxt->sax->startDocument(ctxt->userData);
11597
33.2k
      ctxt->instate = XML_PARSER_MISC;
11598
#ifdef DEBUG_PUSH
11599
      xmlGenericError(xmlGenericErrorContext,
11600
        "PP: entering MISC\n");
11601
#endif
11602
33.2k
        }
11603
269k
    } else {
11604
261k
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11605
261k
      ctxt->sax->setDocumentLocator(ctxt->userData,
11606
261k
                  &xmlDefaultSAXLocator);
11607
261k
        ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11608
261k
        if (ctxt->version == NULL) {
11609
0
            xmlErrMemory(ctxt, NULL);
11610
0
      break;
11611
0
        }
11612
261k
        if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11613
261k
            (!ctxt->disableSAX))
11614
261k
      ctxt->sax->startDocument(ctxt->userData);
11615
261k
        ctxt->instate = XML_PARSER_MISC;
11616
#ifdef DEBUG_PUSH
11617
        xmlGenericError(xmlGenericErrorContext,
11618
          "PP: entering MISC\n");
11619
#endif
11620
261k
    }
11621
529k
    break;
11622
29.3M
            case XML_PARSER_START_TAG: {
11623
29.3M
          const xmlChar *name;
11624
29.3M
    const xmlChar *prefix = NULL;
11625
29.3M
    const xmlChar *URI = NULL;
11626
29.3M
                int line = ctxt->input->line;
11627
29.3M
    int nsNr = ctxt->nsNr;
11628
11629
29.3M
    if ((avail < 2) && (ctxt->inputNr == 1))
11630
0
        goto done;
11631
29.3M
    cur = ctxt->input->cur[0];
11632
29.3M
          if (cur != '<') {
11633
21.9k
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11634
21.9k
        xmlHaltParser(ctxt);
11635
21.9k
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11636
21.9k
      ctxt->sax->endDocument(ctxt->userData);
11637
21.9k
        goto done;
11638
21.9k
    }
11639
29.3M
    if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11640
15.7M
                    goto done;
11641
13.5M
    if (ctxt->spaceNr == 0)
11642
729k
        spacePush(ctxt, -1);
11643
12.8M
    else if (*ctxt->space == -2)
11644
2.94M
        spacePush(ctxt, -1);
11645
9.90M
    else
11646
9.90M
        spacePush(ctxt, *ctxt->space);
11647
13.5M
#ifdef LIBXML_SAX1_ENABLED
11648
13.5M
    if (ctxt->sax2)
11649
8.41M
#endif /* LIBXML_SAX1_ENABLED */
11650
8.41M
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
11651
5.16M
#ifdef LIBXML_SAX1_ENABLED
11652
5.16M
    else
11653
5.16M
        name = xmlParseStartTag(ctxt);
11654
13.5M
#endif /* LIBXML_SAX1_ENABLED */
11655
13.5M
    if (ctxt->instate == XML_PARSER_EOF)
11656
776
        goto done;
11657
13.5M
    if (name == NULL) {
11658
30.4k
        spacePop(ctxt);
11659
30.4k
        xmlHaltParser(ctxt);
11660
30.4k
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11661
30.4k
      ctxt->sax->endDocument(ctxt->userData);
11662
30.4k
        goto done;
11663
30.4k
    }
11664
13.5M
#ifdef LIBXML_VALID_ENABLED
11665
    /*
11666
     * [ VC: Root Element Type ]
11667
     * The Name in the document type declaration must match
11668
     * the element type of the root element.
11669
     */
11670
13.5M
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11671
13.5M
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
11672
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11673
13.5M
#endif /* LIBXML_VALID_ENABLED */
11674
11675
    /*
11676
     * Check for an Empty Element.
11677
     */
11678
13.5M
    if ((RAW == '/') && (NXT(1) == '>')) {
11679
5.74M
        SKIP(2);
11680
11681
5.74M
        if (ctxt->sax2) {
11682
3.64M
      if ((ctxt->sax != NULL) &&
11683
3.64M
          (ctxt->sax->endElementNs != NULL) &&
11684
3.64M
          (!ctxt->disableSAX))
11685
3.63M
          ctxt->sax->endElementNs(ctxt->userData, name,
11686
3.63M
                                  prefix, URI);
11687
3.64M
      if (ctxt->nsNr - nsNr > 0)
11688
56.7k
          nsPop(ctxt, ctxt->nsNr - nsNr);
11689
3.64M
#ifdef LIBXML_SAX1_ENABLED
11690
3.64M
        } else {
11691
2.10M
      if ((ctxt->sax != NULL) &&
11692
2.10M
          (ctxt->sax->endElement != NULL) &&
11693
2.10M
          (!ctxt->disableSAX))
11694
2.10M
          ctxt->sax->endElement(ctxt->userData, name);
11695
2.10M
#endif /* LIBXML_SAX1_ENABLED */
11696
2.10M
        }
11697
5.74M
        if (ctxt->instate == XML_PARSER_EOF)
11698
0
      goto done;
11699
5.74M
        spacePop(ctxt);
11700
5.74M
        if (ctxt->nameNr == 0) {
11701
35.5k
      ctxt->instate = XML_PARSER_EPILOG;
11702
5.70M
        } else {
11703
5.70M
      ctxt->instate = XML_PARSER_CONTENT;
11704
5.70M
        }
11705
5.74M
        break;
11706
5.74M
    }
11707
7.80M
    if (RAW == '>') {
11708
4.27M
        NEXT;
11709
4.27M
    } else {
11710
3.52M
        xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11711
3.52M
           "Couldn't find end of Start Tag %s\n",
11712
3.52M
           name);
11713
3.52M
        nodePop(ctxt);
11714
3.52M
        spacePop(ctxt);
11715
3.52M
    }
11716
7.80M
                nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
11717
11718
7.80M
    ctxt->instate = XML_PARSER_CONTENT;
11719
7.80M
                break;
11720
13.5M
      }
11721
1.17G
            case XML_PARSER_CONTENT: {
11722
1.17G
    if ((avail < 2) && (ctxt->inputNr == 1))
11723
115k
        goto done;
11724
1.17G
    cur = ctxt->input->cur[0];
11725
1.17G
    next = ctxt->input->cur[1];
11726
11727
1.17G
    if ((cur == '<') && (next == '/')) {
11728
3.34M
        ctxt->instate = XML_PARSER_END_TAG;
11729
3.34M
        break;
11730
1.16G
          } else if ((cur == '<') && (next == '?')) {
11731
592k
        if ((!terminate) &&
11732
592k
            (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11733
429k
      goto done;
11734
163k
        xmlParsePI(ctxt);
11735
163k
        ctxt->instate = XML_PARSER_CONTENT;
11736
1.16G
    } else if ((cur == '<') && (next != '!')) {
11737
13.2M
        ctxt->instate = XML_PARSER_START_TAG;
11738
13.2M
        break;
11739
1.15G
    } else if ((cur == '<') && (next == '!') &&
11740
1.15G
               (ctxt->input->cur[2] == '-') &&
11741
1.15G
         (ctxt->input->cur[3] == '-')) {
11742
1.60M
        if ((!terminate) &&
11743
1.60M
            (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11744
986k
      goto done;
11745
620k
        xmlParseComment(ctxt);
11746
620k
        ctxt->instate = XML_PARSER_CONTENT;
11747
1.15G
    } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11748
1.15G
        (ctxt->input->cur[2] == '[') &&
11749
1.15G
        (ctxt->input->cur[3] == 'C') &&
11750
1.15G
        (ctxt->input->cur[4] == 'D') &&
11751
1.15G
        (ctxt->input->cur[5] == 'A') &&
11752
1.15G
        (ctxt->input->cur[6] == 'T') &&
11753
1.15G
        (ctxt->input->cur[7] == 'A') &&
11754
1.15G
        (ctxt->input->cur[8] == '[')) {
11755
65.2k
        SKIP(9);
11756
65.2k
        ctxt->instate = XML_PARSER_CDATA_SECTION;
11757
65.2k
        break;
11758
1.15G
    } else if ((cur == '<') && (next == '!') &&
11759
1.15G
               (avail < 9)) {
11760
33.0k
        goto done;
11761
1.15G
    } else if (cur == '<') {
11762
1.15M
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11763
1.15M
                    "detected an error in element content\n");
11764
1.15M
                    SKIP(1);
11765
1.15G
    } else if (cur == '&') {
11766
6.60M
        if ((!terminate) && (!xmlParseLookupChar(ctxt, ';')))
11767
1.01M
      goto done;
11768
5.58M
        xmlParseReference(ctxt);
11769
1.14G
    } else {
11770
        /* TODO Avoid the extra copy, handle directly !!! */
11771
        /*
11772
         * Goal of the following test is:
11773
         *  - minimize calls to the SAX 'character' callback
11774
         *    when they are mergeable
11775
         *  - handle an problem for isBlank when we only parse
11776
         *    a sequence of blank chars and the next one is
11777
         *    not available to check against '<' presence.
11778
         *  - tries to homogenize the differences in SAX
11779
         *    callbacks between the push and pull versions
11780
         *    of the parser.
11781
         */
11782
1.14G
        if ((ctxt->inputNr == 1) &&
11783
1.14G
            (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11784
21.1M
      if ((!terminate) && (!xmlParseLookupCharData(ctxt)))
11785
2.37M
          goto done;
11786
21.1M
                    }
11787
1.14G
                    ctxt->checkIndex = 0;
11788
1.14G
        xmlParseCharData(ctxt, 0);
11789
1.14G
    }
11790
1.15G
    break;
11791
1.17G
      }
11792
1.15G
            case XML_PARSER_END_TAG:
11793
3.80M
    if (avail < 2)
11794
0
        goto done;
11795
3.80M
    if ((!terminate) && (!xmlParseLookupChar(ctxt, '>')))
11796
453k
        goto done;
11797
3.34M
    if (ctxt->sax2) {
11798
2.20M
              xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
11799
2.20M
        nameNsPop(ctxt);
11800
2.20M
    }
11801
1.14M
#ifdef LIBXML_SAX1_ENABLED
11802
1.14M
      else
11803
1.14M
        xmlParseEndTag1(ctxt, 0);
11804
3.34M
#endif /* LIBXML_SAX1_ENABLED */
11805
3.34M
    if (ctxt->instate == XML_PARSER_EOF) {
11806
        /* Nothing */
11807
3.34M
    } else if (ctxt->nameNr == 0) {
11808
54.5k
        ctxt->instate = XML_PARSER_EPILOG;
11809
3.29M
    } else {
11810
3.29M
        ctxt->instate = XML_PARSER_CONTENT;
11811
3.29M
    }
11812
3.34M
    break;
11813
924k
            case XML_PARSER_CDATA_SECTION: {
11814
          /*
11815
     * The Push mode need to have the SAX callback for
11816
     * cdataBlock merge back contiguous callbacks.
11817
     */
11818
924k
    const xmlChar *term;
11819
11820
924k
                if (terminate) {
11821
                    /*
11822
                     * Don't call xmlParseLookupString. If 'terminate'
11823
                     * is set, checkIndex is invalid.
11824
                     */
11825
31.7k
                    term = BAD_CAST strstr((const char *) ctxt->input->cur,
11826
31.7k
                                           "]]>");
11827
892k
                } else {
11828
892k
        term = xmlParseLookupString(ctxt, 0, "]]>", 3);
11829
892k
                }
11830
11831
924k
    if (term == NULL) {
11832
461k
        int tmp, size;
11833
11834
461k
                    if (terminate) {
11835
                        /* Unfinished CDATA section */
11836
7.40k
                        size = ctxt->input->end - ctxt->input->cur;
11837
453k
                    } else {
11838
453k
                        if (avail < XML_PARSER_BIG_BUFFER_SIZE + 2)
11839
105k
                            goto done;
11840
348k
                        ctxt->checkIndex = 0;
11841
                        /* XXX: Why don't we pass the full buffer? */
11842
348k
                        size = XML_PARSER_BIG_BUFFER_SIZE;
11843
348k
                    }
11844
355k
                    tmp = xmlCheckCdataPush(ctxt->input->cur, size, 0);
11845
355k
                    if (tmp <= 0) {
11846
290k
                        tmp = -tmp;
11847
290k
                        ctxt->input->cur += tmp;
11848
290k
                        goto encoding_error;
11849
290k
                    }
11850
64.4k
                    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11851
64.4k
                        if (ctxt->sax->cdataBlock != NULL)
11852
38.5k
                            ctxt->sax->cdataBlock(ctxt->userData,
11853
38.5k
                                                  ctxt->input->cur, tmp);
11854
25.9k
                        else if (ctxt->sax->characters != NULL)
11855
25.9k
                            ctxt->sax->characters(ctxt->userData,
11856
25.9k
                                                  ctxt->input->cur, tmp);
11857
64.4k
                    }
11858
64.4k
                    if (ctxt->instate == XML_PARSER_EOF)
11859
0
                        goto done;
11860
64.4k
                    SKIPL(tmp);
11861
463k
    } else {
11862
463k
                    int base = term - CUR_PTR;
11863
463k
        int tmp;
11864
11865
463k
        tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1);
11866
463k
        if ((tmp < 0) || (tmp != base)) {
11867
413k
      tmp = -tmp;
11868
413k
      ctxt->input->cur += tmp;
11869
413k
      goto encoding_error;
11870
413k
        }
11871
49.1k
        if ((ctxt->sax != NULL) && (base == 0) &&
11872
49.1k
            (ctxt->sax->cdataBlock != NULL) &&
11873
49.1k
            (!ctxt->disableSAX)) {
11874
      /*
11875
       * Special case to provide identical behaviour
11876
       * between pull and push parsers on enpty CDATA
11877
       * sections
11878
       */
11879
4.01k
       if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11880
4.01k
           (!strncmp((const char *)&ctxt->input->cur[-9],
11881
4.01k
                     "<![CDATA[", 9)))
11882
4.01k
           ctxt->sax->cdataBlock(ctxt->userData,
11883
4.01k
                                 BAD_CAST "", 0);
11884
45.1k
        } else if ((ctxt->sax != NULL) && (base > 0) &&
11885
45.1k
      (!ctxt->disableSAX)) {
11886
44.0k
      if (ctxt->sax->cdataBlock != NULL)
11887
30.9k
          ctxt->sax->cdataBlock(ctxt->userData,
11888
30.9k
              ctxt->input->cur, base);
11889
13.0k
      else if (ctxt->sax->characters != NULL)
11890
13.0k
          ctxt->sax->characters(ctxt->userData,
11891
13.0k
              ctxt->input->cur, base);
11892
44.0k
        }
11893
49.1k
        if (ctxt->instate == XML_PARSER_EOF)
11894
0
      goto done;
11895
49.1k
        SKIPL(base + 3);
11896
49.1k
        ctxt->instate = XML_PARSER_CONTENT;
11897
#ifdef DEBUG_PUSH
11898
        xmlGenericError(xmlGenericErrorContext,
11899
          "PP: entering CONTENT\n");
11900
#endif
11901
49.1k
    }
11902
113k
    break;
11903
924k
      }
11904
3.75M
            case XML_PARSER_MISC:
11905
4.01M
            case XML_PARSER_PROLOG:
11906
4.10M
            case XML_PARSER_EPILOG:
11907
4.10M
    SKIP_BLANKS;
11908
4.10M
    if (ctxt->input->buf == NULL)
11909
0
        avail = ctxt->input->length -
11910
0
                (ctxt->input->cur - ctxt->input->base);
11911
4.10M
    else
11912
4.10M
        avail = xmlBufUse(ctxt->input->buf->buffer) -
11913
4.10M
                (ctxt->input->cur - ctxt->input->base);
11914
4.10M
    if (avail < 2)
11915
102k
        goto done;
11916
4.00M
    cur = ctxt->input->cur[0];
11917
4.00M
    next = ctxt->input->cur[1];
11918
4.00M
          if ((cur == '<') && (next == '?')) {
11919
136k
        if ((!terminate) &&
11920
136k
                        (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11921
74.0k
      goto done;
11922
#ifdef DEBUG_PUSH
11923
        xmlGenericError(xmlGenericErrorContext,
11924
          "PP: Parsing PI\n");
11925
#endif
11926
62.7k
        xmlParsePI(ctxt);
11927
62.7k
        if (ctxt->instate == XML_PARSER_EOF)
11928
0
      goto done;
11929
3.87M
    } else if ((cur == '<') && (next == '!') &&
11930
3.87M
        (ctxt->input->cur[2] == '-') &&
11931
3.87M
        (ctxt->input->cur[3] == '-')) {
11932
155k
        if ((!terminate) &&
11933
155k
                        (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11934
121k
      goto done;
11935
#ifdef DEBUG_PUSH
11936
        xmlGenericError(xmlGenericErrorContext,
11937
          "PP: Parsing Comment\n");
11938
#endif
11939
33.4k
        xmlParseComment(ctxt);
11940
33.4k
        if (ctxt->instate == XML_PARSER_EOF)
11941
0
      goto done;
11942
3.71M
    } else if ((ctxt->instate == XML_PARSER_MISC) &&
11943
3.71M
                    (cur == '<') && (next == '!') &&
11944
3.71M
        (ctxt->input->cur[2] == 'D') &&
11945
3.71M
        (ctxt->input->cur[3] == 'O') &&
11946
3.71M
        (ctxt->input->cur[4] == 'C') &&
11947
3.71M
        (ctxt->input->cur[5] == 'T') &&
11948
3.71M
        (ctxt->input->cur[6] == 'Y') &&
11949
3.71M
        (ctxt->input->cur[7] == 'P') &&
11950
3.71M
        (ctxt->input->cur[8] == 'E')) {
11951
3.25M
        if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11952
2.95M
                        goto done;
11953
#ifdef DEBUG_PUSH
11954
        xmlGenericError(xmlGenericErrorContext,
11955
          "PP: Parsing internal subset\n");
11956
#endif
11957
297k
        ctxt->inSubset = 1;
11958
297k
        xmlParseDocTypeDecl(ctxt);
11959
297k
        if (ctxt->instate == XML_PARSER_EOF)
11960
0
      goto done;
11961
297k
        if (RAW == '[') {
11962
217k
      ctxt->instate = XML_PARSER_DTD;
11963
#ifdef DEBUG_PUSH
11964
      xmlGenericError(xmlGenericErrorContext,
11965
        "PP: entering DTD\n");
11966
#endif
11967
217k
        } else {
11968
      /*
11969
       * Create and update the external subset.
11970
       */
11971
79.8k
      ctxt->inSubset = 2;
11972
79.8k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11973
79.8k
          (ctxt->sax->externalSubset != NULL))
11974
73.6k
          ctxt->sax->externalSubset(ctxt->userData,
11975
73.6k
            ctxt->intSubName, ctxt->extSubSystem,
11976
73.6k
            ctxt->extSubURI);
11977
79.8k
      ctxt->inSubset = 0;
11978
79.8k
      xmlCleanSpecialAttr(ctxt);
11979
79.8k
      ctxt->instate = XML_PARSER_PROLOG;
11980
#ifdef DEBUG_PUSH
11981
      xmlGenericError(xmlGenericErrorContext,
11982
        "PP: entering PROLOG\n");
11983
#endif
11984
79.8k
        }
11985
458k
    } else if ((cur == '<') && (next == '!') &&
11986
458k
               (avail <
11987
66.9k
                            (ctxt->instate == XML_PARSER_MISC ? 9 : 4))) {
11988
61.5k
        goto done;
11989
396k
    } else if (ctxt->instate == XML_PARSER_EPILOG) {
11990
11.8k
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11991
11.8k
        xmlHaltParser(ctxt);
11992
#ifdef DEBUG_PUSH
11993
        xmlGenericError(xmlGenericErrorContext,
11994
          "PP: entering EOF\n");
11995
#endif
11996
11.8k
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11997
11.8k
      ctxt->sax->endDocument(ctxt->userData);
11998
11.8k
        goto done;
11999
384k
                } else {
12000
384k
        ctxt->instate = XML_PARSER_START_TAG;
12001
#ifdef DEBUG_PUSH
12002
        xmlGenericError(xmlGenericErrorContext,
12003
          "PP: entering START_TAG\n");
12004
#endif
12005
384k
    }
12006
778k
    break;
12007
9.52M
            case XML_PARSER_DTD: {
12008
9.52M
                if ((!terminate) && (!xmlParseLookupInternalSubset(ctxt)))
12009
9.31M
                    goto done;
12010
215k
    xmlParseInternalSubset(ctxt);
12011
215k
    if (ctxt->instate == XML_PARSER_EOF)
12012
78.5k
        goto done;
12013
136k
    ctxt->inSubset = 2;
12014
136k
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
12015
136k
        (ctxt->sax->externalSubset != NULL))
12016
132k
        ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
12017
132k
          ctxt->extSubSystem, ctxt->extSubURI);
12018
136k
    ctxt->inSubset = 0;
12019
136k
    xmlCleanSpecialAttr(ctxt);
12020
136k
    if (ctxt->instate == XML_PARSER_EOF)
12021
2.14k
        goto done;
12022
134k
    ctxt->instate = XML_PARSER_PROLOG;
12023
#ifdef DEBUG_PUSH
12024
    xmlGenericError(xmlGenericErrorContext,
12025
      "PP: entering PROLOG\n");
12026
#endif
12027
134k
                break;
12028
136k
      }
12029
0
            case XML_PARSER_COMMENT:
12030
0
    xmlGenericError(xmlGenericErrorContext,
12031
0
      "PP: internal error, state == COMMENT\n");
12032
0
    ctxt->instate = XML_PARSER_CONTENT;
12033
#ifdef DEBUG_PUSH
12034
    xmlGenericError(xmlGenericErrorContext,
12035
      "PP: entering CONTENT\n");
12036
#endif
12037
0
    break;
12038
0
            case XML_PARSER_IGNORE:
12039
0
    xmlGenericError(xmlGenericErrorContext,
12040
0
      "PP: internal error, state == IGNORE");
12041
0
          ctxt->instate = XML_PARSER_DTD;
12042
#ifdef DEBUG_PUSH
12043
    xmlGenericError(xmlGenericErrorContext,
12044
      "PP: entering DTD\n");
12045
#endif
12046
0
          break;
12047
0
            case XML_PARSER_PI:
12048
0
    xmlGenericError(xmlGenericErrorContext,
12049
0
      "PP: internal error, state == PI\n");
12050
0
    ctxt->instate = XML_PARSER_CONTENT;
12051
#ifdef DEBUG_PUSH
12052
    xmlGenericError(xmlGenericErrorContext,
12053
      "PP: entering CONTENT\n");
12054
#endif
12055
0
    break;
12056
0
            case XML_PARSER_ENTITY_DECL:
12057
0
    xmlGenericError(xmlGenericErrorContext,
12058
0
      "PP: internal error, state == ENTITY_DECL\n");
12059
0
    ctxt->instate = XML_PARSER_DTD;
12060
#ifdef DEBUG_PUSH
12061
    xmlGenericError(xmlGenericErrorContext,
12062
      "PP: entering DTD\n");
12063
#endif
12064
0
    break;
12065
0
            case XML_PARSER_ENTITY_VALUE:
12066
0
    xmlGenericError(xmlGenericErrorContext,
12067
0
      "PP: internal error, state == ENTITY_VALUE\n");
12068
0
    ctxt->instate = XML_PARSER_CONTENT;
12069
#ifdef DEBUG_PUSH
12070
    xmlGenericError(xmlGenericErrorContext,
12071
      "PP: entering DTD\n");
12072
#endif
12073
0
    break;
12074
0
            case XML_PARSER_ATTRIBUTE_VALUE:
12075
0
    xmlGenericError(xmlGenericErrorContext,
12076
0
      "PP: internal error, state == ATTRIBUTE_VALUE\n");
12077
0
    ctxt->instate = XML_PARSER_START_TAG;
12078
#ifdef DEBUG_PUSH
12079
    xmlGenericError(xmlGenericErrorContext,
12080
      "PP: entering START_TAG\n");
12081
#endif
12082
0
    break;
12083
0
            case XML_PARSER_SYSTEM_LITERAL:
12084
0
    xmlGenericError(xmlGenericErrorContext,
12085
0
      "PP: internal error, state == SYSTEM_LITERAL\n");
12086
0
    ctxt->instate = XML_PARSER_START_TAG;
12087
#ifdef DEBUG_PUSH
12088
    xmlGenericError(xmlGenericErrorContext,
12089
      "PP: entering START_TAG\n");
12090
#endif
12091
0
    break;
12092
0
            case XML_PARSER_PUBLIC_LITERAL:
12093
0
    xmlGenericError(xmlGenericErrorContext,
12094
0
      "PP: internal error, state == PUBLIC_LITERAL\n");
12095
0
    ctxt->instate = XML_PARSER_START_TAG;
12096
#ifdef DEBUG_PUSH
12097
    xmlGenericError(xmlGenericErrorContext,
12098
      "PP: entering START_TAG\n");
12099
#endif
12100
0
    break;
12101
1.22G
  }
12102
1.22G
    }
12103
37.9M
done:
12104
#ifdef DEBUG_PUSH
12105
    xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
12106
#endif
12107
37.9M
    return(ret);
12108
704k
encoding_error:
12109
704k
    {
12110
704k
        char buffer[150];
12111
12112
704k
  snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12113
704k
      ctxt->input->cur[0], ctxt->input->cur[1],
12114
704k
      ctxt->input->cur[2], ctxt->input->cur[3]);
12115
704k
  __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12116
704k
         "Input is not proper UTF-8, indicate encoding !\n%s",
12117
704k
         BAD_CAST buffer, NULL);
12118
704k
    }
12119
704k
    return(0);
12120
38.7M
}
12121
12122
/**
12123
 * xmlParseChunk:
12124
 * @ctxt:  an XML parser context
12125
 * @chunk:  an char array
12126
 * @size:  the size in byte of the chunk
12127
 * @terminate:  last chunk indicator
12128
 *
12129
 * Parse a Chunk of memory
12130
 *
12131
 * Returns zero if no error, the xmlParserErrors otherwise.
12132
 */
12133
int
12134
xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
12135
43.0M
              int terminate) {
12136
43.0M
    int end_in_lf = 0;
12137
43.0M
    int remain = 0;
12138
12139
43.0M
    if (ctxt == NULL)
12140
0
        return(XML_ERR_INTERNAL_ERROR);
12141
43.0M
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12142
4.52M
        return(ctxt->errNo);
12143
38.5M
    if (ctxt->instate == XML_PARSER_EOF)
12144
330
        return(-1);
12145
38.5M
    if (ctxt->input == NULL)
12146
0
        return(-1);
12147
12148
38.5M
    ctxt->progressive = 1;
12149
38.5M
    if (ctxt->instate == XML_PARSER_START)
12150
3.26M
        xmlDetectSAX2(ctxt);
12151
38.5M
    if ((size > 0) && (chunk != NULL) && (!terminate) &&
12152
38.5M
        (chunk[size - 1] == '\r')) {
12153
400k
  end_in_lf = 1;
12154
400k
  size--;
12155
400k
    }
12156
12157
38.7M
xmldecl_done:
12158
12159
38.7M
    if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12160
38.7M
        (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF))  {
12161
38.3M
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12162
38.3M
  size_t cur = ctxt->input->cur - ctxt->input->base;
12163
38.3M
  int res;
12164
12165
        /*
12166
         * Specific handling if we autodetected an encoding, we should not
12167
         * push more than the first line ... which depend on the encoding
12168
         * And only push the rest once the final encoding was detected
12169
         */
12170
38.3M
        if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
12171
38.3M
            (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
12172
341k
            unsigned int len = 45;
12173
12174
341k
            if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12175
341k
                               BAD_CAST "UTF-16")) ||
12176
341k
                (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12177
73.8k
                               BAD_CAST "UTF16")))
12178
267k
                len = 90;
12179
73.8k
            else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12180
73.8k
                                    BAD_CAST "UCS-4")) ||
12181
73.8k
                     (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12182
73.3k
                                    BAD_CAST "UCS4")))
12183
434
                len = 180;
12184
12185
341k
            if (ctxt->input->buf->rawconsumed < len)
12186
5.13k
                len -= ctxt->input->buf->rawconsumed;
12187
12188
            /*
12189
             * Change size for reading the initial declaration only
12190
             * if size is greater than len. Otherwise, memmove in xmlBufferAdd
12191
             * will blindly copy extra bytes from memory.
12192
             */
12193
341k
            if ((unsigned int) size > len) {
12194
229k
                remain = size - len;
12195
229k
                size = len;
12196
229k
            } else {
12197
111k
                remain = 0;
12198
111k
            }
12199
341k
        }
12200
38.3M
  res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12201
38.3M
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12202
38.3M
  if (res < 0) {
12203
1.43k
      ctxt->errNo = XML_PARSER_EOF;
12204
1.43k
      xmlHaltParser(ctxt);
12205
1.43k
      return (XML_PARSER_EOF);
12206
1.43k
  }
12207
#ifdef DEBUG_PUSH
12208
  xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12209
#endif
12210
12211
38.3M
    } else if (ctxt->instate != XML_PARSER_EOF) {
12212
411k
  if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
12213
411k
      xmlParserInputBufferPtr in = ctxt->input->buf;
12214
411k
      if ((in->encoder != NULL) && (in->buffer != NULL) &&
12215
411k
        (in->raw != NULL)) {
12216
26.0k
    int nbchars;
12217
26.0k
    size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
12218
26.0k
    size_t current = ctxt->input->cur - ctxt->input->base;
12219
12220
26.0k
    nbchars = xmlCharEncInput(in, terminate);
12221
26.0k
    xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
12222
26.0k
    if (nbchars < 0) {
12223
        /* TODO 2.6.0 */
12224
675
        xmlGenericError(xmlGenericErrorContext,
12225
675
            "xmlParseChunk: encoder error\n");
12226
675
                    xmlHaltParser(ctxt);
12227
675
        return(XML_ERR_INVALID_ENCODING);
12228
675
    }
12229
26.0k
      }
12230
411k
  }
12231
411k
    }
12232
12233
38.7M
    if (remain != 0) {
12234
229k
        xmlParseTryOrFinish(ctxt, 0);
12235
38.5M
    } else {
12236
38.5M
        xmlParseTryOrFinish(ctxt, terminate);
12237
38.5M
    }
12238
38.7M
    if (ctxt->instate == XML_PARSER_EOF)
12239
149k
        return(ctxt->errNo);
12240
12241
38.5M
    if ((ctxt->input != NULL) &&
12242
38.5M
         (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
12243
38.5M
         ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
12244
38.5M
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
12245
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
12246
0
        xmlHaltParser(ctxt);
12247
0
    }
12248
38.5M
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12249
140k
        return(ctxt->errNo);
12250
12251
38.4M
    if (remain != 0) {
12252
228k
        chunk += size;
12253
228k
        size = remain;
12254
228k
        remain = 0;
12255
228k
        goto xmldecl_done;
12256
228k
    }
12257
38.2M
    if ((end_in_lf == 1) && (ctxt->input != NULL) &&
12258
38.2M
        (ctxt->input->buf != NULL)) {
12259
400k
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
12260
400k
           ctxt->input);
12261
400k
  size_t current = ctxt->input->cur - ctxt->input->base;
12262
12263
400k
  xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
12264
12265
400k
  xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
12266
400k
            base, current);
12267
400k
    }
12268
38.2M
    if (terminate) {
12269
  /*
12270
   * Check for termination
12271
   */
12272
175k
  int cur_avail = 0;
12273
12274
175k
  if (ctxt->input != NULL) {
12275
175k
      if (ctxt->input->buf == NULL)
12276
0
    cur_avail = ctxt->input->length -
12277
0
          (ctxt->input->cur - ctxt->input->base);
12278
175k
      else
12279
175k
    cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
12280
175k
                    (ctxt->input->cur - ctxt->input->base);
12281
175k
  }
12282
12283
175k
  if ((ctxt->instate != XML_PARSER_EOF) &&
12284
175k
      (ctxt->instate != XML_PARSER_EPILOG)) {
12285
109k
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12286
109k
  }
12287
175k
  if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) {
12288
306
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12289
306
  }
12290
175k
  if (ctxt->instate != XML_PARSER_EOF) {
12291
175k
      if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
12292
175k
    ctxt->sax->endDocument(ctxt->userData);
12293
175k
  }
12294
175k
  ctxt->instate = XML_PARSER_EOF;
12295
175k
    }
12296
38.2M
    if (ctxt->wellFormed == 0)
12297
14.6M
  return((xmlParserErrors) ctxt->errNo);
12298
23.5M
    else
12299
23.5M
        return(0);
12300
38.2M
}
12301
12302
/************************************************************************
12303
 *                  *
12304
 *    I/O front end functions to the parser     *
12305
 *                  *
12306
 ************************************************************************/
12307
12308
/**
12309
 * xmlCreatePushParserCtxt:
12310
 * @sax:  a SAX handler
12311
 * @user_data:  The user data returned on SAX callbacks
12312
 * @chunk:  a pointer to an array of chars
12313
 * @size:  number of chars in the array
12314
 * @filename:  an optional file name or URI
12315
 *
12316
 * Create a parser context for using the XML parser in push mode.
12317
 * If @buffer and @size are non-NULL, the data is used to detect
12318
 * the encoding.  The remaining characters will be parsed so they
12319
 * don't need to be fed in again through xmlParseChunk.
12320
 * To allow content encoding detection, @size should be >= 4
12321
 * The value of @filename is used for fetching external entities
12322
 * and error/warning reports.
12323
 *
12324
 * Returns the new parser context or NULL
12325
 */
12326
12327
xmlParserCtxtPtr
12328
xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12329
534k
                        const char *chunk, int size, const char *filename) {
12330
534k
    xmlParserCtxtPtr ctxt;
12331
534k
    xmlParserInputPtr inputStream;
12332
534k
    xmlParserInputBufferPtr buf;
12333
534k
    xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12334
12335
    /*
12336
     * plug some encoding conversion routines
12337
     */
12338
534k
    if ((chunk != NULL) && (size >= 4))
12339
266k
  enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12340
12341
534k
    buf = xmlAllocParserInputBuffer(enc);
12342
534k
    if (buf == NULL) return(NULL);
12343
12344
534k
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
12345
534k
    if (ctxt == NULL) {
12346
0
        xmlErrMemory(NULL, "creating parser: out of memory\n");
12347
0
  xmlFreeParserInputBuffer(buf);
12348
0
  return(NULL);
12349
0
    }
12350
534k
    ctxt->dictNames = 1;
12351
534k
    if (filename == NULL) {
12352
267k
  ctxt->directory = NULL;
12353
267k
    } else {
12354
267k
        ctxt->directory = xmlParserGetDirectory(filename);
12355
267k
    }
12356
12357
534k
    inputStream = xmlNewInputStream(ctxt);
12358
534k
    if (inputStream == NULL) {
12359
0
  xmlFreeParserCtxt(ctxt);
12360
0
  xmlFreeParserInputBuffer(buf);
12361
0
  return(NULL);
12362
0
    }
12363
12364
534k
    if (filename == NULL)
12365
267k
  inputStream->filename = NULL;
12366
267k
    else {
12367
267k
  inputStream->filename = (char *)
12368
267k
      xmlCanonicPath((const xmlChar *) filename);
12369
267k
  if (inputStream->filename == NULL) {
12370
0
            xmlFreeInputStream(inputStream);
12371
0
      xmlFreeParserCtxt(ctxt);
12372
0
      xmlFreeParserInputBuffer(buf);
12373
0
      return(NULL);
12374
0
  }
12375
267k
    }
12376
534k
    inputStream->buf = buf;
12377
534k
    xmlBufResetInput(inputStream->buf->buffer, inputStream);
12378
534k
    inputPush(ctxt, inputStream);
12379
12380
    /*
12381
     * If the caller didn't provide an initial 'chunk' for determining
12382
     * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12383
     * that it can be automatically determined later
12384
     */
12385
534k
    ctxt->charset = XML_CHAR_ENCODING_NONE;
12386
12387
534k
    if ((size != 0) && (chunk != NULL) &&
12388
534k
        (ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
12389
266k
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12390
266k
  size_t cur = ctxt->input->cur - ctxt->input->base;
12391
12392
266k
  xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12393
12394
266k
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12395
#ifdef DEBUG_PUSH
12396
  xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12397
#endif
12398
266k
    }
12399
12400
534k
    if (enc != XML_CHAR_ENCODING_NONE) {
12401
131k
        xmlSwitchEncoding(ctxt, enc);
12402
131k
    }
12403
12404
534k
    return(ctxt);
12405
534k
}
12406
#endif /* LIBXML_PUSH_ENABLED */
12407
12408
/**
12409
 * xmlHaltParser:
12410
 * @ctxt:  an XML parser context
12411
 *
12412
 * Blocks further parser processing don't override error
12413
 * for internal use
12414
 */
12415
static void
12416
486k
xmlHaltParser(xmlParserCtxtPtr ctxt) {
12417
486k
    if (ctxt == NULL)
12418
0
        return;
12419
486k
    ctxt->instate = XML_PARSER_EOF;
12420
486k
    ctxt->disableSAX = 1;
12421
500k
    while (ctxt->inputNr > 1)
12422
14.4k
        xmlFreeInputStream(inputPop(ctxt));
12423
486k
    if (ctxt->input != NULL) {
12424
        /*
12425
   * in case there was a specific allocation deallocate before
12426
   * overriding base
12427
   */
12428
486k
        if (ctxt->input->free != NULL) {
12429
0
      ctxt->input->free((xmlChar *) ctxt->input->base);
12430
0
      ctxt->input->free = NULL;
12431
0
  }
12432
486k
        if (ctxt->input->buf != NULL) {
12433
417k
            xmlFreeParserInputBuffer(ctxt->input->buf);
12434
417k
            ctxt->input->buf = NULL;
12435
417k
        }
12436
486k
  ctxt->input->cur = BAD_CAST"";
12437
486k
        ctxt->input->length = 0;
12438
486k
  ctxt->input->base = ctxt->input->cur;
12439
486k
        ctxt->input->end = ctxt->input->cur;
12440
486k
    }
12441
486k
}
12442
12443
/**
12444
 * xmlStopParser:
12445
 * @ctxt:  an XML parser context
12446
 *
12447
 * Blocks further parser processing
12448
 */
12449
void
12450
267k
xmlStopParser(xmlParserCtxtPtr ctxt) {
12451
267k
    if (ctxt == NULL)
12452
0
        return;
12453
267k
    xmlHaltParser(ctxt);
12454
267k
    ctxt->errNo = XML_ERR_USER_STOP;
12455
267k
}
12456
12457
/**
12458
 * xmlCreateIOParserCtxt:
12459
 * @sax:  a SAX handler
12460
 * @user_data:  The user data returned on SAX callbacks
12461
 * @ioread:  an I/O read function
12462
 * @ioclose:  an I/O close function
12463
 * @ioctx:  an I/O handler
12464
 * @enc:  the charset encoding if known
12465
 *
12466
 * Create a parser context for using the XML parser with an existing
12467
 * I/O stream
12468
 *
12469
 * Returns the new parser context or NULL
12470
 */
12471
xmlParserCtxtPtr
12472
xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12473
  xmlInputReadCallback   ioread, xmlInputCloseCallback  ioclose,
12474
0
  void *ioctx, xmlCharEncoding enc) {
12475
0
    xmlParserCtxtPtr ctxt;
12476
0
    xmlParserInputPtr inputStream;
12477
0
    xmlParserInputBufferPtr buf;
12478
12479
0
    if (ioread == NULL) return(NULL);
12480
12481
0
    buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
12482
0
    if (buf == NULL) {
12483
0
        if (ioclose != NULL)
12484
0
            ioclose(ioctx);
12485
0
        return (NULL);
12486
0
    }
12487
12488
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
12489
0
    if (ctxt == NULL) {
12490
0
  xmlFreeParserInputBuffer(buf);
12491
0
  return(NULL);
12492
0
    }
12493
12494
0
    inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12495
0
    if (inputStream == NULL) {
12496
0
  xmlFreeParserCtxt(ctxt);
12497
0
  return(NULL);
12498
0
    }
12499
0
    inputPush(ctxt, inputStream);
12500
12501
0
    return(ctxt);
12502
0
}
12503
12504
#ifdef LIBXML_VALID_ENABLED
12505
/************************************************************************
12506
 *                  *
12507
 *    Front ends when parsing a DTD       *
12508
 *                  *
12509
 ************************************************************************/
12510
12511
/**
12512
 * xmlIOParseDTD:
12513
 * @sax:  the SAX handler block or NULL
12514
 * @input:  an Input Buffer
12515
 * @enc:  the charset encoding if known
12516
 *
12517
 * Load and parse a DTD
12518
 *
12519
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12520
 * @input will be freed by the function in any case.
12521
 */
12522
12523
xmlDtdPtr
12524
xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12525
0
        xmlCharEncoding enc) {
12526
0
    xmlDtdPtr ret = NULL;
12527
0
    xmlParserCtxtPtr ctxt;
12528
0
    xmlParserInputPtr pinput = NULL;
12529
0
    xmlChar start[4];
12530
12531
0
    if (input == NULL)
12532
0
  return(NULL);
12533
12534
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12535
0
    if (ctxt == NULL) {
12536
0
        xmlFreeParserInputBuffer(input);
12537
0
  return(NULL);
12538
0
    }
12539
12540
    /* We are loading a DTD */
12541
0
    ctxt->options |= XML_PARSE_DTDLOAD;
12542
12543
0
    xmlDetectSAX2(ctxt);
12544
12545
    /*
12546
     * generate a parser input from the I/O handler
12547
     */
12548
12549
0
    pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12550
0
    if (pinput == NULL) {
12551
0
        xmlFreeParserInputBuffer(input);
12552
0
  xmlFreeParserCtxt(ctxt);
12553
0
  return(NULL);
12554
0
    }
12555
12556
    /*
12557
     * plug some encoding conversion routines here.
12558
     */
12559
0
    if (xmlPushInput(ctxt, pinput) < 0) {
12560
0
  xmlFreeParserCtxt(ctxt);
12561
0
  return(NULL);
12562
0
    }
12563
0
    if (enc != XML_CHAR_ENCODING_NONE) {
12564
0
        xmlSwitchEncoding(ctxt, enc);
12565
0
    }
12566
12567
0
    pinput->filename = NULL;
12568
0
    pinput->line = 1;
12569
0
    pinput->col = 1;
12570
0
    pinput->base = ctxt->input->cur;
12571
0
    pinput->cur = ctxt->input->cur;
12572
0
    pinput->free = NULL;
12573
12574
    /*
12575
     * let's parse that entity knowing it's an external subset.
12576
     */
12577
0
    ctxt->inSubset = 2;
12578
0
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12579
0
    if (ctxt->myDoc == NULL) {
12580
0
  xmlErrMemory(ctxt, "New Doc failed");
12581
0
  return(NULL);
12582
0
    }
12583
0
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
12584
0
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12585
0
                                 BAD_CAST "none", BAD_CAST "none");
12586
12587
0
    if ((enc == XML_CHAR_ENCODING_NONE) &&
12588
0
        ((ctxt->input->end - ctxt->input->cur) >= 4)) {
12589
  /*
12590
   * Get the 4 first bytes and decode the charset
12591
   * if enc != XML_CHAR_ENCODING_NONE
12592
   * plug some encoding conversion routines.
12593
   */
12594
0
  start[0] = RAW;
12595
0
  start[1] = NXT(1);
12596
0
  start[2] = NXT(2);
12597
0
  start[3] = NXT(3);
12598
0
  enc = xmlDetectCharEncoding(start, 4);
12599
0
  if (enc != XML_CHAR_ENCODING_NONE) {
12600
0
      xmlSwitchEncoding(ctxt, enc);
12601
0
  }
12602
0
    }
12603
12604
0
    xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12605
12606
0
    if (ctxt->myDoc != NULL) {
12607
0
  if (ctxt->wellFormed) {
12608
0
      ret = ctxt->myDoc->extSubset;
12609
0
      ctxt->myDoc->extSubset = NULL;
12610
0
      if (ret != NULL) {
12611
0
    xmlNodePtr tmp;
12612
12613
0
    ret->doc = NULL;
12614
0
    tmp = ret->children;
12615
0
    while (tmp != NULL) {
12616
0
        tmp->doc = NULL;
12617
0
        tmp = tmp->next;
12618
0
    }
12619
0
      }
12620
0
  } else {
12621
0
      ret = NULL;
12622
0
  }
12623
0
        xmlFreeDoc(ctxt->myDoc);
12624
0
        ctxt->myDoc = NULL;
12625
0
    }
12626
0
    xmlFreeParserCtxt(ctxt);
12627
12628
0
    return(ret);
12629
0
}
12630
12631
/**
12632
 * xmlSAXParseDTD:
12633
 * @sax:  the SAX handler block
12634
 * @ExternalID:  a NAME* containing the External ID of the DTD
12635
 * @SystemID:  a NAME* containing the URL to the DTD
12636
 *
12637
 * DEPRECATED: Don't use.
12638
 *
12639
 * Load and parse an external subset.
12640
 *
12641
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12642
 */
12643
12644
xmlDtdPtr
12645
xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12646
0
                          const xmlChar *SystemID) {
12647
0
    xmlDtdPtr ret = NULL;
12648
0
    xmlParserCtxtPtr ctxt;
12649
0
    xmlParserInputPtr input = NULL;
12650
0
    xmlCharEncoding enc;
12651
0
    xmlChar* systemIdCanonic;
12652
12653
0
    if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12654
12655
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12656
0
    if (ctxt == NULL) {
12657
0
  return(NULL);
12658
0
    }
12659
12660
    /* We are loading a DTD */
12661
0
    ctxt->options |= XML_PARSE_DTDLOAD;
12662
12663
    /*
12664
     * Canonicalise the system ID
12665
     */
12666
0
    systemIdCanonic = xmlCanonicPath(SystemID);
12667
0
    if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
12668
0
  xmlFreeParserCtxt(ctxt);
12669
0
  return(NULL);
12670
0
    }
12671
12672
    /*
12673
     * Ask the Entity resolver to load the damn thing
12674
     */
12675
12676
0
    if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
12677
0
  input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12678
0
                                   systemIdCanonic);
12679
0
    if (input == NULL) {
12680
0
  xmlFreeParserCtxt(ctxt);
12681
0
  if (systemIdCanonic != NULL)
12682
0
      xmlFree(systemIdCanonic);
12683
0
  return(NULL);
12684
0
    }
12685
12686
    /*
12687
     * plug some encoding conversion routines here.
12688
     */
12689
0
    if (xmlPushInput(ctxt, input) < 0) {
12690
0
  xmlFreeParserCtxt(ctxt);
12691
0
  if (systemIdCanonic != NULL)
12692
0
      xmlFree(systemIdCanonic);
12693
0
  return(NULL);
12694
0
    }
12695
0
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12696
0
  enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12697
0
  xmlSwitchEncoding(ctxt, enc);
12698
0
    }
12699
12700
0
    if (input->filename == NULL)
12701
0
  input->filename = (char *) systemIdCanonic;
12702
0
    else
12703
0
  xmlFree(systemIdCanonic);
12704
0
    input->line = 1;
12705
0
    input->col = 1;
12706
0
    input->base = ctxt->input->cur;
12707
0
    input->cur = ctxt->input->cur;
12708
0
    input->free = NULL;
12709
12710
    /*
12711
     * let's parse that entity knowing it's an external subset.
12712
     */
12713
0
    ctxt->inSubset = 2;
12714
0
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12715
0
    if (ctxt->myDoc == NULL) {
12716
0
  xmlErrMemory(ctxt, "New Doc failed");
12717
0
  xmlFreeParserCtxt(ctxt);
12718
0
  return(NULL);
12719
0
    }
12720
0
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
12721
0
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12722
0
                                 ExternalID, SystemID);
12723
0
    xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12724
12725
0
    if (ctxt->myDoc != NULL) {
12726
0
  if (ctxt->wellFormed) {
12727
0
      ret = ctxt->myDoc->extSubset;
12728
0
      ctxt->myDoc->extSubset = NULL;
12729
0
      if (ret != NULL) {
12730
0
    xmlNodePtr tmp;
12731
12732
0
    ret->doc = NULL;
12733
0
    tmp = ret->children;
12734
0
    while (tmp != NULL) {
12735
0
        tmp->doc = NULL;
12736
0
        tmp = tmp->next;
12737
0
    }
12738
0
      }
12739
0
  } else {
12740
0
      ret = NULL;
12741
0
  }
12742
0
        xmlFreeDoc(ctxt->myDoc);
12743
0
        ctxt->myDoc = NULL;
12744
0
    }
12745
0
    xmlFreeParserCtxt(ctxt);
12746
12747
0
    return(ret);
12748
0
}
12749
12750
12751
/**
12752
 * xmlParseDTD:
12753
 * @ExternalID:  a NAME* containing the External ID of the DTD
12754
 * @SystemID:  a NAME* containing the URL to the DTD
12755
 *
12756
 * Load and parse an external subset.
12757
 *
12758
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12759
 */
12760
12761
xmlDtdPtr
12762
0
xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12763
0
    return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12764
0
}
12765
#endif /* LIBXML_VALID_ENABLED */
12766
12767
/************************************************************************
12768
 *                  *
12769
 *    Front ends when parsing an Entity     *
12770
 *                  *
12771
 ************************************************************************/
12772
12773
/**
12774
 * xmlParseCtxtExternalEntity:
12775
 * @ctx:  the existing parsing context
12776
 * @URL:  the URL for the entity to load
12777
 * @ID:  the System ID for the entity to load
12778
 * @lst:  the return value for the set of parsed nodes
12779
 *
12780
 * Parse an external general entity within an existing parsing context
12781
 * An external general parsed entity is well-formed if it matches the
12782
 * production labeled extParsedEnt.
12783
 *
12784
 * [78] extParsedEnt ::= TextDecl? content
12785
 *
12786
 * Returns 0 if the entity is well formed, -1 in case of args problem and
12787
 *    the parser error code otherwise
12788
 */
12789
12790
int
12791
xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
12792
0
                 const xmlChar *ID, xmlNodePtr *lst) {
12793
0
    void *userData;
12794
12795
0
    if (ctx == NULL) return(-1);
12796
    /*
12797
     * If the user provided their own SAX callbacks, then reuse the
12798
     * userData callback field, otherwise the expected setup in a
12799
     * DOM builder is to have userData == ctxt
12800
     */
12801
0
    if (ctx->userData == ctx)
12802
0
        userData = NULL;
12803
0
    else
12804
0
        userData = ctx->userData;
12805
0
    return xmlParseExternalEntityPrivate(ctx->myDoc, ctx, ctx->sax,
12806
0
                                         userData, ctx->depth + 1,
12807
0
                                         URL, ID, lst);
12808
0
}
12809
12810
/**
12811
 * xmlParseExternalEntityPrivate:
12812
 * @doc:  the document the chunk pertains to
12813
 * @oldctxt:  the previous parser context if available
12814
 * @sax:  the SAX handler block (possibly NULL)
12815
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12816
 * @depth:  Used for loop detection, use 0
12817
 * @URL:  the URL for the entity to load
12818
 * @ID:  the System ID for the entity to load
12819
 * @list:  the return value for the set of parsed nodes
12820
 *
12821
 * Private version of xmlParseExternalEntity()
12822
 *
12823
 * Returns 0 if the entity is well formed, -1 in case of args problem and
12824
 *    the parser error code otherwise
12825
 */
12826
12827
static xmlParserErrors
12828
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
12829
                xmlSAXHandlerPtr sax,
12830
          void *user_data, int depth, const xmlChar *URL,
12831
516k
          const xmlChar *ID, xmlNodePtr *list) {
12832
516k
    xmlParserCtxtPtr ctxt;
12833
516k
    xmlDocPtr newDoc;
12834
516k
    xmlNodePtr newRoot;
12835
516k
    xmlParserErrors ret = XML_ERR_OK;
12836
516k
    xmlChar start[4];
12837
516k
    xmlCharEncoding enc;
12838
12839
516k
    if (((depth > 40) &&
12840
516k
  ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12841
516k
  (depth > 100)) {
12842
0
  xmlFatalErrMsg(oldctxt, XML_ERR_ENTITY_LOOP,
12843
0
                       "Maximum entity nesting depth exceeded");
12844
0
        return(XML_ERR_ENTITY_LOOP);
12845
0
    }
12846
12847
516k
    if (list != NULL)
12848
49.4k
        *list = NULL;
12849
516k
    if ((URL == NULL) && (ID == NULL))
12850
325
  return(XML_ERR_INTERNAL_ERROR);
12851
516k
    if (doc == NULL)
12852
0
  return(XML_ERR_INTERNAL_ERROR);
12853
12854
516k
    ctxt = xmlCreateEntityParserCtxtInternal(sax, user_data, URL, ID, NULL,
12855
516k
                                             oldctxt);
12856
516k
    if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
12857
77.1k
    if (oldctxt != NULL) {
12858
77.1k
        ctxt->nbErrors = oldctxt->nbErrors;
12859
77.1k
        ctxt->nbWarnings = oldctxt->nbWarnings;
12860
77.1k
    }
12861
77.1k
    xmlDetectSAX2(ctxt);
12862
12863
77.1k
    newDoc = xmlNewDoc(BAD_CAST "1.0");
12864
77.1k
    if (newDoc == NULL) {
12865
0
  xmlFreeParserCtxt(ctxt);
12866
0
  return(XML_ERR_INTERNAL_ERROR);
12867
0
    }
12868
77.1k
    newDoc->properties = XML_DOC_INTERNAL;
12869
77.1k
    if (doc) {
12870
77.1k
        newDoc->intSubset = doc->intSubset;
12871
77.1k
        newDoc->extSubset = doc->extSubset;
12872
77.1k
        if (doc->dict) {
12873
59.7k
            newDoc->dict = doc->dict;
12874
59.7k
            xmlDictReference(newDoc->dict);
12875
59.7k
        }
12876
77.1k
        if (doc->URL != NULL) {
12877
49.5k
            newDoc->URL = xmlStrdup(doc->URL);
12878
49.5k
        }
12879
77.1k
    }
12880
77.1k
    newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12881
77.1k
    if (newRoot == NULL) {
12882
0
  if (sax != NULL)
12883
0
  xmlFreeParserCtxt(ctxt);
12884
0
  newDoc->intSubset = NULL;
12885
0
  newDoc->extSubset = NULL;
12886
0
        xmlFreeDoc(newDoc);
12887
0
  return(XML_ERR_INTERNAL_ERROR);
12888
0
    }
12889
77.1k
    xmlAddChild((xmlNodePtr) newDoc, newRoot);
12890
77.1k
    nodePush(ctxt, newDoc->children);
12891
77.1k
    if (doc == NULL) {
12892
0
        ctxt->myDoc = newDoc;
12893
77.1k
    } else {
12894
77.1k
        ctxt->myDoc = doc;
12895
77.1k
        newRoot->doc = doc;
12896
77.1k
    }
12897
12898
    /*
12899
     * Get the 4 first bytes and decode the charset
12900
     * if enc != XML_CHAR_ENCODING_NONE
12901
     * plug some encoding conversion routines.
12902
     */
12903
77.1k
    GROW;
12904
77.1k
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12905
71.3k
  start[0] = RAW;
12906
71.3k
  start[1] = NXT(1);
12907
71.3k
  start[2] = NXT(2);
12908
71.3k
  start[3] = NXT(3);
12909
71.3k
  enc = xmlDetectCharEncoding(start, 4);
12910
71.3k
  if (enc != XML_CHAR_ENCODING_NONE) {
12911
3.75k
      xmlSwitchEncoding(ctxt, enc);
12912
3.75k
  }
12913
71.3k
    }
12914
12915
    /*
12916
     * Parse a possible text declaration first
12917
     */
12918
77.1k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
12919
1.77k
  xmlParseTextDecl(ctxt);
12920
        /*
12921
         * An XML-1.0 document can't reference an entity not XML-1.0
12922
         */
12923
1.77k
        if ((xmlStrEqual(oldctxt->version, BAD_CAST "1.0")) &&
12924
1.77k
            (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
12925
277
            xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
12926
277
                           "Version mismatch between document and entity\n");
12927
277
        }
12928
1.77k
    }
12929
12930
77.1k
    ctxt->instate = XML_PARSER_CONTENT;
12931
77.1k
    ctxt->depth = depth;
12932
77.1k
    if (oldctxt != NULL) {
12933
77.1k
  ctxt->_private = oldctxt->_private;
12934
77.1k
  ctxt->loadsubset = oldctxt->loadsubset;
12935
77.1k
  ctxt->validate = oldctxt->validate;
12936
77.1k
  ctxt->valid = oldctxt->valid;
12937
77.1k
  ctxt->replaceEntities = oldctxt->replaceEntities;
12938
77.1k
        if (oldctxt->validate) {
12939
20.1k
            ctxt->vctxt.error = oldctxt->vctxt.error;
12940
20.1k
            ctxt->vctxt.warning = oldctxt->vctxt.warning;
12941
20.1k
            ctxt->vctxt.userData = oldctxt->vctxt.userData;
12942
20.1k
            ctxt->vctxt.flags = oldctxt->vctxt.flags;
12943
20.1k
        }
12944
77.1k
  ctxt->external = oldctxt->external;
12945
77.1k
        if (ctxt->dict) xmlDictFree(ctxt->dict);
12946
77.1k
        ctxt->dict = oldctxt->dict;
12947
77.1k
        ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12948
77.1k
        ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12949
77.1k
        ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12950
77.1k
        ctxt->dictNames = oldctxt->dictNames;
12951
77.1k
        ctxt->attsDefault = oldctxt->attsDefault;
12952
77.1k
        ctxt->attsSpecial = oldctxt->attsSpecial;
12953
77.1k
        ctxt->linenumbers = oldctxt->linenumbers;
12954
77.1k
  ctxt->record_info = oldctxt->record_info;
12955
77.1k
  ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
12956
77.1k
  ctxt->node_seq.length = oldctxt->node_seq.length;
12957
77.1k
  ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
12958
77.1k
    } else {
12959
  /*
12960
   * Doing validity checking on chunk without context
12961
   * doesn't make sense
12962
   */
12963
0
  ctxt->_private = NULL;
12964
0
  ctxt->validate = 0;
12965
0
  ctxt->external = 2;
12966
0
  ctxt->loadsubset = 0;
12967
0
    }
12968
12969
77.1k
    xmlParseContent(ctxt);
12970
12971
77.1k
    if ((RAW == '<') && (NXT(1) == '/')) {
12972
707
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12973
76.4k
    } else if (RAW != 0) {
12974
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12975
0
    }
12976
77.1k
    if (ctxt->node != newDoc->children) {
12977
6.81k
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12978
6.81k
    }
12979
12980
77.1k
    if (!ctxt->wellFormed) {
12981
21.0k
  ret = (xmlParserErrors)ctxt->errNo;
12982
21.0k
        if (oldctxt != NULL) {
12983
21.0k
            oldctxt->errNo = ctxt->errNo;
12984
21.0k
            oldctxt->wellFormed = 0;
12985
21.0k
            xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12986
21.0k
        }
12987
56.0k
    } else {
12988
56.0k
  if (list != NULL) {
12989
6.78k
      xmlNodePtr cur;
12990
12991
      /*
12992
       * Return the newly created nodeset after unlinking it from
12993
       * they pseudo parent.
12994
       */
12995
6.78k
      cur = newDoc->children->children;
12996
6.78k
      *list = cur;
12997
126k
      while (cur != NULL) {
12998
119k
    cur->parent = NULL;
12999
119k
    cur = cur->next;
13000
119k
      }
13001
6.78k
            newDoc->children->children = NULL;
13002
6.78k
  }
13003
56.0k
  ret = XML_ERR_OK;
13004
56.0k
    }
13005
13006
    /*
13007
     * Also record the size of the entity parsed
13008
     */
13009
77.1k
    if (ctxt->input != NULL && oldctxt != NULL) {
13010
77.1k
        unsigned long consumed = ctxt->input->consumed;
13011
13012
77.1k
        xmlSaturatedAddSizeT(&consumed, ctxt->input->cur - ctxt->input->base);
13013
13014
77.1k
        xmlSaturatedAdd(&oldctxt->sizeentities, consumed);
13015
77.1k
        xmlSaturatedAdd(&oldctxt->sizeentities, ctxt->sizeentities);
13016
13017
77.1k
        xmlSaturatedAdd(&oldctxt->sizeentcopy, consumed);
13018
77.1k
        xmlSaturatedAdd(&oldctxt->sizeentcopy, ctxt->sizeentcopy);
13019
77.1k
    }
13020
13021
77.1k
    if (oldctxt != NULL) {
13022
77.1k
        ctxt->dict = NULL;
13023
77.1k
        ctxt->attsDefault = NULL;
13024
77.1k
        ctxt->attsSpecial = NULL;
13025
77.1k
        oldctxt->nbErrors = ctxt->nbErrors;
13026
77.1k
        oldctxt->nbWarnings = ctxt->nbWarnings;
13027
77.1k
        oldctxt->validate = ctxt->validate;
13028
77.1k
        oldctxt->valid = ctxt->valid;
13029
77.1k
        oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
13030
77.1k
        oldctxt->node_seq.length = ctxt->node_seq.length;
13031
77.1k
        oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
13032
77.1k
    }
13033
77.1k
    ctxt->node_seq.maximum = 0;
13034
77.1k
    ctxt->node_seq.length = 0;
13035
77.1k
    ctxt->node_seq.buffer = NULL;
13036
77.1k
    xmlFreeParserCtxt(ctxt);
13037
77.1k
    newDoc->intSubset = NULL;
13038
77.1k
    newDoc->extSubset = NULL;
13039
77.1k
    xmlFreeDoc(newDoc);
13040
13041
77.1k
    return(ret);
13042
77.1k
}
13043
13044
#ifdef LIBXML_SAX1_ENABLED
13045
/**
13046
 * xmlParseExternalEntity:
13047
 * @doc:  the document the chunk pertains to
13048
 * @sax:  the SAX handler block (possibly NULL)
13049
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13050
 * @depth:  Used for loop detection, use 0
13051
 * @URL:  the URL for the entity to load
13052
 * @ID:  the System ID for the entity to load
13053
 * @lst:  the return value for the set of parsed nodes
13054
 *
13055
 * Parse an external general entity
13056
 * An external general parsed entity is well-formed if it matches the
13057
 * production labeled extParsedEnt.
13058
 *
13059
 * [78] extParsedEnt ::= TextDecl? content
13060
 *
13061
 * Returns 0 if the entity is well formed, -1 in case of args problem and
13062
 *    the parser error code otherwise
13063
 */
13064
13065
int
13066
xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
13067
0
    int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
13068
0
    return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
13069
0
                           ID, lst));
13070
0
}
13071
13072
/**
13073
 * xmlParseBalancedChunkMemory:
13074
 * @doc:  the document the chunk pertains to (must not be NULL)
13075
 * @sax:  the SAX handler block (possibly NULL)
13076
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13077
 * @depth:  Used for loop detection, use 0
13078
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13079
 * @lst:  the return value for the set of parsed nodes
13080
 *
13081
 * Parse a well-balanced chunk of an XML document
13082
 * called by the parser
13083
 * The allowed sequence for the Well Balanced Chunk is the one defined by
13084
 * the content production in the XML grammar:
13085
 *
13086
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13087
 *
13088
 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13089
 *    the parser error code otherwise
13090
 */
13091
13092
int
13093
xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13094
0
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
13095
0
    return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
13096
0
                                                depth, string, lst, 0 );
13097
0
}
13098
#endif /* LIBXML_SAX1_ENABLED */
13099
13100
/**
13101
 * xmlParseBalancedChunkMemoryInternal:
13102
 * @oldctxt:  the existing parsing context
13103
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13104
 * @user_data:  the user data field for the parser context
13105
 * @lst:  the return value for the set of parsed nodes
13106
 *
13107
 *
13108
 * Parse a well-balanced chunk of an XML document
13109
 * called by the parser
13110
 * The allowed sequence for the Well Balanced Chunk is the one defined by
13111
 * the content production in the XML grammar:
13112
 *
13113
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13114
 *
13115
 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13116
 * error code otherwise
13117
 *
13118
 * In case recover is set to 1, the nodelist will not be empty even if
13119
 * the parsed chunk is not well balanced.
13120
 */
13121
static xmlParserErrors
13122
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
13123
63.5k
  const xmlChar *string, void *user_data, xmlNodePtr *lst) {
13124
63.5k
    xmlParserCtxtPtr ctxt;
13125
63.5k
    xmlDocPtr newDoc = NULL;
13126
63.5k
    xmlNodePtr newRoot;
13127
63.5k
    xmlSAXHandlerPtr oldsax = NULL;
13128
63.5k
    xmlNodePtr content = NULL;
13129
63.5k
    xmlNodePtr last = NULL;
13130
63.5k
    int size;
13131
63.5k
    xmlParserErrors ret = XML_ERR_OK;
13132
63.5k
#ifdef SAX2
13133
63.5k
    int i;
13134
63.5k
#endif
13135
13136
63.5k
    if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13137
63.5k
        (oldctxt->depth >  100)) {
13138
129
  xmlFatalErrMsg(oldctxt, XML_ERR_ENTITY_LOOP,
13139
129
                       "Maximum entity nesting depth exceeded");
13140
129
  return(XML_ERR_ENTITY_LOOP);
13141
129
    }
13142
13143
13144
63.4k
    if (lst != NULL)
13145
53.2k
        *lst = NULL;
13146
63.4k
    if (string == NULL)
13147
66
        return(XML_ERR_INTERNAL_ERROR);
13148
13149
63.3k
    size = xmlStrlen(string);
13150
13151
63.3k
    ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13152
63.3k
    if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13153
61.1k
    ctxt->nbErrors = oldctxt->nbErrors;
13154
61.1k
    ctxt->nbWarnings = oldctxt->nbWarnings;
13155
61.1k
    if (user_data != NULL)
13156
0
  ctxt->userData = user_data;
13157
61.1k
    else
13158
61.1k
  ctxt->userData = ctxt;
13159
61.1k
    if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13160
61.1k
    ctxt->dict = oldctxt->dict;
13161
61.1k
    ctxt->input_id = oldctxt->input_id;
13162
61.1k
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13163
61.1k
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13164
61.1k
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13165
13166
61.1k
#ifdef SAX2
13167
    /* propagate namespaces down the entity */
13168
61.3k
    for (i = 0;i < oldctxt->nsNr;i += 2) {
13169
198
        nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
13170
198
    }
13171
61.1k
#endif
13172
13173
61.1k
    oldsax = ctxt->sax;
13174
61.1k
    ctxt->sax = oldctxt->sax;
13175
61.1k
    xmlDetectSAX2(ctxt);
13176
61.1k
    ctxt->replaceEntities = oldctxt->replaceEntities;
13177
61.1k
    ctxt->options = oldctxt->options;
13178
13179
61.1k
    ctxt->_private = oldctxt->_private;
13180
61.1k
    if (oldctxt->myDoc == NULL) {
13181
0
  newDoc = xmlNewDoc(BAD_CAST "1.0");
13182
0
  if (newDoc == NULL) {
13183
0
      ctxt->sax = oldsax;
13184
0
      ctxt->dict = NULL;
13185
0
      xmlFreeParserCtxt(ctxt);
13186
0
      return(XML_ERR_INTERNAL_ERROR);
13187
0
  }
13188
0
  newDoc->properties = XML_DOC_INTERNAL;
13189
0
  newDoc->dict = ctxt->dict;
13190
0
  xmlDictReference(newDoc->dict);
13191
0
  ctxt->myDoc = newDoc;
13192
61.1k
    } else {
13193
61.1k
  ctxt->myDoc = oldctxt->myDoc;
13194
61.1k
        content = ctxt->myDoc->children;
13195
61.1k
  last = ctxt->myDoc->last;
13196
61.1k
    }
13197
61.1k
    newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
13198
61.1k
    if (newRoot == NULL) {
13199
0
  ctxt->sax = oldsax;
13200
0
  ctxt->dict = NULL;
13201
0
  xmlFreeParserCtxt(ctxt);
13202
0
  if (newDoc != NULL) {
13203
0
      xmlFreeDoc(newDoc);
13204
0
  }
13205
0
  return(XML_ERR_INTERNAL_ERROR);
13206
0
    }
13207
61.1k
    ctxt->myDoc->children = NULL;
13208
61.1k
    ctxt->myDoc->last = NULL;
13209
61.1k
    xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
13210
61.1k
    nodePush(ctxt, ctxt->myDoc->children);
13211
61.1k
    ctxt->instate = XML_PARSER_CONTENT;
13212
61.1k
    ctxt->depth = oldctxt->depth;
13213
13214
61.1k
    ctxt->validate = 0;
13215
61.1k
    ctxt->loadsubset = oldctxt->loadsubset;
13216
61.1k
    if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
13217
  /*
13218
   * ID/IDREF registration will be done in xmlValidateElement below
13219
   */
13220
54.5k
  ctxt->loadsubset |= XML_SKIP_IDS;
13221
54.5k
    }
13222
61.1k
    ctxt->dictNames = oldctxt->dictNames;
13223
61.1k
    ctxt->attsDefault = oldctxt->attsDefault;
13224
61.1k
    ctxt->attsSpecial = oldctxt->attsSpecial;
13225
13226
61.1k
    xmlParseContent(ctxt);
13227
61.1k
    if ((RAW == '<') && (NXT(1) == '/')) {
13228
207
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13229
60.9k
    } else if (RAW != 0) {
13230
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13231
0
    }
13232
61.1k
    if (ctxt->node != ctxt->myDoc->children) {
13233
984
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13234
984
    }
13235
13236
61.1k
    if (!ctxt->wellFormed) {
13237
10.8k
  ret = (xmlParserErrors)ctxt->errNo;
13238
10.8k
        oldctxt->errNo = ctxt->errNo;
13239
10.8k
        oldctxt->wellFormed = 0;
13240
10.8k
        xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13241
50.2k
    } else {
13242
50.2k
        ret = XML_ERR_OK;
13243
50.2k
    }
13244
13245
61.1k
    if ((lst != NULL) && (ret == XML_ERR_OK)) {
13246
42.1k
  xmlNodePtr cur;
13247
13248
  /*
13249
   * Return the newly created nodeset after unlinking it from
13250
   * they pseudo parent.
13251
   */
13252
42.1k
  cur = ctxt->myDoc->children->children;
13253
42.1k
  *lst = cur;
13254
176k
  while (cur != NULL) {
13255
134k
#ifdef LIBXML_VALID_ENABLED
13256
134k
      if ((oldctxt->validate) && (oldctxt->wellFormed) &&
13257
134k
    (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
13258
134k
    (cur->type == XML_ELEMENT_NODE)) {
13259
23.1k
    oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
13260
23.1k
      oldctxt->myDoc, cur);
13261
23.1k
      }
13262
134k
#endif /* LIBXML_VALID_ENABLED */
13263
134k
      cur->parent = NULL;
13264
134k
      cur = cur->next;
13265
134k
  }
13266
42.1k
  ctxt->myDoc->children->children = NULL;
13267
42.1k
    }
13268
61.1k
    if (ctxt->myDoc != NULL) {
13269
61.1k
  xmlFreeNode(ctxt->myDoc->children);
13270
61.1k
        ctxt->myDoc->children = content;
13271
61.1k
        ctxt->myDoc->last = last;
13272
61.1k
    }
13273
13274
    /*
13275
     * Also record the size of the entity parsed
13276
     */
13277
61.1k
    if (ctxt->input != NULL && oldctxt != NULL) {
13278
61.1k
        unsigned long consumed = ctxt->input->consumed;
13279
13280
61.1k
        xmlSaturatedAddSizeT(&consumed, ctxt->input->cur - ctxt->input->base);
13281
13282
61.1k
        xmlSaturatedAdd(&oldctxt->sizeentcopy, consumed);
13283
61.1k
        xmlSaturatedAdd(&oldctxt->sizeentcopy, ctxt->sizeentcopy);
13284
61.1k
    }
13285
13286
61.1k
    oldctxt->nbErrors = ctxt->nbErrors;
13287
61.1k
    oldctxt->nbWarnings = ctxt->nbWarnings;
13288
61.1k
    ctxt->sax = oldsax;
13289
61.1k
    ctxt->dict = NULL;
13290
61.1k
    ctxt->attsDefault = NULL;
13291
61.1k
    ctxt->attsSpecial = NULL;
13292
61.1k
    xmlFreeParserCtxt(ctxt);
13293
61.1k
    if (newDoc != NULL) {
13294
0
  xmlFreeDoc(newDoc);
13295
0
    }
13296
13297
61.1k
    return(ret);
13298
61.1k
}
13299
13300
/**
13301
 * xmlParseInNodeContext:
13302
 * @node:  the context node
13303
 * @data:  the input string
13304
 * @datalen:  the input string length in bytes
13305
 * @options:  a combination of xmlParserOption
13306
 * @lst:  the return value for the set of parsed nodes
13307
 *
13308
 * Parse a well-balanced chunk of an XML document
13309
 * within the context (DTD, namespaces, etc ...) of the given node.
13310
 *
13311
 * The allowed sequence for the data is a Well Balanced Chunk defined by
13312
 * the content production in the XML grammar:
13313
 *
13314
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13315
 *
13316
 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13317
 * error code otherwise
13318
 */
13319
xmlParserErrors
13320
xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13321
0
                      int options, xmlNodePtr *lst) {
13322
0
#ifdef SAX2
13323
0
    xmlParserCtxtPtr ctxt;
13324
0
    xmlDocPtr doc = NULL;
13325
0
    xmlNodePtr fake, cur;
13326
0
    int nsnr = 0;
13327
13328
0
    xmlParserErrors ret = XML_ERR_OK;
13329
13330
    /*
13331
     * check all input parameters, grab the document
13332
     */
13333
0
    if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13334
0
        return(XML_ERR_INTERNAL_ERROR);
13335
0
    switch (node->type) {
13336
0
        case XML_ELEMENT_NODE:
13337
0
        case XML_ATTRIBUTE_NODE:
13338
0
        case XML_TEXT_NODE:
13339
0
        case XML_CDATA_SECTION_NODE:
13340
0
        case XML_ENTITY_REF_NODE:
13341
0
        case XML_PI_NODE:
13342
0
        case XML_COMMENT_NODE:
13343
0
        case XML_DOCUMENT_NODE:
13344
0
        case XML_HTML_DOCUMENT_NODE:
13345
0
      break;
13346
0
  default:
13347
0
      return(XML_ERR_INTERNAL_ERROR);
13348
13349
0
    }
13350
0
    while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13351
0
           (node->type != XML_DOCUMENT_NODE) &&
13352
0
     (node->type != XML_HTML_DOCUMENT_NODE))
13353
0
  node = node->parent;
13354
0
    if (node == NULL)
13355
0
  return(XML_ERR_INTERNAL_ERROR);
13356
0
    if (node->type == XML_ELEMENT_NODE)
13357
0
  doc = node->doc;
13358
0
    else
13359
0
        doc = (xmlDocPtr) node;
13360
0
    if (doc == NULL)
13361
0
  return(XML_ERR_INTERNAL_ERROR);
13362
13363
    /*
13364
     * allocate a context and set-up everything not related to the
13365
     * node position in the tree
13366
     */
13367
0
    if (doc->type == XML_DOCUMENT_NODE)
13368
0
  ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13369
0
#ifdef LIBXML_HTML_ENABLED
13370
0
    else if (doc->type == XML_HTML_DOCUMENT_NODE) {
13371
0
  ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
13372
        /*
13373
         * When parsing in context, it makes no sense to add implied
13374
         * elements like html/body/etc...
13375
         */
13376
0
        options |= HTML_PARSE_NOIMPLIED;
13377
0
    }
13378
0
#endif
13379
0
    else
13380
0
        return(XML_ERR_INTERNAL_ERROR);
13381
13382
0
    if (ctxt == NULL)
13383
0
        return(XML_ERR_NO_MEMORY);
13384
13385
    /*
13386
     * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13387
     * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13388
     * we must wait until the last moment to free the original one.
13389
     */
13390
0
    if (doc->dict != NULL) {
13391
0
        if (ctxt->dict != NULL)
13392
0
      xmlDictFree(ctxt->dict);
13393
0
  ctxt->dict = doc->dict;
13394
0
    } else
13395
0
        options |= XML_PARSE_NODICT;
13396
13397
0
    if (doc->encoding != NULL) {
13398
0
        xmlCharEncodingHandlerPtr hdlr;
13399
13400
0
        if (ctxt->encoding != NULL)
13401
0
      xmlFree((xmlChar *) ctxt->encoding);
13402
0
        ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13403
13404
0
        hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding);
13405
0
        if (hdlr != NULL) {
13406
0
            xmlSwitchToEncoding(ctxt, hdlr);
13407
0
  } else {
13408
0
            return(XML_ERR_UNSUPPORTED_ENCODING);
13409
0
        }
13410
0
    }
13411
13412
0
    xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13413
0
    xmlDetectSAX2(ctxt);
13414
0
    ctxt->myDoc = doc;
13415
    /* parsing in context, i.e. as within existing content */
13416
0
    ctxt->input_id = 2;
13417
0
    ctxt->instate = XML_PARSER_CONTENT;
13418
13419
0
    fake = xmlNewDocComment(node->doc, NULL);
13420
0
    if (fake == NULL) {
13421
0
        xmlFreeParserCtxt(ctxt);
13422
0
  return(XML_ERR_NO_MEMORY);
13423
0
    }
13424
0
    xmlAddChild(node, fake);
13425
13426
0
    if (node->type == XML_ELEMENT_NODE) {
13427
0
  nodePush(ctxt, node);
13428
  /*
13429
   * initialize the SAX2 namespaces stack
13430
   */
13431
0
  cur = node;
13432
0
  while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13433
0
      xmlNsPtr ns = cur->nsDef;
13434
0
      const xmlChar *iprefix, *ihref;
13435
13436
0
      while (ns != NULL) {
13437
0
    if (ctxt->dict) {
13438
0
        iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13439
0
        ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13440
0
    } else {
13441
0
        iprefix = ns->prefix;
13442
0
        ihref = ns->href;
13443
0
    }
13444
13445
0
          if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13446
0
        nsPush(ctxt, iprefix, ihref);
13447
0
        nsnr++;
13448
0
    }
13449
0
    ns = ns->next;
13450
0
      }
13451
0
      cur = cur->parent;
13452
0
  }
13453
0
    }
13454
13455
0
    if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13456
  /*
13457
   * ID/IDREF registration will be done in xmlValidateElement below
13458
   */
13459
0
  ctxt->loadsubset |= XML_SKIP_IDS;
13460
0
    }
13461
13462
0
#ifdef LIBXML_HTML_ENABLED
13463
0
    if (doc->type == XML_HTML_DOCUMENT_NODE)
13464
0
        __htmlParseContent(ctxt);
13465
0
    else
13466
0
#endif
13467
0
  xmlParseContent(ctxt);
13468
13469
0
    nsPop(ctxt, nsnr);
13470
0
    if ((RAW == '<') && (NXT(1) == '/')) {
13471
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13472
0
    } else if (RAW != 0) {
13473
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13474
0
    }
13475
0
    if ((ctxt->node != NULL) && (ctxt->node != node)) {
13476
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13477
0
  ctxt->wellFormed = 0;
13478
0
    }
13479
13480
0
    if (!ctxt->wellFormed) {
13481
0
        if (ctxt->errNo == 0)
13482
0
      ret = XML_ERR_INTERNAL_ERROR;
13483
0
  else
13484
0
      ret = (xmlParserErrors)ctxt->errNo;
13485
0
    } else {
13486
0
        ret = XML_ERR_OK;
13487
0
    }
13488
13489
    /*
13490
     * Return the newly created nodeset after unlinking it from
13491
     * the pseudo sibling.
13492
     */
13493
13494
0
    cur = fake->next;
13495
0
    fake->next = NULL;
13496
0
    node->last = fake;
13497
13498
0
    if (cur != NULL) {
13499
0
  cur->prev = NULL;
13500
0
    }
13501
13502
0
    *lst = cur;
13503
13504
0
    while (cur != NULL) {
13505
0
  cur->parent = NULL;
13506
0
  cur = cur->next;
13507
0
    }
13508
13509
0
    xmlUnlinkNode(fake);
13510
0
    xmlFreeNode(fake);
13511
13512
13513
0
    if (ret != XML_ERR_OK) {
13514
0
        xmlFreeNodeList(*lst);
13515
0
  *lst = NULL;
13516
0
    }
13517
13518
0
    if (doc->dict != NULL)
13519
0
        ctxt->dict = NULL;
13520
0
    xmlFreeParserCtxt(ctxt);
13521
13522
0
    return(ret);
13523
#else /* !SAX2 */
13524
    return(XML_ERR_INTERNAL_ERROR);
13525
#endif
13526
0
}
13527
13528
#ifdef LIBXML_SAX1_ENABLED
13529
/**
13530
 * xmlParseBalancedChunkMemoryRecover:
13531
 * @doc:  the document the chunk pertains to (must not be NULL)
13532
 * @sax:  the SAX handler block (possibly NULL)
13533
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13534
 * @depth:  Used for loop detection, use 0
13535
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13536
 * @lst:  the return value for the set of parsed nodes
13537
 * @recover: return nodes even if the data is broken (use 0)
13538
 *
13539
 *
13540
 * Parse a well-balanced chunk of an XML document
13541
 * called by the parser
13542
 * The allowed sequence for the Well Balanced Chunk is the one defined by
13543
 * the content production in the XML grammar:
13544
 *
13545
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13546
 *
13547
 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13548
 *    the parser error code otherwise
13549
 *
13550
 * In case recover is set to 1, the nodelist will not be empty even if
13551
 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13552
 * some extent.
13553
 */
13554
int
13555
xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13556
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
13557
0
     int recover) {
13558
0
    xmlParserCtxtPtr ctxt;
13559
0
    xmlDocPtr newDoc;
13560
0
    xmlSAXHandlerPtr oldsax = NULL;
13561
0
    xmlNodePtr content, newRoot;
13562
0
    int size;
13563
0
    int ret = 0;
13564
13565
0
    if (depth > 40) {
13566
0
  return(XML_ERR_ENTITY_LOOP);
13567
0
    }
13568
13569
13570
0
    if (lst != NULL)
13571
0
        *lst = NULL;
13572
0
    if (string == NULL)
13573
0
        return(-1);
13574
13575
0
    size = xmlStrlen(string);
13576
13577
0
    ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13578
0
    if (ctxt == NULL) return(-1);
13579
0
    ctxt->userData = ctxt;
13580
0
    if (sax != NULL) {
13581
0
  oldsax = ctxt->sax;
13582
0
        ctxt->sax = sax;
13583
0
  if (user_data != NULL)
13584
0
      ctxt->userData = user_data;
13585
0
    }
13586
0
    newDoc = xmlNewDoc(BAD_CAST "1.0");
13587
0
    if (newDoc == NULL) {
13588
0
  xmlFreeParserCtxt(ctxt);
13589
0
  return(-1);
13590
0
    }
13591
0
    newDoc->properties = XML_DOC_INTERNAL;
13592
0
    if ((doc != NULL) && (doc->dict != NULL)) {
13593
0
        xmlDictFree(ctxt->dict);
13594
0
  ctxt->dict = doc->dict;
13595
0
  xmlDictReference(ctxt->dict);
13596
0
  ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13597
0
  ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13598
0
  ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13599
0
  ctxt->dictNames = 1;
13600
0
    } else {
13601
0
  xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
13602
0
    }
13603
    /* doc == NULL is only supported for historic reasons */
13604
0
    if (doc != NULL) {
13605
0
  newDoc->intSubset = doc->intSubset;
13606
0
  newDoc->extSubset = doc->extSubset;
13607
0
    }
13608
0
    newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13609
0
    if (newRoot == NULL) {
13610
0
  if (sax != NULL)
13611
0
      ctxt->sax = oldsax;
13612
0
  xmlFreeParserCtxt(ctxt);
13613
0
  newDoc->intSubset = NULL;
13614
0
  newDoc->extSubset = NULL;
13615
0
        xmlFreeDoc(newDoc);
13616
0
  return(-1);
13617
0
    }
13618
0
    xmlAddChild((xmlNodePtr) newDoc, newRoot);
13619
0
    nodePush(ctxt, newRoot);
13620
    /* doc == NULL is only supported for historic reasons */
13621
0
    if (doc == NULL) {
13622
0
  ctxt->myDoc = newDoc;
13623
0
    } else {
13624
0
  ctxt->myDoc = newDoc;
13625
0
  newDoc->children->doc = doc;
13626
  /* Ensure that doc has XML spec namespace */
13627
0
  xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13628
0
  newDoc->oldNs = doc->oldNs;
13629
0
    }
13630
0
    ctxt->instate = XML_PARSER_CONTENT;
13631
0
    ctxt->input_id = 2;
13632
0
    ctxt->depth = depth;
13633
13634
    /*
13635
     * Doing validity checking on chunk doesn't make sense
13636
     */
13637
0
    ctxt->validate = 0;
13638
0
    ctxt->loadsubset = 0;
13639
0
    xmlDetectSAX2(ctxt);
13640
13641
0
    if ( doc != NULL ){
13642
0
        content = doc->children;
13643
0
        doc->children = NULL;
13644
0
        xmlParseContent(ctxt);
13645
0
        doc->children = content;
13646
0
    }
13647
0
    else {
13648
0
        xmlParseContent(ctxt);
13649
0
    }
13650
0
    if ((RAW == '<') && (NXT(1) == '/')) {
13651
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13652
0
    } else if (RAW != 0) {
13653
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13654
0
    }
13655
0
    if (ctxt->node != newDoc->children) {
13656
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13657
0
    }
13658
13659
0
    if (!ctxt->wellFormed) {
13660
0
        if (ctxt->errNo == 0)
13661
0
      ret = 1;
13662
0
  else
13663
0
      ret = ctxt->errNo;
13664
0
    } else {
13665
0
      ret = 0;
13666
0
    }
13667
13668
0
    if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13669
0
  xmlNodePtr cur;
13670
13671
  /*
13672
   * Return the newly created nodeset after unlinking it from
13673
   * they pseudo parent.
13674
   */
13675
0
  cur = newDoc->children->children;
13676
0
  *lst = cur;
13677
0
  while (cur != NULL) {
13678
0
      xmlSetTreeDoc(cur, doc);
13679
0
      cur->parent = NULL;
13680
0
      cur = cur->next;
13681
0
  }
13682
0
  newDoc->children->children = NULL;
13683
0
    }
13684
13685
0
    if (sax != NULL)
13686
0
  ctxt->sax = oldsax;
13687
0
    xmlFreeParserCtxt(ctxt);
13688
0
    newDoc->intSubset = NULL;
13689
0
    newDoc->extSubset = NULL;
13690
    /* This leaks the namespace list if doc == NULL */
13691
0
    newDoc->oldNs = NULL;
13692
0
    xmlFreeDoc(newDoc);
13693
13694
0
    return(ret);
13695
0
}
13696
13697
/**
13698
 * xmlSAXParseEntity:
13699
 * @sax:  the SAX handler block
13700
 * @filename:  the filename
13701
 *
13702
 * DEPRECATED: Don't use.
13703
 *
13704
 * parse an XML external entity out of context and build a tree.
13705
 * It use the given SAX function block to handle the parsing callback.
13706
 * If sax is NULL, fallback to the default DOM tree building routines.
13707
 *
13708
 * [78] extParsedEnt ::= TextDecl? content
13709
 *
13710
 * This correspond to a "Well Balanced" chunk
13711
 *
13712
 * Returns the resulting document tree
13713
 */
13714
13715
xmlDocPtr
13716
0
xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13717
0
    xmlDocPtr ret;
13718
0
    xmlParserCtxtPtr ctxt;
13719
13720
0
    ctxt = xmlCreateFileParserCtxt(filename);
13721
0
    if (ctxt == NULL) {
13722
0
  return(NULL);
13723
0
    }
13724
0
    if (sax != NULL) {
13725
0
  if (ctxt->sax != NULL)
13726
0
      xmlFree(ctxt->sax);
13727
0
        ctxt->sax = sax;
13728
0
        ctxt->userData = NULL;
13729
0
    }
13730
13731
0
    xmlParseExtParsedEnt(ctxt);
13732
13733
0
    if (ctxt->wellFormed)
13734
0
  ret = ctxt->myDoc;
13735
0
    else {
13736
0
        ret = NULL;
13737
0
        xmlFreeDoc(ctxt->myDoc);
13738
0
        ctxt->myDoc = NULL;
13739
0
    }
13740
0
    if (sax != NULL)
13741
0
        ctxt->sax = NULL;
13742
0
    xmlFreeParserCtxt(ctxt);
13743
13744
0
    return(ret);
13745
0
}
13746
13747
/**
13748
 * xmlParseEntity:
13749
 * @filename:  the filename
13750
 *
13751
 * parse an XML external entity out of context and build a tree.
13752
 *
13753
 * [78] extParsedEnt ::= TextDecl? content
13754
 *
13755
 * This correspond to a "Well Balanced" chunk
13756
 *
13757
 * Returns the resulting document tree
13758
 */
13759
13760
xmlDocPtr
13761
0
xmlParseEntity(const char *filename) {
13762
0
    return(xmlSAXParseEntity(NULL, filename));
13763
0
}
13764
#endif /* LIBXML_SAX1_ENABLED */
13765
13766
/**
13767
 * xmlCreateEntityParserCtxtInternal:
13768
 * @URL:  the entity URL
13769
 * @ID:  the entity PUBLIC ID
13770
 * @base:  a possible base for the target URI
13771
 * @pctx:  parser context used to set options on new context
13772
 *
13773
 * Create a parser context for an external entity
13774
 * Automatic support for ZLIB/Compress compressed document is provided
13775
 * by default if found at compile-time.
13776
 *
13777
 * Returns the new parser context or NULL
13778
 */
13779
static xmlParserCtxtPtr
13780
xmlCreateEntityParserCtxtInternal(xmlSAXHandlerPtr sax, void *userData,
13781
        const xmlChar *URL, const xmlChar *ID, const xmlChar *base,
13782
516k
        xmlParserCtxtPtr pctx) {
13783
516k
    xmlParserCtxtPtr ctxt;
13784
516k
    xmlParserInputPtr inputStream;
13785
516k
    char *directory = NULL;
13786
516k
    xmlChar *uri;
13787
13788
516k
    ctxt = xmlNewSAXParserCtxt(sax, userData);
13789
516k
    if (ctxt == NULL) {
13790
0
  return(NULL);
13791
0
    }
13792
13793
516k
    if (pctx != NULL) {
13794
516k
        ctxt->options = pctx->options;
13795
516k
        ctxt->_private = pctx->_private;
13796
516k
  ctxt->input_id = pctx->input_id;
13797
516k
    }
13798
13799
    /* Don't read from stdin. */
13800
516k
    if (xmlStrcmp(URL, BAD_CAST "-") == 0)
13801
52
        URL = BAD_CAST "./-";
13802
13803
516k
    uri = xmlBuildURI(URL, base);
13804
13805
516k
    if (uri == NULL) {
13806
10.9k
  inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
13807
10.9k
  if (inputStream == NULL) {
13808
10.7k
      xmlFreeParserCtxt(ctxt);
13809
10.7k
      return(NULL);
13810
10.7k
  }
13811
13812
218
  inputPush(ctxt, inputStream);
13813
13814
218
  if ((ctxt->directory == NULL) && (directory == NULL))
13815
218
      directory = xmlParserGetDirectory((char *)URL);
13816
218
  if ((ctxt->directory == NULL) && (directory != NULL))
13817
218
      ctxt->directory = directory;
13818
505k
    } else {
13819
505k
  inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
13820
505k
  if (inputStream == NULL) {
13821
428k
      xmlFree(uri);
13822
428k
      xmlFreeParserCtxt(ctxt);
13823
428k
      return(NULL);
13824
428k
  }
13825
13826
76.8k
  inputPush(ctxt, inputStream);
13827
13828
76.8k
  if ((ctxt->directory == NULL) && (directory == NULL))
13829
76.8k
      directory = xmlParserGetDirectory((char *)uri);
13830
76.8k
  if ((ctxt->directory == NULL) && (directory != NULL))
13831
76.8k
      ctxt->directory = directory;
13832
76.8k
  xmlFree(uri);
13833
76.8k
    }
13834
77.1k
    return(ctxt);
13835
516k
}
13836
13837
/**
13838
 * xmlCreateEntityParserCtxt:
13839
 * @URL:  the entity URL
13840
 * @ID:  the entity PUBLIC ID
13841
 * @base:  a possible base for the target URI
13842
 *
13843
 * Create a parser context for an external entity
13844
 * Automatic support for ZLIB/Compress compressed document is provided
13845
 * by default if found at compile-time.
13846
 *
13847
 * Returns the new parser context or NULL
13848
 */
13849
xmlParserCtxtPtr
13850
xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
13851
0
                    const xmlChar *base) {
13852
0
    return xmlCreateEntityParserCtxtInternal(NULL, NULL, URL, ID, base, NULL);
13853
13854
0
}
13855
13856
/************************************************************************
13857
 *                  *
13858
 *    Front ends when parsing from a file     *
13859
 *                  *
13860
 ************************************************************************/
13861
13862
/**
13863
 * xmlCreateURLParserCtxt:
13864
 * @filename:  the filename or URL
13865
 * @options:  a combination of xmlParserOption
13866
 *
13867
 * Create a parser context for a file or URL content.
13868
 * Automatic support for ZLIB/Compress compressed document is provided
13869
 * by default if found at compile-time and for file accesses
13870
 *
13871
 * Returns the new parser context or NULL
13872
 */
13873
xmlParserCtxtPtr
13874
xmlCreateURLParserCtxt(const char *filename, int options)
13875
0
{
13876
0
    xmlParserCtxtPtr ctxt;
13877
0
    xmlParserInputPtr inputStream;
13878
0
    char *directory = NULL;
13879
13880
0
    ctxt = xmlNewParserCtxt();
13881
0
    if (ctxt == NULL) {
13882
0
  xmlErrMemory(NULL, "cannot allocate parser context");
13883
0
  return(NULL);
13884
0
    }
13885
13886
0
    if (options)
13887
0
  xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13888
0
    ctxt->linenumbers = 1;
13889
13890
0
    inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
13891
0
    if (inputStream == NULL) {
13892
0
  xmlFreeParserCtxt(ctxt);
13893
0
  return(NULL);
13894
0
    }
13895
13896
0
    inputPush(ctxt, inputStream);
13897
0
    if ((ctxt->directory == NULL) && (directory == NULL))
13898
0
        directory = xmlParserGetDirectory(filename);
13899
0
    if ((ctxt->directory == NULL) && (directory != NULL))
13900
0
        ctxt->directory = directory;
13901
13902
0
    return(ctxt);
13903
0
}
13904
13905
/**
13906
 * xmlCreateFileParserCtxt:
13907
 * @filename:  the filename
13908
 *
13909
 * Create a parser context for a file content.
13910
 * Automatic support for ZLIB/Compress compressed document is provided
13911
 * by default if found at compile-time.
13912
 *
13913
 * Returns the new parser context or NULL
13914
 */
13915
xmlParserCtxtPtr
13916
xmlCreateFileParserCtxt(const char *filename)
13917
0
{
13918
0
    return(xmlCreateURLParserCtxt(filename, 0));
13919
0
}
13920
13921
#ifdef LIBXML_SAX1_ENABLED
13922
/**
13923
 * xmlSAXParseFileWithData:
13924
 * @sax:  the SAX handler block
13925
 * @filename:  the filename
13926
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
13927
 *             documents
13928
 * @data:  the userdata
13929
 *
13930
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
13931
 *
13932
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13933
 * compressed document is provided by default if found at compile-time.
13934
 * It use the given SAX function block to handle the parsing callback.
13935
 * If sax is NULL, fallback to the default DOM tree building routines.
13936
 *
13937
 * User data (void *) is stored within the parser context in the
13938
 * context's _private member, so it is available nearly everywhere in libxml
13939
 *
13940
 * Returns the resulting document tree
13941
 */
13942
13943
xmlDocPtr
13944
xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
13945
0
                        int recovery, void *data) {
13946
0
    xmlDocPtr ret;
13947
0
    xmlParserCtxtPtr ctxt;
13948
13949
0
    xmlInitParser();
13950
13951
0
    ctxt = xmlCreateFileParserCtxt(filename);
13952
0
    if (ctxt == NULL) {
13953
0
  return(NULL);
13954
0
    }
13955
0
    if (sax != NULL) {
13956
0
  if (ctxt->sax != NULL)
13957
0
      xmlFree(ctxt->sax);
13958
0
        ctxt->sax = sax;
13959
0
    }
13960
0
    xmlDetectSAX2(ctxt);
13961
0
    if (data!=NULL) {
13962
0
  ctxt->_private = data;
13963
0
    }
13964
13965
0
    if (ctxt->directory == NULL)
13966
0
        ctxt->directory = xmlParserGetDirectory(filename);
13967
13968
0
    ctxt->recovery = recovery;
13969
13970
0
    xmlParseDocument(ctxt);
13971
13972
0
    if ((ctxt->wellFormed) || recovery) {
13973
0
        ret = ctxt->myDoc;
13974
0
  if ((ret != NULL) && (ctxt->input->buf != NULL)) {
13975
0
      if (ctxt->input->buf->compressed > 0)
13976
0
    ret->compression = 9;
13977
0
      else
13978
0
    ret->compression = ctxt->input->buf->compressed;
13979
0
  }
13980
0
    }
13981
0
    else {
13982
0
       ret = NULL;
13983
0
       xmlFreeDoc(ctxt->myDoc);
13984
0
       ctxt->myDoc = NULL;
13985
0
    }
13986
0
    if (sax != NULL)
13987
0
        ctxt->sax = NULL;
13988
0
    xmlFreeParserCtxt(ctxt);
13989
13990
0
    return(ret);
13991
0
}
13992
13993
/**
13994
 * xmlSAXParseFile:
13995
 * @sax:  the SAX handler block
13996
 * @filename:  the filename
13997
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
13998
 *             documents
13999
 *
14000
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
14001
 *
14002
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14003
 * compressed document is provided by default if found at compile-time.
14004
 * It use the given SAX function block to handle the parsing callback.
14005
 * If sax is NULL, fallback to the default DOM tree building routines.
14006
 *
14007
 * Returns the resulting document tree
14008
 */
14009
14010
xmlDocPtr
14011
xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
14012
0
                          int recovery) {
14013
0
    return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
14014
0
}
14015
14016
/**
14017
 * xmlRecoverDoc:
14018
 * @cur:  a pointer to an array of xmlChar
14019
 *
14020
 * DEPRECATED: Use xmlReadDoc with XML_PARSE_RECOVER.
14021
 *
14022
 * parse an XML in-memory document and build a tree.
14023
 * In the case the document is not Well Formed, a attempt to build a
14024
 * tree is tried anyway
14025
 *
14026
 * Returns the resulting document tree or NULL in case of failure
14027
 */
14028
14029
xmlDocPtr
14030
0
xmlRecoverDoc(const xmlChar *cur) {
14031
0
    return(xmlSAXParseDoc(NULL, cur, 1));
14032
0
}
14033
14034
/**
14035
 * xmlParseFile:
14036
 * @filename:  the filename
14037
 *
14038
 * DEPRECATED: Use xmlReadFile.
14039
 *
14040
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14041
 * compressed document is provided by default if found at compile-time.
14042
 *
14043
 * Returns the resulting document tree if the file was wellformed,
14044
 * NULL otherwise.
14045
 */
14046
14047
xmlDocPtr
14048
0
xmlParseFile(const char *filename) {
14049
0
    return(xmlSAXParseFile(NULL, filename, 0));
14050
0
}
14051
14052
/**
14053
 * xmlRecoverFile:
14054
 * @filename:  the filename
14055
 *
14056
 * DEPRECATED: Use xmlReadFile with XML_PARSE_RECOVER.
14057
 *
14058
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14059
 * compressed document is provided by default if found at compile-time.
14060
 * In the case the document is not Well Formed, it attempts to build
14061
 * a tree anyway
14062
 *
14063
 * Returns the resulting document tree or NULL in case of failure
14064
 */
14065
14066
xmlDocPtr
14067
0
xmlRecoverFile(const char *filename) {
14068
0
    return(xmlSAXParseFile(NULL, filename, 1));
14069
0
}
14070
14071
14072
/**
14073
 * xmlSetupParserForBuffer:
14074
 * @ctxt:  an XML parser context
14075
 * @buffer:  a xmlChar * buffer
14076
 * @filename:  a file name
14077
 *
14078
 * DEPRECATED: Don't use.
14079
 *
14080
 * Setup the parser context to parse a new buffer; Clears any prior
14081
 * contents from the parser context. The buffer parameter must not be
14082
 * NULL, but the filename parameter can be
14083
 */
14084
void
14085
xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
14086
                             const char* filename)
14087
0
{
14088
0
    xmlParserInputPtr input;
14089
14090
0
    if ((ctxt == NULL) || (buffer == NULL))
14091
0
        return;
14092
14093
0
    input = xmlNewInputStream(ctxt);
14094
0
    if (input == NULL) {
14095
0
        xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
14096
0
        xmlClearParserCtxt(ctxt);
14097
0
        return;
14098
0
    }
14099
14100
0
    xmlClearParserCtxt(ctxt);
14101
0
    if (filename != NULL)
14102
0
        input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
14103
0
    input->base = buffer;
14104
0
    input->cur = buffer;
14105
0
    input->end = &buffer[xmlStrlen(buffer)];
14106
0
    inputPush(ctxt, input);
14107
0
}
14108
14109
/**
14110
 * xmlSAXUserParseFile:
14111
 * @sax:  a SAX handler
14112
 * @user_data:  The user data returned on SAX callbacks
14113
 * @filename:  a file name
14114
 *
14115
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
14116
 *
14117
 * parse an XML file and call the given SAX handler routines.
14118
 * Automatic support for ZLIB/Compress compressed document is provided
14119
 *
14120
 * Returns 0 in case of success or a error number otherwise
14121
 */
14122
int
14123
xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
14124
0
                    const char *filename) {
14125
0
    int ret = 0;
14126
0
    xmlParserCtxtPtr ctxt;
14127
14128
0
    ctxt = xmlCreateFileParserCtxt(filename);
14129
0
    if (ctxt == NULL) return -1;
14130
0
    if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14131
0
  xmlFree(ctxt->sax);
14132
0
    ctxt->sax = sax;
14133
0
    xmlDetectSAX2(ctxt);
14134
14135
0
    if (user_data != NULL)
14136
0
  ctxt->userData = user_data;
14137
14138
0
    xmlParseDocument(ctxt);
14139
14140
0
    if (ctxt->wellFormed)
14141
0
  ret = 0;
14142
0
    else {
14143
0
        if (ctxt->errNo != 0)
14144
0
      ret = ctxt->errNo;
14145
0
  else
14146
0
      ret = -1;
14147
0
    }
14148
0
    if (sax != NULL)
14149
0
  ctxt->sax = NULL;
14150
0
    if (ctxt->myDoc != NULL) {
14151
0
        xmlFreeDoc(ctxt->myDoc);
14152
0
  ctxt->myDoc = NULL;
14153
0
    }
14154
0
    xmlFreeParserCtxt(ctxt);
14155
14156
0
    return ret;
14157
0
}
14158
#endif /* LIBXML_SAX1_ENABLED */
14159
14160
/************************************************************************
14161
 *                  *
14162
 *    Front ends when parsing from memory     *
14163
 *                  *
14164
 ************************************************************************/
14165
14166
/**
14167
 * xmlCreateMemoryParserCtxt:
14168
 * @buffer:  a pointer to a char array
14169
 * @size:  the size of the array
14170
 *
14171
 * Create a parser context for an XML in-memory document.
14172
 *
14173
 * Returns the new parser context or NULL
14174
 */
14175
xmlParserCtxtPtr
14176
330k
xmlCreateMemoryParserCtxt(const char *buffer, int size) {
14177
330k
    xmlParserCtxtPtr ctxt;
14178
330k
    xmlParserInputPtr input;
14179
330k
    xmlParserInputBufferPtr buf;
14180
14181
330k
    if (buffer == NULL)
14182
0
  return(NULL);
14183
330k
    if (size <= 0)
14184
2.26k
  return(NULL);
14185
14186
328k
    ctxt = xmlNewParserCtxt();
14187
328k
    if (ctxt == NULL)
14188
0
  return(NULL);
14189
14190
328k
    buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14191
328k
    if (buf == NULL) {
14192
0
  xmlFreeParserCtxt(ctxt);
14193
0
  return(NULL);
14194
0
    }
14195
14196
328k
    input = xmlNewInputStream(ctxt);
14197
328k
    if (input == NULL) {
14198
0
  xmlFreeParserInputBuffer(buf);
14199
0
  xmlFreeParserCtxt(ctxt);
14200
0
  return(NULL);
14201
0
    }
14202
14203
328k
    input->filename = NULL;
14204
328k
    input->buf = buf;
14205
328k
    xmlBufResetInput(input->buf->buffer, input);
14206
14207
328k
    inputPush(ctxt, input);
14208
328k
    return(ctxt);
14209
328k
}
14210
14211
#ifdef LIBXML_SAX1_ENABLED
14212
/**
14213
 * xmlSAXParseMemoryWithData:
14214
 * @sax:  the SAX handler block
14215
 * @buffer:  an pointer to a char array
14216
 * @size:  the size of the array
14217
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14218
 *             documents
14219
 * @data:  the userdata
14220
 *
14221
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14222
 *
14223
 * parse an XML in-memory block and use the given SAX function block
14224
 * to handle the parsing callback. If sax is NULL, fallback to the default
14225
 * DOM tree building routines.
14226
 *
14227
 * User data (void *) is stored within the parser context in the
14228
 * context's _private member, so it is available nearly everywhere in libxml
14229
 *
14230
 * Returns the resulting document tree
14231
 */
14232
14233
xmlDocPtr
14234
xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14235
0
            int size, int recovery, void *data) {
14236
0
    xmlDocPtr ret;
14237
0
    xmlParserCtxtPtr ctxt;
14238
14239
0
    xmlInitParser();
14240
14241
0
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14242
0
    if (ctxt == NULL) return(NULL);
14243
0
    if (sax != NULL) {
14244
0
  if (ctxt->sax != NULL)
14245
0
      xmlFree(ctxt->sax);
14246
0
        ctxt->sax = sax;
14247
0
    }
14248
0
    xmlDetectSAX2(ctxt);
14249
0
    if (data!=NULL) {
14250
0
  ctxt->_private=data;
14251
0
    }
14252
14253
0
    ctxt->recovery = recovery;
14254
14255
0
    xmlParseDocument(ctxt);
14256
14257
0
    if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14258
0
    else {
14259
0
       ret = NULL;
14260
0
       xmlFreeDoc(ctxt->myDoc);
14261
0
       ctxt->myDoc = NULL;
14262
0
    }
14263
0
    if (sax != NULL)
14264
0
  ctxt->sax = NULL;
14265
0
    xmlFreeParserCtxt(ctxt);
14266
14267
0
    return(ret);
14268
0
}
14269
14270
/**
14271
 * xmlSAXParseMemory:
14272
 * @sax:  the SAX handler block
14273
 * @buffer:  an pointer to a char array
14274
 * @size:  the size of the array
14275
 * @recovery:  work in recovery mode, i.e. tries to read not Well Formed
14276
 *             documents
14277
 *
14278
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14279
 *
14280
 * parse an XML in-memory block and use the given SAX function block
14281
 * to handle the parsing callback. If sax is NULL, fallback to the default
14282
 * DOM tree building routines.
14283
 *
14284
 * Returns the resulting document tree
14285
 */
14286
xmlDocPtr
14287
xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14288
0
            int size, int recovery) {
14289
0
    return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
14290
0
}
14291
14292
/**
14293
 * xmlParseMemory:
14294
 * @buffer:  an pointer to a char array
14295
 * @size:  the size of the array
14296
 *
14297
 * DEPRECATED: Use xmlReadMemory.
14298
 *
14299
 * parse an XML in-memory block and build a tree.
14300
 *
14301
 * Returns the resulting document tree
14302
 */
14303
14304
0
xmlDocPtr xmlParseMemory(const char *buffer, int size) {
14305
0
   return(xmlSAXParseMemory(NULL, buffer, size, 0));
14306
0
}
14307
14308
/**
14309
 * xmlRecoverMemory:
14310
 * @buffer:  an pointer to a char array
14311
 * @size:  the size of the array
14312
 *
14313
 * DEPRECATED: Use xmlReadMemory with XML_PARSE_RECOVER.
14314
 *
14315
 * parse an XML in-memory block and build a tree.
14316
 * In the case the document is not Well Formed, an attempt to
14317
 * build a tree is tried anyway
14318
 *
14319
 * Returns the resulting document tree or NULL in case of error
14320
 */
14321
14322
0
xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
14323
0
   return(xmlSAXParseMemory(NULL, buffer, size, 1));
14324
0
}
14325
14326
/**
14327
 * xmlSAXUserParseMemory:
14328
 * @sax:  a SAX handler
14329
 * @user_data:  The user data returned on SAX callbacks
14330
 * @buffer:  an in-memory XML document input
14331
 * @size:  the length of the XML document in bytes
14332
 *
14333
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14334
 *
14335
 * parse an XML in-memory buffer and call the given SAX handler routines.
14336
 *
14337
 * Returns 0 in case of success or a error number otherwise
14338
 */
14339
int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
14340
0
        const char *buffer, int size) {
14341
0
    int ret = 0;
14342
0
    xmlParserCtxtPtr ctxt;
14343
14344
0
    xmlInitParser();
14345
14346
0
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14347
0
    if (ctxt == NULL) return -1;
14348
0
    if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14349
0
        xmlFree(ctxt->sax);
14350
0
    ctxt->sax = sax;
14351
0
    xmlDetectSAX2(ctxt);
14352
14353
0
    if (user_data != NULL)
14354
0
  ctxt->userData = user_data;
14355
14356
0
    xmlParseDocument(ctxt);
14357
14358
0
    if (ctxt->wellFormed)
14359
0
  ret = 0;
14360
0
    else {
14361
0
        if (ctxt->errNo != 0)
14362
0
      ret = ctxt->errNo;
14363
0
  else
14364
0
      ret = -1;
14365
0
    }
14366
0
    if (sax != NULL)
14367
0
        ctxt->sax = NULL;
14368
0
    if (ctxt->myDoc != NULL) {
14369
0
        xmlFreeDoc(ctxt->myDoc);
14370
0
  ctxt->myDoc = NULL;
14371
0
    }
14372
0
    xmlFreeParserCtxt(ctxt);
14373
14374
0
    return ret;
14375
0
}
14376
#endif /* LIBXML_SAX1_ENABLED */
14377
14378
/**
14379
 * xmlCreateDocParserCtxt:
14380
 * @cur:  a pointer to an array of xmlChar
14381
 *
14382
 * Creates a parser context for an XML in-memory document.
14383
 *
14384
 * Returns the new parser context or NULL
14385
 */
14386
xmlParserCtxtPtr
14387
0
xmlCreateDocParserCtxt(const xmlChar *cur) {
14388
0
    int len;
14389
14390
0
    if (cur == NULL)
14391
0
  return(NULL);
14392
0
    len = xmlStrlen(cur);
14393
0
    return(xmlCreateMemoryParserCtxt((const char *)cur, len));
14394
0
}
14395
14396
#ifdef LIBXML_SAX1_ENABLED
14397
/**
14398
 * xmlSAXParseDoc:
14399
 * @sax:  the SAX handler block
14400
 * @cur:  a pointer to an array of xmlChar
14401
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14402
 *             documents
14403
 *
14404
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadDoc.
14405
 *
14406
 * parse an XML in-memory document and build a tree.
14407
 * It use the given SAX function block to handle the parsing callback.
14408
 * If sax is NULL, fallback to the default DOM tree building routines.
14409
 *
14410
 * Returns the resulting document tree
14411
 */
14412
14413
xmlDocPtr
14414
0
xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
14415
0
    xmlDocPtr ret;
14416
0
    xmlParserCtxtPtr ctxt;
14417
0
    xmlSAXHandlerPtr oldsax = NULL;
14418
14419
0
    if (cur == NULL) return(NULL);
14420
14421
14422
0
    ctxt = xmlCreateDocParserCtxt(cur);
14423
0
    if (ctxt == NULL) return(NULL);
14424
0
    if (sax != NULL) {
14425
0
        oldsax = ctxt->sax;
14426
0
        ctxt->sax = sax;
14427
0
        ctxt->userData = NULL;
14428
0
    }
14429
0
    xmlDetectSAX2(ctxt);
14430
14431
0
    xmlParseDocument(ctxt);
14432
0
    if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14433
0
    else {
14434
0
       ret = NULL;
14435
0
       xmlFreeDoc(ctxt->myDoc);
14436
0
       ctxt->myDoc = NULL;
14437
0
    }
14438
0
    if (sax != NULL)
14439
0
  ctxt->sax = oldsax;
14440
0
    xmlFreeParserCtxt(ctxt);
14441
14442
0
    return(ret);
14443
0
}
14444
14445
/**
14446
 * xmlParseDoc:
14447
 * @cur:  a pointer to an array of xmlChar
14448
 *
14449
 * DEPRECATED: Use xmlReadDoc.
14450
 *
14451
 * parse an XML in-memory document and build a tree.
14452
 *
14453
 * Returns the resulting document tree
14454
 */
14455
14456
xmlDocPtr
14457
0
xmlParseDoc(const xmlChar *cur) {
14458
0
    return(xmlSAXParseDoc(NULL, cur, 0));
14459
0
}
14460
#endif /* LIBXML_SAX1_ENABLED */
14461
14462
#ifdef LIBXML_LEGACY_ENABLED
14463
/************************************************************************
14464
 *                  *
14465
 *  Specific function to keep track of entities references    *
14466
 *  and used by the XSLT debugger         *
14467
 *                  *
14468
 ************************************************************************/
14469
14470
static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14471
14472
/**
14473
 * xmlAddEntityReference:
14474
 * @ent : A valid entity
14475
 * @firstNode : A valid first node for children of entity
14476
 * @lastNode : A valid last node of children entity
14477
 *
14478
 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14479
 */
14480
static void
14481
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14482
                      xmlNodePtr lastNode)
14483
{
14484
    if (xmlEntityRefFunc != NULL) {
14485
        (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14486
    }
14487
}
14488
14489
14490
/**
14491
 * xmlSetEntityReferenceFunc:
14492
 * @func: A valid function
14493
 *
14494
 * Set the function to call call back when a xml reference has been made
14495
 */
14496
void
14497
xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14498
{
14499
    xmlEntityRefFunc = func;
14500
}
14501
#endif /* LIBXML_LEGACY_ENABLED */
14502
14503
/************************************************************************
14504
 *                  *
14505
 *        Miscellaneous       *
14506
 *                  *
14507
 ************************************************************************/
14508
14509
static int xmlParserInitialized = 0;
14510
14511
/**
14512
 * xmlInitParser:
14513
 *
14514
 * Initialization function for the XML parser.
14515
 * This is not reentrant. Call once before processing in case of
14516
 * use in multithreaded programs.
14517
 */
14518
14519
void
14520
3.16G
xmlInitParser(void) {
14521
    /*
14522
     * Note that the initialization code must not make memory allocations.
14523
     */
14524
3.16G
    if (xmlParserInitialized != 0)
14525
3.16G
  return;
14526
14527
3.70k
#ifdef LIBXML_THREAD_ENABLED
14528
3.70k
    __xmlGlobalInitMutexLock();
14529
3.70k
    if (xmlParserInitialized == 0) {
14530
3.70k
#endif
14531
#if defined(_WIN32) && (!defined(LIBXML_STATIC) || defined(LIBXML_STATIC_FOR_DLL))
14532
        if (xmlFree == free)
14533
            atexit(xmlCleanupParser);
14534
#endif
14535
14536
3.70k
  xmlInitThreadsInternal();
14537
3.70k
  xmlInitGlobalsInternal();
14538
3.70k
  xmlInitMemoryInternal();
14539
3.70k
        __xmlInitializeDict();
14540
3.70k
  xmlInitEncodingInternal();
14541
3.70k
  xmlRegisterDefaultInputCallbacks();
14542
3.70k
#ifdef LIBXML_OUTPUT_ENABLED
14543
3.70k
  xmlRegisterDefaultOutputCallbacks();
14544
3.70k
#endif /* LIBXML_OUTPUT_ENABLED */
14545
3.70k
#if defined(LIBXML_XPATH_ENABLED) || defined(LIBXML_SCHEMAS_ENABLED)
14546
3.70k
  xmlInitXPathInternal();
14547
3.70k
#endif
14548
3.70k
  xmlParserInitialized = 1;
14549
3.70k
#ifdef LIBXML_THREAD_ENABLED
14550
3.70k
    }
14551
3.70k
    __xmlGlobalInitMutexUnlock();
14552
3.70k
#endif
14553
3.70k
}
14554
14555
/**
14556
 * xmlCleanupParser:
14557
 *
14558
 * This function name is somewhat misleading. It does not clean up
14559
 * parser state, it cleans up memory allocated by the library itself.
14560
 * It is a cleanup function for the XML library. It tries to reclaim all
14561
 * related global memory allocated for the library processing.
14562
 * It doesn't deallocate any document related memory. One should
14563
 * call xmlCleanupParser() only when the process has finished using
14564
 * the library and all XML/HTML documents built with it.
14565
 * See also xmlInitParser() which has the opposite function of preparing
14566
 * the library for operations.
14567
 *
14568
 * WARNING: if your application is multithreaded or has plugin support
14569
 *          calling this may crash the application if another thread or
14570
 *          a plugin is still using libxml2. It's sometimes very hard to
14571
 *          guess if libxml2 is in use in the application, some libraries
14572
 *          or plugins may use it without notice. In case of doubt abstain
14573
 *          from calling this function or do it just before calling exit()
14574
 *          to avoid leak reports from valgrind !
14575
 */
14576
14577
void
14578
0
xmlCleanupParser(void) {
14579
0
    if (!xmlParserInitialized)
14580
0
  return;
14581
14582
0
    xmlCleanupCharEncodingHandlers();
14583
0
#ifdef LIBXML_CATALOG_ENABLED
14584
0
    xmlCatalogCleanup();
14585
0
#endif
14586
0
    xmlCleanupDictInternal();
14587
0
    xmlCleanupInputCallbacks();
14588
0
#ifdef LIBXML_OUTPUT_ENABLED
14589
0
    xmlCleanupOutputCallbacks();
14590
0
#endif
14591
0
#ifdef LIBXML_SCHEMAS_ENABLED
14592
0
    xmlSchemaCleanupTypes();
14593
0
    xmlRelaxNGCleanupTypes();
14594
0
#endif
14595
0
    xmlCleanupGlobalsInternal();
14596
0
    xmlCleanupThreadsInternal();
14597
0
    xmlCleanupMemoryInternal();
14598
0
    xmlParserInitialized = 0;
14599
0
}
14600
14601
#if defined(HAVE_ATTRIBUTE_DESTRUCTOR) && !defined(LIBXML_STATIC) && \
    !defined(_WIN32)
/*
 * xmlDestructor:
 *
 * Library destructor: runs xmlCleanupParser(), but only while the
 * default free() is still installed as xmlFree.
 */
static void
ATTRIBUTE_DESTRUCTOR
xmlDestructor(void) {
    /*
     * Calling custom deallocation functions in a destructor can cause
     * problems, for example with Nokogiri.
     */
    if (xmlFree != free)
        return;

    xmlCleanupParser();
}
#endif
14614
14615
/************************************************************************
14616
 *                  *
14617
 *  New set (2.6.0) of simpler and more flexible APIs   *
14618
 *                  *
14619
 ************************************************************************/
14620
14621
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named "dict" must be visible where the
 * macro is expanded; a NULL "dict" means the string is always freed.
 *
 * The body is wrapped in do { } while (0) so that the macro behaves like
 * a single statement (safe inside unbraced if/else); callers supply the
 * terminating semicolon.
 */
#define DICT_FREE(str)                                              \
    do {                                                            \
        if ((str) && ((!dict) ||                                    \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))      \
            xmlFree((char *)(str));                                 \
    } while (0)
14632
14633
/**
14634
 * xmlCtxtReset:
14635
 * @ctxt: an XML parser context
14636
 *
14637
 * Reset a parser context
14638
 */
14639
void
14640
xmlCtxtReset(xmlParserCtxtPtr ctxt)
14641
0
{
14642
0
    xmlParserInputPtr input;
14643
0
    xmlDictPtr dict;
14644
14645
0
    if (ctxt == NULL)
14646
0
        return;
14647
14648
0
    dict = ctxt->dict;
14649
14650
0
    while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14651
0
        xmlFreeInputStream(input);
14652
0
    }
14653
0
    ctxt->inputNr = 0;
14654
0
    ctxt->input = NULL;
14655
14656
0
    ctxt->spaceNr = 0;
14657
0
    if (ctxt->spaceTab != NULL) {
14658
0
  ctxt->spaceTab[0] = -1;
14659
0
  ctxt->space = &ctxt->spaceTab[0];
14660
0
    } else {
14661
0
        ctxt->space = NULL;
14662
0
    }
14663
14664
14665
0
    ctxt->nodeNr = 0;
14666
0
    ctxt->node = NULL;
14667
14668
0
    ctxt->nameNr = 0;
14669
0
    ctxt->name = NULL;
14670
14671
0
    ctxt->nsNr = 0;
14672
14673
0
    DICT_FREE(ctxt->version);
14674
0
    ctxt->version = NULL;
14675
0
    DICT_FREE(ctxt->encoding);
14676
0
    ctxt->encoding = NULL;
14677
0
    DICT_FREE(ctxt->directory);
14678
0
    ctxt->directory = NULL;
14679
0
    DICT_FREE(ctxt->extSubURI);
14680
0
    ctxt->extSubURI = NULL;
14681
0
    DICT_FREE(ctxt->extSubSystem);
14682
0
    ctxt->extSubSystem = NULL;
14683
0
    if (ctxt->myDoc != NULL)
14684
0
        xmlFreeDoc(ctxt->myDoc);
14685
0
    ctxt->myDoc = NULL;
14686
14687
0
    ctxt->standalone = -1;
14688
0
    ctxt->hasExternalSubset = 0;
14689
0
    ctxt->hasPErefs = 0;
14690
0
    ctxt->html = 0;
14691
0
    ctxt->external = 0;
14692
0
    ctxt->instate = XML_PARSER_START;
14693
0
    ctxt->token = 0;
14694
14695
0
    ctxt->wellFormed = 1;
14696
0
    ctxt->nsWellFormed = 1;
14697
0
    ctxt->disableSAX = 0;
14698
0
    ctxt->valid = 1;
14699
#if 0
14700
    ctxt->vctxt.userData = ctxt;
14701
    ctxt->vctxt.error = xmlParserValidityError;
14702
    ctxt->vctxt.warning = xmlParserValidityWarning;
14703
#endif
14704
0
    ctxt->record_info = 0;
14705
0
    ctxt->checkIndex = 0;
14706
0
    ctxt->endCheckState = 0;
14707
0
    ctxt->inSubset = 0;
14708
0
    ctxt->errNo = XML_ERR_OK;
14709
0
    ctxt->depth = 0;
14710
0
    ctxt->charset = XML_CHAR_ENCODING_UTF8;
14711
0
    ctxt->catalogs = NULL;
14712
0
    ctxt->sizeentities = 0;
14713
0
    ctxt->sizeentcopy = 0;
14714
0
    xmlInitNodeInfoSeq(&ctxt->node_seq);
14715
14716
0
    if (ctxt->attsDefault != NULL) {
14717
0
        xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
14718
0
        ctxt->attsDefault = NULL;
14719
0
    }
14720
0
    if (ctxt->attsSpecial != NULL) {
14721
0
        xmlHashFree(ctxt->attsSpecial, NULL);
14722
0
        ctxt->attsSpecial = NULL;
14723
0
    }
14724
14725
0
#ifdef LIBXML_CATALOG_ENABLED
14726
0
    if (ctxt->catalogs != NULL)
14727
0
  xmlCatalogFreeLocal(ctxt->catalogs);
14728
0
#endif
14729
0
    ctxt->nbErrors = 0;
14730
0
    ctxt->nbWarnings = 0;
14731
0
    if (ctxt->lastError.code != XML_ERR_OK)
14732
0
        xmlResetError(&ctxt->lastError);
14733
0
}
14734
14735
/**
14736
 * xmlCtxtResetPush:
14737
 * @ctxt: an XML parser context
14738
 * @chunk:  a pointer to an array of chars
14739
 * @size:  number of chars in the array
14740
 * @filename:  an optional file name or URI
14741
 * @encoding:  the document encoding, or NULL
14742
 *
14743
 * Reset a push parser context
14744
 *
14745
 * Returns 0 in case of success and 1 in case of error
14746
 */
14747
int
14748
xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14749
                 int size, const char *filename, const char *encoding)
14750
0
{
14751
0
    xmlParserInputPtr inputStream;
14752
0
    xmlParserInputBufferPtr buf;
14753
0
    xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14754
14755
0
    if (ctxt == NULL)
14756
0
        return(1);
14757
14758
0
    if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14759
0
        enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14760
14761
0
    buf = xmlAllocParserInputBuffer(enc);
14762
0
    if (buf == NULL)
14763
0
        return(1);
14764
14765
0
    if (ctxt == NULL) {
14766
0
        xmlFreeParserInputBuffer(buf);
14767
0
        return(1);
14768
0
    }
14769
14770
0
    xmlCtxtReset(ctxt);
14771
14772
0
    if (filename == NULL) {
14773
0
        ctxt->directory = NULL;
14774
0
    } else {
14775
0
        ctxt->directory = xmlParserGetDirectory(filename);
14776
0
    }
14777
14778
0
    inputStream = xmlNewInputStream(ctxt);
14779
0
    if (inputStream == NULL) {
14780
0
        xmlFreeParserInputBuffer(buf);
14781
0
        return(1);
14782
0
    }
14783
14784
0
    if (filename == NULL)
14785
0
        inputStream->filename = NULL;
14786
0
    else
14787
0
        inputStream->filename = (char *)
14788
0
            xmlCanonicPath((const xmlChar *) filename);
14789
0
    inputStream->buf = buf;
14790
0
    xmlBufResetInput(buf->buffer, inputStream);
14791
14792
0
    inputPush(ctxt, inputStream);
14793
14794
0
    if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14795
0
        (ctxt->input->buf != NULL)) {
14796
0
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
14797
0
        size_t cur = ctxt->input->cur - ctxt->input->base;
14798
14799
0
        xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14800
14801
0
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
14802
#ifdef DEBUG_PUSH
14803
        xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14804
#endif
14805
0
    }
14806
14807
0
    if (encoding != NULL) {
14808
0
        xmlCharEncodingHandlerPtr hdlr;
14809
14810
0
        if (ctxt->encoding != NULL)
14811
0
      xmlFree((xmlChar *) ctxt->encoding);
14812
0
        ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14813
14814
0
        hdlr = xmlFindCharEncodingHandler(encoding);
14815
0
        if (hdlr != NULL) {
14816
0
            xmlSwitchToEncoding(ctxt, hdlr);
14817
0
  } else {
14818
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14819
0
            "Unsupported encoding %s\n", BAD_CAST encoding);
14820
0
        }
14821
0
    } else if (enc != XML_CHAR_ENCODING_NONE) {
14822
0
        xmlSwitchEncoding(ctxt, enc);
14823
0
    }
14824
14825
0
    return(0);
14826
0
}
14827
14828
14829
/**
14830
 * xmlCtxtUseOptionsInternal:
14831
 * @ctxt: an XML parser context
14832
 * @options:  a combination of xmlParserOption
14833
 * @encoding:  the user provided encoding to use
14834
 *
14835
 * Applies the options to the parser context
14836
 *
14837
 * Returns 0 in case of success, the set of unknown or unimplemented options
14838
 *         in case of error.
14839
 */
14840
static int
14841
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
14842
801k
{
14843
801k
    if (ctxt == NULL)
14844
0
        return(-1);
14845
801k
    if (encoding != NULL) {
14846
0
        if (ctxt->encoding != NULL)
14847
0
      xmlFree((xmlChar *) ctxt->encoding);
14848
0
        ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14849
0
    }
14850
801k
    if (options & XML_PARSE_RECOVER) {
14851
411k
        ctxt->recovery = 1;
14852
411k
        options -= XML_PARSE_RECOVER;
14853
411k
  ctxt->options |= XML_PARSE_RECOVER;
14854
411k
    } else
14855
390k
        ctxt->recovery = 0;
14856
801k
    if (options & XML_PARSE_DTDLOAD) {
14857
580k
        ctxt->loadsubset = XML_DETECT_IDS;
14858
580k
        options -= XML_PARSE_DTDLOAD;
14859
580k
  ctxt->options |= XML_PARSE_DTDLOAD;
14860
580k
    } else
14861
221k
        ctxt->loadsubset = 0;
14862
801k
    if (options & XML_PARSE_DTDATTR) {
14863
325k
        ctxt->loadsubset |= XML_COMPLETE_ATTRS;
14864
325k
        options -= XML_PARSE_DTDATTR;
14865
325k
  ctxt->options |= XML_PARSE_DTDATTR;
14866
325k
    }
14867
801k
    if (options & XML_PARSE_NOENT) {
14868
507k
        ctxt->replaceEntities = 1;
14869
        /* ctxt->loadsubset |= XML_DETECT_IDS; */
14870
507k
        options -= XML_PARSE_NOENT;
14871
507k
  ctxt->options |= XML_PARSE_NOENT;
14872
507k
    } else
14873
294k
        ctxt->replaceEntities = 0;
14874
801k
    if (options & XML_PARSE_PEDANTIC) {
14875
228k
        ctxt->pedantic = 1;
14876
228k
        options -= XML_PARSE_PEDANTIC;
14877
228k
  ctxt->options |= XML_PARSE_PEDANTIC;
14878
228k
    } else
14879
572k
        ctxt->pedantic = 0;
14880
801k
    if (options & XML_PARSE_NOBLANKS) {
14881
277k
        ctxt->keepBlanks = 0;
14882
277k
        ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
14883
277k
        options -= XML_PARSE_NOBLANKS;
14884
277k
  ctxt->options |= XML_PARSE_NOBLANKS;
14885
277k
    } else
14886
523k
        ctxt->keepBlanks = 1;
14887
801k
    if (options & XML_PARSE_DTDVALID) {
14888
342k
        ctxt->validate = 1;
14889
342k
        if (options & XML_PARSE_NOWARNING)
14890
240k
            ctxt->vctxt.warning = NULL;
14891
342k
        if (options & XML_PARSE_NOERROR)
14892
257k
            ctxt->vctxt.error = NULL;
14893
342k
        options -= XML_PARSE_DTDVALID;
14894
342k
  ctxt->options |= XML_PARSE_DTDVALID;
14895
342k
    } else
14896
458k
        ctxt->validate = 0;
14897
801k
    if (options & XML_PARSE_NOWARNING) {
14898
305k
        ctxt->sax->warning = NULL;
14899
305k
        options -= XML_PARSE_NOWARNING;
14900
305k
    }
14901
801k
    if (options & XML_PARSE_NOERROR) {
14902
335k
        ctxt->sax->error = NULL;
14903
335k
        ctxt->sax->fatalError = NULL;
14904
335k
        options -= XML_PARSE_NOERROR;
14905
335k
    }
14906
801k
#ifdef LIBXML_SAX1_ENABLED
14907
801k
    if (options & XML_PARSE_SAX1) {
14908
254k
        ctxt->sax->startElement = xmlSAX2StartElement;
14909
254k
        ctxt->sax->endElement = xmlSAX2EndElement;
14910
254k
        ctxt->sax->startElementNs = NULL;
14911
254k
        ctxt->sax->endElementNs = NULL;
14912
254k
        ctxt->sax->initialized = 1;
14913
254k
        options -= XML_PARSE_SAX1;
14914
254k
  ctxt->options |= XML_PARSE_SAX1;
14915
254k
    }
14916
801k
#endif /* LIBXML_SAX1_ENABLED */
14917
801k
    if (options & XML_PARSE_NODICT) {
14918
275k
        ctxt->dictNames = 0;
14919
275k
        options -= XML_PARSE_NODICT;
14920
275k
  ctxt->options |= XML_PARSE_NODICT;
14921
525k
    } else {
14922
525k
        ctxt->dictNames = 1;
14923
525k
    }
14924
801k
    if (options & XML_PARSE_NOCDATA) {
14925
263k
        ctxt->sax->cdataBlock = NULL;
14926
263k
        options -= XML_PARSE_NOCDATA;
14927
263k
  ctxt->options |= XML_PARSE_NOCDATA;
14928
263k
    }
14929
801k
    if (options & XML_PARSE_NSCLEAN) {
14930
289k
  ctxt->options |= XML_PARSE_NSCLEAN;
14931
289k
        options -= XML_PARSE_NSCLEAN;
14932
289k
    }
14933
801k
    if (options & XML_PARSE_NONET) {
14934
232k
  ctxt->options |= XML_PARSE_NONET;
14935
232k
        options -= XML_PARSE_NONET;
14936
232k
    }
14937
801k
    if (options & XML_PARSE_COMPACT) {
14938
442k
  ctxt->options |= XML_PARSE_COMPACT;
14939
442k
        options -= XML_PARSE_COMPACT;
14940
442k
    }
14941
801k
    if (options & XML_PARSE_OLD10) {
14942
260k
  ctxt->options |= XML_PARSE_OLD10;
14943
260k
        options -= XML_PARSE_OLD10;
14944
260k
    }
14945
801k
    if (options & XML_PARSE_NOBASEFIX) {
14946
254k
  ctxt->options |= XML_PARSE_NOBASEFIX;
14947
254k
        options -= XML_PARSE_NOBASEFIX;
14948
254k
    }
14949
801k
    if (options & XML_PARSE_HUGE) {
14950
212k
  ctxt->options |= XML_PARSE_HUGE;
14951
212k
        options -= XML_PARSE_HUGE;
14952
212k
        if (ctxt->dict != NULL)
14953
212k
            xmlDictSetLimit(ctxt->dict, 0);
14954
212k
    }
14955
801k
    if (options & XML_PARSE_OLDSAX) {
14956
228k
  ctxt->options |= XML_PARSE_OLDSAX;
14957
228k
        options -= XML_PARSE_OLDSAX;
14958
228k
    }
14959
801k
    if (options & XML_PARSE_IGNORE_ENC) {
14960
260k
  ctxt->options |= XML_PARSE_IGNORE_ENC;
14961
260k
        options -= XML_PARSE_IGNORE_ENC;
14962
260k
    }
14963
801k
    if (options & XML_PARSE_BIG_LINES) {
14964
249k
  ctxt->options |= XML_PARSE_BIG_LINES;
14965
249k
        options -= XML_PARSE_BIG_LINES;
14966
249k
    }
14967
801k
    ctxt->linenumbers = 1;
14968
801k
    return (options);
14969
801k
}
14970
14971
/**
14972
 * xmlCtxtUseOptions:
14973
 * @ctxt: an XML parser context
14974
 * @options:  a combination of xmlParserOption
14975
 *
14976
 * Applies the options to the parser context
14977
 *
14978
 * Returns 0 in case of success, the set of unknown or unimplemented options
14979
 *         in case of error.
14980
 */
14981
int
14982
xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
14983
534k
{
14984
534k
   return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
14985
534k
}
14986
14987
/**
14988
 * xmlDoRead:
14989
 * @ctxt:  an XML parser context
14990
 * @URL:  the base URL to use for the document
14991
 * @encoding:  the document encoding, or NULL
14992
 * @options:  a combination of xmlParserOption
14993
 * @reuse:  keep the context for reuse
14994
 *
14995
 * Common front-end for the xmlRead functions
14996
 *
14997
 * Returns the resulting document tree or NULL
14998
 */
14999
static xmlDocPtr
15000
xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
15001
          int options, int reuse)
15002
267k
{
15003
267k
    xmlDocPtr ret;
15004
15005
267k
    xmlCtxtUseOptionsInternal(ctxt, options, encoding);
15006
267k
    if (encoding != NULL) {
15007
0
        xmlCharEncodingHandlerPtr hdlr;
15008
15009
0
  hdlr = xmlFindCharEncodingHandler(encoding);
15010
0
  if (hdlr != NULL)
15011
0
      xmlSwitchToEncoding(ctxt, hdlr);
15012
0
    }
15013
267k
    if ((URL != NULL) && (ctxt->input != NULL) &&
15014
267k
        (ctxt->input->filename == NULL))
15015
267k
        ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
15016
267k
    xmlParseDocument(ctxt);
15017
267k
    if ((ctxt->wellFormed) || ctxt->recovery)
15018
157k
        ret = ctxt->myDoc;
15019
109k
    else {
15020
109k
        ret = NULL;
15021
109k
  if (ctxt->myDoc != NULL) {
15022
100k
      xmlFreeDoc(ctxt->myDoc);
15023
100k
  }
15024
109k
    }
15025
267k
    ctxt->myDoc = NULL;
15026
267k
    if (!reuse) {
15027
267k
  xmlFreeParserCtxt(ctxt);
15028
267k
    }
15029
15030
267k
    return (ret);
15031
267k
}
15032
15033
/**
15034
 * xmlReadDoc:
15035
 * @cur:  a pointer to a zero terminated string
15036
 * @URL:  the base URL to use for the document
15037
 * @encoding:  the document encoding, or NULL
15038
 * @options:  a combination of xmlParserOption
15039
 *
15040
 * parse an XML in-memory document and build a tree.
15041
 *
15042
 * Returns the resulting document tree
15043
 */
15044
xmlDocPtr
15045
xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
15046
0
{
15047
0
    xmlParserCtxtPtr ctxt;
15048
15049
0
    if (cur == NULL)
15050
0
        return (NULL);
15051
0
    xmlInitParser();
15052
15053
0
    ctxt = xmlCreateDocParserCtxt(cur);
15054
0
    if (ctxt == NULL)
15055
0
        return (NULL);
15056
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15057
0
}
15058
15059
/**
15060
 * xmlReadFile:
15061
 * @filename:  a file or URL
15062
 * @encoding:  the document encoding, or NULL
15063
 * @options:  a combination of xmlParserOption
15064
 *
15065
 * parse an XML file from the filesystem or the network.
15066
 *
15067
 * Returns the resulting document tree
15068
 */
15069
xmlDocPtr
15070
xmlReadFile(const char *filename, const char *encoding, int options)
15071
0
{
15072
0
    xmlParserCtxtPtr ctxt;
15073
15074
0
    xmlInitParser();
15075
0
    ctxt = xmlCreateURLParserCtxt(filename, options);
15076
0
    if (ctxt == NULL)
15077
0
        return (NULL);
15078
0
    return (xmlDoRead(ctxt, NULL, encoding, options, 0));
15079
0
}
15080
15081
/**
15082
 * xmlReadMemory:
15083
 * @buffer:  a pointer to a char array
15084
 * @size:  the size of the array
15085
 * @URL:  the base URL to use for the document
15086
 * @encoding:  the document encoding, or NULL
15087
 * @options:  a combination of xmlParserOption
15088
 *
15089
 * parse an XML in-memory document and build a tree.
15090
 *
15091
 * Returns the resulting document tree
15092
 */
15093
xmlDocPtr
15094
xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
15095
267k
{
15096
267k
    xmlParserCtxtPtr ctxt;
15097
15098
267k
    xmlInitParser();
15099
267k
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
15100
267k
    if (ctxt == NULL)
15101
25
        return (NULL);
15102
267k
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15103
267k
}
15104
15105
/**
15106
 * xmlReadFd:
15107
 * @fd:  an open file descriptor
15108
 * @URL:  the base URL to use for the document
15109
 * @encoding:  the document encoding, or NULL
15110
 * @options:  a combination of xmlParserOption
15111
 *
15112
 * parse an XML from a file descriptor and build a tree.
15113
 * NOTE that the file descriptor will not be closed when the
15114
 *      reader is closed or reset.
15115
 *
15116
 * Returns the resulting document tree
15117
 */
15118
xmlDocPtr
15119
xmlReadFd(int fd, const char *URL, const char *encoding, int options)
15120
0
{
15121
0
    xmlParserCtxtPtr ctxt;
15122
0
    xmlParserInputBufferPtr input;
15123
0
    xmlParserInputPtr stream;
15124
15125
0
    if (fd < 0)
15126
0
        return (NULL);
15127
0
    xmlInitParser();
15128
15129
0
    input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15130
0
    if (input == NULL)
15131
0
        return (NULL);
15132
0
    input->closecallback = NULL;
15133
0
    ctxt = xmlNewParserCtxt();
15134
0
    if (ctxt == NULL) {
15135
0
        xmlFreeParserInputBuffer(input);
15136
0
        return (NULL);
15137
0
    }
15138
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15139
0
    if (stream == NULL) {
15140
0
        xmlFreeParserInputBuffer(input);
15141
0
  xmlFreeParserCtxt(ctxt);
15142
0
        return (NULL);
15143
0
    }
15144
0
    inputPush(ctxt, stream);
15145
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15146
0
}
15147
15148
/**
15149
 * xmlReadIO:
15150
 * @ioread:  an I/O read function
15151
 * @ioclose:  an I/O close function
15152
 * @ioctx:  an I/O handler
15153
 * @URL:  the base URL to use for the document
15154
 * @encoding:  the document encoding, or NULL
15155
 * @options:  a combination of xmlParserOption
15156
 *
15157
 * parse an XML document from I/O functions and source and build a tree.
15158
 *
15159
 * Returns the resulting document tree
15160
 */
15161
xmlDocPtr
15162
xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
15163
          void *ioctx, const char *URL, const char *encoding, int options)
15164
0
{
15165
0
    xmlParserCtxtPtr ctxt;
15166
0
    xmlParserInputBufferPtr input;
15167
0
    xmlParserInputPtr stream;
15168
15169
0
    if (ioread == NULL)
15170
0
        return (NULL);
15171
0
    xmlInitParser();
15172
15173
0
    input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15174
0
                                         XML_CHAR_ENCODING_NONE);
15175
0
    if (input == NULL) {
15176
0
        if (ioclose != NULL)
15177
0
            ioclose(ioctx);
15178
0
        return (NULL);
15179
0
    }
15180
0
    ctxt = xmlNewParserCtxt();
15181
0
    if (ctxt == NULL) {
15182
0
        xmlFreeParserInputBuffer(input);
15183
0
        return (NULL);
15184
0
    }
15185
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15186
0
    if (stream == NULL) {
15187
0
        xmlFreeParserInputBuffer(input);
15188
0
  xmlFreeParserCtxt(ctxt);
15189
0
        return (NULL);
15190
0
    }
15191
0
    inputPush(ctxt, stream);
15192
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15193
0
}
15194
15195
/**
15196
 * xmlCtxtReadDoc:
15197
 * @ctxt:  an XML parser context
15198
 * @cur:  a pointer to a zero terminated string
15199
 * @URL:  the base URL to use for the document
15200
 * @encoding:  the document encoding, or NULL
15201
 * @options:  a combination of xmlParserOption
15202
 *
15203
 * parse an XML in-memory document and build a tree.
15204
 * This reuses the existing @ctxt parser context
15205
 *
15206
 * Returns the resulting document tree
15207
 */
15208
xmlDocPtr
15209
xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
15210
               const char *URL, const char *encoding, int options)
15211
0
{
15212
0
    if (cur == NULL)
15213
0
        return (NULL);
15214
0
    return (xmlCtxtReadMemory(ctxt, (const char *) cur, xmlStrlen(cur), URL,
15215
0
                              encoding, options));
15216
0
}
15217
15218
/**
15219
 * xmlCtxtReadFile:
15220
 * @ctxt:  an XML parser context
15221
 * @filename:  a file or URL
15222
 * @encoding:  the document encoding, or NULL
15223
 * @options:  a combination of xmlParserOption
15224
 *
15225
 * parse an XML file from the filesystem or the network.
15226
 * This reuses the existing @ctxt parser context
15227
 *
15228
 * Returns the resulting document tree
15229
 */
15230
xmlDocPtr
15231
xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15232
                const char *encoding, int options)
15233
0
{
15234
0
    xmlParserInputPtr stream;
15235
15236
0
    if (filename == NULL)
15237
0
        return (NULL);
15238
0
    if (ctxt == NULL)
15239
0
        return (NULL);
15240
0
    xmlInitParser();
15241
15242
0
    xmlCtxtReset(ctxt);
15243
15244
0
    stream = xmlLoadExternalEntity(filename, NULL, ctxt);
15245
0
    if (stream == NULL) {
15246
0
        return (NULL);
15247
0
    }
15248
0
    inputPush(ctxt, stream);
15249
0
    return (xmlDoRead(ctxt, NULL, encoding, options, 1));
15250
0
}
15251
15252
/**
15253
 * xmlCtxtReadMemory:
15254
 * @ctxt:  an XML parser context
15255
 * @buffer:  a pointer to a char array
15256
 * @size:  the size of the array
15257
 * @URL:  the base URL to use for the document
15258
 * @encoding:  the document encoding, or NULL
15259
 * @options:  a combination of xmlParserOption
15260
 *
15261
 * parse an XML in-memory document and build a tree.
15262
 * This reuses the existing @ctxt parser context
15263
 *
15264
 * Returns the resulting document tree
15265
 */
15266
xmlDocPtr
15267
xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
15268
                  const char *URL, const char *encoding, int options)
15269
0
{
15270
0
    xmlParserInputBufferPtr input;
15271
0
    xmlParserInputPtr stream;
15272
15273
0
    if (ctxt == NULL)
15274
0
        return (NULL);
15275
0
    if (buffer == NULL)
15276
0
        return (NULL);
15277
0
    xmlInitParser();
15278
15279
0
    xmlCtxtReset(ctxt);
15280
15281
0
    input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
15282
0
    if (input == NULL) {
15283
0
  return(NULL);
15284
0
    }
15285
15286
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15287
0
    if (stream == NULL) {
15288
0
  xmlFreeParserInputBuffer(input);
15289
0
  return(NULL);
15290
0
    }
15291
15292
0
    inputPush(ctxt, stream);
15293
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15294
0
}
15295
15296
/**
15297
 * xmlCtxtReadFd:
15298
 * @ctxt:  an XML parser context
15299
 * @fd:  an open file descriptor
15300
 * @URL:  the base URL to use for the document
15301
 * @encoding:  the document encoding, or NULL
15302
 * @options:  a combination of xmlParserOption
15303
 *
15304
 * parse an XML from a file descriptor and build a tree.
15305
 * This reuses the existing @ctxt parser context
15306
 * NOTE that the file descriptor will not be closed when the
15307
 *      reader is closed or reset.
15308
 *
15309
 * Returns the resulting document tree
15310
 */
15311
xmlDocPtr
15312
xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15313
              const char *URL, const char *encoding, int options)
15314
0
{
15315
0
    xmlParserInputBufferPtr input;
15316
0
    xmlParserInputPtr stream;
15317
15318
0
    if (fd < 0)
15319
0
        return (NULL);
15320
0
    if (ctxt == NULL)
15321
0
        return (NULL);
15322
0
    xmlInitParser();
15323
15324
0
    xmlCtxtReset(ctxt);
15325
15326
15327
0
    input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15328
0
    if (input == NULL)
15329
0
        return (NULL);
15330
0
    input->closecallback = NULL;
15331
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15332
0
    if (stream == NULL) {
15333
0
        xmlFreeParserInputBuffer(input);
15334
0
        return (NULL);
15335
0
    }
15336
0
    inputPush(ctxt, stream);
15337
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15338
0
}
15339
15340
/**
15341
 * xmlCtxtReadIO:
15342
 * @ctxt:  an XML parser context
15343
 * @ioread:  an I/O read function
15344
 * @ioclose:  an I/O close function
15345
 * @ioctx:  an I/O handler
15346
 * @URL:  the base URL to use for the document
15347
 * @encoding:  the document encoding, or NULL
15348
 * @options:  a combination of xmlParserOption
15349
 *
15350
 * parse an XML document from I/O functions and source and build a tree.
15351
 * This reuses the existing @ctxt parser context
15352
 *
15353
 * Returns the resulting document tree
15354
 */
15355
xmlDocPtr
15356
xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15357
              xmlInputCloseCallback ioclose, void *ioctx,
15358
        const char *URL,
15359
              const char *encoding, int options)
15360
0
{
15361
0
    xmlParserInputBufferPtr input;
15362
0
    xmlParserInputPtr stream;
15363
15364
0
    if (ioread == NULL)
15365
0
        return (NULL);
15366
0
    if (ctxt == NULL)
15367
0
        return (NULL);
15368
0
    xmlInitParser();
15369
15370
0
    xmlCtxtReset(ctxt);
15371
15372
0
    input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15373
0
                                         XML_CHAR_ENCODING_NONE);
15374
0
    if (input == NULL) {
15375
0
        if (ioclose != NULL)
15376
0
            ioclose(ioctx);
15377
0
        return (NULL);
15378
0
    }
15379
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15380
0
    if (stream == NULL) {
15381
0
        xmlFreeParserInputBuffer(input);
15382
0
        return (NULL);
15383
0
    }
15384
0
    inputPush(ctxt, stream);
15385
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15386
0
}
15387