Coverage Report

Created: 2023-10-28 16:16

/src/libxml2/parser.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3
 *            implemented on top of the SAX interfaces
4
 *
5
 * References:
6
 *   The XML specification:
7
 *     http://www.w3.org/TR/REC-xml
8
 *   Original 1.0 version:
9
 *     http://www.w3.org/TR/1998/REC-xml-19980210
10
 *   XML second edition working draft
11
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
12
 *
13
 * Okay this is a big file, the parser core is around 7000 lines, then it
14
 * is followed by the progressive parser top routines, then the various
15
 * high level APIs to call the parser and a few miscellaneous functions.
16
 * A number of helper functions and deprecated ones have been moved to
17
 * parserInternals.c to reduce this file size.
18
 * As much as possible the functions are associated with their relative
19
 * production in the XML specification. A few productions defining the
20
 * different ranges of characters are actually implemented either in
21
 * parserInternals.h or parserInternals.c
22
 * The DOM tree build is realized from the default SAX callbacks in
23
 * the module SAX.c.
24
 * The routines doing the validation checks are in valid.c and called either
25
 * from the SAX callbacks or as standalone functions using a preparsed
26
 * document.
27
 *
28
 * See Copyright for the status of this software.
29
 *
30
 * daniel@veillard.com
31
 */
32
33
/* To avoid EBCDIC trouble when parsing on zOS */
34
#if defined(__MVS__)
35
#pragma convert("ISO8859-1")
36
#endif
37
38
#define IN_LIBXML
39
#include "libxml.h"
40
41
#if defined(_WIN32)
42
#define XML_DIR_SEP '\\'
43
#else
44
#define XML_DIR_SEP '/'
45
#endif
46
47
#include <stdlib.h>
48
#include <limits.h>
49
#include <string.h>
50
#include <stdarg.h>
51
#include <stddef.h>
52
#include <ctype.h>
53
#include <stdlib.h>
54
#include <libxml/xmlmemory.h>
55
#include <libxml/threads.h>
56
#include <libxml/globals.h>
57
#include <libxml/tree.h>
58
#include <libxml/parser.h>
59
#include <libxml/parserInternals.h>
60
#include <libxml/HTMLparser.h>
61
#include <libxml/valid.h>
62
#include <libxml/entities.h>
63
#include <libxml/xmlerror.h>
64
#include <libxml/encoding.h>
65
#include <libxml/xmlIO.h>
66
#include <libxml/uri.h>
67
#ifdef LIBXML_CATALOG_ENABLED
68
#include <libxml/catalog.h>
69
#endif
70
#ifdef LIBXML_SCHEMAS_ENABLED
71
#include <libxml/xmlschemastypes.h>
72
#include <libxml/relaxng.h>
73
#endif
74
#if defined(LIBXML_XPATH_ENABLED) || defined(LIBXML_SCHEMAS_ENABLED)
75
#include <libxml/xpath.h>
76
#endif
77
78
#include "private/buf.h"
79
#include "private/dict.h"
80
#include "private/enc.h"
81
#include "private/entities.h"
82
#include "private/error.h"
83
#include "private/globals.h"
84
#include "private/html.h"
85
#include "private/io.h"
86
#include "private/memory.h"
87
#include "private/parser.h"
88
#include "private/threads.h"
89
#include "private/xpath.h"
90
91
struct _xmlStartTag {
92
    const xmlChar *prefix;
93
    const xmlChar *URI;
94
    int line;
95
    int nsNr;
96
};
97
98
static xmlParserCtxtPtr
99
xmlCreateEntityParserCtxtInternal(xmlSAXHandlerPtr sax, void *userData,
100
        const xmlChar *URL, const xmlChar *ID, const xmlChar *base,
101
        xmlParserCtxtPtr pctx);
102
103
static void xmlHaltParser(xmlParserCtxtPtr ctxt);
104
105
static int
106
xmlParseElementStart(xmlParserCtxtPtr ctxt);
107
108
static void
109
xmlParseElementEnd(xmlParserCtxtPtr ctxt);
110
111
/************************************************************************
112
 *                  *
113
 *  Arbitrary limits set in the parser. See XML_PARSE_HUGE    *
114
 *                  *
115
 ************************************************************************/
116
117
161M
#define XML_MAX_HUGE_LENGTH 1000000000
118
119
#define XML_PARSER_BIG_ENTITY 1000
120
#define XML_PARSER_LOT_ENTITY 5000
121
122
/*
123
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
124
 *    replacement over the size in byte of the input indicates that you have
125
 *    an exponential behaviour. A value of 10 corresponds to at least 3 entity
126
 *    replacement per byte of input.
127
 */
128
6.74k
#define XML_PARSER_NON_LINEAR 10
129
130
524M
#define XML_ENT_FIXED_COST 50
131
132
/**
133
 * xmlParserMaxDepth:
134
 *
135
 * arbitrary depth limit for the XML documents that we allow to
136
 * process. This is not a limitation of the parser but a safety
137
 * boundary feature. It can be disabled with the XML_PARSE_HUGE
138
 * parser option.
139
 */
140
unsigned int xmlParserMaxDepth = 256;
141
142
143
144
#define SAX2 1
145
317M
#define XML_PARSER_BIG_BUFFER_SIZE 300
146
140G
#define XML_PARSER_BUFFER_SIZE 100
147
2.69M
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
148
149
/**
150
 * XML_PARSER_CHUNK_SIZE
151
 *
152
 * When calling GROW that's the minimal amount of data
153
 * the parser expected to have received. It is not a hard
154
 * limit but an optimization when reading strings like Names
155
 * It is not strictly needed as long as the input's available characters
156
 * are followed by 0, which should be provided by the I/O level
157
 */
158
109M
#define XML_PARSER_CHUNK_SIZE 100
159
160
/*
161
 * List of XML prefixed PI allowed by W3C specs
162
 */
163
164
static const char* const xmlW3CPIs[] = {
165
    "xml-stylesheet",
166
    "xml-model",
167
    NULL
168
};
169
170
171
/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
172
static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
173
                                              const xmlChar **str);
174
175
static xmlParserErrors
176
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
177
                xmlSAXHandlerPtr sax,
178
          void *user_data, int depth, const xmlChar *URL,
179
          const xmlChar *ID, xmlNodePtr *list);
180
181
static int
182
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
183
                          const char *encoding);
184
#ifdef LIBXML_LEGACY_ENABLED
185
static void
186
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
187
                      xmlNodePtr lastNode);
188
#endif /* LIBXML_LEGACY_ENABLED */
189
190
static xmlParserErrors
191
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
192
          const xmlChar *string, void *user_data, xmlNodePtr *lst);
193
194
static int
195
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
196
197
/************************************************************************
198
 *                  *
199
 *    Some factorized error routines        *
200
 *                  *
201
 ************************************************************************/
202
203
/**
204
 * xmlErrAttributeDup:
205
 * @ctxt:  an XML parser context
206
 * @prefix:  the attribute prefix
207
 * @localname:  the attribute localname
208
 *
209
 * Handle a redefinition of attribute error
210
 */
211
static void
212
xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
213
                   const xmlChar * localname)
214
57.4k
{
215
57.4k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
216
57.4k
        (ctxt->instate == XML_PARSER_EOF))
217
0
  return;
218
57.4k
    if (ctxt != NULL)
219
57.4k
  ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
220
221
57.4k
    if (prefix == NULL)
222
34.2k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
223
34.2k
                        XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
224
34.2k
                        (const char *) localname, NULL, NULL, 0, 0,
225
34.2k
                        "Attribute %s redefined\n", localname);
226
23.1k
    else
227
23.1k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
228
23.1k
                        XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
229
23.1k
                        (const char *) prefix, (const char *) localname,
230
23.1k
                        NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
231
23.1k
                        localname);
232
57.4k
    if (ctxt != NULL) {
233
57.4k
  ctxt->wellFormed = 0;
234
57.4k
  if (ctxt->recovery == 0)
235
21.7k
      ctxt->disableSAX = 1;
236
57.4k
    }
237
57.4k
}
238
239
/**
240
 * xmlFatalErr:
241
 * @ctxt:  an XML parser context
242
 * @error:  the error number
243
 * @extra:  extra information string
244
 *
245
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
246
 */
247
static void
248
xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
249
17.6M
{
250
17.6M
    const char *errmsg;
251
252
17.6M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
253
17.6M
        (ctxt->instate == XML_PARSER_EOF))
254
24.3k
  return;
255
17.6M
    switch (error) {
256
99.6k
        case XML_ERR_INVALID_HEX_CHARREF:
257
99.6k
            errmsg = "CharRef: invalid hexadecimal value";
258
99.6k
            break;
259
105k
        case XML_ERR_INVALID_DEC_CHARREF:
260
105k
            errmsg = "CharRef: invalid decimal value";
261
105k
            break;
262
0
        case XML_ERR_INVALID_CHARREF:
263
0
            errmsg = "CharRef: invalid value";
264
0
            break;
265
962k
        case XML_ERR_INTERNAL_ERROR:
266
962k
            errmsg = "internal error";
267
962k
            break;
268
0
        case XML_ERR_PEREF_AT_EOF:
269
0
            errmsg = "PEReference at end of document";
270
0
            break;
271
0
        case XML_ERR_PEREF_IN_PROLOG:
272
0
            errmsg = "PEReference in prolog";
273
0
            break;
274
0
        case XML_ERR_PEREF_IN_EPILOG:
275
0
            errmsg = "PEReference in epilog";
276
0
            break;
277
0
        case XML_ERR_PEREF_NO_NAME:
278
0
            errmsg = "PEReference: no name";
279
0
            break;
280
6.39M
        case XML_ERR_PEREF_SEMICOL_MISSING:
281
6.39M
            errmsg = "PEReference: expecting ';'";
282
6.39M
            break;
283
3.05k
        case XML_ERR_ENTITY_LOOP:
284
3.05k
            errmsg = "Detected an entity reference loop";
285
3.05k
            break;
286
0
        case XML_ERR_ENTITY_NOT_STARTED:
287
0
            errmsg = "EntityValue: \" or ' expected";
288
0
            break;
289
3.65k
        case XML_ERR_ENTITY_PE_INTERNAL:
290
3.65k
            errmsg = "PEReferences forbidden in internal subset";
291
3.65k
            break;
292
5.07k
        case XML_ERR_ENTITY_NOT_FINISHED:
293
5.07k
            errmsg = "EntityValue: \" or ' expected";
294
5.07k
            break;
295
109k
        case XML_ERR_ATTRIBUTE_NOT_STARTED:
296
109k
            errmsg = "AttValue: \" or ' expected";
297
109k
            break;
298
309k
        case XML_ERR_LT_IN_ATTRIBUTE:
299
309k
            errmsg = "Unescaped '<' not allowed in attributes values";
300
309k
            break;
301
41.3k
        case XML_ERR_LITERAL_NOT_STARTED:
302
41.3k
            errmsg = "SystemLiteral \" or ' expected";
303
41.3k
            break;
304
1.34M
        case XML_ERR_LITERAL_NOT_FINISHED:
305
1.34M
            errmsg = "Unfinished System or Public ID \" or ' expected";
306
1.34M
            break;
307
36.6k
        case XML_ERR_MISPLACED_CDATA_END:
308
36.6k
            errmsg = "Sequence ']]>' not allowed in content";
309
36.6k
            break;
310
24.5k
        case XML_ERR_URI_REQUIRED:
311
24.5k
            errmsg = "SYSTEM or PUBLIC, the URI is missing";
312
24.5k
            break;
313
16.9k
        case XML_ERR_PUBID_REQUIRED:
314
16.9k
            errmsg = "PUBLIC, the Public Identifier is missing";
315
16.9k
            break;
316
5.62M
        case XML_ERR_HYPHEN_IN_COMMENT:
317
5.62M
            errmsg = "Comment must not contain '--' (double-hyphen)";
318
5.62M
            break;
319
34.9k
        case XML_ERR_PI_NOT_STARTED:
320
34.9k
            errmsg = "xmlParsePI : no target name";
321
34.9k
            break;
322
15.5k
        case XML_ERR_RESERVED_XML_NAME:
323
15.5k
            errmsg = "Invalid PI name";
324
15.5k
            break;
325
2.27k
        case XML_ERR_NOTATION_NOT_STARTED:
326
2.27k
            errmsg = "NOTATION: Name expected here";
327
2.27k
            break;
328
1.33M
        case XML_ERR_NOTATION_NOT_FINISHED:
329
1.33M
            errmsg = "'>' required to close NOTATION declaration";
330
1.33M
            break;
331
16.5k
        case XML_ERR_VALUE_REQUIRED:
332
16.5k
            errmsg = "Entity value required";
333
16.5k
            break;
334
5.38k
        case XML_ERR_URI_FRAGMENT:
335
5.38k
            errmsg = "Fragment not allowed";
336
5.38k
            break;
337
28.0k
        case XML_ERR_ATTLIST_NOT_STARTED:
338
28.0k
            errmsg = "'(' required to start ATTLIST enumeration";
339
28.0k
            break;
340
1.99k
        case XML_ERR_NMTOKEN_REQUIRED:
341
1.99k
            errmsg = "NmToken expected in ATTLIST enumeration";
342
1.99k
            break;
343
21.2k
        case XML_ERR_ATTLIST_NOT_FINISHED:
344
21.2k
            errmsg = "')' required to finish ATTLIST enumeration";
345
21.2k
            break;
346
3.69k
        case XML_ERR_MIXED_NOT_STARTED:
347
3.69k
            errmsg = "MixedContentDecl : '|' or ')*' expected";
348
3.69k
            break;
349
0
        case XML_ERR_PCDATA_REQUIRED:
350
0
            errmsg = "MixedContentDecl : '#PCDATA' expected";
351
0
            break;
352
9.45k
        case XML_ERR_ELEMCONTENT_NOT_STARTED:
353
9.45k
            errmsg = "ContentDecl : Name or '(' expected";
354
9.45k
            break;
355
14.8k
        case XML_ERR_ELEMCONTENT_NOT_FINISHED:
356
14.8k
            errmsg = "ContentDecl : ',' '|' or ')' expected";
357
14.8k
            break;
358
0
        case XML_ERR_PEREF_IN_INT_SUBSET:
359
0
            errmsg =
360
0
                "PEReference: forbidden within markup decl in internal subset";
361
0
            break;
362
307k
        case XML_ERR_GT_REQUIRED:
363
307k
            errmsg = "expected '>'";
364
307k
            break;
365
340
        case XML_ERR_CONDSEC_INVALID:
366
340
            errmsg = "XML conditional section '[' expected";
367
340
            break;
368
37.6k
        case XML_ERR_EXT_SUBSET_NOT_FINISHED:
369
37.6k
            errmsg = "Content error in the external subset";
370
37.6k
            break;
371
1.97k
        case XML_ERR_CONDSEC_INVALID_KEYWORD:
372
1.97k
            errmsg =
373
1.97k
                "conditional section INCLUDE or IGNORE keyword expected";
374
1.97k
            break;
375
2.56k
        case XML_ERR_CONDSEC_NOT_FINISHED:
376
2.56k
            errmsg = "XML conditional section not closed";
377
2.56k
            break;
378
372
        case XML_ERR_XMLDECL_NOT_STARTED:
379
372
            errmsg = "Text declaration '<?xml' required";
380
372
            break;
381
127k
        case XML_ERR_XMLDECL_NOT_FINISHED:
382
127k
            errmsg = "parsing XML declaration: '?>' expected";
383
127k
            break;
384
0
        case XML_ERR_EXT_ENTITY_STANDALONE:
385
0
            errmsg = "external parsed entities cannot be standalone";
386
0
            break;
387
189k
        case XML_ERR_ENTITYREF_SEMICOL_MISSING:
388
189k
            errmsg = "EntityRef: expecting ';'";
389
189k
            break;
390
29.6k
        case XML_ERR_DOCTYPE_NOT_FINISHED:
391
29.6k
            errmsg = "DOCTYPE improperly terminated";
392
29.6k
            break;
393
0
        case XML_ERR_LTSLASH_REQUIRED:
394
0
            errmsg = "EndTag: '</' not found";
395
0
            break;
396
7.16k
        case XML_ERR_EQUAL_REQUIRED:
397
7.16k
            errmsg = "expected '='";
398
7.16k
            break;
399
26.9k
        case XML_ERR_STRING_NOT_CLOSED:
400
26.9k
            errmsg = "String not closed expecting \" or '";
401
26.9k
            break;
402
6.54k
        case XML_ERR_STRING_NOT_STARTED:
403
6.54k
            errmsg = "String not started expecting ' or \"";
404
6.54k
            break;
405
811
        case XML_ERR_ENCODING_NAME:
406
811
            errmsg = "Invalid XML encoding name";
407
811
            break;
408
1.08k
        case XML_ERR_STANDALONE_VALUE:
409
1.08k
            errmsg = "standalone accepts only 'yes' or 'no'";
410
1.08k
            break;
411
28.5k
        case XML_ERR_DOCUMENT_EMPTY:
412
28.5k
            errmsg = "Document is empty";
413
28.5k
            break;
414
196k
        case XML_ERR_DOCUMENT_END:
415
196k
            errmsg = "Extra content at the end of the document";
416
196k
            break;
417
9.67k
        case XML_ERR_NOT_WELL_BALANCED:
418
9.67k
            errmsg = "chunk is not well balanced";
419
9.67k
            break;
420
0
        case XML_ERR_EXTRA_CONTENT:
421
0
            errmsg = "extra content at the end of well balanced chunk";
422
0
            break;
423
75.5k
        case XML_ERR_VERSION_MISSING:
424
75.5k
            errmsg = "Malformed declaration expecting version";
425
75.5k
            break;
426
685
        case XML_ERR_NAME_TOO_LONG:
427
685
            errmsg = "Name too long";
428
685
            break;
429
#if 0
430
        case:
431
            errmsg = "";
432
            break;
433
#endif
434
4.19k
        default:
435
4.19k
            errmsg = "Unregistered error message";
436
17.6M
    }
437
17.6M
    if (ctxt != NULL)
438
17.6M
  ctxt->errNo = error;
439
17.6M
    if (info == NULL) {
440
16.6M
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
441
16.6M
                        XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s\n",
442
16.6M
                        errmsg);
443
16.6M
    } else {
444
963k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
445
963k
                        XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s: %s\n",
446
963k
                        errmsg, info);
447
963k
    }
448
17.6M
    if (ctxt != NULL) {
449
17.6M
  ctxt->wellFormed = 0;
450
17.6M
  if (ctxt->recovery == 0)
451
3.25M
      ctxt->disableSAX = 1;
452
17.6M
    }
453
17.6M
}
454
455
/**
456
 * xmlFatalErrMsg:
457
 * @ctxt:  an XML parser context
458
 * @error:  the error number
459
 * @msg:  the error message
460
 *
461
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
462
 */
463
static void LIBXML_ATTR_FORMAT(3,0)
464
xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
465
               const char *msg)
466
7.71M
{
467
7.71M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
468
7.71M
        (ctxt->instate == XML_PARSER_EOF))
469
1.19k
  return;
470
7.71M
    if (ctxt != NULL)
471
7.71M
  ctxt->errNo = error;
472
7.71M
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
473
7.71M
                    XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
474
7.71M
    if (ctxt != NULL) {
475
7.71M
  ctxt->wellFormed = 0;
476
7.71M
  if (ctxt->recovery == 0)
477
1.96M
      ctxt->disableSAX = 1;
478
7.71M
    }
479
7.71M
}
480
481
/**
482
 * xmlWarningMsg:
483
 * @ctxt:  an XML parser context
484
 * @error:  the error number
485
 * @msg:  the error message
486
 * @str1:  extra data
487
 * @str2:  extra data
488
 *
489
 * Handle a warning.
490
 */
491
static void LIBXML_ATTR_FORMAT(3,0)
492
xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
493
              const char *msg, const xmlChar *str1, const xmlChar *str2)
494
64.5M
{
495
64.5M
    xmlStructuredErrorFunc schannel = NULL;
496
497
64.5M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
498
64.5M
        (ctxt->instate == XML_PARSER_EOF))
499
0
  return;
500
64.5M
    if ((ctxt != NULL) && (ctxt->sax != NULL) &&
501
64.5M
        (ctxt->sax->initialized == XML_SAX2_MAGIC))
502
37.7M
        schannel = ctxt->sax->serror;
503
64.5M
    if (ctxt != NULL) {
504
64.5M
        __xmlRaiseError(schannel,
505
64.5M
                    (ctxt->sax) ? ctxt->sax->warning : NULL,
506
64.5M
                    ctxt->userData,
507
64.5M
                    ctxt, NULL, XML_FROM_PARSER, error,
508
64.5M
                    XML_ERR_WARNING, NULL, 0,
509
64.5M
        (const char *) str1, (const char *) str2, NULL, 0, 0,
510
64.5M
        msg, (const char *) str1, (const char *) str2);
511
64.5M
    } else {
512
0
        __xmlRaiseError(schannel, NULL, NULL,
513
0
                    ctxt, NULL, XML_FROM_PARSER, error,
514
0
                    XML_ERR_WARNING, NULL, 0,
515
0
        (const char *) str1, (const char *) str2, NULL, 0, 0,
516
0
        msg, (const char *) str1, (const char *) str2);
517
0
    }
518
64.5M
}
519
520
/**
521
 * xmlValidityError:
522
 * @ctxt:  an XML parser context
523
 * @error:  the error number
524
 * @msg:  the error message
525
 * @str1:  extra data
526
 *
527
 * Handle a validity error.
528
 */
529
static void LIBXML_ATTR_FORMAT(3,0)
530
xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
531
              const char *msg, const xmlChar *str1, const xmlChar *str2)
532
19.9M
{
533
19.9M
    xmlStructuredErrorFunc schannel = NULL;
534
535
19.9M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
536
19.9M
        (ctxt->instate == XML_PARSER_EOF))
537
0
  return;
538
19.9M
    if (ctxt != NULL) {
539
19.9M
  ctxt->errNo = error;
540
19.9M
  if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
541
9.93M
      schannel = ctxt->sax->serror;
542
19.9M
    }
543
19.9M
    if (ctxt != NULL) {
544
19.9M
        __xmlRaiseError(schannel,
545
19.9M
                    ctxt->vctxt.error, ctxt->vctxt.userData,
546
19.9M
                    ctxt, NULL, XML_FROM_DTD, error,
547
19.9M
                    XML_ERR_ERROR, NULL, 0, (const char *) str1,
548
19.9M
        (const char *) str2, NULL, 0, 0,
549
19.9M
        msg, (const char *) str1, (const char *) str2);
550
19.9M
  ctxt->valid = 0;
551
19.9M
    } else {
552
0
        __xmlRaiseError(schannel, NULL, NULL,
553
0
                    ctxt, NULL, XML_FROM_DTD, error,
554
0
                    XML_ERR_ERROR, NULL, 0, (const char *) str1,
555
0
        (const char *) str2, NULL, 0, 0,
556
0
        msg, (const char *) str1, (const char *) str2);
557
0
    }
558
19.9M
}
559
560
/**
561
 * xmlFatalErrMsgInt:
562
 * @ctxt:  an XML parser context
563
 * @error:  the error number
564
 * @msg:  the error message
565
 * @val:  an integer value
566
 *
567
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
568
 */
569
static void LIBXML_ATTR_FORMAT(3,0)
570
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
571
                  const char *msg, int val)
572
3.18M
{
573
3.18M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
574
3.18M
        (ctxt->instate == XML_PARSER_EOF))
575
0
  return;
576
3.18M
    if (ctxt != NULL)
577
3.18M
  ctxt->errNo = error;
578
3.18M
    __xmlRaiseError(NULL, NULL, NULL,
579
3.18M
                    ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
580
3.18M
                    NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
581
3.18M
    if (ctxt != NULL) {
582
3.18M
  ctxt->wellFormed = 0;
583
3.18M
  if (ctxt->recovery == 0)
584
397k
      ctxt->disableSAX = 1;
585
3.18M
    }
586
3.18M
}
587
588
/**
589
 * xmlFatalErrMsgStrIntStr:
590
 * @ctxt:  an XML parser context
591
 * @error:  the error number
592
 * @msg:  the error message
593
 * @str1:  an string info
594
 * @val:  an integer value
595
 * @str2:  an string info
596
 *
597
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
598
 */
599
static void LIBXML_ATTR_FORMAT(3,0)
600
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
601
                  const char *msg, const xmlChar *str1, int val,
602
      const xmlChar *str2)
603
1.70M
{
604
1.70M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
605
1.70M
        (ctxt->instate == XML_PARSER_EOF))
606
0
  return;
607
1.70M
    if (ctxt != NULL)
608
1.70M
  ctxt->errNo = error;
609
1.70M
    __xmlRaiseError(NULL, NULL, NULL,
610
1.70M
                    ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
611
1.70M
                    NULL, 0, (const char *) str1, (const char *) str2,
612
1.70M
        NULL, val, 0, msg, str1, val, str2);
613
1.70M
    if (ctxt != NULL) {
614
1.70M
  ctxt->wellFormed = 0;
615
1.70M
  if (ctxt->recovery == 0)
616
413k
      ctxt->disableSAX = 1;
617
1.70M
    }
618
1.70M
}
619
620
/**
621
 * xmlFatalErrMsgStr:
622
 * @ctxt:  an XML parser context
623
 * @error:  the error number
624
 * @msg:  the error message
625
 * @val:  a string value
626
 *
627
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
628
 */
629
static void LIBXML_ATTR_FORMAT(3,0)
630
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
631
                  const char *msg, const xmlChar * val)
632
93.7M
{
633
93.7M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
634
93.7M
        (ctxt->instate == XML_PARSER_EOF))
635
62
  return;
636
93.7M
    if (ctxt != NULL)
637
93.7M
  ctxt->errNo = error;
638
93.7M
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
639
93.7M
                    XML_FROM_PARSER, error, XML_ERR_FATAL,
640
93.7M
                    NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
641
93.7M
                    val);
642
93.7M
    if (ctxt != NULL) {
643
93.7M
  ctxt->wellFormed = 0;
644
93.7M
  if (ctxt->recovery == 0)
645
30.6M
      ctxt->disableSAX = 1;
646
93.7M
    }
647
93.7M
}
648
649
/**
650
 * xmlErrMsgStr:
651
 * @ctxt:  an XML parser context
652
 * @error:  the error number
653
 * @msg:  the error message
654
 * @val:  a string value
655
 *
656
 * Handle a non fatal parser error
657
 */
658
static void LIBXML_ATTR_FORMAT(3,0)
659
xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
660
                  const char *msg, const xmlChar * val)
661
1.30M
{
662
1.30M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
663
1.30M
        (ctxt->instate == XML_PARSER_EOF))
664
0
  return;
665
1.30M
    if (ctxt != NULL)
666
1.30M
  ctxt->errNo = error;
667
1.30M
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
668
1.30M
                    XML_FROM_PARSER, error, XML_ERR_ERROR,
669
1.30M
                    NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
670
1.30M
                    val);
671
1.30M
}
672
673
/**
674
 * xmlNsErr:
675
 * @ctxt:  an XML parser context
676
 * @error:  the error number
677
 * @msg:  the message
678
 * @info1:  extra information string
679
 * @info2:  extra information string
680
 *
681
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
682
 */
683
static void LIBXML_ATTR_FORMAT(3,0)
684
xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
685
         const char *msg,
686
         const xmlChar * info1, const xmlChar * info2,
687
         const xmlChar * info3)
688
1.11M
{
689
1.11M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
690
1.11M
        (ctxt->instate == XML_PARSER_EOF))
691
70
  return;
692
1.11M
    if (ctxt != NULL)
693
1.11M
  ctxt->errNo = error;
694
1.11M
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
695
1.11M
                    XML_ERR_ERROR, NULL, 0, (const char *) info1,
696
1.11M
                    (const char *) info2, (const char *) info3, 0, 0, msg,
697
1.11M
                    info1, info2, info3);
698
1.11M
    if (ctxt != NULL)
699
1.11M
  ctxt->nsWellFormed = 0;
700
1.11M
}
701
702
/**
703
 * xmlNsWarn
704
 * @ctxt:  an XML parser context
705
 * @error:  the error number
706
 * @msg:  the message
707
 * @info1:  extra information string
708
 * @info2:  extra information string
709
 *
710
 * Handle a namespace warning error
711
 */
712
static void LIBXML_ATTR_FORMAT(3,0)
713
xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
714
         const char *msg,
715
         const xmlChar * info1, const xmlChar * info2,
716
         const xmlChar * info3)
717
79.3k
{
718
79.3k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
719
79.3k
        (ctxt->instate == XML_PARSER_EOF))
720
0
  return;
721
79.3k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
722
79.3k
                    XML_ERR_WARNING, NULL, 0, (const char *) info1,
723
79.3k
                    (const char *) info2, (const char *) info3, 0, 0, msg,
724
79.3k
                    info1, info2, info3);
725
79.3k
}
726
727
/*
 * Add @val to *@dst, clamping the result to ULONG_MAX instead of
 * wrapping around on overflow.
 */
static void
xmlSaturatedAdd(unsigned long *dst, unsigned long val) {
    /* Largest amount that can still be added without wrapping. */
    unsigned long headroom = ULONG_MAX - *dst;

    *dst = (val > headroom) ? ULONG_MAX : (*dst + val);
}
734
735
/*
 * Add the size_t value @val to *@dst, clamping the result to ULONG_MAX
 * instead of wrapping around on overflow.
 *
 * @val is taken as a size_t so that, on platforms where size_t is wider
 * than unsigned long, a large value is not truncated at the call before
 * the saturation check runs. The comparison below is carried out in the
 * wider of the two types.
 */
static void
xmlSaturatedAddSizeT(unsigned long *dst, size_t val) {
    if (val > ULONG_MAX - *dst)
        *dst = ULONG_MAX;
    else
        *dst += (unsigned long) val; /* fits: val <= ULONG_MAX - *dst */
}
742
743
/**
744
 * xmlParserEntityCheck:
745
 * @ctxt:  parser context
746
 * @extra:  sum of unexpanded entity sizes
747
 *
748
 * Check for non-linear entity expansion behaviour.
749
 *
750
 * In some cases like xmlStringDecodeEntities, this function is called
751
 * for each, possibly nested entity and its unexpanded content length.
752
 *
753
 * In other cases like xmlParseReference, it's only called for each
754
 * top-level entity with its unexpanded content length plus the sum of
755
 * the unexpanded content lengths (plus fixed cost) of all nested
756
 * entities.
757
 *
758
 * Summing the unexpanded lengths also adds the length of the reference.
759
 * This is by design. Taking the length of the entity name into account
760
 * discourages attacks that try to waste CPU time with abusively long
761
 * entity names. See test/recurse/lol6.xml for example. Each call also
762
 * adds some fixed cost XML_ENT_FIXED_COST to discourage attacks with
763
 * short entities.
764
 *
765
 * Returns 1 on error, 0 on success.
766
 */
767
static int
768
xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long extra)
769
524M
{
770
524M
    unsigned long consumed;
771
524M
    xmlParserInputPtr input = ctxt->input;
772
524M
    xmlEntityPtr entity = input->entity;
773
774
    /*
775
     * Compute total consumed bytes so far, including input streams of
776
     * external entities.
777
     */
778
524M
    consumed = input->parentConsumed;
779
524M
    if ((entity == NULL) ||
780
524M
        ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
781
344M
         ((entity->flags & XML_ENT_PARSED) == 0))) {
782
179M
        xmlSaturatedAdd(&consumed, input->consumed);
783
179M
        xmlSaturatedAddSizeT(&consumed, input->cur - input->base);
784
179M
    }
785
524M
    xmlSaturatedAdd(&consumed, ctxt->sizeentities);
786
787
    /*
788
     * Add extra cost and some fixed cost.
789
     */
790
524M
    xmlSaturatedAdd(&ctxt->sizeentcopy, extra);
791
524M
    xmlSaturatedAdd(&ctxt->sizeentcopy, XML_ENT_FIXED_COST);
792
793
    /*
794
     * It's important to always use saturation arithmetic when tracking
795
     * entity sizes to make the size checks reliable. If "sizeentcopy"
796
     * overflows, we have to abort.
797
     */
798
524M
    if ((ctxt->sizeentcopy > XML_MAX_TEXT_LENGTH) &&
799
524M
        ((ctxt->sizeentcopy >= ULONG_MAX) ||
800
6.74k
         (ctxt->sizeentcopy / XML_PARSER_NON_LINEAR > consumed))) {
801
6.74k
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_LOOP,
802
6.74k
                       "Maximum entity amplification factor exceeded");
803
6.74k
        xmlHaltParser(ctxt);
804
6.74k
        return(1);
805
6.74k
    }
806
807
524M
    return(0);
808
524M
}
809
810
/************************************************************************
811
 *                  *
812
 *    Library wide options          *
813
 *                  *
814
 ************************************************************************/
815
816
/**
817
  * xmlHasFeature:
818
  * @feature: the feature to be examined
819
  *
820
  * Examines if the library has been compiled with a given feature.
821
  *
822
  * Returns a non-zero value if the feature exist, otherwise zero.
823
  * Returns zero (0) if the feature does not exist or an unknown
824
  * feature is requested, non-zero otherwise.
825
  */
826
int
827
xmlHasFeature(xmlFeature feature)
828
0
{
829
0
    switch (feature) {
830
0
  case XML_WITH_THREAD:
831
0
#ifdef LIBXML_THREAD_ENABLED
832
0
      return(1);
833
#else
834
      return(0);
835
#endif
836
0
        case XML_WITH_TREE:
837
0
#ifdef LIBXML_TREE_ENABLED
838
0
            return(1);
839
#else
840
            return(0);
841
#endif
842
0
        case XML_WITH_OUTPUT:
843
0
#ifdef LIBXML_OUTPUT_ENABLED
844
0
            return(1);
845
#else
846
            return(0);
847
#endif
848
0
        case XML_WITH_PUSH:
849
0
#ifdef LIBXML_PUSH_ENABLED
850
0
            return(1);
851
#else
852
            return(0);
853
#endif
854
0
        case XML_WITH_READER:
855
0
#ifdef LIBXML_READER_ENABLED
856
0
            return(1);
857
#else
858
            return(0);
859
#endif
860
0
        case XML_WITH_PATTERN:
861
0
#ifdef LIBXML_PATTERN_ENABLED
862
0
            return(1);
863
#else
864
            return(0);
865
#endif
866
0
        case XML_WITH_WRITER:
867
0
#ifdef LIBXML_WRITER_ENABLED
868
0
            return(1);
869
#else
870
            return(0);
871
#endif
872
0
        case XML_WITH_SAX1:
873
0
#ifdef LIBXML_SAX1_ENABLED
874
0
            return(1);
875
#else
876
            return(0);
877
#endif
878
0
        case XML_WITH_FTP:
879
#ifdef LIBXML_FTP_ENABLED
880
            return(1);
881
#else
882
0
            return(0);
883
0
#endif
884
0
        case XML_WITH_HTTP:
885
#ifdef LIBXML_HTTP_ENABLED
886
            return(1);
887
#else
888
0
            return(0);
889
0
#endif
890
0
        case XML_WITH_VALID:
891
0
#ifdef LIBXML_VALID_ENABLED
892
0
            return(1);
893
#else
894
            return(0);
895
#endif
896
0
        case XML_WITH_HTML:
897
0
#ifdef LIBXML_HTML_ENABLED
898
0
            return(1);
899
#else
900
            return(0);
901
#endif
902
0
        case XML_WITH_LEGACY:
903
#ifdef LIBXML_LEGACY_ENABLED
904
            return(1);
905
#else
906
0
            return(0);
907
0
#endif
908
0
        case XML_WITH_C14N:
909
0
#ifdef LIBXML_C14N_ENABLED
910
0
            return(1);
911
#else
912
            return(0);
913
#endif
914
0
        case XML_WITH_CATALOG:
915
0
#ifdef LIBXML_CATALOG_ENABLED
916
0
            return(1);
917
#else
918
            return(0);
919
#endif
920
0
        case XML_WITH_XPATH:
921
0
#ifdef LIBXML_XPATH_ENABLED
922
0
            return(1);
923
#else
924
            return(0);
925
#endif
926
0
        case XML_WITH_XPTR:
927
0
#ifdef LIBXML_XPTR_ENABLED
928
0
            return(1);
929
#else
930
            return(0);
931
#endif
932
0
        case XML_WITH_XINCLUDE:
933
0
#ifdef LIBXML_XINCLUDE_ENABLED
934
0
            return(1);
935
#else
936
            return(0);
937
#endif
938
0
        case XML_WITH_ICONV:
939
0
#ifdef LIBXML_ICONV_ENABLED
940
0
            return(1);
941
#else
942
            return(0);
943
#endif
944
0
        case XML_WITH_ISO8859X:
945
0
#ifdef LIBXML_ISO8859X_ENABLED
946
0
            return(1);
947
#else
948
            return(0);
949
#endif
950
0
        case XML_WITH_UNICODE:
951
0
#ifdef LIBXML_UNICODE_ENABLED
952
0
            return(1);
953
#else
954
            return(0);
955
#endif
956
0
        case XML_WITH_REGEXP:
957
0
#ifdef LIBXML_REGEXP_ENABLED
958
0
            return(1);
959
#else
960
            return(0);
961
#endif
962
0
        case XML_WITH_AUTOMATA:
963
0
#ifdef LIBXML_AUTOMATA_ENABLED
964
0
            return(1);
965
#else
966
            return(0);
967
#endif
968
0
        case XML_WITH_EXPR:
969
#ifdef LIBXML_EXPR_ENABLED
970
            return(1);
971
#else
972
0
            return(0);
973
0
#endif
974
0
        case XML_WITH_SCHEMAS:
975
0
#ifdef LIBXML_SCHEMAS_ENABLED
976
0
            return(1);
977
#else
978
            return(0);
979
#endif
980
0
        case XML_WITH_SCHEMATRON:
981
0
#ifdef LIBXML_SCHEMATRON_ENABLED
982
0
            return(1);
983
#else
984
            return(0);
985
#endif
986
0
        case XML_WITH_MODULES:
987
0
#ifdef LIBXML_MODULES_ENABLED
988
0
            return(1);
989
#else
990
            return(0);
991
#endif
992
0
        case XML_WITH_DEBUG:
993
#ifdef LIBXML_DEBUG_ENABLED
994
            return(1);
995
#else
996
0
            return(0);
997
0
#endif
998
0
        case XML_WITH_DEBUG_MEM:
999
#ifdef DEBUG_MEMORY_LOCATION
1000
            return(1);
1001
#else
1002
0
            return(0);
1003
0
#endif
1004
0
        case XML_WITH_DEBUG_RUN:
1005
0
            return(0);
1006
0
        case XML_WITH_ZLIB:
1007
0
#ifdef LIBXML_ZLIB_ENABLED
1008
0
            return(1);
1009
#else
1010
            return(0);
1011
#endif
1012
0
        case XML_WITH_LZMA:
1013
0
#ifdef LIBXML_LZMA_ENABLED
1014
0
            return(1);
1015
#else
1016
            return(0);
1017
#endif
1018
0
        case XML_WITH_ICU:
1019
#ifdef LIBXML_ICU_ENABLED
1020
            return(1);
1021
#else
1022
0
            return(0);
1023
0
#endif
1024
0
        default:
1025
0
      break;
1026
0
     }
1027
0
     return(0);
1028
0
}
1029
1030
/************************************************************************
1031
 *                  *
1032
 *    SAX2 defaulted attributes handling      *
1033
 *                  *
1034
 ************************************************************************/
1035
1036
/**
1037
 * xmlDetectSAX2:
1038
 * @ctxt:  an XML parser context
1039
 *
1040
 * Do the SAX2 detection and specific initialization
1041
 */
1042
static void
1043
1.87M
xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1044
1.87M
    xmlSAXHandlerPtr sax;
1045
1046
    /* Avoid unused variable warning if features are disabled. */
1047
1.87M
    (void) sax;
1048
1049
1.87M
    if (ctxt == NULL) return;
1050
1.87M
    sax = ctxt->sax;
1051
1.87M
#ifdef LIBXML_SAX1_ENABLED
1052
1.87M
    if ((sax) &&  (sax->initialized == XML_SAX2_MAGIC) &&
1053
1.87M
        ((sax->startElementNs != NULL) ||
1054
1.10M
         (sax->endElementNs != NULL) ||
1055
1.10M
         ((sax->startElement == NULL) && (sax->endElement == NULL))))
1056
1.10M
        ctxt->sax2 = 1;
1057
#else
1058
    ctxt->sax2 = 1;
1059
#endif /* LIBXML_SAX1_ENABLED */
1060
1061
1.87M
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1062
1.87M
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1063
1.87M
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
1064
1.87M
    if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1065
1.87M
    (ctxt->str_xml_ns == NULL)) {
1066
0
        xmlErrMemory(ctxt, NULL);
1067
0
    }
1068
1.87M
}
1069
1070
typedef struct _xmlDefAttrs xmlDefAttrs;
1071
typedef xmlDefAttrs *xmlDefAttrsPtr;
1072
struct _xmlDefAttrs {
1073
    int nbAttrs;  /* number of defaulted attributes on that element */
1074
    int maxAttrs;       /* the size of the array */
1075
#if __STDC_VERSION__ >= 199901L
1076
    /* Using a C99 flexible array member avoids UBSan errors. */
1077
    const xmlChar *values[]; /* array of localname/prefix/values/external */
1078
#else
1079
    const xmlChar *values[5];
1080
#endif
1081
};
1082
1083
/**
1084
 * xmlAttrNormalizeSpace:
1085
 * @src: the source string
1086
 * @dst: the target string
1087
 *
1088
 * Normalize the space in non CDATA attribute values:
1089
 * If the attribute type is not CDATA, then the XML processor MUST further
1090
 * process the normalized attribute value by discarding any leading and
1091
 * trailing space (#x20) characters, and by replacing sequences of space
1092
 * (#x20) characters by a single space (#x20) character.
1093
 * Note that the size of dst need to be at least src, and if one doesn't need
1094
 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1095
 * passing src as dst is just fine.
1096
 *
1097
 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1098
 *         is needed.
1099
 */
1100
static xmlChar *
1101
xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1102
366k
{
1103
366k
    if ((src == NULL) || (dst == NULL))
1104
0
        return(NULL);
1105
1106
589k
    while (*src == 0x20) src++;
1107
5.29M
    while (*src != 0) {
1108
4.92M
  if (*src == 0x20) {
1109
1.80M
      while (*src == 0x20) src++;
1110
284k
      if (*src != 0)
1111
252k
    *dst++ = 0x20;
1112
4.64M
  } else {
1113
4.64M
      *dst++ = *src++;
1114
4.64M
  }
1115
4.92M
    }
1116
366k
    *dst = 0;
1117
366k
    if (dst == src)
1118
292k
       return(NULL);
1119
74.2k
    return(dst);
1120
366k
}
1121
1122
/**
1123
 * xmlAttrNormalizeSpace2:
1124
 * @src: the source string
1125
 *
1126
 * Normalize the space in non CDATA attribute values, a slightly more complex
1127
 * front end to avoid allocation problems when running on attribute values
1128
 * coming from the input.
1129
 *
1130
 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1131
 *         is needed.
1132
 */
1133
static const xmlChar *
1134
xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
1135
97.5k
{
1136
97.5k
    int i;
1137
97.5k
    int remove_head = 0;
1138
97.5k
    int need_realloc = 0;
1139
97.5k
    const xmlChar *cur;
1140
1141
97.5k
    if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1142
0
        return(NULL);
1143
97.5k
    i = *len;
1144
97.5k
    if (i <= 0)
1145
5.84k
        return(NULL);
1146
1147
91.6k
    cur = src;
1148
121k
    while (*cur == 0x20) {
1149
29.9k
        cur++;
1150
29.9k
  remove_head++;
1151
29.9k
    }
1152
1.96M
    while (*cur != 0) {
1153
1.89M
  if (*cur == 0x20) {
1154
73.4k
      cur++;
1155
73.4k
      if ((*cur == 0x20) || (*cur == 0)) {
1156
15.6k
          need_realloc = 1;
1157
15.6k
    break;
1158
15.6k
      }
1159
73.4k
  } else
1160
1.81M
      cur++;
1161
1.89M
    }
1162
91.6k
    if (need_realloc) {
1163
15.6k
        xmlChar *ret;
1164
1165
15.6k
  ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1166
15.6k
  if (ret == NULL) {
1167
0
      xmlErrMemory(ctxt, NULL);
1168
0
      return(NULL);
1169
0
  }
1170
15.6k
  xmlAttrNormalizeSpace(ret, ret);
1171
15.6k
  *len = strlen((const char *)ret);
1172
15.6k
        return(ret);
1173
76.0k
    } else if (remove_head) {
1174
3.87k
        *len -= remove_head;
1175
3.87k
        memmove(src, src + remove_head, 1 + *len);
1176
3.87k
  return(src);
1177
3.87k
    }
1178
72.1k
    return(NULL);
1179
91.6k
}
1180
1181
/**
1182
 * xmlAddDefAttrs:
1183
 * @ctxt:  an XML parser context
1184
 * @fullname:  the element fullname
1185
 * @fullattr:  the attribute fullname
1186
 * @value:  the attribute value
1187
 *
1188
 * Add a defaulted attribute for an element
1189
 */
1190
static void
1191
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1192
               const xmlChar *fullname,
1193
               const xmlChar *fullattr,
1194
685k
               const xmlChar *value) {
1195
685k
    xmlDefAttrsPtr defaults;
1196
685k
    int len;
1197
685k
    const xmlChar *name;
1198
685k
    const xmlChar *prefix;
1199
1200
    /*
1201
     * Allows to detect attribute redefinitions
1202
     */
1203
685k
    if (ctxt->attsSpecial != NULL) {
1204
656k
        if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1205
44.3k
      return;
1206
656k
    }
1207
1208
641k
    if (ctxt->attsDefault == NULL) {
1209
58.1k
        ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1210
58.1k
  if (ctxt->attsDefault == NULL)
1211
0
      goto mem_error;
1212
58.1k
    }
1213
1214
    /*
1215
     * split the element name into prefix:localname , the string found
1216
     * are within the DTD and then not associated to namespace names.
1217
     */
1218
641k
    name = xmlSplitQName3(fullname, &len);
1219
641k
    if (name == NULL) {
1220
627k
        name = xmlDictLookup(ctxt->dict, fullname, -1);
1221
627k
  prefix = NULL;
1222
627k
    } else {
1223
14.5k
        name = xmlDictLookup(ctxt->dict, name, -1);
1224
14.5k
  prefix = xmlDictLookup(ctxt->dict, fullname, len);
1225
14.5k
    }
1226
1227
    /*
1228
     * make sure there is some storage
1229
     */
1230
641k
    defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1231
641k
    if (defaults == NULL) {
1232
314k
        defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
1233
314k
                     (4 * 5) * sizeof(const xmlChar *));
1234
314k
  if (defaults == NULL)
1235
0
      goto mem_error;
1236
314k
  defaults->nbAttrs = 0;
1237
314k
  defaults->maxAttrs = 4;
1238
314k
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1239
314k
                          defaults, NULL) < 0) {
1240
0
      xmlFree(defaults);
1241
0
      goto mem_error;
1242
0
  }
1243
326k
    } else if (defaults->nbAttrs >= defaults->maxAttrs) {
1244
3.28k
        xmlDefAttrsPtr temp;
1245
1246
3.28k
        temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
1247
3.28k
           (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1248
3.28k
  if (temp == NULL)
1249
0
      goto mem_error;
1250
3.28k
  defaults = temp;
1251
3.28k
  defaults->maxAttrs *= 2;
1252
3.28k
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1253
3.28k
                          defaults, NULL) < 0) {
1254
0
      xmlFree(defaults);
1255
0
      goto mem_error;
1256
0
  }
1257
3.28k
    }
1258
1259
    /*
1260
     * Split the element name into prefix:localname , the string found
1261
     * are within the DTD and hen not associated to namespace names.
1262
     */
1263
641k
    name = xmlSplitQName3(fullattr, &len);
1264
641k
    if (name == NULL) {
1265
537k
        name = xmlDictLookup(ctxt->dict, fullattr, -1);
1266
537k
  prefix = NULL;
1267
537k
    } else {
1268
103k
        name = xmlDictLookup(ctxt->dict, name, -1);
1269
103k
  prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1270
103k
    }
1271
1272
641k
    defaults->values[5 * defaults->nbAttrs] = name;
1273
641k
    defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1274
    /* intern the string and precompute the end */
1275
641k
    len = xmlStrlen(value);
1276
641k
    value = xmlDictLookup(ctxt->dict, value, len);
1277
641k
    if (value == NULL)
1278
0
        goto mem_error;
1279
641k
    defaults->values[5 * defaults->nbAttrs + 2] = value;
1280
641k
    defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1281
641k
    if (ctxt->external)
1282
473k
        defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1283
167k
    else
1284
167k
        defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1285
641k
    defaults->nbAttrs++;
1286
1287
641k
    return;
1288
1289
0
mem_error:
1290
0
    xmlErrMemory(ctxt, NULL);
1291
0
    return;
1292
641k
}
1293
1294
/**
1295
 * xmlAddSpecialAttr:
1296
 * @ctxt:  an XML parser context
1297
 * @fullname:  the element fullname
1298
 * @fullattr:  the attribute fullname
1299
 * @type:  the attribute type
1300
 *
1301
 * Register this attribute type
1302
 */
1303
static void
1304
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1305
      const xmlChar *fullname,
1306
      const xmlChar *fullattr,
1307
      int type)
1308
8.40M
{
1309
8.40M
    if (ctxt->attsSpecial == NULL) {
1310
97.9k
        ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1311
97.9k
  if (ctxt->attsSpecial == NULL)
1312
0
      goto mem_error;
1313
97.9k
    }
1314
1315
8.40M
    if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1316
601k
        return;
1317
1318
7.80M
    xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1319
7.80M
                     (void *) (ptrdiff_t) type);
1320
7.80M
    return;
1321
1322
0
mem_error:
1323
0
    xmlErrMemory(ctxt, NULL);
1324
0
    return;
1325
8.40M
}
1326
1327
/**
1328
 * xmlCleanSpecialAttrCallback:
1329
 *
1330
 * Removes CDATA attributes from the special attribute table
1331
 */
1332
static void
1333
xmlCleanSpecialAttrCallback(void *payload, void *data,
1334
                            const xmlChar *fullname, const xmlChar *fullattr,
1335
6.56M
                            const xmlChar *unused ATTRIBUTE_UNUSED) {
1336
6.56M
    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1337
1338
6.56M
    if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) {
1339
2.11M
        xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1340
2.11M
    }
1341
6.56M
}
1342
1343
/**
1344
 * xmlCleanSpecialAttr:
1345
 * @ctxt:  an XML parser context
1346
 *
1347
 * Trim the list of attributes defined to remove all those of type
1348
 * CDATA as they are not special. This call should be done when finishing
1349
 * to parse the DTD and before starting to parse the document root.
1350
 */
1351
static void
1352
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1353
365k
{
1354
365k
    if (ctxt->attsSpecial == NULL)
1355
288k
        return;
1356
1357
77.9k
    xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1358
1359
77.9k
    if (xmlHashSize(ctxt->attsSpecial) == 0) {
1360
10.3k
        xmlHashFree(ctxt->attsSpecial, NULL);
1361
10.3k
        ctxt->attsSpecial = NULL;
1362
10.3k
    }
1363
77.9k
    return;
1364
365k
}
1365
1366
/**
1367
 * xmlCheckLanguageID:
1368
 * @lang:  pointer to the string value
1369
 *
1370
 * Checks that the value conforms to the LanguageID production:
1371
 *
1372
 * NOTE: this is somewhat deprecated, those productions were removed from
1373
 *       the XML Second edition.
1374
 *
1375
 * [33] LanguageID ::= Langcode ('-' Subcode)*
1376
 * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
1377
 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1378
 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1379
 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1380
 * [38] Subcode ::= ([a-z] | [A-Z])+
1381
 *
1382
 * The current REC reference the successors of RFC 1766, currently 5646
1383
 *
1384
 * http://www.rfc-editor.org/rfc/rfc5646.txt
1385
 * langtag       = language
1386
 *                 ["-" script]
1387
 *                 ["-" region]
1388
 *                 *("-" variant)
1389
 *                 *("-" extension)
1390
 *                 ["-" privateuse]
1391
 * language      = 2*3ALPHA            ; shortest ISO 639 code
1392
 *                 ["-" extlang]       ; sometimes followed by
1393
 *                                     ; extended language subtags
1394
 *               / 4ALPHA              ; or reserved for future use
1395
 *               / 5*8ALPHA            ; or registered language subtag
1396
 *
1397
 * extlang       = 3ALPHA              ; selected ISO 639 codes
1398
 *                 *2("-" 3ALPHA)      ; permanently reserved
1399
 *
1400
 * script        = 4ALPHA              ; ISO 15924 code
1401
 *
1402
 * region        = 2ALPHA              ; ISO 3166-1 code
1403
 *               / 3DIGIT              ; UN M.49 code
1404
 *
1405
 * variant       = 5*8alphanum         ; registered variants
1406
 *               / (DIGIT 3alphanum)
1407
 *
1408
 * extension     = singleton 1*("-" (2*8alphanum))
1409
 *
1410
 *                                     ; Single alphanumerics
1411
 *                                     ; "x" reserved for private use
1412
 * singleton     = DIGIT               ; 0 - 9
1413
 *               / %x41-57             ; A - W
1414
 *               / %x59-5A             ; Y - Z
1415
 *               / %x61-77             ; a - w
1416
 *               / %x79-7A             ; y - z
1417
 *
1418
 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1419
 * The parser below doesn't try to cope with extension or privateuse
1420
 * that could be added but that's not interoperable anyway
1421
 *
1422
 * Returns 1 if correct 0 otherwise
1423
 **/
1424
int
1425
xmlCheckLanguageID(const xmlChar * lang)
1426
92.8k
{
1427
92.8k
    const xmlChar *cur = lang, *nxt;
1428
1429
92.8k
    if (cur == NULL)
1430
4.84k
        return (0);
1431
88.0k
    if (((cur[0] == 'i') && (cur[1] == '-')) ||
1432
88.0k
        ((cur[0] == 'I') && (cur[1] == '-')) ||
1433
88.0k
        ((cur[0] == 'x') && (cur[1] == '-')) ||
1434
88.0k
        ((cur[0] == 'X') && (cur[1] == '-'))) {
1435
        /*
1436
         * Still allow IANA code and user code which were coming
1437
         * from the previous version of the XML-1.0 specification
1438
         * it's deprecated but we should not fail
1439
         */
1440
6.57k
        cur += 2;
1441
15.6k
        while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1442
15.6k
               ((cur[0] >= 'a') && (cur[0] <= 'z')))
1443
9.08k
            cur++;
1444
6.57k
        return(cur[0] == 0);
1445
6.57k
    }
1446
81.4k
    nxt = cur;
1447
284k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1448
284k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1449
203k
           nxt++;
1450
81.4k
    if (nxt - cur >= 4) {
1451
        /*
1452
         * Reserved
1453
         */
1454
3.40k
        if ((nxt - cur > 8) || (nxt[0] != 0))
1455
3.24k
            return(0);
1456
157
        return(1);
1457
3.40k
    }
1458
78.0k
    if (nxt - cur < 2)
1459
6.77k
        return(0);
1460
    /* we got an ISO 639 code */
1461
71.2k
    if (nxt[0] == 0)
1462
34.1k
        return(1);
1463
37.1k
    if (nxt[0] != '-')
1464
6.30k
        return(0);
1465
1466
30.8k
    nxt++;
1467
30.8k
    cur = nxt;
1468
    /* now we can have extlang or script or region or variant */
1469
30.8k
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1470
1.45k
        goto region_m49;
1471
1472
109k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1473
109k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1474
80.2k
           nxt++;
1475
29.3k
    if (nxt - cur == 4)
1476
4.31k
        goto script;
1477
25.0k
    if (nxt - cur == 2)
1478
2.28k
        goto region;
1479
22.7k
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1480
1.22k
        goto variant;
1481
21.5k
    if (nxt - cur != 3)
1482
6.52k
        return(0);
1483
    /* we parsed an extlang */
1484
15.0k
    if (nxt[0] == 0)
1485
300
        return(1);
1486
14.7k
    if (nxt[0] != '-')
1487
633
        return(0);
1488
1489
14.0k
    nxt++;
1490
14.0k
    cur = nxt;
1491
    /* now we can have script or region or variant */
1492
14.0k
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1493
426
        goto region_m49;
1494
1495
85.1k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1496
85.1k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1497
71.5k
           nxt++;
1498
13.6k
    if (nxt - cur == 2)
1499
1.20k
        goto region;
1500
12.4k
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1501
891
        goto variant;
1502
11.5k
    if (nxt - cur != 4)
1503
4.09k
        return(0);
1504
    /* we parsed a script */
1505
11.7k
script:
1506
11.7k
    if (nxt[0] == 0)
1507
638
        return(1);
1508
11.1k
    if (nxt[0] != '-')
1509
6.67k
        return(0);
1510
1511
4.47k
    nxt++;
1512
4.47k
    cur = nxt;
1513
    /* now we can have region or variant */
1514
4.47k
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1515
682
        goto region_m49;
1516
1517
21.1k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1518
21.1k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1519
17.3k
           nxt++;
1520
1521
3.79k
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1522
1.17k
        goto variant;
1523
2.61k
    if (nxt - cur != 2)
1524
1.50k
        return(0);
1525
    /* we parsed a region */
1526
5.08k
region:
1527
5.08k
    if (nxt[0] == 0)
1528
1.49k
        return(1);
1529
3.59k
    if (nxt[0] != '-')
1530
2.21k
        return(0);
1531
1532
1.38k
    nxt++;
1533
1.38k
    cur = nxt;
1534
    /* now we can just have a variant */
1535
10.0k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1536
10.0k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1537
8.69k
           nxt++;
1538
1539
1.38k
    if ((nxt - cur < 5) || (nxt - cur > 8))
1540
931
        return(0);
1541
1542
    /* we parsed a variant */
1543
3.73k
variant:
1544
3.73k
    if (nxt[0] == 0)
1545
777
        return(1);
1546
2.96k
    if (nxt[0] != '-')
1547
2.84k
        return(0);
1548
    /* extensions and private use subtags not checked */
1549
120
    return (1);
1550
1551
2.56k
region_m49:
1552
2.56k
    if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1553
2.56k
        ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1554
484
        nxt += 3;
1555
484
        goto region;
1556
484
    }
1557
2.07k
    return(0);
1558
2.56k
}
1559
1560
/************************************************************************
1561
 *                  *
1562
 *    Parser stacks related functions and macros    *
1563
 *                  *
1564
 ************************************************************************/
1565
1566
static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1567
                                            const xmlChar ** str);
1568
1569
#ifdef SAX2
1570
/**
1571
 * nsPush:
1572
 * @ctxt:  an XML parser context
1573
 * @prefix:  the namespace prefix or NULL
1574
 * @URL:  the namespace name
1575
 *
1576
 * Pushes a new parser namespace on top of the ns stack
1577
 *
1578
 * Returns -1 in case of error, -2 if the namespace should be discarded
1579
 *     and the index in the stack otherwise.
1580
 */
1581
static int
1582
nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1583
441k
{
1584
441k
    if (ctxt->options & XML_PARSE_NSCLEAN) {
1585
148k
        int i;
1586
768k
  for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
1587
726k
      if (ctxt->nsTab[i] == prefix) {
1588
    /* in scope */
1589
107k
          if (ctxt->nsTab[i + 1] == URL)
1590
23.7k
        return(-2);
1591
    /* out of scope keep it */
1592
83.3k
    break;
1593
107k
      }
1594
726k
  }
1595
148k
    }
1596
418k
    if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1597
61.8k
  ctxt->nsMax = 10;
1598
61.8k
  ctxt->nsNr = 0;
1599
61.8k
  ctxt->nsTab = (const xmlChar **)
1600
61.8k
                xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1601
61.8k
  if (ctxt->nsTab == NULL) {
1602
0
      xmlErrMemory(ctxt, NULL);
1603
0
      ctxt->nsMax = 0;
1604
0
            return (-1);
1605
0
  }
1606
356k
    } else if (ctxt->nsNr >= ctxt->nsMax) {
1607
8.58k
        const xmlChar ** tmp;
1608
8.58k
        ctxt->nsMax *= 2;
1609
8.58k
        tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1610
8.58k
            ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1611
8.58k
        if (tmp == NULL) {
1612
0
            xmlErrMemory(ctxt, NULL);
1613
0
      ctxt->nsMax /= 2;
1614
0
            return (-1);
1615
0
        }
1616
8.58k
  ctxt->nsTab = tmp;
1617
8.58k
    }
1618
418k
    ctxt->nsTab[ctxt->nsNr++] = prefix;
1619
418k
    ctxt->nsTab[ctxt->nsNr++] = URL;
1620
418k
    return (ctxt->nsNr);
1621
418k
}
1622
/**
1623
 * nsPop:
1624
 * @ctxt: an XML parser context
1625
 * @nr:  the number to pop
1626
 *
1627
 * Pops the top @nr parser prefix/namespace from the ns stack
1628
 *
1629
 * Returns the number of namespaces removed
1630
 */
1631
static int
1632
nsPop(xmlParserCtxtPtr ctxt, int nr)
1633
130k
{
1634
130k
    int i;
1635
1636
130k
    if (ctxt->nsTab == NULL) return(0);
1637
130k
    if (ctxt->nsNr < nr) {
1638
0
        xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1639
0
        nr = ctxt->nsNr;
1640
0
    }
1641
130k
    if (ctxt->nsNr <= 0)
1642
0
        return (0);
1643
1644
485k
    for (i = 0;i < nr;i++) {
1645
354k
         ctxt->nsNr--;
1646
354k
   ctxt->nsTab[ctxt->nsNr] = NULL;
1647
354k
    }
1648
130k
    return(nr);
1649
130k
}
1650
#endif
1651
1652
static int
1653
147k
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1654
147k
    const xmlChar **atts;
1655
147k
    int *attallocs;
1656
147k
    int maxatts;
1657
1658
147k
    if (nr + 5 > ctxt->maxatts) {
1659
147k
  maxatts = ctxt->maxatts == 0 ? 55 : (nr + 5) * 2;
1660
147k
  atts = (const xmlChar **) xmlMalloc(
1661
147k
             maxatts * sizeof(const xmlChar *));
1662
147k
  if (atts == NULL) goto mem_error;
1663
147k
  attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1664
147k
                               (maxatts / 5) * sizeof(int));
1665
147k
  if (attallocs == NULL) {
1666
0
            xmlFree(atts);
1667
0
            goto mem_error;
1668
0
        }
1669
147k
        if (ctxt->maxatts > 0)
1670
882
            memcpy(atts, ctxt->atts, ctxt->maxatts * sizeof(const xmlChar *));
1671
147k
        xmlFree(ctxt->atts);
1672
147k
  ctxt->atts = atts;
1673
147k
  ctxt->attallocs = attallocs;
1674
147k
  ctxt->maxatts = maxatts;
1675
147k
    }
1676
147k
    return(ctxt->maxatts);
1677
0
mem_error:
1678
0
    xmlErrMemory(ctxt, NULL);
1679
0
    return(-1);
1680
147k
}
1681
1682
/**
1683
 * inputPush:
1684
 * @ctxt:  an XML parser context
1685
 * @value:  the parser input
1686
 *
1687
 * Pushes a new parser input on top of the input stack
1688
 *
1689
 * Returns -1 in case of error, the index in the stack otherwise
1690
 */
1691
int
1692
inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1693
346M
{
1694
346M
    if ((ctxt == NULL) || (value == NULL))
1695
0
        return(-1);
1696
346M
    if (ctxt->inputNr >= ctxt->inputMax) {
1697
9.82k
        size_t newSize = ctxt->inputMax * 2;
1698
9.82k
        xmlParserInputPtr *tmp;
1699
1700
9.82k
        tmp = (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1701
9.82k
                                               newSize * sizeof(*tmp));
1702
9.82k
        if (tmp == NULL) {
1703
0
            xmlErrMemory(ctxt, NULL);
1704
0
            return (-1);
1705
0
        }
1706
9.82k
        ctxt->inputTab = tmp;
1707
9.82k
        ctxt->inputMax = newSize;
1708
9.82k
    }
1709
346M
    ctxt->inputTab[ctxt->inputNr] = value;
1710
346M
    ctxt->input = value;
1711
346M
    return (ctxt->inputNr++);
1712
346M
}
1713
/**
1714
 * inputPop:
1715
 * @ctxt: an XML parser context
1716
 *
1717
 * Pops the top parser input from the input stack
1718
 *
1719
 * Returns the input just removed
1720
 */
1721
xmlParserInputPtr
1722
inputPop(xmlParserCtxtPtr ctxt)
1723
349M
{
1724
349M
    xmlParserInputPtr ret;
1725
1726
349M
    if (ctxt == NULL)
1727
0
        return(NULL);
1728
349M
    if (ctxt->inputNr <= 0)
1729
3.61M
        return (NULL);
1730
345M
    ctxt->inputNr--;
1731
345M
    if (ctxt->inputNr > 0)
1732
344M
        ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1733
1.17M
    else
1734
1.17M
        ctxt->input = NULL;
1735
345M
    ret = ctxt->inputTab[ctxt->inputNr];
1736
345M
    ctxt->inputTab[ctxt->inputNr] = NULL;
1737
345M
    return (ret);
1738
349M
}
1739
/**
1740
 * nodePush:
1741
 * @ctxt:  an XML parser context
1742
 * @value:  the element node
1743
 *
1744
 * Pushes a new element node on top of the node stack
1745
 *
1746
 * Returns -1 in case of error, the index in the stack otherwise
1747
 */
1748
int
1749
nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1750
47.8M
{
1751
47.8M
    if (ctxt == NULL) return(0);
1752
47.8M
    if (ctxt->nodeNr >= ctxt->nodeMax) {
1753
39.3k
        xmlNodePtr *tmp;
1754
1755
39.3k
  tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1756
39.3k
                                      ctxt->nodeMax * 2 *
1757
39.3k
                                      sizeof(ctxt->nodeTab[0]));
1758
39.3k
        if (tmp == NULL) {
1759
0
            xmlErrMemory(ctxt, NULL);
1760
0
            return (-1);
1761
0
        }
1762
39.3k
        ctxt->nodeTab = tmp;
1763
39.3k
  ctxt->nodeMax *= 2;
1764
39.3k
    }
1765
47.8M
    if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1766
47.8M
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
1767
170
  xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1768
170
     "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1769
170
        xmlParserMaxDepth);
1770
170
  xmlHaltParser(ctxt);
1771
170
  return(-1);
1772
170
    }
1773
47.8M
    ctxt->nodeTab[ctxt->nodeNr] = value;
1774
47.8M
    ctxt->node = value;
1775
47.8M
    return (ctxt->nodeNr++);
1776
47.8M
}
1777
1778
/**
1779
 * nodePop:
1780
 * @ctxt: an XML parser context
1781
 *
1782
 * Pops the top element node from the node stack
1783
 *
1784
 * Returns the node just removed
1785
 */
1786
xmlNodePtr
1787
nodePop(xmlParserCtxtPtr ctxt)
1788
46.8M
{
1789
46.8M
    xmlNodePtr ret;
1790
1791
46.8M
    if (ctxt == NULL) return(NULL);
1792
46.8M
    if (ctxt->nodeNr <= 0)
1793
375k
        return (NULL);
1794
46.4M
    ctxt->nodeNr--;
1795
46.4M
    if (ctxt->nodeNr > 0)
1796
45.7M
        ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1797
720k
    else
1798
720k
        ctxt->node = NULL;
1799
46.4M
    ret = ctxt->nodeTab[ctxt->nodeNr];
1800
46.4M
    ctxt->nodeTab[ctxt->nodeNr] = NULL;
1801
46.4M
    return (ret);
1802
46.8M
}
1803
1804
/**
1805
 * nameNsPush:
1806
 * @ctxt:  an XML parser context
1807
 * @value:  the element name
1808
 * @prefix:  the element prefix
1809
 * @URI:  the element namespace name
1810
 * @line:  the current line number for error messages
1811
 * @nsNr:  the number of namespaces pushed on the namespace table
1812
 *
1813
 * Pushes a new element name/prefix/URL on top of the name stack
1814
 *
1815
 * Returns -1 in case of error, the index in the stack otherwise
1816
 */
1817
static int
1818
nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1819
           const xmlChar *prefix, const xmlChar *URI, int line, int nsNr)
1820
44.6M
{
1821
44.6M
    xmlStartTag *tag;
1822
1823
44.6M
    if (ctxt->nameNr >= ctxt->nameMax) {
1824
68.0k
        const xmlChar * *tmp;
1825
68.0k
        xmlStartTag *tmp2;
1826
68.0k
        ctxt->nameMax *= 2;
1827
68.0k
        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1828
68.0k
                                    ctxt->nameMax *
1829
68.0k
                                    sizeof(ctxt->nameTab[0]));
1830
68.0k
        if (tmp == NULL) {
1831
0
      ctxt->nameMax /= 2;
1832
0
      goto mem_error;
1833
0
        }
1834
68.0k
  ctxt->nameTab = tmp;
1835
68.0k
        tmp2 = (xmlStartTag *) xmlRealloc((void * *)ctxt->pushTab,
1836
68.0k
                                    ctxt->nameMax *
1837
68.0k
                                    sizeof(ctxt->pushTab[0]));
1838
68.0k
        if (tmp2 == NULL) {
1839
0
      ctxt->nameMax /= 2;
1840
0
      goto mem_error;
1841
0
        }
1842
68.0k
  ctxt->pushTab = tmp2;
1843
44.5M
    } else if (ctxt->pushTab == NULL) {
1844
540k
        ctxt->pushTab = (xmlStartTag *) xmlMalloc(ctxt->nameMax *
1845
540k
                                            sizeof(ctxt->pushTab[0]));
1846
540k
        if (ctxt->pushTab == NULL)
1847
0
            goto mem_error;
1848
540k
    }
1849
44.6M
    ctxt->nameTab[ctxt->nameNr] = value;
1850
44.6M
    ctxt->name = value;
1851
44.6M
    tag = &ctxt->pushTab[ctxt->nameNr];
1852
44.6M
    tag->prefix = prefix;
1853
44.6M
    tag->URI = URI;
1854
44.6M
    tag->line = line;
1855
44.6M
    tag->nsNr = nsNr;
1856
44.6M
    return (ctxt->nameNr++);
1857
0
mem_error:
1858
0
    xmlErrMemory(ctxt, NULL);
1859
0
    return (-1);
1860
44.6M
}
1861
#ifdef LIBXML_PUSH_ENABLED
/**
 * nameNsPop:
 * @ctxt: an XML parser context
 *
 * Pops the top element name from the name stack and makes the entry
 * below it, if any, the current name.
 *
 * Returns the name just removed, or NULL if the stack is empty
 */
static const xmlChar *
nameNsPop(xmlParserCtxtPtr ctxt)
{
    const xmlChar *popped;
    int top;

    if (ctxt->nameNr <= 0)
        return(NULL);

    top = --ctxt->nameNr;
    ctxt->name = (top > 0) ? ctxt->nameTab[top - 1] : NULL;

    popped = ctxt->nameTab[top];
    ctxt->nameTab[top] = NULL;
    return(popped);
}
#endif /* LIBXML_PUSH_ENABLED */
1887
1888
/**
1889
 * namePush:
1890
 * @ctxt:  an XML parser context
1891
 * @value:  the element name
1892
 *
1893
 * Pushes a new element name on top of the name stack
1894
 *
1895
 * Returns -1 in case of error, the index in the stack otherwise
1896
 */
1897
int
1898
namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1899
0
{
1900
0
    if (ctxt == NULL) return (-1);
1901
1902
0
    if (ctxt->nameNr >= ctxt->nameMax) {
1903
0
        const xmlChar * *tmp;
1904
0
        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1905
0
                                    ctxt->nameMax * 2 *
1906
0
                                    sizeof(ctxt->nameTab[0]));
1907
0
        if (tmp == NULL) {
1908
0
      goto mem_error;
1909
0
        }
1910
0
  ctxt->nameTab = tmp;
1911
0
        ctxt->nameMax *= 2;
1912
0
    }
1913
0
    ctxt->nameTab[ctxt->nameNr] = value;
1914
0
    ctxt->name = value;
1915
0
    return (ctxt->nameNr++);
1916
0
mem_error:
1917
0
    xmlErrMemory(ctxt, NULL);
1918
0
    return (-1);
1919
0
}
1920
/**
1921
 * namePop:
1922
 * @ctxt: an XML parser context
1923
 *
1924
 * Pops the top element name from the name stack
1925
 *
1926
 * Returns the name just removed
1927
 */
1928
const xmlChar *
1929
namePop(xmlParserCtxtPtr ctxt)
1930
32.6M
{
1931
32.6M
    const xmlChar *ret;
1932
1933
32.6M
    if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1934
0
        return (NULL);
1935
32.6M
    ctxt->nameNr--;
1936
32.6M
    if (ctxt->nameNr > 0)
1937
32.3M
        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1938
239k
    else
1939
239k
        ctxt->name = NULL;
1940
32.6M
    ret = ctxt->nameTab[ctxt->nameNr];
1941
32.6M
    ctxt->nameTab[ctxt->nameNr] = NULL;
1942
32.6M
    return (ret);
1943
32.6M
}
1944
1945
55.7M
/*
 * spacePush:
 * Pushes @val on the ctxt->spaceTab stack, doubling the table when it
 * is full, and points ctxt->space at the new top entry.
 *
 * Returns -1 if the table cannot be grown, the index of the new entry
 * otherwise.
 */
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
    if (ctxt->spaceNr >= ctxt->spaceMax) {
        int *grown;

        ctxt->spaceMax *= 2;
        grown = (int *) xmlRealloc(ctxt->spaceTab,
                             ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
        if (grown == NULL) {
            xmlErrMemory(ctxt, NULL);
            /* allocation failed: restore the previous capacity */
            ctxt->spaceMax /= 2;
            return(-1);
        }
        ctxt->spaceTab = grown;
    }

    ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
    *ctxt->space = val;
    return(ctxt->spaceNr++);
}
1963
1964
54.6M
/*
 * spacePop:
 * Pops the top value from the ctxt->spaceTab stack. ctxt->space is
 * moved to the entry below, or to the first slot when the stack
 * becomes empty, and the vacated slot is reset to -1.
 *
 * Returns the popped value, or 0 if the stack was already empty.
 */
static int spacePop(xmlParserCtxtPtr ctxt) {
    int popped;
    int top;

    if (ctxt->spaceNr <= 0)
        return(0);

    top = --ctxt->spaceNr;
    ctxt->space = &ctxt->spaceTab[(top > 0) ? (top - 1) : 0];

    popped = ctxt->spaceTab[top];
    ctxt->spaceTab[top] = -1;
    return(popped);
}
1976
1977
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * All of them expect a variable named `ctxt` (the parser context) to be
 * in scope at the point of use.
 *
 * Dirty macros, i.e. one often needs to make assumptions on the context to
 * use them
 *
 *   CUR_PTR returns the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR it should be used only
 *           to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 * Clean macros, not dependent on an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops up unfinished entities on the fly.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */

#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur
#define BASE_PTR ctxt->input->base

/*
 * CMPn(s, c1..cn): true when the first n bytes at s equal c1..cn.
 * Comparison is done on unsigned char and stops at the first mismatch.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )

/* Advance by val bytes (column too); refill when a 0 byte is reached. */
#define SKIP(val) do {							\
    ctxt->input->cur += (val),ctxt->input->col+=(val);			\
    if (*ctxt->input->cur == 0)						\
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
  } while (0)

/* Like SKIP but walks byte by byte so line/col follow any newlines. */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
        if (*(ctxt->input->cur) == '\n') {				\
            ctxt->input->line++; ctxt->input->col = 1;			\
        } else ctxt->input->col++;					\
        ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == 0)						\
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
  } while (0)

/*
 * NOTE: SHRINK expands to a bare `if` statement that already ends with
 * a semicolon; do not use it as the body of an if/else without braces.
 */
#define SHRINK if ((ctxt->progressive == 0) &&				\
		   (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
		   (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
	xmlSHRINK (ctxt);
2058
2059
4.54M
/*
 * xmlSHRINK:
 * Out-of-line part of the SHRINK macro. The input is shrunk only when
 * it has a buffer with an encoder or a read callback; inputs without
 * either are left untouched. Afterwards more data is requested if the
 * current position sits on a 0 byte.
 */
static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
    /* Don't shrink memory buffers. */
    if (ctxt->input->buf != NULL) {
        if ((ctxt->input->buf->encoder) ||
            (ctxt->input->buf->readcallback)) {
            xmlParserInputShrink(ctxt->input);
        }
    }

    if (*ctxt->input->cur == 0) {
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
    }
}
2067
2068
3.45G
#define GROW if ((ctxt->progressive == 0) &&       \
2069
3.45G
     (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
2070
3.45G
  xmlGROW (ctxt);
2071
2072
797M
static void xmlGROW (xmlParserCtxtPtr ctxt) {
2073
797M
    ptrdiff_t curEnd = ctxt->input->end - ctxt->input->cur;
2074
797M
    ptrdiff_t curBase = ctxt->input->cur - ctxt->input->base;
2075
2076
797M
    if (((curEnd > XML_MAX_LOOKUP_LIMIT) ||
2077
797M
         (curBase > XML_MAX_LOOKUP_LIMIT)) &&
2078
797M
         ((ctxt->input->buf) &&
2079
0
          (ctxt->input->buf->readcallback != NULL)) &&
2080
797M
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
2081
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
2082
0
        xmlHaltParser(ctxt);
2083
0
  return;
2084
0
    }
2085
797M
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2086
797M
    if ((ctxt->input->cur > ctxt->input->end) ||
2087
797M
        (ctxt->input->cur < ctxt->input->base)) {
2088
0
        xmlHaltParser(ctxt);
2089
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "cur index out of bound");
2090
0
  return;
2091
0
    }
2092
797M
    if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0))
2093
18.2M
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2094
797M
}
2095
2096
812M
/* Skip blanks at the current position, see xmlSkipBlankChars(). */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* Advance by one character through xmlNextChar(). */
#define NEXT xmlNextChar(ctxt)

/*
 * Advance by exactly one byte without newline handling; refill when a
 * 0 byte is reached.
 * NOTE: expands to a brace block, not a do/while(0) statement.
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/* Advance past the current character of l bytes, updating line/col. */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);						\
  } while (0)

#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * Append character v to buffer b at index i, advancing i: a single
 * byte store when l is 1, xmlCopyCharMultiByte() otherwise.
 * NOTE: expands to a bare if/else without a trailing semicolon.
 */
#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (v);					\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
2120
2121
/**
2122
 * xmlSkipBlankChars:
2123
 * @ctxt:  the XML parser context
2124
 *
2125
 * skip all blanks character found at that point in the input streams.
2126
 * It pops up finished entities in the process if allowable at that point.
2127
 *
2128
 * Returns the number of space chars skipped
2129
 */
2130
2131
int
2132
812M
xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2133
812M
    int res = 0;
2134
2135
    /*
2136
     * It's Okay to use CUR/NEXT here since all the blanks are on
2137
     * the ASCII range.
2138
     */
2139
812M
    if (((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) ||
2140
812M
        (ctxt->instate == XML_PARSER_START)) {
2141
227M
  const xmlChar *cur;
2142
  /*
2143
   * if we are in the document content, go really fast
2144
   */
2145
227M
  cur = ctxt->input->cur;
2146
227M
  while (IS_BLANK_CH(*cur)) {
2147
65.8M
      if (*cur == '\n') {
2148
2.64M
    ctxt->input->line++; ctxt->input->col = 1;
2149
63.1M
      } else {
2150
63.1M
    ctxt->input->col++;
2151
63.1M
      }
2152
65.8M
      cur++;
2153
65.8M
      if (res < INT_MAX)
2154
65.8M
    res++;
2155
65.8M
      if (*cur == 0) {
2156
103k
    ctxt->input->cur = cur;
2157
103k
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2158
103k
    cur = ctxt->input->cur;
2159
103k
      }
2160
65.8M
  }
2161
227M
  ctxt->input->cur = cur;
2162
585M
    } else {
2163
585M
        int expandPE = ((ctxt->external != 0) || (ctxt->inputNr != 1));
2164
2165
2.07G
  while (ctxt->instate != XML_PARSER_EOF) {
2166
2.07G
            if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2167
705M
    NEXT;
2168
1.36G
      } else if (CUR == '%') {
2169
                /*
2170
                 * Need to handle support of entities branching here
2171
                 */
2172
439M
          if ((expandPE == 0) || (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2173
3.83M
                    break;
2174
435M
          xmlParsePEReference(ctxt);
2175
926M
            } else if (CUR == 0) {
2176
344M
                unsigned long consumed;
2177
344M
                xmlEntityPtr ent;
2178
2179
344M
                if (ctxt->inputNr <= 1)
2180
81.7k
                    break;
2181
2182
344M
                consumed = ctxt->input->consumed;
2183
344M
                xmlSaturatedAddSizeT(&consumed,
2184
344M
                                     ctxt->input->cur - ctxt->input->base);
2185
2186
                /*
2187
                 * Add to sizeentities when parsing an external entity
2188
                 * for the first time.
2189
                 */
2190
344M
                ent = ctxt->input->entity;
2191
344M
                if ((ent->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
2192
344M
                    ((ent->flags & XML_ENT_PARSED) == 0)) {
2193
9.94k
                    ent->flags |= XML_ENT_PARSED;
2194
2195
9.94k
                    xmlSaturatedAdd(&ctxt->sizeentities, consumed);
2196
9.94k
                }
2197
2198
344M
                xmlParserEntityCheck(ctxt, consumed);
2199
2200
344M
                xmlPopInput(ctxt);
2201
581M
            } else {
2202
581M
                break;
2203
581M
            }
2204
2205
            /*
2206
             * Also increase the counter when entering or exiting a PERef.
2207
             * The spec says: "When a parameter-entity reference is recognized
2208
             * in the DTD and included, its replacement text MUST be enlarged
2209
             * by the attachment of one leading and one following space (#x20)
2210
             * character."
2211
             */
2212
1.48G
      if (res < INT_MAX)
2213
1.48G
    res++;
2214
1.48G
        }
2215
585M
    }
2216
812M
    return(res);
2217
812M
}
2218
2219
/************************************************************************
2220
 *                  *
2221
 *    Commodity functions to handle entities      *
2222
 *                  *
2223
 ************************************************************************/
2224
2225
/**
2226
 * xmlPopInput:
2227
 * @ctxt:  an XML parser context
2228
 *
2229
 * xmlPopInput: the current input pointed by ctxt->input came to an end
2230
 *          pop it and return the next char.
2231
 *
2232
 * Returns the current xmlChar in the parser context
2233
 */
2234
xmlChar
2235
344M
xmlPopInput(xmlParserCtxtPtr ctxt) {
2236
344M
    xmlParserInputPtr input;
2237
2238
344M
    if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2239
344M
    if (xmlParserDebugEntities)
2240
0
  xmlGenericError(xmlGenericErrorContext,
2241
0
    "Popping input %d\n", ctxt->inputNr);
2242
344M
    if ((ctxt->inputNr > 1) && (ctxt->inSubset == 0) &&
2243
344M
        (ctxt->instate != XML_PARSER_EOF))
2244
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2245
0
                    "Unfinished entity outside the DTD");
2246
344M
    input = inputPop(ctxt);
2247
344M
    if (input->entity != NULL)
2248
344M
        input->entity->flags &= ~XML_ENT_EXPANDING;
2249
344M
    xmlFreeInputStream(input);
2250
344M
    if (*ctxt->input->cur == 0)
2251
165M
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2252
344M
    return(CUR);
2253
344M
}
2254
2255
/**
2256
 * xmlPushInput:
2257
 * @ctxt:  an XML parser context
2258
 * @input:  an XML parser input fragment (entity, XML fragment ...).
2259
 *
2260
 * xmlPushInput: switch to a new input stream which is stacked on top
2261
 *               of the previous one(s).
2262
 * Returns -1 in case of error or the index in the input stack
2263
 */
2264
int
2265
344M
xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2266
344M
    int ret;
2267
344M
    if (input == NULL) return(-1);
2268
2269
344M
    if (xmlParserDebugEntities) {
2270
0
  if ((ctxt->input != NULL) && (ctxt->input->filename))
2271
0
      xmlGenericError(xmlGenericErrorContext,
2272
0
        "%s(%d): ", ctxt->input->filename,
2273
0
        ctxt->input->line);
2274
0
  xmlGenericError(xmlGenericErrorContext,
2275
0
    "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2276
0
    }
2277
344M
    if (((ctxt->inputNr > 40) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2278
344M
        (ctxt->inputNr > 100)) {
2279
0
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2280
0
        while (ctxt->inputNr > 1)
2281
0
            xmlFreeInputStream(inputPop(ctxt));
2282
0
  return(-1);
2283
0
    }
2284
344M
    ret = inputPush(ctxt, input);
2285
344M
    if (ctxt->instate == XML_PARSER_EOF)
2286
0
        return(-1);
2287
344M
    GROW;
2288
344M
    return(ret);
2289
344M
}
2290
2291
/**
2292
 * xmlParseCharRef:
2293
 * @ctxt:  an XML parser context
2294
 *
2295
 * DEPRECATED: Internal function, don't use.
2296
 *
2297
 * Parse a numeric character reference. Always consumes '&'.
2298
 *
2299
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2300
 *                  '&#x' [0-9a-fA-F]+ ';'
2301
 *
2302
 * [ WFC: Legal Character ]
2303
 * Characters referred to using character references must match the
2304
 * production for Char.
2305
 *
2306
 * Returns the value parsed (as an int), 0 in case of error
2307
 */
2308
int
2309
1.14M
xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2310
1.14M
    int val = 0;
2311
1.14M
    int count = 0;
2312
2313
    /*
2314
     * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2315
     */
2316
1.14M
    if ((RAW == '&') && (NXT(1) == '#') &&
2317
1.14M
        (NXT(2) == 'x')) {
2318
586k
  SKIP(3);
2319
586k
  GROW;
2320
1.55M
  while (RAW != ';') { /* loop blocked by count */
2321
1.06M
      if (count++ > 20) {
2322
26.6k
    count = 0;
2323
26.6k
    GROW;
2324
26.6k
                if (ctxt->instate == XML_PARSER_EOF)
2325
0
                    return(0);
2326
26.6k
      }
2327
1.06M
      if ((RAW >= '0') && (RAW <= '9'))
2328
486k
          val = val * 16 + (CUR - '0');
2329
576k
      else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2330
423k
          val = val * 16 + (CUR - 'a') + 10;
2331
153k
      else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2332
56.2k
          val = val * 16 + (CUR - 'A') + 10;
2333
96.9k
      else {
2334
96.9k
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2335
96.9k
    val = 0;
2336
96.9k
    break;
2337
96.9k
      }
2338
966k
      if (val > 0x110000)
2339
289k
          val = 0x110000;
2340
2341
966k
      NEXT;
2342
966k
      count++;
2343
966k
  }
2344
586k
  if (RAW == ';') {
2345
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2346
489k
      ctxt->input->col++;
2347
489k
      ctxt->input->cur++;
2348
489k
  }
2349
586k
    } else if  ((RAW == '&') && (NXT(1) == '#')) {
2350
556k
  SKIP(2);
2351
556k
  GROW;
2352
2.18M
  while (RAW != ';') { /* loop blocked by count */
2353
1.72M
      if (count++ > 20) {
2354
30.2k
    count = 0;
2355
30.2k
    GROW;
2356
30.2k
                if (ctxt->instate == XML_PARSER_EOF)
2357
0
                    return(0);
2358
30.2k
      }
2359
1.72M
      if ((RAW >= '0') && (RAW <= '9'))
2360
1.62M
          val = val * 10 + (CUR - '0');
2361
101k
      else {
2362
101k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2363
101k
    val = 0;
2364
101k
    break;
2365
101k
      }
2366
1.62M
      if (val > 0x110000)
2367
327k
          val = 0x110000;
2368
2369
1.62M
      NEXT;
2370
1.62M
      count++;
2371
1.62M
  }
2372
556k
  if (RAW == ';') {
2373
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2374
455k
      ctxt->input->col++;
2375
455k
      ctxt->input->cur++;
2376
455k
  }
2377
556k
    } else {
2378
0
        if (RAW == '&')
2379
0
            SKIP(1);
2380
0
        xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2381
0
    }
2382
2383
    /*
2384
     * [ WFC: Legal Character ]
2385
     * Characters referred to using character references must match the
2386
     * production for Char.
2387
     */
2388
1.14M
    if (val >= 0x110000) {
2389
2.91k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2390
2.91k
                "xmlParseCharRef: character reference out of bounds\n",
2391
2.91k
          val);
2392
1.13M
    } else if (IS_CHAR(val)) {
2393
917k
        return(val);
2394
917k
    } else {
2395
221k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2396
221k
                          "xmlParseCharRef: invalid xmlChar value %d\n",
2397
221k
                    val);
2398
221k
    }
2399
224k
    return(0);
2400
1.14M
}
2401
2402
/**
2403
 * xmlParseStringCharRef:
2404
 * @ctxt:  an XML parser context
2405
 * @str:  a pointer to an index in the string
2406
 *
2407
 * parse Reference declarations, variant parsing from a string rather
2408
 * than an an input flow.
2409
 *
2410
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2411
 *                  '&#x' [0-9a-fA-F]+ ';'
2412
 *
2413
 * [ WFC: Legal Character ]
2414
 * Characters referred to using character references must match the
2415
 * production for Char.
2416
 *
2417
 * Returns the value parsed (as an int), 0 in case of error, str will be
2418
 *         updated to the current value of the index
2419
 */
2420
static int
2421
928k
xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2422
928k
    const xmlChar *ptr;
2423
928k
    xmlChar cur;
2424
928k
    int val = 0;
2425
2426
928k
    if ((str == NULL) || (*str == NULL)) return(0);
2427
928k
    ptr = *str;
2428
928k
    cur = *ptr;
2429
928k
    if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2430
50.3k
  ptr += 3;
2431
50.3k
  cur = *ptr;
2432
141k
  while (cur != ';') { /* Non input consuming loop */
2433
93.7k
      if ((cur >= '0') && (cur <= '9'))
2434
26.8k
          val = val * 16 + (cur - '0');
2435
66.9k
      else if ((cur >= 'a') && (cur <= 'f'))
2436
10.2k
          val = val * 16 + (cur - 'a') + 10;
2437
56.7k
      else if ((cur >= 'A') && (cur <= 'F'))
2438
53.9k
          val = val * 16 + (cur - 'A') + 10;
2439
2.75k
      else {
2440
2.75k
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2441
2.75k
    val = 0;
2442
2.75k
    break;
2443
2.75k
      }
2444
91.0k
      if (val > 0x110000)
2445
27.0k
          val = 0x110000;
2446
2447
91.0k
      ptr++;
2448
91.0k
      cur = *ptr;
2449
91.0k
  }
2450
50.3k
  if (cur == ';')
2451
47.6k
      ptr++;
2452
877k
    } else if  ((cur == '&') && (ptr[1] == '#')){
2453
877k
  ptr += 2;
2454
877k
  cur = *ptr;
2455
2.80M
  while (cur != ';') { /* Non input consuming loops */
2456
1.93M
      if ((cur >= '0') && (cur <= '9'))
2457
1.92M
          val = val * 10 + (cur - '0');
2458
4.76k
      else {
2459
4.76k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2460
4.76k
    val = 0;
2461
4.76k
    break;
2462
4.76k
      }
2463
1.92M
      if (val > 0x110000)
2464
7.31k
          val = 0x110000;
2465
2466
1.92M
      ptr++;
2467
1.92M
      cur = *ptr;
2468
1.92M
  }
2469
877k
  if (cur == ';')
2470
872k
      ptr++;
2471
877k
    } else {
2472
0
  xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2473
0
  return(0);
2474
0
    }
2475
928k
    *str = ptr;
2476
2477
    /*
2478
     * [ WFC: Legal Character ]
2479
     * Characters referred to using character references must match the
2480
     * production for Char.
2481
     */
2482
928k
    if (val >= 0x110000) {
2483
643
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2484
643
                "xmlParseStringCharRef: character reference out of bounds\n",
2485
643
                val);
2486
927k
    } else if (IS_CHAR(val)) {
2487
918k
        return(val);
2488
918k
    } else {
2489
8.93k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2490
8.93k
        "xmlParseStringCharRef: invalid xmlChar value %d\n",
2491
8.93k
        val);
2492
8.93k
    }
2493
9.58k
    return(0);
2494
928k
}
2495
2496
/**
2497
 * xmlParserHandlePEReference:
2498
 * @ctxt:  the parser context
2499
 *
2500
 * [69] PEReference ::= '%' Name ';'
2501
 *
2502
 * [ WFC: No Recursion ]
2503
 * A parsed entity must not contain a recursive
2504
 * reference to itself, either directly or indirectly.
2505
 *
2506
 * [ WFC: Entity Declared ]
2507
 * In a document without any DTD, a document with only an internal DTD
2508
 * subset which contains no parameter entity references, or a document
2509
 * with "standalone='yes'", ...  ... The declaration of a parameter
2510
 * entity must precede any reference to it...
2511
 *
2512
 * [ VC: Entity Declared ]
2513
 * In a document with an external subset or external parameter entities
2514
 * with "standalone='no'", ...  ... The declaration of a parameter entity
2515
 * must precede any reference to it...
2516
 *
2517
 * [ WFC: In DTD ]
2518
 * Parameter-entity references may only appear in the DTD.
2519
 * NOTE: misleading but this is handled.
2520
 *
2521
 * A PEReference may have been detected in the current input stream
2522
 * the handling is done accordingly to
2523
 *      http://www.w3.org/TR/REC-xml#entproc
2524
 * i.e.
2525
 *   - Included in literal in entity values
2526
 *   - Included as Parameter Entity reference within DTDs
2527
 */
2528
void
2529
0
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2530
0
    switch(ctxt->instate) {
2531
0
  case XML_PARSER_CDATA_SECTION:
2532
0
      return;
2533
0
        case XML_PARSER_COMMENT:
2534
0
      return;
2535
0
  case XML_PARSER_START_TAG:
2536
0
      return;
2537
0
  case XML_PARSER_END_TAG:
2538
0
      return;
2539
0
        case XML_PARSER_EOF:
2540
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2541
0
      return;
2542
0
        case XML_PARSER_PROLOG:
2543
0
  case XML_PARSER_START:
2544
0
  case XML_PARSER_MISC:
2545
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2546
0
      return;
2547
0
  case XML_PARSER_ENTITY_DECL:
2548
0
        case XML_PARSER_CONTENT:
2549
0
        case XML_PARSER_ATTRIBUTE_VALUE:
2550
0
        case XML_PARSER_PI:
2551
0
  case XML_PARSER_SYSTEM_LITERAL:
2552
0
  case XML_PARSER_PUBLIC_LITERAL:
2553
      /* we just ignore it there */
2554
0
      return;
2555
0
        case XML_PARSER_EPILOG:
2556
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2557
0
      return;
2558
0
  case XML_PARSER_ENTITY_VALUE:
2559
      /*
2560
       * NOTE: in the case of entity values, we don't do the
2561
       *       substitution here since we need the literal
2562
       *       entity value to be able to save the internal
2563
       *       subset of the document.
2564
       *       This will be handled by xmlStringDecodeEntities
2565
       */
2566
0
      return;
2567
0
        case XML_PARSER_DTD:
2568
      /*
2569
       * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2570
       * In the internal DTD subset, parameter-entity references
2571
       * can occur only where markup declarations can occur, not
2572
       * within markup declarations.
2573
       * In that case this is handled in xmlParseMarkupDecl
2574
       */
2575
0
      if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2576
0
    return;
2577
0
      if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2578
0
    return;
2579
0
            break;
2580
0
        case XML_PARSER_IGNORE:
2581
0
            return;
2582
0
    }
2583
2584
0
    xmlParsePEReference(ctxt);
2585
0
}
2586
2587
/*
 * Macro used to grow the current buffer to twice its size plus n bytes.
 * buffer##_size is expected to be a size_t
 * mem_error: is expected to handle memory allocation failures; it is
 * also jumped to when the new size computation wraps around. On
 * failure buffer and buffer##_size are left unchanged.
 */
#define growBuffer(buffer, n) {						\
    xmlChar *tmp;							\
    size_t new_size = buffer##_size * 2 + (n);				\
    if (new_size < buffer##_size) goto mem_error;			\
    tmp = (xmlChar *) xmlRealloc((buffer), new_size);			\
    if (tmp == NULL) goto mem_error;					\
    buffer = tmp;							\
    buffer##_size = new_size;						\
}
2601
2602
/**
2603
 * xmlStringDecodeEntitiesInt:
2604
 * @ctxt:  the parser context
2605
 * @str:  the input string
2606
 * @len: the string length
2607
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2608
 * @end:  an end marker xmlChar, 0 if none
2609
 * @end2:  an end marker xmlChar, 0 if none
2610
 * @end3:  an end marker xmlChar, 0 if none
2611
 * @check:  whether to perform entity checks
2612
 */
2613
static xmlChar *
2614
xmlStringDecodeEntitiesInt(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2615
               int what, xmlChar end, xmlChar  end2, xmlChar end3,
2616
179M
                           int check) {
2617
179M
    xmlChar *buffer = NULL;
2618
179M
    size_t buffer_size = 0;
2619
179M
    size_t nbchars = 0;
2620
2621
179M
    xmlChar *current = NULL;
2622
179M
    xmlChar *rep = NULL;
2623
179M
    const xmlChar *last;
2624
179M
    xmlEntityPtr ent;
2625
179M
    int c,l;
2626
2627
179M
    if (str == NULL)
2628
27.4k
        return(NULL);
2629
179M
    last = str + len;
2630
2631
179M
    if (((ctxt->depth > 40) &&
2632
179M
         ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2633
179M
  (ctxt->depth > 100)) {
2634
0
  xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_LOOP,
2635
0
                       "Maximum entity nesting depth exceeded");
2636
0
  return(NULL);
2637
0
    }
2638
2639
    /*
2640
     * allocate a translation buffer.
2641
     */
2642
179M
    buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2643
179M
    buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
2644
179M
    if (buffer == NULL) goto mem_error;
2645
2646
    /*
2647
     * OK loop until we reach one of the ending char or a size limit.
2648
     * we are operating on already parsed values.
2649
     */
2650
179M
    if (str < last)
2651
178M
  c = CUR_SCHAR(str, l);
2652
1.17M
    else
2653
1.17M
        c = 0;
2654
23.2G
    while ((c != 0) && (c != end) && /* non input consuming loop */
2655
23.2G
           (c != end2) && (c != end3) &&
2656
23.2G
           (ctxt->instate != XML_PARSER_EOF)) {
2657
2658
23.0G
  if (c == 0) break;
2659
23.0G
        if ((c == '&') && (str[1] == '#')) {
2660
928k
      int val = xmlParseStringCharRef(ctxt, &str);
2661
928k
      if (val == 0)
2662
9.58k
                goto int_error;
2663
918k
      COPY_BUF(0,buffer,nbchars,val);
2664
918k
      if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2665
274
          growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2666
274
      }
2667
23.0G
  } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2668
3.73G
      if (xmlParserDebugEntities)
2669
0
    xmlGenericError(xmlGenericErrorContext,
2670
0
      "String decoding Entity Reference: %.30s\n",
2671
0
      str);
2672
3.73G
      ent = xmlParseStringEntityRef(ctxt, &str);
2673
3.73G
      if ((ent != NULL) &&
2674
3.73G
    (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2675
1.69M
    if (ent->content != NULL) {
2676
1.69M
        COPY_BUF(0,buffer,nbchars,ent->content[0]);
2677
1.69M
        if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2678
376
      growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2679
376
        }
2680
1.69M
    } else {
2681
0
        xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2682
0
          "predefined entity has no content\n");
2683
0
                    goto int_error;
2684
0
    }
2685
3.73G
      } else if ((ent != NULL) && (ent->content != NULL)) {
2686
166M
          if ((check) && (xmlParserEntityCheck(ctxt, ent->length)))
2687
2.75k
                    goto int_error;
2688
2689
166M
                if (ent->flags & XML_ENT_EXPANDING) {
2690
733
              xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2691
733
                    xmlHaltParser(ctxt);
2692
733
                    ent->content[0] = 0;
2693
733
                    goto int_error;
2694
733
                }
2695
2696
166M
                ent->flags |= XML_ENT_EXPANDING;
2697
166M
    ctxt->depth++;
2698
166M
    rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
2699
166M
                        ent->length, what, 0, 0, 0, check);
2700
166M
    ctxt->depth--;
2701
166M
                ent->flags &= ~XML_ENT_EXPANDING;
2702
2703
166M
    if (rep == NULL) {
2704
56.8k
                    ent->content[0] = 0;
2705
56.8k
                    goto int_error;
2706
56.8k
                }
2707
2708
165M
                current = rep;
2709
117G
                while (*current != 0) { /* non input consuming loop */
2710
117G
                    buffer[nbchars++] = *current++;
2711
117G
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2712
25.6M
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2713
25.6M
                    }
2714
117G
                }
2715
165M
                xmlFree(rep);
2716
165M
                rep = NULL;
2717
3.57G
      } else if (ent != NULL) {
2718
43.0M
    int i = xmlStrlen(ent->name);
2719
43.0M
    const xmlChar *cur = ent->name;
2720
2721
43.0M
    buffer[nbchars++] = '&';
2722
43.0M
    if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
2723
1.59M
        growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
2724
1.59M
    }
2725
872M
    for (;i > 0;i--)
2726
829M
        buffer[nbchars++] = *cur++;
2727
43.0M
    buffer[nbchars++] = ';';
2728
43.0M
      }
2729
19.3G
  } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2730
3.54M
      if (xmlParserDebugEntities)
2731
0
    xmlGenericError(xmlGenericErrorContext,
2732
0
      "String decoding PE Reference: %.30s\n", str);
2733
3.54M
      ent = xmlParseStringPEReference(ctxt, &str);
2734
3.54M
      if (ent != NULL) {
2735
3.38M
                if (ent->content == NULL) {
2736
        /*
2737
         * Note: external parsed entities will not be loaded,
2738
         * it is not required for a non-validating parser to
2739
         * complete external PEReferences coming from the
2740
         * internal subset
2741
         */
2742
5.79k
        if (((ctxt->options & XML_PARSE_NOENT) != 0) ||
2743
5.79k
      ((ctxt->options & XML_PARSE_DTDVALID) != 0) ||
2744
5.79k
      (ctxt->validate != 0)) {
2745
5.44k
      xmlLoadEntityContent(ctxt, ent);
2746
5.44k
        } else {
2747
348
      xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
2748
348
      "not validating will not read content for PE entity %s\n",
2749
348
                          ent->name, NULL);
2750
348
        }
2751
5.79k
    }
2752
2753
3.38M
          if ((check) && (xmlParserEntityCheck(ctxt, ent->length)))
2754
251
                    goto int_error;
2755
2756
3.38M
                if (ent->flags & XML_ENT_EXPANDING) {
2757
624
              xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2758
624
                    xmlHaltParser(ctxt);
2759
624
                    if (ent->content != NULL)
2760
342
                        ent->content[0] = 0;
2761
624
                    goto int_error;
2762
624
                }
2763
2764
3.38M
                ent->flags |= XML_ENT_EXPANDING;
2765
3.38M
    ctxt->depth++;
2766
3.38M
    rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
2767
3.38M
                        ent->length, what, 0, 0, 0, check);
2768
3.38M
    ctxt->depth--;
2769
3.38M
                ent->flags &= ~XML_ENT_EXPANDING;
2770
2771
3.38M
    if (rep == NULL) {
2772
3.74k
                    if (ent->content != NULL)
2773
466
                        ent->content[0] = 0;
2774
3.74k
                    goto int_error;
2775
3.74k
                }
2776
3.38M
                current = rep;
2777
3.25G
                while (*current != 0) { /* non input consuming loop */
2778
3.25G
                    buffer[nbchars++] = *current++;
2779
3.25G
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2780
735k
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2781
735k
                    }
2782
3.25G
                }
2783
3.38M
                xmlFree(rep);
2784
3.38M
                rep = NULL;
2785
3.38M
      }
2786
19.2G
  } else {
2787
19.2G
      COPY_BUF(l,buffer,nbchars,c);
2788
19.2G
      str += l;
2789
19.2G
      if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2790
7.29M
          growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2791
7.29M
      }
2792
19.2G
  }
2793
23.0G
  if (str < last)
2794
22.8G
      c = CUR_SCHAR(str, l);
2795
178M
  else
2796
178M
      c = 0;
2797
23.0G
    }
2798
179M
    buffer[nbchars] = 0;
2799
179M
    return(buffer);
2800
2801
0
mem_error:
2802
0
    xmlErrMemory(ctxt, NULL);
2803
74.5k
int_error:
2804
74.5k
    if (rep != NULL)
2805
0
        xmlFree(rep);
2806
74.5k
    if (buffer != NULL)
2807
74.5k
        xmlFree(buffer);
2808
74.5k
    return(NULL);
2809
0
}
2810
2811
/**
2812
 * xmlStringLenDecodeEntities:
2813
 * @ctxt:  the parser context
2814
 * @str:  the input string
2815
 * @len: the string length
2816
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2817
 * @end:  an end marker xmlChar, 0 if none
2818
 * @end2:  an end marker xmlChar, 0 if none
2819
 * @end3:  an end marker xmlChar, 0 if none
2820
 *
2821
 * DEPRECATED: Internal function, don't use.
2822
 *
2823
 * Takes a entity string content and process to do the adequate substitutions.
2824
 *
2825
 * [67] Reference ::= EntityRef | CharRef
2826
 *
2827
 * [69] PEReference ::= '%' Name ';'
2828
 *
2829
 * Returns A newly allocated string with the substitution done. The caller
2830
 *      must deallocate it !
2831
 */
2832
xmlChar *
2833
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2834
                           int what, xmlChar end, xmlChar  end2,
2835
40.7k
                           xmlChar end3) {
2836
40.7k
    if ((ctxt == NULL) || (str == NULL) || (len < 0))
2837
0
        return(NULL);
2838
40.7k
    return(xmlStringDecodeEntitiesInt(ctxt, str, len, what,
2839
40.7k
                                      end, end2, end3, 0));
2840
40.7k
}
2841
2842
/**
2843
 * xmlStringDecodeEntities:
2844
 * @ctxt:  the parser context
2845
 * @str:  the input string
2846
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2847
 * @end:  an end marker xmlChar, 0 if none
2848
 * @end2:  an end marker xmlChar, 0 if none
2849
 * @end3:  an end marker xmlChar, 0 if none
2850
 *
2851
 * DEPRECATED: Internal function, don't use.
2852
 *
2853
 * Takes a entity string content and process to do the adequate substitutions.
2854
 *
2855
 * [67] Reference ::= EntityRef | CharRef
2856
 *
2857
 * [69] PEReference ::= '%' Name ';'
2858
 *
2859
 * Returns A newly allocated string with the substitution done. The caller
2860
 *      must deallocate it !
2861
 */
2862
xmlChar *
2863
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2864
933k
            xmlChar end, xmlChar  end2, xmlChar end3) {
2865
933k
    if ((ctxt == NULL) || (str == NULL)) return(NULL);
2866
933k
    return(xmlStringDecodeEntitiesInt(ctxt, str, xmlStrlen(str), what,
2867
933k
                                      end, end2, end3, 0));
2868
933k
}
2869
2870
/************************************************************************
2871
 *                  *
2872
 *    Commodity functions, cleanup needed ?     *
2873
 *                  *
2874
 ************************************************************************/
2875
2876
/**
2877
 * areBlanks:
2878
 * @ctxt:  an XML parser context
2879
 * @str:  a xmlChar *
2880
 * @len:  the size of @str
2881
 * @blank_chars: we know the chars are blanks
2882
 *
2883
 * Is this a sequence of blank chars that one can ignore ?
2884
 *
2885
 * Returns 1 if ignorable 0 otherwise.
2886
 */
2887
2888
static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2889
21.8M
                     int blank_chars) {
2890
21.8M
    int i, ret;
2891
21.8M
    xmlNodePtr lastChild;
2892
2893
    /*
2894
     * Don't spend time trying to differentiate them, the same callback is
2895
     * used !
2896
     */
2897
21.8M
    if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2898
705k
  return(0);
2899
2900
    /*
2901
     * Check for xml:space value.
2902
     */
2903
21.1M
    if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2904
21.1M
        (*(ctxt->space) == -2))
2905
5.49M
  return(0);
2906
2907
    /*
2908
     * Check that the string is made of blanks
2909
     */
2910
15.6M
    if (blank_chars == 0) {
2911
50.2M
  for (i = 0;i < len;i++)
2912
43.1M
      if (!(IS_BLANK_CH(str[i]))) return(0);
2913
8.78M
    }
2914
2915
    /*
2916
     * Look if the element is mixed content in the DTD if available
2917
     */
2918
13.8M
    if (ctxt->node == NULL) return(0);
2919
13.7M
    if (ctxt->myDoc != NULL) {
2920
13.7M
  ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2921
13.7M
        if (ret == 0) return(1);
2922
10.1M
        if (ret == 1) return(0);
2923
10.1M
    }
2924
2925
    /*
2926
     * Otherwise, heuristic :-\
2927
     */
2928
10.0M
    if ((RAW != '<') && (RAW != 0xD)) return(0);
2929
9.93M
    if ((ctxt->node->children == NULL) &&
2930
9.93M
  (RAW == '<') && (NXT(1) == '/')) return(0);
2931
2932
9.93M
    lastChild = xmlGetLastChild(ctxt->node);
2933
9.93M
    if (lastChild == NULL) {
2934
1.20M
        if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2935
1.20M
            (ctxt->node->content != NULL)) return(0);
2936
8.72M
    } else if (xmlNodeIsText(lastChild))
2937
89.8k
        return(0);
2938
8.63M
    else if ((ctxt->node->children != NULL) &&
2939
8.63M
             (xmlNodeIsText(ctxt->node->children)))
2940
101k
        return(0);
2941
9.74M
    return(1);
2942
9.93M
}
2943
2944
/************************************************************************
2945
 *                  *
2946
 *    Extra stuff for namespace support     *
2947
 *  Relates to http://www.w3.org/TR/WD-xml-names      *
2948
 *                  *
2949
 ************************************************************************/
2950
2951
/**
2952
 * xmlSplitQName:
2953
 * @ctxt:  an XML parser context
2954
 * @name:  an XML parser context
2955
 * @prefix:  a xmlChar **
2956
 *
2957
 * parse an UTF8 encoded XML qualified name string
2958
 *
2959
 * [NS 5] QName ::= (Prefix ':')? LocalPart
2960
 *
2961
 * [NS 6] Prefix ::= NCName
2962
 *
2963
 * [NS 7] LocalPart ::= NCName
2964
 *
2965
 * Returns the local part, and prefix is updated
2966
 *   to get the Prefix if any.
2967
 */
2968
2969
xmlChar *
2970
54.3M
xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2971
54.3M
    xmlChar buf[XML_MAX_NAMELEN + 5];
2972
54.3M
    xmlChar *buffer = NULL;
2973
54.3M
    int len = 0;
2974
54.3M
    int max = XML_MAX_NAMELEN;
2975
54.3M
    xmlChar *ret = NULL;
2976
54.3M
    const xmlChar *cur = name;
2977
54.3M
    int c;
2978
2979
54.3M
    if (prefix == NULL) return(NULL);
2980
54.3M
    *prefix = NULL;
2981
2982
54.3M
    if (cur == NULL) return(NULL);
2983
2984
#ifndef XML_XML_NAMESPACE
2985
    /* xml: prefix is not really a namespace */
2986
    if ((cur[0] == 'x') && (cur[1] == 'm') &&
2987
        (cur[2] == 'l') && (cur[3] == ':'))
2988
  return(xmlStrdup(name));
2989
#endif
2990
2991
    /* nasty but well=formed */
2992
54.3M
    if (cur[0] == ':')
2993
18.4k
  return(xmlStrdup(name));
2994
2995
54.3M
    c = *cur++;
2996
226M
    while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2997
171M
  buf[len++] = c;
2998
171M
  c = *cur++;
2999
171M
    }
3000
54.3M
    if (len >= max) {
3001
  /*
3002
   * Okay someone managed to make a huge name, so he's ready to pay
3003
   * for the processing speed.
3004
   */
3005
10.3k
  max = len * 2;
3006
3007
10.3k
  buffer = (xmlChar *) xmlMallocAtomic(max);
3008
10.3k
  if (buffer == NULL) {
3009
0
      xmlErrMemory(ctxt, NULL);
3010
0
      return(NULL);
3011
0
  }
3012
10.3k
  memcpy(buffer, buf, len);
3013
15.8M
  while ((c != 0) && (c != ':')) { /* tested bigname.xml */
3014
15.8M
      if (len + 10 > max) {
3015
15.2k
          xmlChar *tmp;
3016
3017
15.2k
    max *= 2;
3018
15.2k
    tmp = (xmlChar *) xmlRealloc(buffer, max);
3019
15.2k
    if (tmp == NULL) {
3020
0
        xmlFree(buffer);
3021
0
        xmlErrMemory(ctxt, NULL);
3022
0
        return(NULL);
3023
0
    }
3024
15.2k
    buffer = tmp;
3025
15.2k
      }
3026
15.8M
      buffer[len++] = c;
3027
15.8M
      c = *cur++;
3028
15.8M
  }
3029
10.3k
  buffer[len] = 0;
3030
10.3k
    }
3031
3032
54.3M
    if ((c == ':') && (*cur == 0)) {
3033
28.8k
        if (buffer != NULL)
3034
211
      xmlFree(buffer);
3035
28.8k
  *prefix = NULL;
3036
28.8k
  return(xmlStrdup(name));
3037
28.8k
    }
3038
3039
54.3M
    if (buffer == NULL)
3040
54.3M
  ret = xmlStrndup(buf, len);
3041
10.1k
    else {
3042
10.1k
  ret = buffer;
3043
10.1k
  buffer = NULL;
3044
10.1k
  max = XML_MAX_NAMELEN;
3045
10.1k
    }
3046
3047
3048
54.3M
    if (c == ':') {
3049
2.03M
  c = *cur;
3050
2.03M
        *prefix = ret;
3051
2.03M
  if (c == 0) {
3052
0
      return(xmlStrndup(BAD_CAST "", 0));
3053
0
  }
3054
2.03M
  len = 0;
3055
3056
  /*
3057
   * Check that the first character is proper to start
3058
   * a new name
3059
   */
3060
2.03M
  if (!(((c >= 0x61) && (c <= 0x7A)) ||
3061
2.03M
        ((c >= 0x41) && (c <= 0x5A)) ||
3062
2.03M
        (c == '_') || (c == ':'))) {
3063
27.0k
      int l;
3064
27.0k
      int first = CUR_SCHAR(cur, l);
3065
3066
27.0k
      if (!IS_LETTER(first) && (first != '_')) {
3067
10.2k
    xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3068
10.2k
          "Name %s is not XML Namespace compliant\n",
3069
10.2k
          name);
3070
10.2k
      }
3071
27.0k
  }
3072
2.03M
  cur++;
3073
3074
12.7M
  while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3075
10.7M
      buf[len++] = c;
3076
10.7M
      c = *cur++;
3077
10.7M
  }
3078
2.03M
  if (len >= max) {
3079
      /*
3080
       * Okay someone managed to make a huge name, so he's ready to pay
3081
       * for the processing speed.
3082
       */
3083
5.44k
      max = len * 2;
3084
3085
5.44k
      buffer = (xmlChar *) xmlMallocAtomic(max);
3086
5.44k
      if (buffer == NULL) {
3087
0
          xmlErrMemory(ctxt, NULL);
3088
0
    return(NULL);
3089
0
      }
3090
5.44k
      memcpy(buffer, buf, len);
3091
8.42M
      while (c != 0) { /* tested bigname2.xml */
3092
8.41M
    if (len + 10 > max) {
3093
7.32k
        xmlChar *tmp;
3094
3095
7.32k
        max *= 2;
3096
7.32k
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3097
7.32k
        if (tmp == NULL) {
3098
0
      xmlErrMemory(ctxt, NULL);
3099
0
      xmlFree(buffer);
3100
0
      return(NULL);
3101
0
        }
3102
7.32k
        buffer = tmp;
3103
7.32k
    }
3104
8.41M
    buffer[len++] = c;
3105
8.41M
    c = *cur++;
3106
8.41M
      }
3107
5.44k
      buffer[len] = 0;
3108
5.44k
  }
3109
3110
2.03M
  if (buffer == NULL)
3111
2.02M
      ret = xmlStrndup(buf, len);
3112
5.44k
  else {
3113
5.44k
      ret = buffer;
3114
5.44k
  }
3115
2.03M
    }
3116
3117
54.3M
    return(ret);
3118
54.3M
}
3119
3120
/************************************************************************
3121
 *                  *
3122
 *      The parser itself       *
3123
 *  Relates to http://www.w3.org/TR/REC-xml       *
3124
 *                  *
3125
 ************************************************************************/
3126
3127
/************************************************************************
3128
 *                  *
3129
 *  Routines to parse Name, NCName and NmToken      *
3130
 *                  *
3131
 ************************************************************************/
3132
/*
 * Call counters for the name parsing routines, only compiled in when
 * DEBUG is defined. xmlParseName, xmlParseNameComplex and
 * xmlParseNCNameComplex each increment their counter on entry; the
 * remaining counters are presumably updated the same way by their
 * namesake routines.
 */
#ifdef DEBUG
3133
static unsigned long nbParseName = 0;
3134
static unsigned long nbParseNmToken = 0;
3135
static unsigned long nbParseNCName = 0;
3136
static unsigned long nbParseNCNameComplex = 0;
3137
static unsigned long nbParseNameComplex = 0;
3138
static unsigned long nbParseStringName = 0;
3139
#endif
3140
3141
/*
3142
 * The two following functions are related to the change of accepted
3143
 * characters for Name and NmToken in the Revision 5 of XML-1.0
3144
 * They correspond to the modified production [4] and the new production [4a]
3145
 * changes in that revision. Also note that the macros used for the
3146
 * productions Letter, Digit, CombiningChar and Extender are not needed
3147
 * anymore.
3148
 * We still keep compatibility with pre-revision5 parsing semantics if the
3149
 * new XML_PARSE_OLD10 option is given to the parser.
3150
 */
3151
static int
3152
257M
xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3153
257M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3154
        /*
3155
   * Use the new checks of production [4] [4a] amd [5] of the
3156
   * Update 5 of XML-1.0
3157
   */
3158
154M
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3159
154M
      (((c >= 'a') && (c <= 'z')) ||
3160
154M
       ((c >= 'A') && (c <= 'Z')) ||
3161
154M
       (c == '_') || (c == ':') ||
3162
154M
       ((c >= 0xC0) && (c <= 0xD6)) ||
3163
154M
       ((c >= 0xD8) && (c <= 0xF6)) ||
3164
154M
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3165
154M
       ((c >= 0x370) && (c <= 0x37D)) ||
3166
154M
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3167
154M
       ((c >= 0x200C) && (c <= 0x200D)) ||
3168
154M
       ((c >= 0x2070) && (c <= 0x218F)) ||
3169
154M
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3170
154M
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3171
154M
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3172
154M
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3173
154M
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3174
153M
      return(1);
3175
154M
    } else {
3176
103M
        if (IS_LETTER(c) || (c == '_') || (c == ':'))
3177
103M
      return(1);
3178
103M
    }
3179
774k
    return(0);
3180
257M
}
3181
3182
static int
3183
7.52G
xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3184
7.52G
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3185
        /*
3186
   * Use the new checks of production [4] [4a] amd [5] of the
3187
   * Update 5 of XML-1.0
3188
   */
3189
4.29G
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3190
4.29G
      (((c >= 'a') && (c <= 'z')) ||
3191
4.29G
       ((c >= 'A') && (c <= 'Z')) ||
3192
4.29G
       ((c >= '0') && (c <= '9')) || /* !start */
3193
4.29G
       (c == '_') || (c == ':') ||
3194
4.29G
       (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3195
4.29G
       ((c >= 0xC0) && (c <= 0xD6)) ||
3196
4.29G
       ((c >= 0xD8) && (c <= 0xF6)) ||
3197
4.29G
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3198
4.29G
       ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3199
4.29G
       ((c >= 0x370) && (c <= 0x37D)) ||
3200
4.29G
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3201
4.29G
       ((c >= 0x200C) && (c <= 0x200D)) ||
3202
4.29G
       ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3203
4.29G
       ((c >= 0x2070) && (c <= 0x218F)) ||
3204
4.29G
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3205
4.29G
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3206
4.29G
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3207
4.29G
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3208
4.29G
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3209
4.13G
       return(1);
3210
4.29G
    } else {
3211
3.22G
        if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3212
3.22G
            (c == '.') || (c == '-') ||
3213
3.22G
      (c == '_') || (c == ':') ||
3214
3.22G
      (IS_COMBINING(c)) ||
3215
3.22G
      (IS_EXTENDER(c)))
3216
3.12G
      return(1);
3217
3.22G
    }
3218
260M
    return(0);
3219
7.52G
}
3220
3221
static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
3222
                                          int *len, int *alloc, int normalize);
3223
3224
static const xmlChar *
3225
7.21M
xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3226
7.21M
    int len = 0, l;
3227
7.21M
    int c;
3228
7.21M
    int count = 0;
3229
7.21M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3230
3.44M
                    XML_MAX_TEXT_LENGTH :
3231
7.21M
                    XML_MAX_NAME_LENGTH;
3232
3233
#ifdef DEBUG
3234
    nbParseNameComplex++;
3235
#endif
3236
3237
    /*
3238
     * Handler for more complex cases
3239
     */
3240
7.21M
    GROW;
3241
7.21M
    if (ctxt->instate == XML_PARSER_EOF)
3242
72
        return(NULL);
3243
7.21M
    c = CUR_CHAR(l);
3244
7.21M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3245
        /*
3246
   * Use the new checks of production [4] [4a] amd [5] of the
3247
   * Update 5 of XML-1.0
3248
   */
3249
3.88M
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3250
3.88M
      (!(((c >= 'a') && (c <= 'z')) ||
3251
3.81M
         ((c >= 'A') && (c <= 'Z')) ||
3252
3.81M
         (c == '_') || (c == ':') ||
3253
3.81M
         ((c >= 0xC0) && (c <= 0xD6)) ||
3254
3.81M
         ((c >= 0xD8) && (c <= 0xF6)) ||
3255
3.81M
         ((c >= 0xF8) && (c <= 0x2FF)) ||
3256
3.81M
         ((c >= 0x370) && (c <= 0x37D)) ||
3257
3.81M
         ((c >= 0x37F) && (c <= 0x1FFF)) ||
3258
3.81M
         ((c >= 0x200C) && (c <= 0x200D)) ||
3259
3.81M
         ((c >= 0x2070) && (c <= 0x218F)) ||
3260
3.81M
         ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3261
3.81M
         ((c >= 0x3001) && (c <= 0xD7FF)) ||
3262
3.81M
         ((c >= 0xF900) && (c <= 0xFDCF)) ||
3263
3.81M
         ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3264
3.81M
         ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3265
1.45M
      return(NULL);
3266
1.45M
  }
3267
2.43M
  len += l;
3268
2.43M
  NEXTL(l);
3269
2.43M
  c = CUR_CHAR(l);
3270
35.8M
  while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3271
35.8M
         (((c >= 'a') && (c <= 'z')) ||
3272
35.7M
          ((c >= 'A') && (c <= 'Z')) ||
3273
35.7M
          ((c >= '0') && (c <= '9')) || /* !start */
3274
35.7M
          (c == '_') || (c == ':') ||
3275
35.7M
          (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3276
35.7M
          ((c >= 0xC0) && (c <= 0xD6)) ||
3277
35.7M
          ((c >= 0xD8) && (c <= 0xF6)) ||
3278
35.7M
          ((c >= 0xF8) && (c <= 0x2FF)) ||
3279
35.7M
          ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3280
35.7M
          ((c >= 0x370) && (c <= 0x37D)) ||
3281
35.7M
          ((c >= 0x37F) && (c <= 0x1FFF)) ||
3282
35.7M
          ((c >= 0x200C) && (c <= 0x200D)) ||
3283
35.7M
          ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3284
35.7M
          ((c >= 0x2070) && (c <= 0x218F)) ||
3285
35.7M
          ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3286
35.7M
          ((c >= 0x3001) && (c <= 0xD7FF)) ||
3287
35.7M
          ((c >= 0xF900) && (c <= 0xFDCF)) ||
3288
35.7M
          ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3289
35.7M
          ((c >= 0x10000) && (c <= 0xEFFFF))
3290
35.7M
    )) {
3291
33.3M
      if (count++ > XML_PARSER_CHUNK_SIZE) {
3292
228k
    count = 0;
3293
228k
    GROW;
3294
228k
                if (ctxt->instate == XML_PARSER_EOF)
3295
0
                    return(NULL);
3296
228k
      }
3297
33.3M
            if (len <= INT_MAX - l)
3298
33.3M
          len += l;
3299
33.3M
      NEXTL(l);
3300
33.3M
      c = CUR_CHAR(l);
3301
33.3M
  }
3302
3.32M
    } else {
3303
3.32M
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3304
3.32M
      (!IS_LETTER(c) && (c != '_') &&
3305
3.22M
       (c != ':'))) {
3306
1.36M
      return(NULL);
3307
1.36M
  }
3308
1.95M
  len += l;
3309
1.95M
  NEXTL(l);
3310
1.95M
  c = CUR_CHAR(l);
3311
3312
31.3M
  while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3313
31.3M
         ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3314
31.3M
    (c == '.') || (c == '-') ||
3315
31.3M
    (c == '_') || (c == ':') ||
3316
31.3M
    (IS_COMBINING(c)) ||
3317
31.3M
    (IS_EXTENDER(c)))) {
3318
29.4M
      if (count++ > XML_PARSER_CHUNK_SIZE) {
3319
211k
    count = 0;
3320
211k
    GROW;
3321
211k
                if (ctxt->instate == XML_PARSER_EOF)
3322
0
                    return(NULL);
3323
211k
      }
3324
29.4M
            if (len <= INT_MAX - l)
3325
29.4M
          len += l;
3326
29.4M
      NEXTL(l);
3327
29.4M
      c = CUR_CHAR(l);
3328
29.4M
  }
3329
1.95M
    }
3330
4.39M
    if (len > maxLength) {
3331
259
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3332
259
        return(NULL);
3333
259
    }
3334
4.39M
    if (ctxt->input->cur - ctxt->input->base < len) {
3335
        /*
3336
         * There were a couple of bugs where PERefs lead to to a change
3337
         * of the buffer. Check the buffer size to avoid passing an invalid
3338
         * pointer to xmlDictLookup.
3339
         */
3340
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3341
0
                    "unexpected change of input buffer");
3342
0
        return (NULL);
3343
0
    }
3344
4.39M
    if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3345
1.75k
        return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3346
4.39M
    return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3347
4.39M
}
3348
3349
/**
3350
 * xmlParseName:
3351
 * @ctxt:  an XML parser context
3352
 *
3353
 * DEPRECATED: Internal function, don't use.
3354
 *
3355
 * parse an XML name.
3356
 *
3357
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3358
 *                  CombiningChar | Extender
3359
 *
3360
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3361
 *
3362
 * [6] Names ::= Name (#x20 Name)*
3363
 *
3364
 * Returns the Name parsed or NULL
3365
 */
3366
3367
const xmlChar *
3368
570M
xmlParseName(xmlParserCtxtPtr ctxt) {
3369
570M
    const xmlChar *in;
3370
570M
    const xmlChar *ret;
3371
570M
    size_t count = 0;
3372
570M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3373
199M
                       XML_MAX_TEXT_LENGTH :
3374
570M
                       XML_MAX_NAME_LENGTH;
3375
3376
570M
    GROW;
3377
3378
#ifdef DEBUG
3379
    nbParseName++;
3380
#endif
3381
3382
    /*
3383
     * Accelerator for simple ASCII names
3384
     */
3385
570M
    in = ctxt->input->cur;
3386
570M
    if (((*in >= 0x61) && (*in <= 0x7A)) ||
3387
570M
  ((*in >= 0x41) && (*in <= 0x5A)) ||
3388
570M
  (*in == '_') || (*in == ':')) {
3389
567M
  in++;
3390
2.38G
  while (((*in >= 0x61) && (*in <= 0x7A)) ||
3391
2.38G
         ((*in >= 0x41) && (*in <= 0x5A)) ||
3392
2.38G
         ((*in >= 0x30) && (*in <= 0x39)) ||
3393
2.38G
         (*in == '_') || (*in == '-') ||
3394
2.38G
         (*in == ':') || (*in == '.'))
3395
1.81G
      in++;
3396
567M
  if ((*in > 0) && (*in < 0x80)) {
3397
563M
      count = in - ctxt->input->cur;
3398
563M
            if (count > maxLength) {
3399
88
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3400
88
                return(NULL);
3401
88
            }
3402
563M
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3403
563M
      ctxt->input->cur = in;
3404
563M
      ctxt->input->col += count;
3405
563M
      if (ret == NULL)
3406
0
          xmlErrMemory(ctxt, NULL);
3407
563M
      return(ret);
3408
563M
  }
3409
567M
    }
3410
    /* accelerator for special cases */
3411
7.21M
    return(xmlParseNameComplex(ctxt));
3412
570M
}
3413
3414
static const xmlChar *
3415
1.12M
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3416
1.12M
    int len = 0, l;
3417
1.12M
    int c;
3418
1.12M
    int count = 0;
3419
1.12M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3420
368k
                    XML_MAX_TEXT_LENGTH :
3421
1.12M
                    XML_MAX_NAME_LENGTH;
3422
1.12M
    size_t startPosition = 0;
3423
3424
#ifdef DEBUG
3425
    nbParseNCNameComplex++;
3426
#endif
3427
3428
    /*
3429
     * Handler for more complex cases
3430
     */
3431
1.12M
    GROW;
3432
1.12M
    startPosition = CUR_PTR - BASE_PTR;
3433
1.12M
    c = CUR_CHAR(l);
3434
1.12M
    if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3435
1.12M
  (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3436
838k
  return(NULL);
3437
838k
    }
3438
3439
12.5M
    while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3440
12.5M
     (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3441
12.2M
  if (count++ > XML_PARSER_CHUNK_SIZE) {
3442
104k
      count = 0;
3443
104k
      GROW;
3444
104k
            if (ctxt->instate == XML_PARSER_EOF)
3445
0
                return(NULL);
3446
104k
  }
3447
12.2M
        if (len <= INT_MAX - l)
3448
12.2M
      len += l;
3449
12.2M
  NEXTL(l);
3450
12.2M
  c = CUR_CHAR(l);
3451
12.2M
  if (c == 0) {
3452
45.6k
      count = 0;
3453
      /*
3454
       * when shrinking to extend the buffer we really need to preserve
3455
       * the part of the name we already parsed. Hence rolling back
3456
       * by current length.
3457
       */
3458
45.6k
      ctxt->input->cur -= l;
3459
45.6k
      GROW;
3460
45.6k
            if (ctxt->instate == XML_PARSER_EOF)
3461
0
                return(NULL);
3462
45.6k
      ctxt->input->cur += l;
3463
45.6k
      c = CUR_CHAR(l);
3464
45.6k
  }
3465
12.2M
    }
3466
285k
    if (len > maxLength) {
3467
120
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3468
120
        return(NULL);
3469
120
    }
3470
285k
    return(xmlDictLookup(ctxt->dict, (BASE_PTR + startPosition), len));
3471
285k
}
3472
3473
/**
3474
 * xmlParseNCName:
3475
 * @ctxt:  an XML parser context
3476
 * @len:  length of the string parsed
3477
 *
3478
 * parse an XML name.
3479
 *
3480
 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3481
 *                      CombiningChar | Extender
3482
 *
3483
 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3484
 *
3485
 * Returns the Name parsed or NULL
3486
 */
3487
3488
static const xmlChar *
3489
68.0M
xmlParseNCName(xmlParserCtxtPtr ctxt) {
3490
68.0M
    const xmlChar *in, *e;
3491
68.0M
    const xmlChar *ret;
3492
68.0M
    size_t count = 0;
3493
68.0M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3494
14.0M
                       XML_MAX_TEXT_LENGTH :
3495
68.0M
                       XML_MAX_NAME_LENGTH;
3496
3497
#ifdef DEBUG
3498
    nbParseNCName++;
3499
#endif
3500
3501
    /*
3502
     * Accelerator for simple ASCII names
3503
     */
3504
68.0M
    in = ctxt->input->cur;
3505
68.0M
    e = ctxt->input->end;
3506
68.0M
    if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3507
68.0M
   ((*in >= 0x41) && (*in <= 0x5A)) ||
3508
68.0M
   (*in == '_')) && (in < e)) {
3509
67.1M
  in++;
3510
210M
  while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3511
210M
          ((*in >= 0x41) && (*in <= 0x5A)) ||
3512
210M
          ((*in >= 0x30) && (*in <= 0x39)) ||
3513
210M
          (*in == '_') || (*in == '-') ||
3514
210M
          (*in == '.')) && (in < e))
3515
142M
      in++;
3516
67.1M
  if (in >= e)
3517
9.05k
      goto complex;
3518
67.1M
  if ((*in > 0) && (*in < 0x80)) {
3519
66.9M
      count = in - ctxt->input->cur;
3520
66.9M
            if (count > maxLength) {
3521
35
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3522
35
                return(NULL);
3523
35
            }
3524
66.9M
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3525
66.9M
      ctxt->input->cur = in;
3526
66.9M
      ctxt->input->col += count;
3527
66.9M
      if (ret == NULL) {
3528
0
          xmlErrMemory(ctxt, NULL);
3529
0
      }
3530
66.9M
      return(ret);
3531
66.9M
  }
3532
67.1M
    }
3533
1.12M
complex:
3534
1.12M
    return(xmlParseNCNameComplex(ctxt));
3535
68.0M
}
3536
3537
/**
3538
 * xmlParseNameAndCompare:
3539
 * @ctxt:  an XML parser context
3540
 *
3541
 * parse an XML name and compares for match
3542
 * (specialized for endtag parsing)
3543
 *
3544
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3545
 * and the name for mismatch
3546
 */
3547
3548
static const xmlChar *
3549
32.2M
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3550
32.2M
    register const xmlChar *cmp = other;
3551
32.2M
    register const xmlChar *in;
3552
32.2M
    const xmlChar *ret;
3553
3554
32.2M
    GROW;
3555
32.2M
    if (ctxt->instate == XML_PARSER_EOF)
3556
0
        return(NULL);
3557
3558
32.2M
    in = ctxt->input->cur;
3559
158M
    while (*in != 0 && *in == *cmp) {
3560
126M
  ++in;
3561
126M
  ++cmp;
3562
126M
    }
3563
32.2M
    if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3564
  /* success */
3565
31.5M
  ctxt->input->col += in - ctxt->input->cur;
3566
31.5M
  ctxt->input->cur = in;
3567
31.5M
  return (const xmlChar*) 1;
3568
31.5M
    }
3569
    /* failure (or end of input buffer), check with full function */
3570
689k
    ret = xmlParseName (ctxt);
3571
    /* strings coming from the dictionary direct compare possible */
3572
689k
    if (ret == other) {
3573
29.6k
  return (const xmlChar*) 1;
3574
29.6k
    }
3575
659k
    return ret;
3576
689k
}
3577
3578
/**
3579
 * xmlParseStringName:
3580
 * @ctxt:  an XML parser context
3581
 * @str:  a pointer to the string pointer (IN/OUT)
3582
 *
3583
 * parse an XML name.
3584
 *
3585
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3586
 *                  CombiningChar | Extender
3587
 *
3588
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3589
 *
3590
 * [6] Names ::= Name (#x20 Name)*
3591
 *
3592
 * Returns the Name parsed or NULL. The @str pointer
3593
 * is updated to the current location in the string.
3594
 */
3595
3596
static xmlChar *
3597
256M
xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3598
256M
    xmlChar buf[XML_MAX_NAMELEN + 5];
3599
256M
    const xmlChar *cur = *str;
3600
256M
    int len = 0, l;
3601
256M
    int c;
3602
256M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3603
96.7M
                    XML_MAX_TEXT_LENGTH :
3604
256M
                    XML_MAX_NAME_LENGTH;
3605
3606
#ifdef DEBUG
3607
    nbParseStringName++;
3608
#endif
3609
3610
256M
    c = CUR_SCHAR(cur, l);
3611
256M
    if (!xmlIsNameStartChar(ctxt, c)) {
3612
45.7k
  return(NULL);
3613
45.7k
    }
3614
3615
256M
    COPY_BUF(l,buf,len,c);
3616
256M
    cur += l;
3617
256M
    c = CUR_SCHAR(cur, l);
3618
3.21G
    while (xmlIsNameChar(ctxt, c)) {
3619
2.97G
  COPY_BUF(l,buf,len,c);
3620
2.97G
  cur += l;
3621
2.97G
  c = CUR_SCHAR(cur, l);
3622
2.97G
  if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3623
      /*
3624
       * Okay someone managed to make a huge name, so he's ready to pay
3625
       * for the processing speed.
3626
       */
3627
18.6M
      xmlChar *buffer;
3628
18.6M
      int max = len * 2;
3629
3630
18.6M
      buffer = (xmlChar *) xmlMallocAtomic(max);
3631
18.6M
      if (buffer == NULL) {
3632
0
          xmlErrMemory(ctxt, NULL);
3633
0
    return(NULL);
3634
0
      }
3635
18.6M
      memcpy(buffer, buf, len);
3636
4.26G
      while (xmlIsNameChar(ctxt, c)) {
3637
4.24G
    if (len + 10 > max) {
3638
18.7M
        xmlChar *tmp;
3639
3640
18.7M
        max *= 2;
3641
18.7M
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3642
18.7M
        if (tmp == NULL) {
3643
0
      xmlErrMemory(ctxt, NULL);
3644
0
      xmlFree(buffer);
3645
0
      return(NULL);
3646
0
        }
3647
18.7M
        buffer = tmp;
3648
18.7M
    }
3649
4.24G
    COPY_BUF(l,buffer,len,c);
3650
4.24G
    cur += l;
3651
4.24G
    c = CUR_SCHAR(cur, l);
3652
4.24G
                if (len > maxLength) {
3653
24
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3654
24
                    xmlFree(buffer);
3655
24
                    return(NULL);
3656
24
                }
3657
4.24G
      }
3658
18.6M
      buffer[len] = 0;
3659
18.6M
      *str = cur;
3660
18.6M
      return(buffer);
3661
18.6M
  }
3662
2.97G
    }
3663
238M
    if (len > maxLength) {
3664
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3665
0
        return(NULL);
3666
0
    }
3667
238M
    *str = cur;
3668
238M
    return(xmlStrndup(buf, len));
3669
238M
}
3670
3671
/**
3672
 * xmlParseNmtoken:
3673
 * @ctxt:  an XML parser context
3674
 *
3675
 * DEPRECATED: Internal function, don't use.
3676
 *
3677
 * parse an XML Nmtoken.
3678
 *
3679
 * [7] Nmtoken ::= (NameChar)+
3680
 *
3681
 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3682
 *
3683
 * Returns the Nmtoken parsed or NULL
3684
 */
3685
3686
xmlChar *
3687
3.22M
xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3688
3.22M
    xmlChar buf[XML_MAX_NAMELEN + 5];
3689
3.22M
    int len = 0, l;
3690
3.22M
    int c;
3691
3.22M
    int count = 0;
3692
3.22M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3693
902k
                    XML_MAX_TEXT_LENGTH :
3694
3.22M
                    XML_MAX_NAME_LENGTH;
3695
3696
#ifdef DEBUG
3697
    nbParseNmToken++;
3698
#endif
3699
3700
3.22M
    GROW;
3701
3.22M
    if (ctxt->instate == XML_PARSER_EOF)
3702
0
        return(NULL);
3703
3.22M
    c = CUR_CHAR(l);
3704
3705
19.0M
    while (xmlIsNameChar(ctxt, c)) {
3706
15.8M
  if (count++ > XML_PARSER_CHUNK_SIZE) {
3707
0
      count = 0;
3708
0
      GROW;
3709
0
  }
3710
15.8M
  COPY_BUF(l,buf,len,c);
3711
15.8M
  NEXTL(l);
3712
15.8M
  c = CUR_CHAR(l);
3713
15.8M
  if (c == 0) {
3714
2.32k
      count = 0;
3715
2.32k
      GROW;
3716
2.32k
      if (ctxt->instate == XML_PARSER_EOF)
3717
0
    return(NULL);
3718
2.32k
            c = CUR_CHAR(l);
3719
2.32k
  }
3720
15.8M
  if (len >= XML_MAX_NAMELEN) {
3721
      /*
3722
       * Okay someone managed to make a huge token, so he's ready to pay
3723
       * for the processing speed.
3724
       */
3725
3.83k
      xmlChar *buffer;
3726
3.83k
      int max = len * 2;
3727
3728
3.83k
      buffer = (xmlChar *) xmlMallocAtomic(max);
3729
3.83k
      if (buffer == NULL) {
3730
0
          xmlErrMemory(ctxt, NULL);
3731
0
    return(NULL);
3732
0
      }
3733
3.83k
      memcpy(buffer, buf, len);
3734
9.01M
      while (xmlIsNameChar(ctxt, c)) {
3735
9.01M
    if (count++ > XML_PARSER_CHUNK_SIZE) {
3736
90.5k
        count = 0;
3737
90.5k
        GROW;
3738
90.5k
                    if (ctxt->instate == XML_PARSER_EOF) {
3739
0
                        xmlFree(buffer);
3740
0
                        return(NULL);
3741
0
                    }
3742
90.5k
    }
3743
9.01M
    if (len + 10 > max) {
3744
6.84k
        xmlChar *tmp;
3745
3746
6.84k
        max *= 2;
3747
6.84k
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3748
6.84k
        if (tmp == NULL) {
3749
0
      xmlErrMemory(ctxt, NULL);
3750
0
      xmlFree(buffer);
3751
0
      return(NULL);
3752
0
        }
3753
6.84k
        buffer = tmp;
3754
6.84k
    }
3755
9.01M
    COPY_BUF(l,buffer,len,c);
3756
9.01M
    NEXTL(l);
3757
9.01M
    c = CUR_CHAR(l);
3758
9.01M
                if (len > maxLength) {
3759
63
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3760
63
                    xmlFree(buffer);
3761
63
                    return(NULL);
3762
63
                }
3763
9.01M
      }
3764
3.77k
      buffer[len] = 0;
3765
3.77k
      return(buffer);
3766
3.83k
  }
3767
15.8M
    }
3768
3.22M
    if (len == 0)
3769
51.2k
        return(NULL);
3770
3.17M
    if (len > maxLength) {
3771
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3772
0
        return(NULL);
3773
0
    }
3774
3.17M
    return(xmlStrndup(buf, len));
3775
3.17M
}
3776
3777
/**
3778
 * xmlParseEntityValue:
3779
 * @ctxt:  an XML parser context
3780
 * @orig:  if non-NULL store a copy of the original entity value
3781
 *
3782
 * DEPRECATED: Internal function, don't use.
3783
 *
3784
 * parse a value for ENTITY declarations
3785
 *
3786
 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3787
 *                 "'" ([^%&'] | PEReference | Reference)* "'"
3788
 *
3789
 * Returns the EntityValue parsed with reference substituted or NULL
3790
 */
3791
3792
xmlChar *
3793
4.88M
xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3794
4.88M
    xmlChar *buf = NULL;
3795
4.88M
    int len = 0;
3796
4.88M
    int size = XML_PARSER_BUFFER_SIZE;
3797
4.88M
    int c, l;
3798
4.88M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3799
1.38M
                    XML_MAX_HUGE_LENGTH :
3800
4.88M
                    XML_MAX_TEXT_LENGTH;
3801
4.88M
    xmlChar stop;
3802
4.88M
    xmlChar *ret = NULL;
3803
4.88M
    const xmlChar *cur = NULL;
3804
4.88M
    xmlParserInputPtr input;
3805
3806
4.88M
    if (RAW == '"') stop = '"';
3807
1.13M
    else if (RAW == '\'') stop = '\'';
3808
0
    else {
3809
0
  xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
3810
0
  return(NULL);
3811
0
    }
3812
4.88M
    buf = (xmlChar *) xmlMallocAtomic(size);
3813
4.88M
    if (buf == NULL) {
3814
0
  xmlErrMemory(ctxt, NULL);
3815
0
  return(NULL);
3816
0
    }
3817
3818
    /*
3819
     * The content of the entity definition is copied in a buffer.
3820
     */
3821
3822
4.88M
    ctxt->instate = XML_PARSER_ENTITY_VALUE;
3823
4.88M
    input = ctxt->input;
3824
4.88M
    GROW;
3825
4.88M
    if (ctxt->instate == XML_PARSER_EOF)
3826
0
        goto error;
3827
4.88M
    NEXT;
3828
4.88M
    c = CUR_CHAR(l);
3829
    /*
3830
     * NOTE: 4.4.5 Included in Literal
3831
     * When a parameter entity reference appears in a literal entity
3832
     * value, ... a single or double quote character in the replacement
3833
     * text is always treated as a normal data character and will not
3834
     * terminate the literal.
3835
     * In practice it means we stop the loop only when back at parsing
3836
     * the initial entity and the quote is found
3837
     */
3838
404M
    while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3839
404M
      (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
3840
399M
  if (len + 5 >= size) {
3841
897k
      xmlChar *tmp;
3842
3843
897k
      size *= 2;
3844
897k
      tmp = (xmlChar *) xmlRealloc(buf, size);
3845
897k
      if (tmp == NULL) {
3846
0
    xmlErrMemory(ctxt, NULL);
3847
0
                goto error;
3848
0
      }
3849
897k
      buf = tmp;
3850
897k
  }
3851
399M
  COPY_BUF(l,buf,len,c);
3852
399M
  NEXTL(l);
3853
3854
399M
  GROW;
3855
399M
  c = CUR_CHAR(l);
3856
399M
  if (c == 0) {
3857
3.53k
      GROW;
3858
3.53k
      c = CUR_CHAR(l);
3859
3.53k
  }
3860
3861
399M
        if (len > maxLength) {
3862
0
            xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
3863
0
                           "entity value too long\n");
3864
0
            goto error;
3865
0
        }
3866
399M
    }
3867
4.88M
    buf[len] = 0;
3868
4.88M
    if (ctxt->instate == XML_PARSER_EOF)
3869
0
        goto error;
3870
4.88M
    if (c != stop) {
3871
5.07k
        xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3872
5.07k
        goto error;
3873
5.07k
    }
3874
4.87M
    NEXT;
3875
3876
    /*
3877
     * Raise problem w.r.t. '&' and '%' being used in non-entities
3878
     * reference constructs. Note Charref will be handled in
3879
     * xmlStringDecodeEntities()
3880
     */
3881
4.87M
    cur = buf;
3882
258M
    while (*cur != 0) { /* non input consuming */
3883
253M
  if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3884
4.48M
      xmlChar *name;
3885
4.48M
      xmlChar tmp = *cur;
3886
4.48M
            int nameOk = 0;
3887
3888
4.48M
      cur++;
3889
4.48M
      name = xmlParseStringName(ctxt, &cur);
3890
4.48M
            if (name != NULL) {
3891
4.47M
                nameOk = 1;
3892
4.47M
                xmlFree(name);
3893
4.47M
            }
3894
4.48M
            if ((nameOk == 0) || (*cur != ';')) {
3895
21.8k
    xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3896
21.8k
      "EntityValue: '%c' forbidden except for entities references\n",
3897
21.8k
                            tmp);
3898
21.8k
                goto error;
3899
21.8k
      }
3900
4.45M
      if ((tmp == '%') && (ctxt->inSubset == 1) &&
3901
4.45M
    (ctxt->inputNr == 1)) {
3902
3.65k
    xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3903
3.65k
                goto error;
3904
3.65k
      }
3905
4.45M
      if (*cur == 0)
3906
0
          break;
3907
4.45M
  }
3908
253M
  cur++;
3909
253M
    }
3910
3911
    /*
3912
     * Then PEReference entities are substituted.
3913
     *
3914
     * NOTE: 4.4.7 Bypassed
3915
     * When a general entity reference appears in the EntityValue in
3916
     * an entity declaration, it is bypassed and left as is.
3917
     * so XML_SUBSTITUTE_REF is not set here.
3918
     */
3919
4.85M
    ++ctxt->depth;
3920
4.85M
    ret = xmlStringDecodeEntitiesInt(ctxt, buf, len, XML_SUBSTITUTE_PEREF,
3921
4.85M
                                     0, 0, 0, /* check */ 1);
3922
4.85M
    --ctxt->depth;
3923
3924
4.85M
    if (orig != NULL) {
3925
4.85M
        *orig = buf;
3926
4.85M
        buf = NULL;
3927
4.85M
    }
3928
3929
4.88M
error:
3930
4.88M
    if (buf != NULL)
3931
30.6k
        xmlFree(buf);
3932
4.88M
    return(ret);
3933
4.85M
}
3934
3935
/**
3936
 * xmlParseAttValueComplex:
3937
 * @ctxt:  an XML parser context
3938
 * @len:   the resulting attribute len
3939
 * @normalize:  whether to apply the inner normalization
3940
 *
3941
 * parse a value for an attribute, this is the fallback function
3942
 * of xmlParseAttValue() when the attribute parsing requires handling
3943
 * of non-ASCII characters, or normalization compaction.
3944
 *
3945
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3946
 */
3947
static xmlChar *
3948
1.58M
xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
3949
1.58M
    xmlChar limit = 0;
3950
1.58M
    xmlChar *buf = NULL;
3951
1.58M
    xmlChar *rep = NULL;
3952
1.58M
    size_t len = 0;
3953
1.58M
    size_t buf_size = 0;
3954
1.58M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3955
509k
                       XML_MAX_HUGE_LENGTH :
3956
1.58M
                       XML_MAX_TEXT_LENGTH;
3957
1.58M
    int c, l, in_space = 0;
3958
1.58M
    xmlChar *current = NULL;
3959
1.58M
    xmlEntityPtr ent;
3960
3961
1.58M
    if (NXT(0) == '"') {
3962
946k
  ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3963
946k
  limit = '"';
3964
946k
        NEXT;
3965
946k
    } else if (NXT(0) == '\'') {
3966
641k
  limit = '\'';
3967
641k
  ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3968
641k
        NEXT;
3969
641k
    } else {
3970
0
  xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3971
0
  return(NULL);
3972
0
    }
3973
3974
    /*
3975
     * allocate a translation buffer.
3976
     */
3977
1.58M
    buf_size = XML_PARSER_BUFFER_SIZE;
3978
1.58M
    buf = (xmlChar *) xmlMallocAtomic(buf_size);
3979
1.58M
    if (buf == NULL) goto mem_error;
3980
3981
    /*
3982
     * OK loop until we reach one of the ending char or a size limit.
3983
     */
3984
1.58M
    c = CUR_CHAR(l);
3985
54.1M
    while (((NXT(0) != limit) && /* checked */
3986
54.1M
            (IS_CHAR(c)) && (c != '<')) &&
3987
54.1M
            (ctxt->instate != XML_PARSER_EOF)) {
3988
52.5M
  if (c == '&') {
3989
8.55M
      in_space = 0;
3990
8.55M
      if (NXT(1) == '#') {
3991
588k
    int val = xmlParseCharRef(ctxt);
3992
3993
588k
    if (val == '&') {
3994
10.1k
        if (ctxt->replaceEntities) {
3995
3.87k
      if (len + 10 > buf_size) {
3996
402
          growBuffer(buf, 10);
3997
402
      }
3998
3.87k
      buf[len++] = '&';
3999
6.24k
        } else {
4000
      /*
4001
       * The reparsing will be done in xmlStringGetNodeList()
4002
       * called by the attribute() function in SAX.c
4003
       */
4004
6.24k
      if (len + 10 > buf_size) {
4005
558
          growBuffer(buf, 10);
4006
558
      }
4007
6.24k
      buf[len++] = '&';
4008
6.24k
      buf[len++] = '#';
4009
6.24k
      buf[len++] = '3';
4010
6.24k
      buf[len++] = '8';
4011
6.24k
      buf[len++] = ';';
4012
6.24k
        }
4013
578k
    } else if (val != 0) {
4014
480k
        if (len + 10 > buf_size) {
4015
3.48k
      growBuffer(buf, 10);
4016
3.48k
        }
4017
480k
        len += xmlCopyChar(0, &buf[len], val);
4018
480k
    }
4019
7.96M
      } else {
4020
7.96M
    ent = xmlParseEntityRef(ctxt);
4021
7.96M
    if ((ent != NULL) &&
4022
7.96M
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
4023
168k
        if (len + 10 > buf_size) {
4024
318
      growBuffer(buf, 10);
4025
318
        }
4026
168k
        if ((ctxt->replaceEntities == 0) &&
4027
168k
            (ent->content[0] == '&')) {
4028
54.8k
      buf[len++] = '&';
4029
54.8k
      buf[len++] = '#';
4030
54.8k
      buf[len++] = '3';
4031
54.8k
      buf[len++] = '8';
4032
54.8k
      buf[len++] = ';';
4033
113k
        } else {
4034
113k
      buf[len++] = ent->content[0];
4035
113k
        }
4036
7.79M
    } else if ((ent != NULL) &&
4037
7.79M
               (ctxt->replaceEntities != 0)) {
4038
4.19M
        if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
4039
4.19M
                        if (xmlParserEntityCheck(ctxt, ent->length))
4040
19
                            goto error;
4041
4042
4.19M
      ++ctxt->depth;
4043
4.19M
      rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
4044
4.19M
                                ent->length, XML_SUBSTITUTE_REF, 0, 0, 0,
4045
4.19M
                                /* check */ 1);
4046
4.19M
      --ctxt->depth;
4047
4.19M
      if (rep != NULL) {
4048
4.17M
          current = rep;
4049
1.23G
          while (*current != 0) { /* non input consuming */
4050
1.23G
                                if ((*current == 0xD) || (*current == 0xA) ||
4051
1.23G
                                    (*current == 0x9)) {
4052
502k
                                    buf[len++] = 0x20;
4053
502k
                                    current++;
4054
502k
                                } else
4055
1.23G
                                    buf[len++] = *current++;
4056
1.23G
        if (len + 10 > buf_size) {
4057
66.2k
            growBuffer(buf, 10);
4058
66.2k
        }
4059
1.23G
          }
4060
4.17M
          xmlFree(rep);
4061
4.17M
          rep = NULL;
4062
4.17M
      }
4063
4.19M
        } else {
4064
0
      if (len + 10 > buf_size) {
4065
0
          growBuffer(buf, 10);
4066
0
      }
4067
0
      if (ent->content != NULL)
4068
0
          buf[len++] = ent->content[0];
4069
0
        }
4070
4.19M
    } else if (ent != NULL) {
4071
3.10M
        int i = xmlStrlen(ent->name);
4072
3.10M
        const xmlChar *cur = ent->name;
4073
4074
        /*
4075
                     * We also check for recursion and amplification
4076
                     * when entities are not substituted. They're
4077
                     * often expanded later.
4078
         */
4079
3.10M
        if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
4080
3.10M
      (ent->content != NULL)) {
4081
3.02M
                        if ((ent->flags & XML_ENT_CHECKED) == 0) {
4082
28.7k
                            unsigned long oldCopy = ctxt->sizeentcopy;
4083
4084
28.7k
                            ctxt->sizeentcopy = ent->length;
4085
4086
28.7k
                            ++ctxt->depth;
4087
28.7k
                            rep = xmlStringDecodeEntitiesInt(ctxt,
4088
28.7k
                                    ent->content, ent->length,
4089
28.7k
                                    XML_SUBSTITUTE_REF, 0, 0, 0,
4090
28.7k
                                    /* check */ 1);
4091
28.7k
                            --ctxt->depth;
4092
4093
                            /*
4094
                             * If we're parsing DTD content, the entity
4095
                             * might reference other entities which
4096
                             * weren't defined yet, so the check isn't
4097
                             * reliable.
4098
                             */
4099
28.7k
                            if (ctxt->inSubset == 0) {
4100
26.1k
                                ent->flags |= XML_ENT_CHECKED;
4101
26.1k
                                ent->expandedSize = ctxt->sizeentcopy;
4102
26.1k
                            }
4103
4104
28.7k
                            if (rep != NULL) {
4105
27.3k
                                xmlFree(rep);
4106
27.3k
                                rep = NULL;
4107
27.3k
                            } else {
4108
1.39k
                                ent->content[0] = 0;
4109
1.39k
                            }
4110
4111
28.7k
                            if (xmlParserEntityCheck(ctxt, oldCopy))
4112
1.09k
                                goto error;
4113
3.00M
                        } else {
4114
3.00M
                            if (xmlParserEntityCheck(ctxt, ent->expandedSize))
4115
136
                                goto error;
4116
3.00M
                        }
4117
3.02M
        }
4118
4119
        /*
4120
         * Just output the reference
4121
         */
4122
3.10M
        buf[len++] = '&';
4123
3.11M
        while (len + i + 10 > buf_size) {
4124
21.0k
      growBuffer(buf, i + 10);
4125
21.0k
        }
4126
10.5M
        for (;i > 0;i--)
4127
7.40M
      buf[len++] = *cur++;
4128
3.10M
        buf[len++] = ';';
4129
3.10M
    }
4130
7.96M
      }
4131
44.0M
  } else {
4132
44.0M
      if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
4133
5.05M
          if ((len != 0) || (!normalize)) {
4134
4.95M
        if ((!normalize) || (!in_space)) {
4135
4.84M
      COPY_BUF(l,buf,len,0x20);
4136
4.85M
      while (len + 10 > buf_size) {
4137
18.3k
          growBuffer(buf, 10);
4138
18.3k
      }
4139
4.84M
        }
4140
4.95M
        in_space = 1;
4141
4.95M
    }
4142
38.9M
      } else {
4143
38.9M
          in_space = 0;
4144
38.9M
    COPY_BUF(l,buf,len,c);
4145
38.9M
    if (len + 10 > buf_size) {
4146
99.8k
        growBuffer(buf, 10);
4147
99.8k
    }
4148
38.9M
      }
4149
44.0M
      NEXTL(l);
4150
44.0M
  }
4151
52.5M
  GROW;
4152
52.5M
  c = CUR_CHAR(l);
4153
52.5M
        if (len > maxLength) {
4154
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4155
0
                           "AttValue length too long\n");
4156
0
            goto mem_error;
4157
0
        }
4158
52.5M
    }
4159
1.58M
    if (ctxt->instate == XML_PARSER_EOF)
4160
2.39k
        goto error;
4161
4162
1.58M
    if ((in_space) && (normalize)) {
4163
73.4k
        while ((len > 0) && (buf[len - 1] == 0x20)) len--;
4164
35.2k
    }
4165
1.58M
    buf[len] = 0;
4166
1.58M
    if (RAW == '<') {
4167
309k
  xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4168
1.27M
    } else if (RAW != limit) {
4169
263k
  if ((c != 0) && (!IS_CHAR(c))) {
4170
112k
      xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4171
112k
         "invalid character in attribute value\n");
4172
150k
  } else {
4173
150k
      xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4174
150k
         "AttValue: ' expected\n");
4175
150k
        }
4176
263k
    } else
4177
1.01M
  NEXT;
4178
4179
1.58M
    if (attlen != NULL) *attlen = len;
4180
1.58M
    return(buf);
4181
4182
0
mem_error:
4183
0
    xmlErrMemory(ctxt, NULL);
4184
3.64k
error:
4185
3.64k
    if (buf != NULL)
4186
3.64k
        xmlFree(buf);
4187
3.64k
    if (rep != NULL)
4188
0
        xmlFree(rep);
4189
3.64k
    return(NULL);
4190
0
}
4191
4192
/**
4193
 * xmlParseAttValue:
4194
 * @ctxt:  an XML parser context
4195
 *
4196
 * DEPRECATED: Internal function, don't use.
4197
 *
4198
 * parse a value for an attribute
4199
 * Note: the parser won't do substitution of entities here, this
4200
 * will be handled later in xmlStringGetNodeList
4201
 *
4202
 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4203
 *                   "'" ([^<&'] | Reference)* "'"
4204
 *
4205
 * 3.3.3 Attribute-Value Normalization:
4206
 * Before the value of an attribute is passed to the application or
4207
 * checked for validity, the XML processor must normalize it as follows:
4208
 * - a character reference is processed by appending the referenced
4209
 *   character to the attribute value
4210
 * - an entity reference is processed by recursively processing the
4211
 *   replacement text of the entity
4212
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4213
 *   appending #x20 to the normalized value, except that only a single
4214
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
4215
 *   parsed entity or the literal entity value of an internal parsed entity
4216
 * - other characters are processed by appending them to the normalized value
4217
 * If the declared value is not CDATA, then the XML processor must further
4218
 * process the normalized attribute value by discarding any leading and
4219
 * trailing space (#x20) characters, and by replacing sequences of space
4220
 * (#x20) characters by a single space (#x20) character.
4221
 * All attributes for which no declaration has been read should be treated
4222
 * by a non-validating parser as if declared CDATA.
4223
 *
4224
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4225
 */
4226
4227
4228
xmlChar *
4229
21.8M
xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4230
21.8M
    if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4231
21.8M
    return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
4232
21.8M
}
4233
4234
/**
4235
 * xmlParseSystemLiteral:
4236
 * @ctxt:  an XML parser context
4237
 *
4238
 * DEPRECATED: Internal function, don't use.
4239
 *
4240
 * parse an XML Literal
4241
 *
4242
 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4243
 *
4244
 * Returns the SystemLiteral parsed or NULL
4245
 */
4246
4247
xmlChar *
4248
2.47M
xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4249
2.47M
    xmlChar *buf = NULL;
4250
2.47M
    int len = 0;
4251
2.47M
    int size = XML_PARSER_BUFFER_SIZE;
4252
2.47M
    int cur, l;
4253
2.47M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4254
833k
                    XML_MAX_TEXT_LENGTH :
4255
2.47M
                    XML_MAX_NAME_LENGTH;
4256
2.47M
    xmlChar stop;
4257
2.47M
    int state = ctxt->instate;
4258
2.47M
    int count = 0;
4259
4260
2.47M
    SHRINK;
4261
2.47M
    if (RAW == '"') {
4262
416k
        NEXT;
4263
416k
  stop = '"';
4264
2.05M
    } else if (RAW == '\'') {
4265
2.03M
        NEXT;
4266
2.03M
  stop = '\'';
4267
2.03M
    } else {
4268
24.4k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4269
24.4k
  return(NULL);
4270
24.4k
    }
4271
4272
2.44M
    buf = (xmlChar *) xmlMallocAtomic(size);
4273
2.44M
    if (buf == NULL) {
4274
0
        xmlErrMemory(ctxt, NULL);
4275
0
  return(NULL);
4276
0
    }
4277
2.44M
    ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4278
2.44M
    cur = CUR_CHAR(l);
4279
48.2M
    while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4280
45.7M
  if (len + 5 >= size) {
4281
90.3k
      xmlChar *tmp;
4282
4283
90.3k
      size *= 2;
4284
90.3k
      tmp = (xmlChar *) xmlRealloc(buf, size);
4285
90.3k
      if (tmp == NULL) {
4286
0
          xmlFree(buf);
4287
0
    xmlErrMemory(ctxt, NULL);
4288
0
    ctxt->instate = (xmlParserInputState) state;
4289
0
    return(NULL);
4290
0
      }
4291
90.3k
      buf = tmp;
4292
90.3k
  }
4293
45.7M
  count++;
4294
45.7M
  if (count > 50) {
4295
610k
      SHRINK;
4296
610k
      GROW;
4297
610k
      count = 0;
4298
610k
            if (ctxt->instate == XML_PARSER_EOF) {
4299
0
          xmlFree(buf);
4300
0
    return(NULL);
4301
0
            }
4302
610k
  }
4303
45.7M
  COPY_BUF(l,buf,len,cur);
4304
45.7M
  NEXTL(l);
4305
45.7M
  cur = CUR_CHAR(l);
4306
45.7M
  if (cur == 0) {
4307
600k
      GROW;
4308
600k
      SHRINK;
4309
600k
      cur = CUR_CHAR(l);
4310
600k
  }
4311
45.7M
        if (len > maxLength) {
4312
96
            xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4313
96
            xmlFree(buf);
4314
96
            ctxt->instate = (xmlParserInputState) state;
4315
96
            return(NULL);
4316
96
        }
4317
45.7M
    }
4318
2.44M
    buf[len] = 0;
4319
2.44M
    ctxt->instate = (xmlParserInputState) state;
4320
2.44M
    if (!IS_CHAR(cur)) {
4321
604k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4322
1.84M
    } else {
4323
1.84M
  NEXT;
4324
1.84M
    }
4325
2.44M
    return(buf);
4326
2.44M
}
4327
4328
/**
4329
 * xmlParsePubidLiteral:
4330
 * @ctxt:  an XML parser context
4331
 *
4332
 * DEPRECATED: Internal function, don't use.
4333
 *
4334
 * parse an XML public literal
4335
 *
4336
 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4337
 *
4338
 * Returns the PubidLiteral parsed or NULL.
4339
 */
4340
4341
xmlChar *
4342
1.71M
xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4343
1.71M
    xmlChar *buf = NULL;
4344
1.71M
    int len = 0;
4345
1.71M
    int size = XML_PARSER_BUFFER_SIZE;
4346
1.71M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4347
438k
                    XML_MAX_TEXT_LENGTH :
4348
1.71M
                    XML_MAX_NAME_LENGTH;
4349
1.71M
    xmlChar cur;
4350
1.71M
    xmlChar stop;
4351
1.71M
    int count = 0;
4352
1.71M
    xmlParserInputState oldstate = ctxt->instate;
4353
4354
1.71M
    SHRINK;
4355
1.71M
    if (RAW == '"') {
4356
91.6k
        NEXT;
4357
91.6k
  stop = '"';
4358
1.62M
    } else if (RAW == '\'') {
4359
1.60M
        NEXT;
4360
1.60M
  stop = '\'';
4361
1.60M
    } else {
4362
16.9k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4363
16.9k
  return(NULL);
4364
16.9k
    }
4365
1.70M
    buf = (xmlChar *) xmlMallocAtomic(size);
4366
1.70M
    if (buf == NULL) {
4367
0
  xmlErrMemory(ctxt, NULL);
4368
0
  return(NULL);
4369
0
    }
4370
1.70M
    ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
4371
1.70M
    cur = CUR;
4372
18.4M
    while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
4373
16.7M
  if (len + 1 >= size) {
4374
12.9k
      xmlChar *tmp;
4375
4376
12.9k
      size *= 2;
4377
12.9k
      tmp = (xmlChar *) xmlRealloc(buf, size);
4378
12.9k
      if (tmp == NULL) {
4379
0
    xmlErrMemory(ctxt, NULL);
4380
0
    xmlFree(buf);
4381
0
    return(NULL);
4382
0
      }
4383
12.9k
      buf = tmp;
4384
12.9k
  }
4385
16.7M
  buf[len++] = cur;
4386
16.7M
  count++;
4387
16.7M
  if (count > 50) {
4388
144k
      SHRINK;
4389
144k
      GROW;
4390
144k
      count = 0;
4391
144k
            if (ctxt->instate == XML_PARSER_EOF) {
4392
0
    xmlFree(buf);
4393
0
    return(NULL);
4394
0
            }
4395
144k
  }
4396
16.7M
  NEXT;
4397
16.7M
  cur = CUR;
4398
16.7M
  if (cur == 0) {
4399
5.42k
      GROW;
4400
5.42k
      SHRINK;
4401
5.42k
      cur = CUR;
4402
5.42k
  }
4403
16.7M
        if (len > maxLength) {
4404
0
            xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4405
0
            xmlFree(buf);
4406
0
            return(NULL);
4407
0
        }
4408
16.7M
    }
4409
1.70M
    buf[len] = 0;
4410
1.70M
    if (cur != stop) {
4411
737k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4412
962k
    } else {
4413
962k
  NEXT;
4414
962k
    }
4415
1.70M
    ctxt->instate = oldstate;
4416
1.70M
    return(buf);
4417
1.70M
}
4418
4419
static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt);
4420
4421
/*
 * Lookup table driving the inner scanning loop of xmlParseCharData().
 * Indexed by a raw input byte: a non-zero entry means the byte can be
 * skipped over directly, a zero entry stops the scan. Entries are zero
 * for the control characters other than TAB (so CR and LF are handled
 * separately), for '&', '<' and ']', and for every byte >= 0x80.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 - 0x0F: only TAB (0x09) is accepted */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x10 - 0x1F */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20 - 0x2F: '&' (0x26) stops the scan */
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27,
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30 - 0x3F: '<' (0x3C) stops the scan */
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F,
    /* 0x40 - 0x4F */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50 - 0x5F: ']' (0x5D) stops the scan */
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F,
    /* 0x60 - 0x6F */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 - 0x7F */
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    /* 0x80 - 0xFF: non-ASCII bytes always stop the scan */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
4458
4459
/**
4460
 * xmlParseCharData:
4461
 * @ctxt:  an XML parser context
4462
 * @cdata:  unused
4463
 *
4464
 * DEPRECATED: Internal function, don't use.
4465
 *
4466
 * Parse character data. Always makes progress if the first char isn't
4467
 * '<' or '&'.
4468
 *
4469
 * if we are within a CDATA section ']]>' marks an end of section.
4470
 *
4471
 * The right angle bracket (>) may be represented using the string "&gt;",
4472
 * and must, for compatibility, be escaped using "&gt;" or a character
4473
 * reference when it appears in the string "]]>" in content, when that
4474
 * string is not marking the end of a CDATA section.
4475
 *
4476
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4477
 */
4478
4479
void
4480
88.2M
xmlParseCharData(xmlParserCtxtPtr ctxt, ATTRIBUTE_UNUSED int cdata) {
4481
88.2M
    const xmlChar *in;
4482
88.2M
    int nbchar = 0;
4483
88.2M
    int line = ctxt->input->line;
4484
88.2M
    int col = ctxt->input->col;
4485
88.2M
    int ccol;
4486
4487
88.2M
    SHRINK;
4488
88.2M
    GROW;
4489
    /*
4490
     * Accelerated common case where input don't need to be
4491
     * modified before passing it to the handler.
4492
     */
4493
88.2M
    in = ctxt->input->cur;
4494
106M
    do {
4495
135M
get_more_space:
4496
165M
        while (*in == 0x20) { in++; ctxt->input->col++; }
4497
135M
        if (*in == 0xA) {
4498
30.9M
            do {
4499
30.9M
                ctxt->input->line++; ctxt->input->col = 1;
4500
30.9M
                in++;
4501
30.9M
            } while (*in == 0xA);
4502
29.6M
            goto get_more_space;
4503
29.6M
        }
4504
106M
        if (*in == '<') {
4505
22.5M
            nbchar = in - ctxt->input->cur;
4506
22.5M
            if (nbchar > 0) {
4507
22.5M
                const xmlChar *tmp = ctxt->input->cur;
4508
22.5M
                ctxt->input->cur = in;
4509
4510
22.5M
                if ((ctxt->sax != NULL) &&
4511
22.5M
                    (ctxt->sax->ignorableWhitespace !=
4512
22.5M
                     ctxt->sax->characters)) {
4513
8.00M
                    if (areBlanks(ctxt, tmp, nbchar, 1)) {
4514
6.44M
                        if (ctxt->sax->ignorableWhitespace != NULL)
4515
6.44M
                            ctxt->sax->ignorableWhitespace(ctxt->userData,
4516
6.44M
                                                   tmp, nbchar);
4517
6.44M
                    } else {
4518
1.56M
                        if (ctxt->sax->characters != NULL)
4519
1.56M
                            ctxt->sax->characters(ctxt->userData,
4520
1.56M
                                                  tmp, nbchar);
4521
1.56M
                        if (*ctxt->space == -1)
4522
434k
                            *ctxt->space = -2;
4523
1.56M
                    }
4524
14.5M
                } else if ((ctxt->sax != NULL) &&
4525
14.5M
                           (ctxt->sax->characters != NULL)) {
4526
14.5M
                    ctxt->sax->characters(ctxt->userData,
4527
14.5M
                                          tmp, nbchar);
4528
14.5M
                }
4529
22.5M
            }
4530
22.5M
            return;
4531
22.5M
        }
4532
4533
113M
get_more:
4534
113M
        ccol = ctxt->input->col;
4535
1.77G
        while (test_char_data[*in]) {
4536
1.65G
            in++;
4537
1.65G
            ccol++;
4538
1.65G
        }
4539
113M
        ctxt->input->col = ccol;
4540
113M
        if (*in == 0xA) {
4541
28.3M
            do {
4542
28.3M
                ctxt->input->line++; ctxt->input->col = 1;
4543
28.3M
                in++;
4544
28.3M
            } while (*in == 0xA);
4545
27.9M
            goto get_more;
4546
27.9M
        }
4547
86.0M
        if (*in == ']') {
4548
2.51M
            if ((in[1] == ']') && (in[2] == '>')) {
4549
25.7k
                xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4550
25.7k
                ctxt->input->cur = in + 1;
4551
25.7k
                return;
4552
25.7k
            }
4553
2.49M
            in++;
4554
2.49M
            ctxt->input->col++;
4555
2.49M
            goto get_more;
4556
2.51M
        }
4557
83.4M
        nbchar = in - ctxt->input->cur;
4558
83.4M
        if (nbchar > 0) {
4559
63.5M
            if ((ctxt->sax != NULL) &&
4560
63.5M
                (ctxt->sax->ignorableWhitespace !=
4561
63.5M
                 ctxt->sax->characters) &&
4562
63.5M
                (IS_BLANK_CH(*ctxt->input->cur))) {
4563
12.3M
                const xmlChar *tmp = ctxt->input->cur;
4564
12.3M
                ctxt->input->cur = in;
4565
4566
12.3M
                if (areBlanks(ctxt, tmp, nbchar, 0)) {
4567
6.85M
                    if (ctxt->sax->ignorableWhitespace != NULL)
4568
6.85M
                        ctxt->sax->ignorableWhitespace(ctxt->userData,
4569
6.85M
                                                       tmp, nbchar);
4570
6.85M
                } else {
4571
5.53M
                    if (ctxt->sax->characters != NULL)
4572
5.53M
                        ctxt->sax->characters(ctxt->userData,
4573
5.53M
                                              tmp, nbchar);
4574
5.53M
                    if (*ctxt->space == -1)
4575
1.68M
                        *ctxt->space = -2;
4576
5.53M
                }
4577
12.3M
                line = ctxt->input->line;
4578
12.3M
                col = ctxt->input->col;
4579
51.1M
            } else if (ctxt->sax != NULL) {
4580
51.1M
                if (ctxt->sax->characters != NULL)
4581
51.1M
                    ctxt->sax->characters(ctxt->userData,
4582
51.1M
                                          ctxt->input->cur, nbchar);
4583
51.1M
                line = ctxt->input->line;
4584
51.1M
                col = ctxt->input->col;
4585
51.1M
            }
4586
63.5M
        }
4587
83.4M
        ctxt->input->cur = in;
4588
83.4M
        if (*in == 0xD) {
4589
17.8M
            in++;
4590
17.8M
            if (*in == 0xA) {
4591
17.8M
                ctxt->input->cur = in;
4592
17.8M
                in++;
4593
17.8M
                ctxt->input->line++; ctxt->input->col = 1;
4594
17.8M
                continue; /* while */
4595
17.8M
            }
4596
46.2k
            in--;
4597
46.2k
        }
4598
65.6M
        if (*in == '<') {
4599
56.3M
            return;
4600
56.3M
        }
4601
9.26M
        if (*in == '&') {
4602
4.75M
            return;
4603
4.75M
        }
4604
4.50M
        SHRINK;
4605
4.50M
        GROW;
4606
4.50M
        if (ctxt->instate == XML_PARSER_EOF)
4607
0
            return;
4608
4.50M
        in = ctxt->input->cur;
4609
22.3M
    } while (((*in >= 0x20) && (*in <= 0x7F)) ||
4610
22.3M
             (*in == 0x09) || (*in == 0x0a));
4611
4.53M
    ctxt->input->line = line;
4612
4.53M
    ctxt->input->col = col;
4613
4.53M
    xmlParseCharDataComplex(ctxt);
4614
4.53M
}
4615
4616
/**
4617
 * xmlParseCharDataComplex:
4618
 * @ctxt:  an XML parser context
4619
 * @cdata:  int indicating whether we are within a CDATA section
4620
 *
4621
 * Always makes progress if the first char isn't '<' or '&'.
4622
 *
4623
 * parse a CharData section.this is the fallback function
4624
 * of xmlParseCharData() when the parsing requires handling
4625
 * of non-ASCII characters.
4626
 */
4627
static void
4628
4.53M
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt) {
4629
4.53M
    xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4630
4.53M
    int nbchar = 0;
4631
4.53M
    int cur, l;
4632
4.53M
    int count = 0;
4633
4634
4.53M
    SHRINK;
4635
4.53M
    GROW;
4636
4.53M
    cur = CUR_CHAR(l);
4637
88.2M
    while ((cur != '<') && /* checked */
4638
88.2M
           (cur != '&') &&
4639
88.2M
     (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
4640
83.7M
  if ((cur == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4641
10.9k
      xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4642
10.9k
  }
4643
83.7M
  COPY_BUF(l,buf,nbchar,cur);
4644
  /* move current position before possible calling of ctxt->sax->characters */
4645
83.7M
  NEXTL(l);
4646
83.7M
  cur = CUR_CHAR(l);
4647
83.7M
  if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4648
216k
      buf[nbchar] = 0;
4649
4650
      /*
4651
       * OK the segment is to be consumed as chars.
4652
       */
4653
216k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4654
183k
    if (areBlanks(ctxt, buf, nbchar, 0)) {
4655
4.78k
        if (ctxt->sax->ignorableWhitespace != NULL)
4656
4.78k
      ctxt->sax->ignorableWhitespace(ctxt->userData,
4657
4.78k
                                     buf, nbchar);
4658
178k
    } else {
4659
178k
        if (ctxt->sax->characters != NULL)
4660
178k
      ctxt->sax->characters(ctxt->userData, buf, nbchar);
4661
178k
        if ((ctxt->sax->characters !=
4662
178k
             ctxt->sax->ignorableWhitespace) &&
4663
178k
      (*ctxt->space == -1))
4664
4.17k
      *ctxt->space = -2;
4665
178k
    }
4666
183k
      }
4667
216k
      nbchar = 0;
4668
            /* something really bad happened in the SAX callback */
4669
216k
            if (ctxt->instate != XML_PARSER_CONTENT)
4670
0
                return;
4671
216k
  }
4672
83.7M
  count++;
4673
83.7M
  if (count > 50) {
4674
1.29M
      SHRINK;
4675
1.29M
      GROW;
4676
1.29M
      count = 0;
4677
1.29M
            if (ctxt->instate == XML_PARSER_EOF)
4678
0
    return;
4679
1.29M
  }
4680
83.7M
    }
4681
4.53M
    if (nbchar != 0) {
4682
1.45M
        buf[nbchar] = 0;
4683
  /*
4684
   * OK the segment is to be consumed as chars.
4685
   */
4686
1.45M
  if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4687
1.28M
      if (areBlanks(ctxt, buf, nbchar, 0)) {
4688
3.26k
    if (ctxt->sax->ignorableWhitespace != NULL)
4689
3.26k
        ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4690
1.27M
      } else {
4691
1.27M
    if (ctxt->sax->characters != NULL)
4692
1.27M
        ctxt->sax->characters(ctxt->userData, buf, nbchar);
4693
1.27M
    if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4694
1.27M
        (*ctxt->space == -1))
4695
232k
        *ctxt->space = -2;
4696
1.27M
      }
4697
1.28M
  }
4698
1.45M
    }
4699
4.53M
    if ((ctxt->input->cur < ctxt->input->end) && (!IS_CHAR(cur))) {
4700
  /* Generate the error and skip the offending character */
4701
2.89M
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4702
2.89M
                          "PCDATA invalid Char value %d\n",
4703
2.89M
                    cur ? cur : CUR);
4704
2.89M
  NEXT;
4705
2.89M
    }
4706
4.53M
}
4707
4708
/**
4709
 * xmlParseExternalID:
4710
 * @ctxt:  an XML parser context
4711
 * @publicID:  a xmlChar** receiving PubidLiteral
4712
 * @strict: indicate whether we should restrict parsing to only
4713
 *          production [75], see NOTE below
4714
 *
4715
 * DEPRECATED: Internal function, don't use.
4716
 *
4717
 * Parse an External ID or a Public ID
4718
 *
4719
 * NOTE: Productions [75] and [83] interact badly since [75] can generate
4720
 *       'PUBLIC' S PubidLiteral S SystemLiteral
4721
 *
4722
 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4723
 *                   | 'PUBLIC' S PubidLiteral S SystemLiteral
4724
 *
4725
 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4726
 *
4727
 * Returns the function returns SystemLiteral and in the second
4728
 *                case publicID receives PubidLiteral, is strict is off
4729
 *                it is possible to return NULL and have publicID set.
4730
 */
4731
4732
xmlChar *
4733
3.81M
xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4734
3.81M
    xmlChar *URI = NULL;
4735
4736
3.81M
    SHRINK;
4737
4738
3.81M
    *publicID = NULL;
4739
3.81M
    if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4740
1.82M
        SKIP(6);
4741
1.82M
  if (SKIP_BLANKS == 0) {
4742
2.93k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4743
2.93k
                     "Space required after 'SYSTEM'\n");
4744
2.93k
  }
4745
1.82M
  URI = xmlParseSystemLiteral(ctxt);
4746
1.82M
  if (URI == NULL) {
4747
3.63k
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4748
3.63k
        }
4749
1.98M
    } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4750
1.71M
        SKIP(6);
4751
1.71M
  if (SKIP_BLANKS == 0) {
4752
6.48k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4753
6.48k
        "Space required after 'PUBLIC'\n");
4754
6.48k
  }
4755
1.71M
  *publicID = xmlParsePubidLiteral(ctxt);
4756
1.71M
  if (*publicID == NULL) {
4757
16.9k
      xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4758
16.9k
  }
4759
1.71M
  if (strict) {
4760
      /*
4761
       * We don't handle [83] so "S SystemLiteral" is required.
4762
       */
4763
644k
      if (SKIP_BLANKS == 0) {
4764
18.2k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4765
18.2k
      "Space required after the Public Identifier\n");
4766
18.2k
      }
4767
1.07M
  } else {
4768
      /*
4769
       * We handle [83] so we return immediately, if
4770
       * "S SystemLiteral" is not detected. We skip blanks if no
4771
             * system literal was found, but this is harmless since we must
4772
             * be at the end of a NotationDecl.
4773
       */
4774
1.07M
      if (SKIP_BLANKS == 0) return(NULL);
4775
5.29k
      if ((CUR != '\'') && (CUR != '"')) return(NULL);
4776
5.29k
  }
4777
645k
  URI = xmlParseSystemLiteral(ctxt);
4778
645k
  if (URI == NULL) {
4779
20.9k
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4780
20.9k
        }
4781
645k
    }
4782
2.73M
    return(URI);
4783
3.81M
}
4784
4785
/**
4786
 * xmlParseCommentComplex:
4787
 * @ctxt:  an XML parser context
4788
 * @buf:  the already parsed part of the buffer
4789
 * @len:  number of bytes in the buffer
4790
 * @size:  allocated size of the buffer
4791
 *
4792
 * Skip an XML (SGML) comment <!-- .... -->
4793
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4794
 *  must not occur within comments. "
4795
 * This is the slow routine in case the accelerator for ascii didn't work
4796
 *
4797
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4798
 */
4799
static void
4800
xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4801
20.7M
                       size_t len, size_t size) {
4802
20.7M
    int q, ql;
4803
20.7M
    int r, rl;
4804
20.7M
    int cur, l;
4805
20.7M
    size_t count = 0;
4806
20.7M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4807
8.40M
                       XML_MAX_HUGE_LENGTH :
4808
20.7M
                       XML_MAX_TEXT_LENGTH;
4809
20.7M
    int inputid;
4810
4811
20.7M
    inputid = ctxt->input->id;
4812
4813
20.7M
    if (buf == NULL) {
4814
1.59M
        len = 0;
4815
1.59M
  size = XML_PARSER_BUFFER_SIZE;
4816
1.59M
  buf = (xmlChar *) xmlMallocAtomic(size);
4817
1.59M
  if (buf == NULL) {
4818
0
      xmlErrMemory(ctxt, NULL);
4819
0
      return;
4820
0
  }
4821
1.59M
    }
4822
20.7M
    GROW; /* Assure there's enough input data */
4823
20.7M
    q = CUR_CHAR(ql);
4824
20.7M
    if (q == 0)
4825
13.9M
        goto not_terminated;
4826
6.75M
    if (!IS_CHAR(q)) {
4827
19.9k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4828
19.9k
                          "xmlParseComment: invalid xmlChar value %d\n",
4829
19.9k
                    q);
4830
19.9k
  xmlFree (buf);
4831
19.9k
  return;
4832
19.9k
    }
4833
6.73M
    NEXTL(ql);
4834
6.73M
    r = CUR_CHAR(rl);
4835
6.73M
    if (r == 0)
4836
823k
        goto not_terminated;
4837
5.91M
    if (!IS_CHAR(r)) {
4838
2.26k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4839
2.26k
                          "xmlParseComment: invalid xmlChar value %d\n",
4840
2.26k
                    r);
4841
2.26k
  xmlFree (buf);
4842
2.26k
  return;
4843
2.26k
    }
4844
5.91M
    NEXTL(rl);
4845
5.91M
    cur = CUR_CHAR(l);
4846
5.91M
    if (cur == 0)
4847
185k
        goto not_terminated;
4848
175M
    while (IS_CHAR(cur) && /* checked */
4849
175M
           ((cur != '>') ||
4850
172M
      (r != '-') || (q != '-'))) {
4851
169M
  if ((r == '-') && (q == '-')) {
4852
5.62M
      xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4853
5.62M
  }
4854
169M
  if (len + 5 >= size) {
4855
265k
      xmlChar *new_buf;
4856
265k
            size_t new_size;
4857
4858
265k
      new_size = size * 2;
4859
265k
      new_buf = (xmlChar *) xmlRealloc(buf, new_size);
4860
265k
      if (new_buf == NULL) {
4861
0
    xmlFree (buf);
4862
0
    xmlErrMemory(ctxt, NULL);
4863
0
    return;
4864
0
      }
4865
265k
      buf = new_buf;
4866
265k
            size = new_size;
4867
265k
  }
4868
169M
  COPY_BUF(ql,buf,len,q);
4869
169M
  q = r;
4870
169M
  ql = rl;
4871
169M
  r = cur;
4872
169M
  rl = l;
4873
4874
169M
  count++;
4875
169M
  if (count > 50) {
4876
2.63M
      SHRINK;
4877
2.63M
      GROW;
4878
2.63M
      count = 0;
4879
2.63M
            if (ctxt->instate == XML_PARSER_EOF) {
4880
0
    xmlFree(buf);
4881
0
    return;
4882
0
            }
4883
2.63M
  }
4884
169M
  NEXTL(l);
4885
169M
  cur = CUR_CHAR(l);
4886
169M
  if (cur == 0) {
4887
2.89M
      SHRINK;
4888
2.89M
      GROW;
4889
2.89M
      cur = CUR_CHAR(l);
4890
2.89M
  }
4891
4892
169M
        if (len > maxLength) {
4893
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4894
0
                         "Comment too big found", NULL);
4895
0
            xmlFree (buf);
4896
0
            return;
4897
0
        }
4898
169M
    }
4899
5.72M
    buf[len] = 0;
4900
5.72M
    if (cur == 0) {
4901
2.89M
  xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4902
2.89M
                       "Comment not terminated \n<!--%.50s\n", buf);
4903
2.89M
    } else if (!IS_CHAR(cur)) {
4904
7.72k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4905
7.72k
                          "xmlParseComment: invalid xmlChar value %d\n",
4906
7.72k
                    cur);
4907
2.82M
    } else {
4908
2.82M
  if (inputid != ctxt->input->id) {
4909
0
      xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4910
0
               "Comment doesn't start and stop in the same"
4911
0
                           " entity\n");
4912
0
  }
4913
2.82M
        NEXT;
4914
2.82M
  if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4915
2.82M
      (!ctxt->disableSAX))
4916
2.78M
      ctxt->sax->comment(ctxt->userData, buf);
4917
2.82M
    }
4918
5.72M
    xmlFree(buf);
4919
5.72M
    return;
4920
14.9M
not_terminated:
4921
14.9M
    xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4922
14.9M
       "Comment not terminated\n", NULL);
4923
14.9M
    xmlFree(buf);
4924
14.9M
    return;
4925
5.72M
}
4926
4927
/**
4928
 * xmlParseComment:
4929
 * @ctxt:  an XML parser context
4930
 *
4931
 * DEPRECATED: Internal function, don't use.
4932
 *
4933
 * Parse an XML (SGML) comment. Always consumes '<!'.
4934
 *
4935
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4936
 *  must not occur within comments. "
4937
 *
4938
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4939
 */
4940
void
4941
383M
xmlParseComment(xmlParserCtxtPtr ctxt) {
4942
383M
    xmlChar *buf = NULL;
4943
383M
    size_t size = XML_PARSER_BUFFER_SIZE;
4944
383M
    size_t len = 0;
4945
383M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4946
136M
                       XML_MAX_HUGE_LENGTH :
4947
383M
                       XML_MAX_TEXT_LENGTH;
4948
383M
    xmlParserInputState state;
4949
383M
    const xmlChar *in;
4950
383M
    size_t nbchar = 0;
4951
383M
    int ccol;
4952
383M
    int inputid;
4953
4954
    /*
4955
     * Check that there is a comment right here.
4956
     */
4957
383M
    if ((RAW != '<') || (NXT(1) != '!'))
4958
0
        return;
4959
383M
    SKIP(2);
4960
383M
    if ((RAW != '-') || (NXT(1) != '-'))
4961
546
        return;
4962
383M
    state = ctxt->instate;
4963
383M
    ctxt->instate = XML_PARSER_COMMENT;
4964
383M
    inputid = ctxt->input->id;
4965
383M
    SKIP(2);
4966
383M
    SHRINK;
4967
383M
    GROW;
4968
4969
    /*
4970
     * Accelerated common case where input don't need to be
4971
     * modified before passing it to the handler.
4972
     */
4973
383M
    in = ctxt->input->cur;
4974
383M
    do {
4975
383M
  if (*in == 0xA) {
4976
1.34M
      do {
4977
1.34M
    ctxt->input->line++; ctxt->input->col = 1;
4978
1.34M
    in++;
4979
1.34M
      } while (*in == 0xA);
4980
1.32M
  }
4981
445M
get_more:
4982
445M
        ccol = ctxt->input->col;
4983
1.94G
  while (((*in > '-') && (*in <= 0x7F)) ||
4984
1.94G
         ((*in >= 0x20) && (*in < '-')) ||
4985
1.94G
         (*in == 0x09)) {
4986
1.49G
        in++;
4987
1.49G
        ccol++;
4988
1.49G
  }
4989
445M
  ctxt->input->col = ccol;
4990
445M
  if (*in == 0xA) {
4991
10.3M
      do {
4992
10.3M
    ctxt->input->line++; ctxt->input->col = 1;
4993
10.3M
    in++;
4994
10.3M
      } while (*in == 0xA);
4995
9.94M
      goto get_more;
4996
9.94M
  }
4997
436M
  nbchar = in - ctxt->input->cur;
4998
  /*
4999
   * save current set of data
5000
   */
5001
436M
  if (nbchar > 0) {
5002
84.2M
      if ((ctxt->sax != NULL) &&
5003
84.2M
    (ctxt->sax->comment != NULL)) {
5004
84.2M
    if (buf == NULL) {
5005
45.5M
        if ((*in == '-') && (in[1] == '-'))
5006
24.0M
            size = nbchar + 1;
5007
21.4M
        else
5008
21.4M
            size = XML_PARSER_BUFFER_SIZE + nbchar;
5009
45.5M
        buf = (xmlChar *) xmlMallocAtomic(size);
5010
45.5M
        if (buf == NULL) {
5011
0
            xmlErrMemory(ctxt, NULL);
5012
0
      ctxt->instate = state;
5013
0
      return;
5014
0
        }
5015
45.5M
        len = 0;
5016
45.5M
    } else if (len + nbchar + 1 >= size) {
5017
1.75M
        xmlChar *new_buf;
5018
1.75M
        size  += len + nbchar + XML_PARSER_BUFFER_SIZE;
5019
1.75M
        new_buf = (xmlChar *) xmlRealloc(buf, size);
5020
1.75M
        if (new_buf == NULL) {
5021
0
            xmlFree (buf);
5022
0
      xmlErrMemory(ctxt, NULL);
5023
0
      ctxt->instate = state;
5024
0
      return;
5025
0
        }
5026
1.75M
        buf = new_buf;
5027
1.75M
    }
5028
84.2M
    memcpy(&buf[len], ctxt->input->cur, nbchar);
5029
84.2M
    len += nbchar;
5030
84.2M
    buf[len] = 0;
5031
84.2M
      }
5032
84.2M
  }
5033
436M
        if (len > maxLength) {
5034
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5035
0
                         "Comment too big found", NULL);
5036
0
            xmlFree (buf);
5037
0
            return;
5038
0
        }
5039
436M
  ctxt->input->cur = in;
5040
436M
  if (*in == 0xA) {
5041
0
      in++;
5042
0
      ctxt->input->line++; ctxt->input->col = 1;
5043
0
  }
5044
436M
  if (*in == 0xD) {
5045
5.49M
      in++;
5046
5.49M
      if (*in == 0xA) {
5047
5.49M
    ctxt->input->cur = in;
5048
5.49M
    in++;
5049
5.49M
    ctxt->input->line++; ctxt->input->col = 1;
5050
5.49M
    goto get_more;
5051
5.49M
      }
5052
3.69k
      in--;
5053
3.69k
  }
5054
430M
  SHRINK;
5055
430M
  GROW;
5056
430M
        if (ctxt->instate == XML_PARSER_EOF) {
5057
0
            xmlFree(buf);
5058
0
            return;
5059
0
        }
5060
430M
  in = ctxt->input->cur;
5061
430M
  if (*in == '-') {
5062
409M
      if (in[1] == '-') {
5063
394M
          if (in[2] == '>') {
5064
362M
        if (ctxt->input->id != inputid) {
5065
0
      xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5066
0
                     "comment doesn't start and stop in the"
5067
0
                                       " same entity\n");
5068
0
        }
5069
362M
        SKIP(3);
5070
362M
        if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5071
362M
            (!ctxt->disableSAX)) {
5072
246M
      if (buf != NULL)
5073
13.6M
          ctxt->sax->comment(ctxt->userData, buf);
5074
232M
      else
5075
232M
          ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5076
246M
        }
5077
362M
        if (buf != NULL)
5078
26.4M
            xmlFree(buf);
5079
362M
        if (ctxt->instate != XML_PARSER_EOF)
5080
362M
      ctxt->instate = state;
5081
362M
        return;
5082
362M
    }
5083
31.6M
    if (buf != NULL) {
5084
20.9M
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5085
20.9M
                          "Double hyphen within comment: "
5086
20.9M
                                      "<!--%.50s\n",
5087
20.9M
              buf);
5088
20.9M
    } else
5089
10.6M
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5090
10.6M
                          "Double hyphen within comment\n", NULL);
5091
31.6M
                if (ctxt->instate == XML_PARSER_EOF) {
5092
0
                    xmlFree(buf);
5093
0
                    return;
5094
0
                }
5095
31.6M
    in++;
5096
31.6M
    ctxt->input->col++;
5097
31.6M
      }
5098
47.0M
      in++;
5099
47.0M
      ctxt->input->col++;
5100
47.0M
      goto get_more;
5101
409M
  }
5102
430M
    } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
5103
20.7M
    xmlParseCommentComplex(ctxt, buf, len, size);
5104
20.7M
    ctxt->instate = state;
5105
20.7M
    return;
5106
383M
}
5107
5108
5109
/**
5110
 * xmlParsePITarget:
5111
 * @ctxt:  an XML parser context
5112
 *
5113
 * DEPRECATED: Internal function, don't use.
5114
 *
5115
 * parse the name of a PI
5116
 *
5117
 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5118
 *
5119
 * Returns the PITarget name or NULL
5120
 */
5121
5122
const xmlChar *
5123
2.61M
xmlParsePITarget(xmlParserCtxtPtr ctxt) {
5124
2.61M
    const xmlChar *name;
5125
5126
2.61M
    name = xmlParseName(ctxt);
5127
2.61M
    if ((name != NULL) &&
5128
2.61M
        ((name[0] == 'x') || (name[0] == 'X')) &&
5129
2.61M
        ((name[1] == 'm') || (name[1] == 'M')) &&
5130
2.61M
        ((name[2] == 'l') || (name[2] == 'L'))) {
5131
761k
  int i;
5132
761k
  if ((name[0] == 'x') && (name[1] == 'm') &&
5133
761k
      (name[2] == 'l') && (name[3] == 0)) {
5134
701k
      xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5135
701k
     "XML declaration allowed only at the start of the document\n");
5136
701k
      return(name);
5137
701k
  } else if (name[3] == 0) {
5138
15.5k
      xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5139
15.5k
      return(name);
5140
15.5k
  }
5141
84.4k
  for (i = 0;;i++) {
5142
84.4k
      if (xmlW3CPIs[i] == NULL) break;
5143
64.2k
      if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5144
23.9k
          return(name);
5145
64.2k
  }
5146
20.1k
  xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5147
20.1k
          "xmlParsePITarget: invalid name prefix 'xml'\n",
5148
20.1k
          NULL, NULL);
5149
20.1k
    }
5150
1.87M
    if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5151
7.52k
  xmlNsErr(ctxt, XML_NS_ERR_COLON,
5152
7.52k
     "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5153
7.52k
    }
5154
1.87M
    return(name);
5155
2.61M
}
5156
5157
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document
 *
 * The value must have the form: catalog = "URL" (either quote style,
 * optional blanks around each token, nothing after the closing quote).
 * A malformed value raises an XML_WAR_CATALOG_PI warning, except that a
 * value with no '=' after the "catalog" keyword is ignored silently.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *URL = NULL;
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar quote;

    /* the "catalog" keyword */
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto error;
    p += 7;

    /* the '=' sign; when it is absent the PI is ignored without warning */
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (*p != '=')
        return;
    p++;

    /* the quoted URL */
    for (; IS_BLANK_CH(*p); p++)
        ;
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto error;
    p++;
    start = p;
    for (; (*p != 0) && (*p != quote); p++)
        ;
    if (*p == 0)
        goto error;
    URL = xmlStrndup(start, p - start);
    p++;

    /* only blanks may follow the closing quote */
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (*p != 0)
        goto error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
5218
5219
/**
5220
 * xmlParsePI:
5221
 * @ctxt:  an XML parser context
5222
 *
5223
 * DEPRECATED: Internal function, don't use.
5224
 *
5225
 * parse an XML Processing Instruction.
5226
 *
5227
 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5228
 *
5229
 * The processing is transferred to SAX once parsed.
5230
 */
5231
5232
void
5233
2.61M
xmlParsePI(xmlParserCtxtPtr ctxt) {
5234
2.61M
    xmlChar *buf = NULL;
5235
2.61M
    size_t len = 0;
5236
2.61M
    size_t size = XML_PARSER_BUFFER_SIZE;
5237
2.61M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5238
1.45M
                       XML_MAX_HUGE_LENGTH :
5239
2.61M
                       XML_MAX_TEXT_LENGTH;
5240
2.61M
    int cur, l;
5241
2.61M
    const xmlChar *target;
5242
2.61M
    xmlParserInputState state;
5243
2.61M
    int count = 0;
5244
5245
2.61M
    if ((RAW == '<') && (NXT(1) == '?')) {
5246
2.61M
  int inputid = ctxt->input->id;
5247
2.61M
  state = ctxt->instate;
5248
2.61M
        ctxt->instate = XML_PARSER_PI;
5249
  /*
5250
   * this is a Processing Instruction.
5251
   */
5252
2.61M
  SKIP(2);
5253
2.61M
  SHRINK;
5254
5255
  /*
5256
   * Parse the target name and check for special support like
5257
   * namespace.
5258
   */
5259
2.61M
        target = xmlParsePITarget(ctxt);
5260
2.61M
  if (target != NULL) {
5261
2.57M
      if ((RAW == '?') && (NXT(1) == '>')) {
5262
452k
    if (inputid != ctxt->input->id) {
5263
0
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5264
0
                             "PI declaration doesn't start and stop in"
5265
0
                                   " the same entity\n");
5266
0
    }
5267
452k
    SKIP(2);
5268
5269
    /*
5270
     * SAX: PI detected.
5271
     */
5272
452k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5273
452k
        (ctxt->sax->processingInstruction != NULL))
5274
357k
        ctxt->sax->processingInstruction(ctxt->userData,
5275
357k
                                         target, NULL);
5276
452k
    if (ctxt->instate != XML_PARSER_EOF)
5277
452k
        ctxt->instate = state;
5278
452k
    return;
5279
452k
      }
5280
2.12M
      buf = (xmlChar *) xmlMallocAtomic(size);
5281
2.12M
      if (buf == NULL) {
5282
0
    xmlErrMemory(ctxt, NULL);
5283
0
    ctxt->instate = state;
5284
0
    return;
5285
0
      }
5286
2.12M
      if (SKIP_BLANKS == 0) {
5287
425k
    xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5288
425k
        "ParsePI: PI %s space expected\n", target);
5289
425k
      }
5290
2.12M
      cur = CUR_CHAR(l);
5291
164M
      while (IS_CHAR(cur) && /* checked */
5292
164M
       ((cur != '?') || (NXT(1) != '>'))) {
5293
162M
    if (len + 5 >= size) {
5294
337k
        xmlChar *tmp;
5295
337k
                    size_t new_size = size * 2;
5296
337k
        tmp = (xmlChar *) xmlRealloc(buf, new_size);
5297
337k
        if (tmp == NULL) {
5298
0
      xmlErrMemory(ctxt, NULL);
5299
0
      xmlFree(buf);
5300
0
      ctxt->instate = state;
5301
0
      return;
5302
0
        }
5303
337k
        buf = tmp;
5304
337k
                    size = new_size;
5305
337k
    }
5306
162M
    count++;
5307
162M
    if (count > 50) {
5308
2.21M
        SHRINK;
5309
2.21M
        GROW;
5310
2.21M
                    if (ctxt->instate == XML_PARSER_EOF) {
5311
0
                        xmlFree(buf);
5312
0
                        return;
5313
0
                    }
5314
2.21M
        count = 0;
5315
2.21M
    }
5316
162M
    COPY_BUF(l,buf,len,cur);
5317
162M
    NEXTL(l);
5318
162M
    cur = CUR_CHAR(l);
5319
162M
    if (cur == 0) {
5320
978k
        SHRINK;
5321
978k
        GROW;
5322
978k
        cur = CUR_CHAR(l);
5323
978k
    }
5324
162M
                if (len > maxLength) {
5325
0
                    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5326
0
                                      "PI %s too big found", target);
5327
0
                    xmlFree(buf);
5328
0
                    ctxt->instate = state;
5329
0
                    return;
5330
0
                }
5331
162M
      }
5332
2.12M
      buf[len] = 0;
5333
2.12M
      if (cur != '?') {
5334
1.00M
    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5335
1.00M
          "ParsePI: PI %s never end ...\n", target);
5336
1.12M
      } else {
5337
1.12M
    if (inputid != ctxt->input->id) {
5338
34
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5339
34
                             "PI declaration doesn't start and stop in"
5340
34
                                   " the same entity\n");
5341
34
    }
5342
1.12M
    SKIP(2);
5343
5344
1.12M
#ifdef LIBXML_CATALOG_ENABLED
5345
1.12M
    if (((state == XML_PARSER_MISC) ||
5346
1.12M
               (state == XML_PARSER_START)) &&
5347
1.12M
        (xmlStrEqual(target, XML_CATALOG_PI))) {
5348
0
        xmlCatalogAllow allow = xmlCatalogGetDefaults();
5349
0
        if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5350
0
      (allow == XML_CATA_ALLOW_ALL))
5351
0
      xmlParseCatalogPI(ctxt, buf);
5352
0
    }
5353
1.12M
#endif
5354
5355
5356
    /*
5357
     * SAX: PI detected.
5358
     */
5359
1.12M
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5360
1.12M
        (ctxt->sax->processingInstruction != NULL))
5361
704k
        ctxt->sax->processingInstruction(ctxt->userData,
5362
704k
                                         target, buf);
5363
1.12M
      }
5364
2.12M
      xmlFree(buf);
5365
2.12M
  } else {
5366
34.9k
      xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5367
34.9k
  }
5368
2.15M
  if (ctxt->instate != XML_PARSER_EOF)
5369
2.15M
      ctxt->instate = state;
5370
2.15M
    }
5371
2.61M
}
5372
5373
/**
5374
 * xmlParseNotationDecl:
5375
 * @ctxt:  an XML parser context
5376
 *
5377
 * DEPRECATED: Internal function, don't use.
5378
 *
5379
 * Parse a notation declaration. Always consumes '<!'.
5380
 *
5381
 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID |  PublicID) S? '>'
5382
 *
5383
 * Hence there is actually 3 choices:
5384
 *     'PUBLIC' S PubidLiteral
5385
 *     'PUBLIC' S PubidLiteral S SystemLiteral
5386
 * and 'SYSTEM' S SystemLiteral
5387
 *
5388
 * See the NOTE on xmlParseExternalID().
5389
 */
5390
5391
void
5392
1.99M
xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5393
1.99M
    const xmlChar *name;
5394
1.99M
    xmlChar *Pubid;
5395
1.99M
    xmlChar *Systemid;
5396
5397
1.99M
    if ((CUR != '<') || (NXT(1) != '!'))
5398
0
        return;
5399
1.99M
    SKIP(2);
5400
5401
1.99M
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5402
1.99M
  int inputid = ctxt->input->id;
5403
1.99M
  SHRINK;
5404
1.99M
  SKIP(8);
5405
1.99M
  if (SKIP_BLANKS == 0) {
5406
3.02k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5407
3.02k
         "Space required after '<!NOTATION'\n");
5408
3.02k
      return;
5409
3.02k
  }
5410
5411
1.98M
        name = xmlParseName(ctxt);
5412
1.98M
  if (name == NULL) {
5413
1.25k
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5414
1.25k
      return;
5415
1.25k
  }
5416
1.98M
  if (xmlStrchr(name, ':') != NULL) {
5417
9.22k
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5418
9.22k
         "colons are forbidden from notation names '%s'\n",
5419
9.22k
         name, NULL, NULL);
5420
9.22k
  }
5421
1.98M
  if (SKIP_BLANKS == 0) {
5422
2.03k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5423
2.03k
         "Space required after the NOTATION name'\n");
5424
2.03k
      return;
5425
2.03k
  }
5426
5427
  /*
5428
   * Parse the IDs.
5429
   */
5430
1.98M
  Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5431
1.98M
  SKIP_BLANKS;
5432
5433
1.98M
  if (RAW == '>') {
5434
655k
      if (inputid != ctxt->input->id) {
5435
408
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5436
408
                         "Notation declaration doesn't start and stop"
5437
408
                               " in the same entity\n");
5438
408
      }
5439
655k
      NEXT;
5440
655k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5441
655k
    (ctxt->sax->notationDecl != NULL))
5442
566k
    ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5443
1.32M
  } else {
5444
1.32M
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5445
1.32M
  }
5446
1.98M
  if (Systemid != NULL) xmlFree(Systemid);
5447
1.98M
  if (Pubid != NULL) xmlFree(Pubid);
5448
1.98M
    }
5449
1.99M
}
5450
5451
/**
5452
 * xmlParseEntityDecl:
5453
 * @ctxt:  an XML parser context
5454
 *
5455
 * DEPRECATED: Internal function, don't use.
5456
 *
5457
 * Parse an entity declaration. Always consumes '<!'.
5458
 *
5459
 * [70] EntityDecl ::= GEDecl | PEDecl
5460
 *
5461
 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5462
 *
5463
 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5464
 *
5465
 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5466
 *
5467
 * [74] PEDef ::= EntityValue | ExternalID
5468
 *
5469
 * [76] NDataDecl ::= S 'NDATA' S Name
5470
 *
5471
 * [ VC: Notation Declared ]
5472
 * The Name must match the declared name of a notation.
5473
 */
5474
5475
void
5476
6.25M
xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5477
6.25M
    const xmlChar *name = NULL;
5478
6.25M
    xmlChar *value = NULL;
5479
6.25M
    xmlChar *URI = NULL, *literal = NULL;
5480
6.25M
    const xmlChar *ndata = NULL;
5481
6.25M
    int isParameter = 0;
5482
6.25M
    xmlChar *orig = NULL;
5483
5484
6.25M
    if ((CUR != '<') || (NXT(1) != '!'))
5485
0
        return;
5486
6.25M
    SKIP(2);
5487
5488
    /* GROW; done in the caller */
5489
6.25M
    if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5490
6.25M
  int inputid = ctxt->input->id;
5491
6.25M
  SHRINK;
5492
6.25M
  SKIP(6);
5493
6.25M
  if (SKIP_BLANKS == 0) {
5494
18.2k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5495
18.2k
         "Space required after '<!ENTITY'\n");
5496
18.2k
  }
5497
5498
6.25M
  if (RAW == '%') {
5499
2.94M
      NEXT;
5500
2.94M
      if (SKIP_BLANKS == 0) {
5501
646
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5502
646
             "Space required after '%%'\n");
5503
646
      }
5504
2.94M
      isParameter = 1;
5505
2.94M
  }
5506
5507
6.25M
        name = xmlParseName(ctxt);
5508
6.25M
  if (name == NULL) {
5509
26.0k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5510
26.0k
                     "xmlParseEntityDecl: no name\n");
5511
26.0k
            return;
5512
26.0k
  }
5513
6.22M
  if (xmlStrchr(name, ':') != NULL) {
5514
3.14k
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5515
3.14k
         "colons are forbidden from entities names '%s'\n",
5516
3.14k
         name, NULL, NULL);
5517
3.14k
  }
5518
6.22M
  if (SKIP_BLANKS == 0) {
5519
7.39k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5520
7.39k
         "Space required after the entity name\n");
5521
7.39k
  }
5522
5523
6.22M
  ctxt->instate = XML_PARSER_ENTITY_DECL;
5524
  /*
5525
   * handle the various case of definitions...
5526
   */
5527
6.22M
  if (isParameter) {
5528
2.93M
      if ((RAW == '"') || (RAW == '\'')) {
5529
2.90M
          value = xmlParseEntityValue(ctxt, &orig);
5530
2.90M
    if (value) {
5531
2.88M
        if ((ctxt->sax != NULL) &&
5532
2.88M
      (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5533
2.82M
      ctxt->sax->entityDecl(ctxt->userData, name,
5534
2.82M
                        XML_INTERNAL_PARAMETER_ENTITY,
5535
2.82M
            NULL, NULL, value);
5536
2.88M
    }
5537
2.90M
      } else {
5538
35.4k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5539
35.4k
    if ((URI == NULL) && (literal == NULL)) {
5540
3.07k
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5541
3.07k
    }
5542
35.4k
    if (URI) {
5543
32.2k
        xmlURIPtr uri;
5544
5545
32.2k
        uri = xmlParseURI((const char *) URI);
5546
32.2k
        if (uri == NULL) {
5547
1.11k
            xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5548
1.11k
             "Invalid URI: %s\n", URI);
5549
      /*
5550
       * This really ought to be a well formedness error
5551
       * but the XML Core WG decided otherwise c.f. issue
5552
       * E26 of the XML erratas.
5553
       */
5554
31.1k
        } else {
5555
31.1k
      if (uri->fragment != NULL) {
5556
          /*
5557
           * Okay this is foolish to block those but not
5558
           * invalid URIs.
5559
           */
5560
114
          xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5561
31.0k
      } else {
5562
31.0k
          if ((ctxt->sax != NULL) &&
5563
31.0k
        (!ctxt->disableSAX) &&
5564
31.0k
        (ctxt->sax->entityDecl != NULL))
5565
30.4k
        ctxt->sax->entityDecl(ctxt->userData, name,
5566
30.4k
              XML_EXTERNAL_PARAMETER_ENTITY,
5567
30.4k
              literal, URI, NULL);
5568
31.0k
      }
5569
31.1k
      xmlFreeURI(uri);
5570
31.1k
        }
5571
32.2k
    }
5572
35.4k
      }
5573
3.28M
  } else {
5574
3.28M
      if ((RAW == '"') || (RAW == '\'')) {
5575
1.98M
          value = xmlParseEntityValue(ctxt, &orig);
5576
1.98M
    if ((ctxt->sax != NULL) &&
5577
1.98M
        (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5578
1.85M
        ctxt->sax->entityDecl(ctxt->userData, name,
5579
1.85M
        XML_INTERNAL_GENERAL_ENTITY,
5580
1.85M
        NULL, NULL, value);
5581
    /*
5582
     * For expat compatibility in SAX mode.
5583
     */
5584
1.98M
    if ((ctxt->myDoc == NULL) ||
5585
1.98M
        (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5586
15.9k
        if (ctxt->myDoc == NULL) {
5587
1.72k
      ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5588
1.72k
      if (ctxt->myDoc == NULL) {
5589
0
          xmlErrMemory(ctxt, "New Doc failed");
5590
0
          return;
5591
0
      }
5592
1.72k
      ctxt->myDoc->properties = XML_DOC_INTERNAL;
5593
1.72k
        }
5594
15.9k
        if (ctxt->myDoc->intSubset == NULL)
5595
1.72k
      ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5596
1.72k
              BAD_CAST "fake", NULL, NULL);
5597
5598
15.9k
        xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5599
15.9k
                    NULL, NULL, value);
5600
15.9k
    }
5601
1.98M
      } else {
5602
1.30M
          URI = xmlParseExternalID(ctxt, &literal, 1);
5603
1.30M
    if ((URI == NULL) && (literal == NULL)) {
5604
13.4k
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5605
13.4k
    }
5606
1.30M
    if (URI) {
5607
1.27M
        xmlURIPtr uri;
5608
5609
1.27M
        uri = xmlParseURI((const char *)URI);
5610
1.27M
        if (uri == NULL) {
5611
51.7k
            xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5612
51.7k
             "Invalid URI: %s\n", URI);
5613
      /*
5614
       * This really ought to be a well formedness error
5615
       * but the XML Core WG decided otherwise c.f. issue
5616
       * E26 of the XML erratas.
5617
       */
5618
1.22M
        } else {
5619
1.22M
      if (uri->fragment != NULL) {
5620
          /*
5621
           * Okay this is foolish to block those but not
5622
           * invalid URIs.
5623
           */
5624
5.26k
          xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5625
5.26k
      }
5626
1.22M
      xmlFreeURI(uri);
5627
1.22M
        }
5628
1.27M
    }
5629
1.30M
    if ((RAW != '>') && (SKIP_BLANKS == 0)) {
5630
17.1k
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5631
17.1k
           "Space required before 'NDATA'\n");
5632
17.1k
    }
5633
1.30M
    if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5634
423k
        SKIP(5);
5635
423k
        if (SKIP_BLANKS == 0) {
5636
7.79k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5637
7.79k
               "Space required after 'NDATA'\n");
5638
7.79k
        }
5639
423k
        ndata = xmlParseName(ctxt);
5640
423k
        if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5641
423k
            (ctxt->sax->unparsedEntityDecl != NULL))
5642
405k
      ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5643
405k
            literal, URI, ndata);
5644
882k
    } else {
5645
882k
        if ((ctxt->sax != NULL) &&
5646
882k
            (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5647
560k
      ctxt->sax->entityDecl(ctxt->userData, name,
5648
560k
            XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5649
560k
            literal, URI, NULL);
5650
        /*
5651
         * For expat compatibility in SAX mode.
5652
         * assuming the entity replacement was asked for
5653
         */
5654
882k
        if ((ctxt->replaceEntities != 0) &&
5655
882k
      ((ctxt->myDoc == NULL) ||
5656
507k
      (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5657
3.02k
      if (ctxt->myDoc == NULL) {
5658
2.22k
          ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5659
2.22k
          if (ctxt->myDoc == NULL) {
5660
0
              xmlErrMemory(ctxt, "New Doc failed");
5661
0
        return;
5662
0
          }
5663
2.22k
          ctxt->myDoc->properties = XML_DOC_INTERNAL;
5664
2.22k
      }
5665
5666
3.02k
      if (ctxt->myDoc->intSubset == NULL)
5667
2.22k
          ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5668
2.22k
            BAD_CAST "fake", NULL, NULL);
5669
3.02k
      xmlSAX2EntityDecl(ctxt, name,
5670
3.02k
                  XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5671
3.02k
                  literal, URI, NULL);
5672
3.02k
        }
5673
882k
    }
5674
1.30M
      }
5675
3.28M
  }
5676
6.22M
  if (ctxt->instate == XML_PARSER_EOF)
5677
881
      goto done;
5678
6.22M
  SKIP_BLANKS;
5679
6.22M
  if (RAW != '>') {
5680
26.8k
      xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5681
26.8k
              "xmlParseEntityDecl: entity %s not terminated\n", name);
5682
26.8k
      xmlHaltParser(ctxt);
5683
6.19M
  } else {
5684
6.19M
      if (inputid != ctxt->input->id) {
5685
2.49k
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5686
2.49k
                         "Entity declaration doesn't start and stop in"
5687
2.49k
                               " the same entity\n");
5688
2.49k
      }
5689
6.19M
      NEXT;
5690
6.19M
  }
5691
6.22M
  if (orig != NULL) {
5692
      /*
5693
       * Ugly mechanism to save the raw entity value.
5694
       */
5695
4.85M
      xmlEntityPtr cur = NULL;
5696
5697
4.85M
      if (isParameter) {
5698
2.88M
          if ((ctxt->sax != NULL) &&
5699
2.88M
        (ctxt->sax->getParameterEntity != NULL))
5700
2.88M
        cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5701
2.88M
      } else {
5702
1.96M
          if ((ctxt->sax != NULL) &&
5703
1.96M
        (ctxt->sax->getEntity != NULL))
5704
1.96M
        cur = ctxt->sax->getEntity(ctxt->userData, name);
5705
1.96M
    if ((cur == NULL) && (ctxt->userData==ctxt)) {
5706
101k
        cur = xmlSAX2GetEntity(ctxt, name);
5707
101k
    }
5708
1.96M
      }
5709
4.85M
            if ((cur != NULL) && (cur->orig == NULL)) {
5710
4.30M
    cur->orig = orig;
5711
4.30M
                orig = NULL;
5712
4.30M
      }
5713
4.85M
  }
5714
5715
6.22M
done:
5716
6.22M
  if (value != NULL) xmlFree(value);
5717
6.22M
  if (URI != NULL) xmlFree(URI);
5718
6.22M
  if (literal != NULL) xmlFree(literal);
5719
6.22M
        if (orig != NULL) xmlFree(orig);
5720
6.22M
    }
5721
6.25M
}
5722
5723
/**
5724
 * xmlParseDefaultDecl:
5725
 * @ctxt:  an XML parser context
5726
 * @value:  Receive a possible fixed default value for the attribute
5727
 *
5728
 * DEPRECATED: Internal function, don't use.
5729
 *
5730
 * Parse an attribute default declaration
5731
 *
5732
 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5733
 *
5734
 * [ VC: Required Attribute ]
5735
 * if the default declaration is the keyword #REQUIRED, then the
5736
 * attribute must be specified for all elements of the type in the
5737
 * attribute-list declaration.
5738
 *
5739
 * [ VC: Attribute Default Legal ]
5740
 * The declared default value must meet the lexical constraints of
5741
 * the declared attribute type c.f. xmlValidateAttributeDecl()
5742
 *
5743
 * [ VC: Fixed Attribute Default ]
5744
 * if an attribute has a default value declared with the #FIXED
5745
 * keyword, instances of that attribute must match the default value.
5746
 *
5747
 * [ WFC: No < in Attribute Values ]
5748
 * handled in xmlParseAttValue()
5749
 *
5750
 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5751
 *          or XML_ATTRIBUTE_FIXED.
5752
 */
5753
5754
int
5755
14.9M
xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5756
14.9M
    int val;
5757
14.9M
    xmlChar *ret;
5758
5759
14.9M
    *value = NULL;
5760
14.9M
    if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5761
986k
  SKIP(9);
5762
986k
  return(XML_ATTRIBUTE_REQUIRED);
5763
986k
    }
5764
13.9M
    if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5765
12.7M
  SKIP(8);
5766
12.7M
  return(XML_ATTRIBUTE_IMPLIED);
5767
12.7M
    }
5768
1.19M
    val = XML_ATTRIBUTE_NONE;
5769
1.19M
    if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5770
789k
  SKIP(6);
5771
789k
  val = XML_ATTRIBUTE_FIXED;
5772
789k
  if (SKIP_BLANKS == 0) {
5773
1.05k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5774
1.05k
         "Space required after '#FIXED'\n");
5775
1.05k
  }
5776
789k
    }
5777
1.19M
    ret = xmlParseAttValue(ctxt);
5778
1.19M
    ctxt->instate = XML_PARSER_DTD;
5779
1.19M
    if (ret == NULL) {
5780
20.3k
  xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5781
20.3k
           "Attribute default value declaration error\n");
5782
20.3k
    } else
5783
1.17M
        *value = ret;
5784
1.19M
    return(val);
5785
13.9M
}
5786
5787
/**
5788
 * xmlParseNotationType:
5789
 * @ctxt:  an XML parser context
5790
 *
5791
 * DEPRECATED: Internal function, don't use.
5792
 *
5793
 * parse an Notation attribute type.
5794
 *
5795
 * Note: the leading 'NOTATION' S part has already being parsed...
5796
 *
5797
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5798
 *
5799
 * [ VC: Notation Attributes ]
5800
 * Values of this type must match one of the notation names included
5801
 * in the declaration; all notation names in the declaration must be declared.
5802
 *
5803
 * Returns: the notation attribute tree built while parsing
5804
 */
5805
5806
xmlEnumerationPtr
5807
330k
xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5808
330k
    const xmlChar *name;
5809
330k
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5810
5811
330k
    if (RAW != '(') {
5812
1.01k
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5813
1.01k
  return(NULL);
5814
1.01k
    }
5815
329k
    SHRINK;
5816
334k
    do {
5817
334k
        NEXT;
5818
334k
  SKIP_BLANKS;
5819
334k
        name = xmlParseName(ctxt);
5820
334k
  if (name == NULL) {
5821
700
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5822
700
         "Name expected in NOTATION declaration\n");
5823
700
            xmlFreeEnumeration(ret);
5824
700
      return(NULL);
5825
700
  }
5826
333k
  tmp = ret;
5827
344k
  while (tmp != NULL) {
5828
12.3k
      if (xmlStrEqual(name, tmp->name)) {
5829
1.45k
    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5830
1.45k
    "standalone: attribute notation value token %s duplicated\n",
5831
1.45k
         name, NULL);
5832
1.45k
    if (!xmlDictOwns(ctxt->dict, name))
5833
0
        xmlFree((xmlChar *) name);
5834
1.45k
    break;
5835
1.45k
      }
5836
10.9k
      tmp = tmp->next;
5837
10.9k
  }
5838
333k
  if (tmp == NULL) {
5839
331k
      cur = xmlCreateEnumeration(name);
5840
331k
      if (cur == NULL) {
5841
0
                xmlFreeEnumeration(ret);
5842
0
                return(NULL);
5843
0
            }
5844
331k
      if (last == NULL) ret = last = cur;
5845
3.13k
      else {
5846
3.13k
    last->next = cur;
5847
3.13k
    last = cur;
5848
3.13k
      }
5849
331k
  }
5850
333k
  SKIP_BLANKS;
5851
333k
    } while (RAW == '|');
5852
328k
    if (RAW != ')') {
5853
5.17k
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5854
5.17k
        xmlFreeEnumeration(ret);
5855
5.17k
  return(NULL);
5856
5.17k
    }
5857
323k
    NEXT;
5858
323k
    return(ret);
5859
328k
}
5860
5861
/**
5862
 * xmlParseEnumerationType:
5863
 * @ctxt:  an XML parser context
5864
 *
5865
 * DEPRECATED: Internal function, don't use.
5866
 *
5867
 * parse an Enumeration attribute type.
5868
 *
5869
 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5870
 *
5871
 * [ VC: Enumeration ]
5872
 * Values of this type must match one of the Nmtoken tokens in
5873
 * the declaration
5874
 *
5875
 * Returns: the enumeration attribute tree built while parsing
5876
 */
5877
5878
xmlEnumerationPtr
5879
1.15M
xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5880
1.15M
    xmlChar *name;
5881
1.15M
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5882
5883
1.15M
    if (RAW != '(') {
5884
28.0k
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5885
28.0k
  return(NULL);
5886
28.0k
    }
5887
1.12M
    SHRINK;
5888
3.15M
    do {
5889
3.15M
        NEXT;
5890
3.15M
  SKIP_BLANKS;
5891
3.15M
        name = xmlParseNmtoken(ctxt);
5892
3.15M
  if (name == NULL) {
5893
1.99k
      xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5894
1.99k
      return(ret);
5895
1.99k
  }
5896
3.15M
  tmp = ret;
5897
7.95M
  while (tmp != NULL) {
5898
4.80M
      if (xmlStrEqual(name, tmp->name)) {
5899
891
    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5900
891
    "standalone: attribute enumeration value token %s duplicated\n",
5901
891
         name, NULL);
5902
891
    if (!xmlDictOwns(ctxt->dict, name))
5903
891
        xmlFree(name);
5904
891
    break;
5905
891
      }
5906
4.80M
      tmp = tmp->next;
5907
4.80M
  }
5908
3.15M
  if (tmp == NULL) {
5909
3.15M
      cur = xmlCreateEnumeration(name);
5910
3.15M
      if (!xmlDictOwns(ctxt->dict, name))
5911
3.15M
    xmlFree(name);
5912
3.15M
      if (cur == NULL) {
5913
0
                xmlFreeEnumeration(ret);
5914
0
                return(NULL);
5915
0
            }
5916
3.15M
      if (last == NULL) ret = last = cur;
5917
2.03M
      else {
5918
2.03M
    last->next = cur;
5919
2.03M
    last = cur;
5920
2.03M
      }
5921
3.15M
  }
5922
3.15M
  SKIP_BLANKS;
5923
3.15M
    } while (RAW == '|');
5924
1.12M
    if (RAW != ')') {
5925
21.2k
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5926
21.2k
  return(ret);
5927
21.2k
    }
5928
1.09M
    NEXT;
5929
1.09M
    return(ret);
5930
1.12M
}
5931
5932
/**
5933
 * xmlParseEnumeratedType:
5934
 * @ctxt:  an XML parser context
5935
 * @tree:  the enumeration tree built while parsing
5936
 *
5937
 * DEPRECATED: Internal function, don't use.
5938
 *
5939
 * parse an Enumerated attribute type.
5940
 *
5941
 * [57] EnumeratedType ::= NotationType | Enumeration
5942
 *
5943
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5944
 *
5945
 *
5946
 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5947
 */
5948
5949
int
5950
1.48M
xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5951
1.48M
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5952
337k
  SKIP(8);
5953
337k
  if (SKIP_BLANKS == 0) {
5954
7.07k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5955
7.07k
         "Space required after 'NOTATION'\n");
5956
7.07k
      return(0);
5957
7.07k
  }
5958
330k
  *tree = xmlParseNotationType(ctxt);
5959
330k
  if (*tree == NULL) return(0);
5960
323k
  return(XML_ATTRIBUTE_NOTATION);
5961
330k
    }
5962
1.15M
    *tree = xmlParseEnumerationType(ctxt);
5963
1.15M
    if (*tree == NULL) return(0);
5964
1.12M
    return(XML_ATTRIBUTE_ENUMERATION);
5965
1.15M
}
5966
5967
/**
5968
 * xmlParseAttributeType:
5969
 * @ctxt:  an XML parser context
5970
 * @tree:  the enumeration tree built while parsing
5971
 *
5972
 * DEPRECATED: Internal function, don't use.
5973
 *
5974
 * parse the Attribute list def for an element
5975
 *
5976
 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5977
 *
5978
 * [55] StringType ::= 'CDATA'
5979
 *
5980
 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5981
 *                        'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5982
 *
5983
 * Validity constraints for attribute values syntax are checked in
5984
 * xmlValidateAttributeValue()
5985
 *
5986
 * [ VC: ID ]
5987
 * Values of type ID must match the Name production. A name must not
5988
 * appear more than once in an XML document as a value of this type;
5989
 * i.e., ID values must uniquely identify the elements which bear them.
5990
 *
5991
 * [ VC: One ID per Element Type ]
5992
 * No element type may have more than one ID attribute specified.
5993
 *
5994
 * [ VC: ID Attribute Default ]
5995
 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5996
 *
5997
 * [ VC: IDREF ]
5998
 * Values of type IDREF must match the Name production, and values
5999
 * of type IDREFS must match Names; each IDREF Name must match the value
6000
 * of an ID attribute on some element in the XML document; i.e. IDREF
6001
 * values must match the value of some ID attribute.
6002
 *
6003
 * [ VC: Entity Name ]
6004
 * Values of type ENTITY must match the Name production, values
6005
 * of type ENTITIES must match Names; each Entity Name must match the
6006
 * name of an unparsed entity declared in the DTD.
6007
 *
6008
 * [ VC: Name Token ]
6009
 * Values of type NMTOKEN must match the Nmtoken production; values
6010
 * of type NMTOKENS must match Nmtokens.
6011
 *
6012
 * Returns the attribute type
6013
 */
6014
int
6015
15.0M
xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
6016
15.0M
    SHRINK;
6017
15.0M
    if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
6018
4.42M
  SKIP(5);
6019
4.42M
  return(XML_ATTRIBUTE_CDATA);
6020
10.6M
     } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
6021
420k
  SKIP(6);
6022
420k
  return(XML_ATTRIBUTE_IDREFS);
6023
10.1M
     } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
6024
749k
  SKIP(5);
6025
749k
  return(XML_ATTRIBUTE_IDREF);
6026
9.44M
     } else if ((RAW == 'I') && (NXT(1) == 'D')) {
6027
3.99M
        SKIP(2);
6028
3.99M
  return(XML_ATTRIBUTE_ID);
6029
5.44M
     } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
6030
36.1k
  SKIP(6);
6031
36.1k
  return(XML_ATTRIBUTE_ENTITY);
6032
5.41M
     } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
6033
221k
  SKIP(8);
6034
221k
  return(XML_ATTRIBUTE_ENTITIES);
6035
5.18M
     } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
6036
383k
  SKIP(8);
6037
383k
  return(XML_ATTRIBUTE_NMTOKENS);
6038
4.80M
     } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
6039
3.31M
  SKIP(7);
6040
3.31M
  return(XML_ATTRIBUTE_NMTOKEN);
6041
3.31M
     }
6042
1.48M
     return(xmlParseEnumeratedType(ctxt, tree));
6043
15.0M
}
6044
6045
/**
6046
 * xmlParseAttributeListDecl:
6047
 * @ctxt:  an XML parser context
6048
 *
6049
 * DEPRECATED: Internal function, don't use.
6050
 *
6051
 * Parse an attribute list declaration for an element. Always consumes '<!'.
6052
 *
6053
 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
6054
 *
6055
 * [53] AttDef ::= S Name S AttType S DefaultDecl
6056
 *
6057
 */
6058
void
6059
6.53M
xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
6060
6.53M
    const xmlChar *elemName;
6061
6.53M
    const xmlChar *attrName;
6062
6.53M
    xmlEnumerationPtr tree;
6063
6064
6.53M
    if ((CUR != '<') || (NXT(1) != '!'))
6065
0
        return;
6066
6.53M
    SKIP(2);
6067
6068
6.53M
    if (CMP7(CUR_PTR, 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
6069
6.53M
  int inputid = ctxt->input->id;
6070
6071
6.53M
  SKIP(7);
6072
6.53M
  if (SKIP_BLANKS == 0) {
6073
101k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6074
101k
                     "Space required after '<!ATTLIST'\n");
6075
101k
  }
6076
6.53M
        elemName = xmlParseName(ctxt);
6077
6.53M
  if (elemName == NULL) {
6078
12.7k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6079
12.7k
         "ATTLIST: no name for Element\n");
6080
12.7k
      return;
6081
12.7k
  }
6082
6.52M
  SKIP_BLANKS;
6083
6.52M
  GROW;
6084
21.4M
  while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
6085
15.1M
      int type;
6086
15.1M
      int def;
6087
15.1M
      xmlChar *defaultValue = NULL;
6088
6089
15.1M
      GROW;
6090
15.1M
            tree = NULL;
6091
15.1M
      attrName = xmlParseName(ctxt);
6092
15.1M
      if (attrName == NULL) {
6093
104k
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6094
104k
             "ATTLIST: no name for Attribute\n");
6095
104k
    break;
6096
104k
      }
6097
15.0M
      GROW;
6098
15.0M
      if (SKIP_BLANKS == 0) {
6099
13.8k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6100
13.8k
            "Space required after the attribute name\n");
6101
13.8k
    break;
6102
13.8k
      }
6103
6104
15.0M
      type = xmlParseAttributeType(ctxt, &tree);
6105
15.0M
      if (type <= 0) {
6106
43.6k
          break;
6107
43.6k
      }
6108
6109
14.9M
      GROW;
6110
14.9M
      if (SKIP_BLANKS == 0) {
6111
56.8k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6112
56.8k
             "Space required after the attribute type\n");
6113
56.8k
          if (tree != NULL)
6114
22.3k
        xmlFreeEnumeration(tree);
6115
56.8k
    break;
6116
56.8k
      }
6117
6118
14.9M
      def = xmlParseDefaultDecl(ctxt, &defaultValue);
6119
14.9M
      if (def <= 0) {
6120
0
                if (defaultValue != NULL)
6121
0
        xmlFree(defaultValue);
6122
0
          if (tree != NULL)
6123
0
        xmlFreeEnumeration(tree);
6124
0
          break;
6125
0
      }
6126
14.9M
      if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6127
351k
          xmlAttrNormalizeSpace(defaultValue, defaultValue);
6128
6129
14.9M
      GROW;
6130
14.9M
            if (RAW != '>') {
6131
12.6M
    if (SKIP_BLANKS == 0) {
6132
34.9k
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6133
34.9k
      "Space required after the attribute default value\n");
6134
34.9k
        if (defaultValue != NULL)
6135
15.8k
      xmlFree(defaultValue);
6136
34.9k
        if (tree != NULL)
6137
2.35k
      xmlFreeEnumeration(tree);
6138
34.9k
        break;
6139
34.9k
    }
6140
12.6M
      }
6141
14.9M
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6142
14.9M
    (ctxt->sax->attributeDecl != NULL))
6143
13.8M
    ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6144
13.8M
                          type, def, defaultValue, tree);
6145
1.04M
      else if (tree != NULL)
6146
186k
    xmlFreeEnumeration(tree);
6147
6148
14.9M
      if ((ctxt->sax2) && (defaultValue != NULL) &&
6149
14.9M
          (def != XML_ATTRIBUTE_IMPLIED) &&
6150
14.9M
    (def != XML_ATTRIBUTE_REQUIRED)) {
6151
685k
    xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6152
685k
      }
6153
14.9M
      if (ctxt->sax2) {
6154
8.40M
    xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6155
8.40M
      }
6156
14.9M
      if (defaultValue != NULL)
6157
1.15M
          xmlFree(defaultValue);
6158
14.9M
      GROW;
6159
14.9M
  }
6160
6.52M
  if (RAW == '>') {
6161
6.30M
      if (inputid != ctxt->input->id) {
6162
6.29k
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6163
6.29k
                               "Attribute list declaration doesn't start and"
6164
6.29k
                               " stop in the same entity\n");
6165
6.29k
      }
6166
6.30M
      NEXT;
6167
6.30M
  }
6168
6.52M
    }
6169
6.53M
}
6170
6171
/**
6172
 * xmlParseElementMixedContentDecl:
6173
 * @ctxt:  an XML parser context
6174
 * @inputchk:  the input used for the current entity, needed for boundary checks
6175
 *
6176
 * DEPRECATED: Internal function, don't use.
6177
 *
6178
 * parse the declaration for a Mixed Element content
6179
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6180
 *
6181
 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6182
 *                '(' S? '#PCDATA' S? ')'
6183
 *
6184
 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6185
 *
6186
 * [ VC: No Duplicate Types ]
6187
 * The same name must not appear more than once in a single
6188
 * mixed-content declaration.
6189
 *
6190
 * returns: the list of the xmlElementContentPtr describing the element choices
6191
 */
6192
xmlElementContentPtr
6193
2.07M
xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6194
2.07M
    xmlElementContentPtr ret = NULL, cur = NULL, n;
6195
2.07M
    const xmlChar *elem = NULL;
6196
6197
2.07M
    GROW;
6198
2.07M
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6199
2.07M
  SKIP(7);
6200
2.07M
  SKIP_BLANKS;
6201
2.07M
  SHRINK;
6202
2.07M
  if (RAW == ')') {
6203
1.18M
      if (ctxt->input->id != inputchk) {
6204
53
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6205
53
                               "Element content declaration doesn't start and"
6206
53
                               " stop in the same entity\n");
6207
53
      }
6208
1.18M
      NEXT;
6209
1.18M
      ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6210
1.18M
      if (ret == NULL)
6211
0
          return(NULL);
6212
1.18M
      if (RAW == '*') {
6213
237
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6214
237
    NEXT;
6215
237
      }
6216
1.18M
      return(ret);
6217
1.18M
  }
6218
888k
  if ((RAW == '(') || (RAW == '|')) {
6219
887k
      ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6220
887k
      if (ret == NULL) return(NULL);
6221
887k
  }
6222
10.1M
  while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
6223
9.29M
      NEXT;
6224
9.29M
      if (elem == NULL) {
6225
887k
          ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6226
887k
    if (ret == NULL) {
6227
0
        xmlFreeDocElementContent(ctxt->myDoc, cur);
6228
0
                    return(NULL);
6229
0
                }
6230
887k
    ret->c1 = cur;
6231
887k
    if (cur != NULL)
6232
887k
        cur->parent = ret;
6233
887k
    cur = ret;
6234
8.40M
      } else {
6235
8.40M
          n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6236
8.40M
    if (n == NULL) {
6237
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6238
0
                    return(NULL);
6239
0
                }
6240
8.40M
    n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6241
8.40M
    if (n->c1 != NULL)
6242
8.40M
        n->c1->parent = n;
6243
8.40M
          cur->c2 = n;
6244
8.40M
    if (n != NULL)
6245
8.40M
        n->parent = cur;
6246
8.40M
    cur = n;
6247
8.40M
      }
6248
9.29M
      SKIP_BLANKS;
6249
9.29M
      elem = xmlParseName(ctxt);
6250
9.29M
      if (elem == NULL) {
6251
902
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6252
902
      "xmlParseElementMixedContentDecl : Name expected\n");
6253
902
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6254
902
    return(NULL);
6255
902
      }
6256
9.29M
      SKIP_BLANKS;
6257
9.29M
      GROW;
6258
9.29M
  }
6259
887k
  if ((RAW == ')') && (NXT(1) == '*')) {
6260
883k
      if (elem != NULL) {
6261
883k
    cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6262
883k
                                   XML_ELEMENT_CONTENT_ELEMENT);
6263
883k
    if (cur->c2 != NULL)
6264
883k
        cur->c2->parent = cur;
6265
883k
            }
6266
883k
            if (ret != NULL)
6267
883k
                ret->ocur = XML_ELEMENT_CONTENT_MULT;
6268
883k
      if (ctxt->input->id != inputchk) {
6269
528
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6270
528
                               "Element content declaration doesn't start and"
6271
528
                               " stop in the same entity\n");
6272
528
      }
6273
883k
      SKIP(2);
6274
883k
  } else {
6275
3.70k
      xmlFreeDocElementContent(ctxt->myDoc, ret);
6276
3.70k
      xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6277
3.70k
      return(NULL);
6278
3.70k
  }
6279
6280
887k
    } else {
6281
0
  xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6282
0
    }
6283
883k
    return(ret);
6284
2.07M
}
6285
6286
/**
6287
 * xmlParseElementChildrenContentDeclPriv:
6288
 * @ctxt:  an XML parser context
6289
 * @inputchk:  the input used for the current entity, needed for boundary checks
6290
 * @depth: the level of recursion
6291
 *
6292
 * parse the declaration for a Mixed Element content
6293
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6294
 *
6295
 *
6296
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6297
 *
6298
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6299
 *
6300
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6301
 *
6302
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6303
 *
6304
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6305
 * TODO Parameter-entity replacement text must be properly nested
6306
 *  with parenthesized groups. That is to say, if either of the
6307
 *  opening or closing parentheses in a choice, seq, or Mixed
6308
 *  construct is contained in the replacement text for a parameter
6309
 *  entity, both must be contained in the same replacement text. For
6310
 *  interoperability, if a parameter-entity reference appears in a
6311
 *  choice, seq, or Mixed construct, its replacement text should not
6312
 *  be empty, and neither the first nor last non-blank character of
6313
 *  the replacement text should be a connector (| or ,).
6314
 *
6315
 * Returns the tree of xmlElementContentPtr describing the element
6316
 *          hierarchy.
6317
 */
6318
static xmlElementContentPtr
6319
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6320
2.94M
                                       int depth) {
6321
2.94M
    xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6322
2.94M
    const xmlChar *elem;
6323
2.94M
    xmlChar type = 0;
6324
6325
2.94M
    if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6326
2.94M
        (depth >  2048)) {
6327
84
        xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6328
84
"xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6329
84
                          depth);
6330
84
  return(NULL);
6331
84
    }
6332
2.94M
    SKIP_BLANKS;
6333
2.94M
    GROW;
6334
2.94M
    if (RAW == '(') {
6335
173k
  int inputid = ctxt->input->id;
6336
6337
        /* Recurse on first child */
6338
173k
  NEXT;
6339
173k
  SKIP_BLANKS;
6340
173k
        cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6341
173k
                                                           depth + 1);
6342
173k
        if (cur == NULL)
6343
76.0k
            return(NULL);
6344
97.3k
  SKIP_BLANKS;
6345
97.3k
  GROW;
6346
2.77M
    } else {
6347
2.77M
  elem = xmlParseName(ctxt);
6348
2.77M
  if (elem == NULL) {
6349
6.89k
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6350
6.89k
      return(NULL);
6351
6.89k
  }
6352
2.76M
        cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6353
2.76M
  if (cur == NULL) {
6354
0
      xmlErrMemory(ctxt, NULL);
6355
0
      return(NULL);
6356
0
  }
6357
2.76M
  GROW;
6358
2.76M
  if (RAW == '?') {
6359
190k
      cur->ocur = XML_ELEMENT_CONTENT_OPT;
6360
190k
      NEXT;
6361
2.57M
  } else if (RAW == '*') {
6362
138k
      cur->ocur = XML_ELEMENT_CONTENT_MULT;
6363
138k
      NEXT;
6364
2.44M
  } else if (RAW == '+') {
6365
620k
      cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6366
620k
      NEXT;
6367
1.81M
  } else {
6368
1.81M
      cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6369
1.81M
  }
6370
2.76M
  GROW;
6371
2.76M
    }
6372
2.86M
    SKIP_BLANKS;
6373
2.86M
    SHRINK;
6374
13.7M
    while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
6375
        /*
6376
   * Each loop we parse one separator and one element.
6377
   */
6378
10.8M
        if (RAW == ',') {
6379
2.41M
      if (type == 0) type = CUR;
6380
6381
      /*
6382
       * Detect "Name | Name , Name" error
6383
       */
6384
1.44M
      else if (type != CUR) {
6385
176
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6386
176
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6387
176
                      type);
6388
176
    if ((last != NULL) && (last != ret))
6389
176
        xmlFreeDocElementContent(ctxt->myDoc, last);
6390
176
    if (ret != NULL)
6391
176
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6392
176
    return(NULL);
6393
176
      }
6394
2.41M
      NEXT;
6395
6396
2.41M
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6397
2.41M
      if (op == NULL) {
6398
0
    if ((last != NULL) && (last != ret))
6399
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6400
0
          xmlFreeDocElementContent(ctxt->myDoc, ret);
6401
0
    return(NULL);
6402
0
      }
6403
2.41M
      if (last == NULL) {
6404
972k
    op->c1 = ret;
6405
972k
    if (ret != NULL)
6406
972k
        ret->parent = op;
6407
972k
    ret = cur = op;
6408
1.44M
      } else {
6409
1.44M
          cur->c2 = op;
6410
1.44M
    if (op != NULL)
6411
1.44M
        op->parent = cur;
6412
1.44M
    op->c1 = last;
6413
1.44M
    if (last != NULL)
6414
1.44M
        last->parent = op;
6415
1.44M
    cur =op;
6416
1.44M
    last = NULL;
6417
1.44M
      }
6418
8.44M
  } else if (RAW == '|') {
6419
8.43M
      if (type == 0) type = CUR;
6420
6421
      /*
6422
       * Detect "Name , Name | Name" error
6423
       */
6424
7.49M
      else if (type != CUR) {
6425
135
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6426
135
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6427
135
          type);
6428
135
    if ((last != NULL) && (last != ret))
6429
135
        xmlFreeDocElementContent(ctxt->myDoc, last);
6430
135
    if (ret != NULL)
6431
135
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6432
135
    return(NULL);
6433
135
      }
6434
8.43M
      NEXT;
6435
6436
8.43M
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6437
8.43M
      if (op == NULL) {
6438
0
    if ((last != NULL) && (last != ret))
6439
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6440
0
    if (ret != NULL)
6441
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6442
0
    return(NULL);
6443
0
      }
6444
8.43M
      if (last == NULL) {
6445
935k
    op->c1 = ret;
6446
935k
    if (ret != NULL)
6447
935k
        ret->parent = op;
6448
935k
    ret = cur = op;
6449
7.49M
      } else {
6450
7.49M
          cur->c2 = op;
6451
7.49M
    if (op != NULL)
6452
7.49M
        op->parent = cur;
6453
7.49M
    op->c1 = last;
6454
7.49M
    if (last != NULL)
6455
7.49M
        last->parent = op;
6456
7.49M
    cur =op;
6457
7.49M
    last = NULL;
6458
7.49M
      }
6459
8.43M
  } else {
6460
14.8k
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6461
14.8k
      if ((last != NULL) && (last != ret))
6462
4.82k
          xmlFreeDocElementContent(ctxt->myDoc, last);
6463
14.8k
      if (ret != NULL)
6464
14.8k
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6465
14.8k
      return(NULL);
6466
14.8k
  }
6467
10.8M
  GROW;
6468
10.8M
  SKIP_BLANKS;
6469
10.8M
  GROW;
6470
10.8M
  if (RAW == '(') {
6471
476k
      int inputid = ctxt->input->id;
6472
      /* Recurse on second child */
6473
476k
      NEXT;
6474
476k
      SKIP_BLANKS;
6475
476k
      last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6476
476k
                                                          depth + 1);
6477
476k
            if (last == NULL) {
6478
1.82k
    if (ret != NULL)
6479
1.82k
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6480
1.82k
    return(NULL);
6481
1.82k
            }
6482
474k
      SKIP_BLANKS;
6483
10.3M
  } else {
6484
10.3M
      elem = xmlParseName(ctxt);
6485
10.3M
      if (elem == NULL) {
6486
2.57k
    xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6487
2.57k
    if (ret != NULL)
6488
2.57k
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6489
2.57k
    return(NULL);
6490
2.57k
      }
6491
10.3M
      last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6492
10.3M
      if (last == NULL) {
6493
0
    if (ret != NULL)
6494
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6495
0
    return(NULL);
6496
0
      }
6497
10.3M
      if (RAW == '?') {
6498
784k
    last->ocur = XML_ELEMENT_CONTENT_OPT;
6499
784k
    NEXT;
6500
9.58M
      } else if (RAW == '*') {
6501
510k
    last->ocur = XML_ELEMENT_CONTENT_MULT;
6502
510k
    NEXT;
6503
9.07M
      } else if (RAW == '+') {
6504
133k
    last->ocur = XML_ELEMENT_CONTENT_PLUS;
6505
133k
    NEXT;
6506
8.94M
      } else {
6507
8.94M
    last->ocur = XML_ELEMENT_CONTENT_ONCE;
6508
8.94M
      }
6509
10.3M
  }
6510
10.8M
  SKIP_BLANKS;
6511
10.8M
  GROW;
6512
10.8M
    }
6513
2.84M
    if ((cur != NULL) && (last != NULL)) {
6514
1.89M
        cur->c2 = last;
6515
1.89M
  if (last != NULL)
6516
1.89M
      last->parent = cur;
6517
1.89M
    }
6518
2.84M
    if (ctxt->input->id != inputchk) {
6519
1.59k
  xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6520
1.59k
                       "Element content declaration doesn't start and stop in"
6521
1.59k
                       " the same entity\n");
6522
1.59k
    }
6523
2.84M
    NEXT;
6524
2.84M
    if (RAW == '?') {
6525
56.4k
  if (ret != NULL) {
6526
56.4k
      if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6527
56.4k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6528
218
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6529
56.2k
      else
6530
56.2k
          ret->ocur = XML_ELEMENT_CONTENT_OPT;
6531
56.4k
  }
6532
56.4k
  NEXT;
6533
2.79M
    } else if (RAW == '*') {
6534
563k
  if (ret != NULL) {
6535
563k
      ret->ocur = XML_ELEMENT_CONTENT_MULT;
6536
563k
      cur = ret;
6537
      /*
6538
       * Some normalization:
6539
       * (a | b* | c?)* == (a | b | c)*
6540
       */
6541
5.20M
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6542
4.64M
    if ((cur->c1 != NULL) &&
6543
4.64M
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6544
4.64M
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6545
22.0k
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6546
4.64M
    if ((cur->c2 != NULL) &&
6547
4.64M
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6548
4.64M
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6549
3.73k
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6550
4.64M
    cur = cur->c2;
6551
4.64M
      }
6552
563k
  }
6553
563k
  NEXT;
6554
2.22M
    } else if (RAW == '+') {
6555
460k
  if (ret != NULL) {
6556
460k
      int found = 0;
6557
6558
460k
      if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6559
460k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6560
325
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6561
459k
      else
6562
459k
          ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6563
      /*
6564
       * Some normalization:
6565
       * (a | b*)+ == (a | b)*
6566
       * (a | b?)+ == (a | b)*
6567
       */
6568
830k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6569
370k
    if ((cur->c1 != NULL) &&
6570
370k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6571
370k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6572
1.29k
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6573
1.29k
        found = 1;
6574
1.29k
    }
6575
370k
    if ((cur->c2 != NULL) &&
6576
370k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6577
370k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6578
337
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6579
337
        found = 1;
6580
337
    }
6581
370k
    cur = cur->c2;
6582
370k
      }
6583
460k
      if (found)
6584
1.37k
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6585
460k
  }
6586
460k
  NEXT;
6587
460k
    }
6588
2.84M
    return(ret);
6589
2.86M
}
6590
6591
/**
6592
 * xmlParseElementChildrenContentDecl:
6593
 * @ctxt:  an XML parser context
6594
 * @inputchk:  the input used for the current entity, needed for boundary checks
6595
 *
6596
 * DEPRECATED: Internal function, don't use.
6597
 *
6598
 * parse the declaration for a Mixed Element content
6599
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6600
 *
6601
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6602
 *
6603
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6604
 *
6605
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6606
 *
6607
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6608
 *
6609
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6610
 * TODO Parameter-entity replacement text must be properly nested
6611
 *  with parenthesized groups. That is to say, if either of the
6612
 *  opening or closing parentheses in a choice, seq, or Mixed
6613
 *  construct is contained in the replacement text for a parameter
6614
 *  entity, both must be contained in the same replacement text. For
6615
 *  interoperability, if a parameter-entity reference appears in a
6616
 *  choice, seq, or Mixed construct, its replacement text should not
6617
 *  be empty, and neither the first nor last non-blank character of
6618
 *  the replacement text should be a connector (| or ,).
6619
 *
6620
 * Returns the tree of xmlElementContentPtr describing the element
6621
 *          hierarchy.
6622
 */
6623
xmlElementContentPtr
6624
0
xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6625
    /* stub left for API/ABI compat */
6626
0
    return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6627
0
}
6628
6629
/**
6630
 * xmlParseElementContentDecl:
6631
 * @ctxt:  an XML parser context
6632
 * @name:  the name of the element being defined.
6633
 * @result:  the Element Content pointer will be stored here if any
6634
 *
6635
 * DEPRECATED: Internal function, don't use.
6636
 *
6637
 * parse the declaration for an Element content either Mixed or Children,
6638
 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6639
 *
6640
 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6641
 *
6642
 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6643
 */
6644
6645
int
6646
xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6647
4.37M
                           xmlElementContentPtr *result) {
6648
6649
4.37M
    xmlElementContentPtr tree = NULL;
6650
4.37M
    int inputid = ctxt->input->id;
6651
4.37M
    int res;
6652
6653
4.37M
    *result = NULL;
6654
6655
4.37M
    if (RAW != '(') {
6656
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6657
0
    "xmlParseElementContentDecl : %s '(' expected\n", name);
6658
0
  return(-1);
6659
0
    }
6660
4.37M
    NEXT;
6661
4.37M
    GROW;
6662
4.37M
    if (ctxt->instate == XML_PARSER_EOF)
6663
0
        return(-1);
6664
4.37M
    SKIP_BLANKS;
6665
4.37M
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6666
2.07M
        tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6667
2.07M
  res = XML_ELEMENT_TYPE_MIXED;
6668
2.29M
    } else {
6669
2.29M
        tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6670
2.29M
  res = XML_ELEMENT_TYPE_ELEMENT;
6671
2.29M
    }
6672
4.37M
    SKIP_BLANKS;
6673
4.37M
    *result = tree;
6674
4.37M
    return(res);
6675
4.37M
}
6676
6677
/**
6678
 * xmlParseElementDecl:
6679
 * @ctxt:  an XML parser context
6680
 *
6681
 * DEPRECATED: Internal function, don't use.
6682
 *
6683
 * Parse an element declaration. Always consumes '<!'.
6684
 *
6685
 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6686
 *
6687
 * [ VC: Unique Element Type Declaration ]
6688
 * No element type may be declared more than once
6689
 *
6690
 * Returns the type of the element, or -1 in case of error
6691
 */
6692
int
6693
6.17M
xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6694
6.17M
    const xmlChar *name;
6695
6.17M
    int ret = -1;
6696
6.17M
    xmlElementContentPtr content  = NULL;
6697
6698
6.17M
    if ((CUR != '<') || (NXT(1) != '!'))
6699
0
        return(ret);
6700
6.17M
    SKIP(2);
6701
6702
    /* GROW; done in the caller */
6703
6.17M
    if (CMP7(CUR_PTR, 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6704
6.17M
  int inputid = ctxt->input->id;
6705
6706
6.17M
  SKIP(7);
6707
6.17M
  if (SKIP_BLANKS == 0) {
6708
2.16k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6709
2.16k
               "Space required after 'ELEMENT'\n");
6710
2.16k
      return(-1);
6711
2.16k
  }
6712
6.16M
        name = xmlParseName(ctxt);
6713
6.16M
  if (name == NULL) {
6714
1.63k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6715
1.63k
         "xmlParseElementDecl: no name for Element\n");
6716
1.63k
      return(-1);
6717
1.63k
  }
6718
6.16M
  if (SKIP_BLANKS == 0) {
6719
46.3k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6720
46.3k
         "Space required after the element name\n");
6721
46.3k
  }
6722
6.16M
  if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6723
699k
      SKIP(5);
6724
      /*
6725
       * Element must always be empty.
6726
       */
6727
699k
      ret = XML_ELEMENT_TYPE_EMPTY;
6728
5.46M
  } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6729
5.46M
             (NXT(2) == 'Y')) {
6730
460k
      SKIP(3);
6731
      /*
6732
       * Element is a generic container.
6733
       */
6734
460k
      ret = XML_ELEMENT_TYPE_ANY;
6735
5.00M
  } else if (RAW == '(') {
6736
4.37M
      ret = xmlParseElementContentDecl(ctxt, name, &content);
6737
4.37M
  } else {
6738
      /*
6739
       * [ WFC: PEs in Internal Subset ] error handling.
6740
       */
6741
637k
      if ((RAW == '%') && (ctxt->external == 0) &&
6742
637k
          (ctxt->inputNr == 1)) {
6743
325
    xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
6744
325
    "PEReference: forbidden within markup decl in internal subset\n");
6745
636k
      } else {
6746
636k
    xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6747
636k
          "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6748
636k
            }
6749
637k
      return(-1);
6750
637k
  }
6751
6752
5.53M
  SKIP_BLANKS;
6753
6754
5.53M
  if (RAW != '>') {
6755
26.6k
      xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6756
26.6k
      if (content != NULL) {
6757
6.98k
    xmlFreeDocElementContent(ctxt->myDoc, content);
6758
6.98k
      }
6759
5.50M
  } else {
6760
5.50M
      if (inputid != ctxt->input->id) {
6761
2.23k
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6762
2.23k
                               "Element declaration doesn't start and stop in"
6763
2.23k
                               " the same entity\n");
6764
2.23k
      }
6765
6766
5.50M
      NEXT;
6767
5.50M
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6768
5.50M
    (ctxt->sax->elementDecl != NULL)) {
6769
5.14M
    if (content != NULL)
6770
4.03M
        content->parent = NULL;
6771
5.14M
          ctxt->sax->elementDecl(ctxt->userData, name, ret,
6772
5.14M
                           content);
6773
5.14M
    if ((content != NULL) && (content->parent == NULL)) {
6774
        /*
6775
         * this is a trick: if xmlAddElementDecl is called,
6776
         * instead of copying the full tree it is plugged directly
6777
         * if called from the parser. Avoid duplicating the
6778
         * interfaces or change the API/ABI
6779
         */
6780
117k
        xmlFreeDocElementContent(ctxt->myDoc, content);
6781
117k
    }
6782
5.14M
      } else if (content != NULL) {
6783
294k
    xmlFreeDocElementContent(ctxt->myDoc, content);
6784
294k
      }
6785
5.50M
  }
6786
5.53M
    }
6787
5.53M
    return(ret);
6788
6.17M
}
6789
6790
/**
6791
 * xmlParseConditionalSections
6792
 * @ctxt:  an XML parser context
6793
 *
6794
 * Parse a conditional section. Always consumes '<!['.
6795
 *
6796
 * [61] conditionalSect ::= includeSect | ignoreSect
6797
 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6798
 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6799
 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6800
 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6801
 */
6802
6803
static void
6804
1.92M
xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6805
1.92M
    int *inputIds = NULL;
6806
1.92M
    size_t inputIdsSize = 0;
6807
1.92M
    size_t depth = 0;
6808
6809
3.86M
    while (ctxt->instate != XML_PARSER_EOF) {
6810
3.86M
        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6811
1.94M
            int id = ctxt->input->id;
6812
6813
1.94M
            SKIP(3);
6814
1.94M
            SKIP_BLANKS;
6815
6816
1.94M
            if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6817
1.90M
                SKIP(7);
6818
1.90M
                SKIP_BLANKS;
6819
1.90M
                if (RAW != '[') {
6820
220
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6821
220
                    xmlHaltParser(ctxt);
6822
220
                    goto error;
6823
220
                }
6824
1.90M
                if (ctxt->input->id != id) {
6825
65
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6826
65
                                   "All markup of the conditional section is"
6827
65
                                   " not in the same entity\n");
6828
65
                }
6829
1.90M
                NEXT;
6830
6831
1.90M
                if (inputIdsSize <= depth) {
6832
1.88M
                    int *tmp;
6833
6834
1.88M
                    inputIdsSize = (inputIdsSize == 0 ? 4 : inputIdsSize * 2);
6835
1.88M
                    tmp = (int *) xmlRealloc(inputIds,
6836
1.88M
                            inputIdsSize * sizeof(int));
6837
1.88M
                    if (tmp == NULL) {
6838
0
                        xmlErrMemory(ctxt, NULL);
6839
0
                        goto error;
6840
0
                    }
6841
1.88M
                    inputIds = tmp;
6842
1.88M
                }
6843
1.90M
                inputIds[depth] = id;
6844
1.90M
                depth++;
6845
1.90M
            } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6846
36.7k
                size_t ignoreDepth = 0;
6847
6848
36.7k
                SKIP(6);
6849
36.7k
                SKIP_BLANKS;
6850
36.7k
                if (RAW != '[') {
6851
120
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6852
120
                    xmlHaltParser(ctxt);
6853
120
                    goto error;
6854
120
                }
6855
36.6k
                if (ctxt->input->id != id) {
6856
7
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6857
7
                                   "All markup of the conditional section is"
6858
7
                                   " not in the same entity\n");
6859
7
                }
6860
36.6k
                NEXT;
6861
6862
15.7M
                while (RAW != 0) {
6863
15.7M
                    if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6864
390k
                        SKIP(3);
6865
390k
                        ignoreDepth++;
6866
                        /* Check for integer overflow */
6867
390k
                        if (ignoreDepth == 0) {
6868
0
                            xmlErrMemory(ctxt, NULL);
6869
0
                            goto error;
6870
0
                        }
6871
15.3M
                    } else if ((RAW == ']') && (NXT(1) == ']') &&
6872
15.3M
                               (NXT(2) == '>')) {
6873
367k
                        if (ignoreDepth == 0)
6874
34.0k
                            break;
6875
333k
                        SKIP(3);
6876
333k
                        ignoreDepth--;
6877
14.9M
                    } else {
6878
14.9M
                        NEXT;
6879
14.9M
                    }
6880
15.7M
                }
6881
6882
36.6k
    if (RAW == 0) {
6883
2.56k
        xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6884
2.56k
                    goto error;
6885
2.56k
    }
6886
34.0k
                if (ctxt->input->id != id) {
6887
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6888
0
                                   "All markup of the conditional section is"
6889
0
                                   " not in the same entity\n");
6890
0
                }
6891
34.0k
                SKIP(3);
6892
34.0k
            } else {
6893
1.97k
                xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6894
1.97k
                xmlHaltParser(ctxt);
6895
1.97k
                goto error;
6896
1.97k
            }
6897
1.94M
        } else if ((depth > 0) &&
6898
1.92M
                   (RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6899
1.88M
            depth--;
6900
1.88M
            if (ctxt->input->id != inputIds[depth]) {
6901
276
                xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6902
276
                               "All markup of the conditional section is not"
6903
276
                               " in the same entity\n");
6904
276
            }
6905
1.88M
            SKIP(3);
6906
1.88M
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
6907
31.9k
            xmlParseMarkupDecl(ctxt);
6908
31.9k
        } else {
6909
2.27k
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6910
2.27k
            xmlHaltParser(ctxt);
6911
2.27k
            goto error;
6912
2.27k
        }
6913
6914
3.85M
        if (depth == 0)
6915
1.91M
            break;
6916
6917
1.94M
        SKIP_BLANKS;
6918
1.94M
        GROW;
6919
1.94M
    }
6920
6921
1.92M
error:
6922
1.92M
    xmlFree(inputIds);
6923
1.92M
}
6924
6925
/**
6926
 * xmlParseMarkupDecl:
6927
 * @ctxt:  an XML parser context
6928
 *
6929
 * DEPRECATED: Internal function, don't use.
6930
 *
6931
 * Parse markup declarations. Always consumes '<!' or '<?'.
6932
 *
6933
 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6934
 *                     NotationDecl | PI | Comment
6935
 *
6936
 * [ VC: Proper Declaration/PE Nesting ]
6937
 * Parameter-entity replacement text must be properly nested with
6938
 * markup declarations. That is to say, if either the first character
6939
 * or the last character of a markup declaration (markupdecl above) is
6940
 * contained in the replacement text for a parameter-entity reference,
6941
 * both must be contained in the same replacement text.
6942
 *
6943
 * [ WFC: PEs in Internal Subset ]
6944
 * In the internal DTD subset, parameter-entity references can occur
6945
 * only where markup declarations can occur, not within markup declarations.
6946
 * (This does not apply to references that occur in external parameter
6947
 * entities or to the external subset.)
6948
 */
6949
void
6950
406M
xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6951
406M
    GROW;
6952
406M
    if (CUR == '<') {
6953
406M
        if (NXT(1) == '!') {
6954
403M
      switch (NXT(2)) {
6955
12.4M
          case 'E':
6956
12.4M
        if (NXT(3) == 'L')
6957
6.17M
      xmlParseElementDecl(ctxt);
6958
6.25M
        else if (NXT(3) == 'N')
6959
6.25M
      xmlParseEntityDecl(ctxt);
6960
1.64k
                    else
6961
1.64k
                        SKIP(2);
6962
12.4M
        break;
6963
6.53M
          case 'A':
6964
6.53M
        xmlParseAttributeListDecl(ctxt);
6965
6.53M
        break;
6966
1.99M
          case 'N':
6967
1.99M
        xmlParseNotationDecl(ctxt);
6968
1.99M
        break;
6969
382M
          case '-':
6970
382M
        xmlParseComment(ctxt);
6971
382M
        break;
6972
506k
    default:
6973
        /* there is an error but it will be detected later */
6974
506k
                    SKIP(2);
6975
506k
        break;
6976
403M
      }
6977
403M
  } else if (NXT(1) == '?') {
6978
2.27M
      xmlParsePI(ctxt);
6979
2.27M
  }
6980
406M
    }
6981
6982
    /*
6983
     * detect requirement to exit there and act accordingly
6984
     * and avoid having instate overridden later on
6985
     */
6986
406M
    if (ctxt->instate == XML_PARSER_EOF)
6987
27.9k
        return;
6988
6989
406M
    ctxt->instate = XML_PARSER_DTD;
6990
406M
}
6991
6992
/**
6993
 * xmlParseTextDecl:
6994
 * @ctxt:  an XML parser context
6995
 *
6996
 * DEPRECATED: Internal function, don't use.
6997
 *
6998
 * parse an XML declaration header for external entities
6999
 *
7000
 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
7001
 */
7002
7003
void
7004
24.6k
xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
7005
24.6k
    xmlChar *version;
7006
24.6k
    const xmlChar *encoding;
7007
24.6k
    int oldstate;
7008
7009
    /*
7010
     * We know that '<?xml' is here.
7011
     */
7012
24.6k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
7013
24.2k
  SKIP(5);
7014
24.2k
    } else {
7015
372
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
7016
372
  return;
7017
372
    }
7018
7019
    /* Avoid expansion of parameter entities when skipping blanks. */
7020
24.2k
    oldstate = ctxt->instate;
7021
24.2k
    ctxt->instate = XML_PARSER_START;
7022
7023
24.2k
    if (SKIP_BLANKS == 0) {
7024
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7025
0
           "Space needed after '<?xml'\n");
7026
0
    }
7027
7028
    /*
7029
     * We may have the VersionInfo here.
7030
     */
7031
24.2k
    version = xmlParseVersionInfo(ctxt);
7032
24.2k
    if (version == NULL)
7033
2.69k
  version = xmlCharStrdup(XML_DEFAULT_VERSION);
7034
21.5k
    else {
7035
21.5k
  if (SKIP_BLANKS == 0) {
7036
1.17k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7037
1.17k
               "Space needed here\n");
7038
1.17k
  }
7039
21.5k
    }
7040
24.2k
    ctxt->input->version = version;
7041
7042
    /*
7043
     * We must have the encoding declaration
7044
     */
7045
24.2k
    encoding = xmlParseEncodingDecl(ctxt);
7046
24.2k
    if (ctxt->instate == XML_PARSER_EOF)
7047
0
        return;
7048
24.2k
    if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7049
  /*
7050
   * The XML REC instructs us to stop parsing right here
7051
   */
7052
343
        ctxt->instate = oldstate;
7053
343
        return;
7054
343
    }
7055
23.9k
    if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
7056
5.86k
  xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
7057
5.86k
           "Missing encoding in text declaration\n");
7058
5.86k
    }
7059
7060
23.9k
    SKIP_BLANKS;
7061
23.9k
    if ((RAW == '?') && (NXT(1) == '>')) {
7062
17.8k
        SKIP(2);
7063
17.8k
    } else if (RAW == '>') {
7064
        /* Deprecated old WD ... */
7065
159
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7066
159
  NEXT;
7067
5.90k
    } else {
7068
5.90k
        int c;
7069
7070
5.90k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7071
361k
        while ((c = CUR) != 0) {
7072
359k
            NEXT;
7073
359k
            if (c == '>')
7074
4.10k
                break;
7075
359k
        }
7076
5.90k
    }
7077
7078
23.9k
    ctxt->instate = oldstate;
7079
23.9k
}
7080
7081
/**
7082
 * xmlParseExternalSubset:
7083
 * @ctxt:  an XML parser context
7084
 * @ExternalID: the external identifier
7085
 * @SystemID: the system identifier (or URL)
7086
 *
7087
 * parse Markup declarations from an external subset
7088
 *
7089
 * [30] extSubset ::= textDecl? extSubsetDecl
7090
 *
7091
 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
7092
 */
7093
void
7094
xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
7095
108k
                       const xmlChar *SystemID) {
7096
108k
    xmlDetectSAX2(ctxt);
7097
108k
    GROW;
7098
7099
108k
    if ((ctxt->encoding == NULL) &&
7100
108k
        (ctxt->input->end - ctxt->input->cur >= 4)) {
7101
108k
        xmlChar start[4];
7102
108k
  xmlCharEncoding enc;
7103
7104
108k
  start[0] = RAW;
7105
108k
  start[1] = NXT(1);
7106
108k
  start[2] = NXT(2);
7107
108k
  start[3] = NXT(3);
7108
108k
  enc = xmlDetectCharEncoding(start, 4);
7109
108k
  if (enc != XML_CHAR_ENCODING_NONE)
7110
20.5k
      xmlSwitchEncoding(ctxt, enc);
7111
108k
    }
7112
7113
108k
    if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
7114
18.8k
  xmlParseTextDecl(ctxt);
7115
18.8k
  if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7116
      /*
7117
       * The XML REC instructs us to stop parsing right here
7118
       */
7119
287
      xmlHaltParser(ctxt);
7120
287
      return;
7121
287
  }
7122
18.8k
    }
7123
108k
    if (ctxt->myDoc == NULL) {
7124
0
        ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
7125
0
  if (ctxt->myDoc == NULL) {
7126
0
      xmlErrMemory(ctxt, "New Doc failed");
7127
0
      return;
7128
0
  }
7129
0
  ctxt->myDoc->properties = XML_DOC_INTERNAL;
7130
0
    }
7131
108k
    if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
7132
0
        xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
7133
7134
108k
    ctxt->instate = XML_PARSER_DTD;
7135
108k
    ctxt->external = 1;
7136
108k
    SKIP_BLANKS;
7137
63.8M
    while ((ctxt->instate != XML_PARSER_EOF) && (RAW != 0)) {
7138
63.7M
  GROW;
7139
63.7M
        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7140
1.92M
            xmlParseConditionalSections(ctxt);
7141
61.8M
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
7142
61.8M
            xmlParseMarkupDecl(ctxt);
7143
61.8M
        } else {
7144
35.4k
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7145
35.4k
            xmlHaltParser(ctxt);
7146
35.4k
            return;
7147
35.4k
        }
7148
63.7M
        SKIP_BLANKS;
7149
63.7M
    }
7150
7151
72.6k
    if (RAW != 0) {
7152
0
  xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7153
0
    }
7154
7155
72.6k
}
7156
7157
/**
7158
 * xmlParseReference:
7159
 * @ctxt:  an XML parser context
7160
 *
7161
 * DEPRECATED: Internal function, don't use.
7162
 *
7163
 * parse and handle entity references in content, depending on the SAX
7164
 * interface, this may end-up in a call to character() if this is a
7165
 * CharRef, a predefined entity, if there is no reference() callback.
7166
 * or if the parser was asked to switch to that mode.
7167
 *
7168
 * Always consumes '&'.
7169
 *
7170
 * [67] Reference ::= EntityRef | CharRef
7171
 */
7172
void
7173
18.5M
xmlParseReference(xmlParserCtxtPtr ctxt) {
7174
18.5M
    xmlEntityPtr ent;
7175
18.5M
    xmlChar *val;
7176
18.5M
    int was_checked;
7177
18.5M
    xmlNodePtr list = NULL;
7178
18.5M
    xmlParserErrors ret = XML_ERR_OK;
7179
7180
7181
18.5M
    if (RAW != '&')
7182
0
        return;
7183
7184
    /*
7185
     * Simple case of a CharRef
7186
     */
7187
18.5M
    if (NXT(1) == '#') {
7188
554k
  int i = 0;
7189
554k
  xmlChar out[16];
7190
554k
  int hex = NXT(2);
7191
554k
  int value = xmlParseCharRef(ctxt);
7192
7193
554k
  if (value == 0)
7194
127k
      return;
7195
427k
  if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
7196
      /*
7197
       * So we are using non-UTF-8 buffers
7198
       * Check that the char fit on 8bits, if not
7199
       * generate a CharRef.
7200
       */
7201
301k
      if (value <= 0xFF) {
7202
291k
    out[0] = value;
7203
291k
    out[1] = 0;
7204
291k
    if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7205
291k
        (!ctxt->disableSAX))
7206
239k
        ctxt->sax->characters(ctxt->userData, out, 1);
7207
291k
      } else {
7208
10.0k
    if ((hex == 'x') || (hex == 'X'))
7209
1.80k
        snprintf((char *)out, sizeof(out), "#x%X", value);
7210
8.22k
    else
7211
8.22k
        snprintf((char *)out, sizeof(out), "#%d", value);
7212
10.0k
    if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7213
10.0k
        (!ctxt->disableSAX))
7214
7.71k
        ctxt->sax->reference(ctxt->userData, out);
7215
10.0k
      }
7216
301k
  } else {
7217
      /*
7218
       * Just encode the value in UTF-8
7219
       */
7220
125k
      COPY_BUF(0 ,out, i, value);
7221
125k
      out[i] = 0;
7222
125k
      if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7223
125k
    (!ctxt->disableSAX))
7224
105k
    ctxt->sax->characters(ctxt->userData, out, i);
7225
125k
  }
7226
427k
  return;
7227
554k
    }
7228
7229
    /*
7230
     * We are seeing an entity reference
7231
     */
7232
17.9M
    ent = xmlParseEntityRef(ctxt);
7233
17.9M
    if (ent == NULL) return;
7234
16.5M
    if (!ctxt->wellFormed)
7235
11.7M
  return;
7236
4.73M
    was_checked = ent->flags & XML_ENT_PARSED;
7237
7238
    /* special case of predefined entities */
7239
4.73M
    if ((ent->name == NULL) ||
7240
4.73M
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7241
696k
  val = ent->content;
7242
696k
  if (val == NULL) return;
7243
  /*
7244
   * inline the entity.
7245
   */
7246
696k
  if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7247
696k
      (!ctxt->disableSAX))
7248
696k
      ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7249
696k
  return;
7250
696k
    }
7251
7252
    /*
7253
     * The first reference to the entity trigger a parsing phase
7254
     * where the ent->children is filled with the result from
7255
     * the parsing.
7256
     * Note: external parsed entities will not be loaded, it is not
7257
     * required for a non-validating parser, unless the parsing option
7258
     * of validating, or substituting entities were given. Doing so is
7259
     * far more secure as the parser will only process data coming from
7260
     * the document entity by default.
7261
     */
7262
4.03M
    if (((ent->flags & XML_ENT_PARSED) == 0) &&
7263
4.03M
        ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
7264
275k
         (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
7265
269k
  unsigned long oldsizeentcopy = ctxt->sizeentcopy;
7266
7267
  /*
7268
   * This is a bit hackish but this seems the best
7269
   * way to make sure both SAX and DOM entity support
7270
   * behaves okay.
7271
   */
7272
269k
  void *user_data;
7273
269k
  if (ctxt->userData == ctxt)
7274
269k
      user_data = NULL;
7275
0
  else
7276
0
      user_data = ctxt->userData;
7277
7278
        /* Avoid overflow as much as possible */
7279
269k
        ctxt->sizeentcopy = 0;
7280
7281
269k
        if (ent->flags & XML_ENT_EXPANDING) {
7282
1.10k
            xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7283
1.10k
            xmlHaltParser(ctxt);
7284
1.10k
            return;
7285
1.10k
        }
7286
7287
267k
        ent->flags |= XML_ENT_EXPANDING;
7288
7289
  /*
7290
   * Check that this entity is well formed
7291
   * 4.3.2: An internal general parsed entity is well-formed
7292
   * if its replacement text matches the production labeled
7293
   * content.
7294
   */
7295
267k
  if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7296
188k
      ctxt->depth++;
7297
188k
      ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
7298
188k
                                                user_data, &list);
7299
188k
      ctxt->depth--;
7300
7301
188k
  } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7302
79.9k
      ctxt->depth++;
7303
79.9k
      ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
7304
79.9k
                                     user_data, ctxt->depth, ent->URI,
7305
79.9k
             ent->ExternalID, &list);
7306
79.9k
      ctxt->depth--;
7307
79.9k
  } else {
7308
0
      ret = XML_ERR_ENTITY_PE_INTERNAL;
7309
0
      xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7310
0
       "invalid entity type found\n", NULL);
7311
0
  }
7312
7313
267k
        ent->flags &= ~XML_ENT_EXPANDING;
7314
267k
        ent->flags |= XML_ENT_PARSED | XML_ENT_CHECKED;
7315
267k
        ent->expandedSize = ctxt->sizeentcopy;
7316
267k
  if (ret == XML_ERR_ENTITY_LOOP) {
7317
9.53k
            xmlHaltParser(ctxt);
7318
9.53k
      xmlFreeNodeList(list);
7319
9.53k
      return;
7320
9.53k
  }
7321
258k
  if (xmlParserEntityCheck(ctxt, oldsizeentcopy)) {
7322
0
      xmlFreeNodeList(list);
7323
0
      return;
7324
0
  }
7325
7326
258k
  if ((ret == XML_ERR_OK) && (list != NULL)) {
7327
177k
            ent->children = list;
7328
            /*
7329
             * Prune it directly in the generated document
7330
             * except for single text nodes.
7331
             */
7332
177k
            if ((ctxt->replaceEntities == 0) ||
7333
177k
                (ctxt->parseMode == XML_PARSE_READER) ||
7334
177k
                ((list->type == XML_TEXT_NODE) &&
7335
166k
                 (list->next == NULL))) {
7336
166k
                ent->owner = 1;
7337
1.40M
                while (list != NULL) {
7338
1.24M
                    list->parent = (xmlNodePtr) ent;
7339
1.24M
                    if (list->doc != ent->doc)
7340
0
                        xmlSetTreeDoc(list, ent->doc);
7341
1.24M
                    if (list->next == NULL)
7342
166k
                        ent->last = list;
7343
1.24M
                    list = list->next;
7344
1.24M
                }
7345
166k
                list = NULL;
7346
166k
            } else {
7347
11.4k
                ent->owner = 0;
7348
2.20M
                while (list != NULL) {
7349
2.19M
                    list->parent = (xmlNodePtr) ctxt->node;
7350
2.19M
                    list->doc = ctxt->myDoc;
7351
2.19M
                    if (list->next == NULL)
7352
11.4k
                        ent->last = list;
7353
2.19M
                    list = list->next;
7354
2.19M
                }
7355
11.4k
                list = ent->children;
7356
#ifdef LIBXML_LEGACY_ENABLED
7357
                if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7358
                    xmlAddEntityReference(ent, list, NULL);
7359
#endif /* LIBXML_LEGACY_ENABLED */
7360
11.4k
            }
7361
177k
  } else if ((ret != XML_ERR_OK) &&
7362
80.7k
       (ret != XML_WAR_UNDECLARED_ENTITY)) {
7363
46.6k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7364
46.6k
         "Entity '%s' failed to parse\n", ent->name);
7365
46.6k
            if (ent->content != NULL)
7366
14.7k
                ent->content[0] = 0;
7367
46.6k
  } else if (list != NULL) {
7368
0
      xmlFreeNodeList(list);
7369
0
      list = NULL;
7370
0
  }
7371
7372
        /* Prevent entity from being parsed and expanded twice (Bug 760367). */
7373
258k
        was_checked = 0;
7374
258k
    }
7375
7376
    /*
7377
     * Now that the entity content has been gathered
7378
     * provide it to the application, this can take different forms based
7379
     * on the parsing modes.
7380
     */
7381
4.02M
    if (ent->children == NULL) {
7382
  /*
7383
   * Probably running in SAX mode and the callbacks don't
7384
   * build the entity content. So unless we already went
7385
   * though parsing for first checking go though the entity
7386
   * content to generate callbacks associated to the entity
7387
   */
7388
801k
  if (was_checked != 0) {
7389
713k
      void *user_data;
7390
      /*
7391
       * This is a bit hackish but this seems the best
7392
       * way to make sure both SAX and DOM entity support
7393
       * behaves okay.
7394
       */
7395
713k
      if (ctxt->userData == ctxt)
7396
713k
    user_data = NULL;
7397
0
      else
7398
0
    user_data = ctxt->userData;
7399
7400
713k
      if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7401
10.1k
    ctxt->depth++;
7402
10.1k
    ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7403
10.1k
           ent->content, user_data, NULL);
7404
10.1k
    ctxt->depth--;
7405
703k
      } else if (ent->etype ==
7406
703k
           XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7407
703k
          unsigned long oldsizeentities = ctxt->sizeentities;
7408
7409
703k
    ctxt->depth++;
7410
703k
    ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7411
703k
         ctxt->sax, user_data, ctxt->depth,
7412
703k
         ent->URI, ent->ExternalID, NULL);
7413
703k
    ctxt->depth--;
7414
7415
                /* Undo the change to sizeentities */
7416
703k
                ctxt->sizeentities = oldsizeentities;
7417
703k
      } else {
7418
0
    ret = XML_ERR_ENTITY_PE_INTERNAL;
7419
0
    xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7420
0
           "invalid entity type found\n", NULL);
7421
0
      }
7422
713k
      if (ret == XML_ERR_ENTITY_LOOP) {
7423
0
    xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7424
0
    return;
7425
0
      }
7426
713k
            if (xmlParserEntityCheck(ctxt, 0))
7427
0
                return;
7428
713k
  }
7429
801k
  if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7430
801k
      (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7431
      /*
7432
       * Entity reference callback comes second, it's somewhat
7433
       * superfluous but a compatibility to historical behaviour
7434
       */
7435
163k
      ctxt->sax->reference(ctxt->userData, ent->name);
7436
163k
  }
7437
801k
  return;
7438
801k
    }
7439
7440
    /*
7441
     * We also check for amplification if entities aren't substituted.
7442
     * They might be expanded later.
7443
     */
7444
3.22M
    if ((was_checked != 0) &&
7445
3.22M
        (xmlParserEntityCheck(ctxt, ent->expandedSize)))
7446
490
        return;
7447
7448
    /*
7449
     * If we didn't get any children for the entity being built
7450
     */
7451
3.22M
    if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7452
3.22M
  (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7453
  /*
7454
   * Create a node.
7455
   */
7456
1.40M
  ctxt->sax->reference(ctxt->userData, ent->name);
7457
1.40M
  return;
7458
1.40M
    }
7459
7460
1.82M
    if (ctxt->replaceEntities)  {
7461
  /*
7462
   * There is a problem on the handling of _private for entities
7463
   * (bug 155816): Should we copy the content of the field from
7464
   * the entity (possibly overwriting some value set by the user
7465
   * when a copy is created), should we leave it alone, or should
7466
   * we try to take care of different situations?  The problem
7467
   * is exacerbated by the usage of this field by the xmlReader.
7468
   * To fix this bug, we look at _private on the created node
7469
   * and, if it's NULL, we copy in whatever was in the entity.
7470
   * If it's not NULL we leave it alone.  This is somewhat of a
7471
   * hack - maybe we should have further tests to determine
7472
   * what to do.
7473
   */
7474
1.82M
  if (ctxt->node != NULL) {
7475
      /*
7476
       * Seems we are generating the DOM content, do
7477
       * a simple tree copy for all references except the first
7478
       * In the first occurrence list contains the replacement.
7479
       */
7480
1.82M
      if (((list == NULL) && (ent->owner == 0)) ||
7481
1.82M
    (ctxt->parseMode == XML_PARSE_READER)) {
7482
556k
    xmlNodePtr nw = NULL, cur, firstChild = NULL;
7483
7484
    /*
7485
     * when operating on a reader, the entities definitions
7486
     * are always owning the entities subtree.
7487
    if (ctxt->parseMode == XML_PARSE_READER)
7488
        ent->owner = 1;
7489
     */
7490
7491
556k
    cur = ent->children;
7492
1.71M
    while (cur != NULL) {
7493
1.71M
        nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7494
1.71M
        if (nw != NULL) {
7495
1.71M
      if (nw->_private == NULL)
7496
1.71M
          nw->_private = cur->_private;
7497
1.71M
      if (firstChild == NULL){
7498
556k
          firstChild = nw;
7499
556k
      }
7500
1.71M
      nw = xmlAddChild(ctxt->node, nw);
7501
1.71M
        }
7502
1.71M
        if (cur == ent->last) {
7503
      /*
7504
       * needed to detect some strange empty
7505
       * node cases in the reader tests
7506
       */
7507
556k
      if ((ctxt->parseMode == XML_PARSE_READER) &&
7508
556k
          (nw != NULL) &&
7509
556k
          (nw->type == XML_ELEMENT_NODE) &&
7510
556k
          (nw->children == NULL))
7511
4.51k
          nw->extra = 1;
7512
7513
556k
      break;
7514
556k
        }
7515
1.16M
        cur = cur->next;
7516
1.16M
    }
7517
#ifdef LIBXML_LEGACY_ENABLED
7518
    if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7519
      xmlAddEntityReference(ent, firstChild, nw);
7520
#endif /* LIBXML_LEGACY_ENABLED */
7521
1.26M
      } else if ((list == NULL) || (ctxt->inputNr > 0)) {
7522
1.26M
    xmlNodePtr nw = NULL, cur, next, last,
7523
1.26M
         firstChild = NULL;
7524
7525
    /*
7526
     * Copy the entity child list and make it the new
7527
     * entity child list. The goal is to make sure any
7528
     * ID or REF referenced will be the one from the
7529
     * document content and not the entity copy.
7530
     */
7531
1.26M
    cur = ent->children;
7532
1.26M
    ent->children = NULL;
7533
1.26M
    last = ent->last;
7534
1.26M
    ent->last = NULL;
7535
10.5M
    while (cur != NULL) {
7536
10.5M
        next = cur->next;
7537
10.5M
        cur->next = NULL;
7538
10.5M
        cur->parent = NULL;
7539
10.5M
        nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7540
10.5M
        if (nw != NULL) {
7541
10.5M
      if (nw->_private == NULL)
7542
10.5M
          nw->_private = cur->_private;
7543
10.5M
      if (firstChild == NULL){
7544
1.26M
          firstChild = cur;
7545
1.26M
      }
7546
10.5M
      xmlAddChild((xmlNodePtr) ent, nw);
7547
10.5M
        }
7548
10.5M
        xmlAddChild(ctxt->node, cur);
7549
10.5M
        if (cur == last)
7550
1.26M
      break;
7551
9.31M
        cur = next;
7552
9.31M
    }
7553
1.26M
    if (ent->owner == 0)
7554
11.4k
        ent->owner = 1;
7555
#ifdef LIBXML_LEGACY_ENABLED
7556
    if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7557
      xmlAddEntityReference(ent, firstChild, nw);
7558
#endif /* LIBXML_LEGACY_ENABLED */
7559
1.26M
      } else {
7560
0
    const xmlChar *nbktext;
7561
7562
    /*
7563
     * the name change is to avoid coalescing of the
7564
     * node with a possible previous text one which
7565
     * would make ent->children a dangling pointer
7566
     */
7567
0
    nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7568
0
          -1);
7569
0
    if (ent->children->type == XML_TEXT_NODE)
7570
0
        ent->children->name = nbktext;
7571
0
    if ((ent->last != ent->children) &&
7572
0
        (ent->last->type == XML_TEXT_NODE))
7573
0
        ent->last->name = nbktext;
7574
0
    xmlAddChildList(ctxt->node, ent->children);
7575
0
      }
7576
7577
      /*
7578
       * This is to avoid a nasty side effect, see
7579
       * characters() in SAX.c
7580
       */
7581
1.82M
      ctxt->nodemem = 0;
7582
1.82M
      ctxt->nodelen = 0;
7583
1.82M
      return;
7584
1.82M
  }
7585
1.82M
    }
7586
1.82M
}
7587
7588
/**
7589
 * xmlParseEntityRef:
7590
 * @ctxt:  an XML parser context
7591
 *
7592
 * DEPRECATED: Internal function, don't use.
7593
 *
7594
 * Parse an entitiy reference. Always consumes '&'.
7595
 *
7596
 * [68] EntityRef ::= '&' Name ';'
7597
 *
7598
 * [ WFC: Entity Declared ]
7599
 * In a document without any DTD, a document with only an internal DTD
7600
 * subset which contains no parameter entity references, or a document
7601
 * with "standalone='yes'", the Name given in the entity reference
7602
 * must match that in an entity declaration, except that well-formed
7603
 * documents need not declare any of the following entities: amp, lt,
7604
 * gt, apos, quot.  The declaration of a parameter entity must precede
7605
 * any reference to it.  Similarly, the declaration of a general entity
7606
 * must precede any reference to it which appears in a default value in an
7607
 * attribute-list declaration. Note that if entities are declared in the
7608
 * external subset or in external parameter entities, a non-validating
7609
 * processor is not obligated to read and process their declarations;
7610
 * for such documents, the rule that an entity must be declared is a
7611
 * well-formedness constraint only if standalone='yes'.
7612
 *
7613
 * [ WFC: Parsed Entity ]
7614
 * An entity reference must not contain the name of an unparsed entity
7615
 *
7616
 * Returns the xmlEntityPtr if found, or NULL otherwise.
7617
 */
7618
xmlEntityPtr
7619
25.9M
xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7620
25.9M
    const xmlChar *name;
7621
25.9M
    xmlEntityPtr ent = NULL;
7622
7623
25.9M
    GROW;
7624
25.9M
    if (ctxt->instate == XML_PARSER_EOF)
7625
0
        return(NULL);
7626
7627
25.9M
    if (RAW != '&')
7628
0
        return(NULL);
7629
25.9M
    NEXT;
7630
25.9M
    name = xmlParseName(ctxt);
7631
25.9M
    if (name == NULL) {
7632
217k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7633
217k
           "xmlParseEntityRef: no name\n");
7634
217k
        return(NULL);
7635
217k
    }
7636
25.7M
    if (RAW != ';') {
7637
158k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7638
158k
  return(NULL);
7639
158k
    }
7640
25.5M
    NEXT;
7641
7642
    /*
7643
     * Predefined entities override any extra definition
7644
     */
7645
25.5M
    if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7646
18.2M
        ent = xmlGetPredefinedEntity(name);
7647
18.2M
        if (ent != NULL)
7648
1.64M
            return(ent);
7649
18.2M
    }
7650
7651
    /*
7652
     * Ask first SAX for entity resolution, otherwise try the
7653
     * entities which may have stored in the parser context.
7654
     */
7655
23.9M
    if (ctxt->sax != NULL) {
7656
23.9M
  if (ctxt->sax->getEntity != NULL)
7657
23.9M
      ent = ctxt->sax->getEntity(ctxt->userData, name);
7658
23.9M
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7659
23.9M
      (ctxt->options & XML_PARSE_OLDSAX))
7660
33.4k
      ent = xmlGetPredefinedEntity(name);
7661
23.9M
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7662
23.9M
      (ctxt->userData==ctxt)) {
7663
88.9k
      ent = xmlSAX2GetEntity(ctxt, name);
7664
88.9k
  }
7665
23.9M
    }
7666
23.9M
    if (ctxt->instate == XML_PARSER_EOF)
7667
0
  return(NULL);
7668
    /*
7669
     * [ WFC: Entity Declared ]
7670
     * In a document without any DTD, a document with only an
7671
     * internal DTD subset which contains no parameter entity
7672
     * references, or a document with "standalone='yes'", the
7673
     * Name given in the entity reference must match that in an
7674
     * entity declaration, except that well-formed documents
7675
     * need not declare any of the following entities: amp, lt,
7676
     * gt, apos, quot.
7677
     * The declaration of a parameter entity must precede any
7678
     * reference to it.
7679
     * Similarly, the declaration of a general entity must
7680
     * precede any reference to it which appears in a default
7681
     * value in an attribute-list declaration. Note that if
7682
     * entities are declared in the external subset or in
7683
     * external parameter entities, a non-validating processor
7684
     * is not obligated to read and process their declarations;
7685
     * for such documents, the rule that an entity must be
7686
     * declared is a well-formedness constraint only if
7687
     * standalone='yes'.
7688
     */
7689
23.9M
    if (ent == NULL) {
7690
1.56M
  if ((ctxt->standalone == 1) ||
7691
1.56M
      ((ctxt->hasExternalSubset == 0) &&
7692
1.55M
       (ctxt->hasPErefs == 0))) {
7693
518k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7694
518k
         "Entity '%s' not defined\n", name);
7695
1.04M
  } else {
7696
1.04M
      xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7697
1.04M
         "Entity '%s' not defined\n", name);
7698
1.04M
      if ((ctxt->inSubset == 0) &&
7699
1.04M
    (ctxt->sax != NULL) &&
7700
1.04M
    (ctxt->sax->reference != NULL)) {
7701
1.04M
    ctxt->sax->reference(ctxt->userData, name);
7702
1.04M
      }
7703
1.04M
  }
7704
1.56M
  ctxt->valid = 0;
7705
1.56M
    }
7706
7707
    /*
7708
     * [ WFC: Parsed Entity ]
7709
     * An entity reference must not contain the name of an
7710
     * unparsed entity
7711
     */
7712
22.3M
    else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7713
9.32k
  xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7714
9.32k
     "Entity reference to unparsed entity %s\n", name);
7715
9.32k
    }
7716
7717
    /*
7718
     * [ WFC: No External Entity References ]
7719
     * Attribute values cannot contain direct or indirect
7720
     * entity references to external entities.
7721
     */
7722
22.3M
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7723
22.3M
       (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7724
15.9k
  xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7725
15.9k
       "Attribute references external entity '%s'\n", name);
7726
15.9k
    }
7727
    /*
7728
     * [ WFC: No < in Attribute Values ]
7729
     * The replacement text of any entity referred to directly or
7730
     * indirectly in an attribute value (other than "&lt;") must
7731
     * not contain a <.
7732
     */
7733
22.3M
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7734
22.3M
       (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7735
7.29M
  if ((ent->flags & XML_ENT_CHECKED_LT) == 0) {
7736
70.4k
            if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7737
1.49k
                ent->flags |= XML_ENT_CONTAINS_LT;
7738
70.4k
            ent->flags |= XML_ENT_CHECKED_LT;
7739
70.4k
        }
7740
7.29M
        if (ent->flags & XML_ENT_CONTAINS_LT)
7741
77.2k
            xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7742
77.2k
                    "'<' in entity '%s' is not allowed in attributes "
7743
77.2k
                    "values\n", name);
7744
7.29M
    }
7745
7746
    /*
7747
     * Internal check, no parameter entities here ...
7748
     */
7749
15.0M
    else {
7750
15.0M
  switch (ent->etype) {
7751
0
      case XML_INTERNAL_PARAMETER_ENTITY:
7752
0
      case XML_EXTERNAL_PARAMETER_ENTITY:
7753
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7754
0
       "Attempt to reference the parameter entity '%s'\n",
7755
0
            name);
7756
0
      break;
7757
15.0M
      default:
7758
15.0M
      break;
7759
15.0M
  }
7760
15.0M
    }
7761
7762
    /*
7763
     * [ WFC: No Recursion ]
7764
     * A parsed entity must not contain a recursive reference
7765
     * to itself, either directly or indirectly.
7766
     * Done somewhere else
7767
     */
7768
23.9M
    return(ent);
7769
23.9M
}
7770
7771
/**
7772
 * xmlParseStringEntityRef:
7773
 * @ctxt:  an XML parser context
7774
 * @str:  a pointer to an index in the string
7775
 *
7776
 * parse ENTITY references declarations, but this version parses it from
7777
 * a string value.
7778
 *
7779
 * [68] EntityRef ::= '&' Name ';'
7780
 *
7781
 * [ WFC: Entity Declared ]
7782
 * In a document without any DTD, a document with only an internal DTD
7783
 * subset which contains no parameter entity references, or a document
7784
 * with "standalone='yes'", the Name given in the entity reference
7785
 * must match that in an entity declaration, except that well-formed
7786
 * documents need not declare any of the following entities: amp, lt,
7787
 * gt, apos, quot.  The declaration of a parameter entity must precede
7788
 * any reference to it.  Similarly, the declaration of a general entity
7789
 * must precede any reference to it which appears in a default value in an
7790
 * attribute-list declaration. Note that if entities are declared in the
7791
 * external subset or in external parameter entities, a non-validating
7792
 * processor is not obligated to read and process their declarations;
7793
 * for such documents, the rule that an entity must be declared is a
7794
 * well-formedness constraint only if standalone='yes'.
7795
 *
7796
 * [ WFC: Parsed Entity ]
7797
 * An entity reference must not contain the name of an unparsed entity
7798
 *
7799
 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7800
 * is updated to the current location in the string.
7801
 */
7802
static xmlEntityPtr
7803
3.73G
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7804
3.73G
    xmlChar *name;
7805
3.73G
    const xmlChar *ptr;
7806
3.73G
    xmlChar cur;
7807
3.73G
    xmlEntityPtr ent = NULL;
7808
7809
3.73G
    if ((str == NULL) || (*str == NULL))
7810
0
        return(NULL);
7811
3.73G
    ptr = *str;
7812
3.73G
    cur = *ptr;
7813
3.73G
    if (cur != '&')
7814
3.48G
  return(NULL);
7815
7816
248M
    ptr++;
7817
248M
    name = xmlParseStringName(ctxt, &ptr);
7818
248M
    if (name == NULL) {
7819
25.2k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7820
25.2k
           "xmlParseStringEntityRef: no name\n");
7821
25.2k
  *str = ptr;
7822
25.2k
  return(NULL);
7823
25.2k
    }
7824
248M
    if (*ptr != ';') {
7825
27.2k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7826
27.2k
        xmlFree(name);
7827
27.2k
  *str = ptr;
7828
27.2k
  return(NULL);
7829
27.2k
    }
7830
248M
    ptr++;
7831
7832
7833
    /*
7834
     * Predefined entities override any extra definition
7835
     */
7836
248M
    if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7837
178M
        ent = xmlGetPredefinedEntity(name);
7838
178M
        if (ent != NULL) {
7839
1.37M
            xmlFree(name);
7840
1.37M
            *str = ptr;
7841
1.37M
            return(ent);
7842
1.37M
        }
7843
178M
    }
7844
7845
    /*
7846
     * Ask first SAX for entity resolution, otherwise try the
7847
     * entities which may have stored in the parser context.
7848
     */
7849
247M
    if (ctxt->sax != NULL) {
7850
247M
  if (ctxt->sax->getEntity != NULL)
7851
247M
      ent = ctxt->sax->getEntity(ctxt->userData, name);
7852
247M
  if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7853
11.0M
      ent = xmlGetPredefinedEntity(name);
7854
247M
  if ((ent == NULL) && (ctxt->userData==ctxt)) {
7855
37.8M
      ent = xmlSAX2GetEntity(ctxt, name);
7856
37.8M
  }
7857
247M
    }
7858
247M
    if (ctxt->instate == XML_PARSER_EOF) {
7859
0
  xmlFree(name);
7860
0
  return(NULL);
7861
0
    }
7862
7863
    /*
7864
     * [ WFC: Entity Declared ]
7865
     * In a document without any DTD, a document with only an
7866
     * internal DTD subset which contains no parameter entity
7867
     * references, or a document with "standalone='yes'", the
7868
     * Name given in the entity reference must match that in an
7869
     * entity declaration, except that well-formed documents
7870
     * need not declare any of the following entities: amp, lt,
7871
     * gt, apos, quot.
7872
     * The declaration of a parameter entity must precede any
7873
     * reference to it.
7874
     * Similarly, the declaration of a general entity must
7875
     * precede any reference to it which appears in a default
7876
     * value in an attribute-list declaration. Note that if
7877
     * entities are declared in the external subset or in
7878
     * external parameter entities, a non-validating processor
7879
     * is not obligated to read and process their declarations;
7880
     * for such documents, the rule that an entity must be
7881
     * declared is a well-formedness constraint only if
7882
     * standalone='yes'.
7883
     */
7884
247M
    if (ent == NULL) {
7885
37.8M
  if ((ctxt->standalone == 1) ||
7886
37.8M
      ((ctxt->hasExternalSubset == 0) &&
7887
37.8M
       (ctxt->hasPErefs == 0))) {
7888
37.6M
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7889
37.6M
         "Entity '%s' not defined\n", name);
7890
37.6M
  } else {
7891
206k
      xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7892
206k
        "Entity '%s' not defined\n",
7893
206k
        name);
7894
206k
  }
7895
  /* TODO ? check regressions ctxt->valid = 0; */
7896
37.8M
    }
7897
7898
    /*
7899
     * [ WFC: Parsed Entity ]
7900
     * An entity reference must not contain the name of an
7901
     * unparsed entity
7902
     */
7903
209M
    else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7904
3.49k
  xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7905
3.49k
     "Entity reference to unparsed entity %s\n", name);
7906
3.49k
    }
7907
7908
    /*
7909
     * [ WFC: No External Entity References ]
7910
     * Attribute values cannot contain direct or indirect
7911
     * entity references to external entities.
7912
     */
7913
209M
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7914
209M
       (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7915
999k
  xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7916
999k
   "Attribute references external entity '%s'\n", name);
7917
999k
    }
7918
    /*
7919
     * [ WFC: No < in Attribute Values ]
7920
     * The replacement text of any entity referred to directly or
7921
     * indirectly in an attribute value (other than "&lt;") must
7922
     * not contain a <.
7923
     */
7924
208M
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7925
208M
       (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7926
206M
  if ((ent->flags & XML_ENT_CHECKED_LT) == 0) {
7927
82.9k
            if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7928
1.35k
                ent->flags |= XML_ENT_CONTAINS_LT;
7929
82.9k
            ent->flags |= XML_ENT_CHECKED_LT;
7930
82.9k
        }
7931
206M
        if (ent->flags & XML_ENT_CONTAINS_LT)
7932
1.57M
            xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7933
1.57M
                    "'<' in entity '%s' is not allowed in attributes "
7934
1.57M
                    "values\n", name);
7935
206M
    }
7936
7937
    /*
7938
     * Internal check, no parameter entities here ...
7939
     */
7940
1.71M
    else {
7941
1.71M
  switch (ent->etype) {
7942
0
      case XML_INTERNAL_PARAMETER_ENTITY:
7943
0
      case XML_EXTERNAL_PARAMETER_ENTITY:
7944
0
    xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7945
0
       "Attempt to reference the parameter entity '%s'\n",
7946
0
          name);
7947
0
      break;
7948
1.71M
      default:
7949
1.71M
      break;
7950
1.71M
  }
7951
1.71M
    }
7952
7953
    /*
7954
     * [ WFC: No Recursion ]
7955
     * A parsed entity must not contain a recursive reference
7956
     * to itself, either directly or indirectly.
7957
     * Done somewhere else
7958
     */
7959
7960
247M
    xmlFree(name);
7961
247M
    *str = ptr;
7962
247M
    return(ent);
7963
247M
}
7964
7965
/**
7966
 * xmlParsePEReference:
7967
 * @ctxt:  an XML parser context
7968
 *
7969
 * DEPRECATED: Internal function, don't use.
7970
 *
7971
 * Parse a parameter entity reference. Always consumes '%'.
7972
 *
7973
 * The entity content is handled directly by pushing it's content as
7974
 * a new input stream.
7975
 *
7976
 * [69] PEReference ::= '%' Name ';'
7977
 *
7978
 * [ WFC: No Recursion ]
7979
 * A parsed entity must not contain a recursive
7980
 * reference to itself, either directly or indirectly.
7981
 *
7982
 * [ WFC: Entity Declared ]
7983
 * In a document without any DTD, a document with only an internal DTD
7984
 * subset which contains no parameter entity references, or a document
7985
 * with "standalone='yes'", ...  ... The declaration of a parameter
7986
 * entity must precede any reference to it...
7987
 *
7988
 * [ VC: Entity Declared ]
7989
 * In a document with an external subset or external parameter entities
7990
 * with "standalone='no'", ...  ... The declaration of a parameter entity
7991
 * must precede any reference to it...
7992
 *
7993
 * [ WFC: In DTD ]
7994
 * Parameter-entity references may only appear in the DTD.
7995
 * NOTE: misleading but this is handled.
7996
 */
7997
void
7998
xmlParsePEReference(xmlParserCtxtPtr ctxt)
7999
436M
{
8000
436M
    const xmlChar *name;
8001
436M
    xmlEntityPtr entity = NULL;
8002
436M
    xmlParserInputPtr input;
8003
8004
436M
    if (RAW != '%')
8005
0
        return;
8006
436M
    NEXT;
8007
436M
    name = xmlParseName(ctxt);
8008
436M
    if (name == NULL) {
8009
1.20M
  xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
8010
1.20M
  return;
8011
1.20M
    }
8012
435M
    if (xmlParserDebugEntities)
8013
0
  xmlGenericError(xmlGenericErrorContext,
8014
0
    "PEReference: %s\n", name);
8015
435M
    if (RAW != ';') {
8016
6.39M
  xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
8017
6.39M
        return;
8018
6.39M
    }
8019
8020
429M
    NEXT;
8021
8022
    /*
8023
     * Request the entity from SAX
8024
     */
8025
429M
    if ((ctxt->sax != NULL) &&
8026
429M
  (ctxt->sax->getParameterEntity != NULL))
8027
429M
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8028
429M
    if (ctxt->instate == XML_PARSER_EOF)
8029
0
  return;
8030
429M
    if (entity == NULL) {
8031
  /*
8032
   * [ WFC: Entity Declared ]
8033
   * In a document without any DTD, a document with only an
8034
   * internal DTD subset which contains no parameter entity
8035
   * references, or a document with "standalone='yes'", ...
8036
   * ... The declaration of a parameter entity must precede
8037
   * any reference to it...
8038
   */
8039
84.2M
  if ((ctxt->standalone == 1) ||
8040
84.2M
      ((ctxt->hasExternalSubset == 0) &&
8041
84.2M
       (ctxt->hasPErefs == 0))) {
8042
3.00k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8043
3.00k
            "PEReference: %%%s; not found\n",
8044
3.00k
            name);
8045
84.2M
  } else {
8046
      /*
8047
       * [ VC: Entity Declared ]
8048
       * In a document with an external subset or external
8049
       * parameter entities with "standalone='no'", ...
8050
       * ... The declaration of a parameter entity must
8051
       * precede any reference to it...
8052
       */
8053
84.2M
            if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
8054
19.9M
                xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
8055
19.9M
                                 "PEReference: %%%s; not found\n",
8056
19.9M
                                 name, NULL);
8057
19.9M
            } else
8058
64.2M
                xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8059
64.2M
                              "PEReference: %%%s; not found\n",
8060
64.2M
                              name, NULL);
8061
84.2M
            ctxt->valid = 0;
8062
84.2M
  }
8063
344M
    } else {
8064
  /*
8065
   * Internal checking in case the entity quest barfed
8066
   */
8067
344M
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8068
344M
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8069
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8070
0
      "Internal: %%%s; is not a parameter entity\n",
8071
0
        name, NULL);
8072
344M
  } else {
8073
344M
            xmlChar start[4];
8074
344M
            xmlCharEncoding enc;
8075
344M
            unsigned long parentConsumed;
8076
344M
            xmlEntityPtr oldEnt;
8077
8078
344M
      if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
8079
344M
          ((ctxt->options & XML_PARSE_NOENT) == 0) &&
8080
344M
    ((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
8081
344M
    ((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
8082
344M
    ((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
8083
344M
    (ctxt->replaceEntities == 0) &&
8084
344M
    (ctxt->validate == 0))
8085
281
    return;
8086
8087
344M
            if (entity->flags & XML_ENT_EXPANDING) {
8088
589
                xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
8089
589
                xmlHaltParser(ctxt);
8090
589
                return;
8091
589
            }
8092
8093
            /* Must be computed from old input before pushing new input. */
8094
344M
            parentConsumed = ctxt->input->parentConsumed;
8095
344M
            oldEnt = ctxt->input->entity;
8096
344M
            if ((oldEnt == NULL) ||
8097
344M
                ((oldEnt->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
8098
339M
                 ((oldEnt->flags & XML_ENT_PARSED) == 0))) {
8099
6.47M
                xmlSaturatedAdd(&parentConsumed, ctxt->input->consumed);
8100
6.47M
                xmlSaturatedAddSizeT(&parentConsumed,
8101
6.47M
                                     ctxt->input->cur - ctxt->input->base);
8102
6.47M
            }
8103
8104
344M
      input = xmlNewEntityInputStream(ctxt, entity);
8105
344M
      if (xmlPushInput(ctxt, input) < 0) {
8106
4.76k
                xmlFreeInputStream(input);
8107
4.76k
    return;
8108
4.76k
            }
8109
8110
344M
            entity->flags |= XML_ENT_EXPANDING;
8111
8112
344M
            input->parentConsumed = parentConsumed;
8113
8114
344M
      if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
8115
                /*
8116
                 * Get the 4 first bytes and decode the charset
8117
                 * if enc != XML_CHAR_ENCODING_NONE
8118
                 * plug some encoding conversion routines.
8119
                 * Note that, since we may have some non-UTF8
8120
                 * encoding (like UTF16, bug 135229), the 'length'
8121
                 * is not known, but we can calculate based upon
8122
                 * the amount of data in the buffer.
8123
                 */
8124
17.2k
                GROW
8125
17.2k
                if (ctxt->instate == XML_PARSER_EOF)
8126
0
                    return;
8127
17.2k
                if ((ctxt->input->end - ctxt->input->cur)>=4) {
8128
17.1k
                    start[0] = RAW;
8129
17.1k
                    start[1] = NXT(1);
8130
17.1k
                    start[2] = NXT(2);
8131
17.1k
                    start[3] = NXT(3);
8132
17.1k
                    enc = xmlDetectCharEncoding(start, 4);
8133
17.1k
                    if (enc != XML_CHAR_ENCODING_NONE) {
8134
1.29k
                        xmlSwitchEncoding(ctxt, enc);
8135
1.29k
                    }
8136
17.1k
                }
8137
8138
17.2k
                if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
8139
17.2k
                    (IS_BLANK_CH(NXT(5)))) {
8140
959
                    xmlParseTextDecl(ctxt);
8141
959
                }
8142
17.2k
            }
8143
344M
  }
8144
344M
    }
8145
429M
    ctxt->hasPErefs = 1;
8146
429M
}
8147
8148
/**
8149
 * xmlLoadEntityContent:
8150
 * @ctxt:  an XML parser context
8151
 * @entity: an unloaded system entity
8152
 *
8153
 * Load the original content of the given system entity from the
8154
 * ExternalID/SystemID given. This is to be used for Included in Literal
8155
 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
8156
 *
8157
 * Returns 0 in case of success and -1 in case of failure
8158
 */
8159
static int
8160
5.44k
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
8161
5.44k
    xmlParserInputPtr input;
8162
5.44k
    xmlBufferPtr buf;
8163
5.44k
    int l, c;
8164
5.44k
    int count = 0;
8165
8166
5.44k
    if ((ctxt == NULL) || (entity == NULL) ||
8167
5.44k
        ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
8168
5.44k
   (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
8169
5.44k
  (entity->content != NULL)) {
8170
0
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8171
0
              "xmlLoadEntityContent parameter error");
8172
0
        return(-1);
8173
0
    }
8174
8175
5.44k
    if (xmlParserDebugEntities)
8176
0
  xmlGenericError(xmlGenericErrorContext,
8177
0
    "Reading %s entity content input\n", entity->name);
8178
8179
5.44k
    buf = xmlBufferCreate();
8180
5.44k
    if (buf == NULL) {
8181
0
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8182
0
              "xmlLoadEntityContent parameter error");
8183
0
        return(-1);
8184
0
    }
8185
5.44k
    xmlBufferSetAllocationScheme(buf, XML_BUFFER_ALLOC_DOUBLEIT);
8186
8187
5.44k
    input = xmlNewEntityInputStream(ctxt, entity);
8188
5.44k
    if (input == NULL) {
8189
1.09k
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8190
1.09k
              "xmlLoadEntityContent input error");
8191
1.09k
  xmlBufferFree(buf);
8192
1.09k
        return(-1);
8193
1.09k
    }
8194
8195
    /*
8196
     * Push the entity as the current input, read char by char
8197
     * saving to the buffer until the end of the entity or an error
8198
     */
8199
4.34k
    if (xmlPushInput(ctxt, input) < 0) {
8200
0
        xmlBufferFree(buf);
8201
0
  xmlFreeInputStream(input);
8202
0
  return(-1);
8203
0
    }
8204
8205
4.34k
    GROW;
8206
4.34k
    c = CUR_CHAR(l);
8207
9.49M
    while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
8208
9.49M
           (IS_CHAR(c))) {
8209
9.49M
        xmlBufferAdd(buf, ctxt->input->cur, l);
8210
9.49M
  if (count++ > XML_PARSER_CHUNK_SIZE) {
8211
91.2k
      count = 0;
8212
91.2k
      GROW;
8213
91.2k
            if (ctxt->instate == XML_PARSER_EOF) {
8214
0
                xmlBufferFree(buf);
8215
0
                return(-1);
8216
0
            }
8217
91.2k
  }
8218
9.49M
  NEXTL(l);
8219
9.49M
  c = CUR_CHAR(l);
8220
9.49M
  if (c == 0) {
8221
3.41k
      count = 0;
8222
3.41k
      GROW;
8223
3.41k
            if (ctxt->instate == XML_PARSER_EOF) {
8224
0
                xmlBufferFree(buf);
8225
0
                return(-1);
8226
0
            }
8227
3.41k
      c = CUR_CHAR(l);
8228
3.41k
  }
8229
9.49M
    }
8230
8231
4.34k
    if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
8232
2.23k
        xmlSaturatedAdd(&ctxt->sizeentities, ctxt->input->consumed);
8233
2.23k
        xmlPopInput(ctxt);
8234
2.23k
    } else if (!IS_CHAR(c)) {
8235
2.11k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8236
2.11k
                          "xmlLoadEntityContent: invalid char value %d\n",
8237
2.11k
                    c);
8238
2.11k
  xmlBufferFree(buf);
8239
2.11k
  return(-1);
8240
2.11k
    }
8241
2.23k
    entity->content = buf->content;
8242
2.23k
    entity->length = buf->use;
8243
2.23k
    buf->content = NULL;
8244
2.23k
    xmlBufferFree(buf);
8245
8246
2.23k
    return(0);
8247
4.34k
}
8248
8249
/**
8250
 * xmlParseStringPEReference:
8251
 * @ctxt:  an XML parser context
8252
 * @str:  a pointer to an index in the string
8253
 *
8254
 * parse PEReference declarations
8255
 *
8256
 * [69] PEReference ::= '%' Name ';'
8257
 *
8258
 * [ WFC: No Recursion ]
8259
 * A parsed entity must not contain a recursive
8260
 * reference to itself, either directly or indirectly.
8261
 *
8262
 * [ WFC: Entity Declared ]
8263
 * In a document without any DTD, a document with only an internal DTD
8264
 * subset which contains no parameter entity references, or a document
8265
 * with "standalone='yes'", ...  ... The declaration of a parameter
8266
 * entity must precede any reference to it...
8267
 *
8268
 * [ VC: Entity Declared ]
8269
 * In a document with an external subset or external parameter entities
8270
 * with "standalone='no'", ...  ... The declaration of a parameter entity
8271
 * must precede any reference to it...
8272
 *
8273
 * [ WFC: In DTD ]
8274
 * Parameter-entity references may only appear in the DTD.
8275
 * NOTE: misleading but this is handled.
8276
 *
8277
 * Returns the parameter entity as an xmlEntityPtr, or NULL on error.
8278
 *         str is updated to the current value of the index
8279
 */
8280
static xmlEntityPtr
8281
3.54M
xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8282
3.54M
    const xmlChar *ptr;
8283
3.54M
    xmlChar cur;
8284
3.54M
    xmlChar *name;
8285
3.54M
    xmlEntityPtr entity = NULL;
8286
8287
3.54M
    if ((str == NULL) || (*str == NULL)) return(NULL);
8288
3.54M
    ptr = *str;
8289
3.54M
    cur = *ptr;
8290
3.54M
    if (cur != '%')
8291
0
        return(NULL);
8292
3.54M
    ptr++;
8293
3.54M
    name = xmlParseStringName(ctxt, &ptr);
8294
3.54M
    if (name == NULL) {
8295
14.9k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8296
14.9k
           "xmlParseStringPEReference: no name\n");
8297
14.9k
  *str = ptr;
8298
14.9k
  return(NULL);
8299
14.9k
    }
8300
3.52M
    cur = *ptr;
8301
3.52M
    if (cur != ';') {
8302
4.23k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8303
4.23k
  xmlFree(name);
8304
4.23k
  *str = ptr;
8305
4.23k
  return(NULL);
8306
4.23k
    }
8307
3.52M
    ptr++;
8308
8309
    /*
8310
     * Request the entity from SAX
8311
     */
8312
3.52M
    if ((ctxt->sax != NULL) &&
8313
3.52M
  (ctxt->sax->getParameterEntity != NULL))
8314
3.52M
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8315
3.52M
    if (ctxt->instate == XML_PARSER_EOF) {
8316
0
  xmlFree(name);
8317
0
  *str = ptr;
8318
0
  return(NULL);
8319
0
    }
8320
3.52M
    if (entity == NULL) {
8321
  /*
8322
   * [ WFC: Entity Declared ]
8323
   * In a document without any DTD, a document with only an
8324
   * internal DTD subset which contains no parameter entity
8325
   * references, or a document with "standalone='yes'", ...
8326
   * ... The declaration of a parameter entity must precede
8327
   * any reference to it...
8328
   */
8329
137k
  if ((ctxt->standalone == 1) ||
8330
137k
      ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8331
1.08k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8332
1.08k
     "PEReference: %%%s; not found\n", name);
8333
136k
  } else {
8334
      /*
8335
       * [ VC: Entity Declared ]
8336
       * In a document with an external subset or external
8337
       * parameter entities with "standalone='no'", ...
8338
       * ... The declaration of a parameter entity must
8339
       * precede any reference to it...
8340
       */
8341
136k
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8342
136k
        "PEReference: %%%s; not found\n",
8343
136k
        name, NULL);
8344
136k
      ctxt->valid = 0;
8345
136k
  }
8346
3.38M
    } else {
8347
  /*
8348
   * Sanity check in case the lookup returned a non-parameter entity
8349
   */
8350
3.38M
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8351
3.38M
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8352
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8353
0
        "%%%s; is not a parameter entity\n",
8354
0
        name, NULL);
8355
0
  }
8356
3.38M
    }
8357
3.52M
    ctxt->hasPErefs = 1;
8358
3.52M
    xmlFree(name);
8359
3.52M
    *str = ptr;
8360
3.52M
    return(entity);
8361
3.52M
}
8362
8363
/**
8364
 * xmlParseDocTypeDecl:
8365
 * @ctxt:  an XML parser context
8366
 *
8367
 * DEPRECATED: Internal function, don't use.
8368
 *
8369
 * parse a DOCTYPE declaration
8370
 *
8371
 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8372
 *                      ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8373
 *
8374
 * [ VC: Root Element Type ]
8375
 * The Name in the document type declaration must match the element
8376
 * type of the root element.
8377
 */
8378
8379
void
8380
485k
xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
8381
485k
    const xmlChar *name = NULL;
8382
485k
    xmlChar *ExternalID = NULL;
8383
485k
    xmlChar *URI = NULL;
8384
8385
    /*
8386
     * We know that '<!DOCTYPE' has been detected.
8387
     */
8388
485k
    SKIP(9);
8389
8390
485k
    SKIP_BLANKS;
8391
8392
    /*
8393
     * Parse the DOCTYPE name.
8394
     */
8395
485k
    name = xmlParseName(ctxt);
8396
485k
    if (name == NULL) {
8397
2.07k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8398
2.07k
           "xmlParseDocTypeDecl : no DOCTYPE name !\n");
8399
2.07k
    }
8400
485k
    ctxt->intSubName = name;
8401
8402
485k
    SKIP_BLANKS;
8403
8404
    /*
8405
     * Check for SystemID and ExternalID
8406
     */
8407
485k
    URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8408
8409
485k
    if ((URI != NULL) || (ExternalID != NULL)) {
8410
235k
        ctxt->hasExternalSubset = 1;
8411
235k
    }
8412
485k
    ctxt->extSubURI = URI;
8413
485k
    ctxt->extSubSystem = ExternalID;
8414
8415
485k
    SKIP_BLANKS;
8416
8417
    /*
8418
     * Create and update the internal subset.
8419
     */
8420
485k
    if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8421
485k
  (!ctxt->disableSAX))
8422
468k
  ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8423
485k
    if (ctxt->instate == XML_PARSER_EOF)
8424
0
  return;
8425
8426
    /*
8427
     * Are there any internal subset declarations?
8428
     * they are handled separately in xmlParseInternalSubset()
8429
     */
8430
485k
    if (RAW == '[')
8431
336k
  return;
8432
8433
    /*
8434
     * We should be at the end of the DOCTYPE declaration.
8435
     */
8436
149k
    if (RAW != '>') {
8437
27.9k
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8438
27.9k
    }
8439
149k
    NEXT;
8440
149k
}
8441
8442
/**
8443
 * xmlParseInternalSubset:
8444
 * @ctxt:  an XML parser context
8445
 *
8446
 * parse the internal subset declaration
8447
 *
8448
 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8449
 */
8450
8451
static void
8452
338k
xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8453
    /*
8454
     * Is there any DTD definition ?
8455
     */
8456
338k
    if (RAW == '[') {
8457
338k
        int baseInputNr = ctxt->inputNr;
8458
338k
        ctxt->instate = XML_PARSER_DTD;
8459
338k
        NEXT;
8460
  /*
8461
   * Parse the succession of Markup declarations and
8462
   * PEReferences.
8463
   * Subsequence (markupdecl | PEReference | S)*
8464
   */
8465
338k
  SKIP_BLANKS;
8466
345M
  while (((RAW != ']') || (ctxt->inputNr > baseInputNr)) &&
8467
345M
               (ctxt->instate != XML_PARSER_EOF)) {
8468
8469
            /*
8470
             * Conditional sections are allowed from external entities included
8471
             * by PE References in the internal subset.
8472
             */
8473
345M
            if ((ctxt->inputNr > 1) && (ctxt->input->filename != NULL) &&
8474
345M
                (RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
8475
0
                xmlParseConditionalSections(ctxt);
8476
345M
            } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
8477
344M
          xmlParseMarkupDecl(ctxt);
8478
344M
            } else if (RAW == '%') {
8479
891k
          xmlParsePEReference(ctxt);
8480
891k
            } else {
8481
66.8k
    xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8482
66.8k
                        "xmlParseInternalSubset: error detected in"
8483
66.8k
                        " Markup declaration\n");
8484
66.8k
                xmlHaltParser(ctxt);
8485
66.8k
                return;
8486
66.8k
            }
8487
345M
      SKIP_BLANKS;
8488
345M
  }
8489
271k
  if (RAW == ']') {
8490
250k
      NEXT;
8491
250k
      SKIP_BLANKS;
8492
250k
  }
8493
271k
    }
8494
8495
    /*
8496
     * We should be at the end of the DOCTYPE declaration.
8497
     */
8498
271k
    if (RAW != '>') {
8499
23.3k
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8500
23.3k
  return;
8501
23.3k
    }
8502
248k
    NEXT;
8503
248k
}
8504
8505
#ifdef LIBXML_SAX1_ENABLED
8506
/**
8507
 * xmlParseAttribute:
8508
 * @ctxt:  an XML parser context
8509
 * @value:  a xmlChar ** used to store the value of the attribute
8510
 *
8511
 * DEPRECATED: Internal function, don't use.
8512
 *
8513
 * parse an attribute
8514
 *
8515
 * [41] Attribute ::= Name Eq AttValue
8516
 *
8517
 * [ WFC: No External Entity References ]
8518
 * Attribute values cannot contain direct or indirect entity references
8519
 * to external entities.
8520
 *
8521
 * [ WFC: No < in Attribute Values ]
8522
 * The replacement text of any entity referred to directly or indirectly in
8523
 * an attribute value (other than "&lt;") must not contain a <.
8524
 *
8525
 * [ VC: Attribute Value Type ]
8526
 * The attribute must have been declared; the value must be of the type
8527
 * declared for it.
8528
 *
8529
 * [25] Eq ::= S? '=' S?
8530
 *
8531
 * With namespace:
8532
 *
8533
 * [NS 11] Attribute ::= QName Eq AttValue
8534
 *
8535
 * Also the case QName == xmlns:??? is handled independently as a namespace
8536
 * definition.
8537
 *
8538
 * Returns the attribute name, and the value in *value.
8539
 */
8540
8541
const xmlChar *
8542
21.7M
xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8543
21.7M
    const xmlChar *name;
8544
21.7M
    xmlChar *val;
8545
8546
21.7M
    *value = NULL;
8547
21.7M
    GROW;
8548
21.7M
    name = xmlParseName(ctxt);
8549
21.7M
    if (name == NULL) {
8550
772k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8551
772k
                 "error parsing attribute name\n");
8552
772k
        return(NULL);
8553
772k
    }
8554
8555
    /*
8556
     * read the value
8557
     */
8558
21.0M
    SKIP_BLANKS;
8559
21.0M
    if (RAW == '=') {
8560
20.6M
        NEXT;
8561
20.6M
  SKIP_BLANKS;
8562
20.6M
  val = xmlParseAttValue(ctxt);
8563
20.6M
  ctxt->instate = XML_PARSER_CONTENT;
8564
20.6M
    } else {
8565
361k
  xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8566
361k
         "Specification mandates value for attribute %s\n", name);
8567
361k
  return(name);
8568
361k
    }
8569
8570
    /*
8571
     * Check that xml:lang conforms to the specification
8572
     * No longer reported as an error, only a warning is generated now
8573
     * since this was deprecated in XML second edition
8574
     */
8575
20.6M
    if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8576
60.7k
  if (!xmlCheckLanguageID(val)) {
8577
38.8k
      xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8578
38.8k
              "Malformed value for xml:lang : %s\n",
8579
38.8k
        val, NULL);
8580
38.8k
  }
8581
60.7k
    }
8582
8583
    /*
8584
     * Check that xml:space conforms to the specification
8585
     */
8586
20.6M
    if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8587
2.76k
  if (xmlStrEqual(val, BAD_CAST "default"))
8588
0
      *(ctxt->space) = 0;
8589
2.76k
  else if (xmlStrEqual(val, BAD_CAST "preserve"))
8590
740
      *(ctxt->space) = 1;
8591
2.02k
  else {
8592
2.02k
    xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8593
2.02k
"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8594
2.02k
                                 val, NULL);
8595
2.02k
  }
8596
2.76k
    }
8597
8598
20.6M
    *value = val;
8599
20.6M
    return(name);
8600
21.0M
}
8601
8602
/**
8603
 * xmlParseStartTag:
8604
 * @ctxt:  an XML parser context
8605
 *
8606
 * DEPRECATED: Internal function, don't use.
8607
 *
8608
 * Parse a start tag. Always consumes '<'.
8609
 *
8610
 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8611
 *
8612
 * [ WFC: Unique Att Spec ]
8613
 * No attribute name may appear more than once in the same start-tag or
8614
 * empty-element tag.
8615
 *
8616
 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8617
 *
8618
 * [ WFC: Unique Att Spec ]
8619
 * No attribute name may appear more than once in the same start-tag or
8620
 * empty-element tag.
8621
 *
8622
 * With namespace:
8623
 *
8624
 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8625
 *
8626
 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8627
 *
8628
 * Returns the element name parsed
8629
 */
8630
8631
const xmlChar *
8632
22.9M
xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8633
22.9M
    const xmlChar *name;
8634
22.9M
    const xmlChar *attname;
8635
22.9M
    xmlChar *attvalue;
8636
22.9M
    const xmlChar **atts = ctxt->atts;
8637
22.9M
    int nbatts = 0;
8638
22.9M
    int maxatts = ctxt->maxatts;
8639
22.9M
    int i;
8640
8641
22.9M
    if (RAW != '<') return(NULL);
8642
22.9M
    NEXT1;
8643
8644
22.9M
    name = xmlParseName(ctxt);
8645
22.9M
    if (name == NULL) {
8646
371k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8647
371k
       "xmlParseStartTag: invalid element name\n");
8648
371k
        return(NULL);
8649
371k
    }
8650
8651
    /*
8652
     * Now parse the attributes, i.e. the rest of the production:
8653
     *
8654
     * (S Attribute)* S?
8655
     */
8656
22.6M
    SKIP_BLANKS;
8657
22.6M
    GROW;
8658
8659
30.8M
    while (((RAW != '>') &&
8660
30.8M
     ((RAW != '/') || (NXT(1) != '>')) &&
8661
30.8M
     (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
8662
21.7M
  attname = xmlParseAttribute(ctxt, &attvalue);
8663
21.7M
        if (attname == NULL) {
8664
772k
      xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8665
772k
         "xmlParseStartTag: problem parsing attributes\n");
8666
772k
      break;
8667
772k
  }
8668
21.0M
        if (attvalue != NULL) {
8669
      /*
8670
       * [ WFC: Unique Att Spec ]
8671
       * No attribute name may appear more than once in the same
8672
       * start-tag or empty-element tag.
8673
       */
8674
28.2M
      for (i = 0; i < nbatts;i += 2) {
8675
7.70M
          if (xmlStrEqual(atts[i], attname)) {
8676
17.8k
        xmlErrAttributeDup(ctxt, NULL, attname);
8677
17.8k
        xmlFree(attvalue);
8678
17.8k
        goto failed;
8679
17.8k
    }
8680
7.70M
      }
8681
      /*
8682
       * Add the pair to atts
8683
       */
8684
20.5M
      if (atts == NULL) {
8685
129k
          maxatts = 22; /* allow for 10 attrs by default */
8686
129k
          atts = (const xmlChar **)
8687
129k
           xmlMalloc(maxatts * sizeof(xmlChar *));
8688
129k
    if (atts == NULL) {
8689
0
        xmlErrMemory(ctxt, NULL);
8690
0
        if (attvalue != NULL)
8691
0
      xmlFree(attvalue);
8692
0
        goto failed;
8693
0
    }
8694
129k
    ctxt->atts = atts;
8695
129k
    ctxt->maxatts = maxatts;
8696
20.4M
      } else if (nbatts + 4 > maxatts) {
8697
226
          const xmlChar **n;
8698
8699
226
          maxatts *= 2;
8700
226
          n = (const xmlChar **) xmlRealloc((void *) atts,
8701
226
               maxatts * sizeof(const xmlChar *));
8702
226
    if (n == NULL) {
8703
0
        xmlErrMemory(ctxt, NULL);
8704
0
        if (attvalue != NULL)
8705
0
      xmlFree(attvalue);
8706
0
        goto failed;
8707
0
    }
8708
226
    atts = n;
8709
226
    ctxt->atts = atts;
8710
226
    ctxt->maxatts = maxatts;
8711
226
      }
8712
20.5M
      atts[nbatts++] = attname;
8713
20.5M
      atts[nbatts++] = attvalue;
8714
20.5M
      atts[nbatts] = NULL;
8715
20.5M
      atts[nbatts + 1] = NULL;
8716
20.5M
  } else {
8717
408k
      if (attvalue != NULL)
8718
0
    xmlFree(attvalue);
8719
408k
  }
8720
8721
21.0M
failed:
8722
8723
21.0M
  GROW
8724
21.0M
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8725
12.7M
      break;
8726
8.24M
  if (SKIP_BLANKS == 0) {
8727
731k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8728
731k
         "attributes construct error\n");
8729
731k
  }
8730
8.24M
  SHRINK;
8731
8.24M
        GROW;
8732
8.24M
    }
8733
8734
    /*
8735
     * SAX: Start of Element !
8736
     */
8737
22.6M
    if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8738
22.6M
  (!ctxt->disableSAX)) {
8739
20.9M
  if (nbatts > 0)
8740
12.2M
      ctxt->sax->startElement(ctxt->userData, name, atts);
8741
8.73M
  else
8742
8.73M
      ctxt->sax->startElement(ctxt->userData, name, NULL);
8743
20.9M
    }
8744
8745
22.6M
    if (atts != NULL) {
8746
        /* Free only the content strings */
8747
42.5M
        for (i = 1;i < nbatts;i+=2)
8748
20.5M
      if (atts[i] != NULL)
8749
20.5M
         xmlFree((xmlChar *) atts[i]);
8750
21.9M
    }
8751
22.6M
    return(name);
8752
22.6M
}
8753
8754
/**
8755
 * xmlParseEndTag1:
8756
 * @ctxt:  an XML parser context
8757
 * @line:  line of the start tag
8758
 * @nsNr:  number of namespaces on the start tag
8759
 *
8760
 * Parse an end tag. Always consumes '</'.
8761
 *
8762
 * [42] ETag ::= '</' Name S? '>'
8763
 *
8764
 * With namespace
8765
 *
8766
 * [NS 9] ETag ::= '</' QName S? '>'
8767
 */
8768
8769
static void
8770
13.8M
xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8771
13.8M
    const xmlChar *name;
8772
8773
13.8M
    GROW;
8774
13.8M
    if ((RAW != '<') || (NXT(1) != '/')) {
8775
0
  xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8776
0
           "xmlParseEndTag: '</' not found\n");
8777
0
  return;
8778
0
    }
8779
13.8M
    SKIP(2);
8780
8781
13.8M
    name = xmlParseNameAndCompare(ctxt,ctxt->name);
8782
8783
    /*
8784
     * We should definitely be at the ending "S? '>'" part
8785
     */
8786
13.8M
    GROW;
8787
13.8M
    SKIP_BLANKS;
8788
13.8M
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8789
125k
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8790
125k
    } else
8791
13.7M
  NEXT1;
8792
8793
    /*
8794
     * [ WFC: Element Type Match ]
8795
     * The Name in an element's end-tag must match the element type in the
8796
     * start-tag.
8797
     *
8798
     */
8799
13.8M
    if (name != (xmlChar*)1) {
8800
373k
        if (name == NULL) name = BAD_CAST "unparsable";
8801
373k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8802
373k
         "Opening and ending tag mismatch: %s line %d and %s\n",
8803
373k
                    ctxt->name, line, name);
8804
373k
    }
8805
8806
    /*
8807
     * SAX: End of Tag
8808
     */
8809
13.8M
    if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8810
13.8M
  (!ctxt->disableSAX))
8811
12.8M
        ctxt->sax->endElement(ctxt->userData, ctxt->name);
8812
8813
13.8M
    namePop(ctxt);
8814
13.8M
    spacePop(ctxt);
8815
13.8M
    return;
8816
13.8M
}
8817
8818
/**
8819
 * xmlParseEndTag:
8820
 * @ctxt:  an XML parser context
8821
 *
8822
 * DEPRECATED: Internal function, don't use.
8823
 *
8824
 * parse an end tag
8825
 *
8826
 * [42] ETag ::= '</' Name S? '>'
8827
 *
8828
 * With namespace
8829
 *
8830
 * [NS 9] ETag ::= '</' QName S? '>'
8831
 */
8832
8833
void
8834
0
xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8835
0
    xmlParseEndTag1(ctxt, 0);
8836
0
}
8837
#endif /* LIBXML_SAX1_ENABLED */
8838
8839
/************************************************************************
8840
 *                  *
8841
 *          SAX 2 specific operations       *
8842
 *                  *
8843
 ************************************************************************/
8844
8845
/*
8846
 * xmlGetNamespace:
8847
 * @ctxt:  an XML parser context
8848
 * @prefix:  the prefix to lookup
8849
 *
8850
 * Lookup the namespace name for the @prefix (which can be NULL)
8851
 * The prefix must come from the @ctxt->dict dictionary
8852
 *
8853
 * Returns the namespace name or NULL if not bound
8854
 */
8855
static const xmlChar *
8856
33.4M
xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8857
33.4M
    int i;
8858
8859
33.4M
    if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
8860
52.0M
    for (i = ctxt->nsNr - 2;i >= 0;i-=2)
8861
20.5M
        if (ctxt->nsTab[i] == prefix) {
8862
1.37M
      if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8863
79.9k
          return(NULL);
8864
1.29M
      return(ctxt->nsTab[i + 1]);
8865
1.37M
  }
8866
31.5M
    return(NULL);
8867
32.9M
}
8868
8869
/**
8870
 * xmlParseQName:
8871
 * @ctxt:  an XML parser context
8872
 * @prefix:  pointer to store the prefix part
8873
 *
8874
 * parse an XML Namespace QName
8875
 *
8876
 * [6]  QName  ::= (Prefix ':')? LocalPart
8877
 * [7]  Prefix  ::= NCName
8878
 * [8]  LocalPart  ::= NCName
8879
 *
8880
 * Returns the Name parsed or NULL
8881
 */
8882
8883
static const xmlChar *
8884
65.7M
xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8885
65.7M
    const xmlChar *l, *p;
8886
8887
65.7M
    GROW;
8888
8889
65.7M
    l = xmlParseNCName(ctxt);
8890
65.7M
    if (l == NULL) {
8891
770k
        if (CUR == ':') {
8892
18.9k
      l = xmlParseName(ctxt);
8893
18.9k
      if (l != NULL) {
8894
18.9k
          xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8895
18.9k
             "Failed to parse QName '%s'\n", l, NULL, NULL);
8896
18.9k
    *prefix = NULL;
8897
18.9k
    return(l);
8898
18.9k
      }
8899
18.9k
  }
8900
751k
        return(NULL);
8901
770k
    }
8902
65.0M
    if (CUR == ':') {
8903
2.25M
        NEXT;
8904
2.25M
  p = l;
8905
2.25M
  l = xmlParseNCName(ctxt);
8906
2.25M
  if (l == NULL) {
8907
68.1k
      xmlChar *tmp;
8908
8909
68.1k
            if (ctxt->instate == XML_PARSER_EOF)
8910
0
                return(NULL);
8911
68.1k
            xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8912
68.1k
               "Failed to parse QName '%s:'\n", p, NULL, NULL);
8913
68.1k
      l = xmlParseNmtoken(ctxt);
8914
68.1k
      if (l == NULL) {
8915
49.2k
                if (ctxt->instate == XML_PARSER_EOF)
8916
0
                    return(NULL);
8917
49.2k
    tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8918
49.2k
            } else {
8919
18.8k
    tmp = xmlBuildQName(l, p, NULL, 0);
8920
18.8k
    xmlFree((char *)l);
8921
18.8k
      }
8922
68.1k
      p = xmlDictLookup(ctxt->dict, tmp, -1);
8923
68.1k
      if (tmp != NULL) xmlFree(tmp);
8924
68.1k
      *prefix = NULL;
8925
68.1k
      return(p);
8926
68.1k
  }
8927
2.18M
  if (CUR == ':') {
8928
63.0k
      xmlChar *tmp;
8929
8930
63.0k
            xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8931
63.0k
               "Failed to parse QName '%s:%s:'\n", p, l, NULL);
8932
63.0k
      NEXT;
8933
63.0k
      tmp = (xmlChar *) xmlParseName(ctxt);
8934
63.0k
      if (tmp != NULL) {
8935
53.1k
          tmp = xmlBuildQName(tmp, l, NULL, 0);
8936
53.1k
    l = xmlDictLookup(ctxt->dict, tmp, -1);
8937
53.1k
    if (tmp != NULL) xmlFree(tmp);
8938
53.1k
    *prefix = p;
8939
53.1k
    return(l);
8940
53.1k
      }
8941
9.88k
            if (ctxt->instate == XML_PARSER_EOF)
8942
0
                return(NULL);
8943
9.88k
      tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8944
9.88k
      l = xmlDictLookup(ctxt->dict, tmp, -1);
8945
9.88k
      if (tmp != NULL) xmlFree(tmp);
8946
9.88k
      *prefix = p;
8947
9.88k
      return(l);
8948
9.88k
  }
8949
2.11M
  *prefix = p;
8950
2.11M
    } else
8951
62.7M
        *prefix = NULL;
8952
64.8M
    return(l);
8953
65.0M
}
8954
8955
/**
8956
 * xmlParseQNameAndCompare:
8957
 * @ctxt:  an XML parser context
8958
 * @name:  the localname
8959
 * @prefix:  the prefix, if any.
8960
 *
8961
 * parse an XML QName and compare it with the expected @prefix and @name
8962
 * (specialized for endtag parsing)
8963
 *
8964
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8965
 * and the name for mismatch
8966
 */
8967
8968
static const xmlChar *
8969
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8970
456k
                        xmlChar const *prefix) {
8971
456k
    const xmlChar *cmp;
8972
456k
    const xmlChar *in;
8973
456k
    const xmlChar *ret;
8974
456k
    const xmlChar *prefix2;
8975
8976
456k
    if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8977
8978
456k
    GROW;
8979
456k
    in = ctxt->input->cur;
8980
8981
456k
    cmp = prefix;
8982
1.53M
    while (*in != 0 && *in == *cmp) {
8983
1.07M
  ++in;
8984
1.07M
  ++cmp;
8985
1.07M
    }
8986
456k
    if ((*cmp == 0) && (*in == ':')) {
8987
411k
        in++;
8988
411k
  cmp = name;
8989
2.95M
  while (*in != 0 && *in == *cmp) {
8990
2.53M
      ++in;
8991
2.53M
      ++cmp;
8992
2.53M
  }
8993
411k
  if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8994
      /* success */
8995
299k
            ctxt->input->col += in - ctxt->input->cur;
8996
299k
      ctxt->input->cur = in;
8997
299k
      return((const xmlChar*) 1);
8998
299k
  }
8999
411k
    }
9000
    /*
9001
     * all strings come from the dictionary, so equality can be checked by pointer
9002
     */
9003
156k
    ret = xmlParseQName (ctxt, &prefix2);
9004
156k
    if ((ret == name) && (prefix == prefix2))
9005
2.68k
  return((const xmlChar*) 1);
9006
153k
    return ret;
9007
156k
}
9008
9009
/**
9010
 * xmlParseAttValueInternal:
9011
 * @ctxt:  an XML parser context
9012
 * @len:  attribute len result
9013
 * @alloc:  whether the attribute was reallocated as a new string
9014
 * @normalize:  if 1 then further non-CDATA normalization must be done
9015
 *
9016
 * parse a value for an attribute.
9017
 * NOTE: if no normalization is needed, the routine will return pointers
9018
 *       directly from the data buffer.
9019
 *
9020
 * 3.3.3 Attribute-Value Normalization:
9021
 * Before the value of an attribute is passed to the application or
9022
 * checked for validity, the XML processor must normalize it as follows:
9023
 * - a character reference is processed by appending the referenced
9024
 *   character to the attribute value
9025
 * - an entity reference is processed by recursively processing the
9026
 *   replacement text of the entity
9027
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
9028
 *   appending #x20 to the normalized value, except that only a single
9029
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
9030
 *   parsed entity or the literal entity value of an internal parsed entity
9031
 * - other characters are processed by appending them to the normalized value
9032
 * If the declared value is not CDATA, then the XML processor must further
9033
 * process the normalized attribute value by discarding any leading and
9034
 * trailing space (#x20) characters, and by replacing sequences of space
9035
 * (#x20) characters by a single space (#x20) character.
9036
 * All attributes for which no declaration has been read should be treated
9037
 * by a non-validating parser as if declared CDATA.
9038
 *
9039
 * Returns the AttValue parsed or NULL. The value has to be freed by the
9040
 *     caller if it was copied, this can be detected by val[*len] == 0.
9041
 */
9042
9043
#define GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end) \
9044
18.0k
    const xmlChar *oldbase = ctxt->input->base;\
9045
18.0k
    GROW;\
9046
18.0k
    if (ctxt->instate == XML_PARSER_EOF)\
9047
18.0k
        return(NULL);\
9048
18.0k
    if (oldbase != ctxt->input->base) {\
9049
0
        ptrdiff_t delta = ctxt->input->base - oldbase;\
9050
0
        start = start + delta;\
9051
0
        in = in + delta;\
9052
0
    }\
9053
18.0k
    end = ctxt->input->end;
9054
9055
static xmlChar *
9056
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
9057
                         int normalize)
9058
54.1M
{
9059
54.1M
    xmlChar limit = 0;
9060
54.1M
    const xmlChar *in = NULL, *start, *end, *last;
9061
54.1M
    xmlChar *ret = NULL;
9062
54.1M
    int line, col;
9063
54.1M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9064
13.6M
                    XML_MAX_HUGE_LENGTH :
9065
54.1M
                    XML_MAX_TEXT_LENGTH;
9066
9067
54.1M
    GROW;
9068
54.1M
    in = (xmlChar *) CUR_PTR;
9069
54.1M
    line = ctxt->input->line;
9070
54.1M
    col = ctxt->input->col;
9071
54.1M
    if (*in != '"' && *in != '\'') {
9072
109k
        xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
9073
109k
        return (NULL);
9074
109k
    }
9075
53.9M
    ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
9076
9077
    /*
9078
     * try to handle in this routine the most common case where no
9079
     * allocation of a new string is required and where content is
9080
     * pure ASCII.
9081
     */
9082
53.9M
    limit = *in++;
9083
53.9M
    col++;
9084
53.9M
    end = ctxt->input->end;
9085
53.9M
    start = in;
9086
53.9M
    if (in >= end) {
9087
1.24k
        GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9088
1.24k
    }
9089
53.9M
    if (normalize) {
9090
        /*
9091
   * Skip any leading spaces
9092
   */
9093
4.07M
  while ((in < end) && (*in != limit) &&
9094
4.07M
         ((*in == 0x20) || (*in == 0x9) ||
9095
4.07M
          (*in == 0xA) || (*in == 0xD))) {
9096
181k
      if (*in == 0xA) {
9097
80.7k
          line++; col = 1;
9098
100k
      } else {
9099
100k
          col++;
9100
100k
      }
9101
181k
      in++;
9102
181k
      start = in;
9103
181k
      if (in >= end) {
9104
181
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9105
181
                if ((in - start) > maxLength) {
9106
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9107
0
                                   "AttValue length too long\n");
9108
0
                    return(NULL);
9109
0
                }
9110
181
      }
9111
181k
  }
9112
39.8M
  while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9113
39.8M
         (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9114
36.0M
      col++;
9115
36.0M
      if ((*in++ == 0x20) && (*in == 0x20)) break;
9116
36.0M
      if (in >= end) {
9117
553
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9118
553
                if ((in - start) > maxLength) {
9119
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9120
0
                                   "AttValue length too long\n");
9121
0
                    return(NULL);
9122
0
                }
9123
553
      }
9124
36.0M
  }
9125
3.89M
  last = in;
9126
  /*
9127
   * skip the trailing blanks
9128
   */
9129
3.91M
  while ((last[-1] == 0x20) && (last > start)) last--;
9130
3.98M
  while ((in < end) && (*in != limit) &&
9131
3.98M
         ((*in == 0x20) || (*in == 0x9) ||
9132
184k
          (*in == 0xA) || (*in == 0xD))) {
9133
88.5k
      if (*in == 0xA) {
9134
36.3k
          line++, col = 1;
9135
52.1k
      } else {
9136
52.1k
          col++;
9137
52.1k
      }
9138
88.5k
      in++;
9139
88.5k
      if (in >= end) {
9140
288
    const xmlChar *oldbase = ctxt->input->base;
9141
288
    GROW;
9142
288
                if (ctxt->instate == XML_PARSER_EOF)
9143
0
                    return(NULL);
9144
288
    if (oldbase != ctxt->input->base) {
9145
0
        ptrdiff_t delta = ctxt->input->base - oldbase;
9146
0
        start = start + delta;
9147
0
        in = in + delta;
9148
0
        last = last + delta;
9149
0
    }
9150
288
    end = ctxt->input->end;
9151
288
                if ((in - start) > maxLength) {
9152
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9153
0
                                   "AttValue length too long\n");
9154
0
                    return(NULL);
9155
0
                }
9156
288
      }
9157
88.5k
  }
9158
3.89M
        if ((in - start) > maxLength) {
9159
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9160
0
                           "AttValue length too long\n");
9161
0
            return(NULL);
9162
0
        }
9163
3.89M
  if (*in != limit) goto need_complex;
9164
50.1M
    } else {
9165
510M
  while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9166
510M
         (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9167
460M
      in++;
9168
460M
      col++;
9169
460M
      if (in >= end) {
9170
16.0k
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9171
16.0k
                if ((in - start) > maxLength) {
9172
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9173
0
                                   "AttValue length too long\n");
9174
0
                    return(NULL);
9175
0
                }
9176
16.0k
      }
9177
460M
  }
9178
50.1M
  last = in;
9179
50.1M
        if ((in - start) > maxLength) {
9180
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9181
0
                           "AttValue length too long\n");
9182
0
            return(NULL);
9183
0
        }
9184
50.1M
  if (*in != limit) goto need_complex;
9185
50.1M
    }
9186
52.4M
    in++;
9187
52.4M
    col++;
9188
52.4M
    if (len != NULL) {
9189
31.3M
        if (alloc) *alloc = 0;
9190
31.3M
        *len = last - start;
9191
31.3M
        ret = (xmlChar *) start;
9192
31.3M
    } else {
9193
21.0M
        if (alloc) *alloc = 1;
9194
21.0M
        ret = xmlStrndup(start, last - start);
9195
21.0M
    }
9196
52.4M
    CUR_PTR = in;
9197
52.4M
    ctxt->input->line = line;
9198
52.4M
    ctxt->input->col = col;
9199
52.4M
    return ret;
9200
1.58M
need_complex:
9201
1.58M
    if (alloc) *alloc = 1;
9202
1.58M
    return xmlParseAttValueComplex(ctxt, len, normalize);
9203
53.9M
}
9204
9205
/**
9206
 * xmlParseAttribute2:
9207
 * @ctxt:  an XML parser context
9208
 * @pref:  the element prefix
9209
 * @elem:  the element name
9210
 * @prefix:  a xmlChar ** used to store the value of the attribute prefix
9211
 * @value:  a xmlChar ** used to store the value of the attribute
9212
 * @len:  an int * to save the length of the attribute
9213
 * @alloc:  an int * to indicate if the attribute was allocated
9214
 *
9215
 * parse an attribute in the new SAX2 framework.
9216
 *
9217
 * Returns the attribute name, and the value in *value, or NULL on error.
9218
 */
9219
9220
static const xmlChar *
9221
xmlParseAttribute2(xmlParserCtxtPtr ctxt,
9222
                   const xmlChar * pref, const xmlChar * elem,
9223
                   const xmlChar ** prefix, xmlChar ** value,
9224
                   int *len, int *alloc)
9225
32.8M
{
9226
32.8M
    const xmlChar *name;
9227
32.8M
    xmlChar *val, *internal_val = NULL;
9228
32.8M
    int normalize = 0;
9229
9230
32.8M
    *value = NULL;
9231
32.8M
    GROW;
9232
32.8M
    name = xmlParseQName(ctxt, prefix);
9233
32.8M
    if (name == NULL) {
9234
402k
        xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9235
402k
                       "error parsing attribute name\n");
9236
402k
        return (NULL);
9237
402k
    }
9238
9239
    /*
9240
     * get the type if needed
9241
     */
9242
32.4M
    if (ctxt->attsSpecial != NULL) {
9243
6.53M
        int type;
9244
9245
6.53M
        type = (int) (ptrdiff_t) xmlHashQLookup2(ctxt->attsSpecial,
9246
6.53M
                                                 pref, elem, *prefix, name);
9247
6.53M
        if (type != 0)
9248
3.90M
            normalize = 1;
9249
6.53M
    }
9250
9251
    /*
9252
     * read the value
9253
     */
9254
32.4M
    SKIP_BLANKS;
9255
32.4M
    if (RAW == '=') {
9256
32.2M
        NEXT;
9257
32.2M
        SKIP_BLANKS;
9258
32.2M
        val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9259
32.2M
        if (val == NULL)
9260
45.8k
            return (NULL);
9261
32.2M
  if (normalize) {
9262
      /*
9263
       * Sometimes a second normalisation pass for spaces is needed
9264
       * but that only happens if charrefs or entity references
9265
       * have been used in the attribute value, i.e. the attribute
9266
       * value has been extracted in an allocated string already.
9267
       */
9268
3.89M
      if (*alloc) {
9269
97.5k
          const xmlChar *val2;
9270
9271
97.5k
          val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
9272
97.5k
    if ((val2 != NULL) && (val2 != val)) {
9273
15.6k
        xmlFree(val);
9274
15.6k
        val = (xmlChar *) val2;
9275
15.6k
    }
9276
97.5k
      }
9277
3.89M
  }
9278
32.2M
        ctxt->instate = XML_PARSER_CONTENT;
9279
32.2M
    } else {
9280
236k
        xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
9281
236k
                          "Specification mandates value for attribute %s\n",
9282
236k
                          name);
9283
236k
        return (name);
9284
236k
    }
9285
9286
32.2M
    if (*prefix == ctxt->str_xml) {
9287
        /*
9288
         * Check that xml:lang conforms to the specification
9289
         * No more registered as an error, just generate a warning now
9290
         * since this was deprecated in XML second edition
9291
         */
9292
140k
        if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
9293
32.1k
            internal_val = xmlStrndup(val, *len);
9294
32.1k
            if (!xmlCheckLanguageID(internal_val)) {
9295
16.1k
                xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9296
16.1k
                              "Malformed value for xml:lang : %s\n",
9297
16.1k
                              internal_val, NULL);
9298
16.1k
            }
9299
32.1k
        }
9300
9301
        /*
9302
         * Check that xml:space conforms to the specification
9303
         */
9304
140k
        if (xmlStrEqual(name, BAD_CAST "space")) {
9305
2.94k
            internal_val = xmlStrndup(val, *len);
9306
2.94k
            if (xmlStrEqual(internal_val, BAD_CAST "default"))
9307
0
                *(ctxt->space) = 0;
9308
2.94k
            else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9309
515
                *(ctxt->space) = 1;
9310
2.43k
            else {
9311
2.43k
                xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9312
2.43k
                              "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9313
2.43k
                              internal_val, NULL);
9314
2.43k
            }
9315
2.94k
        }
9316
140k
        if (internal_val) {
9317
35.0k
            xmlFree(internal_val);
9318
35.0k
        }
9319
140k
    }
9320
9321
32.2M
    *value = val;
9322
32.2M
    return (name);
9323
32.4M
}
9324
/**
9325
 * xmlParseStartTag2:
9326
 * @ctxt:  an XML parser context
9327
 *
9328
 * Parse a start tag. Always consumes '<'.
9329
 *
9330
 * This routine is called when running SAX2 parsing
9331
 *
9332
 * [40] STag ::= '<' Name (S Attribute)* S? '>'
9333
 *
9334
 * [ WFC: Unique Att Spec ]
9335
 * No attribute name may appear more than once in the same start-tag or
9336
 * empty-element tag.
9337
 *
9338
 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9339
 *
9340
 * [ WFC: Unique Att Spec ]
9341
 * No attribute name may appear more than once in the same start-tag or
9342
 * empty-element tag.
9343
 *
9344
 * With namespace:
9345
 *
9346
 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9347
 *
9348
 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9349
 *
9350
 * Returns the element name parsed
9351
 */
9352
9353
static const xmlChar *
9354
xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
9355
32.7M
                  const xmlChar **URI, int *tlen) {
9356
32.7M
    const xmlChar *localname;
9357
32.7M
    const xmlChar *prefix;
9358
32.7M
    const xmlChar *attname;
9359
32.7M
    const xmlChar *aprefix;
9360
32.7M
    const xmlChar *nsname;
9361
32.7M
    xmlChar *attvalue;
9362
32.7M
    const xmlChar **atts = ctxt->atts;
9363
32.7M
    int maxatts = ctxt->maxatts;
9364
32.7M
    int nratts, nbatts, nbdef, inputid;
9365
32.7M
    int i, j, nbNs, attval;
9366
32.7M
    unsigned long cur;
9367
32.7M
    int nsNr = ctxt->nsNr;
9368
9369
32.7M
    if (RAW != '<') return(NULL);
9370
32.7M
    NEXT1;
9371
9372
    /*
9373
     * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
9374
     *       point since the attribute values may be stored as pointers to
9375
     *       the buffer and calling SHRINK would destroy them !
9376
     *       The Shrinking is only possible once the full set of attribute
9377
     *       callbacks have been done.
9378
     */
9379
32.7M
    SHRINK;
9380
32.7M
    cur = ctxt->input->cur - ctxt->input->base;
9381
32.7M
    inputid = ctxt->input->id;
9382
32.7M
    nbatts = 0;
9383
32.7M
    nratts = 0;
9384
32.7M
    nbdef = 0;
9385
32.7M
    nbNs = 0;
9386
32.7M
    attval = 0;
9387
    /* Forget any namespaces added during an earlier parse of this element. */
9388
32.7M
    ctxt->nsNr = nsNr;
9389
9390
32.7M
    localname = xmlParseQName(ctxt, &prefix);
9391
32.7M
    if (localname == NULL) {
9392
338k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9393
338k
           "StartTag: invalid element name\n");
9394
338k
        return(NULL);
9395
338k
    }
9396
32.3M
    *tlen = ctxt->input->cur - ctxt->input->base - cur;
9397
9398
    /*
9399
     * Now parse the attributes, it ends up with the ending
9400
     *
9401
     * (S Attribute)* S?
9402
     */
9403
32.3M
    SKIP_BLANKS;
9404
32.3M
    GROW;
9405
9406
44.6M
    while (((RAW != '>') &&
9407
44.6M
     ((RAW != '/') || (NXT(1) != '>')) &&
9408
44.6M
     (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
9409
32.8M
  int len = -1, alloc = 0;
9410
9411
32.8M
  attname = xmlParseAttribute2(ctxt, prefix, localname,
9412
32.8M
                               &aprefix, &attvalue, &len, &alloc);
9413
32.8M
        if (attname == NULL) {
9414
448k
      xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9415
448k
           "xmlParseStartTag: problem parsing attributes\n");
9416
448k
      break;
9417
448k
  }
9418
32.4M
        if (attvalue == NULL)
9419
236k
            goto next_attr;
9420
32.2M
  if (len < 0) len = xmlStrlen(attvalue);
9421
9422
32.2M
        if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9423
212k
            const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9424
212k
            xmlURIPtr uri;
9425
9426
212k
            if (URL == NULL) {
9427
0
                xmlErrMemory(ctxt, "dictionary allocation failure");
9428
0
                if ((attvalue != NULL) && (alloc != 0))
9429
0
                    xmlFree(attvalue);
9430
0
                localname = NULL;
9431
0
                goto done;
9432
0
            }
9433
212k
            if (*URL != 0) {
9434
204k
                uri = xmlParseURI((const char *) URL);
9435
204k
                if (uri == NULL) {
9436
69.1k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
9437
69.1k
                             "xmlns: '%s' is not a valid URI\n",
9438
69.1k
                                       URL, NULL, NULL);
9439
135k
                } else {
9440
135k
                    if (uri->scheme == NULL) {
9441
60.7k
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9442
60.7k
                                  "xmlns: URI %s is not absolute\n",
9443
60.7k
                                  URL, NULL, NULL);
9444
60.7k
                    }
9445
135k
                    xmlFreeURI(uri);
9446
135k
                }
9447
204k
                if (URL == ctxt->str_xml_ns) {
9448
0
                    if (attname != ctxt->str_xml) {
9449
0
                        xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9450
0
                     "xml namespace URI cannot be the default namespace\n",
9451
0
                                 NULL, NULL, NULL);
9452
0
                    }
9453
0
                    goto next_attr;
9454
0
                }
9455
204k
                if ((len == 29) &&
9456
204k
                    (xmlStrEqual(URL,
9457
2.00k
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9458
0
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9459
0
                         "reuse of the xmlns namespace name is forbidden\n",
9460
0
                             NULL, NULL, NULL);
9461
0
                    goto next_attr;
9462
0
                }
9463
204k
            }
9464
            /*
9465
             * check that it's not a defined namespace
9466
             */
9467
232k
            for (j = 1;j <= nbNs;j++)
9468
25.5k
                if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9469
6.14k
                    break;
9470
212k
            if (j <= nbNs)
9471
6.14k
                xmlErrAttributeDup(ctxt, NULL, attname);
9472
206k
            else
9473
206k
                if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
9474
9475
32.0M
        } else if (aprefix == ctxt->str_xmlns) {
9476
252k
            const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9477
252k
            xmlURIPtr uri;
9478
9479
252k
            if (attname == ctxt->str_xml) {
9480
2.95k
                if (URL != ctxt->str_xml_ns) {
9481
2.95k
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9482
2.95k
                             "xml namespace prefix mapped to wrong URI\n",
9483
2.95k
                             NULL, NULL, NULL);
9484
2.95k
                }
9485
                /*
9486
                 * Do not keep a namespace definition node
9487
                 */
9488
2.95k
                goto next_attr;
9489
2.95k
            }
9490
250k
            if (URL == ctxt->str_xml_ns) {
9491
0
                if (attname != ctxt->str_xml) {
9492
0
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9493
0
                             "xml namespace URI mapped to wrong prefix\n",
9494
0
                             NULL, NULL, NULL);
9495
0
                }
9496
0
                goto next_attr;
9497
0
            }
9498
250k
            if (attname == ctxt->str_xmlns) {
9499
347
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9500
347
                         "redefinition of the xmlns prefix is forbidden\n",
9501
347
                         NULL, NULL, NULL);
9502
347
                goto next_attr;
9503
347
            }
9504
249k
            if ((len == 29) &&
9505
249k
                (xmlStrEqual(URL,
9506
11.8k
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9507
0
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9508
0
                         "reuse of the xmlns namespace name is forbidden\n",
9509
0
                         NULL, NULL, NULL);
9510
0
                goto next_attr;
9511
0
            }
9512
249k
            if ((URL == NULL) || (URL[0] == 0)) {
9513
15.3k
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9514
15.3k
                         "xmlns:%s: Empty XML namespace is not allowed\n",
9515
15.3k
                              attname, NULL, NULL);
9516
15.3k
                goto next_attr;
9517
234k
            } else {
9518
234k
                uri = xmlParseURI((const char *) URL);
9519
234k
                if (uri == NULL) {
9520
53.7k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
9521
53.7k
                         "xmlns:%s: '%s' is not a valid URI\n",
9522
53.7k
                                       attname, URL, NULL);
9523
180k
                } else {
9524
180k
                    if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9525
18.6k
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9526
18.6k
                                  "xmlns:%s: URI %s is not absolute\n",
9527
18.6k
                                  attname, URL, NULL);
9528
18.6k
                    }
9529
180k
                    xmlFreeURI(uri);
9530
180k
                }
9531
234k
            }
9532
9533
            /*
9534
             * check that it's not a defined namespace
9535
             */
9536
395k
            for (j = 1;j <= nbNs;j++)
9537
175k
                if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9538
13.7k
                    break;
9539
234k
            if (j <= nbNs)
9540
13.7k
                xmlErrAttributeDup(ctxt, aprefix, attname);
9541
220k
            else
9542
220k
                if (nsPush(ctxt, attname, URL) > 0) nbNs++;
9543
9544
31.7M
        } else {
9545
            /*
9546
             * Add the pair to atts
9547
             */
9548
31.7M
            if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9549
144k
                if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9550
0
                    goto next_attr;
9551
0
                }
9552
144k
                maxatts = ctxt->maxatts;
9553
144k
                atts = ctxt->atts;
9554
144k
            }
9555
31.7M
            ctxt->attallocs[nratts++] = alloc;
9556
31.7M
            atts[nbatts++] = attname;
9557
31.7M
            atts[nbatts++] = aprefix;
9558
            /*
9559
             * The namespace URI field is used temporarily to point at the
9560
             * base of the current input buffer for non-alloced attributes.
9561
             * When the input buffer is reallocated, all the pointers become
9562
             * invalid, but they can be reconstructed later.
9563
             */
9564
31.7M
            if (alloc)
9565
685k
                atts[nbatts++] = NULL;
9566
31.0M
            else
9567
31.0M
                atts[nbatts++] = ctxt->input->base;
9568
31.7M
            atts[nbatts++] = attvalue;
9569
31.7M
            attvalue += len;
9570
31.7M
            atts[nbatts++] = attvalue;
9571
            /*
9572
             * tag if some deallocation is needed
9573
             */
9574
31.7M
            if (alloc != 0) attval = 1;
9575
31.7M
            attvalue = NULL; /* moved into atts */
9576
31.7M
        }
9577
9578
32.4M
next_attr:
9579
32.4M
        if ((attvalue != NULL) && (alloc != 0)) {
9580
154k
            xmlFree(attvalue);
9581
154k
            attvalue = NULL;
9582
154k
        }
9583
9584
32.4M
  GROW
9585
32.4M
        if (ctxt->instate == XML_PARSER_EOF)
9586
0
            break;
9587
32.4M
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9588
19.5M
      break;
9589
12.8M
  if (SKIP_BLANKS == 0) {
9590
595k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9591
595k
         "attributes construct error\n");
9592
595k
      break;
9593
595k
  }
9594
12.2M
        GROW;
9595
12.2M
    }
9596
9597
32.3M
    if (ctxt->input->id != inputid) {
9598
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9599
0
                    "Unexpected change of input\n");
9600
0
        localname = NULL;
9601
0
        goto done;
9602
0
    }
9603
9604
    /* Reconstruct attribute value pointers. */
9605
64.1M
    for (i = 0, j = 0; j < nratts; i += 5, j++) {
9606
31.7M
        if (atts[i+2] != NULL) {
9607
            /*
9608
             * Arithmetic on dangling pointers is technically undefined
9609
             * behavior, but well...
9610
             */
9611
31.0M
            const xmlChar *old = atts[i+2];
9612
31.0M
            atts[i+2]  = NULL;    /* Reset repurposed namespace URI */
9613
31.0M
            atts[i+3] = ctxt->input->base + (atts[i+3] - old);  /* value */
9614
31.0M
            atts[i+4] = ctxt->input->base + (atts[i+4] - old);  /* valuend */
9615
31.0M
        }
9616
31.7M
    }
9617
9618
    /*
9619
     * The attributes defaulting
9620
     */
9621
32.3M
    if (ctxt->attsDefault != NULL) {
9622
12.2M
        xmlDefAttrsPtr defaults;
9623
9624
12.2M
  defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9625
12.2M
  if (defaults != NULL) {
9626
2.26M
      for (i = 0;i < defaults->nbAttrs;i++) {
9627
1.54M
          attname = defaults->values[5 * i];
9628
1.54M
    aprefix = defaults->values[5 * i + 1];
9629
9630
                /*
9631
     * special work for namespaces defaulted defs
9632
     */
9633
1.54M
    if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9634
        /*
9635
         * check that it's not a defined namespace
9636
         */
9637
35.1k
        for (j = 1;j <= nbNs;j++)
9638
6.53k
            if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9639
3.35k
          break;
9640
31.9k
              if (j <= nbNs) continue;
9641
9642
28.6k
        nsname = xmlGetNamespace(ctxt, NULL);
9643
28.6k
        if (nsname != defaults->values[5 * i + 2]) {
9644
9.53k
      if (nsPush(ctxt, NULL,
9645
9.53k
                 defaults->values[5 * i + 2]) > 0)
9646
9.34k
          nbNs++;
9647
9.53k
        }
9648
1.51M
    } else if (aprefix == ctxt->str_xmlns) {
9649
        /*
9650
         * check that it's not a defined namespace
9651
         */
9652
25.2k
        for (j = 1;j <= nbNs;j++)
9653
10.8k
            if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9654
5.99k
          break;
9655
20.3k
              if (j <= nbNs) continue;
9656
9657
14.3k
        nsname = xmlGetNamespace(ctxt, attname);
9658
14.3k
        if (nsname != defaults->values[5 * i + 2]) {
9659
4.33k
      if (nsPush(ctxt, attname,
9660
4.33k
                 defaults->values[5 * i + 2]) > 0)
9661
4.33k
          nbNs++;
9662
4.33k
        }
9663
1.49M
    } else {
9664
        /*
9665
         * check that it's not a defined attribute
9666
         */
9667
4.04M
        for (j = 0;j < nbatts;j+=5) {
9668
2.59M
      if ((attname == atts[j]) && (aprefix == atts[j+1]))
9669
40.4k
          break;
9670
2.59M
        }
9671
1.49M
        if (j < nbatts) continue;
9672
9673
1.45M
        if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9674
3.40k
      if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9675
0
                            localname = NULL;
9676
0
                            goto done;
9677
0
      }
9678
3.40k
      maxatts = ctxt->maxatts;
9679
3.40k
      atts = ctxt->atts;
9680
3.40k
        }
9681
1.45M
        atts[nbatts++] = attname;
9682
1.45M
        atts[nbatts++] = aprefix;
9683
1.45M
        if (aprefix == NULL)
9684
1.21M
      atts[nbatts++] = NULL;
9685
241k
        else
9686
241k
            atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
9687
1.45M
        atts[nbatts++] = defaults->values[5 * i + 2];
9688
1.45M
        atts[nbatts++] = defaults->values[5 * i + 3];
9689
1.45M
        if ((ctxt->standalone == 1) &&
9690
1.45M
            (defaults->values[5 * i + 4] != NULL)) {
9691
0
      xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9692
0
    "standalone: attribute %s on %s defaulted from external subset\n",
9693
0
                                   attname, localname);
9694
0
        }
9695
1.45M
        nbdef++;
9696
1.45M
    }
9697
1.54M
      }
9698
720k
  }
9699
12.2M
    }
9700
9701
    /*
9702
     * The attribute checks
9703
     */
9704
65.5M
    for (i = 0; i < nbatts;i += 5) {
9705
        /*
9706
  * The default namespace does not apply to attribute names.
9707
  */
9708
33.1M
  if (atts[i + 1] != NULL) {
9709
829k
      nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9710
829k
      if (nsname == NULL) {
9711
170k
    xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9712
170k
        "Namespace prefix %s for %s on %s is not defined\n",
9713
170k
        atts[i + 1], atts[i], localname);
9714
170k
      }
9715
829k
      atts[i + 2] = nsname;
9716
829k
  } else
9717
32.3M
      nsname = NULL;
9718
  /*
9719
   * [ WFC: Unique Att Spec ]
9720
   * No attribute name may appear more than once in the same
9721
   * start-tag or empty-element tag.
9722
   * As extended by the Namespace in XML REC.
9723
   */
9724
47.9M
        for (j = 0; j < i;j += 5) {
9725
14.8M
      if (atts[i] == atts[j]) {
9726
58.2k
          if (atts[i+1] == atts[j+1]) {
9727
19.6k
        xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9728
19.6k
        break;
9729
19.6k
    }
9730
38.6k
    if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9731
785
        xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9732
785
           "Namespaced Attribute %s in '%s' redefined\n",
9733
785
           atts[i], nsname, NULL);
9734
785
        break;
9735
785
    }
9736
38.6k
      }
9737
14.8M
  }
9738
33.1M
    }
9739
9740
32.3M
    nsname = xmlGetNamespace(ctxt, prefix);
9741
32.3M
    if ((prefix != NULL) && (nsname == NULL)) {
9742
632k
  xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9743
632k
           "Namespace prefix %s on %s is not defined\n",
9744
632k
     prefix, localname, NULL);
9745
632k
    }
9746
32.3M
    *pref = prefix;
9747
32.3M
    *URI = nsname;
9748
9749
    /*
9750
     * SAX: Start of Element !
9751
     */
9752
32.3M
    if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9753
32.3M
  (!ctxt->disableSAX)) {
9754
26.6M
  if (nbNs > 0)
9755
249k
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9756
249k
        nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9757
249k
        nbatts / 5, nbdef, atts);
9758
26.3M
  else
9759
26.3M
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9760
26.3M
                    nsname, 0, NULL, nbatts / 5, nbdef, atts);
9761
26.6M
    }
9762
9763
32.3M
done:
9764
    /*
9765
     * Free up attribute allocated strings if needed
9766
     */
9767
32.3M
    if (attval != 0) {
9768
1.45M
  for (i = 3,j = 0; j < nratts;i += 5,j++)
9769
803k
      if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9770
685k
          xmlFree((xmlChar *) atts[i]);
9771
651k
    }
9772
9773
32.3M
    return(localname);
9774
32.3M
}
9775
9776
/**
9777
 * xmlParseEndTag2:
9778
 * @ctxt:  an XML parser context
9779
 * @tag:  start tag data; its prefix, URI, line and nsNr fields are used
9780
 *        (line of the start tag, number of namespaces on the start tag)
9781
 *
9782
 * Parse an end tag. Always consumes '</'.
9783
 *
9784
 * [42] ETag ::= '</' Name S? '>'
9785
 *
9786
 * With namespace
9787
 *
9788
 * [NS 9] ETag ::= '</' QName S? '>'
9789
 */
9790
9791
static void
9792
18.8M
xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlStartTag *tag) {
9793
18.8M
    const xmlChar *name;
9794
9795
18.8M
    GROW;
9796
18.8M
    if ((RAW != '<') || (NXT(1) != '/')) {
9797
0
  xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9798
0
  return;
9799
0
    }
9800
18.8M
    SKIP(2);
9801
9802
18.8M
    if (tag->prefix == NULL)
9803
18.3M
        name = xmlParseNameAndCompare(ctxt, ctxt->name);
9804
456k
    else
9805
456k
        name = xmlParseQNameAndCompare(ctxt, ctxt->name, tag->prefix);
9806
9807
    /*
9808
     * We should definitely be at the ending "S? '>'" part
9809
     */
9810
18.8M
    GROW;
9811
18.8M
    if (ctxt->instate == XML_PARSER_EOF)
9812
0
        return;
9813
18.8M
    SKIP_BLANKS;
9814
18.8M
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9815
155k
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9816
155k
    } else
9817
18.6M
  NEXT1;
9818
9819
    /*
9820
     * [ WFC: Element Type Match ]
9821
     * The Name in an element's end-tag must match the element type in the
9822
     * start-tag.
9823
     *
9824
     */
9825
18.8M
    if (name != (xmlChar*)1) {
9826
440k
        if (name == NULL) name = BAD_CAST "unparsable";
9827
440k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9828
440k
         "Opening and ending tag mismatch: %s line %d and %s\n",
9829
440k
                    ctxt->name, tag->line, name);
9830
440k
    }
9831
9832
    /*
9833
     * SAX: End of Tag
9834
     */
9835
18.8M
    if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9836
18.8M
  (!ctxt->disableSAX))
9837
15.6M
  ctxt->sax->endElementNs(ctxt->userData, ctxt->name, tag->prefix,
9838
15.6M
                                tag->URI);
9839
9840
18.8M
    spacePop(ctxt);
9841
18.8M
    if (tag->nsNr != 0)
9842
40.7k
  nsPop(ctxt, tag->nsNr);
9843
18.8M
}
9844
9845
/**
9846
 * xmlParseCDSect:
9847
 * @ctxt:  an XML parser context
9848
 *
9849
 * DEPRECATED: Internal function, don't use.
9850
 *
9851
 * Parse escaped pure raw content. Always consumes '<!['.
9852
 *
9853
 * [18] CDSect ::= CDStart CData CDEnd
9854
 *
9855
 * [19] CDStart ::= '<![CDATA['
9856
 *
9857
 * [20] CData ::= (Char* - (Char* ']]>' Char*))
9858
 *
9859
 * [21] CDEnd ::= ']]>'
9860
 */
9861
void
9862
122k
xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9863
122k
    xmlChar *buf = NULL;
9864
122k
    int len = 0;
9865
122k
    int size = XML_PARSER_BUFFER_SIZE;
9866
122k
    int r, rl;
9867
122k
    int s, sl;
9868
122k
    int cur, l;
9869
122k
    int count = 0;
9870
122k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9871
39.1k
                    XML_MAX_HUGE_LENGTH :
9872
122k
                    XML_MAX_TEXT_LENGTH;
9873
9874
122k
    if ((CUR != '<') || (NXT(1) != '!') || (NXT(2) != '['))
9875
0
        return;
9876
122k
    SKIP(3);
9877
9878
122k
    if (!CMP6(CUR_PTR, 'C', 'D', 'A', 'T', 'A', '['))
9879
0
        return;
9880
122k
    SKIP(6);
9881
9882
122k
    ctxt->instate = XML_PARSER_CDATA_SECTION;
9883
122k
    r = CUR_CHAR(rl);
9884
122k
    if (!IS_CHAR(r)) {
9885
1.73k
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9886
1.73k
        goto out;
9887
1.73k
    }
9888
120k
    NEXTL(rl);
9889
120k
    s = CUR_CHAR(sl);
9890
120k
    if (!IS_CHAR(s)) {
9891
2.46k
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9892
2.46k
        goto out;
9893
2.46k
    }
9894
118k
    NEXTL(sl);
9895
118k
    cur = CUR_CHAR(l);
9896
118k
    buf = (xmlChar *) xmlMallocAtomic(size);
9897
118k
    if (buf == NULL) {
9898
0
  xmlErrMemory(ctxt, NULL);
9899
0
        goto out;
9900
0
    }
9901
37.1M
    while (IS_CHAR(cur) &&
9902
37.1M
           ((r != ']') || (s != ']') || (cur != '>'))) {
9903
37.0M
  if (len + 5 >= size) {
9904
92.5k
      xmlChar *tmp;
9905
9906
92.5k
      tmp = (xmlChar *) xmlRealloc(buf, size * 2);
9907
92.5k
      if (tmp == NULL) {
9908
0
    xmlErrMemory(ctxt, NULL);
9909
0
                goto out;
9910
0
      }
9911
92.5k
      buf = tmp;
9912
92.5k
      size *= 2;
9913
92.5k
  }
9914
37.0M
  COPY_BUF(rl,buf,len,r);
9915
37.0M
  r = s;
9916
37.0M
  rl = sl;
9917
37.0M
  s = cur;
9918
37.0M
  sl = l;
9919
37.0M
  count++;
9920
37.0M
  if (count > 50) {
9921
681k
      SHRINK;
9922
681k
      GROW;
9923
681k
            if (ctxt->instate == XML_PARSER_EOF) {
9924
0
                goto out;
9925
0
            }
9926
681k
      count = 0;
9927
681k
  }
9928
37.0M
  NEXTL(l);
9929
37.0M
  cur = CUR_CHAR(l);
9930
37.0M
        if (len > maxLength) {
9931
0
            xmlFatalErrMsg(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9932
0
                           "CData section too big found\n");
9933
0
            goto out;
9934
0
        }
9935
37.0M
    }
9936
118k
    buf[len] = 0;
9937
118k
    if (cur != '>') {
9938
8.45k
  xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9939
8.45k
                       "CData section not finished\n%.50s\n", buf);
9940
8.45k
        goto out;
9941
8.45k
    }
9942
109k
    NEXTL(l);
9943
9944
    /*
9945
     * OK the buffer is to be consumed as cdata.
9946
     */
9947
109k
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9948
76.6k
  if (ctxt->sax->cdataBlock != NULL)
9949
43.4k
      ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9950
33.1k
  else if (ctxt->sax->characters != NULL)
9951
33.1k
      ctxt->sax->characters(ctxt->userData, buf, len);
9952
76.6k
    }
9953
9954
122k
out:
9955
122k
    if (ctxt->instate != XML_PARSER_EOF)
9956
122k
        ctxt->instate = XML_PARSER_CONTENT;
9957
122k
    xmlFree(buf);
9958
122k
}
9959
9960
/**
9961
 * xmlParseContentInternal:
9962
 * @ctxt:  an XML parser context
9963
 *
9964
 * Parse a content sequence. Stops at EOF or '</'. Leaves checking of
9965
 * unexpected EOF to the caller.
9966
 */
9967
9968
static void
9969
468k
xmlParseContentInternal(xmlParserCtxtPtr ctxt) {
9970
468k
    int nameNr = ctxt->nameNr;
9971
9972
468k
    GROW;
9973
89.7M
    while ((RAW != 0) &&
9974
89.7M
     (ctxt->instate != XML_PARSER_EOF)) {
9975
89.3M
  const xmlChar *cur = ctxt->input->cur;
9976
9977
  /*
9978
   * First case : a Processing Instruction.
9979
   */
9980
89.3M
  if ((*cur == '<') && (cur[1] == '?')) {
9981
79.4k
      xmlParsePI(ctxt);
9982
79.4k
  }
9983
9984
  /*
9985
   * Second case : a CDSection
9986
   */
9987
  /* 2.6.0 test was *cur not RAW */
9988
89.2M
  else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9989
122k
      xmlParseCDSect(ctxt);
9990
122k
  }
9991
9992
  /*
9993
   * Third case :  a comment
9994
   */
9995
89.1M
  else if ((*cur == '<') && (NXT(1) == '!') &&
9996
89.1M
     (NXT(2) == '-') && (NXT(3) == '-')) {
9997
539k
      xmlParseComment(ctxt);
9998
539k
      ctxt->instate = XML_PARSER_CONTENT;
9999
539k
  }
10000
10001
  /*
10002
   * Fourth case :  a sub-element.
10003
   */
10004
88.5M
  else if (*cur == '<') {
10005
41.0M
            if (NXT(1) == '/') {
10006
15.2M
                if (ctxt->nameNr <= nameNr)
10007
58.7k
                    break;
10008
15.1M
          xmlParseElementEnd(ctxt);
10009
25.8M
            } else {
10010
25.8M
          xmlParseElementStart(ctxt);
10011
25.8M
            }
10012
41.0M
  }
10013
10014
  /*
10015
   * Fifth case : a reference. If it has not been resolved,
10016
   *    parsing returns its Name, create the node
10017
   */
10018
10019
47.5M
  else if (*cur == '&') {
10020
7.21M
      xmlParseReference(ctxt);
10021
7.21M
  }
10022
10023
  /*
10024
   * Last case, text. Note that References are handled directly.
10025
   */
10026
40.2M
  else {
10027
40.2M
      xmlParseCharData(ctxt, 0);
10028
40.2M
  }
10029
10030
89.2M
  GROW;
10031
89.2M
  SHRINK;
10032
89.2M
    }
10033
468k
}
10034
10035
/**
10036
 * xmlParseContent:
10037
 * @ctxt:  an XML parser context
10038
 *
10039
 * Parse a content sequence. Stops at EOF or '</'.
10040
 *
10041
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10042
 */
10043
10044
void
10045
339k
xmlParseContent(xmlParserCtxtPtr ctxt) {
10046
339k
    int nameNr = ctxt->nameNr;
10047
10048
339k
    xmlParseContentInternal(ctxt);
10049
10050
339k
    if ((ctxt->instate != XML_PARSER_EOF) && (ctxt->nameNr > nameNr)) {
10051
5.48k
        const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
10052
5.48k
        int line = ctxt->pushTab[ctxt->nameNr - 1].line;
10053
5.48k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
10054
5.48k
                "Premature end of data in tag %s line %d\n",
10055
5.48k
    name, line, NULL);
10056
5.48k
    }
10057
339k
}
10058
10059
/**
10060
 * xmlParseElement:
10061
 * @ctxt:  an XML parser context
10062
 *
10063
 * DEPRECATED: Internal function, don't use.
10064
 *
10065
 * parse an XML element
10066
 *
10067
 * [39] element ::= EmptyElemTag | STag content ETag
10068
 *
10069
 * [ WFC: Element Type Match ]
10070
 * The Name in an element's end-tag must match the element type in the
10071
 * start-tag.
10072
 *
10073
 */
10074
10075
void
10076
195k
xmlParseElement(xmlParserCtxtPtr ctxt) {
10077
195k
    if (xmlParseElementStart(ctxt) != 0)
10078
66.0k
        return;
10079
10080
129k
    xmlParseContentInternal(ctxt);
10081
129k
    if (ctxt->instate == XML_PARSER_EOF)
10082
679
  return;
10083
10084
128k
    if (CUR == 0) {
10085
71.2k
        const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
10086
71.2k
        int line = ctxt->pushTab[ctxt->nameNr - 1].line;
10087
71.2k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
10088
71.2k
                "Premature end of data in tag %s line %d\n",
10089
71.2k
    name, line, NULL);
10090
71.2k
        return;
10091
71.2k
    }
10092
10093
57.1k
    xmlParseElementEnd(ctxt);
10094
57.1k
}
10095
10096
/**
10097
 * xmlParseElementStart:
10098
 * @ctxt:  an XML parser context
10099
 *
10100
 * Parse the start of an XML element. Returns -1 in case of error, 0 if an
10101
 * opening tag was parsed, 1 if an empty element was parsed.
10102
 *
10103
 * Always consumes '<'.
10104
 */
10105
static int
10106
26.0M
xmlParseElementStart(xmlParserCtxtPtr ctxt) {
10107
26.0M
    const xmlChar *name;
10108
26.0M
    const xmlChar *prefix = NULL;
10109
26.0M
    const xmlChar *URI = NULL;
10110
26.0M
    xmlParserNodeInfo node_info;
10111
26.0M
    int line, tlen = 0;
10112
26.0M
    xmlNodePtr ret;
10113
26.0M
    int nsNr = ctxt->nsNr;
10114
10115
26.0M
    if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
10116
26.0M
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
10117
90
  xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
10118
90
     "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
10119
90
        xmlParserMaxDepth);
10120
90
  xmlHaltParser(ctxt);
10121
90
  return(-1);
10122
90
    }
10123
10124
    /* Capture start position */
10125
26.0M
    if (ctxt->record_info) {
10126
0
        node_info.begin_pos = ctxt->input->consumed +
10127
0
                          (CUR_PTR - ctxt->input->base);
10128
0
  node_info.begin_line = ctxt->input->line;
10129
0
    }
10130
10131
26.0M
    if (ctxt->spaceNr == 0)
10132
0
  spacePush(ctxt, -1);
10133
26.0M
    else if (*ctxt->space == -2)
10134
3.31M
  spacePush(ctxt, -1);
10135
22.7M
    else
10136
22.7M
  spacePush(ctxt, *ctxt->space);
10137
10138
26.0M
    line = ctxt->input->line;
10139
26.0M
#ifdef LIBXML_SAX1_ENABLED
10140
26.0M
    if (ctxt->sax2)
10141
15.9M
#endif /* LIBXML_SAX1_ENABLED */
10142
15.9M
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
10143
10.0M
#ifdef LIBXML_SAX1_ENABLED
10144
10.0M
    else
10145
10.0M
  name = xmlParseStartTag(ctxt);
10146
26.0M
#endif /* LIBXML_SAX1_ENABLED */
10147
26.0M
    if (ctxt->instate == XML_PARSER_EOF)
10148
849
  return(-1);
10149
26.0M
    if (name == NULL) {
10150
678k
  spacePop(ctxt);
10151
678k
        return(-1);
10152
678k
    }
10153
25.3M
    nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
10154
25.3M
    ret = ctxt->node;
10155
10156
25.3M
#ifdef LIBXML_VALID_ENABLED
10157
    /*
10158
     * [ VC: Root Element Type ]
10159
     * The Name in the document type declaration must match the element
10160
     * type of the root element.
10161
     */
10162
25.3M
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10163
25.3M
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
10164
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
10165
25.3M
#endif /* LIBXML_VALID_ENABLED */
10166
10167
    /*
10168
     * Check for an Empty Element.
10169
     */
10170
25.3M
    if ((RAW == '/') && (NXT(1) == '>')) {
10171
8.76M
        SKIP(2);
10172
8.76M
  if (ctxt->sax2) {
10173
5.78M
      if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
10174
5.78M
    (!ctxt->disableSAX))
10175
3.46M
    ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
10176
5.78M
#ifdef LIBXML_SAX1_ENABLED
10177
5.78M
  } else {
10178
2.97M
      if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
10179
2.97M
    (!ctxt->disableSAX))
10180
2.48M
    ctxt->sax->endElement(ctxt->userData, name);
10181
2.97M
#endif /* LIBXML_SAX1_ENABLED */
10182
2.97M
  }
10183
8.76M
  namePop(ctxt);
10184
8.76M
  spacePop(ctxt);
10185
8.76M
  if (nsNr != ctxt->nsNr)
10186
10.3k
      nsPop(ctxt, ctxt->nsNr - nsNr);
10187
8.76M
  if ( ret != NULL && ctxt->record_info ) {
10188
0
     node_info.end_pos = ctxt->input->consumed +
10189
0
            (CUR_PTR - ctxt->input->base);
10190
0
     node_info.end_line = ctxt->input->line;
10191
0
     node_info.node = ret;
10192
0
     xmlParserAddNodeInfo(ctxt, &node_info);
10193
0
  }
10194
8.76M
  return(1);
10195
8.76M
    }
10196
16.5M
    if (RAW == '>') {
10197
15.7M
        NEXT1;
10198
15.7M
    } else {
10199
818k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
10200
818k
         "Couldn't find end of Start Tag %s line %d\n",
10201
818k
                    name, line, NULL);
10202
10203
  /*
10204
   * end of parsing of this node.
10205
   */
10206
818k
  nodePop(ctxt);
10207
818k
  namePop(ctxt);
10208
818k
  spacePop(ctxt);
10209
818k
  if (nsNr != ctxt->nsNr)
10210
69.5k
      nsPop(ctxt, ctxt->nsNr - nsNr);
10211
10212
  /*
10213
   * Capture end position and add node
10214
   */
10215
818k
  if ( ret != NULL && ctxt->record_info ) {
10216
0
     node_info.end_pos = ctxt->input->consumed +
10217
0
            (CUR_PTR - ctxt->input->base);
10218
0
     node_info.end_line = ctxt->input->line;
10219
0
     node_info.node = ret;
10220
0
     xmlParserAddNodeInfo(ctxt, &node_info);
10221
0
  }
10222
818k
  return(-1);
10223
818k
    }
10224
10225
15.7M
    return(0);
10226
16.5M
}
10227
10228
/**
10229
 * xmlParseElementEnd:
10230
 * @ctxt:  an XML parser context
10231
 *
10232
 * Parse the end of an XML element. Always consumes '</'.
10233
 */
10234
static void
10235
15.2M
xmlParseElementEnd(xmlParserCtxtPtr ctxt) {
10236
15.2M
    xmlParserNodeInfo node_info;
10237
15.2M
    xmlNodePtr ret = ctxt->node;
10238
10239
15.2M
    if (ctxt->nameNr <= 0) {
10240
0
        if ((RAW == '<') && (NXT(1) == '/'))
10241
0
            SKIP(2);
10242
0
        return;
10243
0
    }
10244
10245
    /*
10246
     * parse the end of tag: '</' should be here.
10247
     */
10248
15.2M
    if (ctxt->sax2) {
10249
9.15M
  xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
10250
9.15M
  namePop(ctxt);
10251
9.15M
    }
10252
6.09M
#ifdef LIBXML_SAX1_ENABLED
10253
6.09M
    else
10254
6.09M
  xmlParseEndTag1(ctxt, 0);
10255
15.2M
#endif /* LIBXML_SAX1_ENABLED */
10256
10257
    /*
10258
     * Capture end position and add node
10259
     */
10260
15.2M
    if ( ret != NULL && ctxt->record_info ) {
10261
0
       node_info.end_pos = ctxt->input->consumed +
10262
0
                          (CUR_PTR - ctxt->input->base);
10263
0
       node_info.end_line = ctxt->input->line;
10264
0
       node_info.node = ret;
10265
0
       xmlParserAddNodeInfo(ctxt, &node_info);
10266
0
    }
10267
15.2M
}
10268
10269
/**
10270
 * xmlParseVersionNum:
10271
 * @ctxt:  an XML parser context
10272
 *
10273
 * DEPRECATED: Internal function, don't use.
10274
 *
10275
 * parse the XML version value.
10276
 *
10277
 * [26] VersionNum ::= '1.' [0-9]+
10278
 *
10279
 * In practice allow [0-9].[0-9]+ at that level
10280
 *
10281
 * Returns the string giving the XML version number, or NULL
10282
 */
10283
xmlChar *
10284
401k
xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10285
401k
    xmlChar *buf = NULL;
10286
401k
    int len = 0;
10287
401k
    int size = 10;
10288
401k
    xmlChar cur;
10289
10290
401k
    buf = (xmlChar *) xmlMallocAtomic(size);
10291
401k
    if (buf == NULL) {
10292
0
  xmlErrMemory(ctxt, NULL);
10293
0
  return(NULL);
10294
0
    }
10295
401k
    cur = CUR;
10296
401k
    if (!((cur >= '0') && (cur <= '9'))) {
10297
4.76k
  xmlFree(buf);
10298
4.76k
  return(NULL);
10299
4.76k
    }
10300
396k
    buf[len++] = cur;
10301
396k
    NEXT;
10302
396k
    cur=CUR;
10303
396k
    if (cur != '.') {
10304
4.94k
  xmlFree(buf);
10305
4.94k
  return(NULL);
10306
4.94k
    }
10307
391k
    buf[len++] = cur;
10308
391k
    NEXT;
10309
391k
    cur=CUR;
10310
1.67M
    while ((cur >= '0') && (cur <= '9')) {
10311
1.28M
  if (len + 1 >= size) {
10312
1.65k
      xmlChar *tmp;
10313
10314
1.65k
      size *= 2;
10315
1.65k
      tmp = (xmlChar *) xmlRealloc(buf, size);
10316
1.65k
      if (tmp == NULL) {
10317
0
          xmlFree(buf);
10318
0
    xmlErrMemory(ctxt, NULL);
10319
0
    return(NULL);
10320
0
      }
10321
1.65k
      buf = tmp;
10322
1.65k
  }
10323
1.28M
  buf[len++] = cur;
10324
1.28M
  NEXT;
10325
1.28M
  cur=CUR;
10326
1.28M
    }
10327
391k
    buf[len] = 0;
10328
391k
    return(buf);
10329
391k
}
10330
10331
/**
10332
 * xmlParseVersionInfo:
10333
 * @ctxt:  an XML parser context
10334
 *
10335
 * DEPRECATED: Internal function, don't use.
10336
 *
10337
 * parse the XML version.
10338
 *
10339
 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
10340
 *
10341
 * [25] Eq ::= S? '=' S?
10342
 *
10343
 * Returns the version string, e.g. "1.0"
10344
 */
10345
10346
xmlChar *
10347
470k
xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10348
470k
    xmlChar *version = NULL;
10349
10350
470k
    if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
10351
412k
  SKIP(7);
10352
412k
  SKIP_BLANKS;
10353
412k
  if (RAW != '=') {
10354
5.31k
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10355
5.31k
      return(NULL);
10356
5.31k
        }
10357
406k
  NEXT;
10358
406k
  SKIP_BLANKS;
10359
406k
  if (RAW == '"') {
10360
344k
      NEXT;
10361
344k
      version = xmlParseVersionNum(ctxt);
10362
344k
      if (RAW != '"') {
10363
17.2k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10364
17.2k
      } else
10365
327k
          NEXT;
10366
344k
  } else if (RAW == '\''){
10367
57.2k
      NEXT;
10368
57.2k
      version = xmlParseVersionNum(ctxt);
10369
57.2k
      if (RAW != '\'') {
10370
1.43k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10371
1.43k
      } else
10372
55.8k
          NEXT;
10373
57.2k
  } else {
10374
5.08k
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10375
5.08k
  }
10376
406k
    }
10377
464k
    return(version);
10378
470k
}
10379
10380
/**
10381
 * xmlParseEncName:
10382
 * @ctxt:  an XML parser context
10383
 *
10384
 * DEPRECATED: Internal function, don't use.
10385
 *
10386
 * parse the XML encoding name
10387
 *
10388
 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10389
 *
10390
 * Returns the encoding name value or NULL
10391
 */
10392
xmlChar *
10393
205k
xmlParseEncName(xmlParserCtxtPtr ctxt) {
10394
205k
    xmlChar *buf = NULL;
10395
205k
    int len = 0;
10396
205k
    int size = 10;
10397
205k
    xmlChar cur;
10398
10399
205k
    cur = CUR;
10400
205k
    if (((cur >= 'a') && (cur <= 'z')) ||
10401
205k
        ((cur >= 'A') && (cur <= 'Z'))) {
10402
204k
  buf = (xmlChar *) xmlMallocAtomic(size);
10403
204k
  if (buf == NULL) {
10404
0
      xmlErrMemory(ctxt, NULL);
10405
0
      return(NULL);
10406
0
  }
10407
10408
204k
  buf[len++] = cur;
10409
204k
  NEXT;
10410
204k
  cur = CUR;
10411
2.76M
  while (((cur >= 'a') && (cur <= 'z')) ||
10412
2.76M
         ((cur >= 'A') && (cur <= 'Z')) ||
10413
2.76M
         ((cur >= '0') && (cur <= '9')) ||
10414
2.76M
         (cur == '.') || (cur == '_') ||
10415
2.76M
         (cur == '-')) {
10416
2.56M
      if (len + 1 >= size) {
10417
78.9k
          xmlChar *tmp;
10418
10419
78.9k
    size *= 2;
10420
78.9k
    tmp = (xmlChar *) xmlRealloc(buf, size);
10421
78.9k
    if (tmp == NULL) {
10422
0
        xmlErrMemory(ctxt, NULL);
10423
0
        xmlFree(buf);
10424
0
        return(NULL);
10425
0
    }
10426
78.9k
    buf = tmp;
10427
78.9k
      }
10428
2.56M
      buf[len++] = cur;
10429
2.56M
      NEXT;
10430
2.56M
      cur = CUR;
10431
2.56M
      if (cur == 0) {
10432
679
          SHRINK;
10433
679
    GROW;
10434
679
    cur = CUR;
10435
679
      }
10436
2.56M
        }
10437
204k
  buf[len] = 0;
10438
204k
    } else {
10439
811
  xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10440
811
    }
10441
205k
    return(buf);
10442
205k
}
10443
10444
/**
10445
 * xmlParseEncodingDecl:
10446
 * @ctxt:  an XML parser context
10447
 *
10448
 * DEPRECATED: Internal function, don't use.
10449
 *
10450
 * parse the XML encoding declaration
10451
 *
10452
 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' |  "'" EncName "'")
10453
 *
10454
 * this setups the conversion filters.
10455
 *
10456
 * Returns the encoding value or NULL
10457
 */
10458
10459
const xmlChar *
10460
339k
xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10461
339k
    xmlChar *encoding = NULL;
10462
10463
339k
    SKIP_BLANKS;
10464
339k
    if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
10465
208k
  SKIP(8);
10466
208k
  SKIP_BLANKS;
10467
208k
  if (RAW != '=') {
10468
1.46k
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10469
1.46k
      return(NULL);
10470
1.46k
        }
10471
206k
  NEXT;
10472
206k
  SKIP_BLANKS;
10473
206k
  if (RAW == '"') {
10474
161k
      NEXT;
10475
161k
      encoding = xmlParseEncName(ctxt);
10476
161k
      if (RAW != '"') {
10477
6.17k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10478
6.17k
    xmlFree((xmlChar *) encoding);
10479
6.17k
    return(NULL);
10480
6.17k
      } else
10481
154k
          NEXT;
10482
161k
  } else if (RAW == '\''){
10483
44.7k
      NEXT;
10484
44.7k
      encoding = xmlParseEncName(ctxt);
10485
44.7k
      if (RAW != '\'') {
10486
511
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10487
511
    xmlFree((xmlChar *) encoding);
10488
511
    return(NULL);
10489
511
      } else
10490
44.1k
          NEXT;
10491
44.7k
  } else {
10492
1.15k
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10493
1.15k
  }
10494
10495
        /*
10496
         * Non standard parsing, allowing the user to ignore encoding
10497
         */
10498
200k
        if (ctxt->options & XML_PARSE_IGNORE_ENC) {
10499
49.9k
      xmlFree((xmlChar *) encoding);
10500
49.9k
            return(NULL);
10501
49.9k
  }
10502
10503
  /*
10504
   * UTF-16 encoding switch has already taken place at this stage,
10505
   * more over the little-endian/big-endian selection is already done
10506
   */
10507
150k
        if ((encoding != NULL) &&
10508
150k
      ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10509
149k
       (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
10510
      /*
10511
       * If no encoding was passed to the parser, that we are
10512
       * using UTF-16 and no decoder is present i.e. the
10513
       * document is apparently UTF-8 compatible, then raise an
10514
       * encoding mismatch fatal error
10515
       */
10516
4.19k
      if ((ctxt->encoding == NULL) &&
10517
4.19k
          (ctxt->input->buf != NULL) &&
10518
4.19k
          (ctxt->input->buf->encoder == NULL)) {
10519
4.18k
    xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10520
4.18k
      "Document labelled UTF-16 but has UTF-8 content\n");
10521
4.18k
      }
10522
4.19k
      if (ctxt->encoding != NULL)
10523
7
    xmlFree((xmlChar *) ctxt->encoding);
10524
4.19k
      ctxt->encoding = encoding;
10525
4.19k
  }
10526
  /*
10527
   * UTF-8 encoding is handled natively
10528
   */
10529
146k
        else if ((encoding != NULL) &&
10530
146k
      ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10531
145k
       (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
10532
71.7k
      if (ctxt->encoding != NULL)
10533
18
    xmlFree((xmlChar *) ctxt->encoding);
10534
71.7k
      ctxt->encoding = encoding;
10535
71.7k
  }
10536
74.3k
  else if (encoding != NULL) {
10537
73.4k
      xmlCharEncodingHandlerPtr handler;
10538
10539
73.4k
      if (ctxt->input->encoding != NULL)
10540
0
    xmlFree((xmlChar *) ctxt->input->encoding);
10541
73.4k
      ctxt->input->encoding = encoding;
10542
10543
73.4k
            handler = xmlFindCharEncodingHandler((const char *) encoding);
10544
73.4k
      if (handler != NULL) {
10545
72.3k
    if (xmlSwitchToEncoding(ctxt, handler) < 0) {
10546
        /* failed to convert */
10547
234
        ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
10548
234
        return(NULL);
10549
234
    }
10550
72.3k
      } else {
10551
1.09k
    xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
10552
1.09k
      "Unsupported encoding %s\n", encoding);
10553
1.09k
    return(NULL);
10554
1.09k
      }
10555
73.4k
  }
10556
150k
    }
10557
279k
    return(encoding);
10558
339k
}
10559
10560
/**
10561
 * xmlParseSDDecl:
10562
 * @ctxt:  an XML parser context
10563
 *
10564
 * DEPRECATED: Internal function, don't use.
10565
 *
10566
 * parse the XML standalone declaration
10567
 *
10568
 * [32] SDDecl ::= S 'standalone' Eq
10569
 *                 (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10570
 *
10571
 * [ VC: Standalone Document Declaration ]
10572
 * TODO The standalone document declaration must have the value "no"
10573
 * if any external markup declarations contain declarations of:
10574
 *  - attributes with default values, if elements to which these
10575
 *    attributes apply appear in the document without specifications
10576
 *    of values for these attributes, or
10577
 *  - entities (other than amp, lt, gt, apos, quot), if references
10578
 *    to those entities appear in the document, or
10579
 *  - attributes with values subject to normalization, where the
10580
 *    attribute appears in the document with a value which will change
10581
 *    as a result of normalization, or
10582
 *  - element types with element content, if white space occurs directly
10583
 *    within any instance of those types.
10584
 *
10585
 * Returns:
10586
 *   1 if standalone="yes"
10587
 *   0 if standalone="no"
10588
 *  -2 if standalone attribute is missing or invalid
10589
 *    (A standalone value of -2 means that the XML declaration was found,
10590
 *     but no value was specified for the standalone attribute).
10591
 */
10592
10593
int
10594
282k
xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10595
282k
    int standalone = -2;
10596
10597
282k
    SKIP_BLANKS;
10598
282k
    if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10599
57.5k
  SKIP(10);
10600
57.5k
        SKIP_BLANKS;
10601
57.5k
  if (RAW != '=') {
10602
390
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10603
390
      return(standalone);
10604
390
        }
10605
57.1k
  NEXT;
10606
57.1k
  SKIP_BLANKS;
10607
57.1k
        if (RAW == '\''){
10608
44.7k
      NEXT;
10609
44.7k
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10610
38.7k
          standalone = 0;
10611
38.7k
                SKIP(2);
10612
38.7k
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10613
6.03k
                 (NXT(2) == 's')) {
10614
5.39k
          standalone = 1;
10615
5.39k
    SKIP(3);
10616
5.39k
            } else {
10617
635
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10618
635
      }
10619
44.7k
      if (RAW != '\'') {
10620
925
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10621
925
      } else
10622
43.8k
          NEXT;
10623
44.7k
  } else if (RAW == '"'){
10624
12.1k
      NEXT;
10625
12.1k
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10626
6.40k
          standalone = 0;
10627
6.40k
    SKIP(2);
10628
6.40k
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10629
5.74k
                 (NXT(2) == 's')) {
10630
5.29k
          standalone = 1;
10631
5.29k
                SKIP(3);
10632
5.29k
            } else {
10633
454
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10634
454
      }
10635
12.1k
      if (RAW != '"') {
10636
667
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10637
667
      } else
10638
11.4k
          NEXT;
10639
12.1k
  } else {
10640
300
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10641
300
        }
10642
57.1k
    }
10643
281k
    return(standalone);
10644
282k
}
10645
10646
/**
10647
 * xmlParseXMLDecl:
10648
 * @ctxt:  an XML parser context
10649
 *
10650
 * DEPRECATED: Internal function, don't use.
10651
 *
10652
 * parse an XML declaration header
10653
 *
10654
 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10655
 */
10656
10657
void
10658
445k
xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10659
445k
    xmlChar *version;
10660
10661
    /*
10662
     * This value for standalone indicates that the document has an
10663
     * XML declaration but it does not have a standalone attribute.
10664
     * It will be overwritten later if a standalone attribute is found.
10665
     */
10666
445k
    ctxt->input->standalone = -2;
10667
10668
    /*
10669
     * We know that '<?xml' is here.
10670
     */
10671
445k
    SKIP(5);
10672
10673
445k
    if (!IS_BLANK_CH(RAW)) {
10674
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10675
0
                 "Blank needed after '<?xml'\n");
10676
0
    }
10677
445k
    SKIP_BLANKS;
10678
10679
    /*
10680
     * We must have the VersionInfo here.
10681
     */
10682
445k
    version = xmlParseVersionInfo(ctxt);
10683
445k
    if (version == NULL) {
10684
75.5k
  xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10685
370k
    } else {
10686
370k
  if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10687
      /*
10688
       * Changed here for XML-1.0 5th edition
10689
       */
10690
6.81k
      if (ctxt->options & XML_PARSE_OLD10) {
10691
2.06k
    xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10692
2.06k
                "Unsupported version '%s'\n",
10693
2.06k
                version);
10694
4.75k
      } else {
10695
4.75k
          if ((version[0] == '1') && ((version[1] == '.'))) {
10696
4.08k
        xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10697
4.08k
                      "Unsupported version '%s'\n",
10698
4.08k
          version, NULL);
10699
4.08k
    } else {
10700
663
        xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10701
663
              "Unsupported version '%s'\n",
10702
663
              version);
10703
663
    }
10704
4.75k
      }
10705
6.81k
  }
10706
370k
  if (ctxt->version != NULL)
10707
0
      xmlFree((void *) ctxt->version);
10708
370k
  ctxt->version = version;
10709
370k
    }
10710
10711
    /*
10712
     * We may have the encoding declaration
10713
     */
10714
445k
    if (!IS_BLANK_CH(RAW)) {
10715
218k
        if ((RAW == '?') && (NXT(1) == '>')) {
10716
131k
      SKIP(2);
10717
131k
      return;
10718
131k
  }
10719
87.4k
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10720
87.4k
    }
10721
314k
    xmlParseEncodingDecl(ctxt);
10722
314k
    if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10723
314k
         (ctxt->instate == XML_PARSER_EOF)) {
10724
  /*
10725
   * The XML REC instructs us to stop parsing right here
10726
   */
10727
982
        return;
10728
982
    }
10729
10730
    /*
10731
     * We may have the standalone status.
10732
     */
10733
313k
    if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10734
33.9k
        if ((RAW == '?') && (NXT(1) == '>')) {
10735
31.7k
      SKIP(2);
10736
31.7k
      return;
10737
31.7k
  }
10738
2.24k
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10739
2.24k
    }
10740
10741
    /*
10742
     * We can grow the input buffer freely at that point
10743
     */
10744
282k
    GROW;
10745
10746
282k
    SKIP_BLANKS;
10747
282k
    ctxt->input->standalone = xmlParseSDDecl(ctxt);
10748
10749
282k
    SKIP_BLANKS;
10750
282k
    if ((RAW == '?') && (NXT(1) == '>')) {
10751
160k
        SKIP(2);
10752
160k
    } else if (RAW == '>') {
10753
        /* Deprecated old WD ... */
10754
1.63k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10755
1.63k
  NEXT;
10756
119k
    } else {
10757
119k
        int c;
10758
10759
119k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10760
5.31M
        while ((c = CUR) != 0) {
10761
5.30M
            NEXT;
10762
5.30M
            if (c == '>')
10763
111k
                break;
10764
5.30M
        }
10765
119k
    }
10766
282k
}
10767
10768
/**
10769
 * xmlParseMisc:
10770
 * @ctxt:  an XML parser context
10771
 *
10772
 * DEPRECATED: Internal function, don't use.
10773
 *
10774
 * parse an XML Misc* optional field.
10775
 *
10776
 * [27] Misc ::= Comment | PI |  S
10777
 */
10778
10779
void
10780
586k
xmlParseMisc(xmlParserCtxtPtr ctxt) {
10781
691k
    while (ctxt->instate != XML_PARSER_EOF) {
10782
691k
        SKIP_BLANKS;
10783
691k
        GROW;
10784
691k
        if ((RAW == '<') && (NXT(1) == '?')) {
10785
61.8k
      xmlParsePI(ctxt);
10786
629k
        } else if (CMP4(CUR_PTR, '<', '!', '-', '-')) {
10787
43.2k
      xmlParseComment(ctxt);
10788
586k
        } else {
10789
586k
            break;
10790
586k
        }
10791
691k
    }
10792
586k
}
10793
10794
/**
10795
 * xmlParseDocument:
10796
 * @ctxt:  an XML parser context
10797
 *
10798
 * parse an XML document (and build a tree if using the standard SAX
10799
 * interface).
10800
 *
10801
 * [1] document ::= prolog element Misc*
10802
 *
10803
 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10804
 *
10805
 * Returns 0, -1 in case of error. the parser context is augmented
10806
 *                as a result of the parsing.
10807
 */
10808
10809
int
10810
278k
xmlParseDocument(xmlParserCtxtPtr ctxt) {
10811
278k
    xmlChar start[4];
10812
278k
    xmlCharEncoding enc;
10813
10814
278k
    xmlInitParser();
10815
10816
278k
    if ((ctxt == NULL) || (ctxt->input == NULL))
10817
0
        return(-1);
10818
10819
278k
    GROW;
10820
10821
    /*
10822
     * SAX: detecting the level.
10823
     */
10824
278k
    xmlDetectSAX2(ctxt);
10825
10826
    /*
10827
     * SAX: beginning of the document processing.
10828
     */
10829
278k
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10830
278k
        ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10831
278k
    if (ctxt->instate == XML_PARSER_EOF)
10832
0
  return(-1);
10833
10834
278k
    if ((ctxt->encoding == NULL) &&
10835
278k
        ((ctxt->input->end - ctxt->input->cur) >= 4)) {
10836
  /*
10837
   * Get the 4 first bytes and decode the charset
10838
   * if enc != XML_CHAR_ENCODING_NONE
10839
   * plug some encoding conversion routines.
10840
   */
10841
273k
  start[0] = RAW;
10842
273k
  start[1] = NXT(1);
10843
273k
  start[2] = NXT(2);
10844
273k
  start[3] = NXT(3);
10845
273k
  enc = xmlDetectCharEncoding(&start[0], 4);
10846
273k
  if (enc != XML_CHAR_ENCODING_NONE) {
10847
161k
      xmlSwitchEncoding(ctxt, enc);
10848
161k
  }
10849
273k
    }
10850
10851
10852
278k
    if (CUR == 0) {
10853
2.11k
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10854
2.11k
  return(-1);
10855
2.11k
    }
10856
10857
    /*
10858
     * Check for the XMLDecl in the Prolog.
10859
     * do not GROW here to avoid the detected encoder to decode more
10860
     * than just the first line, unless the amount of data is really
10861
     * too small to hold "<?xml version="1.0" encoding="foo"
10862
     */
10863
276k
    if ((ctxt->input->end - ctxt->input->cur) < 35) {
10864
21.0k
       GROW;
10865
21.0k
    }
10866
276k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10867
10868
  /*
10869
   * Note that we will switch encoding on the fly.
10870
   */
10871
149k
  xmlParseXMLDecl(ctxt);
10872
149k
  if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10873
149k
      (ctxt->instate == XML_PARSER_EOF)) {
10874
      /*
10875
       * The XML REC instructs us to stop parsing right here
10876
       */
10877
357
      return(-1);
10878
357
  }
10879
149k
  ctxt->standalone = ctxt->input->standalone;
10880
149k
  SKIP_BLANKS;
10881
149k
    } else {
10882
126k
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10883
126k
    }
10884
275k
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10885
258k
        ctxt->sax->startDocument(ctxt->userData);
10886
275k
    if (ctxt->instate == XML_PARSER_EOF)
10887
0
  return(-1);
10888
275k
    if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10889
275k
        (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10890
0
  ctxt->myDoc->compression = ctxt->input->buf->compressed;
10891
0
    }
10892
10893
    /*
10894
     * The Misc part of the Prolog
10895
     */
10896
275k
    xmlParseMisc(ctxt);
10897
10898
    /*
10899
     * Then possibly doc type declaration(s) and more Misc
10900
     * (doctypedecl Misc*)?
10901
     */
10902
275k
    GROW;
10903
275k
    if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10904
10905
168k
  ctxt->inSubset = 1;
10906
168k
  xmlParseDocTypeDecl(ctxt);
10907
168k
  if (RAW == '[') {
10908
122k
      ctxt->instate = XML_PARSER_DTD;
10909
122k
      xmlParseInternalSubset(ctxt);
10910
122k
      if (ctxt->instate == XML_PARSER_EOF)
10911
37.3k
    return(-1);
10912
122k
  }
10913
10914
  /*
10915
   * Create and update the external subset.
10916
   */
10917
131k
  ctxt->inSubset = 2;
10918
131k
  if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10919
131k
      (!ctxt->disableSAX))
10920
122k
      ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10921
122k
                                ctxt->extSubSystem, ctxt->extSubURI);
10922
131k
  if (ctxt->instate == XML_PARSER_EOF)
10923
16.2k
      return(-1);
10924
115k
  ctxt->inSubset = 0;
10925
10926
115k
        xmlCleanSpecialAttr(ctxt);
10927
10928
115k
  ctxt->instate = XML_PARSER_PROLOG;
10929
115k
  xmlParseMisc(ctxt);
10930
115k
    }
10931
10932
    /*
10933
     * Time to start parsing the tree itself
10934
     */
10935
222k
    GROW;
10936
222k
    if (RAW != '<') {
10937
27.2k
  xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10938
27.2k
           "Start tag expected, '<' not found\n");
10939
195k
    } else {
10940
195k
  ctxt->instate = XML_PARSER_CONTENT;
10941
195k
  xmlParseElement(ctxt);
10942
195k
  ctxt->instate = XML_PARSER_EPILOG;
10943
10944
10945
  /*
10946
   * The Misc part at the end
10947
   */
10948
195k
  xmlParseMisc(ctxt);
10949
10950
195k
  if (RAW != 0) {
10951
65.8k
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10952
65.8k
  }
10953
195k
  ctxt->instate = XML_PARSER_EOF;
10954
195k
    }
10955
10956
    /*
10957
     * SAX: end of the document processing.
10958
     */
10959
222k
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10960
222k
        ctxt->sax->endDocument(ctxt->userData);
10961
10962
    /*
10963
     * Remove locally kept entity definitions if the tree was not built
10964
     */
10965
222k
    if ((ctxt->myDoc != NULL) &&
10966
222k
  (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10967
1.72k
  xmlFreeDoc(ctxt->myDoc);
10968
1.72k
  ctxt->myDoc = NULL;
10969
1.72k
    }
10970
10971
222k
    if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10972
18.8k
        ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10973
18.8k
  if (ctxt->valid)
10974
10.8k
      ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10975
18.8k
  if (ctxt->nsWellFormed)
10976
17.7k
      ctxt->myDoc->properties |= XML_DOC_NSVALID;
10977
18.8k
  if (ctxt->options & XML_PARSE_OLD10)
10978
3.54k
      ctxt->myDoc->properties |= XML_DOC_OLD10;
10979
18.8k
    }
10980
222k
    if (! ctxt->wellFormed) {
10981
203k
  ctxt->valid = 0;
10982
203k
  return(-1);
10983
203k
    }
10984
18.8k
    return(0);
10985
222k
}
10986
10987
/**
10988
 * xmlParseExtParsedEnt:
10989
 * @ctxt:  an XML parser context
10990
 *
10991
 * parse a general parsed entity
10992
 * An external general parsed entity is well-formed if it matches the
10993
 * production labeled extParsedEnt.
10994
 *
10995
 * [78] extParsedEnt ::= TextDecl? content
10996
 *
10997
 * Returns 0, -1 in case of error. the parser context is augmented
10998
 *                as a result of the parsing.
10999
 */
11000
11001
int
11002
0
xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
11003
0
    xmlChar start[4];
11004
0
    xmlCharEncoding enc;
11005
11006
0
    if ((ctxt == NULL) || (ctxt->input == NULL))
11007
0
        return(-1);
11008
11009
0
    xmlDetectSAX2(ctxt);
11010
11011
0
    GROW;
11012
11013
    /*
11014
     * SAX: beginning of the document processing.
11015
     */
11016
0
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11017
0
        ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
11018
11019
    /*
11020
     * Get the 4 first bytes and decode the charset
11021
     * if enc != XML_CHAR_ENCODING_NONE
11022
     * plug some encoding conversion routines.
11023
     */
11024
0
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11025
0
  start[0] = RAW;
11026
0
  start[1] = NXT(1);
11027
0
  start[2] = NXT(2);
11028
0
  start[3] = NXT(3);
11029
0
  enc = xmlDetectCharEncoding(start, 4);
11030
0
  if (enc != XML_CHAR_ENCODING_NONE) {
11031
0
      xmlSwitchEncoding(ctxt, enc);
11032
0
  }
11033
0
    }
11034
11035
11036
0
    if (CUR == 0) {
11037
0
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11038
0
    }
11039
11040
    /*
11041
     * Check for the XMLDecl in the Prolog.
11042
     */
11043
0
    GROW;
11044
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
11045
11046
  /*
11047
   * Note that we will switch encoding on the fly.
11048
   */
11049
0
  xmlParseXMLDecl(ctxt);
11050
0
  if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11051
      /*
11052
       * The XML REC instructs us to stop parsing right here
11053
       */
11054
0
      return(-1);
11055
0
  }
11056
0
  SKIP_BLANKS;
11057
0
    } else {
11058
0
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11059
0
    }
11060
0
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
11061
0
        ctxt->sax->startDocument(ctxt->userData);
11062
0
    if (ctxt->instate == XML_PARSER_EOF)
11063
0
  return(-1);
11064
11065
    /*
11066
     * Doing validity checking on chunk doesn't make sense
11067
     */
11068
0
    ctxt->instate = XML_PARSER_CONTENT;
11069
0
    ctxt->validate = 0;
11070
0
    ctxt->loadsubset = 0;
11071
0
    ctxt->depth = 0;
11072
11073
0
    xmlParseContent(ctxt);
11074
0
    if (ctxt->instate == XML_PARSER_EOF)
11075
0
  return(-1);
11076
11077
0
    if ((RAW == '<') && (NXT(1) == '/')) {
11078
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11079
0
    } else if (RAW != 0) {
11080
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
11081
0
    }
11082
11083
    /*
11084
     * SAX: end of the document processing.
11085
     */
11086
0
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11087
0
        ctxt->sax->endDocument(ctxt->userData);
11088
11089
0
    if (! ctxt->wellFormed) return(-1);
11090
0
    return(0);
11091
0
}
11092
11093
#ifdef LIBXML_PUSH_ENABLED
11094
/************************************************************************
11095
 *                  *
11096
 *    Progressive parsing interfaces        *
11097
 *                  *
11098
 ************************************************************************/
11099
11100
/**
11101
 * xmlParseLookupChar:
11102
 * @ctxt:  an XML parser context
11103
 * @c:  character
11104
 *
11105
 * Check whether the input buffer contains a character.
11106
 */
11107
static int
11108
24.0M
xmlParseLookupChar(xmlParserCtxtPtr ctxt, int c) {
11109
24.0M
    const xmlChar *cur;
11110
11111
24.0M
    if (ctxt->checkIndex == 0) {
11112
23.2M
        cur = ctxt->input->cur + 1;
11113
23.2M
    } else {
11114
751k
        cur = ctxt->input->cur + ctxt->checkIndex;
11115
751k
    }
11116
11117
24.0M
    if (memchr(cur, c, ctxt->input->end - cur) == NULL) {
11118
773k
        ctxt->checkIndex = ctxt->input->end - ctxt->input->cur;
11119
773k
        return(0);
11120
23.2M
    } else {
11121
23.2M
        ctxt->checkIndex = 0;
11122
23.2M
        return(1);
11123
23.2M
    }
11124
24.0M
}
11125
11126
/**
11127
 * xmlParseLookupString:
11128
 * @ctxt:  an XML parser context
11129
 * @startDelta: delta to apply at the start
11130
 * @str:  string
11131
 * @strLen:  length of string
11132
 *
11133
 * Check whether the input buffer contains a string.
11134
 */
11135
static const xmlChar *
11136
xmlParseLookupString(xmlParserCtxtPtr ctxt, size_t startDelta,
11137
3.87M
                     const char *str, size_t strLen) {
11138
3.87M
    const xmlChar *cur, *term;
11139
11140
3.87M
    if (ctxt->checkIndex == 0) {
11141
2.25M
        cur = ctxt->input->cur + startDelta;
11142
2.25M
    } else {
11143
1.61M
        cur = ctxt->input->cur + ctxt->checkIndex;
11144
1.61M
    }
11145
11146
3.87M
    term = BAD_CAST strstr((const char *) cur, str);
11147
3.87M
    if (term == NULL) {
11148
2.01M
        const xmlChar *end = ctxt->input->end;
11149
11150
        /* Rescan (strLen - 1) characters. */
11151
2.01M
        if ((size_t) (end - cur) < strLen)
11152
51.6k
            end = cur;
11153
1.96M
        else
11154
1.96M
            end -= strLen - 1;
11155
2.01M
        ctxt->checkIndex = end - ctxt->input->cur;
11156
2.01M
    } else {
11157
1.86M
        ctxt->checkIndex = 0;
11158
1.86M
    }
11159
11160
3.87M
    return(term);
11161
3.87M
}
11162
11163
/**
11164
 * xmlParseLookupCharData:
11165
 * @ctxt:  an XML parser context
11166
 *
11167
 * Check whether the input buffer contains terminated char data.
11168
 */
11169
static int
11170
38.6M
xmlParseLookupCharData(xmlParserCtxtPtr ctxt) {
11171
38.6M
    const xmlChar *cur = ctxt->input->cur + ctxt->checkIndex;
11172
38.6M
    const xmlChar *end = ctxt->input->end;
11173
11174
756M
    while (cur < end) {
11175
751M
        if ((*cur == '<') || (*cur == '&')) {
11176
33.5M
            ctxt->checkIndex = 0;
11177
33.5M
            return(1);
11178
33.5M
        }
11179
718M
        cur++;
11180
718M
    }
11181
11182
5.04M
    ctxt->checkIndex = cur - ctxt->input->cur;
11183
5.04M
    return(0);
11184
38.6M
}
11185
11186
/**
11187
 * xmlParseLookupGt:
11188
 * @ctxt:  an XML parser context
11189
 *
11190
 * Check whether there's enough data in the input buffer to finish parsing
11191
 * a start tag. This has to take quotes into account.
11192
 */
11193
static int
11194
31.7M
xmlParseLookupGt(xmlParserCtxtPtr ctxt) {
11195
31.7M
    const xmlChar *cur;
11196
31.7M
    const xmlChar *end = ctxt->input->end;
11197
31.7M
    int state = ctxt->endCheckState;
11198
11199
31.7M
    if (ctxt->checkIndex == 0)
11200
26.5M
        cur = ctxt->input->cur + 1;
11201
5.21M
    else
11202
5.21M
        cur = ctxt->input->cur + ctxt->checkIndex;
11203
11204
1.09G
    while (cur < end) {
11205
1.09G
        if (state) {
11206
587M
            if (*cur == state)
11207
34.0M
                state = 0;
11208
587M
        } else if (*cur == '\'' || *cur == '"') {
11209
34.1M
            state = *cur;
11210
470M
        } else if (*cur == '>') {
11211
26.4M
            ctxt->checkIndex = 0;
11212
26.4M
            ctxt->endCheckState = 0;
11213
26.4M
            return(1);
11214
26.4M
        }
11215
1.06G
        cur++;
11216
1.06G
    }
11217
11218
5.33M
    ctxt->checkIndex = cur - ctxt->input->cur;
11219
5.33M
    ctxt->endCheckState = state;
11220
5.33M
    return(0);
11221
31.7M
}
11222
11223
/**
11224
 * xmlParseLookupInternalSubset:
11225
 * @ctxt:  an XML parser context
11226
 *
11227
 * Check whether there's enough data in the input buffer to finish parsing
11228
 * the internal subset.
11229
 */
11230
static int
11231
1.76M
xmlParseLookupInternalSubset(xmlParserCtxtPtr ctxt) {
11232
    /*
11233
     * Sorry, but progressive parsing of the internal subset is not
11234
     * supported. We first check that the full content of the internal
11235
     * subset is available and parsing is launched only at that point.
11236
     * Internal subset ends with "']' S? '>'" in an unescaped section and
11237
     * not in a ']]>' sequence which are conditional sections.
11238
     */
11239
1.76M
    const xmlChar *cur, *start;
11240
1.76M
    const xmlChar *end = ctxt->input->end;
11241
1.76M
    int state = ctxt->endCheckState;
11242
11243
1.76M
    if (ctxt->checkIndex == 0) {
11244
218k
        cur = ctxt->input->cur + 1;
11245
1.54M
    } else {
11246
1.54M
        cur = ctxt->input->cur + ctxt->checkIndex;
11247
1.54M
    }
11248
1.76M
    start = cur;
11249
11250
332M
    while (cur < end) {
11251
331M
        if (state == '-') {
11252
19.7M
            if ((*cur == '-') &&
11253
19.7M
                (cur[1] == '-') &&
11254
19.7M
                (cur[2] == '>')) {
11255
260k
                state = 0;
11256
260k
                cur += 3;
11257
260k
                start = cur;
11258
260k
                continue;
11259
260k
            }
11260
19.7M
        }
11261
311M
        else if (state == ']') {
11262
233k
            if (*cur == '>') {
11263
182k
                ctxt->checkIndex = 0;
11264
182k
                ctxt->endCheckState = 0;
11265
182k
                return(1);
11266
182k
            }
11267
51.0k
            if (IS_BLANK_CH(*cur)) {
11268
23.2k
                state = ' ';
11269
27.7k
            } else if (*cur != ']') {
11270
8.88k
                state = 0;
11271
8.88k
                start = cur;
11272
8.88k
                continue;
11273
8.88k
            }
11274
51.0k
        }
11275
311M
        else if (state == ' ') {
11276
53.7k
            if (*cur == '>') {
11277
715
                ctxt->checkIndex = 0;
11278
715
                ctxt->endCheckState = 0;
11279
715
                return(1);
11280
715
            }
11281
53.0k
            if (!IS_BLANK_CH(*cur)) {
11282
22.3k
                state = 0;
11283
22.3k
                start = cur;
11284
22.3k
                continue;
11285
22.3k
            }
11286
53.0k
        }
11287
311M
        else if (state != 0) {
11288
215M
            if (*cur == state) {
11289
2.06M
                state = 0;
11290
2.06M
                start = cur + 1;
11291
2.06M
            }
11292
215M
        }
11293
95.5M
        else if (*cur == '<') {
11294
2.68M
            if ((cur[1] == '!') &&
11295
2.68M
                (cur[2] == '-') &&
11296
2.68M
                (cur[3] == '-')) {
11297
263k
                state = '-';
11298
263k
                cur += 4;
11299
                /* Don't treat <!--> as comment */
11300
263k
                start = cur;
11301
263k
                continue;
11302
263k
            }
11303
2.68M
        }
11304
92.8M
        else if ((*cur == '"') || (*cur == '\'') || (*cur == ']')) {
11305
2.29M
            state = *cur;
11306
2.29M
        }
11307
11308
330M
        cur++;
11309
330M
    }
11310
11311
    /*
11312
     * Rescan the three last characters to detect "<!--" and "-->"
11313
     * split across chunks.
11314
     */
11315
1.58M
    if ((state == 0) || (state == '-')) {
11316
519k
        if (cur - start < 3)
11317
53.0k
            cur = start;
11318
466k
        else
11319
466k
            cur -= 3;
11320
519k
    }
11321
1.58M
    ctxt->checkIndex = cur - ctxt->input->cur;
11322
1.58M
    ctxt->endCheckState = state;
11323
1.58M
    return(0);
11324
1.76M
}
11325
11326
/**
11327
 * xmlCheckCdataPush:
11328
 * @cur: pointer to the block of characters
11329
 * @len: length of the block in bytes
11330
 * @complete: 1 if complete CDATA block is passed in, 0 if partial block
11331
 *
11332
 * Check that the block of characters is okay as SCdata content [20]
11333
 *
11334
 * Returns the number of bytes to pass if okay, a negative index where an
11335
 *         UTF-8 error occurred otherwise
11336
 */
11337
static int
11338
1.21M
xmlCheckCdataPush(const xmlChar *utf, int len, int complete) {
11339
1.21M
    int ix;
11340
1.21M
    unsigned char c;
11341
1.21M
    int codepoint;
11342
11343
1.21M
    if ((utf == NULL) || (len <= 0))
11344
6.85k
        return(0);
11345
11346
30.6M
    for (ix = 0; ix < len;) {      /* string is 0-terminated */
11347
30.5M
        c = utf[ix];
11348
30.5M
        if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
11349
26.8M
      if (c >= 0x20)
11350
25.3M
    ix++;
11351
1.51M
      else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11352
1.25M
          ix++;
11353
252k
      else
11354
252k
          return(-ix);
11355
26.8M
  } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
11356
2.16M
      if (ix + 2 > len) return(complete ? -ix : ix);
11357
2.15M
      if ((utf[ix+1] & 0xc0 ) != 0x80)
11358
576k
          return(-ix);
11359
1.57M
      codepoint = (utf[ix] & 0x1f) << 6;
11360
1.57M
      codepoint |= utf[ix+1] & 0x3f;
11361
1.57M
      if (!xmlIsCharQ(codepoint))
11362
7.70k
          return(-ix);
11363
1.56M
      ix += 2;
11364
1.56M
  } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
11365
465k
      if (ix + 3 > len) return(complete ? -ix : ix);
11366
441k
      if (((utf[ix+1] & 0xc0) != 0x80) ||
11367
441k
          ((utf[ix+2] & 0xc0) != 0x80))
11368
30.6k
        return(-ix);
11369
410k
      codepoint = (utf[ix] & 0xf) << 12;
11370
410k
      codepoint |= (utf[ix+1] & 0x3f) << 6;
11371
410k
      codepoint |= utf[ix+2] & 0x3f;
11372
410k
      if (!xmlIsCharQ(codepoint))
11373
10.1k
          return(-ix);
11374
400k
      ix += 3;
11375
989k
  } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
11376
923k
      if (ix + 4 > len) return(complete ? -ix : ix);
11377
912k
      if (((utf[ix+1] & 0xc0) != 0x80) ||
11378
912k
          ((utf[ix+2] & 0xc0) != 0x80) ||
11379
912k
    ((utf[ix+3] & 0xc0) != 0x80))
11380
43.1k
        return(-ix);
11381
869k
      codepoint = (utf[ix] & 0x7) << 18;
11382
869k
      codepoint |= (utf[ix+1] & 0x3f) << 12;
11383
869k
      codepoint |= (utf[ix+2] & 0x3f) << 6;
11384
869k
      codepoint |= utf[ix+3] & 0x3f;
11385
869k
      if (!xmlIsCharQ(codepoint))
11386
18.2k
          return(-ix);
11387
850k
      ix += 4;
11388
850k
  } else       /* unknown encoding */
11389
65.8k
      return(-ix);
11390
30.5M
      }
11391
162k
      return(ix);
11392
1.21M
}
11393
11394
/**
11395
 * xmlParseTryOrFinish:
11396
 * @ctxt:  an XML parser context
11397
 * @terminate:  last chunk indicator
11398
 *
11399
 * Try to progress on parsing
11400
 *
11401
 * Returns zero if no parsing was possible
11402
 */
11403
static int
11404
17.2M
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11405
17.2M
    int ret = 0;
11406
17.2M
    int avail, tlen;
11407
17.2M
    xmlChar cur, next;
11408
11409
17.2M
    if (ctxt->input == NULL)
11410
0
        return(0);
11411
11412
#ifdef DEBUG_PUSH
11413
    switch (ctxt->instate) {
11414
  case XML_PARSER_EOF:
11415
      xmlGenericError(xmlGenericErrorContext,
11416
        "PP: try EOF\n"); break;
11417
  case XML_PARSER_START:
11418
      xmlGenericError(xmlGenericErrorContext,
11419
        "PP: try START\n"); break;
11420
  case XML_PARSER_MISC:
11421
      xmlGenericError(xmlGenericErrorContext,
11422
        "PP: try MISC\n");break;
11423
  case XML_PARSER_COMMENT:
11424
      xmlGenericError(xmlGenericErrorContext,
11425
        "PP: try COMMENT\n");break;
11426
  case XML_PARSER_PROLOG:
11427
      xmlGenericError(xmlGenericErrorContext,
11428
        "PP: try PROLOG\n");break;
11429
  case XML_PARSER_START_TAG:
11430
      xmlGenericError(xmlGenericErrorContext,
11431
        "PP: try START_TAG\n");break;
11432
  case XML_PARSER_CONTENT:
11433
      xmlGenericError(xmlGenericErrorContext,
11434
        "PP: try CONTENT\n");break;
11435
  case XML_PARSER_CDATA_SECTION:
11436
      xmlGenericError(xmlGenericErrorContext,
11437
        "PP: try CDATA_SECTION\n");break;
11438
  case XML_PARSER_END_TAG:
11439
      xmlGenericError(xmlGenericErrorContext,
11440
        "PP: try END_TAG\n");break;
11441
  case XML_PARSER_ENTITY_DECL:
11442
      xmlGenericError(xmlGenericErrorContext,
11443
        "PP: try ENTITY_DECL\n");break;
11444
  case XML_PARSER_ENTITY_VALUE:
11445
      xmlGenericError(xmlGenericErrorContext,
11446
        "PP: try ENTITY_VALUE\n");break;
11447
  case XML_PARSER_ATTRIBUTE_VALUE:
11448
      xmlGenericError(xmlGenericErrorContext,
11449
        "PP: try ATTRIBUTE_VALUE\n");break;
11450
  case XML_PARSER_DTD:
11451
      xmlGenericError(xmlGenericErrorContext,
11452
        "PP: try DTD\n");break;
11453
  case XML_PARSER_EPILOG:
11454
      xmlGenericError(xmlGenericErrorContext,
11455
        "PP: try EPILOG\n");break;
11456
  case XML_PARSER_PI:
11457
      xmlGenericError(xmlGenericErrorContext,
11458
        "PP: try PI\n");break;
11459
        case XML_PARSER_IGNORE:
11460
            xmlGenericError(xmlGenericErrorContext,
11461
        "PP: try IGNORE\n");break;
11462
    }
11463
#endif
11464
11465
17.2M
    if ((ctxt->input != NULL) &&
11466
17.2M
        (ctxt->input->cur - ctxt->input->base > 4096)) {
11467
357k
        xmlParserInputShrink(ctxt->input);
11468
357k
    }
11469
11470
173M
    while (ctxt->instate != XML_PARSER_EOF) {
11471
173M
  if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11472
167k
      return(0);
11473
11474
173M
  if (ctxt->input == NULL) break;
11475
173M
  if (ctxt->input->buf == NULL)
11476
0
      avail = ctxt->input->length -
11477
0
              (ctxt->input->cur - ctxt->input->base);
11478
173M
  else {
11479
      /*
11480
       * If we are operating on converted input, try to flush
11481
       * remaining chars to avoid them stalling in the non-converted
11482
       * buffer. But do not do this in document start where
11483
       * encoding="..." may not have been read and we work on a
11484
       * guessed encoding.
11485
       */
11486
173M
      if ((ctxt->instate != XML_PARSER_START) &&
11487
173M
          (ctxt->input->buf->raw != NULL) &&
11488
173M
    (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
11489
252k
                size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11490
252k
                                                 ctxt->input);
11491
252k
    size_t current = ctxt->input->cur - ctxt->input->base;
11492
11493
252k
    xmlParserInputBufferPush(ctxt->input->buf, 0, "");
11494
252k
                xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11495
252k
                                      base, current);
11496
252k
      }
11497
173M
      avail = xmlBufUse(ctxt->input->buf->buffer) -
11498
173M
        (ctxt->input->cur - ctxt->input->base);
11499
173M
  }
11500
173M
        if (avail < 1)
11501
815k
      goto done;
11502
172M
        switch (ctxt->instate) {
11503
0
            case XML_PARSER_EOF:
11504
          /*
11505
     * Document parsing is done !
11506
     */
11507
0
          goto done;
11508
1.63M
            case XML_PARSER_START:
11509
1.63M
    if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11510
411k
        xmlChar start[4];
11511
411k
        xmlCharEncoding enc;
11512
11513
        /*
11514
         * Very first chars read from the document flow.
11515
         */
11516
411k
        if (avail < 4)
11517
26.5k
      goto done;
11518
11519
        /*
11520
         * Get the 4 first bytes and decode the charset
11521
         * if enc != XML_CHAR_ENCODING_NONE
11522
         * plug some encoding conversion routines,
11523
         * else xmlSwitchEncoding will set to (default)
11524
         * UTF8.
11525
         */
11526
384k
        start[0] = RAW;
11527
384k
        start[1] = NXT(1);
11528
384k
        start[2] = NXT(2);
11529
384k
        start[3] = NXT(3);
11530
384k
        enc = xmlDetectCharEncoding(start, 4);
11531
384k
        xmlSwitchEncoding(ctxt, enc);
11532
384k
        break;
11533
411k
    }
11534
11535
1.21M
    if (avail < 2)
11536
1.10k
        goto done;
11537
1.21M
    cur = ctxt->input->cur[0];
11538
1.21M
    next = ctxt->input->cur[1];
11539
1.21M
    if (cur == 0) {
11540
3.27k
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11541
3.27k
      ctxt->sax->setDocumentLocator(ctxt->userData,
11542
3.27k
                  &xmlDefaultSAXLocator);
11543
3.27k
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11544
3.27k
        xmlHaltParser(ctxt);
11545
#ifdef DEBUG_PUSH
11546
        xmlGenericError(xmlGenericErrorContext,
11547
          "PP: entering EOF\n");
11548
#endif
11549
3.27k
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11550
3.27k
      ctxt->sax->endDocument(ctxt->userData);
11551
3.27k
        goto done;
11552
3.27k
    }
11553
1.21M
          if ((cur == '<') && (next == '?')) {
11554
        /* PI or XML decl */
11555
1.00M
        if (avail < 5) goto done;
11556
1.00M
        if ((!terminate) &&
11557
1.00M
                        (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11558
675k
      goto done;
11559
325k
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11560
325k
      ctxt->sax->setDocumentLocator(ctxt->userData,
11561
325k
                  &xmlDefaultSAXLocator);
11562
325k
        if ((ctxt->input->cur[2] == 'x') &&
11563
325k
      (ctxt->input->cur[3] == 'm') &&
11564
325k
      (ctxt->input->cur[4] == 'l') &&
11565
325k
      (IS_BLANK_CH(ctxt->input->cur[5]))) {
11566
296k
      ret += 5;
11567
#ifdef DEBUG_PUSH
11568
      xmlGenericError(xmlGenericErrorContext,
11569
        "PP: Parsing XML Decl\n");
11570
#endif
11571
296k
      xmlParseXMLDecl(ctxt);
11572
296k
      if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11573
          /*
11574
           * The XML REC instructs us to stop parsing right
11575
           * here
11576
           */
11577
625
          xmlHaltParser(ctxt);
11578
625
          return(0);
11579
625
      }
11580
295k
      ctxt->standalone = ctxt->input->standalone;
11581
295k
      if ((ctxt->encoding == NULL) &&
11582
295k
          (ctxt->input->encoding != NULL))
11583
46.4k
          ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11584
295k
      if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11585
295k
          (!ctxt->disableSAX))
11586
263k
          ctxt->sax->startDocument(ctxt->userData);
11587
295k
      ctxt->instate = XML_PARSER_MISC;
11588
#ifdef DEBUG_PUSH
11589
      xmlGenericError(xmlGenericErrorContext,
11590
        "PP: entering MISC\n");
11591
#endif
11592
295k
        } else {
11593
28.7k
      ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11594
28.7k
      if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11595
28.7k
          (!ctxt->disableSAX))
11596
28.7k
          ctxt->sax->startDocument(ctxt->userData);
11597
28.7k
      ctxt->instate = XML_PARSER_MISC;
11598
#ifdef DEBUG_PUSH
11599
      xmlGenericError(xmlGenericErrorContext,
11600
        "PP: entering MISC\n");
11601
#endif
11602
28.7k
        }
11603
325k
    } else {
11604
213k
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11605
213k
      ctxt->sax->setDocumentLocator(ctxt->userData,
11606
213k
                  &xmlDefaultSAXLocator);
11607
213k
        ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11608
213k
        if (ctxt->version == NULL) {
11609
0
            xmlErrMemory(ctxt, NULL);
11610
0
      break;
11611
0
        }
11612
213k
        if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11613
213k
            (!ctxt->disableSAX))
11614
213k
      ctxt->sax->startDocument(ctxt->userData);
11615
213k
        ctxt->instate = XML_PARSER_MISC;
11616
#ifdef DEBUG_PUSH
11617
        xmlGenericError(xmlGenericErrorContext,
11618
          "PP: entering MISC\n");
11619
#endif
11620
213k
    }
11621
538k
    break;
11622
34.8M
            case XML_PARSER_START_TAG: {
11623
34.8M
          const xmlChar *name;
11624
34.8M
    const xmlChar *prefix = NULL;
11625
34.8M
    const xmlChar *URI = NULL;
11626
34.8M
                int line = ctxt->input->line;
11627
34.8M
    int nsNr = ctxt->nsNr;
11628
11629
34.8M
    if ((avail < 2) && (ctxt->inputNr == 1))
11630
0
        goto done;
11631
34.8M
    cur = ctxt->input->cur[0];
11632
34.8M
          if (cur != '<') {
11633
23.2k
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11634
23.2k
        xmlHaltParser(ctxt);
11635
23.2k
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11636
23.2k
      ctxt->sax->endDocument(ctxt->userData);
11637
23.2k
        goto done;
11638
23.2k
    }
11639
34.8M
    if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11640
5.17M
                    goto done;
11641
29.6M
    if (ctxt->spaceNr == 0)
11642
237k
        spacePush(ctxt, -1);
11643
29.4M
    else if (*ctxt->space == -2)
11644
3.11M
        spacePush(ctxt, -1);
11645
26.3M
    else
11646
26.3M
        spacePush(ctxt, *ctxt->space);
11647
29.6M
#ifdef LIBXML_SAX1_ENABLED
11648
29.6M
    if (ctxt->sax2)
11649
16.7M
#endif /* LIBXML_SAX1_ENABLED */
11650
16.7M
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
11651
12.9M
#ifdef LIBXML_SAX1_ENABLED
11652
12.9M
    else
11653
12.9M
        name = xmlParseStartTag(ctxt);
11654
29.6M
#endif /* LIBXML_SAX1_ENABLED */
11655
29.6M
    if (ctxt->instate == XML_PARSER_EOF)
11656
1.44k
        goto done;
11657
29.6M
    if (name == NULL) {
11658
31.2k
        spacePop(ctxt);
11659
31.2k
        xmlHaltParser(ctxt);
11660
31.2k
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11661
31.2k
      ctxt->sax->endDocument(ctxt->userData);
11662
31.2k
        goto done;
11663
31.2k
    }
11664
29.6M
#ifdef LIBXML_VALID_ENABLED
11665
    /*
11666
     * [ VC: Root Element Type ]
11667
     * The Name in the document type declaration must match
11668
     * the element type of the root element.
11669
     */
11670
29.6M
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11671
29.6M
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
11672
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11673
29.6M
#endif /* LIBXML_VALID_ENABLED */
11674
11675
    /*
11676
     * Check for an Empty Element.
11677
     */
11678
29.6M
    if ((RAW == '/') && (NXT(1) == '>')) {
11679
10.3M
        SKIP(2);
11680
11681
10.3M
        if (ctxt->sax2) {
11682
6.09M
      if ((ctxt->sax != NULL) &&
11683
6.09M
          (ctxt->sax->endElementNs != NULL) &&
11684
6.09M
          (!ctxt->disableSAX))
11685
6.09M
          ctxt->sax->endElementNs(ctxt->userData, name,
11686
6.09M
                                  prefix, URI);
11687
6.09M
      if (ctxt->nsNr - nsNr > 0)
11688
10.3k
          nsPop(ctxt, ctxt->nsNr - nsNr);
11689
6.09M
#ifdef LIBXML_SAX1_ENABLED
11690
6.09M
        } else {
11691
4.25M
      if ((ctxt->sax != NULL) &&
11692
4.25M
          (ctxt->sax->endElement != NULL) &&
11693
4.25M
          (!ctxt->disableSAX))
11694
4.25M
          ctxt->sax->endElement(ctxt->userData, name);
11695
4.25M
#endif /* LIBXML_SAX1_ENABLED */
11696
4.25M
        }
11697
10.3M
        if (ctxt->instate == XML_PARSER_EOF)
11698
0
      goto done;
11699
10.3M
        spacePop(ctxt);
11700
10.3M
        if (ctxt->nameNr == 0) {
11701
10.5k
      ctxt->instate = XML_PARSER_EPILOG;
11702
10.3M
        } else {
11703
10.3M
      ctxt->instate = XML_PARSER_CONTENT;
11704
10.3M
        }
11705
10.3M
        break;
11706
10.3M
    }
11707
19.2M
    if (RAW == '>') {
11708
18.0M
        NEXT;
11709
18.0M
    } else {
11710
1.25M
        xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11711
1.25M
           "Couldn't find end of Start Tag %s\n",
11712
1.25M
           name);
11713
1.25M
        nodePop(ctxt);
11714
1.25M
        spacePop(ctxt);
11715
1.25M
    }
11716
19.2M
                nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
11717
11718
19.2M
    ctxt->instate = XML_PARSER_CONTENT;
11719
19.2M
                break;
11720
29.6M
      }
11721
113M
            case XML_PARSER_CONTENT: {
11722
113M
    if ((avail < 2) && (ctxt->inputNr == 1))
11723
517k
        goto done;
11724
113M
    cur = ctxt->input->cur[0];
11725
113M
    next = ctxt->input->cur[1];
11726
11727
113M
    if ((cur == '<') && (next == '/')) {
11728
17.4M
        ctxt->instate = XML_PARSER_END_TAG;
11729
17.4M
        break;
11730
95.7M
          } else if ((cur == '<') && (next == '?')) {
11731
316k
        if ((!terminate) &&
11732
316k
            (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11733
218k
      goto done;
11734
98.5k
        xmlParsePI(ctxt);
11735
98.5k
        ctxt->instate = XML_PARSER_CONTENT;
11736
95.4M
    } else if ((cur == '<') && (next != '!')) {
11737
29.3M
        ctxt->instate = XML_PARSER_START_TAG;
11738
29.3M
        break;
11739
66.0M
    } else if ((cur == '<') && (next == '!') &&
11740
66.0M
               (ctxt->input->cur[2] == '-') &&
11741
66.0M
         (ctxt->input->cur[3] == '-')) {
11742
989k
        if ((!terminate) &&
11743
989k
            (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11744
422k
      goto done;
11745
566k
        xmlParseComment(ctxt);
11746
566k
        ctxt->instate = XML_PARSER_CONTENT;
11747
65.1M
    } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11748
65.1M
        (ctxt->input->cur[2] == '[') &&
11749
65.1M
        (ctxt->input->cur[3] == 'C') &&
11750
65.1M
        (ctxt->input->cur[4] == 'D') &&
11751
65.1M
        (ctxt->input->cur[5] == 'A') &&
11752
65.1M
        (ctxt->input->cur[6] == 'T') &&
11753
65.1M
        (ctxt->input->cur[7] == 'A') &&
11754
65.1M
        (ctxt->input->cur[8] == '[')) {
11755
113k
        SKIP(9);
11756
113k
        ctxt->instate = XML_PARSER_CDATA_SECTION;
11757
113k
        break;
11758
64.9M
    } else if ((cur == '<') && (next == '!') &&
11759
64.9M
               (avail < 9)) {
11760
25.9k
        goto done;
11761
64.9M
    } else if (cur == '<') {
11762
447k
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11763
447k
                    "detected an error in element content\n");
11764
447k
                    SKIP(1);
11765
64.5M
    } else if (cur == '&') {
11766
11.5M
        if ((!terminate) && (!xmlParseLookupChar(ctxt, ';')))
11767
236k
      goto done;
11768
11.3M
        xmlParseReference(ctxt);
11769
52.9M
    } else {
11770
        /* TODO Avoid the extra copy, handle directly !!! */
11771
        /*
11772
         * Goal of the following test is:
11773
         *  - minimize calls to the SAX 'character' callback
11774
         *    when they are mergeable
11775
         *  - handle a problem for isBlank when we only parse
11776
         *    a sequence of blank chars and the next one is
11777
         *    not available to check against '<' presence.
11778
         *  - tries to homogenize the differences in SAX
11779
         *    callbacks between the push and pull versions
11780
         *    of the parser.
11781
         */
11782
52.9M
        if ((ctxt->inputNr == 1) &&
11783
52.9M
            (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11784
39.2M
      if ((!terminate) && (!xmlParseLookupCharData(ctxt)))
11785
5.04M
          goto done;
11786
39.2M
                    }
11787
47.9M
                    ctxt->checkIndex = 0;
11788
47.9M
        xmlParseCharData(ctxt, 0);
11789
47.9M
    }
11790
60.3M
    break;
11791
113M
      }
11792
60.3M
            case XML_PARSER_END_TAG:
11793
18.0M
    if (avail < 2)
11794
0
        goto done;
11795
18.0M
    if ((!terminate) && (!xmlParseLookupChar(ctxt, '>')))
11796
537k
        goto done;
11797
17.4M
    if (ctxt->sax2) {
11798
9.68M
              xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
11799
9.68M
        nameNsPop(ctxt);
11800
9.68M
    }
11801
7.78M
#ifdef LIBXML_SAX1_ENABLED
11802
7.78M
      else
11803
7.78M
        xmlParseEndTag1(ctxt, 0);
11804
17.4M
#endif /* LIBXML_SAX1_ENABLED */
11805
17.4M
    if (ctxt->instate == XML_PARSER_EOF) {
11806
        /* Nothing */
11807
17.4M
    } else if (ctxt->nameNr == 0) {
11808
63.7k
        ctxt->instate = XML_PARSER_EPILOG;
11809
17.4M
    } else {
11810
17.4M
        ctxt->instate = XML_PARSER_CONTENT;
11811
17.4M
    }
11812
17.4M
    break;
11813
1.43M
            case XML_PARSER_CDATA_SECTION: {
11814
          /*
11815
     * The Push mode need to have the SAX callback for
11816
     * cdataBlock merge back contiguous callbacks.
11817
     */
11818
1.43M
    const xmlChar *term;
11819
11820
1.43M
                if (terminate) {
11821
                    /*
11822
                     * Don't call xmlParseLookupString. If 'terminate'
11823
                     * is set, checkIndex is invalid.
11824
                     */
11825
16.3k
                    term = BAD_CAST strstr((const char *) ctxt->input->cur,
11826
16.3k
                                           "]]>");
11827
1.41M
                } else {
11828
1.41M
        term = xmlParseLookupString(ctxt, 0, "]]>", 3);
11829
1.41M
                }
11830
11831
1.43M
    if (term == NULL) {
11832
568k
        int tmp, size;
11833
11834
568k
                    if (terminate) {
11835
                        /* Unfinished CDATA section */
11836
4.50k
                        size = ctxt->input->end - ctxt->input->cur;
11837
563k
                    } else {
11838
563k
                        if (avail < XML_PARSER_BIG_BUFFER_SIZE + 2)
11839
215k
                            goto done;
11840
348k
                        ctxt->checkIndex = 0;
11841
                        /* XXX: Why don't we pass the full buffer? */
11842
348k
                        size = XML_PARSER_BIG_BUFFER_SIZE;
11843
348k
                    }
11844
352k
                    tmp = xmlCheckCdataPush(ctxt->input->cur, size, 0);
11845
352k
                    if (tmp <= 0) {
11846
280k
                        tmp = -tmp;
11847
280k
                        ctxt->input->cur += tmp;
11848
280k
                        goto encoding_error;
11849
280k
                    }
11850
72.3k
                    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11851
72.3k
                        if (ctxt->sax->cdataBlock != NULL)
11852
34.8k
                            ctxt->sax->cdataBlock(ctxt->userData,
11853
34.8k
                                                  ctxt->input->cur, tmp);
11854
37.5k
                        else if (ctxt->sax->characters != NULL)
11855
37.5k
                            ctxt->sax->characters(ctxt->userData,
11856
37.5k
                                                  ctxt->input->cur, tmp);
11857
72.3k
                    }
11858
72.3k
                    if (ctxt->instate == XML_PARSER_EOF)
11859
0
                        goto done;
11860
72.3k
                    SKIPL(tmp);
11861
866k
    } else {
11862
866k
                    int base = term - CUR_PTR;
11863
866k
        int tmp;
11864
11865
866k
        tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1);
11866
866k
        if ((tmp < 0) || (tmp != base)) {
11867
765k
      tmp = -tmp;
11868
765k
      ctxt->input->cur += tmp;
11869
765k
      goto encoding_error;
11870
765k
        }
11871
100k
        if ((ctxt->sax != NULL) && (base == 0) &&
11872
100k
            (ctxt->sax->cdataBlock != NULL) &&
11873
100k
            (!ctxt->disableSAX)) {
11874
      /*
11875
       * Special case to provide identical behaviour
11876
       * between pull and push parsers on empty CDATA
11877
       * sections
11878
       */
11879
4.39k
       if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11880
4.39k
           (!strncmp((const char *)&ctxt->input->cur[-9],
11881
4.39k
                     "<![CDATA[", 9)))
11882
4.37k
           ctxt->sax->cdataBlock(ctxt->userData,
11883
4.37k
                                 BAD_CAST "", 0);
11884
96.2k
        } else if ((ctxt->sax != NULL) && (base > 0) &&
11885
96.2k
      (!ctxt->disableSAX)) {
11886
93.8k
      if (ctxt->sax->cdataBlock != NULL)
11887
54.1k
          ctxt->sax->cdataBlock(ctxt->userData,
11888
54.1k
              ctxt->input->cur, base);
11889
39.6k
      else if (ctxt->sax->characters != NULL)
11890
39.6k
          ctxt->sax->characters(ctxt->userData,
11891
39.6k
              ctxt->input->cur, base);
11892
93.8k
        }
11893
100k
        if (ctxt->instate == XML_PARSER_EOF)
11894
0
      goto done;
11895
100k
        SKIPL(base + 3);
11896
100k
        ctxt->instate = XML_PARSER_CONTENT;
11897
#ifdef DEBUG_PUSH
11898
        xmlGenericError(xmlGenericErrorContext,
11899
          "PP: entering CONTENT\n");
11900
#endif
11901
100k
    }
11902
173k
    break;
11903
1.43M
      }
11904
848k
            case XML_PARSER_MISC:
11905
1.18M
            case XML_PARSER_PROLOG:
11906
1.26M
            case XML_PARSER_EPILOG:
11907
1.26M
    SKIP_BLANKS;
11908
1.26M
    if (ctxt->input->buf == NULL)
11909
0
        avail = ctxt->input->length -
11910
0
                (ctxt->input->cur - ctxt->input->base);
11911
1.26M
    else
11912
1.26M
        avail = xmlBufUse(ctxt->input->buf->buffer) -
11913
1.26M
                (ctxt->input->cur - ctxt->input->base);
11914
1.26M
    if (avail < 2)
11915
61.6k
        goto done;
11916
1.20M
    cur = ctxt->input->cur[0];
11917
1.20M
    next = ctxt->input->cur[1];
11918
1.20M
          if ((cur == '<') && (next == '?')) {
11919
138k
        if ((!terminate) &&
11920
138k
                        (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11921
36.1k
      goto done;
11922
#ifdef DEBUG_PUSH
11923
        xmlGenericError(xmlGenericErrorContext,
11924
          "PP: Parsing PI\n");
11925
#endif
11926
102k
        xmlParsePI(ctxt);
11927
102k
        if (ctxt->instate == XML_PARSER_EOF)
11928
0
      goto done;
11929
1.06M
    } else if ((cur == '<') && (next == '!') &&
11930
1.06M
        (ctxt->input->cur[2] == '-') &&
11931
1.06M
        (ctxt->input->cur[3] == '-')) {
11932
173k
        if ((!terminate) &&
11933
173k
                        (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11934
99.3k
      goto done;
11935
#ifdef DEBUG_PUSH
11936
        xmlGenericError(xmlGenericErrorContext,
11937
          "PP: Parsing Comment\n");
11938
#endif
11939
74.0k
        xmlParseComment(ctxt);
11940
74.0k
        if (ctxt->instate == XML_PARSER_EOF)
11941
0
      goto done;
11942
895k
    } else if ((ctxt->instate == XML_PARSER_MISC) &&
11943
895k
                    (cur == '<') && (next == '!') &&
11944
895k
        (ctxt->input->cur[2] == 'D') &&
11945
895k
        (ctxt->input->cur[3] == 'O') &&
11946
895k
        (ctxt->input->cur[4] == 'C') &&
11947
895k
        (ctxt->input->cur[5] == 'T') &&
11948
895k
        (ctxt->input->cur[6] == 'Y') &&
11949
895k
        (ctxt->input->cur[7] == 'P') &&
11950
895k
        (ctxt->input->cur[8] == 'E')) {
11951
473k
        if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11952
156k
                        goto done;
11953
#ifdef DEBUG_PUSH
11954
        xmlGenericError(xmlGenericErrorContext,
11955
          "PP: Parsing internal subset\n");
11956
#endif
11957
317k
        ctxt->inSubset = 1;
11958
317k
        xmlParseDocTypeDecl(ctxt);
11959
317k
        if (ctxt->instate == XML_PARSER_EOF)
11960
0
      goto done;
11961
317k
        if (RAW == '[') {
11962
230k
      ctxt->instate = XML_PARSER_DTD;
11963
#ifdef DEBUG_PUSH
11964
      xmlGenericError(xmlGenericErrorContext,
11965
        "PP: entering DTD\n");
11966
#endif
11967
230k
        } else {
11968
      /*
11969
       * Create and update the external subset.
11970
       */
11971
86.3k
      ctxt->inSubset = 2;
11972
86.3k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11973
86.3k
          (ctxt->sax->externalSubset != NULL))
11974
82.7k
          ctxt->sax->externalSubset(ctxt->userData,
11975
82.7k
            ctxt->intSubName, ctxt->extSubSystem,
11976
82.7k
            ctxt->extSubURI);
11977
86.3k
      ctxt->inSubset = 0;
11978
86.3k
      xmlCleanSpecialAttr(ctxt);
11979
86.3k
      ctxt->instate = XML_PARSER_PROLOG;
11980
#ifdef DEBUG_PUSH
11981
      xmlGenericError(xmlGenericErrorContext,
11982
        "PP: entering PROLOG\n");
11983
#endif
11984
86.3k
        }
11985
422k
    } else if ((cur == '<') && (next == '!') &&
11986
422k
               (avail <
11987
40.3k
                            (ctxt->instate == XML_PARSER_MISC ? 9 : 4))) {
11988
35.8k
        goto done;
11989
386k
    } else if (ctxt->instate == XML_PARSER_EPILOG) {
11990
13.3k
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11991
13.3k
        xmlHaltParser(ctxt);
11992
#ifdef DEBUG_PUSH
11993
        xmlGenericError(xmlGenericErrorContext,
11994
          "PP: entering EOF\n");
11995
#endif
11996
13.3k
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11997
13.3k
      ctxt->sax->endDocument(ctxt->userData);
11998
13.3k
        goto done;
11999
372k
                } else {
12000
372k
        ctxt->instate = XML_PARSER_START_TAG;
12001
#ifdef DEBUG_PUSH
12002
        xmlGenericError(xmlGenericErrorContext,
12003
          "PP: entering START_TAG\n");
12004
#endif
12005
372k
    }
12006
866k
    break;
12007
1.80M
            case XML_PARSER_DTD: {
12008
1.80M
                if ((!terminate) && (!xmlParseLookupInternalSubset(ctxt)))
12009
1.58M
                    goto done;
12010
215k
    xmlParseInternalSubset(ctxt);
12011
215k
    if (ctxt->instate == XML_PARSER_EOF)
12012
51.1k
        goto done;
12013
164k
    ctxt->inSubset = 2;
12014
164k
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
12015
164k
        (ctxt->sax->externalSubset != NULL))
12016
159k
        ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
12017
159k
          ctxt->extSubSystem, ctxt->extSubURI);
12018
164k
    ctxt->inSubset = 0;
12019
164k
    xmlCleanSpecialAttr(ctxt);
12020
164k
    if (ctxt->instate == XML_PARSER_EOF)
12021
12.6k
        goto done;
12022
151k
    ctxt->instate = XML_PARSER_PROLOG;
12023
#ifdef DEBUG_PUSH
12024
    xmlGenericError(xmlGenericErrorContext,
12025
      "PP: entering PROLOG\n");
12026
#endif
12027
151k
                break;
12028
164k
      }
12029
0
            case XML_PARSER_COMMENT:
12030
0
    xmlGenericError(xmlGenericErrorContext,
12031
0
      "PP: internal error, state == COMMENT\n");
12032
0
    ctxt->instate = XML_PARSER_CONTENT;
12033
#ifdef DEBUG_PUSH
12034
    xmlGenericError(xmlGenericErrorContext,
12035
      "PP: entering CONTENT\n");
12036
#endif
12037
0
    break;
12038
0
            case XML_PARSER_IGNORE:
12039
0
    xmlGenericError(xmlGenericErrorContext,
12040
0
      "PP: internal error, state == IGNORE");
12041
0
          ctxt->instate = XML_PARSER_DTD;
12042
#ifdef DEBUG_PUSH
12043
    xmlGenericError(xmlGenericErrorContext,
12044
      "PP: entering DTD\n");
12045
#endif
12046
0
          break;
12047
0
            case XML_PARSER_PI:
12048
0
    xmlGenericError(xmlGenericErrorContext,
12049
0
      "PP: internal error, state == PI\n");
12050
0
    ctxt->instate = XML_PARSER_CONTENT;
12051
#ifdef DEBUG_PUSH
12052
    xmlGenericError(xmlGenericErrorContext,
12053
      "PP: entering CONTENT\n");
12054
#endif
12055
0
    break;
12056
0
            case XML_PARSER_ENTITY_DECL:
12057
0
    xmlGenericError(xmlGenericErrorContext,
12058
0
      "PP: internal error, state == ENTITY_DECL\n");
12059
0
    ctxt->instate = XML_PARSER_DTD;
12060
#ifdef DEBUG_PUSH
12061
    xmlGenericError(xmlGenericErrorContext,
12062
      "PP: entering DTD\n");
12063
#endif
12064
0
    break;
12065
0
            case XML_PARSER_ENTITY_VALUE:
12066
0
    xmlGenericError(xmlGenericErrorContext,
12067
0
      "PP: internal error, state == ENTITY_VALUE\n");
12068
0
    ctxt->instate = XML_PARSER_CONTENT;
12069
#ifdef DEBUG_PUSH
12070
    xmlGenericError(xmlGenericErrorContext,
12071
      "PP: entering DTD\n");
12072
#endif
12073
0
    break;
12074
0
            case XML_PARSER_ATTRIBUTE_VALUE:
12075
0
    xmlGenericError(xmlGenericErrorContext,
12076
0
      "PP: internal error, state == ATTRIBUTE_VALUE\n");
12077
0
    ctxt->instate = XML_PARSER_START_TAG;
12078
#ifdef DEBUG_PUSH
12079
    xmlGenericError(xmlGenericErrorContext,
12080
      "PP: entering START_TAG\n");
12081
#endif
12082
0
    break;
12083
0
            case XML_PARSER_SYSTEM_LITERAL:
12084
0
    xmlGenericError(xmlGenericErrorContext,
12085
0
      "PP: internal error, state == SYSTEM_LITERAL\n");
12086
0
    ctxt->instate = XML_PARSER_START_TAG;
12087
#ifdef DEBUG_PUSH
12088
    xmlGenericError(xmlGenericErrorContext,
12089
      "PP: entering START_TAG\n");
12090
#endif
12091
0
    break;
12092
0
            case XML_PARSER_PUBLIC_LITERAL:
12093
0
    xmlGenericError(xmlGenericErrorContext,
12094
0
      "PP: internal error, state == PUBLIC_LITERAL\n");
12095
0
    ctxt->instate = XML_PARSER_START_TAG;
12096
#ifdef DEBUG_PUSH
12097
    xmlGenericError(xmlGenericErrorContext,
12098
      "PP: entering START_TAG\n");
12099
#endif
12100
0
    break;
12101
172M
  }
12102
172M
    }
12103
16.0M
done:
12104
#ifdef DEBUG_PUSH
12105
    xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
12106
#endif
12107
16.0M
    return(ret);
12108
1.04M
encoding_error:
12109
1.04M
    {
12110
1.04M
        char buffer[150];
12111
12112
1.04M
  snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12113
1.04M
      ctxt->input->cur[0], ctxt->input->cur[1],
12114
1.04M
      ctxt->input->cur[2], ctxt->input->cur[3]);
12115
1.04M
  __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12116
1.04M
         "Input is not proper UTF-8, indicate encoding !\n%s",
12117
1.04M
         BAD_CAST buffer, NULL);
12118
1.04M
    }
12119
1.04M
    return(0);
12120
17.2M
}
12121
12122
/**
12123
 * xmlParseChunk:
12124
 * @ctxt:  an XML parser context
12125
 * @chunk:  an char array
12126
 * @size:  the size in byte of the chunk
12127
 * @terminate:  last chunk indicator
12128
 *
12129
 * Parse a Chunk of memory
12130
 *
12131
 * Returns zero if no error, the xmlParserErrors otherwise.
12132
 */
12133
int
12134
xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
12135
28.1M
              int terminate) {
12136
28.1M
    int end_in_lf = 0;
12137
28.1M
    int remain = 0;
12138
12139
28.1M
    if (ctxt == NULL)
12140
0
        return(XML_ERR_INTERNAL_ERROR);
12141
28.1M
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12142
10.9M
        return(ctxt->errNo);
12143
17.1M
    if (ctxt->instate == XML_PARSER_EOF)
12144
1.20k
        return(-1);
12145
17.1M
    if (ctxt->input == NULL)
12146
0
        return(-1);
12147
12148
17.1M
    ctxt->progressive = 1;
12149
17.1M
    if (ctxt->instate == XML_PARSER_START)
12150
1.15M
        xmlDetectSAX2(ctxt);
12151
17.1M
    if ((size > 0) && (chunk != NULL) && (!terminate) &&
12152
17.1M
        (chunk[size - 1] == '\r')) {
12153
90.1k
  end_in_lf = 1;
12154
90.1k
  size--;
12155
90.1k
    }
12156
12157
17.2M
xmldecl_done:
12158
12159
17.2M
    if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12160
17.2M
        (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF))  {
12161
16.9M
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12162
16.9M
  size_t cur = ctxt->input->cur - ctxt->input->base;
12163
16.9M
  int res;
12164
12165
        /*
12166
         * Specific handling if we autodetected an encoding, we should not
12167
         * push more than the first line ... which depend on the encoding
12168
         * And only push the rest once the final encoding was detected
12169
         */
12170
16.9M
        if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
12171
16.9M
            (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
12172
151k
            unsigned int len = 45;
12173
12174
151k
            if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12175
151k
                               BAD_CAST "UTF-16")) ||
12176
151k
                (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12177
38.7k
                               BAD_CAST "UTF16")))
12178
113k
                len = 90;
12179
38.7k
            else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12180
38.7k
                                    BAD_CAST "UCS-4")) ||
12181
38.7k
                     (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12182
38.3k
                                    BAD_CAST "UCS4")))
12183
483
                len = 180;
12184
12185
151k
            if (ctxt->input->buf->rawconsumed < len)
12186
6.20k
                len -= ctxt->input->buf->rawconsumed;
12187
12188
            /*
12189
             * Change size for reading the initial declaration only
12190
             * if size is greater than len. Otherwise, memmove in xmlBufferAdd
12191
             * will blindly copy extra bytes from memory.
12192
             */
12193
151k
            if ((unsigned int) size > len) {
12194
101k
                remain = size - len;
12195
101k
                size = len;
12196
101k
            } else {
12197
49.9k
                remain = 0;
12198
49.9k
            }
12199
151k
        }
12200
16.9M
  res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12201
16.9M
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12202
16.9M
  if (res < 0) {
12203
1.86k
      ctxt->errNo = XML_PARSER_EOF;
12204
1.86k
      xmlHaltParser(ctxt);
12205
1.86k
      return (XML_PARSER_EOF);
12206
1.86k
  }
12207
#ifdef DEBUG_PUSH
12208
  xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12209
#endif
12210
12211
16.9M
    } else if (ctxt->instate != XML_PARSER_EOF) {
12212
346k
  if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
12213
346k
      xmlParserInputBufferPtr in = ctxt->input->buf;
12214
346k
      if ((in->encoder != NULL) && (in->buffer != NULL) &&
12215
346k
        (in->raw != NULL)) {
12216
27.4k
    int nbchars;
12217
27.4k
    size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
12218
27.4k
    size_t current = ctxt->input->cur - ctxt->input->base;
12219
12220
27.4k
    nbchars = xmlCharEncInput(in, terminate);
12221
27.4k
    xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
12222
27.4k
    if (nbchars < 0) {
12223
        /* TODO 2.6.0 */
12224
2.29k
        xmlGenericError(xmlGenericErrorContext,
12225
2.29k
            "xmlParseChunk: encoder error\n");
12226
2.29k
                    xmlHaltParser(ctxt);
12227
2.29k
        return(XML_ERR_INVALID_ENCODING);
12228
2.29k
    }
12229
27.4k
      }
12230
346k
  }
12231
346k
    }
12232
12233
17.2M
    if (remain != 0) {
12234
101k
        xmlParseTryOrFinish(ctxt, 0);
12235
17.1M
    } else {
12236
17.1M
        xmlParseTryOrFinish(ctxt, terminate);
12237
17.1M
    }
12238
17.2M
    if (ctxt->instate == XML_PARSER_EOF)
12239
138k
        return(ctxt->errNo);
12240
12241
17.1M
    if ((ctxt->input != NULL) &&
12242
17.1M
         (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
12243
17.1M
         ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
12244
17.1M
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
12245
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
12246
0
        xmlHaltParser(ctxt);
12247
0
    }
12248
17.1M
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12249
169k
        return(ctxt->errNo);
12250
12251
16.9M
    if (remain != 0) {
12252
100k
        chunk += size;
12253
100k
        size = remain;
12254
100k
        remain = 0;
12255
100k
        goto xmldecl_done;
12256
100k
    }
12257
16.8M
    if ((end_in_lf == 1) && (ctxt->input != NULL) &&
12258
16.8M
        (ctxt->input->buf != NULL)) {
12259
89.1k
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
12260
89.1k
           ctxt->input);
12261
89.1k
  size_t current = ctxt->input->cur - ctxt->input->base;
12262
12263
89.1k
  xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
12264
12265
89.1k
  xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
12266
89.1k
            base, current);
12267
89.1k
    }
12268
16.8M
    if (terminate) {
12269
  /*
12270
   * Check for termination
12271
   */
12272
166k
  int cur_avail = 0;
12273
12274
166k
  if (ctxt->input != NULL) {
12275
166k
      if (ctxt->input->buf == NULL)
12276
0
    cur_avail = ctxt->input->length -
12277
0
          (ctxt->input->cur - ctxt->input->base);
12278
166k
      else
12279
166k
    cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
12280
166k
                    (ctxt->input->cur - ctxt->input->base);
12281
166k
  }
12282
12283
166k
  if ((ctxt->instate != XML_PARSER_EOF) &&
12284
166k
      (ctxt->instate != XML_PARSER_EPILOG)) {
12285
115k
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12286
115k
  }
12287
166k
  if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) {
12288
1.38k
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12289
1.38k
  }
12290
166k
  if (ctxt->instate != XML_PARSER_EOF) {
12291
166k
      if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
12292
166k
    ctxt->sax->endDocument(ctxt->userData);
12293
166k
  }
12294
166k
  ctxt->instate = XML_PARSER_EOF;
12295
166k
    }
12296
16.8M
    if (ctxt->wellFormed == 0)
12297
8.82M
  return((xmlParserErrors) ctxt->errNo);
12298
8.00M
    else
12299
8.00M
        return(0);
12300
16.8M
}
12301
12302
/************************************************************************
12303
 *                  *
12304
 *    I/O front end functions to the parser     *
12305
 *                  *
12306
 ************************************************************************/
12307
12308
/**
12309
 * xmlCreatePushParserCtxt:
12310
 * @sax:  a SAX handler
12311
 * @user_data:  The user data returned on SAX callbacks
12312
 * @chunk:  a pointer to an array of chars
12313
 * @size:  number of chars in the array
12314
 * @filename:  an optional file name or URI
12315
 *
12316
 * Create a parser context for using the XML parser in push mode.
12317
 * If @buffer and @size are non-NULL, the data is used to detect
12318
 * the encoding.  The remaining characters will be parsed so they
12319
 * don't need to be fed in again through xmlParseChunk.
12320
 * To allow content encoding detection, @size should be >= 4
12321
 * The value of @filename is used for fetching external entities
12322
 * and error/warning reports.
12323
 *
12324
 * Returns the new parser context or NULL
12325
 */
12326
12327
xmlParserCtxtPtr
12328
xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12329
557k
                        const char *chunk, int size, const char *filename) {
12330
557k
    xmlParserCtxtPtr ctxt;
12331
557k
    xmlParserInputPtr inputStream;
12332
557k
    xmlParserInputBufferPtr buf;
12333
557k
    xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12334
12335
    /*
12336
     * plug some encoding conversion routines
12337
     */
12338
557k
    if ((chunk != NULL) && (size >= 4))
12339
273k
  enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12340
12341
557k
    buf = xmlAllocParserInputBuffer(enc);
12342
557k
    if (buf == NULL) return(NULL);
12343
12344
557k
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
12345
557k
    if (ctxt == NULL) {
12346
0
        xmlErrMemory(NULL, "creating parser: out of memory\n");
12347
0
  xmlFreeParserInputBuffer(buf);
12348
0
  return(NULL);
12349
0
    }
12350
557k
    ctxt->dictNames = 1;
12351
557k
    if (filename == NULL) {
12352
278k
  ctxt->directory = NULL;
12353
278k
    } else {
12354
278k
        ctxt->directory = xmlParserGetDirectory(filename);
12355
278k
    }
12356
12357
557k
    inputStream = xmlNewInputStream(ctxt);
12358
557k
    if (inputStream == NULL) {
12359
0
  xmlFreeParserCtxt(ctxt);
12360
0
  xmlFreeParserInputBuffer(buf);
12361
0
  return(NULL);
12362
0
    }
12363
12364
557k
    if (filename == NULL)
12365
278k
  inputStream->filename = NULL;
12366
278k
    else {
12367
278k
  inputStream->filename = (char *)
12368
278k
      xmlCanonicPath((const xmlChar *) filename);
12369
278k
  if (inputStream->filename == NULL) {
12370
0
            xmlFreeInputStream(inputStream);
12371
0
      xmlFreeParserCtxt(ctxt);
12372
0
      xmlFreeParserInputBuffer(buf);
12373
0
      return(NULL);
12374
0
  }
12375
278k
    }
12376
557k
    inputStream->buf = buf;
12377
557k
    xmlBufResetInput(inputStream->buf->buffer, inputStream);
12378
557k
    inputPush(ctxt, inputStream);
12379
12380
    /*
12381
     * If the caller didn't provide an initial 'chunk' for determining
12382
     * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12383
     * that it can be automatically determined later
12384
     */
12385
557k
    ctxt->charset = XML_CHAR_ENCODING_NONE;
12386
12387
557k
    if ((size != 0) && (chunk != NULL) &&
12388
557k
        (ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
12389
273k
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12390
273k
  size_t cur = ctxt->input->cur - ctxt->input->base;
12391
12392
273k
  xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12393
12394
273k
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12395
#ifdef DEBUG_PUSH
12396
  xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12397
#endif
12398
273k
    }
12399
12400
557k
    if (enc != XML_CHAR_ENCODING_NONE) {
12401
161k
        xmlSwitchEncoding(ctxt, enc);
12402
161k
    }
12403
12404
557k
    return(ctxt);
12405
557k
}
12406
#endif /* LIBXML_PUSH_ENABLED */
12407
12408
/**
12409
 * xmlHaltParser:
12410
 * @ctxt:  an XML parser context
12411
 *
12412
 * Blocks further parser processing don't override error
12413
 * for internal use
12414
 */
12415
static void
12416
509k
xmlHaltParser(xmlParserCtxtPtr ctxt) {
12417
509k
    if (ctxt == NULL)
12418
0
        return;
12419
509k
    ctxt->instate = XML_PARSER_EOF;
12420
509k
    ctxt->disableSAX = 1;
12421
580k
    while (ctxt->inputNr > 1)
12422
71.1k
        xmlFreeInputStream(inputPop(ctxt));
12423
509k
    if (ctxt->input != NULL) {
12424
        /*
12425
   * in case there was a specific allocation deallocate before
12426
   * overriding base
12427
   */
12428
509k
        if (ctxt->input->free != NULL) {
12429
0
      ctxt->input->free((xmlChar *) ctxt->input->base);
12430
0
      ctxt->input->free = NULL;
12431
0
  }
12432
509k
        if (ctxt->input->buf != NULL) {
12433
453k
            xmlFreeParserInputBuffer(ctxt->input->buf);
12434
453k
            ctxt->input->buf = NULL;
12435
453k
        }
12436
509k
  ctxt->input->cur = BAD_CAST"";
12437
509k
        ctxt->input->length = 0;
12438
509k
  ctxt->input->base = ctxt->input->cur;
12439
509k
        ctxt->input->end = ctxt->input->cur;
12440
509k
    }
12441
509k
}
12442
12443
/**
12444
 * xmlStopParser:
12445
 * @ctxt:  an XML parser context
12446
 *
12447
 * Blocks further parser processing
12448
 */
12449
void
12450
279k
xmlStopParser(xmlParserCtxtPtr ctxt) {
12451
279k
    if (ctxt == NULL)
12452
0
        return;
12453
279k
    xmlHaltParser(ctxt);
12454
279k
    ctxt->errNo = XML_ERR_USER_STOP;
12455
279k
}
12456
12457
/**
12458
 * xmlCreateIOParserCtxt:
12459
 * @sax:  a SAX handler
12460
 * @user_data:  The user data returned on SAX callbacks
12461
 * @ioread:  an I/O read function
12462
 * @ioclose:  an I/O close function
12463
 * @ioctx:  an I/O handler
12464
 * @enc:  the charset encoding if known
12465
 *
12466
 * Create a parser context for using the XML parser with an existing
12467
 * I/O stream
12468
 *
12469
 * Returns the new parser context or NULL
12470
 */
12471
xmlParserCtxtPtr
12472
xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12473
  xmlInputReadCallback   ioread, xmlInputCloseCallback  ioclose,
12474
0
  void *ioctx, xmlCharEncoding enc) {
12475
0
    xmlParserCtxtPtr ctxt;
12476
0
    xmlParserInputPtr inputStream;
12477
0
    xmlParserInputBufferPtr buf;
12478
12479
0
    if (ioread == NULL) return(NULL);
12480
12481
0
    buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
12482
0
    if (buf == NULL) {
12483
0
        if (ioclose != NULL)
12484
0
            ioclose(ioctx);
12485
0
        return (NULL);
12486
0
    }
12487
12488
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
12489
0
    if (ctxt == NULL) {
12490
0
  xmlFreeParserInputBuffer(buf);
12491
0
  return(NULL);
12492
0
    }
12493
12494
0
    inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12495
0
    if (inputStream == NULL) {
12496
0
  xmlFreeParserCtxt(ctxt);
12497
0
  return(NULL);
12498
0
    }
12499
0
    inputPush(ctxt, inputStream);
12500
12501
0
    return(ctxt);
12502
0
}
12503
12504
#ifdef LIBXML_VALID_ENABLED
12505
/************************************************************************
12506
 *                  *
12507
 *    Front ends when parsing a DTD       *
12508
 *                  *
12509
 ************************************************************************/
12510
12511
/**
12512
 * xmlIOParseDTD:
12513
 * @sax:  the SAX handler block or NULL
12514
 * @input:  an Input Buffer
12515
 * @enc:  the charset encoding if known
12516
 *
12517
 * Load and parse a DTD
12518
 *
12519
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12520
 * @input will be freed by the function in any case.
12521
 */
12522
12523
xmlDtdPtr
12524
xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12525
0
        xmlCharEncoding enc) {
12526
0
    xmlDtdPtr ret = NULL;
12527
0
    xmlParserCtxtPtr ctxt;
12528
0
    xmlParserInputPtr pinput = NULL;
12529
0
    xmlChar start[4];
12530
12531
0
    if (input == NULL)
12532
0
  return(NULL);
12533
12534
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12535
0
    if (ctxt == NULL) {
12536
0
        xmlFreeParserInputBuffer(input);
12537
0
  return(NULL);
12538
0
    }
12539
12540
    /* We are loading a DTD */
12541
0
    ctxt->options |= XML_PARSE_DTDLOAD;
12542
12543
0
    xmlDetectSAX2(ctxt);
12544
12545
    /*
12546
     * generate a parser input from the I/O handler
12547
     */
12548
12549
0
    pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12550
0
    if (pinput == NULL) {
12551
0
        xmlFreeParserInputBuffer(input);
12552
0
  xmlFreeParserCtxt(ctxt);
12553
0
  return(NULL);
12554
0
    }
12555
12556
    /*
12557
     * plug some encoding conversion routines here.
12558
     */
12559
0
    if (xmlPushInput(ctxt, pinput) < 0) {
12560
0
  xmlFreeParserCtxt(ctxt);
12561
0
  return(NULL);
12562
0
    }
12563
0
    if (enc != XML_CHAR_ENCODING_NONE) {
12564
0
        xmlSwitchEncoding(ctxt, enc);
12565
0
    }
12566
12567
0
    pinput->filename = NULL;
12568
0
    pinput->line = 1;
12569
0
    pinput->col = 1;
12570
0
    pinput->base = ctxt->input->cur;
12571
0
    pinput->cur = ctxt->input->cur;
12572
0
    pinput->free = NULL;
12573
12574
    /*
12575
     * let's parse that entity knowing it's an external subset.
12576
     */
12577
0
    ctxt->inSubset = 2;
12578
0
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12579
0
    if (ctxt->myDoc == NULL) {
12580
0
  xmlErrMemory(ctxt, "New Doc failed");
12581
0
  return(NULL);
12582
0
    }
12583
0
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
12584
0
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12585
0
                                 BAD_CAST "none", BAD_CAST "none");
12586
12587
0
    if ((enc == XML_CHAR_ENCODING_NONE) &&
12588
0
        ((ctxt->input->end - ctxt->input->cur) >= 4)) {
12589
  /*
12590
   * Get the 4 first bytes and decode the charset
12591
   * if enc != XML_CHAR_ENCODING_NONE
12592
   * plug some encoding conversion routines.
12593
   */
12594
0
  start[0] = RAW;
12595
0
  start[1] = NXT(1);
12596
0
  start[2] = NXT(2);
12597
0
  start[3] = NXT(3);
12598
0
  enc = xmlDetectCharEncoding(start, 4);
12599
0
  if (enc != XML_CHAR_ENCODING_NONE) {
12600
0
      xmlSwitchEncoding(ctxt, enc);
12601
0
  }
12602
0
    }
12603
12604
0
    xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12605
12606
0
    if (ctxt->myDoc != NULL) {
12607
0
  if (ctxt->wellFormed) {
12608
0
      ret = ctxt->myDoc->extSubset;
12609
0
      ctxt->myDoc->extSubset = NULL;
12610
0
      if (ret != NULL) {
12611
0
    xmlNodePtr tmp;
12612
12613
0
    ret->doc = NULL;
12614
0
    tmp = ret->children;
12615
0
    while (tmp != NULL) {
12616
0
        tmp->doc = NULL;
12617
0
        tmp = tmp->next;
12618
0
    }
12619
0
      }
12620
0
  } else {
12621
0
      ret = NULL;
12622
0
  }
12623
0
        xmlFreeDoc(ctxt->myDoc);
12624
0
        ctxt->myDoc = NULL;
12625
0
    }
12626
0
    xmlFreeParserCtxt(ctxt);
12627
12628
0
    return(ret);
12629
0
}
12630
12631
/**
12632
 * xmlSAXParseDTD:
12633
 * @sax:  the SAX handler block
12634
 * @ExternalID:  a NAME* containing the External ID of the DTD
12635
 * @SystemID:  a NAME* containing the URL to the DTD
12636
 *
12637
 * DEPRECATED: Don't use.
12638
 *
12639
 * Load and parse an external subset.
12640
 *
12641
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12642
 */
12643
12644
xmlDtdPtr
12645
xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12646
0
                          const xmlChar *SystemID) {
12647
0
    xmlDtdPtr ret = NULL;
12648
0
    xmlParserCtxtPtr ctxt;
12649
0
    xmlParserInputPtr input = NULL;
12650
0
    xmlCharEncoding enc;
12651
0
    xmlChar* systemIdCanonic;
12652
12653
0
    if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12654
12655
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12656
0
    if (ctxt == NULL) {
12657
0
  return(NULL);
12658
0
    }
12659
12660
    /* We are loading a DTD */
12661
0
    ctxt->options |= XML_PARSE_DTDLOAD;
12662
12663
    /*
12664
     * Canonicalise the system ID
12665
     */
12666
0
    systemIdCanonic = xmlCanonicPath(SystemID);
12667
0
    if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
12668
0
  xmlFreeParserCtxt(ctxt);
12669
0
  return(NULL);
12670
0
    }
12671
12672
    /*
12673
     * Ask the Entity resolver to load the damn thing
12674
     */
12675
12676
0
    if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
12677
0
  input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12678
0
                                   systemIdCanonic);
12679
0
    if (input == NULL) {
12680
0
  xmlFreeParserCtxt(ctxt);
12681
0
  if (systemIdCanonic != NULL)
12682
0
      xmlFree(systemIdCanonic);
12683
0
  return(NULL);
12684
0
    }
12685
12686
    /*
12687
     * plug some encoding conversion routines here.
12688
     */
12689
0
    if (xmlPushInput(ctxt, input) < 0) {
12690
0
  xmlFreeParserCtxt(ctxt);
12691
0
  if (systemIdCanonic != NULL)
12692
0
      xmlFree(systemIdCanonic);
12693
0
  return(NULL);
12694
0
    }
12695
0
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12696
0
  enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12697
0
  xmlSwitchEncoding(ctxt, enc);
12698
0
    }
12699
12700
0
    if (input->filename == NULL)
12701
0
  input->filename = (char *) systemIdCanonic;
12702
0
    else
12703
0
  xmlFree(systemIdCanonic);
12704
0
    input->line = 1;
12705
0
    input->col = 1;
12706
0
    input->base = ctxt->input->cur;
12707
0
    input->cur = ctxt->input->cur;
12708
0
    input->free = NULL;
12709
12710
    /*
12711
     * let's parse that entity knowing it's an external subset.
12712
     */
12713
0
    ctxt->inSubset = 2;
12714
0
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12715
0
    if (ctxt->myDoc == NULL) {
12716
0
  xmlErrMemory(ctxt, "New Doc failed");
12717
0
  xmlFreeParserCtxt(ctxt);
12718
0
  return(NULL);
12719
0
    }
12720
0
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
12721
0
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12722
0
                                 ExternalID, SystemID);
12723
0
    xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12724
12725
0
    if (ctxt->myDoc != NULL) {
12726
0
  if (ctxt->wellFormed) {
12727
0
      ret = ctxt->myDoc->extSubset;
12728
0
      ctxt->myDoc->extSubset = NULL;
12729
0
      if (ret != NULL) {
12730
0
    xmlNodePtr tmp;
12731
12732
0
    ret->doc = NULL;
12733
0
    tmp = ret->children;
12734
0
    while (tmp != NULL) {
12735
0
        tmp->doc = NULL;
12736
0
        tmp = tmp->next;
12737
0
    }
12738
0
      }
12739
0
  } else {
12740
0
      ret = NULL;
12741
0
  }
12742
0
        xmlFreeDoc(ctxt->myDoc);
12743
0
        ctxt->myDoc = NULL;
12744
0
    }
12745
0
    xmlFreeParserCtxt(ctxt);
12746
12747
0
    return(ret);
12748
0
}
12749
12750
12751
/**
12752
 * xmlParseDTD:
12753
 * @ExternalID:  a NAME* containing the External ID of the DTD
12754
 * @SystemID:  a NAME* containing the URL to the DTD
12755
 *
12756
 * Load and parse an external subset.
12757
 *
12758
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12759
 */
12760
12761
xmlDtdPtr
12762
0
xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12763
0
    return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12764
0
}
12765
#endif /* LIBXML_VALID_ENABLED */
12766
12767
/************************************************************************
12768
 *                  *
12769
 *    Front ends when parsing an Entity     *
12770
 *                  *
12771
 ************************************************************************/
12772
12773
/**
12774
 * xmlParseCtxtExternalEntity:
12775
 * @ctx:  the existing parsing context
12776
 * @URL:  the URL for the entity to load
12777
 * @ID:  the System ID for the entity to load
12778
 * @lst:  the return value for the set of parsed nodes
12779
 *
12780
 * Parse an external general entity within an existing parsing context
12781
 * An external general parsed entity is well-formed if it matches the
12782
 * production labeled extParsedEnt.
12783
 *
12784
 * [78] extParsedEnt ::= TextDecl? content
12785
 *
12786
 * Returns 0 if the entity is well formed, -1 in case of args problem and
12787
 *    the parser error code otherwise
12788
 */
12789
12790
int
12791
xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
12792
0
                 const xmlChar *ID, xmlNodePtr *lst) {
12793
0
    void *userData;
12794
12795
0
    if (ctx == NULL) return(-1);
12796
    /*
12797
     * If the user provided their own SAX callbacks, then reuse the
12798
     * userData callback field, otherwise the expected setup in a
12799
     * DOM builder is to have userData == ctxt
12800
     */
12801
0
    if (ctx->userData == ctx)
12802
0
        userData = NULL;
12803
0
    else
12804
0
        userData = ctx->userData;
12805
0
    return xmlParseExternalEntityPrivate(ctx->myDoc, ctx, ctx->sax,
12806
0
                                         userData, ctx->depth + 1,
12807
0
                                         URL, ID, lst);
12808
0
}
12809
12810
/**
12811
 * xmlParseExternalEntityPrivate:
12812
 * @doc:  the document the chunk pertains to
12813
 * @oldctxt:  the previous parser context if available
12814
 * @sax:  the SAX handler block (possibly NULL)
12815
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12816
 * @depth:  Used for loop detection, use 0
12817
 * @URL:  the URL for the entity to load
12818
 * @ID:  the System ID for the entity to load
12819
 * @list:  the return value for the set of parsed nodes
12820
 *
12821
 * Private version of xmlParseExternalEntity()
12822
 *
12823
 * Returns 0 if the entity is well formed, -1 in case of args problem and
12824
 *    the parser error code otherwise
12825
 */
12826
12827
static xmlParserErrors
12828
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
12829
                xmlSAXHandlerPtr sax,
12830
          void *user_data, int depth, const xmlChar *URL,
12831
783k
          const xmlChar *ID, xmlNodePtr *list) {
12832
783k
    xmlParserCtxtPtr ctxt;
12833
783k
    xmlDocPtr newDoc;
12834
783k
    xmlNodePtr newRoot;
12835
783k
    xmlParserErrors ret = XML_ERR_OK;
12836
783k
    xmlChar start[4];
12837
783k
    xmlCharEncoding enc;
12838
12839
783k
    if (((depth > 40) &&
12840
783k
  ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12841
783k
  (depth > 100)) {
12842
0
  xmlFatalErrMsg(oldctxt, XML_ERR_ENTITY_LOOP,
12843
0
                       "Maximum entity nesting depth exceeded");
12844
0
        return(XML_ERR_ENTITY_LOOP);
12845
0
    }
12846
12847
783k
    if (list != NULL)
12848
79.9k
        *list = NULL;
12849
783k
    if ((URL == NULL) && (ID == NULL))
12850
756
  return(XML_ERR_INTERNAL_ERROR);
12851
782k
    if (doc == NULL)
12852
0
  return(XML_ERR_INTERNAL_ERROR);
12853
12854
782k
    ctxt = xmlCreateEntityParserCtxtInternal(sax, user_data, URL, ID, NULL,
12855
782k
                                             oldctxt);
12856
782k
    if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
12857
149k
    if (oldctxt != NULL) {
12858
149k
        ctxt->nbErrors = oldctxt->nbErrors;
12859
149k
        ctxt->nbWarnings = oldctxt->nbWarnings;
12860
149k
    }
12861
149k
    xmlDetectSAX2(ctxt);
12862
12863
149k
    newDoc = xmlNewDoc(BAD_CAST "1.0");
12864
149k
    if (newDoc == NULL) {
12865
0
  xmlFreeParserCtxt(ctxt);
12866
0
  return(XML_ERR_INTERNAL_ERROR);
12867
0
    }
12868
149k
    newDoc->properties = XML_DOC_INTERNAL;
12869
149k
    if (doc) {
12870
149k
        newDoc->intSubset = doc->intSubset;
12871
149k
        newDoc->extSubset = doc->extSubset;
12872
149k
        if (doc->dict) {
12873
78.9k
            newDoc->dict = doc->dict;
12874
78.9k
            xmlDictReference(newDoc->dict);
12875
78.9k
        }
12876
149k
        if (doc->URL != NULL) {
12877
92.1k
            newDoc->URL = xmlStrdup(doc->URL);
12878
92.1k
        }
12879
149k
    }
12880
149k
    newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12881
149k
    if (newRoot == NULL) {
12882
0
  if (sax != NULL)
12883
0
  xmlFreeParserCtxt(ctxt);
12884
0
  newDoc->intSubset = NULL;
12885
0
  newDoc->extSubset = NULL;
12886
0
        xmlFreeDoc(newDoc);
12887
0
  return(XML_ERR_INTERNAL_ERROR);
12888
0
    }
12889
149k
    xmlAddChild((xmlNodePtr) newDoc, newRoot);
12890
149k
    nodePush(ctxt, newDoc->children);
12891
149k
    if (doc == NULL) {
12892
0
        ctxt->myDoc = newDoc;
12893
149k
    } else {
12894
149k
        ctxt->myDoc = doc;
12895
149k
        newRoot->doc = doc;
12896
149k
    }
12897
12898
    /*
12899
     * Get the 4 first bytes and decode the charset
12900
     * if enc != XML_CHAR_ENCODING_NONE
12901
     * plug some encoding conversion routines.
12902
     */
12903
149k
    GROW;
12904
149k
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12905
146k
  start[0] = RAW;
12906
146k
  start[1] = NXT(1);
12907
146k
  start[2] = NXT(2);
12908
146k
  start[3] = NXT(3);
12909
146k
  enc = xmlDetectCharEncoding(start, 4);
12910
146k
  if (enc != XML_CHAR_ENCODING_NONE) {
12911
9.72k
      xmlSwitchEncoding(ctxt, enc);
12912
9.72k
  }
12913
146k
    }
12914
12915
    /*
12916
     * Parse a possible text declaration first
12917
     */
12918
149k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
12919
4.84k
  xmlParseTextDecl(ctxt);
12920
        /*
12921
         * An XML-1.0 document can't reference an entity not XML-1.0
12922
         */
12923
4.84k
        if ((xmlStrEqual(oldctxt->version, BAD_CAST "1.0")) &&
12924
4.84k
            (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
12925
210
            xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
12926
210
                           "Version mismatch between document and entity\n");
12927
210
        }
12928
4.84k
    }
12929
12930
149k
    ctxt->instate = XML_PARSER_CONTENT;
12931
149k
    ctxt->depth = depth;
12932
149k
    if (oldctxt != NULL) {
12933
149k
  ctxt->_private = oldctxt->_private;
12934
149k
  ctxt->loadsubset = oldctxt->loadsubset;
12935
149k
  ctxt->validate = oldctxt->validate;
12936
149k
  ctxt->valid = oldctxt->valid;
12937
149k
  ctxt->replaceEntities = oldctxt->replaceEntities;
12938
149k
        if (oldctxt->validate) {
12939
82.7k
            ctxt->vctxt.error = oldctxt->vctxt.error;
12940
82.7k
            ctxt->vctxt.warning = oldctxt->vctxt.warning;
12941
82.7k
            ctxt->vctxt.userData = oldctxt->vctxt.userData;
12942
82.7k
            ctxt->vctxt.flags = oldctxt->vctxt.flags;
12943
82.7k
        }
12944
149k
  ctxt->external = oldctxt->external;
12945
149k
        if (ctxt->dict) xmlDictFree(ctxt->dict);
12946
149k
        ctxt->dict = oldctxt->dict;
12947
149k
        ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12948
149k
        ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12949
149k
        ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12950
149k
        ctxt->dictNames = oldctxt->dictNames;
12951
149k
        ctxt->attsDefault = oldctxt->attsDefault;
12952
149k
        ctxt->attsSpecial = oldctxt->attsSpecial;
12953
149k
        ctxt->linenumbers = oldctxt->linenumbers;
12954
149k
  ctxt->record_info = oldctxt->record_info;
12955
149k
  ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
12956
149k
  ctxt->node_seq.length = oldctxt->node_seq.length;
12957
149k
  ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
12958
149k
    } else {
12959
  /*
12960
   * Doing validity checking on chunk without context
12961
   * doesn't make sense
12962
   */
12963
0
  ctxt->_private = NULL;
12964
0
  ctxt->validate = 0;
12965
0
  ctxt->external = 2;
12966
0
  ctxt->loadsubset = 0;
12967
0
    }
12968
12969
149k
    xmlParseContent(ctxt);
12970
12971
149k
    if ((RAW == '<') && (NXT(1) == '/')) {
12972
1.26k
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12973
148k
    } else if (RAW != 0) {
12974
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12975
0
    }
12976
149k
    if (ctxt->node != newDoc->children) {
12977
6.88k
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12978
6.88k
    }
12979
12980
149k
    if (!ctxt->wellFormed) {
12981
32.8k
  ret = (xmlParserErrors)ctxt->errNo;
12982
32.8k
        if (oldctxt != NULL) {
12983
32.8k
            oldctxt->errNo = ctxt->errNo;
12984
32.8k
            oldctxt->wellFormed = 0;
12985
32.8k
            xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12986
32.8k
        }
12987
116k
    } else {
12988
116k
  if (list != NULL) {
12989
15.9k
      xmlNodePtr cur;
12990
12991
      /*
12992
       * Return the newly created nodeset after unlinking it from
12993
       * they pseudo parent.
12994
       */
12995
15.9k
      cur = newDoc->children->children;
12996
15.9k
      *list = cur;
12997
3.13M
      while (cur != NULL) {
12998
3.12M
    cur->parent = NULL;
12999
3.12M
    cur = cur->next;
13000
3.12M
      }
13001
15.9k
            newDoc->children->children = NULL;
13002
15.9k
  }
13003
116k
  ret = XML_ERR_OK;
13004
116k
    }
13005
13006
    /*
13007
     * Also record the size of the entity parsed
13008
     */
13009
149k
    if (ctxt->input != NULL && oldctxt != NULL) {
13010
149k
        unsigned long consumed = ctxt->input->consumed;
13011
13012
149k
        xmlSaturatedAddSizeT(&consumed, ctxt->input->cur - ctxt->input->base);
13013
13014
149k
        xmlSaturatedAdd(&oldctxt->sizeentities, consumed);
13015
149k
        xmlSaturatedAdd(&oldctxt->sizeentities, ctxt->sizeentities);
13016
13017
149k
        xmlSaturatedAdd(&oldctxt->sizeentcopy, consumed);
13018
149k
        xmlSaturatedAdd(&oldctxt->sizeentcopy, ctxt->sizeentcopy);
13019
149k
    }
13020
13021
149k
    if (oldctxt != NULL) {
13022
149k
        ctxt->dict = NULL;
13023
149k
        ctxt->attsDefault = NULL;
13024
149k
        ctxt->attsSpecial = NULL;
13025
149k
        oldctxt->nbErrors = ctxt->nbErrors;
13026
149k
        oldctxt->nbWarnings = ctxt->nbWarnings;
13027
149k
        oldctxt->validate = ctxt->validate;
13028
149k
        oldctxt->valid = ctxt->valid;
13029
149k
        oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
13030
149k
        oldctxt->node_seq.length = ctxt->node_seq.length;
13031
149k
        oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
13032
149k
    }
13033
149k
    ctxt->node_seq.maximum = 0;
13034
149k
    ctxt->node_seq.length = 0;
13035
149k
    ctxt->node_seq.buffer = NULL;
13036
149k
    xmlFreeParserCtxt(ctxt);
13037
149k
    newDoc->intSubset = NULL;
13038
149k
    newDoc->extSubset = NULL;
13039
149k
    xmlFreeDoc(newDoc);
13040
13041
149k
    return(ret);
13042
149k
}
13043
13044
#ifdef LIBXML_SAX1_ENABLED
13045
/**
13046
 * xmlParseExternalEntity:
13047
 * @doc:  the document the chunk pertains to
13048
 * @sax:  the SAX handler block (possibly NULL)
13049
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13050
 * @depth:  Used for loop detection, use 0
13051
 * @URL:  the URL for the entity to load
13052
 * @ID:  the System ID for the entity to load
13053
 * @lst:  the return value for the set of parsed nodes
13054
 *
13055
 * Parse an external general entity
13056
 * An external general parsed entity is well-formed if it matches the
13057
 * production labeled extParsedEnt.
13058
 *
13059
 * [78] extParsedEnt ::= TextDecl? content
13060
 *
13061
 * Returns 0 if the entity is well formed, -1 in case of args problem and
13062
 *    the parser error code otherwise
13063
 */
13064
13065
int
13066
xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
13067
0
    int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
13068
0
    return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
13069
0
                           ID, lst));
13070
0
}
13071
13072
/**
13073
 * xmlParseBalancedChunkMemory:
13074
 * @doc:  the document the chunk pertains to (must not be NULL)
13075
 * @sax:  the SAX handler block (possibly NULL)
13076
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13077
 * @depth:  Used for loop detection, use 0
13078
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13079
 * @lst:  the return value for the set of parsed nodes
13080
 *
13081
 * Parse a well-balanced chunk of an XML document
13082
 * called by the parser
13083
 * The allowed sequence for the Well Balanced Chunk is the one defined by
13084
 * the content production in the XML grammar:
13085
 *
13086
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13087
 *
13088
 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13089
 *    the parser error code otherwise
13090
 */
13091
13092
int
13093
xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13094
0
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
13095
0
    return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
13096
0
                                                depth, string, lst, 0 );
13097
0
}
13098
#endif /* LIBXML_SAX1_ENABLED */
13099
13100
/**
13101
 * xmlParseBalancedChunkMemoryInternal:
13102
 * @oldctxt:  the existing parsing context
13103
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13104
 * @user_data:  the user data field for the parser context
13105
 * @lst:  the return value for the set of parsed nodes
13106
 *
13107
 *
13108
 * Parse a well-balanced chunk of an XML document
13109
 * called by the parser
13110
 * The allowed sequence for the Well Balanced Chunk is the one defined by
13111
 * the content production in the XML grammar:
13112
 *
13113
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13114
 *
13115
 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13116
 * error code otherwise
13117
 *
13118
 * In case recover is set to 1, the nodelist will not be empty even if
13119
 * the parsed chunk is not well balanced.
13120
 */
13121
static xmlParserErrors
13122
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
13123
198k
  const xmlChar *string, void *user_data, xmlNodePtr *lst) {
13124
198k
    xmlParserCtxtPtr ctxt;
13125
198k
    xmlDocPtr newDoc = NULL;
13126
198k
    xmlNodePtr newRoot;
13127
198k
    xmlSAXHandlerPtr oldsax = NULL;
13128
198k
    xmlNodePtr content = NULL;
13129
198k
    xmlNodePtr last = NULL;
13130
198k
    int size;
13131
198k
    xmlParserErrors ret = XML_ERR_OK;
13132
198k
#ifdef SAX2
13133
198k
    int i;
13134
198k
#endif
13135
13136
198k
    if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13137
198k
        (oldctxt->depth >  100)) {
13138
84
  xmlFatalErrMsg(oldctxt, XML_ERR_ENTITY_LOOP,
13139
84
                       "Maximum entity nesting depth exceeded");
13140
84
  return(XML_ERR_ENTITY_LOOP);
13141
84
    }
13142
13143
13144
198k
    if (lst != NULL)
13145
187k
        *lst = NULL;
13146
198k
    if (string == NULL)
13147
108
        return(XML_ERR_INTERNAL_ERROR);
13148
13149
198k
    size = xmlStrlen(string);
13150
13151
198k
    ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13152
198k
    if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13153
189k
    ctxt->nbErrors = oldctxt->nbErrors;
13154
189k
    ctxt->nbWarnings = oldctxt->nbWarnings;
13155
189k
    if (user_data != NULL)
13156
0
  ctxt->userData = user_data;
13157
189k
    else
13158
189k
  ctxt->userData = ctxt;
13159
189k
    if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13160
189k
    ctxt->dict = oldctxt->dict;
13161
189k
    ctxt->input_id = oldctxt->input_id;
13162
189k
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13163
189k
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13164
189k
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13165
13166
189k
#ifdef SAX2
13167
    /* propagate namespaces down the entity */
13168
190k
    for (i = 0;i < oldctxt->nsNr;i += 2) {
13169
832
        nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
13170
832
    }
13171
189k
#endif
13172
13173
189k
    oldsax = ctxt->sax;
13174
189k
    ctxt->sax = oldctxt->sax;
13175
189k
    xmlDetectSAX2(ctxt);
13176
189k
    ctxt->replaceEntities = oldctxt->replaceEntities;
13177
189k
    ctxt->options = oldctxt->options;
13178
13179
189k
    ctxt->_private = oldctxt->_private;
13180
189k
    if (oldctxt->myDoc == NULL) {
13181
0
  newDoc = xmlNewDoc(BAD_CAST "1.0");
13182
0
  if (newDoc == NULL) {
13183
0
      ctxt->sax = oldsax;
13184
0
      ctxt->dict = NULL;
13185
0
      xmlFreeParserCtxt(ctxt);
13186
0
      return(XML_ERR_INTERNAL_ERROR);
13187
0
  }
13188
0
  newDoc->properties = XML_DOC_INTERNAL;
13189
0
  newDoc->dict = ctxt->dict;
13190
0
  xmlDictReference(newDoc->dict);
13191
0
  ctxt->myDoc = newDoc;
13192
189k
    } else {
13193
189k
  ctxt->myDoc = oldctxt->myDoc;
13194
189k
        content = ctxt->myDoc->children;
13195
189k
  last = ctxt->myDoc->last;
13196
189k
    }
13197
189k
    newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
13198
189k
    if (newRoot == NULL) {
13199
0
  ctxt->sax = oldsax;
13200
0
  ctxt->dict = NULL;
13201
0
  xmlFreeParserCtxt(ctxt);
13202
0
  if (newDoc != NULL) {
13203
0
      xmlFreeDoc(newDoc);
13204
0
  }
13205
0
  return(XML_ERR_INTERNAL_ERROR);
13206
0
    }
13207
189k
    ctxt->myDoc->children = NULL;
13208
189k
    ctxt->myDoc->last = NULL;
13209
189k
    xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
13210
189k
    nodePush(ctxt, ctxt->myDoc->children);
13211
189k
    ctxt->instate = XML_PARSER_CONTENT;
13212
189k
    ctxt->depth = oldctxt->depth;
13213
13214
189k
    ctxt->validate = 0;
13215
189k
    ctxt->loadsubset = oldctxt->loadsubset;
13216
189k
    if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
13217
  /*
13218
   * ID/IDREF registration will be done in xmlValidateElement below
13219
   */
13220
158k
  ctxt->loadsubset |= XML_SKIP_IDS;
13221
158k
    }
13222
189k
    ctxt->dictNames = oldctxt->dictNames;
13223
189k
    ctxt->attsDefault = oldctxt->attsDefault;
13224
189k
    ctxt->attsSpecial = oldctxt->attsSpecial;
13225
13226
189k
    xmlParseContent(ctxt);
13227
189k
    if ((RAW == '<') && (NXT(1) == '/')) {
13228
374
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13229
189k
    } else if (RAW != 0) {
13230
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13231
0
    }
13232
189k
    if (ctxt->node != ctxt->myDoc->children) {
13233
1.54k
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13234
1.54k
    }
13235
13236
189k
    if (!ctxt->wellFormed) {
13237
22.3k
  ret = (xmlParserErrors)ctxt->errNo;
13238
22.3k
        oldctxt->errNo = ctxt->errNo;
13239
22.3k
        oldctxt->wellFormed = 0;
13240
22.3k
        xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13241
167k
    } else {
13242
167k
        ret = XML_ERR_OK;
13243
167k
    }
13244
13245
189k
    if ((lst != NULL) && (ret == XML_ERR_OK)) {
13246
165k
  xmlNodePtr cur;
13247
13248
  /*
13249
   * Return the newly created nodeset after unlinking it from
13250
   * they pseudo parent.
13251
   */
13252
165k
  cur = ctxt->myDoc->children->children;
13253
165k
  *lst = cur;
13254
479k
  while (cur != NULL) {
13255
314k
#ifdef LIBXML_VALID_ENABLED
13256
314k
      if ((oldctxt->validate) && (oldctxt->wellFormed) &&
13257
314k
    (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
13258
314k
    (cur->type == XML_ELEMENT_NODE)) {
13259
31.1k
    oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
13260
31.1k
      oldctxt->myDoc, cur);
13261
31.1k
      }
13262
314k
#endif /* LIBXML_VALID_ENABLED */
13263
314k
      cur->parent = NULL;
13264
314k
      cur = cur->next;
13265
314k
  }
13266
165k
  ctxt->myDoc->children->children = NULL;
13267
165k
    }
13268
189k
    if (ctxt->myDoc != NULL) {
13269
189k
  xmlFreeNode(ctxt->myDoc->children);
13270
189k
        ctxt->myDoc->children = content;
13271
189k
        ctxt->myDoc->last = last;
13272
189k
    }
13273
13274
    /*
13275
     * Also record the size of the entity parsed
13276
     */
13277
189k
    if (ctxt->input != NULL && oldctxt != NULL) {
13278
189k
        unsigned long consumed = ctxt->input->consumed;
13279
13280
189k
        xmlSaturatedAddSizeT(&consumed, ctxt->input->cur - ctxt->input->base);
13281
13282
189k
        xmlSaturatedAdd(&oldctxt->sizeentcopy, consumed);
13283
189k
        xmlSaturatedAdd(&oldctxt->sizeentcopy, ctxt->sizeentcopy);
13284
189k
    }
13285
13286
189k
    oldctxt->nbErrors = ctxt->nbErrors;
13287
189k
    oldctxt->nbWarnings = ctxt->nbWarnings;
13288
189k
    ctxt->sax = oldsax;
13289
189k
    ctxt->dict = NULL;
13290
189k
    ctxt->attsDefault = NULL;
13291
189k
    ctxt->attsSpecial = NULL;
13292
189k
    xmlFreeParserCtxt(ctxt);
13293
189k
    if (newDoc != NULL) {
13294
0
  xmlFreeDoc(newDoc);
13295
0
    }
13296
13297
189k
    return(ret);
13298
189k
}
13299
13300
/**
13301
 * xmlParseInNodeContext:
13302
 * @node:  the context node
13303
 * @data:  the input string
13304
 * @datalen:  the input string length in bytes
13305
 * @options:  a combination of xmlParserOption
13306
 * @lst:  the return value for the set of parsed nodes
13307
 *
13308
 * Parse a well-balanced chunk of an XML document
13309
 * within the context (DTD, namespaces, etc ...) of the given node.
13310
 *
13311
 * The allowed sequence for the data is a Well Balanced Chunk defined by
13312
 * the content production in the XML grammar:
13313
 *
13314
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13315
 *
13316
 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13317
 * error code otherwise
13318
 */
13319
xmlParserErrors
13320
xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13321
0
                      int options, xmlNodePtr *lst) {
13322
0
#ifdef SAX2
13323
0
    xmlParserCtxtPtr ctxt;
13324
0
    xmlDocPtr doc = NULL;
13325
0
    xmlNodePtr fake, cur;
13326
0
    int nsnr = 0;
13327
13328
0
    xmlParserErrors ret = XML_ERR_OK;
13329
13330
    /*
13331
     * check all input parameters, grab the document
13332
     */
13333
0
    if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13334
0
        return(XML_ERR_INTERNAL_ERROR);
13335
0
    switch (node->type) {
13336
0
        case XML_ELEMENT_NODE:
13337
0
        case XML_ATTRIBUTE_NODE:
13338
0
        case XML_TEXT_NODE:
13339
0
        case XML_CDATA_SECTION_NODE:
13340
0
        case XML_ENTITY_REF_NODE:
13341
0
        case XML_PI_NODE:
13342
0
        case XML_COMMENT_NODE:
13343
0
        case XML_DOCUMENT_NODE:
13344
0
        case XML_HTML_DOCUMENT_NODE:
13345
0
      break;
13346
0
  default:
13347
0
      return(XML_ERR_INTERNAL_ERROR);
13348
13349
0
    }
13350
0
    while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13351
0
           (node->type != XML_DOCUMENT_NODE) &&
13352
0
     (node->type != XML_HTML_DOCUMENT_NODE))
13353
0
  node = node->parent;
13354
0
    if (node == NULL)
13355
0
  return(XML_ERR_INTERNAL_ERROR);
13356
0
    if (node->type == XML_ELEMENT_NODE)
13357
0
  doc = node->doc;
13358
0
    else
13359
0
        doc = (xmlDocPtr) node;
13360
0
    if (doc == NULL)
13361
0
  return(XML_ERR_INTERNAL_ERROR);
13362
13363
    /*
13364
     * allocate a context and set-up everything not related to the
13365
     * node position in the tree
13366
     */
13367
0
    if (doc->type == XML_DOCUMENT_NODE)
13368
0
  ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13369
0
#ifdef LIBXML_HTML_ENABLED
13370
0
    else if (doc->type == XML_HTML_DOCUMENT_NODE) {
13371
0
  ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
13372
        /*
13373
         * When parsing in context, it makes no sense to add implied
13374
         * elements like html/body/etc...
13375
         */
13376
0
        options |= HTML_PARSE_NOIMPLIED;
13377
0
    }
13378
0
#endif
13379
0
    else
13380
0
        return(XML_ERR_INTERNAL_ERROR);
13381
13382
0
    if (ctxt == NULL)
13383
0
        return(XML_ERR_NO_MEMORY);
13384
13385
    /*
13386
     * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13387
     * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13388
     * we must wait until the last moment to free the original one.
13389
     */
13390
0
    if (doc->dict != NULL) {
13391
0
        if (ctxt->dict != NULL)
13392
0
      xmlDictFree(ctxt->dict);
13393
0
  ctxt->dict = doc->dict;
13394
0
    } else
13395
0
        options |= XML_PARSE_NODICT;
13396
13397
0
    if (doc->encoding != NULL) {
13398
0
        xmlCharEncodingHandlerPtr hdlr;
13399
13400
0
        if (ctxt->encoding != NULL)
13401
0
      xmlFree((xmlChar *) ctxt->encoding);
13402
0
        ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13403
13404
0
        hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding);
13405
0
        if (hdlr != NULL) {
13406
0
            xmlSwitchToEncoding(ctxt, hdlr);
13407
0
  } else {
13408
0
            return(XML_ERR_UNSUPPORTED_ENCODING);
13409
0
        }
13410
0
    }
13411
13412
0
    xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13413
0
    xmlDetectSAX2(ctxt);
13414
0
    ctxt->myDoc = doc;
13415
    /* parsing in context, i.e. as within existing content */
13416
0
    ctxt->input_id = 2;
13417
0
    ctxt->instate = XML_PARSER_CONTENT;
13418
13419
0
    fake = xmlNewDocComment(node->doc, NULL);
13420
0
    if (fake == NULL) {
13421
0
        xmlFreeParserCtxt(ctxt);
13422
0
  return(XML_ERR_NO_MEMORY);
13423
0
    }
13424
0
    xmlAddChild(node, fake);
13425
13426
0
    if (node->type == XML_ELEMENT_NODE) {
13427
0
  nodePush(ctxt, node);
13428
  /*
13429
   * initialize the SAX2 namespaces stack
13430
   */
13431
0
  cur = node;
13432
0
  while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13433
0
      xmlNsPtr ns = cur->nsDef;
13434
0
      const xmlChar *iprefix, *ihref;
13435
13436
0
      while (ns != NULL) {
13437
0
    if (ctxt->dict) {
13438
0
        iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13439
0
        ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13440
0
    } else {
13441
0
        iprefix = ns->prefix;
13442
0
        ihref = ns->href;
13443
0
    }
13444
13445
0
          if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13446
0
        nsPush(ctxt, iprefix, ihref);
13447
0
        nsnr++;
13448
0
    }
13449
0
    ns = ns->next;
13450
0
      }
13451
0
      cur = cur->parent;
13452
0
  }
13453
0
    }
13454
13455
0
    if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13456
  /*
13457
   * ID/IDREF registration will be done in xmlValidateElement below
13458
   */
13459
0
  ctxt->loadsubset |= XML_SKIP_IDS;
13460
0
    }
13461
13462
0
#ifdef LIBXML_HTML_ENABLED
13463
0
    if (doc->type == XML_HTML_DOCUMENT_NODE)
13464
0
        __htmlParseContent(ctxt);
13465
0
    else
13466
0
#endif
13467
0
  xmlParseContent(ctxt);
13468
13469
0
    nsPop(ctxt, nsnr);
13470
0
    if ((RAW == '<') && (NXT(1) == '/')) {
13471
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13472
0
    } else if (RAW != 0) {
13473
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13474
0
    }
13475
0
    if ((ctxt->node != NULL) && (ctxt->node != node)) {
13476
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13477
0
  ctxt->wellFormed = 0;
13478
0
    }
13479
13480
0
    if (!ctxt->wellFormed) {
13481
0
        if (ctxt->errNo == 0)
13482
0
      ret = XML_ERR_INTERNAL_ERROR;
13483
0
  else
13484
0
      ret = (xmlParserErrors)ctxt->errNo;
13485
0
    } else {
13486
0
        ret = XML_ERR_OK;
13487
0
    }
13488
13489
    /*
13490
     * Return the newly created nodeset after unlinking it from
13491
     * the pseudo sibling.
13492
     */
13493
13494
0
    cur = fake->next;
13495
0
    fake->next = NULL;
13496
0
    node->last = fake;
13497
13498
0
    if (cur != NULL) {
13499
0
  cur->prev = NULL;
13500
0
    }
13501
13502
0
    *lst = cur;
13503
13504
0
    while (cur != NULL) {
13505
0
  cur->parent = NULL;
13506
0
  cur = cur->next;
13507
0
    }
13508
13509
0
    xmlUnlinkNode(fake);
13510
0
    xmlFreeNode(fake);
13511
13512
13513
0
    if (ret != XML_ERR_OK) {
13514
0
        xmlFreeNodeList(*lst);
13515
0
  *lst = NULL;
13516
0
    }
13517
13518
0
    if (doc->dict != NULL)
13519
0
        ctxt->dict = NULL;
13520
0
    xmlFreeParserCtxt(ctxt);
13521
13522
0
    return(ret);
13523
#else /* !SAX2 */
13524
    return(XML_ERR_INTERNAL_ERROR);
13525
#endif
13526
0
}
13527
13528
#ifdef LIBXML_SAX1_ENABLED
13529
/**
13530
 * xmlParseBalancedChunkMemoryRecover:
13531
 * @doc:  the document the chunk pertains to (must not be NULL)
13532
 * @sax:  the SAX handler block (possibly NULL)
13533
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13534
 * @depth:  Used for loop detection, use 0
13535
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13536
 * @lst:  the return value for the set of parsed nodes
13537
 * @recover: return nodes even if the data is broken (use 0)
13538
 *
13539
 *
13540
 * Parse a well-balanced chunk of an XML document
13541
 * called by the parser
13542
 * The allowed sequence for the Well Balanced Chunk is the one defined by
13543
 * the content production in the XML grammar:
13544
 *
13545
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13546
 *
13547
 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13548
 *    the parser error code otherwise
13549
 *
13550
 * In case recover is set to 1, the nodelist will not be empty even if
13551
 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13552
 * some extent.
13553
 */
13554
int
13555
xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13556
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
13557
0
     int recover) {
13558
0
    xmlParserCtxtPtr ctxt;
13559
0
    xmlDocPtr newDoc;
13560
0
    xmlSAXHandlerPtr oldsax = NULL;
13561
0
    xmlNodePtr content, newRoot;
13562
0
    int size;
13563
0
    int ret = 0;
13564
13565
0
    if (depth > 40) {
13566
0
  return(XML_ERR_ENTITY_LOOP);
13567
0
    }
13568
13569
13570
0
    if (lst != NULL)
13571
0
        *lst = NULL;
13572
0
    if (string == NULL)
13573
0
        return(-1);
13574
13575
0
    size = xmlStrlen(string);
13576
13577
0
    ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13578
0
    if (ctxt == NULL) return(-1);
13579
0
    ctxt->userData = ctxt;
13580
0
    if (sax != NULL) {
13581
0
  oldsax = ctxt->sax;
13582
0
        ctxt->sax = sax;
13583
0
  if (user_data != NULL)
13584
0
      ctxt->userData = user_data;
13585
0
    }
13586
0
    newDoc = xmlNewDoc(BAD_CAST "1.0");
13587
0
    if (newDoc == NULL) {
13588
0
  xmlFreeParserCtxt(ctxt);
13589
0
  return(-1);
13590
0
    }
13591
0
    newDoc->properties = XML_DOC_INTERNAL;
13592
0
    if ((doc != NULL) && (doc->dict != NULL)) {
13593
0
        xmlDictFree(ctxt->dict);
13594
0
  ctxt->dict = doc->dict;
13595
0
  xmlDictReference(ctxt->dict);
13596
0
  ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13597
0
  ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13598
0
  ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13599
0
  ctxt->dictNames = 1;
13600
0
    } else {
13601
0
  xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
13602
0
    }
13603
    /* doc == NULL is only supported for historic reasons */
13604
0
    if (doc != NULL) {
13605
0
  newDoc->intSubset = doc->intSubset;
13606
0
  newDoc->extSubset = doc->extSubset;
13607
0
    }
13608
0
    newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13609
0
    if (newRoot == NULL) {
13610
0
  if (sax != NULL)
13611
0
      ctxt->sax = oldsax;
13612
0
  xmlFreeParserCtxt(ctxt);
13613
0
  newDoc->intSubset = NULL;
13614
0
  newDoc->extSubset = NULL;
13615
0
        xmlFreeDoc(newDoc);
13616
0
  return(-1);
13617
0
    }
13618
0
    xmlAddChild((xmlNodePtr) newDoc, newRoot);
13619
0
    nodePush(ctxt, newRoot);
13620
    /* doc == NULL is only supported for historic reasons */
13621
0
    if (doc == NULL) {
13622
0
  ctxt->myDoc = newDoc;
13623
0
    } else {
13624
0
  ctxt->myDoc = newDoc;
13625
0
  newDoc->children->doc = doc;
13626
  /* Ensure that doc has XML spec namespace */
13627
0
  xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13628
0
  newDoc->oldNs = doc->oldNs;
13629
0
    }
13630
0
    ctxt->instate = XML_PARSER_CONTENT;
13631
0
    ctxt->input_id = 2;
13632
0
    ctxt->depth = depth;
13633
13634
    /*
13635
     * Doing validity checking on chunk doesn't make sense
13636
     */
13637
0
    ctxt->validate = 0;
13638
0
    ctxt->loadsubset = 0;
13639
0
    xmlDetectSAX2(ctxt);
13640
13641
0
    if ( doc != NULL ){
13642
0
        content = doc->children;
13643
0
        doc->children = NULL;
13644
0
        xmlParseContent(ctxt);
13645
0
        doc->children = content;
13646
0
    }
13647
0
    else {
13648
0
        xmlParseContent(ctxt);
13649
0
    }
13650
0
    if ((RAW == '<') && (NXT(1) == '/')) {
13651
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13652
0
    } else if (RAW != 0) {
13653
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13654
0
    }
13655
0
    if (ctxt->node != newDoc->children) {
13656
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13657
0
    }
13658
13659
0
    if (!ctxt->wellFormed) {
13660
0
        if (ctxt->errNo == 0)
13661
0
      ret = 1;
13662
0
  else
13663
0
      ret = ctxt->errNo;
13664
0
    } else {
13665
0
      ret = 0;
13666
0
    }
13667
13668
0
    if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13669
0
  xmlNodePtr cur;
13670
13671
  /*
13672
   * Return the newly created nodeset after unlinking it from
13673
   * they pseudo parent.
13674
   */
13675
0
  cur = newDoc->children->children;
13676
0
  *lst = cur;
13677
0
  while (cur != NULL) {
13678
0
      xmlSetTreeDoc(cur, doc);
13679
0
      cur->parent = NULL;
13680
0
      cur = cur->next;
13681
0
  }
13682
0
  newDoc->children->children = NULL;
13683
0
    }
13684
13685
0
    if (sax != NULL)
13686
0
  ctxt->sax = oldsax;
13687
0
    xmlFreeParserCtxt(ctxt);
13688
0
    newDoc->intSubset = NULL;
13689
0
    newDoc->extSubset = NULL;
13690
    /* This leaks the namespace list if doc == NULL */
13691
0
    newDoc->oldNs = NULL;
13692
0
    xmlFreeDoc(newDoc);
13693
13694
0
    return(ret);
13695
0
}
13696
13697
/**
13698
 * xmlSAXParseEntity:
13699
 * @sax:  the SAX handler block
13700
 * @filename:  the filename
13701
 *
13702
 * DEPRECATED: Don't use.
13703
 *
13704
 * parse an XML external entity out of context and build a tree.
13705
 * It use the given SAX function block to handle the parsing callback.
13706
 * If sax is NULL, fallback to the default DOM tree building routines.
13707
 *
13708
 * [78] extParsedEnt ::= TextDecl? content
13709
 *
13710
 * This correspond to a "Well Balanced" chunk
13711
 *
13712
 * Returns the resulting document tree
13713
 */
13714
13715
xmlDocPtr
13716
0
xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13717
0
    xmlDocPtr ret;
13718
0
    xmlParserCtxtPtr ctxt;
13719
13720
0
    ctxt = xmlCreateFileParserCtxt(filename);
13721
0
    if (ctxt == NULL) {
13722
0
  return(NULL);
13723
0
    }
13724
0
    if (sax != NULL) {
13725
0
  if (ctxt->sax != NULL)
13726
0
      xmlFree(ctxt->sax);
13727
0
        ctxt->sax = sax;
13728
0
        ctxt->userData = NULL;
13729
0
    }
13730
13731
0
    xmlParseExtParsedEnt(ctxt);
13732
13733
0
    if (ctxt->wellFormed)
13734
0
  ret = ctxt->myDoc;
13735
0
    else {
13736
0
        ret = NULL;
13737
0
        xmlFreeDoc(ctxt->myDoc);
13738
0
        ctxt->myDoc = NULL;
13739
0
    }
13740
0
    if (sax != NULL)
13741
0
        ctxt->sax = NULL;
13742
0
    xmlFreeParserCtxt(ctxt);
13743
13744
0
    return(ret);
13745
0
}
13746
13747
/**
13748
 * xmlParseEntity:
13749
 * @filename:  the filename
13750
 *
13751
 * parse an XML external entity out of context and build a tree.
13752
 *
13753
 * [78] extParsedEnt ::= TextDecl? content
13754
 *
13755
 * This correspond to a "Well Balanced" chunk
13756
 *
13757
 * Returns the resulting document tree
13758
 */
13759
13760
xmlDocPtr
13761
0
xmlParseEntity(const char *filename) {
13762
0
    return(xmlSAXParseEntity(NULL, filename));
13763
0
}
13764
#endif /* LIBXML_SAX1_ENABLED */
13765
13766
/**
13767
 * xmlCreateEntityParserCtxtInternal:
13768
 * @URL:  the entity URL
13769
 * @ID:  the entity PUBLIC ID
13770
 * @base:  a possible base for the target URI
13771
 * @pctx:  parser context used to set options on new context
13772
 *
13773
 * Create a parser context for an external entity
13774
 * Automatic support for ZLIB/Compress compressed document is provided
13775
 * by default if found at compile-time.
13776
 *
13777
 * Returns the new parser context or NULL
13778
 */
13779
static xmlParserCtxtPtr
13780
xmlCreateEntityParserCtxtInternal(xmlSAXHandlerPtr sax, void *userData,
13781
        const xmlChar *URL, const xmlChar *ID, const xmlChar *base,
13782
782k
        xmlParserCtxtPtr pctx) {
13783
782k
    xmlParserCtxtPtr ctxt;
13784
782k
    xmlParserInputPtr inputStream;
13785
782k
    char *directory = NULL;
13786
782k
    xmlChar *uri;
13787
13788
782k
    ctxt = xmlNewSAXParserCtxt(sax, userData);
13789
782k
    if (ctxt == NULL) {
13790
0
  return(NULL);
13791
0
    }
13792
13793
782k
    if (pctx != NULL) {
13794
782k
        ctxt->options = pctx->options;
13795
782k
        ctxt->_private = pctx->_private;
13796
782k
  ctxt->input_id = pctx->input_id;
13797
782k
    }
13798
13799
    /* Don't read from stdin. */
13800
782k
    if (xmlStrcmp(URL, BAD_CAST "-") == 0)
13801
197
        URL = BAD_CAST "./-";
13802
13803
782k
    uri = xmlBuildURI(URL, base);
13804
13805
782k
    if (uri == NULL) {
13806
16.4k
  inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
13807
16.4k
  if (inputStream == NULL) {
13808
15.4k
      xmlFreeParserCtxt(ctxt);
13809
15.4k
      return(NULL);
13810
15.4k
  }
13811
13812
936
  inputPush(ctxt, inputStream);
13813
13814
936
  if ((ctxt->directory == NULL) && (directory == NULL))
13815
936
      directory = xmlParserGetDirectory((char *)URL);
13816
936
  if ((ctxt->directory == NULL) && (directory != NULL))
13817
936
      ctxt->directory = directory;
13818
766k
    } else {
13819
766k
  inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
13820
766k
  if (inputStream == NULL) {
13821
617k
      xmlFree(uri);
13822
617k
      xmlFreeParserCtxt(ctxt);
13823
617k
      return(NULL);
13824
617k
  }
13825
13826
148k
  inputPush(ctxt, inputStream);
13827
13828
148k
  if ((ctxt->directory == NULL) && (directory == NULL))
13829
148k
      directory = xmlParserGetDirectory((char *)uri);
13830
148k
  if ((ctxt->directory == NULL) && (directory != NULL))
13831
148k
      ctxt->directory = directory;
13832
148k
  xmlFree(uri);
13833
148k
    }
13834
149k
    return(ctxt);
13835
782k
}
13836
13837
/**
13838
 * xmlCreateEntityParserCtxt:
13839
 * @URL:  the entity URL
13840
 * @ID:  the entity PUBLIC ID
13841
 * @base:  a possible base for the target URI
13842
 *
13843
 * Create a parser context for an external entity
13844
 * Automatic support for ZLIB/Compress compressed document is provided
13845
 * by default if found at compile-time.
13846
 *
13847
 * Returns the new parser context or NULL
13848
 */
13849
xmlParserCtxtPtr
13850
xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
13851
0
                    const xmlChar *base) {
13852
0
    return xmlCreateEntityParserCtxtInternal(NULL, NULL, URL, ID, base, NULL);
13853
13854
0
}
13855
13856
/************************************************************************
13857
 *                  *
13858
 *    Front ends when parsing from a file     *
13859
 *                  *
13860
 ************************************************************************/
13861
13862
/**
13863
 * xmlCreateURLParserCtxt:
13864
 * @filename:  the filename or URL
13865
 * @options:  a combination of xmlParserOption
13866
 *
13867
 * Create a parser context for a file or URL content.
13868
 * Automatic support for ZLIB/Compress compressed document is provided
13869
 * by default if found at compile-time and for file accesses
13870
 *
13871
 * Returns the new parser context or NULL
13872
 */
13873
xmlParserCtxtPtr
13874
xmlCreateURLParserCtxt(const char *filename, int options)
13875
0
{
13876
0
    xmlParserCtxtPtr ctxt;
13877
0
    xmlParserInputPtr inputStream;
13878
0
    char *directory = NULL;
13879
13880
0
    ctxt = xmlNewParserCtxt();
13881
0
    if (ctxt == NULL) {
13882
0
  xmlErrMemory(NULL, "cannot allocate parser context");
13883
0
  return(NULL);
13884
0
    }
13885
13886
0
    if (options)
13887
0
  xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13888
0
    ctxt->linenumbers = 1;
13889
13890
0
    inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
13891
0
    if (inputStream == NULL) {
13892
0
  xmlFreeParserCtxt(ctxt);
13893
0
  return(NULL);
13894
0
    }
13895
13896
0
    inputPush(ctxt, inputStream);
13897
0
    if ((ctxt->directory == NULL) && (directory == NULL))
13898
0
        directory = xmlParserGetDirectory(filename);
13899
0
    if ((ctxt->directory == NULL) && (directory != NULL))
13900
0
        ctxt->directory = directory;
13901
13902
0
    return(ctxt);
13903
0
}
13904
13905
/**
13906
 * xmlCreateFileParserCtxt:
13907
 * @filename:  the filename
13908
 *
13909
 * Create a parser context for a file content.
13910
 * Automatic support for ZLIB/Compress compressed document is provided
13911
 * by default if found at compile-time.
13912
 *
13913
 * Returns the new parser context or NULL
13914
 */
13915
xmlParserCtxtPtr
13916
xmlCreateFileParserCtxt(const char *filename)
13917
0
{
13918
0
    return(xmlCreateURLParserCtxt(filename, 0));
13919
0
}
13920
13921
#ifdef LIBXML_SAX1_ENABLED
13922
/**
13923
 * xmlSAXParseFileWithData:
13924
 * @sax:  the SAX handler block
13925
 * @filename:  the filename
13926
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
13927
 *             documents
13928
 * @data:  the userdata
13929
 *
13930
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
13931
 *
13932
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13933
 * compressed document is provided by default if found at compile-time.
13934
 * It use the given SAX function block to handle the parsing callback.
13935
 * If sax is NULL, fallback to the default DOM tree building routines.
13936
 *
13937
 * User data (void *) is stored within the parser context in the
13938
 * context's _private member, so it is available nearly everywhere in libxml
13939
 *
13940
 * Returns the resulting document tree
13941
 */
13942
13943
xmlDocPtr
13944
xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
13945
0
                        int recovery, void *data) {
13946
0
    xmlDocPtr ret;
13947
0
    xmlParserCtxtPtr ctxt;
13948
13949
0
    xmlInitParser();
13950
13951
0
    ctxt = xmlCreateFileParserCtxt(filename);
13952
0
    if (ctxt == NULL) {
13953
0
  return(NULL);
13954
0
    }
13955
0
    if (sax != NULL) {
13956
0
  if (ctxt->sax != NULL)
13957
0
      xmlFree(ctxt->sax);
13958
0
        ctxt->sax = sax;
13959
0
    }
13960
0
    xmlDetectSAX2(ctxt);
13961
0
    if (data!=NULL) {
13962
0
  ctxt->_private = data;
13963
0
    }
13964
13965
0
    if (ctxt->directory == NULL)
13966
0
        ctxt->directory = xmlParserGetDirectory(filename);
13967
13968
0
    ctxt->recovery = recovery;
13969
13970
0
    xmlParseDocument(ctxt);
13971
13972
0
    if ((ctxt->wellFormed) || recovery) {
13973
0
        ret = ctxt->myDoc;
13974
0
  if ((ret != NULL) && (ctxt->input->buf != NULL)) {
13975
0
      if (ctxt->input->buf->compressed > 0)
13976
0
    ret->compression = 9;
13977
0
      else
13978
0
    ret->compression = ctxt->input->buf->compressed;
13979
0
  }
13980
0
    }
13981
0
    else {
13982
0
       ret = NULL;
13983
0
       xmlFreeDoc(ctxt->myDoc);
13984
0
       ctxt->myDoc = NULL;
13985
0
    }
13986
0
    if (sax != NULL)
13987
0
        ctxt->sax = NULL;
13988
0
    xmlFreeParserCtxt(ctxt);
13989
13990
0
    return(ret);
13991
0
}
13992
13993
/**
13994
 * xmlSAXParseFile:
13995
 * @sax:  the SAX handler block
13996
 * @filename:  the filename
13997
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
13998
 *             documents
13999
 *
14000
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
14001
 *
14002
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14003
 * compressed document is provided by default if found at compile-time.
14004
 * It use the given SAX function block to handle the parsing callback.
14005
 * If sax is NULL, fallback to the default DOM tree building routines.
14006
 *
14007
 * Returns the resulting document tree
14008
 */
14009
14010
xmlDocPtr
14011
xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
14012
0
                          int recovery) {
14013
0
    return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
14014
0
}
14015
14016
/**
14017
 * xmlRecoverDoc:
14018
 * @cur:  a pointer to an array of xmlChar
14019
 *
14020
 * DEPRECATED: Use xmlReadDoc with XML_PARSE_RECOVER.
14021
 *
14022
 * parse an XML in-memory document and build a tree.
14023
 * In the case the document is not Well Formed, a attempt to build a
14024
 * tree is tried anyway
14025
 *
14026
 * Returns the resulting document tree or NULL in case of failure
14027
 */
14028
14029
xmlDocPtr
14030
0
xmlRecoverDoc(const xmlChar *cur) {
14031
0
    return(xmlSAXParseDoc(NULL, cur, 1));
14032
0
}
14033
14034
/**
14035
 * xmlParseFile:
14036
 * @filename:  the filename
14037
 *
14038
 * DEPRECATED: Use xmlReadFile.
14039
 *
14040
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14041
 * compressed document is provided by default if found at compile-time.
14042
 *
14043
 * Returns the resulting document tree if the file was wellformed,
14044
 * NULL otherwise.
14045
 */
14046
14047
xmlDocPtr
14048
0
xmlParseFile(const char *filename) {
14049
0
    return(xmlSAXParseFile(NULL, filename, 0));
14050
0
}
14051
14052
/**
14053
 * xmlRecoverFile:
14054
 * @filename:  the filename
14055
 *
14056
 * DEPRECATED: Use xmlReadFile with XML_PARSE_RECOVER.
14057
 *
14058
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14059
 * compressed document is provided by default if found at compile-time.
14060
 * In the case the document is not Well Formed, it attempts to build
14061
 * a tree anyway
14062
 *
14063
 * Returns the resulting document tree or NULL in case of failure
14064
 */
14065
14066
xmlDocPtr
14067
0
xmlRecoverFile(const char *filename) {
14068
0
    return(xmlSAXParseFile(NULL, filename, 1));
14069
0
}
14070
14071
14072
/**
14073
 * xmlSetupParserForBuffer:
14074
 * @ctxt:  an XML parser context
14075
 * @buffer:  a xmlChar * buffer
14076
 * @filename:  a file name
14077
 *
14078
 * DEPRECATED: Don't use.
14079
 *
14080
 * Setup the parser context to parse a new buffer; Clears any prior
14081
 * contents from the parser context. The buffer parameter must not be
14082
 * NULL, but the filename parameter can be
14083
 */
14084
void
14085
xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
14086
                             const char* filename)
14087
0
{
14088
0
    xmlParserInputPtr input;
14089
14090
0
    if ((ctxt == NULL) || (buffer == NULL))
14091
0
        return;
14092
14093
0
    input = xmlNewInputStream(ctxt);
14094
0
    if (input == NULL) {
14095
0
        xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
14096
0
        xmlClearParserCtxt(ctxt);
14097
0
        return;
14098
0
    }
14099
14100
0
    xmlClearParserCtxt(ctxt);
14101
0
    if (filename != NULL)
14102
0
        input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
14103
0
    input->base = buffer;
14104
0
    input->cur = buffer;
14105
0
    input->end = &buffer[xmlStrlen(buffer)];
14106
0
    inputPush(ctxt, input);
14107
0
}
14108
14109
/**
14110
 * xmlSAXUserParseFile:
14111
 * @sax:  a SAX handler
14112
 * @user_data:  The user data returned on SAX callbacks
14113
 * @filename:  a file name
14114
 *
14115
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
14116
 *
14117
 * parse an XML file and call the given SAX handler routines.
14118
 * Automatic support for ZLIB/Compress compressed document is provided
14119
 *
14120
 * Returns 0 in case of success or a error number otherwise
14121
 */
14122
int
14123
xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
14124
0
                    const char *filename) {
14125
0
    int ret = 0;
14126
0
    xmlParserCtxtPtr ctxt;
14127
14128
0
    ctxt = xmlCreateFileParserCtxt(filename);
14129
0
    if (ctxt == NULL) return -1;
14130
0
    if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14131
0
  xmlFree(ctxt->sax);
14132
0
    ctxt->sax = sax;
14133
0
    xmlDetectSAX2(ctxt);
14134
14135
0
    if (user_data != NULL)
14136
0
  ctxt->userData = user_data;
14137
14138
0
    xmlParseDocument(ctxt);
14139
14140
0
    if (ctxt->wellFormed)
14141
0
  ret = 0;
14142
0
    else {
14143
0
        if (ctxt->errNo != 0)
14144
0
      ret = ctxt->errNo;
14145
0
  else
14146
0
      ret = -1;
14147
0
    }
14148
0
    if (sax != NULL)
14149
0
  ctxt->sax = NULL;
14150
0
    if (ctxt->myDoc != NULL) {
14151
0
        xmlFreeDoc(ctxt->myDoc);
14152
0
  ctxt->myDoc = NULL;
14153
0
    }
14154
0
    xmlFreeParserCtxt(ctxt);
14155
14156
0
    return ret;
14157
0
}
14158
#endif /* LIBXML_SAX1_ENABLED */
14159
14160
/************************************************************************
14161
 *                  *
14162
 *    Front ends when parsing from memory     *
14163
 *                  *
14164
 ************************************************************************/
14165
14166
/**
14167
 * xmlCreateMemoryParserCtxt:
14168
 * @buffer:  a pointer to a char array
14169
 * @size:  the size of the array
14170
 *
14171
 * Create a parser context for an XML in-memory document.
14172
 *
14173
 * Returns the new parser context or NULL
14174
 */
14175
xmlParserCtxtPtr
14176
476k
xmlCreateMemoryParserCtxt(const char *buffer, int size) {
14177
476k
    xmlParserCtxtPtr ctxt;
14178
476k
    xmlParserInputPtr input;
14179
476k
    xmlParserInputBufferPtr buf;
14180
14181
476k
    if (buffer == NULL)
14182
0
  return(NULL);
14183
476k
    if (size <= 0)
14184
8.57k
  return(NULL);
14185
14186
468k
    ctxt = xmlNewParserCtxt();
14187
468k
    if (ctxt == NULL)
14188
0
  return(NULL);
14189
14190
468k
    buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14191
468k
    if (buf == NULL) {
14192
0
  xmlFreeParserCtxt(ctxt);
14193
0
  return(NULL);
14194
0
    }
14195
14196
468k
    input = xmlNewInputStream(ctxt);
14197
468k
    if (input == NULL) {
14198
0
  xmlFreeParserInputBuffer(buf);
14199
0
  xmlFreeParserCtxt(ctxt);
14200
0
  return(NULL);
14201
0
    }
14202
14203
468k
    input->filename = NULL;
14204
468k
    input->buf = buf;
14205
468k
    xmlBufResetInput(input->buf->buffer, input);
14206
14207
468k
    inputPush(ctxt, input);
14208
468k
    return(ctxt);
14209
468k
}
14210
14211
#ifdef LIBXML_SAX1_ENABLED
14212
/**
14213
 * xmlSAXParseMemoryWithData:
14214
 * @sax:  the SAX handler block
14215
 * @buffer:  an pointer to a char array
14216
 * @size:  the size of the array
14217
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14218
 *             documents
14219
 * @data:  the userdata
14220
 *
14221
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14222
 *
14223
 * parse an XML in-memory block and use the given SAX function block
14224
 * to handle the parsing callback. If sax is NULL, fallback to the default
14225
 * DOM tree building routines.
14226
 *
14227
 * User data (void *) is stored within the parser context in the
14228
 * context's _private member, so it is available nearly everywhere in libxml
14229
 *
14230
 * Returns the resulting document tree
14231
 */
14232
14233
xmlDocPtr
14234
xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14235
0
            int size, int recovery, void *data) {
14236
0
    xmlDocPtr ret;
14237
0
    xmlParserCtxtPtr ctxt;
14238
14239
0
    xmlInitParser();
14240
14241
0
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14242
0
    if (ctxt == NULL) return(NULL);
14243
0
    if (sax != NULL) {
14244
0
  if (ctxt->sax != NULL)
14245
0
      xmlFree(ctxt->sax);
14246
0
        ctxt->sax = sax;
14247
0
    }
14248
0
    xmlDetectSAX2(ctxt);
14249
0
    if (data!=NULL) {
14250
0
  ctxt->_private=data;
14251
0
    }
14252
14253
0
    ctxt->recovery = recovery;
14254
14255
0
    xmlParseDocument(ctxt);
14256
14257
0
    if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14258
0
    else {
14259
0
       ret = NULL;
14260
0
       xmlFreeDoc(ctxt->myDoc);
14261
0
       ctxt->myDoc = NULL;
14262
0
    }
14263
0
    if (sax != NULL)
14264
0
  ctxt->sax = NULL;
14265
0
    xmlFreeParserCtxt(ctxt);
14266
14267
0
    return(ret);
14268
0
}
14269
14270
/**
14271
 * xmlSAXParseMemory:
14272
 * @sax:  the SAX handler block
14273
 * @buffer:  an pointer to a char array
14274
 * @size:  the size of the array
14275
 * @recovery:  work in recovery mode, i.e. tries to read not Well Formed
14276
 *             documents
14277
 *
14278
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14279
 *
14280
 * parse an XML in-memory block and use the given SAX function block
14281
 * to handle the parsing callback. If sax is NULL, fallback to the default
14282
 * DOM tree building routines.
14283
 *
14284
 * Returns the resulting document tree
14285
 */
14286
xmlDocPtr
14287
xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14288
0
            int size, int recovery) {
14289
0
    return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
14290
0
}
14291
14292
/**
14293
 * xmlParseMemory:
14294
 * @buffer:  an pointer to a char array
14295
 * @size:  the size of the array
14296
 *
14297
 * DEPRECATED: Use xmlReadMemory.
14298
 *
14299
 * parse an XML in-memory block and build a tree.
14300
 *
14301
 * Returns the resulting document tree
14302
 */
14303
14304
0
xmlDocPtr xmlParseMemory(const char *buffer, int size) {
14305
0
   return(xmlSAXParseMemory(NULL, buffer, size, 0));
14306
0
}
14307
14308
/**
14309
 * xmlRecoverMemory:
14310
 * @buffer:  an pointer to a char array
14311
 * @size:  the size of the array
14312
 *
14313
 * DEPRECATED: Use xmlReadMemory with XML_PARSE_RECOVER.
14314
 *
14315
 * parse an XML in-memory block and build a tree.
14316
 * In the case the document is not Well Formed, an attempt to
14317
 * build a tree is tried anyway
14318
 *
14319
 * Returns the resulting document tree or NULL in case of error
14320
 */
14321
14322
0
xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
14323
0
   return(xmlSAXParseMemory(NULL, buffer, size, 1));
14324
0
}
14325
14326
/**
14327
 * xmlSAXUserParseMemory:
14328
 * @sax:  a SAX handler
14329
 * @user_data:  The user data returned on SAX callbacks
14330
 * @buffer:  an in-memory XML document input
14331
 * @size:  the length of the XML document in bytes
14332
 *
14333
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14334
 *
14335
 * parse an XML in-memory buffer and call the given SAX handler routines.
14336
 *
14337
 * Returns 0 in case of success or a error number otherwise
14338
 */
14339
int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
14340
0
        const char *buffer, int size) {
14341
0
    int ret = 0;
14342
0
    xmlParserCtxtPtr ctxt;
14343
14344
0
    xmlInitParser();
14345
14346
0
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14347
0
    if (ctxt == NULL) return -1;
14348
0
    if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14349
0
        xmlFree(ctxt->sax);
14350
0
    ctxt->sax = sax;
14351
0
    xmlDetectSAX2(ctxt);
14352
14353
0
    if (user_data != NULL)
14354
0
  ctxt->userData = user_data;
14355
14356
0
    xmlParseDocument(ctxt);
14357
14358
0
    if (ctxt->wellFormed)
14359
0
  ret = 0;
14360
0
    else {
14361
0
        if (ctxt->errNo != 0)
14362
0
      ret = ctxt->errNo;
14363
0
  else
14364
0
      ret = -1;
14365
0
    }
14366
0
    if (sax != NULL)
14367
0
        ctxt->sax = NULL;
14368
0
    if (ctxt->myDoc != NULL) {
14369
0
        xmlFreeDoc(ctxt->myDoc);
14370
0
  ctxt->myDoc = NULL;
14371
0
    }
14372
0
    xmlFreeParserCtxt(ctxt);
14373
14374
0
    return ret;
14375
0
}
14376
#endif /* LIBXML_SAX1_ENABLED */
14377
14378
/**
14379
 * xmlCreateDocParserCtxt:
14380
 * @cur:  a pointer to an array of xmlChar
14381
 *
14382
 * Creates a parser context for an XML in-memory document.
14383
 *
14384
 * Returns the new parser context or NULL
14385
 */
14386
xmlParserCtxtPtr
14387
0
xmlCreateDocParserCtxt(const xmlChar *cur) {
14388
0
    int len;
14389
14390
0
    if (cur == NULL)
14391
0
  return(NULL);
14392
0
    len = xmlStrlen(cur);
14393
0
    return(xmlCreateMemoryParserCtxt((const char *)cur, len));
14394
0
}
14395
14396
#ifdef LIBXML_SAX1_ENABLED
14397
/**
14398
 * xmlSAXParseDoc:
14399
 * @sax:  the SAX handler block
14400
 * @cur:  a pointer to an array of xmlChar
14401
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14402
 *             documents
14403
 *
14404
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadDoc.
14405
 *
14406
 * parse an XML in-memory document and build a tree.
14407
 * It use the given SAX function block to handle the parsing callback.
14408
 * If sax is NULL, fallback to the default DOM tree building routines.
14409
 *
14410
 * Returns the resulting document tree
14411
 */
14412
14413
xmlDocPtr
14414
0
xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
14415
0
    xmlDocPtr ret;
14416
0
    xmlParserCtxtPtr ctxt;
14417
0
    xmlSAXHandlerPtr oldsax = NULL;
14418
14419
0
    if (cur == NULL) return(NULL);
14420
14421
14422
0
    ctxt = xmlCreateDocParserCtxt(cur);
14423
0
    if (ctxt == NULL) return(NULL);
14424
0
    if (sax != NULL) {
14425
0
        oldsax = ctxt->sax;
14426
0
        ctxt->sax = sax;
14427
0
        ctxt->userData = NULL;
14428
0
    }
14429
0
    xmlDetectSAX2(ctxt);
14430
14431
0
    xmlParseDocument(ctxt);
14432
0
    if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14433
0
    else {
14434
0
       ret = NULL;
14435
0
       xmlFreeDoc(ctxt->myDoc);
14436
0
       ctxt->myDoc = NULL;
14437
0
    }
14438
0
    if (sax != NULL)
14439
0
  ctxt->sax = oldsax;
14440
0
    xmlFreeParserCtxt(ctxt);
14441
14442
0
    return(ret);
14443
0
}
14444
14445
/**
14446
 * xmlParseDoc:
14447
 * @cur:  a pointer to an array of xmlChar
14448
 *
14449
 * DEPRECATED: Use xmlReadDoc.
14450
 *
14451
 * parse an XML in-memory document and build a tree.
14452
 *
14453
 * Returns the resulting document tree
14454
 */
14455
14456
xmlDocPtr
14457
0
xmlParseDoc(const xmlChar *cur) {
14458
0
    return(xmlSAXParseDoc(NULL, cur, 0));
14459
0
}
14460
#endif /* LIBXML_SAX1_ENABLED */
14461
14462
#ifdef LIBXML_LEGACY_ENABLED
14463
/************************************************************************
14464
 *                  *
14465
 *  Specific function to keep track of entities references    *
14466
 *  and used by the XSLT debugger         *
14467
 *                  *
14468
 ************************************************************************/
14469
14470
static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14471
14472
/**
14473
 * xmlAddEntityReference:
14474
 * @ent : A valid entity
14475
 * @firstNode : A valid first node for children of entity
14476
 * @lastNode : A valid last node of children entity
14477
 *
14478
 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14479
 */
14480
static void
14481
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14482
                      xmlNodePtr lastNode)
14483
{
14484
    if (xmlEntityRefFunc != NULL) {
14485
        (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14486
    }
14487
}
14488
14489
14490
/**
14491
 * xmlSetEntityReferenceFunc:
14492
 * @func: A valid function
14493
 *
14494
 * Set the function to call call back when a xml reference has been made
14495
 */
14496
void
14497
xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14498
{
14499
    xmlEntityRefFunc = func;
14500
}
14501
#endif /* LIBXML_LEGACY_ENABLED */
14502
14503
/************************************************************************
14504
 *                  *
14505
 *        Miscellaneous       *
14506
 *                  *
14507
 ************************************************************************/
14508
14509
/* Non-zero once xmlInitParser() has completed; reset by xmlCleanupParser(). */
static int xmlParserInitialized = 0;

/**
 * xmlInitParser:
 *
 * Initialization function for the XML parser.
 * This is not reentrant. Call once before processing in case of
 * use in multithreaded programs.
 *
 * Returns immediately when the library is already marked initialized.
 * In thread-enabled builds the flag is tested again while holding the
 * global init mutex before the per-module initializers are run.
 */

void
xmlInitParser(void) {
    /*
     * Note that the initialization code must not make memory allocations.
     */
    if (xmlParserInitialized)
        return;

#ifdef LIBXML_THREAD_ENABLED
    __xmlGlobalInitMutexLock();
    if (!xmlParserInitialized) {
#endif
#if defined(_WIN32) && (!defined(LIBXML_STATIC) || defined(LIBXML_STATIC_FOR_DLL))
        /* Only register the exit hook when the default deallocator is in use. */
        if (xmlFree == free)
            atexit(xmlCleanupParser);
#endif

        xmlInitThreadsInternal();
        xmlInitGlobalsInternal();
        xmlInitMemoryInternal();
        __xmlInitializeDict();
        xmlInitEncodingInternal();
        xmlRegisterDefaultInputCallbacks();
#ifdef LIBXML_OUTPUT_ENABLED
        xmlRegisterDefaultOutputCallbacks();
#endif /* LIBXML_OUTPUT_ENABLED */
#if defined(LIBXML_XPATH_ENABLED) || defined(LIBXML_SCHEMAS_ENABLED)
        xmlInitXPathInternal();
#endif
        xmlParserInitialized = 1;
#ifdef LIBXML_THREAD_ENABLED
    }
    __xmlGlobalInitMutexUnlock();
#endif
}
14554
14555
/**
14556
 * xmlCleanupParser:
14557
 *
14558
 * This function name is somewhat misleading. It does not clean up
14559
 * parser state, it cleans up memory allocated by the library itself.
14560
 * It is a cleanup function for the XML library. It tries to reclaim all
14561
 * related global memory allocated for the library processing.
14562
 * It doesn't deallocate any document related memory. One should
14563
 * call xmlCleanupParser() only when the process has finished using
14564
 * the library and all XML/HTML documents built with it.
14565
 * See also xmlInitParser() which has the opposite function of preparing
14566
 * the library for operations.
14567
 *
14568
 * WARNING: if your application is multithreaded or has plugin support
14569
 *          calling this may crash the application if another thread or
14570
 *          a plugin is still using libxml2. It's sometimes very hard to
14571
 *          guess if libxml2 is in use in the application, some libraries
14572
 *          or plugins may use it without notice. In case of doubt abstain
14573
 *          from calling this function or do it just before calling exit()
14574
 *          to avoid leak reports from valgrind !
14575
 */
14576
14577
void
14578
0
xmlCleanupParser(void) {
14579
0
    if (!xmlParserInitialized)
14580
0
  return;
14581
14582
0
    xmlCleanupCharEncodingHandlers();
14583
0
#ifdef LIBXML_CATALOG_ENABLED
14584
0
    xmlCatalogCleanup();
14585
0
#endif
14586
0
    xmlCleanupDictInternal();
14587
0
    xmlCleanupInputCallbacks();
14588
0
#ifdef LIBXML_OUTPUT_ENABLED
14589
0
    xmlCleanupOutputCallbacks();
14590
0
#endif
14591
0
#ifdef LIBXML_SCHEMAS_ENABLED
14592
0
    xmlSchemaCleanupTypes();
14593
0
    xmlRelaxNGCleanupTypes();
14594
0
#endif
14595
0
    xmlCleanupGlobalsInternal();
14596
0
    xmlCleanupThreadsInternal();
14597
0
    xmlCleanupMemoryInternal();
14598
0
    xmlParserInitialized = 0;
14599
0
}
14600
14601
#if defined(HAVE_ATTRIBUTE_DESTRUCTOR) && !defined(LIBXML_STATIC) && \
    !defined(_WIN32)
/*
 * xmlDestructor:
 *
 * Function tagged with ATTRIBUTE_DESTRUCTOR that runs the library cleanup,
 * but only while the default deallocator is still installed.
 */
static void
ATTRIBUTE_DESTRUCTOR
xmlDestructor(void) {
    /*
     * Calling custom deallocation functions in a destructor can cause
     * problems, for example with Nokogiri.
     */
    if (xmlFree != free)
        return;

    xmlCleanupParser();
}
#endif
14614
14615
/************************************************************************
14616
 *                  *
14617
 *  New set (2.6.0) of simpler and more flexible APIs   *
14618
 *                  *
14619
 ************************************************************************/
14620
14621
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named "dict" must be visible where the
 * macro is expanded; a NULL dict means the string is always freed.
 *
 * The body is wrapped in do { } while (0) so that the macro behaves as
 * a single statement and can be used safely inside unbraced if/else.
 * Invoke it with a trailing semicolon.
 */
#define DICT_FREE(str)						\
    do {							\
	if ((str) && ((!dict) ||				\
	    (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
	    xmlFree((char *)(str));				\
    } while (0)
14632
14633
/**
14634
 * xmlCtxtReset:
14635
 * @ctxt: an XML parser context
14636
 *
14637
 * Reset a parser context
14638
 */
14639
void
14640
xmlCtxtReset(xmlParserCtxtPtr ctxt)
14641
0
{
14642
0
    xmlParserInputPtr input;
14643
0
    xmlDictPtr dict;
14644
14645
0
    if (ctxt == NULL)
14646
0
        return;
14647
14648
0
    dict = ctxt->dict;
14649
14650
0
    while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14651
0
        xmlFreeInputStream(input);
14652
0
    }
14653
0
    ctxt->inputNr = 0;
14654
0
    ctxt->input = NULL;
14655
14656
0
    ctxt->spaceNr = 0;
14657
0
    if (ctxt->spaceTab != NULL) {
14658
0
  ctxt->spaceTab[0] = -1;
14659
0
  ctxt->space = &ctxt->spaceTab[0];
14660
0
    } else {
14661
0
        ctxt->space = NULL;
14662
0
    }
14663
14664
14665
0
    ctxt->nodeNr = 0;
14666
0
    ctxt->node = NULL;
14667
14668
0
    ctxt->nameNr = 0;
14669
0
    ctxt->name = NULL;
14670
14671
0
    ctxt->nsNr = 0;
14672
14673
0
    DICT_FREE(ctxt->version);
14674
0
    ctxt->version = NULL;
14675
0
    DICT_FREE(ctxt->encoding);
14676
0
    ctxt->encoding = NULL;
14677
0
    DICT_FREE(ctxt->directory);
14678
0
    ctxt->directory = NULL;
14679
0
    DICT_FREE(ctxt->extSubURI);
14680
0
    ctxt->extSubURI = NULL;
14681
0
    DICT_FREE(ctxt->extSubSystem);
14682
0
    ctxt->extSubSystem = NULL;
14683
0
    if (ctxt->myDoc != NULL)
14684
0
        xmlFreeDoc(ctxt->myDoc);
14685
0
    ctxt->myDoc = NULL;
14686
14687
0
    ctxt->standalone = -1;
14688
0
    ctxt->hasExternalSubset = 0;
14689
0
    ctxt->hasPErefs = 0;
14690
0
    ctxt->html = 0;
14691
0
    ctxt->external = 0;
14692
0
    ctxt->instate = XML_PARSER_START;
14693
0
    ctxt->token = 0;
14694
14695
0
    ctxt->wellFormed = 1;
14696
0
    ctxt->nsWellFormed = 1;
14697
0
    ctxt->disableSAX = 0;
14698
0
    ctxt->valid = 1;
14699
#if 0
14700
    ctxt->vctxt.userData = ctxt;
14701
    ctxt->vctxt.error = xmlParserValidityError;
14702
    ctxt->vctxt.warning = xmlParserValidityWarning;
14703
#endif
14704
0
    ctxt->record_info = 0;
14705
0
    ctxt->checkIndex = 0;
14706
0
    ctxt->endCheckState = 0;
14707
0
    ctxt->inSubset = 0;
14708
0
    ctxt->errNo = XML_ERR_OK;
14709
0
    ctxt->depth = 0;
14710
0
    ctxt->charset = XML_CHAR_ENCODING_UTF8;
14711
0
    ctxt->catalogs = NULL;
14712
0
    ctxt->sizeentities = 0;
14713
0
    ctxt->sizeentcopy = 0;
14714
0
    xmlInitNodeInfoSeq(&ctxt->node_seq);
14715
14716
0
    if (ctxt->attsDefault != NULL) {
14717
0
        xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
14718
0
        ctxt->attsDefault = NULL;
14719
0
    }
14720
0
    if (ctxt->attsSpecial != NULL) {
14721
0
        xmlHashFree(ctxt->attsSpecial, NULL);
14722
0
        ctxt->attsSpecial = NULL;
14723
0
    }
14724
14725
0
#ifdef LIBXML_CATALOG_ENABLED
14726
0
    if (ctxt->catalogs != NULL)
14727
0
  xmlCatalogFreeLocal(ctxt->catalogs);
14728
0
#endif
14729
0
    ctxt->nbErrors = 0;
14730
0
    ctxt->nbWarnings = 0;
14731
0
    if (ctxt->lastError.code != XML_ERR_OK)
14732
0
        xmlResetError(&ctxt->lastError);
14733
0
}
14734
14735
/**
14736
 * xmlCtxtResetPush:
14737
 * @ctxt: an XML parser context
14738
 * @chunk:  a pointer to an array of chars
14739
 * @size:  number of chars in the array
14740
 * @filename:  an optional file name or URI
14741
 * @encoding:  the document encoding, or NULL
14742
 *
14743
 * Reset a push parser context
14744
 *
14745
 * Returns 0 in case of success and 1 in case of error
14746
 */
14747
int
14748
xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14749
                 int size, const char *filename, const char *encoding)
14750
0
{
14751
0
    xmlParserInputPtr inputStream;
14752
0
    xmlParserInputBufferPtr buf;
14753
0
    xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14754
14755
0
    if (ctxt == NULL)
14756
0
        return(1);
14757
14758
0
    if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14759
0
        enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14760
14761
0
    buf = xmlAllocParserInputBuffer(enc);
14762
0
    if (buf == NULL)
14763
0
        return(1);
14764
14765
0
    if (ctxt == NULL) {
14766
0
        xmlFreeParserInputBuffer(buf);
14767
0
        return(1);
14768
0
    }
14769
14770
0
    xmlCtxtReset(ctxt);
14771
14772
0
    if (filename == NULL) {
14773
0
        ctxt->directory = NULL;
14774
0
    } else {
14775
0
        ctxt->directory = xmlParserGetDirectory(filename);
14776
0
    }
14777
14778
0
    inputStream = xmlNewInputStream(ctxt);
14779
0
    if (inputStream == NULL) {
14780
0
        xmlFreeParserInputBuffer(buf);
14781
0
        return(1);
14782
0
    }
14783
14784
0
    if (filename == NULL)
14785
0
        inputStream->filename = NULL;
14786
0
    else
14787
0
        inputStream->filename = (char *)
14788
0
            xmlCanonicPath((const xmlChar *) filename);
14789
0
    inputStream->buf = buf;
14790
0
    xmlBufResetInput(buf->buffer, inputStream);
14791
14792
0
    inputPush(ctxt, inputStream);
14793
14794
0
    if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14795
0
        (ctxt->input->buf != NULL)) {
14796
0
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
14797
0
        size_t cur = ctxt->input->cur - ctxt->input->base;
14798
14799
0
        xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14800
14801
0
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
14802
#ifdef DEBUG_PUSH
14803
        xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14804
#endif
14805
0
    }
14806
14807
0
    if (encoding != NULL) {
14808
0
        xmlCharEncodingHandlerPtr hdlr;
14809
14810
0
        if (ctxt->encoding != NULL)
14811
0
      xmlFree((xmlChar *) ctxt->encoding);
14812
0
        ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14813
14814
0
        hdlr = xmlFindCharEncodingHandler(encoding);
14815
0
        if (hdlr != NULL) {
14816
0
            xmlSwitchToEncoding(ctxt, hdlr);
14817
0
  } else {
14818
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14819
0
            "Unsupported encoding %s\n", BAD_CAST encoding);
14820
0
        }
14821
0
    } else if (enc != XML_CHAR_ENCODING_NONE) {
14822
0
        xmlSwitchEncoding(ctxt, enc);
14823
0
    }
14824
14825
0
    return(0);
14826
0
}
14827
14828
14829
/**
14830
 * xmlCtxtUseOptionsInternal:
14831
 * @ctxt: an XML parser context
14832
 * @options:  a combination of xmlParserOption
14833
 * @encoding:  the user provided encoding to use
14834
 *
14835
 * Applies the options to the parser context
14836
 *
14837
 * Returns 0 in case of success, the set of unknown or unimplemented options
14838
 *         in case of error.
14839
 */
14840
static int
14841
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
14842
836k
{
14843
836k
    if (ctxt == NULL)
14844
0
        return(-1);
14845
836k
    if (encoding != NULL) {
14846
0
        if (ctxt->encoding != NULL)
14847
0
      xmlFree((xmlChar *) ctxt->encoding);
14848
0
        ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14849
0
    }
14850
836k
    if (options & XML_PARSE_RECOVER) {
14851
446k
        ctxt->recovery = 1;
14852
446k
        options -= XML_PARSE_RECOVER;
14853
446k
  ctxt->options |= XML_PARSE_RECOVER;
14854
446k
    } else
14855
389k
        ctxt->recovery = 0;
14856
836k
    if (options & XML_PARSE_DTDLOAD) {
14857
607k
        ctxt->loadsubset = XML_DETECT_IDS;
14858
607k
        options -= XML_PARSE_DTDLOAD;
14859
607k
  ctxt->options |= XML_PARSE_DTDLOAD;
14860
607k
    } else
14861
228k
        ctxt->loadsubset = 0;
14862
836k
    if (options & XML_PARSE_DTDATTR) {
14863
283k
        ctxt->loadsubset |= XML_COMPLETE_ATTRS;
14864
283k
        options -= XML_PARSE_DTDATTR;
14865
283k
  ctxt->options |= XML_PARSE_DTDATTR;
14866
283k
    }
14867
836k
    if (options & XML_PARSE_NOENT) {
14868
515k
        ctxt->replaceEntities = 1;
14869
        /* ctxt->loadsubset |= XML_DETECT_IDS; */
14870
515k
        options -= XML_PARSE_NOENT;
14871
515k
  ctxt->options |= XML_PARSE_NOENT;
14872
515k
    } else
14873
321k
        ctxt->replaceEntities = 0;
14874
836k
    if (options & XML_PARSE_PEDANTIC) {
14875
227k
        ctxt->pedantic = 1;
14876
227k
        options -= XML_PARSE_PEDANTIC;
14877
227k
  ctxt->options |= XML_PARSE_PEDANTIC;
14878
227k
    } else
14879
608k
        ctxt->pedantic = 0;
14880
836k
    if (options & XML_PARSE_NOBLANKS) {
14881
302k
        ctxt->keepBlanks = 0;
14882
302k
        ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
14883
302k
        options -= XML_PARSE_NOBLANKS;
14884
302k
  ctxt->options |= XML_PARSE_NOBLANKS;
14885
302k
    } else
14886
534k
        ctxt->keepBlanks = 1;
14887
836k
    if (options & XML_PARSE_DTDVALID) {
14888
358k
        ctxt->validate = 1;
14889
358k
        if (options & XML_PARSE_NOWARNING)
14890
259k
            ctxt->vctxt.warning = NULL;
14891
358k
        if (options & XML_PARSE_NOERROR)
14892
249k
            ctxt->vctxt.error = NULL;
14893
358k
        options -= XML_PARSE_DTDVALID;
14894
358k
  ctxt->options |= XML_PARSE_DTDVALID;
14895
358k
    } else
14896
478k
        ctxt->validate = 0;
14897
836k
    if (options & XML_PARSE_NOWARNING) {
14898
344k
        ctxt->sax->warning = NULL;
14899
344k
        options -= XML_PARSE_NOWARNING;
14900
344k
    }
14901
836k
    if (options & XML_PARSE_NOERROR) {
14902
351k
        ctxt->sax->error = NULL;
14903
351k
        ctxt->sax->fatalError = NULL;
14904
351k
        options -= XML_PARSE_NOERROR;
14905
351k
    }
14906
836k
#ifdef LIBXML_SAX1_ENABLED
14907
836k
    if (options & XML_PARSE_SAX1) {
14908
326k
        ctxt->sax->startElement = xmlSAX2StartElement;
14909
326k
        ctxt->sax->endElement = xmlSAX2EndElement;
14910
326k
        ctxt->sax->startElementNs = NULL;
14911
326k
        ctxt->sax->endElementNs = NULL;
14912
326k
        ctxt->sax->initialized = 1;
14913
326k
        options -= XML_PARSE_SAX1;
14914
326k
  ctxt->options |= XML_PARSE_SAX1;
14915
326k
    }
14916
836k
#endif /* LIBXML_SAX1_ENABLED */
14917
836k
    if (options & XML_PARSE_NODICT) {
14918
320k
        ctxt->dictNames = 0;
14919
320k
        options -= XML_PARSE_NODICT;
14920
320k
  ctxt->options |= XML_PARSE_NODICT;
14921
516k
    } else {
14922
516k
        ctxt->dictNames = 1;
14923
516k
    }
14924
836k
    if (options & XML_PARSE_NOCDATA) {
14925
336k
        ctxt->sax->cdataBlock = NULL;
14926
336k
        options -= XML_PARSE_NOCDATA;
14927
336k
  ctxt->options |= XML_PARSE_NOCDATA;
14928
336k
    }
14929
836k
    if (options & XML_PARSE_NSCLEAN) {
14930
380k
  ctxt->options |= XML_PARSE_NSCLEAN;
14931
380k
        options -= XML_PARSE_NSCLEAN;
14932
380k
    }
14933
836k
    if (options & XML_PARSE_NONET) {
14934
281k
  ctxt->options |= XML_PARSE_NONET;
14935
281k
        options -= XML_PARSE_NONET;
14936
281k
    }
14937
836k
    if (options & XML_PARSE_COMPACT) {
14938
447k
  ctxt->options |= XML_PARSE_COMPACT;
14939
447k
        options -= XML_PARSE_COMPACT;
14940
447k
    }
14941
836k
    if (options & XML_PARSE_OLD10) {
14942
272k
  ctxt->options |= XML_PARSE_OLD10;
14943
272k
        options -= XML_PARSE_OLD10;
14944
272k
    }
14945
836k
    if (options & XML_PARSE_NOBASEFIX) {
14946
304k
  ctxt->options |= XML_PARSE_NOBASEFIX;
14947
304k
        options -= XML_PARSE_NOBASEFIX;
14948
304k
    }
14949
836k
    if (options & XML_PARSE_HUGE) {
14950
236k
  ctxt->options |= XML_PARSE_HUGE;
14951
236k
        options -= XML_PARSE_HUGE;
14952
236k
        if (ctxt->dict != NULL)
14953
236k
            xmlDictSetLimit(ctxt->dict, 0);
14954
236k
    }
14955
836k
    if (options & XML_PARSE_OLDSAX) {
14956
226k
  ctxt->options |= XML_PARSE_OLDSAX;
14957
226k
        options -= XML_PARSE_OLDSAX;
14958
226k
    }
14959
836k
    if (options & XML_PARSE_IGNORE_ENC) {
14960
297k
  ctxt->options |= XML_PARSE_IGNORE_ENC;
14961
297k
        options -= XML_PARSE_IGNORE_ENC;
14962
297k
    }
14963
836k
    if (options & XML_PARSE_BIG_LINES) {
14964
278k
  ctxt->options |= XML_PARSE_BIG_LINES;
14965
278k
        options -= XML_PARSE_BIG_LINES;
14966
278k
    }
14967
836k
    ctxt->linenumbers = 1;
14968
836k
    return (options);
14969
836k
}
14970
14971
/**
14972
 * xmlCtxtUseOptions:
14973
 * @ctxt: an XML parser context
14974
 * @options:  a combination of xmlParserOption
14975
 *
14976
 * Applies the options to the parser context
14977
 *
14978
 * Returns 0 in case of success, the set of unknown or unimplemented options
14979
 *         in case of error.
14980
 */
14981
int
14982
xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
14983
557k
{
14984
557k
   return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
14985
557k
}
14986
14987
/**
14988
 * xmlDoRead:
14989
 * @ctxt:  an XML parser context
14990
 * @URL:  the base URL to use for the document
14991
 * @encoding:  the document encoding, or NULL
14992
 * @options:  a combination of xmlParserOption
14993
 * @reuse:  keep the context for reuse
14994
 *
14995
 * Common front-end for the xmlRead functions
14996
 *
14997
 * Returns the resulting document tree or NULL
14998
 */
14999
static xmlDocPtr
15000
xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
15001
          int options, int reuse)
15002
278k
{
15003
278k
    xmlDocPtr ret;
15004
15005
278k
    xmlCtxtUseOptionsInternal(ctxt, options, encoding);
15006
278k
    if (encoding != NULL) {
15007
0
        xmlCharEncodingHandlerPtr hdlr;
15008
15009
0
  hdlr = xmlFindCharEncodingHandler(encoding);
15010
0
  if (hdlr != NULL)
15011
0
      xmlSwitchToEncoding(ctxt, hdlr);
15012
0
    }
15013
278k
    if ((URL != NULL) && (ctxt->input != NULL) &&
15014
278k
        (ctxt->input->filename == NULL))
15015
278k
        ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
15016
278k
    xmlParseDocument(ctxt);
15017
278k
    if ((ctxt->wellFormed) || ctxt->recovery)
15018
162k
        ret = ctxt->myDoc;
15019
115k
    else {
15020
115k
        ret = NULL;
15021
115k
  if (ctxt->myDoc != NULL) {
15022
99.6k
      xmlFreeDoc(ctxt->myDoc);
15023
99.6k
  }
15024
115k
    }
15025
278k
    ctxt->myDoc = NULL;
15026
278k
    if (!reuse) {
15027
278k
  xmlFreeParserCtxt(ctxt);
15028
278k
    }
15029
15030
278k
    return (ret);
15031
278k
}
15032
15033
/**
15034
 * xmlReadDoc:
15035
 * @cur:  a pointer to a zero terminated string
15036
 * @URL:  the base URL to use for the document
15037
 * @encoding:  the document encoding, or NULL
15038
 * @options:  a combination of xmlParserOption
15039
 *
15040
 * parse an XML in-memory document and build a tree.
15041
 *
15042
 * Returns the resulting document tree
15043
 */
15044
xmlDocPtr
15045
xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
15046
0
{
15047
0
    xmlParserCtxtPtr ctxt;
15048
15049
0
    if (cur == NULL)
15050
0
        return (NULL);
15051
0
    xmlInitParser();
15052
15053
0
    ctxt = xmlCreateDocParserCtxt(cur);
15054
0
    if (ctxt == NULL)
15055
0
        return (NULL);
15056
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15057
0
}
15058
15059
/**
15060
 * xmlReadFile:
15061
 * @filename:  a file or URL
15062
 * @encoding:  the document encoding, or NULL
15063
 * @options:  a combination of xmlParserOption
15064
 *
15065
 * parse an XML file from the filesystem or the network.
15066
 *
15067
 * Returns the resulting document tree
15068
 */
15069
xmlDocPtr
15070
xmlReadFile(const char *filename, const char *encoding, int options)
15071
0
{
15072
0
    xmlParserCtxtPtr ctxt;
15073
15074
0
    xmlInitParser();
15075
0
    ctxt = xmlCreateURLParserCtxt(filename, options);
15076
0
    if (ctxt == NULL)
15077
0
        return (NULL);
15078
0
    return (xmlDoRead(ctxt, NULL, encoding, options, 0));
15079
0
}
15080
15081
/**
15082
 * xmlReadMemory:
15083
 * @buffer:  a pointer to a char array
15084
 * @size:  the size of the array
15085
 * @URL:  the base URL to use for the document
15086
 * @encoding:  the document encoding, or NULL
15087
 * @options:  a combination of xmlParserOption
15088
 *
15089
 * parse an XML in-memory document and build a tree.
15090
 *
15091
 * Returns the resulting document tree
15092
 */
15093
xmlDocPtr
15094
xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
15095
278k
{
15096
278k
    xmlParserCtxtPtr ctxt;
15097
15098
278k
    xmlInitParser();
15099
278k
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
15100
278k
    if (ctxt == NULL)
15101
547
        return (NULL);
15102
278k
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15103
278k
}
15104
15105
/**
15106
 * xmlReadFd:
15107
 * @fd:  an open file descriptor
15108
 * @URL:  the base URL to use for the document
15109
 * @encoding:  the document encoding, or NULL
15110
 * @options:  a combination of xmlParserOption
15111
 *
15112
 * parse an XML from a file descriptor and build a tree.
15113
 * NOTE that the file descriptor will not be closed when the
15114
 *      reader is closed or reset.
15115
 *
15116
 * Returns the resulting document tree
15117
 */
15118
xmlDocPtr
15119
xmlReadFd(int fd, const char *URL, const char *encoding, int options)
15120
0
{
15121
0
    xmlParserCtxtPtr ctxt;
15122
0
    xmlParserInputBufferPtr input;
15123
0
    xmlParserInputPtr stream;
15124
15125
0
    if (fd < 0)
15126
0
        return (NULL);
15127
0
    xmlInitParser();
15128
15129
0
    input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15130
0
    if (input == NULL)
15131
0
        return (NULL);
15132
0
    input->closecallback = NULL;
15133
0
    ctxt = xmlNewParserCtxt();
15134
0
    if (ctxt == NULL) {
15135
0
        xmlFreeParserInputBuffer(input);
15136
0
        return (NULL);
15137
0
    }
15138
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15139
0
    if (stream == NULL) {
15140
0
        xmlFreeParserInputBuffer(input);
15141
0
  xmlFreeParserCtxt(ctxt);
15142
0
        return (NULL);
15143
0
    }
15144
0
    inputPush(ctxt, stream);
15145
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15146
0
}
15147
15148
/**
15149
 * xmlReadIO:
15150
 * @ioread:  an I/O read function
15151
 * @ioclose:  an I/O close function
15152
 * @ioctx:  an I/O handler
15153
 * @URL:  the base URL to use for the document
15154
 * @encoding:  the document encoding, or NULL
15155
 * @options:  a combination of xmlParserOption
15156
 *
15157
 * parse an XML document from I/O functions and source and build a tree.
15158
 *
15159
 * Returns the resulting document tree
15160
 */
15161
xmlDocPtr
15162
xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
15163
          void *ioctx, const char *URL, const char *encoding, int options)
15164
0
{
15165
0
    xmlParserCtxtPtr ctxt;
15166
0
    xmlParserInputBufferPtr input;
15167
0
    xmlParserInputPtr stream;
15168
15169
0
    if (ioread == NULL)
15170
0
        return (NULL);
15171
0
    xmlInitParser();
15172
15173
0
    input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15174
0
                                         XML_CHAR_ENCODING_NONE);
15175
0
    if (input == NULL) {
15176
0
        if (ioclose != NULL)
15177
0
            ioclose(ioctx);
15178
0
        return (NULL);
15179
0
    }
15180
0
    ctxt = xmlNewParserCtxt();
15181
0
    if (ctxt == NULL) {
15182
0
        xmlFreeParserInputBuffer(input);
15183
0
        return (NULL);
15184
0
    }
15185
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15186
0
    if (stream == NULL) {
15187
0
        xmlFreeParserInputBuffer(input);
15188
0
  xmlFreeParserCtxt(ctxt);
15189
0
        return (NULL);
15190
0
    }
15191
0
    inputPush(ctxt, stream);
15192
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15193
0
}
15194
15195
/**
15196
 * xmlCtxtReadDoc:
15197
 * @ctxt:  an XML parser context
15198
 * @cur:  a pointer to a zero terminated string
15199
 * @URL:  the base URL to use for the document
15200
 * @encoding:  the document encoding, or NULL
15201
 * @options:  a combination of xmlParserOption
15202
 *
15203
 * parse an XML in-memory document and build a tree.
15204
 * This reuses the existing @ctxt parser context
15205
 *
15206
 * Returns the resulting document tree
15207
 */
15208
xmlDocPtr
15209
xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
15210
               const char *URL, const char *encoding, int options)
15211
0
{
15212
0
    if (cur == NULL)
15213
0
        return (NULL);
15214
0
    return (xmlCtxtReadMemory(ctxt, (const char *) cur, xmlStrlen(cur), URL,
15215
0
                              encoding, options));
15216
0
}
15217
15218
/**
15219
 * xmlCtxtReadFile:
15220
 * @ctxt:  an XML parser context
15221
 * @filename:  a file or URL
15222
 * @encoding:  the document encoding, or NULL
15223
 * @options:  a combination of xmlParserOption
15224
 *
15225
 * parse an XML file from the filesystem or the network.
15226
 * This reuses the existing @ctxt parser context
15227
 *
15228
 * Returns the resulting document tree
15229
 */
15230
xmlDocPtr
15231
xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15232
                const char *encoding, int options)
15233
0
{
15234
0
    xmlParserInputPtr stream;
15235
15236
0
    if (filename == NULL)
15237
0
        return (NULL);
15238
0
    if (ctxt == NULL)
15239
0
        return (NULL);
15240
0
    xmlInitParser();
15241
15242
0
    xmlCtxtReset(ctxt);
15243
15244
0
    stream = xmlLoadExternalEntity(filename, NULL, ctxt);
15245
0
    if (stream == NULL) {
15246
0
        return (NULL);
15247
0
    }
15248
0
    inputPush(ctxt, stream);
15249
0
    return (xmlDoRead(ctxt, NULL, encoding, options, 1));
15250
0
}
15251
15252
/**
15253
 * xmlCtxtReadMemory:
15254
 * @ctxt:  an XML parser context
15255
 * @buffer:  a pointer to a char array
15256
 * @size:  the size of the array
15257
 * @URL:  the base URL to use for the document
15258
 * @encoding:  the document encoding, or NULL
15259
 * @options:  a combination of xmlParserOption
15260
 *
15261
 * parse an XML in-memory document and build a tree.
15262
 * This reuses the existing @ctxt parser context
15263
 *
15264
 * Returns the resulting document tree
15265
 */
15266
xmlDocPtr
15267
xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
15268
                  const char *URL, const char *encoding, int options)
15269
0
{
15270
0
    xmlParserInputBufferPtr input;
15271
0
    xmlParserInputPtr stream;
15272
15273
0
    if (ctxt == NULL)
15274
0
        return (NULL);
15275
0
    if (buffer == NULL)
15276
0
        return (NULL);
15277
0
    xmlInitParser();
15278
15279
0
    xmlCtxtReset(ctxt);
15280
15281
0
    input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
15282
0
    if (input == NULL) {
15283
0
  return(NULL);
15284
0
    }
15285
15286
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15287
0
    if (stream == NULL) {
15288
0
  xmlFreeParserInputBuffer(input);
15289
0
  return(NULL);
15290
0
    }
15291
15292
0
    inputPush(ctxt, stream);
15293
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15294
0
}
15295
15296
/**
15297
 * xmlCtxtReadFd:
15298
 * @ctxt:  an XML parser context
15299
 * @fd:  an open file descriptor
15300
 * @URL:  the base URL to use for the document
15301
 * @encoding:  the document encoding, or NULL
15302
 * @options:  a combination of xmlParserOption
15303
 *
15304
 * parse an XML from a file descriptor and build a tree.
15305
 * This reuses the existing @ctxt parser context
15306
 * NOTE that the file descriptor will not be closed when the
15307
 *      reader is closed or reset.
15308
 *
15309
 * Returns the resulting document tree
15310
 */
15311
xmlDocPtr
15312
xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15313
              const char *URL, const char *encoding, int options)
15314
0
{
15315
0
    xmlParserInputBufferPtr input;
15316
0
    xmlParserInputPtr stream;
15317
15318
0
    if (fd < 0)
15319
0
        return (NULL);
15320
0
    if (ctxt == NULL)
15321
0
        return (NULL);
15322
0
    xmlInitParser();
15323
15324
0
    xmlCtxtReset(ctxt);
15325
15326
15327
0
    input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15328
0
    if (input == NULL)
15329
0
        return (NULL);
15330
0
    input->closecallback = NULL;
15331
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15332
0
    if (stream == NULL) {
15333
0
        xmlFreeParserInputBuffer(input);
15334
0
        return (NULL);
15335
0
    }
15336
0
    inputPush(ctxt, stream);
15337
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15338
0
}
15339
15340
/**
15341
 * xmlCtxtReadIO:
15342
 * @ctxt:  an XML parser context
15343
 * @ioread:  an I/O read function
15344
 * @ioclose:  an I/O close function
15345
 * @ioctx:  an I/O handler
15346
 * @URL:  the base URL to use for the document
15347
 * @encoding:  the document encoding, or NULL
15348
 * @options:  a combination of xmlParserOption
15349
 *
15350
 * parse an XML document from I/O functions and source and build a tree.
15351
 * This reuses the existing @ctxt parser context
15352
 *
15353
 * Returns the resulting document tree
15354
 */
15355
xmlDocPtr
15356
xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15357
              xmlInputCloseCallback ioclose, void *ioctx,
15358
        const char *URL,
15359
              const char *encoding, int options)
15360
0
{
15361
0
    xmlParserInputBufferPtr input;
15362
0
    xmlParserInputPtr stream;
15363
15364
0
    if (ioread == NULL)
15365
0
        return (NULL);
15366
0
    if (ctxt == NULL)
15367
0
        return (NULL);
15368
0
    xmlInitParser();
15369
15370
0
    xmlCtxtReset(ctxt);
15371
15372
0
    input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15373
0
                                         XML_CHAR_ENCODING_NONE);
15374
0
    if (input == NULL) {
15375
0
        if (ioclose != NULL)
15376
0
            ioclose(ioctx);
15377
0
        return (NULL);
15378
0
    }
15379
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15380
0
    if (stream == NULL) {
15381
0
        xmlFreeParserInputBuffer(input);
15382
0
        return (NULL);
15383
0
    }
15384
0
    inputPush(ctxt, stream);
15385
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15386
0
}
15387