Coverage Report

Created: 2023-12-14 14:10

/src/libxml2/parser.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3
 *            implemented on top of the SAX interfaces
4
 *
5
 * References:
6
 *   The XML specification:
7
 *     http://www.w3.org/TR/REC-xml
8
 *   Original 1.0 version:
9
 *     http://www.w3.org/TR/1998/REC-xml-19980210
10
 *   XML second edition working draft
11
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
12
 *
13
 * Okay this is a big file, the parser core is around 7000 lines, then it
14
 * is followed by the progressive parser top routines, then the various
15
 * high level APIs to call the parser and a few miscellaneous functions.
16
 * A number of helper functions and deprecated ones have been moved to
17
 * parserInternals.c to reduce this file size.
18
 * As much as possible the functions are associated with their relative
19
 * production in the XML specification. A few productions defining the
20
 * different ranges of character are actually implemented either in
21
 * parserInternals.h or parserInternals.c
22
 * The DOM tree build is realized from the default SAX callbacks in
23
 * the module SAX.c.
24
 * The routines doing the validation checks are in valid.c and called either
25
 * from the SAX callbacks or as standalone functions using a preparsed
26
 * document.
27
 *
28
 * See Copyright for the status of this software.
29
 *
30
 * daniel@veillard.com
31
 */
32
33
/* To avoid EBCDIC trouble when parsing on zOS */
34
#if defined(__MVS__)
35
#pragma convert("ISO8859-1")
36
#endif
37
38
#define IN_LIBXML
39
#include "libxml.h"
40
41
#if defined(_WIN32)
42
#define XML_DIR_SEP '\\'
43
#else
44
#define XML_DIR_SEP '/'
45
#endif
46
47
#include <stdlib.h>
48
#include <limits.h>
49
#include <string.h>
50
#include <stdarg.h>
51
#include <stddef.h>
52
#include <ctype.h>
53
#include <stdlib.h>
54
#include <libxml/xmlmemory.h>
55
#include <libxml/threads.h>
56
#include <libxml/globals.h>
57
#include <libxml/tree.h>
58
#include <libxml/parser.h>
59
#include <libxml/parserInternals.h>
60
#include <libxml/HTMLparser.h>
61
#include <libxml/valid.h>
62
#include <libxml/entities.h>
63
#include <libxml/xmlerror.h>
64
#include <libxml/encoding.h>
65
#include <libxml/xmlIO.h>
66
#include <libxml/uri.h>
67
#ifdef LIBXML_CATALOG_ENABLED
68
#include <libxml/catalog.h>
69
#endif
70
#ifdef LIBXML_SCHEMAS_ENABLED
71
#include <libxml/xmlschemastypes.h>
72
#include <libxml/relaxng.h>
73
#endif
74
#if defined(LIBXML_XPATH_ENABLED) || defined(LIBXML_SCHEMAS_ENABLED)
75
#include <libxml/xpath.h>
76
#endif
77
78
#include "private/buf.h"
79
#include "private/dict.h"
80
#include "private/enc.h"
81
#include "private/entities.h"
82
#include "private/error.h"
83
#include "private/globals.h"
84
#include "private/html.h"
85
#include "private/io.h"
86
#include "private/memory.h"
87
#include "private/parser.h"
88
#include "private/threads.h"
89
#include "private/xpath.h"
90
91
struct _xmlStartTag {
92
    const xmlChar *prefix;
93
    const xmlChar *URI;
94
    int line;
95
    int nsNr;
96
};
97
98
static xmlParserCtxtPtr
99
xmlCreateEntityParserCtxtInternal(xmlSAXHandlerPtr sax, void *userData,
100
        const xmlChar *URL, const xmlChar *ID, const xmlChar *base,
101
        xmlParserCtxtPtr pctx);
102
103
static void xmlHaltParser(xmlParserCtxtPtr ctxt);
104
105
static int
106
xmlParseElementStart(xmlParserCtxtPtr ctxt);
107
108
static void
109
xmlParseElementEnd(xmlParserCtxtPtr ctxt);
110
111
/************************************************************************
112
 *                  *
113
 *  Arbitrary limits set in the parser. See XML_PARSE_HUGE    *
114
 *                  *
115
 ************************************************************************/
116
117
4.68M
#define XML_MAX_HUGE_LENGTH 1000000000
118
119
#define XML_PARSER_BIG_ENTITY 1000
120
#define XML_PARSER_LOT_ENTITY 5000
121
122
/*
123
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
124
 *    replacement over the size in byte of the input indicates that you have
125
 *    an exponential behaviour. A value of 10 corresponds to at least 3 entity
126
 *    replacement per byte of input.
127
 */
128
635
#define XML_PARSER_NON_LINEAR 10
129
130
44.8M
#define XML_ENT_FIXED_COST 50
131
132
/**
133
 * xmlParserMaxDepth:
134
 *
135
 * arbitrary depth limit for the XML documents that we allow to
136
 * process. This is not a limitation of the parser but a safety
137
 * boundary feature. It can be disabled with the XML_PARSE_HUGE
138
 * parser option.
139
 */
140
unsigned int xmlParserMaxDepth = 256;
141
142
143
144
#define SAX2 1
145
193M
#define XML_PARSER_BIG_BUFFER_SIZE 300
146
8.93G
#define XML_PARSER_BUFFER_SIZE 100
147
866k
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
148
149
/**
150
 * XML_PARSER_CHUNK_SIZE
151
 *
152
 * When calling GROW that's the minimal amount of data
153
 * the parser expected to have received. It is not a hard
154
 * limit but an optimization when reading strings like Names
155
 * It is not strictly needed as long as inputs available characters
156
 * are followed by 0, which should be provided by the I/O level
157
 */
158
64.0M
#define XML_PARSER_CHUNK_SIZE 100
159
160
/*
161
 * List of XML prefixed PI allowed by W3C specs
162
 */
163
164
static const char* const xmlW3CPIs[] = {
165
    "xml-stylesheet",
166
    "xml-model",
167
    NULL
168
};
169
170
171
/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
172
static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
173
                                              const xmlChar **str);
174
175
static xmlParserErrors
176
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
177
                xmlSAXHandlerPtr sax,
178
          void *user_data, int depth, const xmlChar *URL,
179
          const xmlChar *ID, xmlNodePtr *list);
180
181
static int
182
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
183
                          const char *encoding);
184
#ifdef LIBXML_LEGACY_ENABLED
185
static void
186
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
187
                      xmlNodePtr lastNode);
188
#endif /* LIBXML_LEGACY_ENABLED */
189
190
static xmlParserErrors
191
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
192
          const xmlChar *string, void *user_data, xmlNodePtr *lst);
193
194
static int
195
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
196
197
/************************************************************************
198
 *                  *
199
 *    Some factorized error routines        *
200
 *                  *
201
 ************************************************************************/
202
203
/**
204
 * xmlErrAttributeDup:
205
 * @ctxt:  an XML parser context
206
 * @prefix:  the attribute prefix
207
 * @localname:  the attribute localname
208
 *
209
 * Handle a redefinition of attribute error
210
 */
211
static void
212
xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
213
                   const xmlChar * localname)
214
119k
{
215
119k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
216
119k
        (ctxt->instate == XML_PARSER_EOF))
217
22
  return;
218
119k
    if (ctxt != NULL)
219
119k
  ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
220
221
119k
    if (prefix == NULL)
222
78.8k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
223
78.8k
                        XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
224
78.8k
                        (const char *) localname, NULL, NULL, 0, 0,
225
78.8k
                        "Attribute %s redefined\n", localname);
226
40.4k
    else
227
40.4k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
228
40.4k
                        XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
229
40.4k
                        (const char *) prefix, (const char *) localname,
230
40.4k
                        NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
231
40.4k
                        localname);
232
119k
    if (ctxt != NULL) {
233
119k
  ctxt->wellFormed = 0;
234
119k
  if (ctxt->recovery == 0)
235
27.9k
      ctxt->disableSAX = 1;
236
119k
    }
237
119k
}
238
239
/**
240
 * xmlFatalErr:
241
 * @ctxt:  an XML parser context
242
 * @error:  the error number
243
 * @extra:  extra information string
244
 *
245
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
246
 */
247
static void
248
xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
249
6.78M
{
250
6.78M
    const char *errmsg;
251
252
6.78M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
253
6.78M
        (ctxt->instate == XML_PARSER_EOF))
254
31.0k
  return;
255
6.75M
    switch (error) {
256
138k
        case XML_ERR_INVALID_HEX_CHARREF:
257
138k
            errmsg = "CharRef: invalid hexadecimal value";
258
138k
            break;
259
265k
        case XML_ERR_INVALID_DEC_CHARREF:
260
265k
            errmsg = "CharRef: invalid decimal value";
261
265k
            break;
262
0
        case XML_ERR_INVALID_CHARREF:
263
0
            errmsg = "CharRef: invalid value";
264
0
            break;
265
3.01M
        case XML_ERR_INTERNAL_ERROR:
266
3.01M
            errmsg = "internal error";
267
3.01M
            break;
268
0
        case XML_ERR_PEREF_AT_EOF:
269
0
            errmsg = "PEReference at end of document";
270
0
            break;
271
0
        case XML_ERR_PEREF_IN_PROLOG:
272
0
            errmsg = "PEReference in prolog";
273
0
            break;
274
0
        case XML_ERR_PEREF_IN_EPILOG:
275
0
            errmsg = "PEReference in epilog";
276
0
            break;
277
0
        case XML_ERR_PEREF_NO_NAME:
278
0
            errmsg = "PEReference: no name";
279
0
            break;
280
16.1k
        case XML_ERR_PEREF_SEMICOL_MISSING:
281
16.1k
            errmsg = "PEReference: expecting ';'";
282
16.1k
            break;
283
1.65k
        case XML_ERR_ENTITY_LOOP:
284
1.65k
            errmsg = "Detected an entity reference loop";
285
1.65k
            break;
286
0
        case XML_ERR_ENTITY_NOT_STARTED:
287
0
            errmsg = "EntityValue: \" or ' expected";
288
0
            break;
289
6.36k
        case XML_ERR_ENTITY_PE_INTERNAL:
290
6.36k
            errmsg = "PEReferences forbidden in internal subset";
291
6.36k
            break;
292
5.32k
        case XML_ERR_ENTITY_NOT_FINISHED:
293
5.32k
            errmsg = "EntityValue: \" or ' expected";
294
5.32k
            break;
295
138k
        case XML_ERR_ATTRIBUTE_NOT_STARTED:
296
138k
            errmsg = "AttValue: \" or ' expected";
297
138k
            break;
298
648k
        case XML_ERR_LT_IN_ATTRIBUTE:
299
648k
            errmsg = "Unescaped '<' not allowed in attributes values";
300
648k
            break;
301
20.2k
        case XML_ERR_LITERAL_NOT_STARTED:
302
20.2k
            errmsg = "SystemLiteral \" or ' expected";
303
20.2k
            break;
304
20.0k
        case XML_ERR_LITERAL_NOT_FINISHED:
305
20.0k
            errmsg = "Unfinished System or Public ID \" or ' expected";
306
20.0k
            break;
307
266k
        case XML_ERR_MISPLACED_CDATA_END:
308
266k
            errmsg = "Sequence ']]>' not allowed in content";
309
266k
            break;
310
17.2k
        case XML_ERR_URI_REQUIRED:
311
17.2k
            errmsg = "SYSTEM or PUBLIC, the URI is missing";
312
17.2k
            break;
313
3.00k
        case XML_ERR_PUBID_REQUIRED:
314
3.00k
            errmsg = "PUBLIC, the Public Identifier is missing";
315
3.00k
            break;
316
92.4k
        case XML_ERR_HYPHEN_IN_COMMENT:
317
92.4k
            errmsg = "Comment must not contain '--' (double-hyphen)";
318
92.4k
            break;
319
144k
        case XML_ERR_PI_NOT_STARTED:
320
144k
            errmsg = "xmlParsePI : no target name";
321
144k
            break;
322
16.9k
        case XML_ERR_RESERVED_XML_NAME:
323
16.9k
            errmsg = "Invalid PI name";
324
16.9k
            break;
325
1.40k
        case XML_ERR_NOTATION_NOT_STARTED:
326
1.40k
            errmsg = "NOTATION: Name expected here";
327
1.40k
            break;
328
11.4k
        case XML_ERR_NOTATION_NOT_FINISHED:
329
11.4k
            errmsg = "'>' required to close NOTATION declaration";
330
11.4k
            break;
331
20.5k
        case XML_ERR_VALUE_REQUIRED:
332
20.5k
            errmsg = "Entity value required";
333
20.5k
            break;
334
3.81k
        case XML_ERR_URI_FRAGMENT:
335
3.81k
            errmsg = "Fragment not allowed";
336
3.81k
            break;
337
21.4k
        case XML_ERR_ATTLIST_NOT_STARTED:
338
21.4k
            errmsg = "'(' required to start ATTLIST enumeration";
339
21.4k
            break;
340
2.58k
        case XML_ERR_NMTOKEN_REQUIRED:
341
2.58k
            errmsg = "NmToken expected in ATTLIST enumeration";
342
2.58k
            break;
343
4.94k
        case XML_ERR_ATTLIST_NOT_FINISHED:
344
4.94k
            errmsg = "')' required to finish ATTLIST enumeration";
345
4.94k
            break;
346
5.55k
        case XML_ERR_MIXED_NOT_STARTED:
347
5.55k
            errmsg = "MixedContentDecl : '|' or ')*' expected";
348
5.55k
            break;
349
0
        case XML_ERR_PCDATA_REQUIRED:
350
0
            errmsg = "MixedContentDecl : '#PCDATA' expected";
351
0
            break;
352
13.3k
        case XML_ERR_ELEMCONTENT_NOT_STARTED:
353
13.3k
            errmsg = "ContentDecl : Name or '(' expected";
354
13.3k
            break;
355
32.4k
        case XML_ERR_ELEMCONTENT_NOT_FINISHED:
356
32.4k
            errmsg = "ContentDecl : ',' '|' or ')' expected";
357
32.4k
            break;
358
0
        case XML_ERR_PEREF_IN_INT_SUBSET:
359
0
            errmsg =
360
0
                "PEReference: forbidden within markup decl in internal subset";
361
0
            break;
362
601k
        case XML_ERR_GT_REQUIRED:
363
601k
            errmsg = "expected '>'";
364
601k
            break;
365
526
        case XML_ERR_CONDSEC_INVALID:
366
526
            errmsg = "XML conditional section '[' expected";
367
526
            break;
368
23.0k
        case XML_ERR_EXT_SUBSET_NOT_FINISHED:
369
23.0k
            errmsg = "Content error in the external subset";
370
23.0k
            break;
371
2.06k
        case XML_ERR_CONDSEC_INVALID_KEYWORD:
372
2.06k
            errmsg =
373
2.06k
                "conditional section INCLUDE or IGNORE keyword expected";
374
2.06k
            break;
375
3.18k
        case XML_ERR_CONDSEC_NOT_FINISHED:
376
3.18k
            errmsg = "XML conditional section not closed";
377
3.18k
            break;
378
577
        case XML_ERR_XMLDECL_NOT_STARTED:
379
577
            errmsg = "Text declaration '<?xml' required";
380
577
            break;
381
208k
        case XML_ERR_XMLDECL_NOT_FINISHED:
382
208k
            errmsg = "parsing XML declaration: '?>' expected";
383
208k
            break;
384
0
        case XML_ERR_EXT_ENTITY_STANDALONE:
385
0
            errmsg = "external parsed entities cannot be standalone";
386
0
            break;
387
396k
        case XML_ERR_ENTITYREF_SEMICOL_MISSING:
388
396k
            errmsg = "EntityRef: expecting ';'";
389
396k
            break;
390
42.1k
        case XML_ERR_DOCTYPE_NOT_FINISHED:
391
42.1k
            errmsg = "DOCTYPE improperly terminated";
392
42.1k
            break;
393
0
        case XML_ERR_LTSLASH_REQUIRED:
394
0
            errmsg = "EndTag: '</' not found";
395
0
            break;
396
10.5k
        case XML_ERR_EQUAL_REQUIRED:
397
10.5k
            errmsg = "expected '='";
398
10.5k
            break;
399
46.3k
        case XML_ERR_STRING_NOT_CLOSED:
400
46.3k
            errmsg = "String not closed expecting \" or '";
401
46.3k
            break;
402
9.29k
        case XML_ERR_STRING_NOT_STARTED:
403
9.29k
            errmsg = "String not started expecting ' or \"";
404
9.29k
            break;
405
1.30k
        case XML_ERR_ENCODING_NAME:
406
1.30k
            errmsg = "Invalid XML encoding name";
407
1.30k
            break;
408
3.13k
        case XML_ERR_STANDALONE_VALUE:
409
3.13k
            errmsg = "standalone accepts only 'yes' or 'no'";
410
3.13k
            break;
411
34.0k
        case XML_ERR_DOCUMENT_EMPTY:
412
34.0k
            errmsg = "Document is empty";
413
34.0k
            break;
414
291k
        case XML_ERR_DOCUMENT_END:
415
291k
            errmsg = "Extra content at the end of the document";
416
291k
            break;
417
6.19k
        case XML_ERR_NOT_WELL_BALANCED:
418
6.19k
            errmsg = "chunk is not well balanced";
419
6.19k
            break;
420
0
        case XML_ERR_EXTRA_CONTENT:
421
0
            errmsg = "extra content at the end of well balanced chunk";
422
0
            break;
423
121k
        case XML_ERR_VERSION_MISSING:
424
121k
            errmsg = "Malformed declaration expecting version";
425
121k
            break;
426
58
        case XML_ERR_NAME_TOO_LONG:
427
58
            errmsg = "Name too long";
428
58
            break;
429
#if 0
430
        case:
431
            errmsg = "";
432
            break;
433
#endif
434
24.2k
        default:
435
24.2k
            errmsg = "Unregistered error message";
436
6.75M
    }
437
6.75M
    if (ctxt != NULL)
438
6.75M
  ctxt->errNo = error;
439
6.75M
    if (info == NULL) {
440
3.74M
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
441
3.74M
                        XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s\n",
442
3.74M
                        errmsg);
443
3.74M
    } else {
444
3.01M
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
445
3.01M
                        XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s: %s\n",
446
3.01M
                        errmsg, info);
447
3.01M
    }
448
6.75M
    if (ctxt != NULL) {
449
6.75M
  ctxt->wellFormed = 0;
450
6.75M
  if (ctxt->recovery == 0)
451
854k
      ctxt->disableSAX = 1;
452
6.75M
    }
453
6.75M
}
454
455
/**
456
 * xmlFatalErrMsg:
457
 * @ctxt:  an XML parser context
458
 * @error:  the error number
459
 * @msg:  the error message
460
 *
461
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
462
 */
463
static void LIBXML_ATTR_FORMAT(3,0)
464
xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
465
               const char *msg)
466
10.0M
{
467
10.0M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
468
10.0M
        (ctxt->instate == XML_PARSER_EOF))
469
70
  return;
470
10.0M
    if (ctxt != NULL)
471
10.0M
  ctxt->errNo = error;
472
10.0M
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
473
10.0M
                    XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
474
10.0M
    if (ctxt != NULL) {
475
10.0M
  ctxt->wellFormed = 0;
476
10.0M
  if (ctxt->recovery == 0)
477
1.21M
      ctxt->disableSAX = 1;
478
10.0M
    }
479
10.0M
}
480
481
/**
482
 * xmlWarningMsg:
483
 * @ctxt:  an XML parser context
484
 * @error:  the error number
485
 * @msg:  the error message
486
 * @str1:  extra data
487
 * @str2:  extra data
488
 *
489
 * Handle a warning.
490
 */
491
static void LIBXML_ATTR_FORMAT(3,0)
492
xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
493
              const char *msg, const xmlChar *str1, const xmlChar *str2)
494
1.00M
{
495
1.00M
    xmlStructuredErrorFunc schannel = NULL;
496
497
1.00M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
498
1.00M
        (ctxt->instate == XML_PARSER_EOF))
499
0
  return;
500
1.00M
    if ((ctxt != NULL) && (ctxt->sax != NULL) &&
501
1.00M
        (ctxt->sax->initialized == XML_SAX2_MAGIC))
502
874k
        schannel = ctxt->sax->serror;
503
1.00M
    if (ctxt != NULL) {
504
1.00M
        __xmlRaiseError(schannel,
505
1.00M
                    (ctxt->sax) ? ctxt->sax->warning : NULL,
506
1.00M
                    ctxt->userData,
507
1.00M
                    ctxt, NULL, XML_FROM_PARSER, error,
508
1.00M
                    XML_ERR_WARNING, NULL, 0,
509
1.00M
        (const char *) str1, (const char *) str2, NULL, 0, 0,
510
1.00M
        msg, (const char *) str1, (const char *) str2);
511
1.00M
    } else {
512
0
        __xmlRaiseError(schannel, NULL, NULL,
513
0
                    ctxt, NULL, XML_FROM_PARSER, error,
514
0
                    XML_ERR_WARNING, NULL, 0,
515
0
        (const char *) str1, (const char *) str2, NULL, 0, 0,
516
0
        msg, (const char *) str1, (const char *) str2);
517
0
    }
518
1.00M
}
519
520
/**
521
 * xmlValidityError:
522
 * @ctxt:  an XML parser context
523
 * @error:  the error number
524
 * @msg:  the error message
525
 * @str1:  extra data
526
 *
527
 * Handle a validity error.
528
 */
529
static void LIBXML_ATTR_FORMAT(3,0)
530
xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
531
              const char *msg, const xmlChar *str1, const xmlChar *str2)
532
27.9k
{
533
27.9k
    xmlStructuredErrorFunc schannel = NULL;
534
535
27.9k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
536
27.9k
        (ctxt->instate == XML_PARSER_EOF))
537
0
  return;
538
27.9k
    if (ctxt != NULL) {
539
27.9k
  ctxt->errNo = error;
540
27.9k
  if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
541
18.5k
      schannel = ctxt->sax->serror;
542
27.9k
    }
543
27.9k
    if (ctxt != NULL) {
544
27.9k
        __xmlRaiseError(schannel,
545
27.9k
                    ctxt->vctxt.error, ctxt->vctxt.userData,
546
27.9k
                    ctxt, NULL, XML_FROM_DTD, error,
547
27.9k
                    XML_ERR_ERROR, NULL, 0, (const char *) str1,
548
27.9k
        (const char *) str2, NULL, 0, 0,
549
27.9k
        msg, (const char *) str1, (const char *) str2);
550
27.9k
  ctxt->valid = 0;
551
27.9k
    } else {
552
0
        __xmlRaiseError(schannel, NULL, NULL,
553
0
                    ctxt, NULL, XML_FROM_DTD, error,
554
0
                    XML_ERR_ERROR, NULL, 0, (const char *) str1,
555
0
        (const char *) str2, NULL, 0, 0,
556
0
        msg, (const char *) str1, (const char *) str2);
557
0
    }
558
27.9k
}
559
560
/**
561
 * xmlFatalErrMsgInt:
562
 * @ctxt:  an XML parser context
563
 * @error:  the error number
564
 * @msg:  the error message
565
 * @val:  an integer value
566
 *
567
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
568
 */
569
static void LIBXML_ATTR_FORMAT(3,0)
570
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
571
                  const char *msg, int val)
572
15.9M
{
573
15.9M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
574
15.9M
        (ctxt->instate == XML_PARSER_EOF))
575
0
  return;
576
15.9M
    if (ctxt != NULL)
577
15.9M
  ctxt->errNo = error;
578
15.9M
    __xmlRaiseError(NULL, NULL, NULL,
579
15.9M
                    ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
580
15.9M
                    NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
581
15.9M
    if (ctxt != NULL) {
582
15.9M
  ctxt->wellFormed = 0;
583
15.9M
  if (ctxt->recovery == 0)
584
545k
      ctxt->disableSAX = 1;
585
15.9M
    }
586
15.9M
}
587
588
/**
589
 * xmlFatalErrMsgStrIntStr:
590
 * @ctxt:  an XML parser context
591
 * @error:  the error number
592
 * @msg:  the error message
593
 * @str1:  an string info
594
 * @val:  an integer value
595
 * @str2:  an string info
596
 *
597
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
598
 */
599
static void LIBXML_ATTR_FORMAT(3,0)
600
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
601
                  const char *msg, const xmlChar *str1, int val,
602
      const xmlChar *str2)
603
2.64M
{
604
2.64M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
605
2.64M
        (ctxt->instate == XML_PARSER_EOF))
606
0
  return;
607
2.64M
    if (ctxt != NULL)
608
2.64M
  ctxt->errNo = error;
609
2.64M
    __xmlRaiseError(NULL, NULL, NULL,
610
2.64M
                    ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
611
2.64M
                    NULL, 0, (const char *) str1, (const char *) str2,
612
2.64M
        NULL, val, 0, msg, str1, val, str2);
613
2.64M
    if (ctxt != NULL) {
614
2.64M
  ctxt->wellFormed = 0;
615
2.64M
  if (ctxt->recovery == 0)
616
511k
      ctxt->disableSAX = 1;
617
2.64M
    }
618
2.64M
}
619
620
/**
621
 * xmlFatalErrMsgStr:
622
 * @ctxt:  an XML parser context
623
 * @error:  the error number
624
 * @msg:  the error message
625
 * @val:  a string value
626
 *
627
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
628
 */
629
static void LIBXML_ATTR_FORMAT(3,0)
630
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
631
                  const char *msg, const xmlChar * val)
632
6.39M
{
633
6.39M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
634
6.39M
        (ctxt->instate == XML_PARSER_EOF))
635
16
  return;
636
6.39M
    if (ctxt != NULL)
637
6.39M
  ctxt->errNo = error;
638
6.39M
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
639
6.39M
                    XML_FROM_PARSER, error, XML_ERR_FATAL,
640
6.39M
                    NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
641
6.39M
                    val);
642
6.39M
    if (ctxt != NULL) {
643
6.39M
  ctxt->wellFormed = 0;
644
6.39M
  if (ctxt->recovery == 0)
645
1.03M
      ctxt->disableSAX = 1;
646
6.39M
    }
647
6.39M
}
648
649
/**
650
 * xmlErrMsgStr:
651
 * @ctxt:  an XML parser context
652
 * @error:  the error number
653
 * @msg:  the error message
654
 * @val:  a string value
655
 *
656
 * Handle a non fatal parser error
657
 */
658
static void LIBXML_ATTR_FORMAT(3,0)
659
xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
660
                  const char *msg, const xmlChar * val)
661
649k
{
662
649k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
663
649k
        (ctxt->instate == XML_PARSER_EOF))
664
0
  return;
665
649k
    if (ctxt != NULL)
666
649k
  ctxt->errNo = error;
667
649k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
668
649k
                    XML_FROM_PARSER, error, XML_ERR_ERROR,
669
649k
                    NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
670
649k
                    val);
671
649k
}
672
673
/**
674
 * xmlNsErr:
675
 * @ctxt:  an XML parser context
676
 * @error:  the error number
677
 * @msg:  the message
678
 * @info1:  extra information string
679
 * @info2:  extra information string
680
 *
681
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
682
 */
683
static void LIBXML_ATTR_FORMAT(3,0)
684
xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
685
         const char *msg,
686
         const xmlChar * info1, const xmlChar * info2,
687
         const xmlChar * info3)
688
2.44M
{
689
2.44M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
690
2.44M
        (ctxt->instate == XML_PARSER_EOF))
691
142
  return;
692
2.44M
    if (ctxt != NULL)
693
2.44M
  ctxt->errNo = error;
694
2.44M
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
695
2.44M
                    XML_ERR_ERROR, NULL, 0, (const char *) info1,
696
2.44M
                    (const char *) info2, (const char *) info3, 0, 0, msg,
697
2.44M
                    info1, info2, info3);
698
2.44M
    if (ctxt != NULL)
699
2.44M
  ctxt->nsWellFormed = 0;
700
2.44M
}
701
702
/**
703
 * xmlNsWarn
704
 * @ctxt:  an XML parser context
705
 * @error:  the error number
706
 * @msg:  the message
707
 * @info1:  extra information string
708
 * @info2:  extra information string
709
 *
710
 * Handle a namespace warning error
711
 */
712
static void LIBXML_ATTR_FORMAT(3,0)
713
xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
714
         const char *msg,
715
         const xmlChar * info1, const xmlChar * info2,
716
         const xmlChar * info3)
717
86.5k
{
718
86.5k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
719
86.5k
        (ctxt->instate == XML_PARSER_EOF))
720
0
  return;
721
86.5k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
722
86.5k
                    XML_ERR_WARNING, NULL, 0, (const char *) info1,
723
86.5k
                    (const char *) info2, (const char *) info3, 0, 0, msg,
724
86.5k
                    info1, info2, info3);
725
86.5k
}
726
727
/*
 * Add val to *dst, clamping the result to ULONG_MAX instead of letting
 * the unsigned addition wrap around.
 */
static void
xmlSaturatedAdd(unsigned long *dst, unsigned long val) {
    /* Largest increment *dst can still take without wrapping. */
    unsigned long headroom = ULONG_MAX - *dst;

    *dst = (val > headroom) ? ULONG_MAX : (*dst + val);
}
734
735
/*
 * Add a size_t amount to *dst, clamping the result to ULONG_MAX.
 *
 * The increment is taken as size_t rather than unsigned long: where
 * size_t is wider than unsigned long (e.g. LLP64 targets), an
 * unsigned long parameter would truncate the value before the
 * saturation test and the sum could end up too small. Comparing in
 * the wider type first keeps the check reliable; once the test has
 * passed, val is known to fit into an unsigned long.
 */
static void
xmlSaturatedAddSizeT(unsigned long *dst, size_t val) {
    if (val > ULONG_MAX - *dst)
        *dst = ULONG_MAX;
    else
        *dst += (unsigned long) val;
}
742
743
/**
744
 * xmlParserEntityCheck:
745
 * @ctxt:  parser context
746
 * @extra:  sum of unexpanded entity sizes
747
 *
748
 * Check for non-linear entity expansion behaviour.
749
 *
750
 * In some cases like xmlStringDecodeEntities, this function is called
751
 * for each, possibly nested entity and its unexpanded content length.
752
 *
753
 * In other cases like xmlParseReference, it's only called for each
754
 * top-level entity with its unexpanded content length plus the sum of
755
 * the unexpanded content lengths (plus fixed cost) of all nested
756
 * entities.
757
 *
758
 * Summing the unexpanded lengths also adds the length of the reference.
759
 * This is by design. Taking the length of the entity name into account
760
 * discourages attacks that try to waste CPU time with abusively long
761
 * entity names. See test/recurse/lol6.xml for example. Each call also
762
 * adds some fixed cost XML_ENT_FIXED_COST to discourage attacks with
763
 * short entities.
764
 *
765
 * Returns 1 on error, 0 on success.
766
 */
767
static int
768
xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long extra)
769
44.8M
{
770
44.8M
    unsigned long consumed;
771
44.8M
    xmlParserInputPtr input = ctxt->input;
772
44.8M
    xmlEntityPtr entity = input->entity;
773
774
    /*
775
     * Compute total consumed bytes so far, including input streams of
776
     * external entities.
777
     */
778
44.8M
    consumed = input->parentConsumed;
779
44.8M
    if ((entity == NULL) ||
780
44.8M
        ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
781
29.8M
         ((entity->flags & XML_ENT_PARSED) == 0))) {
782
29.8M
        xmlSaturatedAdd(&consumed, input->consumed);
783
29.8M
        xmlSaturatedAddSizeT(&consumed, input->cur - input->base);
784
29.8M
    }
785
44.8M
    xmlSaturatedAdd(&consumed, ctxt->sizeentities);
786
787
    /*
788
     * Add extra cost and some fixed cost.
789
     */
790
44.8M
    xmlSaturatedAdd(&ctxt->sizeentcopy, extra);
791
44.8M
    xmlSaturatedAdd(&ctxt->sizeentcopy, XML_ENT_FIXED_COST);
792
793
    /*
794
     * It's important to always use saturation arithmetic when tracking
795
     * entity sizes to make the size checks reliable. If "sizeentcopy"
796
     * overflows, we have to abort.
797
     */
798
44.8M
    if ((ctxt->sizeentcopy > XML_MAX_TEXT_LENGTH) &&
799
44.8M
        ((ctxt->sizeentcopy >= ULONG_MAX) ||
800
635
         (ctxt->sizeentcopy / XML_PARSER_NON_LINEAR > consumed))) {
801
635
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_LOOP,
802
635
                       "Maximum entity amplification factor exceeded");
803
635
        xmlHaltParser(ctxt);
804
635
        return(1);
805
635
    }
806
807
44.8M
    return(0);
808
44.8M
}
809
810
/************************************************************************
811
 *                  *
812
 *    Library wide options          *
813
 *                  *
814
 ************************************************************************/
815
816
/**
817
  * xmlHasFeature:
818
  * @feature: the feature to be examined
819
  *
820
  * Examines if the library has been compiled with a given feature.
821
  *
822
  * Returns a non-zero value if the feature exist, otherwise zero.
823
  * Returns zero (0) if the feature does not exist or an unknown
824
  * feature is requested, non-zero otherwise.
825
  */
826
int
827
xmlHasFeature(xmlFeature feature)
828
0
{
829
0
    switch (feature) {
830
0
  case XML_WITH_THREAD:
831
0
#ifdef LIBXML_THREAD_ENABLED
832
0
      return(1);
833
#else
834
      return(0);
835
#endif
836
0
        case XML_WITH_TREE:
837
0
#ifdef LIBXML_TREE_ENABLED
838
0
            return(1);
839
#else
840
            return(0);
841
#endif
842
0
        case XML_WITH_OUTPUT:
843
0
#ifdef LIBXML_OUTPUT_ENABLED
844
0
            return(1);
845
#else
846
            return(0);
847
#endif
848
0
        case XML_WITH_PUSH:
849
0
#ifdef LIBXML_PUSH_ENABLED
850
0
            return(1);
851
#else
852
            return(0);
853
#endif
854
0
        case XML_WITH_READER:
855
0
#ifdef LIBXML_READER_ENABLED
856
0
            return(1);
857
#else
858
            return(0);
859
#endif
860
0
        case XML_WITH_PATTERN:
861
0
#ifdef LIBXML_PATTERN_ENABLED
862
0
            return(1);
863
#else
864
            return(0);
865
#endif
866
0
        case XML_WITH_WRITER:
867
0
#ifdef LIBXML_WRITER_ENABLED
868
0
            return(1);
869
#else
870
            return(0);
871
#endif
872
0
        case XML_WITH_SAX1:
873
0
#ifdef LIBXML_SAX1_ENABLED
874
0
            return(1);
875
#else
876
            return(0);
877
#endif
878
0
        case XML_WITH_FTP:
879
#ifdef LIBXML_FTP_ENABLED
880
            return(1);
881
#else
882
0
            return(0);
883
0
#endif
884
0
        case XML_WITH_HTTP:
885
#ifdef LIBXML_HTTP_ENABLED
886
            return(1);
887
#else
888
0
            return(0);
889
0
#endif
890
0
        case XML_WITH_VALID:
891
0
#ifdef LIBXML_VALID_ENABLED
892
0
            return(1);
893
#else
894
            return(0);
895
#endif
896
0
        case XML_WITH_HTML:
897
0
#ifdef LIBXML_HTML_ENABLED
898
0
            return(1);
899
#else
900
            return(0);
901
#endif
902
0
        case XML_WITH_LEGACY:
903
#ifdef LIBXML_LEGACY_ENABLED
904
            return(1);
905
#else
906
0
            return(0);
907
0
#endif
908
0
        case XML_WITH_C14N:
909
0
#ifdef LIBXML_C14N_ENABLED
910
0
            return(1);
911
#else
912
            return(0);
913
#endif
914
0
        case XML_WITH_CATALOG:
915
0
#ifdef LIBXML_CATALOG_ENABLED
916
0
            return(1);
917
#else
918
            return(0);
919
#endif
920
0
        case XML_WITH_XPATH:
921
0
#ifdef LIBXML_XPATH_ENABLED
922
0
            return(1);
923
#else
924
            return(0);
925
#endif
926
0
        case XML_WITH_XPTR:
927
0
#ifdef LIBXML_XPTR_ENABLED
928
0
            return(1);
929
#else
930
            return(0);
931
#endif
932
0
        case XML_WITH_XINCLUDE:
933
0
#ifdef LIBXML_XINCLUDE_ENABLED
934
0
            return(1);
935
#else
936
            return(0);
937
#endif
938
0
        case XML_WITH_ICONV:
939
0
#ifdef LIBXML_ICONV_ENABLED
940
0
            return(1);
941
#else
942
            return(0);
943
#endif
944
0
        case XML_WITH_ISO8859X:
945
0
#ifdef LIBXML_ISO8859X_ENABLED
946
0
            return(1);
947
#else
948
            return(0);
949
#endif
950
0
        case XML_WITH_UNICODE:
951
0
#ifdef LIBXML_UNICODE_ENABLED
952
0
            return(1);
953
#else
954
            return(0);
955
#endif
956
0
        case XML_WITH_REGEXP:
957
0
#ifdef LIBXML_REGEXP_ENABLED
958
0
            return(1);
959
#else
960
            return(0);
961
#endif
962
0
        case XML_WITH_AUTOMATA:
963
0
#ifdef LIBXML_AUTOMATA_ENABLED
964
0
            return(1);
965
#else
966
            return(0);
967
#endif
968
0
        case XML_WITH_EXPR:
969
#ifdef LIBXML_EXPR_ENABLED
970
            return(1);
971
#else
972
0
            return(0);
973
0
#endif
974
0
        case XML_WITH_SCHEMAS:
975
0
#ifdef LIBXML_SCHEMAS_ENABLED
976
0
            return(1);
977
#else
978
            return(0);
979
#endif
980
0
        case XML_WITH_SCHEMATRON:
981
0
#ifdef LIBXML_SCHEMATRON_ENABLED
982
0
            return(1);
983
#else
984
            return(0);
985
#endif
986
0
        case XML_WITH_MODULES:
987
0
#ifdef LIBXML_MODULES_ENABLED
988
0
            return(1);
989
#else
990
            return(0);
991
#endif
992
0
        case XML_WITH_DEBUG:
993
#ifdef LIBXML_DEBUG_ENABLED
994
            return(1);
995
#else
996
0
            return(0);
997
0
#endif
998
0
        case XML_WITH_DEBUG_MEM:
999
#ifdef DEBUG_MEMORY_LOCATION
1000
            return(1);
1001
#else
1002
0
            return(0);
1003
0
#endif
1004
0
        case XML_WITH_DEBUG_RUN:
1005
0
            return(0);
1006
0
        case XML_WITH_ZLIB:
1007
0
#ifdef LIBXML_ZLIB_ENABLED
1008
0
            return(1);
1009
#else
1010
            return(0);
1011
#endif
1012
0
        case XML_WITH_LZMA:
1013
0
#ifdef LIBXML_LZMA_ENABLED
1014
0
            return(1);
1015
#else
1016
            return(0);
1017
#endif
1018
0
        case XML_WITH_ICU:
1019
#ifdef LIBXML_ICU_ENABLED
1020
            return(1);
1021
#else
1022
0
            return(0);
1023
0
#endif
1024
0
        default:
1025
0
      break;
1026
0
     }
1027
0
     return(0);
1028
0
}
1029
1030
/************************************************************************
1031
 *                  *
1032
 *    SAX2 defaulted attributes handling      *
1033
 *                  *
1034
 ************************************************************************/
1035
1036
/**
1037
 * xmlDetectSAX2:
1038
 * @ctxt:  an XML parser context
1039
 *
1040
 * Do the SAX2 detection and specific initialization
1041
 */
1042
static void
1043
2.09M
xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1044
2.09M
    xmlSAXHandlerPtr sax;
1045
1046
    /* Avoid unused variable warning if features are disabled. */
1047
2.09M
    (void) sax;
1048
1049
2.09M
    if (ctxt == NULL) return;
1050
2.09M
    sax = ctxt->sax;
1051
2.09M
#ifdef LIBXML_SAX1_ENABLED
1052
2.09M
    if ((sax) &&  (sax->initialized == XML_SAX2_MAGIC) &&
1053
2.09M
        ((sax->startElementNs != NULL) ||
1054
1.35M
         (sax->endElementNs != NULL) ||
1055
1.35M
         ((sax->startElement == NULL) && (sax->endElement == NULL))))
1056
1.35M
        ctxt->sax2 = 1;
1057
#else
1058
    ctxt->sax2 = 1;
1059
#endif /* LIBXML_SAX1_ENABLED */
1060
1061
2.09M
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1062
2.09M
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1063
2.09M
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
1064
2.09M
    if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1065
2.09M
    (ctxt->str_xml_ns == NULL)) {
1066
0
        xmlErrMemory(ctxt, NULL);
1067
0
    }
1068
2.09M
}
1069
1070
typedef struct _xmlDefAttrs xmlDefAttrs;
1071
typedef xmlDefAttrs *xmlDefAttrsPtr;
1072
struct _xmlDefAttrs {
1073
    int nbAttrs;  /* number of defaulted attributes on that element */
1074
    int maxAttrs;       /* the size of the array */
1075
#if __STDC_VERSION__ >= 199901L
1076
    /* Using a C99 flexible array member avoids UBSan errors. */
1077
    const xmlChar *values[]; /* array of localname/prefix/values/external */
1078
#else
1079
    const xmlChar *values[5];
1080
#endif
1081
};
1082
1083
/**
1084
 * xmlAttrNormalizeSpace:
1085
 * @src: the source string
1086
 * @dst: the target string
1087
 *
1088
 * Normalize the space in non CDATA attribute values:
1089
 * If the attribute type is not CDATA, then the XML processor MUST further
1090
 * process the normalized attribute value by discarding any leading and
1091
 * trailing space (#x20) characters, and by replacing sequences of space
1092
 * (#x20) characters by a single space (#x20) character.
1093
 * Note that the size of dst need to be at least src, and if one doesn't need
1094
 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1095
 * passing src as dst is just fine.
1096
 *
1097
 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1098
 *         is needed.
1099
 */
1100
static xmlChar *
1101
xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1102
187k
{
1103
187k
    if ((src == NULL) || (dst == NULL))
1104
0
        return(NULL);
1105
1106
261k
    while (*src == 0x20) src++;
1107
7.15M
    while (*src != 0) {
1108
6.96M
  if (*src == 0x20) {
1109
632k
      while (*src == 0x20) src++;
1110
220k
      if (*src != 0)
1111
185k
    *dst++ = 0x20;
1112
6.74M
  } else {
1113
6.74M
      *dst++ = *src++;
1114
6.74M
  }
1115
6.96M
    }
1116
187k
    *dst = 0;
1117
187k
    if (dst == src)
1118
130k
       return(NULL);
1119
56.4k
    return(dst);
1120
187k
}
1121
1122
/**
1123
 * xmlAttrNormalizeSpace2:
1124
 * @src: the source string
1125
 *
1126
 * Normalize the space in non CDATA attribute values, a slightly more complex
1127
 * front end to avoid allocation problems when running on attribute values
1128
 * coming from the input.
1129
 *
1130
 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1131
 *         is needed.
1132
 */
1133
static const xmlChar *
1134
xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
1135
76.0k
{
1136
76.0k
    int i;
1137
76.0k
    int remove_head = 0;
1138
76.0k
    int need_realloc = 0;
1139
76.0k
    const xmlChar *cur;
1140
1141
76.0k
    if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1142
0
        return(NULL);
1143
76.0k
    i = *len;
1144
76.0k
    if (i <= 0)
1145
9.69k
        return(NULL);
1146
1147
66.3k
    cur = src;
1148
76.0k
    while (*cur == 0x20) {
1149
9.71k
        cur++;
1150
9.71k
  remove_head++;
1151
9.71k
    }
1152
1.27M
    while (*cur != 0) {
1153
1.21M
  if (*cur == 0x20) {
1154
96.2k
      cur++;
1155
96.2k
      if ((*cur == 0x20) || (*cur == 0)) {
1156
9.28k
          need_realloc = 1;
1157
9.28k
    break;
1158
9.28k
      }
1159
96.2k
  } else
1160
1.12M
      cur++;
1161
1.21M
    }
1162
66.3k
    if (need_realloc) {
1163
9.28k
        xmlChar *ret;
1164
1165
9.28k
  ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1166
9.28k
  if (ret == NULL) {
1167
0
      xmlErrMemory(ctxt, NULL);
1168
0
      return(NULL);
1169
0
  }
1170
9.28k
  xmlAttrNormalizeSpace(ret, ret);
1171
9.28k
  *len = strlen((const char *)ret);
1172
9.28k
        return(ret);
1173
57.0k
    } else if (remove_head) {
1174
3.14k
        *len -= remove_head;
1175
3.14k
        memmove(src, src + remove_head, 1 + *len);
1176
3.14k
  return(src);
1177
3.14k
    }
1178
53.8k
    return(NULL);
1179
66.3k
}
1180
1181
/**
1182
 * xmlAddDefAttrs:
1183
 * @ctxt:  an XML parser context
1184
 * @fullname:  the element fullname
1185
 * @fullattr:  the attribute fullname
1186
 * @value:  the attribute value
1187
 *
1188
 * Add a defaulted attribute for an element
1189
 */
1190
static void
1191
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1192
               const xmlChar *fullname,
1193
               const xmlChar *fullattr,
1194
188k
               const xmlChar *value) {
1195
188k
    xmlDefAttrsPtr defaults;
1196
188k
    int len;
1197
188k
    const xmlChar *name;
1198
188k
    const xmlChar *prefix;
1199
1200
    /*
1201
     * Allows to detect attribute redefinitions
1202
     */
1203
188k
    if (ctxt->attsSpecial != NULL) {
1204
146k
        if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1205
38.9k
      return;
1206
146k
    }
1207
1208
149k
    if (ctxt->attsDefault == NULL) {
1209
49.1k
        ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1210
49.1k
  if (ctxt->attsDefault == NULL)
1211
0
      goto mem_error;
1212
49.1k
    }
1213
1214
    /*
1215
     * split the element name into prefix:localname , the string found
1216
     * are within the DTD and then not associated to namespace names.
1217
     */
1218
149k
    name = xmlSplitQName3(fullname, &len);
1219
149k
    if (name == NULL) {
1220
136k
        name = xmlDictLookup(ctxt->dict, fullname, -1);
1221
136k
  prefix = NULL;
1222
136k
    } else {
1223
12.8k
        name = xmlDictLookup(ctxt->dict, name, -1);
1224
12.8k
  prefix = xmlDictLookup(ctxt->dict, fullname, len);
1225
12.8k
    }
1226
1227
    /*
1228
     * make sure there is some storage
1229
     */
1230
149k
    defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1231
149k
    if (defaults == NULL) {
1232
89.3k
        defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
1233
89.3k
                     (4 * 5) * sizeof(const xmlChar *));
1234
89.3k
  if (defaults == NULL)
1235
0
      goto mem_error;
1236
89.3k
  defaults->nbAttrs = 0;
1237
89.3k
  defaults->maxAttrs = 4;
1238
89.3k
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1239
89.3k
                          defaults, NULL) < 0) {
1240
0
      xmlFree(defaults);
1241
0
      goto mem_error;
1242
0
  }
1243
89.3k
    } else if (defaults->nbAttrs >= defaults->maxAttrs) {
1244
2.17k
        xmlDefAttrsPtr temp;
1245
1246
2.17k
        temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
1247
2.17k
           (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1248
2.17k
  if (temp == NULL)
1249
0
      goto mem_error;
1250
2.17k
  defaults = temp;
1251
2.17k
  defaults->maxAttrs *= 2;
1252
2.17k
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1253
2.17k
                          defaults, NULL) < 0) {
1254
0
      xmlFree(defaults);
1255
0
      goto mem_error;
1256
0
  }
1257
2.17k
    }
1258
1259
    /*
1260
     * Split the element name into prefix:localname , the string found
1261
     * are within the DTD and hen not associated to namespace names.
1262
     */
1263
149k
    name = xmlSplitQName3(fullattr, &len);
1264
149k
    if (name == NULL) {
1265
119k
        name = xmlDictLookup(ctxt->dict, fullattr, -1);
1266
119k
  prefix = NULL;
1267
119k
    } else {
1268
30.6k
        name = xmlDictLookup(ctxt->dict, name, -1);
1269
30.6k
  prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1270
30.6k
    }
1271
1272
149k
    defaults->values[5 * defaults->nbAttrs] = name;
1273
149k
    defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1274
    /* intern the string and precompute the end */
1275
149k
    len = xmlStrlen(value);
1276
149k
    value = xmlDictLookup(ctxt->dict, value, len);
1277
149k
    if (value == NULL)
1278
0
        goto mem_error;
1279
149k
    defaults->values[5 * defaults->nbAttrs + 2] = value;
1280
149k
    defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1281
149k
    if (ctxt->external)
1282
51.5k
        defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1283
98.1k
    else
1284
98.1k
        defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1285
149k
    defaults->nbAttrs++;
1286
1287
149k
    return;
1288
1289
0
mem_error:
1290
0
    xmlErrMemory(ctxt, NULL);
1291
0
    return;
1292
149k
}
1293
1294
/**
1295
 * xmlAddSpecialAttr:
1296
 * @ctxt:  an XML parser context
1297
 * @fullname:  the element fullname
1298
 * @fullattr:  the attribute fullname
1299
 * @type:  the attribute type
1300
 *
1301
 * Register this attribute type
1302
 */
1303
static void
1304
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1305
      const xmlChar *fullname,
1306
      const xmlChar *fullattr,
1307
      int type)
1308
1.28M
{
1309
1.28M
    if (ctxt->attsSpecial == NULL) {
1310
84.2k
        ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1311
84.2k
  if (ctxt->attsSpecial == NULL)
1312
0
      goto mem_error;
1313
84.2k
    }
1314
1315
1.28M
    if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1316
115k
        return;
1317
1318
1.16M
    xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1319
1.16M
                     (void *) (ptrdiff_t) type);
1320
1.16M
    return;
1321
1322
0
mem_error:
1323
0
    xmlErrMemory(ctxt, NULL);
1324
0
    return;
1325
1.28M
}
1326
1327
/**
1328
 * xmlCleanSpecialAttrCallback:
1329
 *
1330
 * Removes CDATA attributes from the special attribute table
1331
 */
1332
static void
1333
xmlCleanSpecialAttrCallback(void *payload, void *data,
1334
                            const xmlChar *fullname, const xmlChar *fullattr,
1335
871k
                            const xmlChar *unused ATTRIBUTE_UNUSED) {
1336
871k
    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1337
1338
871k
    if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) {
1339
313k
        xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1340
313k
    }
1341
871k
}
1342
1343
/**
1344
 * xmlCleanSpecialAttr:
1345
 * @ctxt:  an XML parser context
1346
 *
1347
 * Trim the list of attributes defined to remove all those of type
1348
 * CDATA as they are not special. This call should be done when finishing
1349
 * to parse the DTD and before starting to parse the document root.
1350
 */
1351
static void
1352
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1353
321k
{
1354
321k
    if (ctxt->attsSpecial == NULL)
1355
268k
        return;
1356
1357
52.2k
    xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1358
1359
52.2k
    if (xmlHashSize(ctxt->attsSpecial) == 0) {
1360
14.1k
        xmlHashFree(ctxt->attsSpecial, NULL);
1361
14.1k
        ctxt->attsSpecial = NULL;
1362
14.1k
    }
1363
52.2k
    return;
1364
321k
}
1365
1366
/**
1367
 * xmlCheckLanguageID:
1368
 * @lang:  pointer to the string value
1369
 *
1370
 * Checks that the value conforms to the LanguageID production:
1371
 *
1372
 * NOTE: this is somewhat deprecated, those productions were removed from
1373
 *       the XML Second edition.
1374
 *
1375
 * [33] LanguageID ::= Langcode ('-' Subcode)*
1376
 * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
1377
 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1378
 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1379
 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1380
 * [38] Subcode ::= ([a-z] | [A-Z])+
1381
 *
1382
 * The current REC reference the successors of RFC 1766, currently 5646
1383
 *
1384
 * http://www.rfc-editor.org/rfc/rfc5646.txt
1385
 * langtag       = language
1386
 *                 ["-" script]
1387
 *                 ["-" region]
1388
 *                 *("-" variant)
1389
 *                 *("-" extension)
1390
 *                 ["-" privateuse]
1391
 * language      = 2*3ALPHA            ; shortest ISO 639 code
1392
 *                 ["-" extlang]       ; sometimes followed by
1393
 *                                     ; extended language subtags
1394
 *               / 4ALPHA              ; or reserved for future use
1395
 *               / 5*8ALPHA            ; or registered language subtag
1396
 *
1397
 * extlang       = 3ALPHA              ; selected ISO 639 codes
1398
 *                 *2("-" 3ALPHA)      ; permanently reserved
1399
 *
1400
 * script        = 4ALPHA              ; ISO 15924 code
1401
 *
1402
 * region        = 2ALPHA              ; ISO 3166-1 code
1403
 *               / 3DIGIT              ; UN M.49 code
1404
 *
1405
 * variant       = 5*8alphanum         ; registered variants
1406
 *               / (DIGIT 3alphanum)
1407
 *
1408
 * extension     = singleton 1*("-" (2*8alphanum))
1409
 *
1410
 *                                     ; Single alphanumerics
1411
 *                                     ; "x" reserved for private use
1412
 * singleton     = DIGIT               ; 0 - 9
1413
 *               / %x41-57             ; A - W
1414
 *               / %x59-5A             ; Y - Z
1415
 *               / %x61-77             ; a - w
1416
 *               / %x79-7A             ; y - z
1417
 *
1418
 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1419
 * The parser below doesn't try to cope with extension or privateuse
1420
 * that could be added but that's not interoperable anyway
1421
 *
1422
 * Returns 1 if correct 0 otherwise
1423
 **/
1424
int
1425
xmlCheckLanguageID(const xmlChar * lang)
1426
182k
{
1427
182k
    const xmlChar *cur = lang, *nxt;
1428
1429
182k
    if (cur == NULL)
1430
4.95k
        return (0);
1431
177k
    if (((cur[0] == 'i') && (cur[1] == '-')) ||
1432
177k
        ((cur[0] == 'I') && (cur[1] == '-')) ||
1433
177k
        ((cur[0] == 'x') && (cur[1] == '-')) ||
1434
177k
        ((cur[0] == 'X') && (cur[1] == '-'))) {
1435
        /*
1436
         * Still allow IANA code and user code which were coming
1437
         * from the previous version of the XML-1.0 specification
1438
         * it's deprecated but we should not fail
1439
         */
1440
8.69k
        cur += 2;
1441
49.9k
        while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1442
49.9k
               ((cur[0] >= 'a') && (cur[0] <= 'z')))
1443
41.2k
            cur++;
1444
8.69k
        return(cur[0] == 0);
1445
8.69k
    }
1446
169k
    nxt = cur;
1447
781k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1448
781k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1449
611k
           nxt++;
1450
169k
    if (nxt - cur >= 4) {
1451
        /*
1452
         * Reserved
1453
         */
1454
19.6k
        if ((nxt - cur > 8) || (nxt[0] != 0))
1455
15.2k
            return(0);
1456
4.41k
        return(1);
1457
19.6k
    }
1458
149k
    if (nxt - cur < 2)
1459
17.2k
        return(0);
1460
    /* we got an ISO 639 code */
1461
132k
    if (nxt[0] == 0)
1462
13.2k
        return(1);
1463
119k
    if (nxt[0] != '-')
1464
8.99k
        return(0);
1465
1466
110k
    nxt++;
1467
110k
    cur = nxt;
1468
    /* now we can have extlang or script or region or variant */
1469
110k
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1470
17.7k
        goto region_m49;
1471
1472
435k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1473
435k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1474
343k
           nxt++;
1475
92.3k
    if (nxt - cur == 4)
1476
26.2k
        goto script;
1477
66.0k
    if (nxt - cur == 2)
1478
17.0k
        goto region;
1479
48.9k
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1480
9.96k
        goto variant;
1481
38.9k
    if (nxt - cur != 3)
1482
13.0k
        return(0);
1483
    /* we parsed an extlang */
1484
25.9k
    if (nxt[0] == 0)
1485
1.63k
        return(1);
1486
24.2k
    if (nxt[0] != '-')
1487
3.05k
        return(0);
1488
1489
21.2k
    nxt++;
1490
21.2k
    cur = nxt;
1491
    /* now we can have script or region or variant */
1492
21.2k
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1493
2.97k
        goto region_m49;
1494
1495
165k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1496
165k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1497
147k
           nxt++;
1498
18.2k
    if (nxt - cur == 2)
1499
1.71k
        goto region;
1500
16.5k
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1501
3.93k
        goto variant;
1502
12.6k
    if (nxt - cur != 4)
1503
9.31k
        return(0);
1504
    /* we parsed a script */
1505
29.5k
script:
1506
29.5k
    if (nxt[0] == 0)
1507
4.11k
        return(1);
1508
25.4k
    if (nxt[0] != '-')
1509
6.93k
        return(0);
1510
1511
18.5k
    nxt++;
1512
18.5k
    cur = nxt;
1513
    /* now we can have region or variant */
1514
18.5k
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1515
1.16k
        goto region_m49;
1516
1517
100k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1518
100k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1519
83.5k
           nxt++;
1520
1521
17.3k
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1522
2.60k
        goto variant;
1523
14.7k
    if (nxt - cur != 2)
1524
8.87k
        return(0);
1525
    /* we parsed a region */
1526
36.2k
region:
1527
36.2k
    if (nxt[0] == 0)
1528
3.61k
        return(1);
1529
32.6k
    if (nxt[0] != '-')
1530
12.0k
        return(0);
1531
1532
20.5k
    nxt++;
1533
20.5k
    cur = nxt;
1534
    /* now we can just have a variant */
1535
216k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1536
216k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1537
195k
           nxt++;
1538
1539
20.5k
    if ((nxt - cur < 5) || (nxt - cur > 8))
1540
16.3k
        return(0);
1541
1542
    /* we parsed a variant */
1543
20.7k
variant:
1544
20.7k
    if (nxt[0] == 0)
1545
2.54k
        return(1);
1546
18.1k
    if (nxt[0] != '-')
1547
12.9k
        return(0);
1548
    /* extensions and private use subtags not checked */
1549
5.23k
    return (1);
1550
1551
21.9k
region_m49:
1552
21.9k
    if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1553
21.9k
        ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1554
11.5k
        nxt += 3;
1555
11.5k
        goto region;
1556
11.5k
    }
1557
10.3k
    return(0);
1558
21.9k
}
1559
1560
/************************************************************************
1561
 *                  *
1562
 *    Parser stacks related functions and macros    *
1563
 *                  *
1564
 ************************************************************************/
1565
1566
static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1567
                                            const xmlChar ** str);
1568
1569
#ifdef SAX2
1570
/**
1571
 * nsPush:
1572
 * @ctxt:  an XML parser context
1573
 * @prefix:  the namespace prefix or NULL
1574
 * @URL:  the namespace name
1575
 *
1576
 * Pushes a new parser namespace on top of the ns stack
1577
 *
1578
 * Returns -1 in case of error, -2 if the namespace should be discarded
1579
 *     and the index in the stack otherwise.
1580
 */
1581
static int
1582
nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1583
789k
{
1584
789k
    if (ctxt->options & XML_PARSE_NSCLEAN) {
1585
339k
        int i;
1586
900k
  for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
1587
777k
      if (ctxt->nsTab[i] == prefix) {
1588
    /* in scope */
1589
216k
          if (ctxt->nsTab[i + 1] == URL)
1590
98.0k
        return(-2);
1591
    /* out of scope keep it */
1592
118k
    break;
1593
216k
      }
1594
777k
  }
1595
339k
    }
1596
690k
    if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1597
101k
  ctxt->nsMax = 10;
1598
101k
  ctxt->nsNr = 0;
1599
101k
  ctxt->nsTab = (const xmlChar **)
1600
101k
                xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1601
101k
  if (ctxt->nsTab == NULL) {
1602
0
      xmlErrMemory(ctxt, NULL);
1603
0
      ctxt->nsMax = 0;
1604
0
            return (-1);
1605
0
  }
1606
589k
    } else if (ctxt->nsNr >= ctxt->nsMax) {
1607
21.6k
        const xmlChar ** tmp;
1608
21.6k
        ctxt->nsMax *= 2;
1609
21.6k
        tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1610
21.6k
            ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1611
21.6k
        if (tmp == NULL) {
1612
0
            xmlErrMemory(ctxt, NULL);
1613
0
      ctxt->nsMax /= 2;
1614
0
            return (-1);
1615
0
        }
1616
21.6k
  ctxt->nsTab = tmp;
1617
21.6k
    }
1618
690k
    ctxt->nsTab[ctxt->nsNr++] = prefix;
1619
690k
    ctxt->nsTab[ctxt->nsNr++] = URL;
1620
690k
    return (ctxt->nsNr);
1621
690k
}
1622
/**
1623
 * nsPop:
1624
 * @ctxt: an XML parser context
1625
 * @nr:  the number to pop
1626
 *
1627
 * Pops the top @nr parser prefix/namespace from the ns stack
1628
 *
1629
 * Returns the number of namespaces removed
1630
 */
1631
static int
1632
nsPop(xmlParserCtxtPtr ctxt, int nr)
1633
248k
{
1634
248k
    int i;
1635
1636
248k
    if (ctxt->nsTab == NULL) return(0);
1637
248k
    if (ctxt->nsNr < nr) {
1638
0
        xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1639
0
        nr = ctxt->nsNr;
1640
0
    }
1641
248k
    if (ctxt->nsNr <= 0)
1642
0
        return (0);
1643
1644
840k
    for (i = 0;i < nr;i++) {
1645
592k
         ctxt->nsNr--;
1646
592k
   ctxt->nsTab[ctxt->nsNr] = NULL;
1647
592k
    }
1648
248k
    return(nr);
1649
248k
}
1650
#endif
1651
1652
static int
1653
179k
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1654
179k
    const xmlChar **atts;
1655
179k
    int *attallocs;
1656
179k
    int maxatts;
1657
1658
179k
    if (nr + 5 > ctxt->maxatts) {
1659
179k
  maxatts = ctxt->maxatts == 0 ? 55 : (nr + 5) * 2;
1660
179k
  atts = (const xmlChar **) xmlMalloc(
1661
179k
             maxatts * sizeof(const xmlChar *));
1662
179k
  if (atts == NULL) goto mem_error;
1663
179k
  attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1664
179k
                               (maxatts / 5) * sizeof(int));
1665
179k
  if (attallocs == NULL) {
1666
0
            xmlFree(atts);
1667
0
            goto mem_error;
1668
0
        }
1669
179k
        if (ctxt->maxatts > 0)
1670
1.15k
            memcpy(atts, ctxt->atts, ctxt->maxatts * sizeof(const xmlChar *));
1671
179k
        xmlFree(ctxt->atts);
1672
179k
  ctxt->atts = atts;
1673
179k
  ctxt->attallocs = attallocs;
1674
179k
  ctxt->maxatts = maxatts;
1675
179k
    }
1676
179k
    return(ctxt->maxatts);
1677
0
mem_error:
1678
0
    xmlErrMemory(ctxt, NULL);
1679
0
    return(-1);
1680
179k
}
1681
1682
/**
1683
 * inputPush:
1684
 * @ctxt:  an XML parser context
1685
 * @value:  the parser input
1686
 *
1687
 * Pushes a new parser input on top of the input stack
1688
 *
1689
 * Returns -1 in case of error, the index in the stack otherwise
1690
 */
1691
int
1692
inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1693
16.3M
{
1694
16.3M
    if ((ctxt == NULL) || (value == NULL))
1695
0
        return(-1);
1696
16.3M
    if (ctxt->inputNr >= ctxt->inputMax) {
1697
624
        size_t newSize = ctxt->inputMax * 2;
1698
624
        xmlParserInputPtr *tmp;
1699
1700
624
        tmp = (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1701
624
                                               newSize * sizeof(*tmp));
1702
624
        if (tmp == NULL) {
1703
0
            xmlErrMemory(ctxt, NULL);
1704
0
            return (-1);
1705
0
        }
1706
624
        ctxt->inputTab = tmp;
1707
624
        ctxt->inputMax = newSize;
1708
624
    }
1709
16.3M
    ctxt->inputTab[ctxt->inputNr] = value;
1710
16.3M
    ctxt->input = value;
1711
16.3M
    return (ctxt->inputNr++);
1712
16.3M
}
1713
/**
1714
 * inputPop:
1715
 * @ctxt: an XML parser context
1716
 *
1717
 * Pops the top parser input from the input stack
1718
 *
1719
 * Returns the input just removed
1720
 */
1721
xmlParserInputPtr
1722
inputPop(xmlParserCtxtPtr ctxt)
1723
18.8M
{
1724
18.8M
    xmlParserInputPtr ret;
1725
1726
18.8M
    if (ctxt == NULL)
1727
0
        return(NULL);
1728
18.8M
    if (ctxt->inputNr <= 0)
1729
2.54M
        return (NULL);
1730
16.2M
    ctxt->inputNr--;
1731
16.2M
    if (ctxt->inputNr > 0)
1732
15.0M
        ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1733
1.17M
    else
1734
1.17M
        ctxt->input = NULL;
1735
16.2M
    ret = ctxt->inputTab[ctxt->inputNr];
1736
16.2M
    ctxt->inputTab[ctxt->inputNr] = NULL;
1737
16.2M
    return (ret);
1738
18.8M
}
1739
/**
1740
 * nodePush:
1741
 * @ctxt:  an XML parser context
1742
 * @value:  the element node
1743
 *
1744
 * Pushes a new element node on top of the node stack
1745
 *
1746
 * Returns -1 in case of error, the index in the stack otherwise
1747
 */
1748
int
1749
nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1750
14.0M
{
1751
14.0M
    if (ctxt == NULL) return(0);
1752
14.0M
    if (ctxt->nodeNr >= ctxt->nodeMax) {
1753
89.6k
        xmlNodePtr *tmp;
1754
1755
89.6k
  tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1756
89.6k
                                      ctxt->nodeMax * 2 *
1757
89.6k
                                      sizeof(ctxt->nodeTab[0]));
1758
89.6k
        if (tmp == NULL) {
1759
0
            xmlErrMemory(ctxt, NULL);
1760
0
            return (-1);
1761
0
        }
1762
89.6k
        ctxt->nodeTab = tmp;
1763
89.6k
  ctxt->nodeMax *= 2;
1764
89.6k
    }
1765
14.0M
    if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1766
14.0M
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
1767
329
  xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1768
329
     "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1769
329
        xmlParserMaxDepth);
1770
329
  xmlHaltParser(ctxt);
1771
329
  return(-1);
1772
329
    }
1773
14.0M
    ctxt->nodeTab[ctxt->nodeNr] = value;
1774
14.0M
    ctxt->node = value;
1775
14.0M
    return (ctxt->nodeNr++);
1776
14.0M
}
1777
1778
/**
1779
 * nodePop:
1780
 * @ctxt: an XML parser context
1781
 *
1782
 * Pops the top element node from the node stack
1783
 *
1784
 * Returns the node just removed
1785
 */
1786
xmlNodePtr
1787
nodePop(xmlParserCtxtPtr ctxt)
1788
11.8M
{
1789
11.8M
    xmlNodePtr ret;
1790
1791
11.8M
    if (ctxt == NULL) return(NULL);
1792
11.8M
    if (ctxt->nodeNr <= 0)
1793
493k
        return (NULL);
1794
11.3M
    ctxt->nodeNr--;
1795
11.3M
    if (ctxt->nodeNr > 0)
1796
9.72M
        ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1797
1.61M
    else
1798
1.61M
        ctxt->node = NULL;
1799
11.3M
    ret = ctxt->nodeTab[ctxt->nodeNr];
1800
11.3M
    ctxt->nodeTab[ctxt->nodeNr] = NULL;
1801
11.3M
    return (ret);
1802
11.8M
}
1803
1804
/**
1805
 * nameNsPush:
1806
 * @ctxt:  an XML parser context
1807
 * @value:  the element name
1808
 * @prefix:  the element prefix
1809
 * @URI:  the element namespace name
1810
 * @line:  the current line number for error messages
1811
 * @nsNr:  the number of namespaces pushed on the namespace table
1812
 *
1813
 * Pushes a new element name/prefix/URL on top of the name stack
1814
 *
1815
 * Returns -1 in case of error, the index in the stack otherwise
1816
 */
1817
static int
1818
nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1819
           const xmlChar *prefix, const xmlChar *URI, int line, int nsNr)
1820
13.3M
{
1821
13.3M
    xmlStartTag *tag;
1822
1823
13.3M
    if (ctxt->nameNr >= ctxt->nameMax) {
1824
185k
        const xmlChar * *tmp;
1825
185k
        xmlStartTag *tmp2;
1826
185k
        ctxt->nameMax *= 2;
1827
185k
        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1828
185k
                                    ctxt->nameMax *
1829
185k
                                    sizeof(ctxt->nameTab[0]));
1830
185k
        if (tmp == NULL) {
1831
0
      ctxt->nameMax /= 2;
1832
0
      goto mem_error;
1833
0
        }
1834
185k
  ctxt->nameTab = tmp;
1835
185k
        tmp2 = (xmlStartTag *) xmlRealloc((void * *)ctxt->pushTab,
1836
185k
                                    ctxt->nameMax *
1837
185k
                                    sizeof(ctxt->pushTab[0]));
1838
185k
        if (tmp2 == NULL) {
1839
0
      ctxt->nameMax /= 2;
1840
0
      goto mem_error;
1841
0
        }
1842
185k
  ctxt->pushTab = tmp2;
1843
13.2M
    } else if (ctxt->pushTab == NULL) {
1844
682k
        ctxt->pushTab = (xmlStartTag *) xmlMalloc(ctxt->nameMax *
1845
682k
                                            sizeof(ctxt->pushTab[0]));
1846
682k
        if (ctxt->pushTab == NULL)
1847
0
            goto mem_error;
1848
682k
    }
1849
13.3M
    ctxt->nameTab[ctxt->nameNr] = value;
1850
13.3M
    ctxt->name = value;
1851
13.3M
    tag = &ctxt->pushTab[ctxt->nameNr];
1852
13.3M
    tag->prefix = prefix;
1853
13.3M
    tag->URI = URI;
1854
13.3M
    tag->line = line;
1855
13.3M
    tag->nsNr = nsNr;
1856
13.3M
    return (ctxt->nameNr++);
1857
0
mem_error:
1858
0
    xmlErrMemory(ctxt, NULL);
1859
0
    return (-1);
1860
13.3M
}
1861
#ifdef LIBXML_PUSH_ENABLED
/**
 * nameNsPop:
 * @ctxt: an XML parser context
 *
 * Removes the topmost entry from the name stack. The entry below it, if
 * any, becomes the current name (ctxt->name); otherwise the current name
 * is reset to NULL. The pushTab record at the same index is left as is.
 *
 * Returns the name just removed, or NULL if the stack is empty
 */
static const xmlChar *
nameNsPop(xmlParserCtxtPtr ctxt)
{
    const xmlChar *popped;
    int top;

    if (ctxt->nameNr <= 0)
        return(NULL);

    top = ctxt->nameNr - 1;
    ctxt->nameNr = top;

    popped = ctxt->nameTab[top];
    ctxt->nameTab[top] = NULL;

    ctxt->name = (top > 0) ? ctxt->nameTab[top - 1] : NULL;

    return(popped);
}
#endif /* LIBXML_PUSH_ENABLED */
1887
1888
/**
1889
 * namePush:
1890
 * @ctxt:  an XML parser context
1891
 * @value:  the element name
1892
 *
1893
 * Pushes a new element name on top of the name stack
1894
 *
1895
 * Returns -1 in case of error, the index in the stack otherwise
1896
 */
1897
int
1898
namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1899
0
{
1900
0
    if (ctxt == NULL) return (-1);
1901
1902
0
    if (ctxt->nameNr >= ctxt->nameMax) {
1903
0
        const xmlChar * *tmp;
1904
0
        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1905
0
                                    ctxt->nameMax * 2 *
1906
0
                                    sizeof(ctxt->nameTab[0]));
1907
0
        if (tmp == NULL) {
1908
0
      goto mem_error;
1909
0
        }
1910
0
  ctxt->nameTab = tmp;
1911
0
        ctxt->nameMax *= 2;
1912
0
    }
1913
0
    ctxt->nameTab[ctxt->nameNr] = value;
1914
0
    ctxt->name = value;
1915
0
    return (ctxt->nameNr++);
1916
0
mem_error:
1917
0
    xmlErrMemory(ctxt, NULL);
1918
0
    return (-1);
1919
0
}
1920
/**
1921
 * namePop:
1922
 * @ctxt: an XML parser context
1923
 *
1924
 * Pops the top element name from the name stack
1925
 *
1926
 * Returns the name just removed
1927
 */
1928
const xmlChar *
1929
namePop(xmlParserCtxtPtr ctxt)
1930
5.52M
{
1931
5.52M
    const xmlChar *ret;
1932
1933
5.52M
    if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1934
0
        return (NULL);
1935
5.52M
    ctxt->nameNr--;
1936
5.52M
    if (ctxt->nameNr > 0)
1937
5.33M
        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1938
184k
    else
1939
184k
        ctxt->name = NULL;
1940
5.52M
    ret = ctxt->nameTab[ctxt->nameNr];
1941
5.52M
    ctxt->nameTab[ctxt->nameNr] = NULL;
1942
5.52M
    return (ret);
1943
5.52M
}
1944
1945
16.8M
/*
 * spacePush:
 * Pushes @val on the ctxt->spaceTab stack, doubling the table when it is
 * full, and points ctxt->space at the new top entry.
 *
 * Returns -1 on allocation failure, the index in the stack otherwise.
 */
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
    int slot;

    if (ctxt->spaceNr >= ctxt->spaceMax) {
        int *grown;

        ctxt->spaceMax *= 2;
        grown = (int *) xmlRealloc(ctxt->spaceTab,
                                   ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
        if (grown == NULL) {
            xmlErrMemory(ctxt, NULL);
            /* Undo the doubling: the table still has its old size. */
            ctxt->spaceMax /= 2;
            return(-1);
        }
        ctxt->spaceTab = grown;
    }

    slot = ctxt->spaceNr++;
    ctxt->spaceTab[slot] = val;
    ctxt->space = &ctxt->spaceTab[slot];
    return(slot);
}
1963
1964
14.0M
/*
 * spacePop:
 * Pops the top value from the ctxt->spaceTab stack. The vacated slot is
 * overwritten with -1 and ctxt->space is moved to the new top entry, or
 * to slot 0 when the stack becomes empty.
 *
 * Returns the popped value, or 0 if the stack was already empty.
 */
static int spacePop(xmlParserCtxtPtr ctxt) {
    int top;
    int popped;

    if (ctxt->spaceNr <= 0)
        return(0);

    top = ctxt->spaceNr - 1;
    ctxt->spaceNr = top;

    popped = ctxt->spaceTab[top];
    ctxt->spaceTab[top] = -1;

    ctxt->space = &ctxt->spaceTab[(top > 0) ? (top - 1) : 0];

    return(popped);
}
1976
1977
/*
1978
 * Macros for accessing the content. Those should be used only by the parser,
1979
 * and not exported.
1980
 *
1981
 * Dirty macros, i.e. one often needs to make assumptions about the context to
1982
 * use them
1983
 *
1984
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
1985
 *           To be used with extreme caution since operations consuming
1986
 *           characters may move the input buffer to a different location !
1987
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
1988
 *           This should be used internally by the parser
1989
 *           only to compare to ASCII values otherwise it would break when
1990
 *           running with UTF-8 encoding.
1991
 *   RAW     same as CUR but in the input buffer, bypass any token
1992
 *           extraction that may have been done
1993
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used only
1994
 *           to compare on ASCII based substring.
1995
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
1996
 *           strings without newlines within the parser.
1997
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
1998
 *           defined char within the parser.
1999
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
2000
 *
2001
 *   NEXT    Skip to the next character, this does the proper decoding
2002
 *           in UTF-8 mode. It also pops up unfinished entities on the fly.
2003
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
2004
 *   CUR_CHAR(l) returns the current unicode character (int), set l
2005
 *           to the number of xmlChars used for the encoding [0-5].
2006
 *   CUR_SCHAR  same but operate on a string instead of the context
2007
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
2008
 *            the index
2009
 *   GROW, SHRINK  handling of input buffers
2010
 */
2011
2012
351M
/*
 * Direct accessors for the current input. Each one expands to an
 * expression on ctxt->input, so a variable named ctxt must be in scope
 * wherever they are used. RAW and CUR expand to the same expression.
 * BASE_PTR expands to the base pointer of the current input.
 */
#define RAW (*ctxt->input->cur)
2013
227M
#define CUR (*ctxt->input->cur)
2014
239M
#define NXT(val) ctxt->input->cur[(val)]
2015
23.7M
#define CUR_PTR ctxt->input->cur
2016
5.01M
#define BASE_PTR ctxt->input->base
2017
2018
/*
 * CMPn(s, c1, ..., cn): true when the first n bytes at s equal c1..cn.
 * Bytes are compared as unsigned char and the comparison stops at the
 * first mismatch. Each macro builds on the previous one, so s is
 * evaluated once per byte compared: do not pass an expression with side
 * effects. Arguments are parenthesized so that pointer expressions such
 * as (p + 1) are cast as a whole.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((const unsigned char *) (s))[ 0 ] == (c1) && \
    ((const unsigned char *) (s))[ 1 ] == (c2) && \
    ((const unsigned char *) (s))[ 2 ] == (c3) && \
    ((const unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && \
    ((const unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && \
    ((const unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && \
    ((const unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((const unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((const unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((const unsigned char *) (s))[ 9 ] == (c10) )
2035
2036
74.6M
/*
 * SKIP(val): advance the current input by val xmlChars and add val to the
 * column counter, then ask for more input if the new position holds a 0.
 * Line numbers are not touched. val is evaluated twice.
 */
#define SKIP(val) do {                                                  \
    ctxt->input->cur += (val);                                          \
    ctxt->input->col += (val);                                          \
    if (*ctxt->input->cur == 0)                                         \
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);                   \
  } while (0)

/*
 * SKIPL(val): advance the current input by val xmlChars one at a time,
 * keeping line and column up to date (a '\n' bumps the line and resets
 * the column to 1), then ask for more input if the new position holds a 0.
 * val is re-evaluated on every loop iteration.
 */
#define SKIPL(val) do {                                                 \
    int skipl;                                                          \
    for(skipl=0; skipl<(val); skipl++) {                                \
        if (*(ctxt->input->cur) == '\n') {                              \
            ctxt->input->line++; ctxt->input->col = 1;                  \
        } else ctxt->input->col++;                                      \
        ctxt->input->cur++;                                             \
    }                                                                   \
    if (*ctxt->input->cur == 0)                                         \
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);                   \
  } while (0)
2053
2054
156M
#define SHRINK if ((ctxt->progressive == 0) &&       \
2055
156M
       (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
2056
156M
       (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
2057
156M
  xmlSHRINK (ctxt);
2058
2059
3.09M
static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
2060
    /* Don't shrink memory buffers. */
2061
3.09M
    if ((ctxt->input->buf) &&
2062
3.09M
        ((ctxt->input->buf->encoder) || (ctxt->input->buf->readcallback)))
2063
9.79k
        xmlParserInputShrink(ctxt->input);
2064
3.09M
    if (*ctxt->input->cur == 0)
2065
112k
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2066
3.09M
}
2067
2068
467M
#define GROW if ((ctxt->progressive == 0) &&       \
2069
467M
     (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
2070
467M
  xmlGROW (ctxt);
2071
2072
47.9M
static void xmlGROW (xmlParserCtxtPtr ctxt) {
2073
47.9M
    ptrdiff_t curEnd = ctxt->input->end - ctxt->input->cur;
2074
47.9M
    ptrdiff_t curBase = ctxt->input->cur - ctxt->input->base;
2075
2076
47.9M
    if (((curEnd > XML_MAX_LOOKUP_LIMIT) ||
2077
47.9M
         (curBase > XML_MAX_LOOKUP_LIMIT)) &&
2078
47.9M
         ((ctxt->input->buf) &&
2079
0
          (ctxt->input->buf->readcallback != NULL)) &&
2080
47.9M
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
2081
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
2082
0
        xmlHaltParser(ctxt);
2083
0
  return;
2084
0
    }
2085
47.9M
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2086
47.9M
    if ((ctxt->input->cur > ctxt->input->end) ||
2087
47.9M
        (ctxt->input->cur < ctxt->input->base)) {
2088
0
        xmlHaltParser(ctxt);
2089
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "cur index out of bound");
2090
0
  return;
2091
0
    }
2092
47.9M
    if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0))
2093
917k
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2094
47.9M
}
2095
2096
102M
/* SKIP_BLANKS: skip blanks at the current position, see xmlSkipBlankChars. */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* NEXT: move to the next character through xmlNextChar. */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: advance the cursor and the column by one and ask for more input
 * if the new position holds a 0. Line numbers are not touched.
 * Expands to a brace block, so "NEXT1;" cannot be followed directly by
 * an "else".
 */
#define NEXT1 {                                                         \
        ctxt->input->col++;                                             \
        ctxt->input->cur++;                                             \
        if (*ctxt->input->cur == 0)                                     \
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);               \
    }

/*
 * NEXTL(l): step over the current character, which is l xmlChars long.
 * A '\n' bumps the line and resets the column to 1; anything else
 * advances the column by one.
 */
#define NEXTL(l) do {                                                   \
    if (*(ctxt->input->cur) == '\n') {                                  \
        ctxt->input->line++; ctxt->input->col = 1;                      \
    } else ctxt->input->col++;                                          \
    ctxt->input->cur += (l);                                            \
  } while (0)

/* CUR_CHAR(l): current character of the input, its length stored in l. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
/* CUR_SCHAR(s, l): same, reading from the string s instead of the input. */
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l,b,i,v): append character v to buffer b at index i. When l
 * is 1 the value is stored as a single xmlChar, otherwise it goes through
 * xmlCopyCharMultiByte; i is advanced by the number of xmlChars written.
 * Expands to an unbraced if/else without a trailing semicolon: use it
 * only as a complete statement.
 */
#define COPY_BUF(l,b,i,v)                                               \
    if ((l) == 1) (b)[(i)++] = (v);                                     \
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
2120
2121
/**
2122
 * xmlSkipBlankChars:
2123
 * @ctxt:  the XML parser context
2124
 *
2125
 * skip all blanks character found at that point in the input streams.
2126
 * It pops up finished entities in the process if allowable at that point.
2127
 *
2128
 * Returns the number of space chars skipped
2129
 */
2130
2131
int
2132
102M
xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2133
102M
    int res = 0;
2134
2135
    /*
2136
     * It's Okay to use CUR/NEXT here since all the blanks are on
2137
     * the ASCII range.
2138
     */
2139
102M
    if (((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) ||
2140
102M
        (ctxt->instate == XML_PARSER_START)) {
2141
58.3M
  const xmlChar *cur;
2142
  /*
2143
   * if we are in the document content, go really fast
2144
   */
2145
58.3M
  cur = ctxt->input->cur;
2146
58.3M
  while (IS_BLANK_CH(*cur)) {
2147
23.1M
      if (*cur == '\n') {
2148
2.11M
    ctxt->input->line++; ctxt->input->col = 1;
2149
21.0M
      } else {
2150
21.0M
    ctxt->input->col++;
2151
21.0M
      }
2152
23.1M
      cur++;
2153
23.1M
      if (res < INT_MAX)
2154
23.1M
    res++;
2155
23.1M
      if (*cur == 0) {
2156
70.8k
    ctxt->input->cur = cur;
2157
70.8k
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2158
70.8k
    cur = ctxt->input->cur;
2159
70.8k
      }
2160
23.1M
  }
2161
58.3M
  ctxt->input->cur = cur;
2162
58.3M
    } else {
2163
44.0M
        int expandPE = ((ctxt->external != 0) || (ctxt->inputNr != 1));
2164
2165
146M
  while (ctxt->instate != XML_PARSER_EOF) {
2166
146M
            if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2167
71.4M
    NEXT;
2168
74.9M
      } else if (CUR == '%') {
2169
                /*
2170
                 * Need to handle support of entities branching here
2171
                 */
2172
16.4M
          if ((expandPE == 0) || (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2173
596k
                    break;
2174
15.8M
          xmlParsePEReference(ctxt);
2175
58.5M
            } else if (CUR == 0) {
2176
15.1M
                unsigned long consumed;
2177
15.1M
                xmlEntityPtr ent;
2178
2179
15.1M
                if (ctxt->inputNr <= 1)
2180
67.7k
                    break;
2181
2182
15.0M
                consumed = ctxt->input->consumed;
2183
15.0M
                xmlSaturatedAddSizeT(&consumed,
2184
15.0M
                                     ctxt->input->cur - ctxt->input->base);
2185
2186
                /*
2187
                 * Add to sizeentities when parsing an external entity
2188
                 * for the first time.
2189
                 */
2190
15.0M
                ent = ctxt->input->entity;
2191
15.0M
                if ((ent->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
2192
15.0M
                    ((ent->flags & XML_ENT_PARSED) == 0)) {
2193
3.16k
                    ent->flags |= XML_ENT_PARSED;
2194
2195
3.16k
                    xmlSaturatedAdd(&ctxt->sizeentities, consumed);
2196
3.16k
                }
2197
2198
15.0M
                xmlParserEntityCheck(ctxt, consumed);
2199
2200
15.0M
                xmlPopInput(ctxt);
2201
43.3M
            } else {
2202
43.3M
                break;
2203
43.3M
            }
2204
2205
            /*
2206
             * Also increase the counter when entering or exiting a PERef.
2207
             * The spec says: "When a parameter-entity reference is recognized
2208
             * in the DTD and included, its replacement text MUST be enlarged
2209
             * by the attachment of one leading and one following space (#x20)
2210
             * character."
2211
             */
2212
102M
      if (res < INT_MAX)
2213
102M
    res++;
2214
102M
        }
2215
44.0M
    }
2216
102M
    return(res);
2217
102M
}
2218
2219
/************************************************************************
2220
 *                  *
2221
 *    Commodity functions to handle entities      *
2222
 *                  *
2223
 ************************************************************************/
2224
2225
/**
2226
 * xmlPopInput:
2227
 * @ctxt:  an XML parser context
2228
 *
2229
 * xmlPopInput: the current input pointed by ctxt->input came to an end
2230
 *          pop it and return the next char.
2231
 *
2232
 * Returns the current xmlChar in the parser context
2233
 */
2234
xmlChar
2235
15.0M
xmlPopInput(xmlParserCtxtPtr ctxt) {
2236
15.0M
    xmlParserInputPtr input;
2237
2238
15.0M
    if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2239
15.0M
    if (xmlParserDebugEntities)
2240
0
  xmlGenericError(xmlGenericErrorContext,
2241
0
    "Popping input %d\n", ctxt->inputNr);
2242
15.0M
    if ((ctxt->inputNr > 1) && (ctxt->inSubset == 0) &&
2243
15.0M
        (ctxt->instate != XML_PARSER_EOF))
2244
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2245
0
                    "Unfinished entity outside the DTD");
2246
15.0M
    input = inputPop(ctxt);
2247
15.0M
    if (input->entity != NULL)
2248
15.0M
        input->entity->flags &= ~XML_ENT_EXPANDING;
2249
15.0M
    xmlFreeInputStream(input);
2250
15.0M
    if (*ctxt->input->cur == 0)
2251
7.18M
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2252
15.0M
    return(CUR);
2253
15.0M
}
2254
2255
/**
2256
 * xmlPushInput:
2257
 * @ctxt:  an XML parser context
2258
 * @input:  an XML parser input fragment (entity, XML fragment ...).
2259
 *
2260
 * xmlPushInput: switch to a new input stream which is stacked on top
2261
 *               of the previous one(s).
2262
 * Returns -1 in case of error or the index in the input stack
2263
 */
2264
int
2265
15.1M
xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2266
15.1M
    int ret;
2267
15.1M
    if (input == NULL) return(-1);
2268
2269
15.1M
    if (xmlParserDebugEntities) {
2270
0
  if ((ctxt->input != NULL) && (ctxt->input->filename))
2271
0
      xmlGenericError(xmlGenericErrorContext,
2272
0
        "%s(%d): ", ctxt->input->filename,
2273
0
        ctxt->input->line);
2274
0
  xmlGenericError(xmlGenericErrorContext,
2275
0
    "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2276
0
    }
2277
15.1M
    if (((ctxt->inputNr > 40) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2278
15.1M
        (ctxt->inputNr > 100)) {
2279
0
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2280
0
        while (ctxt->inputNr > 1)
2281
0
            xmlFreeInputStream(inputPop(ctxt));
2282
0
  return(-1);
2283
0
    }
2284
15.1M
    ret = inputPush(ctxt, input);
2285
15.1M
    if (ctxt->instate == XML_PARSER_EOF)
2286
0
        return(-1);
2287
15.1M
    GROW;
2288
15.1M
    return(ret);
2289
15.1M
}
2290
2291
/**
2292
 * xmlParseCharRef:
2293
 * @ctxt:  an XML parser context
2294
 *
2295
 * DEPRECATED: Internal function, don't use.
2296
 *
2297
 * Parse a numeric character reference. Always consumes '&'.
2298
 *
2299
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2300
 *                  '&#x' [0-9a-fA-F]+ ';'
2301
 *
2302
 * [ WFC: Legal Character ]
2303
 * Characters referred to using character references must match the
2304
 * production for Char.
2305
 *
2306
 * Returns the value parsed (as an int), 0 in case of error
2307
 */
2308
int
2309
1.91M
xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2310
1.91M
    int val = 0;
2311
1.91M
    int count = 0;
2312
2313
    /*
2314
     * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2315
     */
2316
1.91M
    if ((RAW == '&') && (NXT(1) == '#') &&
2317
1.91M
        (NXT(2) == 'x')) {
2318
440k
  SKIP(3);
2319
440k
  GROW;
2320
1.68M
  while (RAW != ';') { /* loop blocked by count */
2321
1.37M
      if (count++ > 20) {
2322
39.0k
    count = 0;
2323
39.0k
    GROW;
2324
39.0k
                if (ctxt->instate == XML_PARSER_EOF)
2325
0
                    return(0);
2326
39.0k
      }
2327
1.37M
      if ((RAW >= '0') && (RAW <= '9'))
2328
862k
          val = val * 16 + (CUR - '0');
2329
515k
      else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2330
255k
          val = val * 16 + (CUR - 'a') + 10;
2331
260k
      else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2332
129k
          val = val * 16 + (CUR - 'A') + 10;
2333
130k
      else {
2334
130k
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2335
130k
    val = 0;
2336
130k
    break;
2337
130k
      }
2338
1.24M
      if (val > 0x110000)
2339
359k
          val = 0x110000;
2340
2341
1.24M
      NEXT;
2342
1.24M
      count++;
2343
1.24M
  }
2344
440k
  if (RAW == ';') {
2345
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2346
309k
      ctxt->input->col++;
2347
309k
      ctxt->input->cur++;
2348
309k
  }
2349
1.46M
    } else if  ((RAW == '&') && (NXT(1) == '#')) {
2350
1.46M
  SKIP(2);
2351
1.46M
  GROW;
2352
5.51M
  while (RAW != ';') { /* loop blocked by count */
2353
4.30M
      if (count++ > 20) {
2354
39.3k
    count = 0;
2355
39.3k
    GROW;
2356
39.3k
                if (ctxt->instate == XML_PARSER_EOF)
2357
0
                    return(0);
2358
39.3k
      }
2359
4.30M
      if ((RAW >= '0') && (RAW <= '9'))
2360
4.04M
          val = val * 10 + (CUR - '0');
2361
254k
      else {
2362
254k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2363
254k
    val = 0;
2364
254k
    break;
2365
254k
      }
2366
4.04M
      if (val > 0x110000)
2367
305k
          val = 0x110000;
2368
2369
4.04M
      NEXT;
2370
4.04M
      count++;
2371
4.04M
  }
2372
1.46M
  if (RAW == ';') {
2373
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2374
1.21M
      ctxt->input->col++;
2375
1.21M
      ctxt->input->cur++;
2376
1.21M
  }
2377
1.46M
    } else {
2378
0
        if (RAW == '&')
2379
0
            SKIP(1);
2380
0
        xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2381
0
    }
2382
2383
    /*
2384
     * [ WFC: Legal Character ]
2385
     * Characters referred to using character references must match the
2386
     * production for Char.
2387
     */
2388
1.91M
    if (val >= 0x110000) {
2389
4.73k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2390
4.73k
                "xmlParseCharRef: character reference out of bounds\n",
2391
4.73k
          val);
2392
1.90M
    } else if (IS_CHAR(val)) {
2393
1.45M
        return(val);
2394
1.45M
    } else {
2395
449k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2396
449k
                          "xmlParseCharRef: invalid xmlChar value %d\n",
2397
449k
                    val);
2398
449k
    }
2399
454k
    return(0);
2400
1.91M
}
2401
2402
/**
2403
 * xmlParseStringCharRef:
2404
 * @ctxt:  an XML parser context
2405
 * @str:  a pointer to an index in the string
2406
 *
2407
 * parse Reference declarations, variant parsing from a string rather
2408
 * than an an input flow.
2409
 *
2410
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2411
 *                  '&#x' [0-9a-fA-F]+ ';'
2412
 *
2413
 * [ WFC: Legal Character ]
2414
 * Characters referred to using character references must match the
2415
 * production for Char.
2416
 *
2417
 * Returns the value parsed (as an int), 0 in case of error, str will be
2418
 *         updated to the current value of the index
2419
 */
2420
static int
2421
293k
xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2422
293k
    const xmlChar *ptr;
2423
293k
    xmlChar cur;
2424
293k
    int val = 0;
2425
2426
293k
    if ((str == NULL) || (*str == NULL)) return(0);
2427
293k
    ptr = *str;
2428
293k
    cur = *ptr;
2429
293k
    if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2430
37.6k
  ptr += 3;
2431
37.6k
  cur = *ptr;
2432
127k
  while (cur != ';') { /* Non input consuming loop */
2433
97.6k
      if ((cur >= '0') && (cur <= '9'))
2434
47.5k
          val = val * 16 + (cur - '0');
2435
50.1k
      else if ((cur >= 'a') && (cur <= 'f'))
2436
16.9k
          val = val * 16 + (cur - 'a') + 10;
2437
33.2k
      else if ((cur >= 'A') && (cur <= 'F'))
2438
25.4k
          val = val * 16 + (cur - 'A') + 10;
2439
7.81k
      else {
2440
7.81k
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2441
7.81k
    val = 0;
2442
7.81k
    break;
2443
7.81k
      }
2444
89.8k
      if (val > 0x110000)
2445
26.9k
          val = 0x110000;
2446
2447
89.8k
      ptr++;
2448
89.8k
      cur = *ptr;
2449
89.8k
  }
2450
37.6k
  if (cur == ';')
2451
29.8k
      ptr++;
2452
255k
    } else if  ((cur == '&') && (ptr[1] == '#')){
2453
255k
  ptr += 2;
2454
255k
  cur = *ptr;
2455
900k
  while (cur != ';') { /* Non input consuming loops */
2456
655k
      if ((cur >= '0') && (cur <= '9'))
2457
644k
          val = val * 10 + (cur - '0');
2458
10.5k
      else {
2459
10.5k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2460
10.5k
    val = 0;
2461
10.5k
    break;
2462
10.5k
      }
2463
644k
      if (val > 0x110000)
2464
49.1k
          val = 0x110000;
2465
2466
644k
      ptr++;
2467
644k
      cur = *ptr;
2468
644k
  }
2469
255k
  if (cur == ';')
2470
245k
      ptr++;
2471
255k
    } else {
2472
0
  xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2473
0
  return(0);
2474
0
    }
2475
293k
    *str = ptr;
2476
2477
    /*
2478
     * [ WFC: Legal Character ]
2479
     * Characters referred to using character references must match the
2480
     * production for Char.
2481
     */
2482
293k
    if (val >= 0x110000) {
2483
839
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2484
839
                "xmlParseStringCharRef: character reference out of bounds\n",
2485
839
                val);
2486
292k
    } else if (IS_CHAR(val)) {
2487
266k
        return(val);
2488
266k
    } else {
2489
26.1k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2490
26.1k
        "xmlParseStringCharRef: invalid xmlChar value %d\n",
2491
26.1k
        val);
2492
26.1k
    }
2493
27.0k
    return(0);
2494
293k
}
2495
2496
/**
2497
 * xmlParserHandlePEReference:
2498
 * @ctxt:  the parser context
2499
 *
2500
 * [69] PEReference ::= '%' Name ';'
2501
 *
2502
 * [ WFC: No Recursion ]
2503
 * A parsed entity must not contain a recursive
2504
 * reference to itself, either directly or indirectly.
2505
 *
2506
 * [ WFC: Entity Declared ]
2507
 * In a document without any DTD, a document with only an internal DTD
2508
 * subset which contains no parameter entity references, or a document
2509
 * with "standalone='yes'", ...  ... The declaration of a parameter
2510
 * entity must precede any reference to it...
2511
 *
2512
 * [ VC: Entity Declared ]
2513
 * In a document with an external subset or external parameter entities
2514
 * with "standalone='no'", ...  ... The declaration of a parameter entity
2515
 * must precede any reference to it...
2516
 *
2517
 * [ WFC: In DTD ]
2518
 * Parameter-entity references may only appear in the DTD.
2519
 * NOTE: misleading but this is handled.
2520
 *
2521
 * A PEReference may have been detected in the current input stream
2522
 * the handling is done accordingly to
2523
 *      http://www.w3.org/TR/REC-xml#entproc
2524
 * i.e.
2525
 *   - Included in literal in entity values
2526
 *   - Included as Parameter Entity reference within DTDs
2527
 */
2528
void
2529
0
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2530
0
    switch(ctxt->instate) {
2531
0
  case XML_PARSER_CDATA_SECTION:
2532
0
      return;
2533
0
        case XML_PARSER_COMMENT:
2534
0
      return;
2535
0
  case XML_PARSER_START_TAG:
2536
0
      return;
2537
0
  case XML_PARSER_END_TAG:
2538
0
      return;
2539
0
        case XML_PARSER_EOF:
2540
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2541
0
      return;
2542
0
        case XML_PARSER_PROLOG:
2543
0
  case XML_PARSER_START:
2544
0
  case XML_PARSER_MISC:
2545
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2546
0
      return;
2547
0
  case XML_PARSER_ENTITY_DECL:
2548
0
        case XML_PARSER_CONTENT:
2549
0
        case XML_PARSER_ATTRIBUTE_VALUE:
2550
0
        case XML_PARSER_PI:
2551
0
  case XML_PARSER_SYSTEM_LITERAL:
2552
0
  case XML_PARSER_PUBLIC_LITERAL:
2553
      /* we just ignore it there */
2554
0
      return;
2555
0
        case XML_PARSER_EPILOG:
2556
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2557
0
      return;
2558
0
  case XML_PARSER_ENTITY_VALUE:
2559
      /*
2560
       * NOTE: in the case of entity values, we don't do the
2561
       *       substitution here since we need the literal
2562
       *       entity value to be able to save the internal
2563
       *       subset of the document.
2564
       *       This will be handled by xmlStringDecodeEntities
2565
       */
2566
0
      return;
2567
0
        case XML_PARSER_DTD:
2568
      /*
2569
       * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2570
       * In the internal DTD subset, parameter-entity references
2571
       * can occur only where markup declarations can occur, not
2572
       * within markup declarations.
2573
       * In that case this is handled in xmlParseMarkupDecl
2574
       */
2575
0
      if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2576
0
    return;
2577
0
      if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2578
0
    return;
2579
0
            break;
2580
0
        case XML_PARSER_IGNORE:
2581
0
            return;
2582
0
    }
2583
2584
0
    xmlParsePEReference(ctxt);
2585
0
}
2586
2587
/*
 * Macro used to grow the current buffer.
 * The new size is twice the current size plus n bytes.
 * buffer##_size is expected to be a size_t
 * mem_error: is expected to handle memory allocation failures; it is
 * also the target when the new size would wrap around.
 * Expands to a brace block.
 */
#define growBuffer(buffer, n) {                                         \
    xmlChar *tmp;                                                       \
    size_t new_size = buffer##_size * 2 + (n);                          \
    if (new_size < buffer##_size) goto mem_error;                       \
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);                     \
    if (tmp == NULL) goto mem_error;                                    \
    buffer = tmp;                                                       \
    buffer##_size = new_size;                                           \
}
2601
2602
/**
2603
 * xmlStringDecodeEntitiesInt:
2604
 * @ctxt:  the parser context
2605
 * @str:  the input string
2606
 * @len: the string length
2607
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2608
 * @end:  an end marker xmlChar, 0 if none
2609
 * @end2:  an end marker xmlChar, 0 if none
2610
 * @end3:  an end marker xmlChar, 0 if none
2611
 * @check:  whether to perform entity checks
2612
 */
2613
static xmlChar *
2614
xmlStringDecodeEntitiesInt(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2615
               int what, xmlChar end, xmlChar  end2, xmlChar end3,
2616
29.1M
                           int check) {
2617
29.1M
    xmlChar *buffer = NULL;
2618
29.1M
    size_t buffer_size = 0;
2619
29.1M
    size_t nbchars = 0;
2620
2621
29.1M
    xmlChar *current = NULL;
2622
29.1M
    xmlChar *rep = NULL;
2623
29.1M
    const xmlChar *last;
2624
29.1M
    xmlEntityPtr ent;
2625
29.1M
    int c,l;
2626
2627
29.1M
    if (str == NULL)
2628
18.9k
        return(NULL);
2629
29.1M
    last = str + len;
2630
2631
29.1M
    if (((ctxt->depth > 40) &&
2632
29.1M
         ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2633
29.1M
  (ctxt->depth > 100)) {
2634
0
  xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_LOOP,
2635
0
                       "Maximum entity nesting depth exceeded");
2636
0
  return(NULL);
2637
0
    }
2638
2639
    /*
2640
     * allocate a translation buffer.
2641
     */
2642
29.1M
    buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2643
29.1M
    buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
2644
29.1M
    if (buffer == NULL) goto mem_error;
2645
2646
    /*
2647
     * OK loop until we reach one of the ending char or a size limit.
2648
     * we are operating on already parsed values.
2649
     */
2650
29.1M
    if (str < last)
2651
29.0M
  c = CUR_SCHAR(str, l);
2652
133k
    else
2653
133k
        c = 0;
2654
2.25G
    while ((c != 0) && (c != end) && /* non input consuming loop */
2655
2.25G
           (c != end2) && (c != end3) &&
2656
2.25G
           (ctxt->instate != XML_PARSER_EOF)) {
2657
2658
2.22G
  if (c == 0) break;
2659
2.22G
        if ((c == '&') && (str[1] == '#')) {
2660
293k
      int val = xmlParseStringCharRef(ctxt, &str);
2661
293k
      if (val == 0)
2662
27.0k
                goto int_error;
2663
266k
      COPY_BUF(0,buffer,nbchars,val);
2664
266k
      if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2665
2.22k
          growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2666
2.22k
      }
2667
2.22G
  } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2668
28.2M
      if (xmlParserDebugEntities)
2669
0
    xmlGenericError(xmlGenericErrorContext,
2670
0
      "String decoding Entity Reference: %.30s\n",
2671
0
      str);
2672
28.2M
      ent = xmlParseStringEntityRef(ctxt, &str);
2673
28.2M
      if ((ent != NULL) &&
2674
28.2M
    (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2675
36.5k
    if (ent->content != NULL) {
2676
36.5k
        COPY_BUF(0,buffer,nbchars,ent->content[0]);
2677
36.5k
        if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2678
1.59k
      growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2679
1.59k
        }
2680
36.5k
    } else {
2681
0
        xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2682
0
          "predefined entity has no content\n");
2683
0
                    goto int_error;
2684
0
    }
2685
28.1M
      } else if ((ent != NULL) && (ent->content != NULL)) {
2686
26.4M
          if ((check) && (xmlParserEntityCheck(ctxt, ent->length)))
2687
248
                    goto int_error;
2688
2689
26.4M
                if (ent->flags & XML_ENT_EXPANDING) {
2690
652
              xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2691
652
                    xmlHaltParser(ctxt);
2692
652
                    ent->content[0] = 0;
2693
652
                    goto int_error;
2694
652
                }
2695
2696
26.4M
                ent->flags |= XML_ENT_EXPANDING;
2697
26.4M
    ctxt->depth++;
2698
26.4M
    rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
2699
26.4M
                        ent->length, what, 0, 0, 0, check);
2700
26.4M
    ctxt->depth--;
2701
26.4M
                ent->flags &= ~XML_ENT_EXPANDING;
2702
2703
26.4M
    if (rep == NULL) {
2704
4.70k
                    ent->content[0] = 0;
2705
4.70k
                    goto int_error;
2706
4.70k
                }
2707
2708
26.3M
                current = rep;
2709
5.52G
                while (*current != 0) { /* non input consuming loop */
2710
5.50G
                    buffer[nbchars++] = *current++;
2711
5.50G
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2712
2.94M
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2713
2.94M
                    }
2714
5.50G
                }
2715
26.3M
                xmlFree(rep);
2716
26.3M
                rep = NULL;
2717
26.3M
      } else if (ent != NULL) {
2718
1.05M
    int i = xmlStrlen(ent->name);
2719
1.05M
    const xmlChar *cur = ent->name;
2720
2721
1.05M
    buffer[nbchars++] = '&';
2722
1.05M
    if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
2723
4.71k
        growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
2724
4.71k
    }
2725
7.33M
    for (;i > 0;i--)
2726
6.27M
        buffer[nbchars++] = *cur++;
2727
1.05M
    buffer[nbchars++] = ';';
2728
1.05M
      }
2729
2.19G
  } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2730
353k
      if (xmlParserDebugEntities)
2731
0
    xmlGenericError(xmlGenericErrorContext,
2732
0
      "String decoding PE Reference: %.30s\n", str);
2733
353k
      ent = xmlParseStringPEReference(ctxt, &str);
2734
353k
      if (ent != NULL) {
2735
329k
                if (ent->content == NULL) {
2736
        /*
2737
         * Note: external parsed entities will not be loaded,
2738
         * it is not required for a non-validating parser to
2739
         * complete external PEReferences coming from the
2740
         * internal subset
2741
         */
2742
3.11k
        if (((ctxt->options & XML_PARSE_NOENT) != 0) ||
2743
3.11k
      ((ctxt->options & XML_PARSE_DTDVALID) != 0) ||
2744
3.11k
      (ctxt->validate != 0)) {
2745
2.91k
      xmlLoadEntityContent(ctxt, ent);
2746
2.91k
        } else {
2747
191
      xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
2748
191
      "not validating will not read content for PE entity %s\n",
2749
191
                          ent->name, NULL);
2750
191
        }
2751
3.11k
    }
2752
2753
329k
          if ((check) && (xmlParserEntityCheck(ctxt, ent->length)))
2754
137
                    goto int_error;
2755
2756
329k
                if (ent->flags & XML_ENT_EXPANDING) {
2757
383
              xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2758
383
                    xmlHaltParser(ctxt);
2759
383
                    if (ent->content != NULL)
2760
244
                        ent->content[0] = 0;
2761
383
                    goto int_error;
2762
383
                }
2763
2764
329k
                ent->flags |= XML_ENT_EXPANDING;
2765
329k
    ctxt->depth++;
2766
329k
    rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
2767
329k
                        ent->length, what, 0, 0, 0, check);
2768
329k
    ctxt->depth--;
2769
329k
                ent->flags &= ~XML_ENT_EXPANDING;
2770
2771
329k
    if (rep == NULL) {
2772
1.86k
                    if (ent->content != NULL)
2773
287
                        ent->content[0] = 0;
2774
1.86k
                    goto int_error;
2775
1.86k
                }
2776
327k
                current = rep;
2777
1.22G
                while (*current != 0) { /* non input consuming loop */
2778
1.22G
                    buffer[nbchars++] = *current++;
2779
1.22G
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2780
72.3k
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2781
72.3k
                    }
2782
1.22G
                }
2783
327k
                xmlFree(rep);
2784
327k
                rep = NULL;
2785
327k
      }
2786
2.19G
  } else {
2787
2.19G
      COPY_BUF(l,buffer,nbchars,c);
2788
2.19G
      str += l;
2789
2.19G
      if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2790
451k
          growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2791
451k
      }
2792
2.19G
  }
2793
2.22G
  if (str < last)
2794
2.19G
      c = CUR_SCHAR(str, l);
2795
28.9M
  else
2796
28.9M
      c = 0;
2797
2.22G
    }
2798
29.1M
    buffer[nbchars] = 0;
2799
29.1M
    return(buffer);
2800
2801
0
mem_error:
2802
0
    xmlErrMemory(ctxt, NULL);
2803
35.0k
int_error:
2804
35.0k
    if (rep != NULL)
2805
0
        xmlFree(rep);
2806
35.0k
    if (buffer != NULL)
2807
35.0k
        xmlFree(buffer);
2808
35.0k
    return(NULL);
2809
0
}
2810
2811
/**
2812
 * xmlStringLenDecodeEntities:
2813
 * @ctxt:  the parser context
2814
 * @str:  the input string
2815
 * @len: the string length
2816
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2817
 * @end:  an end marker xmlChar, 0 if none
2818
 * @end2:  an end marker xmlChar, 0 if none
2819
 * @end3:  an end marker xmlChar, 0 if none
2820
 *
2821
 * DEPRECATED: Internal function, don't use.
2822
 *
2823
 * Takes a entity string content and process to do the adequate substitutions.
2824
 *
2825
 * [67] Reference ::= EntityRef | CharRef
2826
 *
2827
 * [69] PEReference ::= '%' Name ';'
2828
 *
2829
 * Returns A newly allocated string with the substitution done. The caller
2830
 *      must deallocate it !
2831
 */
2832
xmlChar *
2833
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2834
                           int what, xmlChar end, xmlChar  end2,
2835
7.55k
                           xmlChar end3) {
2836
7.55k
    if ((ctxt == NULL) || (str == NULL) || (len < 0))
2837
0
        return(NULL);
2838
7.55k
    return(xmlStringDecodeEntitiesInt(ctxt, str, len, what,
2839
7.55k
                                      end, end2, end3, 0));
2840
7.55k
}
2841
2842
/**
2843
 * xmlStringDecodeEntities:
2844
 * @ctxt:  the parser context
2845
 * @str:  the input string
2846
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2847
 * @end:  an end marker xmlChar, 0 if none
2848
 * @end2:  an end marker xmlChar, 0 if none
2849
 * @end3:  an end marker xmlChar, 0 if none
2850
 *
2851
 * DEPRECATED: Internal function, don't use.
2852
 *
2853
 * Takes a entity string content and process to do the adequate substitutions.
2854
 *
2855
 * [67] Reference ::= EntityRef | CharRef
2856
 *
2857
 * [69] PEReference ::= '%' Name ';'
2858
 *
2859
 * Returns A newly allocated string with the substitution done. The caller
2860
 *      must deallocate it !
2861
 */
2862
xmlChar *
2863
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2864
297k
            xmlChar end, xmlChar  end2, xmlChar end3) {
2865
297k
    if ((ctxt == NULL) || (str == NULL)) return(NULL);
2866
297k
    return(xmlStringDecodeEntitiesInt(ctxt, str, xmlStrlen(str), what,
2867
297k
                                      end, end2, end3, 0));
2868
297k
}
2869
2870
/************************************************************************
2871
 *                  *
2872
 *    Commodity functions, cleanup needed ?     *
2873
 *                  *
2874
 ************************************************************************/
2875
2876
/**
2877
 * areBlanks:
2878
 * @ctxt:  an XML parser context
2879
 * @str:  a xmlChar *
2880
 * @len:  the size of @str
2881
 * @blank_chars: we know the chars are blanks
2882
 *
2883
 * Is this a sequence of blank chars that one can ignore ?
2884
 *
2885
 * Returns 1 if ignorable 0 otherwise.
2886
 */
2887
2888
static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2889
9.69M
                     int blank_chars) {
2890
9.69M
    int i, ret;
2891
9.69M
    xmlNodePtr lastChild;
2892
2893
    /*
2894
     * Don't spend time trying to differentiate them, the same callback is
2895
     * used !
2896
     */
2897
9.69M
    if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2898
2.34M
  return(0);
2899
2900
    /*
2901
     * Check for xml:space value.
2902
     */
2903
7.35M
    if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2904
7.35M
        (*(ctxt->space) == -2))
2905
3.33M
  return(0);
2906
2907
    /*
2908
     * Check that the string is made of blanks
2909
     */
2910
4.01M
    if (blank_chars == 0) {
2911
7.01M
  for (i = 0;i < len;i++)
2912
6.23M
      if (!(IS_BLANK_CH(str[i]))) return(0);
2913
1.84M
    }
2914
2915
    /*
2916
     * Look if the element is mixed content in the DTD if available
2917
     */
2918
2.94M
    if (ctxt->node == NULL) return(0);
2919
2.79M
    if (ctxt->myDoc != NULL) {
2920
2.79M
  ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2921
2.79M
        if (ret == 0) return(1);
2922
2.59M
        if (ret == 1) return(0);
2923
2.59M
    }
2924
2925
    /*
2926
     * Otherwise, heuristic :-\
2927
     */
2928
2.57M
    if ((RAW != '<') && (RAW != 0xD)) return(0);
2929
2.47M
    if ((ctxt->node->children == NULL) &&
2930
2.47M
  (RAW == '<') && (NXT(1) == '/')) return(0);
2931
2932
2.46M
    lastChild = xmlGetLastChild(ctxt->node);
2933
2.46M
    if (lastChild == NULL) {
2934
915k
        if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2935
915k
            (ctxt->node->content != NULL)) return(0);
2936
1.54M
    } else if (xmlNodeIsText(lastChild))
2937
129k
        return(0);
2938
1.41M
    else if ((ctxt->node->children != NULL) &&
2939
1.41M
             (xmlNodeIsText(ctxt->node->children)))
2940
41.3k
        return(0);
2941
2.28M
    return(1);
2942
2.46M
}
2943
2944
/************************************************************************
2945
 *                  *
2946
 *    Extra stuff for namespace support     *
2947
 *  Relates to http://www.w3.org/TR/WD-xml-names      *
2948
 *                  *
2949
 ************************************************************************/
2950
2951
/**
2952
 * xmlSplitQName:
2953
 * @ctxt:  an XML parser context
2954
 * @name:  an XML parser context
2955
 * @prefix:  a xmlChar **
2956
 *
2957
 * parse an UTF8 encoded XML qualified name string
2958
 *
2959
 * [NS 5] QName ::= (Prefix ':')? LocalPart
2960
 *
2961
 * [NS 6] Prefix ::= NCName
2962
 *
2963
 * [NS 7] LocalPart ::= NCName
2964
 *
2965
 * Returns the local part, and prefix is updated
2966
 *   to get the Prefix if any.
2967
 */
2968
2969
xmlChar *
2970
9.38M
xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2971
9.38M
    xmlChar buf[XML_MAX_NAMELEN + 5];
2972
9.38M
    xmlChar *buffer = NULL;
2973
9.38M
    int len = 0;
2974
9.38M
    int max = XML_MAX_NAMELEN;
2975
9.38M
    xmlChar *ret = NULL;
2976
9.38M
    const xmlChar *cur = name;
2977
9.38M
    int c;
2978
2979
9.38M
    if (prefix == NULL) return(NULL);
2980
9.38M
    *prefix = NULL;
2981
2982
9.38M
    if (cur == NULL) return(NULL);
2983
2984
#ifndef XML_XML_NAMESPACE
2985
    /* xml: prefix is not really a namespace */
2986
    if ((cur[0] == 'x') && (cur[1] == 'm') &&
2987
        (cur[2] == 'l') && (cur[3] == ':'))
2988
  return(xmlStrdup(name));
2989
#endif
2990
2991
    /* nasty but well=formed */
2992
9.38M
    if (cur[0] == ':')
2993
38.8k
  return(xmlStrdup(name));
2994
2995
9.35M
    c = *cur++;
2996
51.2M
    while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2997
41.8M
  buf[len++] = c;
2998
41.8M
  c = *cur++;
2999
41.8M
    }
3000
9.35M
    if (len >= max) {
3001
  /*
3002
   * Okay someone managed to make a huge name, so he's ready to pay
3003
   * for the processing speed.
3004
   */
3005
27.3k
  max = len * 2;
3006
3007
27.3k
  buffer = (xmlChar *) xmlMallocAtomic(max);
3008
27.3k
  if (buffer == NULL) {
3009
0
      xmlErrMemory(ctxt, NULL);
3010
0
      return(NULL);
3011
0
  }
3012
27.3k
  memcpy(buffer, buf, len);
3013
2.95M
  while ((c != 0) && (c != ':')) { /* tested bigname.xml */
3014
2.92M
      if (len + 10 > max) {
3015
7.43k
          xmlChar *tmp;
3016
3017
7.43k
    max *= 2;
3018
7.43k
    tmp = (xmlChar *) xmlRealloc(buffer, max);
3019
7.43k
    if (tmp == NULL) {
3020
0
        xmlFree(buffer);
3021
0
        xmlErrMemory(ctxt, NULL);
3022
0
        return(NULL);
3023
0
    }
3024
7.43k
    buffer = tmp;
3025
7.43k
      }
3026
2.92M
      buffer[len++] = c;
3027
2.92M
      c = *cur++;
3028
2.92M
  }
3029
27.3k
  buffer[len] = 0;
3030
27.3k
    }
3031
3032
9.35M
    if ((c == ':') && (*cur == 0)) {
3033
68.2k
        if (buffer != NULL)
3034
1.64k
      xmlFree(buffer);
3035
68.2k
  *prefix = NULL;
3036
68.2k
  return(xmlStrdup(name));
3037
68.2k
    }
3038
3039
9.28M
    if (buffer == NULL)
3040
9.25M
  ret = xmlStrndup(buf, len);
3041
25.7k
    else {
3042
25.7k
  ret = buffer;
3043
25.7k
  buffer = NULL;
3044
25.7k
  max = XML_MAX_NAMELEN;
3045
25.7k
    }
3046
3047
3048
9.28M
    if (c == ':') {
3049
2.13M
  c = *cur;
3050
2.13M
        *prefix = ret;
3051
2.13M
  if (c == 0) {
3052
0
      return(xmlStrndup(BAD_CAST "", 0));
3053
0
  }
3054
2.13M
  len = 0;
3055
3056
  /*
3057
   * Check that the first character is proper to start
3058
   * a new name
3059
   */
3060
2.13M
  if (!(((c >= 0x61) && (c <= 0x7A)) ||
3061
2.13M
        ((c >= 0x41) && (c <= 0x5A)) ||
3062
2.13M
        (c == '_') || (c == ':'))) {
3063
93.6k
      int l;
3064
93.6k
      int first = CUR_SCHAR(cur, l);
3065
3066
93.6k
      if (!IS_LETTER(first) && (first != '_')) {
3067
24.4k
    xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3068
24.4k
          "Name %s is not XML Namespace compliant\n",
3069
24.4k
          name);
3070
24.4k
      }
3071
93.6k
  }
3072
2.13M
  cur++;
3073
3074
15.4M
  while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3075
13.2M
      buf[len++] = c;
3076
13.2M
      c = *cur++;
3077
13.2M
  }
3078
2.13M
  if (len >= max) {
3079
      /*
3080
       * Okay someone managed to make a huge name, so he's ready to pay
3081
       * for the processing speed.
3082
       */
3083
19.2k
      max = len * 2;
3084
3085
19.2k
      buffer = (xmlChar *) xmlMallocAtomic(max);
3086
19.2k
      if (buffer == NULL) {
3087
0
          xmlErrMemory(ctxt, NULL);
3088
0
    return(NULL);
3089
0
      }
3090
19.2k
      memcpy(buffer, buf, len);
3091
1.93M
      while (c != 0) { /* tested bigname2.xml */
3092
1.91M
    if (len + 10 > max) {
3093
7.20k
        xmlChar *tmp;
3094
3095
7.20k
        max *= 2;
3096
7.20k
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3097
7.20k
        if (tmp == NULL) {
3098
0
      xmlErrMemory(ctxt, NULL);
3099
0
      xmlFree(buffer);
3100
0
      return(NULL);
3101
0
        }
3102
7.20k
        buffer = tmp;
3103
7.20k
    }
3104
1.91M
    buffer[len++] = c;
3105
1.91M
    c = *cur++;
3106
1.91M
      }
3107
19.2k
      buffer[len] = 0;
3108
19.2k
  }
3109
3110
2.13M
  if (buffer == NULL)
3111
2.11M
      ret = xmlStrndup(buf, len);
3112
19.2k
  else {
3113
19.2k
      ret = buffer;
3114
19.2k
  }
3115
2.13M
    }
3116
3117
9.28M
    return(ret);
3118
9.28M
}
3119
3120
/************************************************************************
3121
 *                  *
3122
 *      The parser itself       *
3123
 *  Relates to http://www.w3.org/TR/REC-xml       *
3124
 *                  *
3125
 ************************************************************************/
3126
3127
/************************************************************************
3128
 *                  *
3129
 *  Routines to parse Name, NCName and NmToken      *
3130
 *                  *
3131
 ************************************************************************/
3132
/*
 * Counters only compiled in DEBUG builds. nbParseName,
 * nbParseNameComplex and nbParseNCNameComplex are incremented on entry
 * of the function they are named after; the other ones are presumably
 * used the same way by functions defined further down (to be confirmed).
 */
#ifdef DEBUG
3133
static unsigned long nbParseName = 0;
3134
static unsigned long nbParseNmToken = 0;
3135
static unsigned long nbParseNCName = 0;
3136
static unsigned long nbParseNCNameComplex = 0;
3137
static unsigned long nbParseNameComplex = 0;
3138
static unsigned long nbParseStringName = 0;
3139
#endif
3140
3141
/*
3142
 * The two following functions are related to the change of accepted
3143
 * characters for Name and NmToken in the Revision 5 of XML-1.0
3144
 * They correspond to the modified production [4] and the new production [4a]
3145
 * changes in that revision. Also note that the macros used for the
3146
 * productions Letter, Digit, CombiningChar and Extender are not needed
3147
 * anymore.
3148
 * We still keep compatibility to pre-revision5 parsing semantic if the
3149
 * new XML_PARSE_OLD10 option is given to the parser.
3150
 */
3151
static int
3152
32.6M
xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3153
32.6M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3154
        /*
3155
   * Use the new checks of production [4] [4a] amd [5] of the
3156
   * Update 5 of XML-1.0
3157
   */
3158
28.7M
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3159
28.7M
      (((c >= 'a') && (c <= 'z')) ||
3160
28.7M
       ((c >= 'A') && (c <= 'Z')) ||
3161
28.7M
       (c == '_') || (c == ':') ||
3162
28.7M
       ((c >= 0xC0) && (c <= 0xD6)) ||
3163
28.7M
       ((c >= 0xD8) && (c <= 0xF6)) ||
3164
28.7M
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3165
28.7M
       ((c >= 0x370) && (c <= 0x37D)) ||
3166
28.7M
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3167
28.7M
       ((c >= 0x200C) && (c <= 0x200D)) ||
3168
28.7M
       ((c >= 0x2070) && (c <= 0x218F)) ||
3169
28.7M
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3170
28.7M
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3171
28.7M
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3172
28.7M
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3173
28.7M
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3174
27.3M
      return(1);
3175
28.7M
    } else {
3176
3.89M
        if (IS_LETTER(c) || (c == '_') || (c == ':'))
3177
3.25M
      return(1);
3178
3.89M
    }
3179
2.10M
    return(0);
3180
32.6M
}
3181
3182
static int
3183
901M
xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3184
901M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3185
        /*
3186
   * Use the new checks of production [4] [4a] amd [5] of the
3187
   * Update 5 of XML-1.0
3188
   */
3189
852M
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3190
852M
      (((c >= 'a') && (c <= 'z')) ||
3191
851M
       ((c >= 'A') && (c <= 'Z')) ||
3192
851M
       ((c >= '0') && (c <= '9')) || /* !start */
3193
851M
       (c == '_') || (c == ':') ||
3194
851M
       (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3195
851M
       ((c >= 0xC0) && (c <= 0xD6)) ||
3196
851M
       ((c >= 0xD8) && (c <= 0xF6)) ||
3197
851M
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3198
851M
       ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3199
851M
       ((c >= 0x370) && (c <= 0x37D)) ||
3200
851M
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3201
851M
       ((c >= 0x200C) && (c <= 0x200D)) ||
3202
851M
       ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3203
851M
       ((c >= 0x2070) && (c <= 0x218F)) ||
3204
851M
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3205
851M
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3206
851M
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3207
851M
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3208
851M
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3209
824M
       return(1);
3210
852M
    } else {
3211
49.3M
        if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3212
49.3M
            (c == '.') || (c == '-') ||
3213
49.3M
      (c == '_') || (c == ':') ||
3214
49.3M
      (IS_COMBINING(c)) ||
3215
49.3M
      (IS_EXTENDER(c)))
3216
46.0M
      return(1);
3217
49.3M
    }
3218
30.7M
    return(0);
3219
901M
}
3220
3221
static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
3222
                                          int *len, int *alloc, int normalize);
3223
3224
static const xmlChar *
3225
4.66M
xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3226
4.66M
    int len = 0, l;
3227
4.66M
    int c;
3228
4.66M
    int count = 0;
3229
4.66M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3230
1.99M
                    XML_MAX_TEXT_LENGTH :
3231
4.66M
                    XML_MAX_NAME_LENGTH;
3232
3233
#ifdef DEBUG
3234
    nbParseNameComplex++;
3235
#endif
3236
3237
    /*
3238
     * Handler for more complex cases
3239
     */
3240
4.66M
    GROW;
3241
4.66M
    if (ctxt->instate == XML_PARSER_EOF)
3242
8
        return(NULL);
3243
4.66M
    c = CUR_CHAR(l);
3244
4.66M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3245
        /*
3246
   * Use the new checks of production [4] [4a] amd [5] of the
3247
   * Update 5 of XML-1.0
3248
   */
3249
2.57M
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3250
2.57M
      (!(((c >= 'a') && (c <= 'z')) ||
3251
2.41M
         ((c >= 'A') && (c <= 'Z')) ||
3252
2.41M
         (c == '_') || (c == ':') ||
3253
2.41M
         ((c >= 0xC0) && (c <= 0xD6)) ||
3254
2.41M
         ((c >= 0xD8) && (c <= 0xF6)) ||
3255
2.41M
         ((c >= 0xF8) && (c <= 0x2FF)) ||
3256
2.41M
         ((c >= 0x370) && (c <= 0x37D)) ||
3257
2.41M
         ((c >= 0x37F) && (c <= 0x1FFF)) ||
3258
2.41M
         ((c >= 0x200C) && (c <= 0x200D)) ||
3259
2.41M
         ((c >= 0x2070) && (c <= 0x218F)) ||
3260
2.41M
         ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3261
2.41M
         ((c >= 0x3001) && (c <= 0xD7FF)) ||
3262
2.41M
         ((c >= 0xF900) && (c <= 0xFDCF)) ||
3263
2.41M
         ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3264
2.41M
         ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3265
1.66M
      return(NULL);
3266
1.66M
  }
3267
908k
  len += l;
3268
908k
  NEXTL(l);
3269
908k
  c = CUR_CHAR(l);
3270
19.8M
  while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3271
19.8M
         (((c >= 'a') && (c <= 'z')) ||
3272
19.5M
          ((c >= 'A') && (c <= 'Z')) ||
3273
19.5M
          ((c >= '0') && (c <= '9')) || /* !start */
3274
19.5M
          (c == '_') || (c == ':') ||
3275
19.5M
          (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3276
19.5M
          ((c >= 0xC0) && (c <= 0xD6)) ||
3277
19.5M
          ((c >= 0xD8) && (c <= 0xF6)) ||
3278
19.5M
          ((c >= 0xF8) && (c <= 0x2FF)) ||
3279
19.5M
          ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3280
19.5M
          ((c >= 0x370) && (c <= 0x37D)) ||
3281
19.5M
          ((c >= 0x37F) && (c <= 0x1FFF)) ||
3282
19.5M
          ((c >= 0x200C) && (c <= 0x200D)) ||
3283
19.5M
          ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3284
19.5M
          ((c >= 0x2070) && (c <= 0x218F)) ||
3285
19.5M
          ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3286
19.5M
          ((c >= 0x3001) && (c <= 0xD7FF)) ||
3287
19.5M
          ((c >= 0xF900) && (c <= 0xFDCF)) ||
3288
19.5M
          ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3289
19.5M
          ((c >= 0x10000) && (c <= 0xEFFFF))
3290
19.5M
    )) {
3291
18.8M
      if (count++ > XML_PARSER_CHUNK_SIZE) {
3292
68.1k
    count = 0;
3293
68.1k
    GROW;
3294
68.1k
                if (ctxt->instate == XML_PARSER_EOF)
3295
0
                    return(NULL);
3296
68.1k
      }
3297
18.8M
            if (len <= INT_MAX - l)
3298
18.8M
          len += l;
3299
18.8M
      NEXTL(l);
3300
18.8M
      c = CUR_CHAR(l);
3301
18.8M
  }
3302
2.08M
    } else {
3303
2.08M
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3304
2.08M
      (!IS_LETTER(c) && (c != '_') &&
3305
1.95M
       (c != ':'))) {
3306
1.40M
      return(NULL);
3307
1.40M
  }
3308
682k
  len += l;
3309
682k
  NEXTL(l);
3310
682k
  c = CUR_CHAR(l);
3311
3312
14.3M
  while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3313
14.3M
         ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3314
14.1M
    (c == '.') || (c == '-') ||
3315
14.1M
    (c == '_') || (c == ':') ||
3316
14.1M
    (IS_COMBINING(c)) ||
3317
14.1M
    (IS_EXTENDER(c)))) {
3318
13.7M
      if (count++ > XML_PARSER_CHUNK_SIZE) {
3319
49.4k
    count = 0;
3320
49.4k
    GROW;
3321
49.4k
                if (ctxt->instate == XML_PARSER_EOF)
3322
0
                    return(NULL);
3323
49.4k
      }
3324
13.7M
            if (len <= INT_MAX - l)
3325
13.7M
          len += l;
3326
13.7M
      NEXTL(l);
3327
13.7M
      c = CUR_CHAR(l);
3328
13.7M
  }
3329
682k
    }
3330
1.59M
    if (len > maxLength) {
3331
10
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3332
10
        return(NULL);
3333
10
    }
3334
1.59M
    if (ctxt->input->cur - ctxt->input->base < len) {
3335
        /*
3336
         * There were a couple of bugs where PERefs lead to to a change
3337
         * of the buffer. Check the buffer size to avoid passing an invalid
3338
         * pointer to xmlDictLookup.
3339
         */
3340
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3341
0
                    "unexpected change of input buffer");
3342
0
        return (NULL);
3343
0
    }
3344
1.59M
    if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3345
5.65k
        return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3346
1.58M
    return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3347
1.59M
}
3348
3349
/**
3350
 * xmlParseName:
3351
 * @ctxt:  an XML parser context
3352
 *
3353
 * DEPRECATED: Internal function, don't use.
3354
 *
3355
 * parse an XML name.
3356
 *
3357
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3358
 *                  CombiningChar | Extender
3359
 *
3360
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3361
 *
3362
 * [6] Names ::= Name (#x20 Name)*
3363
 *
3364
 * Returns the Name parsed or NULL
3365
 */
3366
3367
const xmlChar *
3368
44.3M
xmlParseName(xmlParserCtxtPtr ctxt) {
3369
44.3M
    const xmlChar *in;
3370
44.3M
    const xmlChar *ret;
3371
44.3M
    size_t count = 0;
3372
44.3M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3373
10.7M
                       XML_MAX_TEXT_LENGTH :
3374
44.3M
                       XML_MAX_NAME_LENGTH;
3375
3376
44.3M
    GROW;
3377
3378
#ifdef DEBUG
3379
    nbParseName++;
3380
#endif
3381
3382
    /*
3383
     * Accelerator for simple ASCII names
3384
     */
3385
44.3M
    in = ctxt->input->cur;
3386
44.3M
    if (((*in >= 0x61) && (*in <= 0x7A)) ||
3387
44.3M
  ((*in >= 0x41) && (*in <= 0x5A)) ||
3388
44.3M
  (*in == '_') || (*in == ':')) {
3389
40.6M
  in++;
3390
184M
  while (((*in >= 0x61) && (*in <= 0x7A)) ||
3391
184M
         ((*in >= 0x41) && (*in <= 0x5A)) ||
3392
184M
         ((*in >= 0x30) && (*in <= 0x39)) ||
3393
184M
         (*in == '_') || (*in == '-') ||
3394
184M
         (*in == ':') || (*in == '.'))
3395
143M
      in++;
3396
40.6M
  if ((*in > 0) && (*in < 0x80)) {
3397
39.7M
      count = in - ctxt->input->cur;
3398
39.7M
            if (count > maxLength) {
3399
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3400
0
                return(NULL);
3401
0
            }
3402
39.7M
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3403
39.7M
      ctxt->input->cur = in;
3404
39.7M
      ctxt->input->col += count;
3405
39.7M
      if (ret == NULL)
3406
0
          xmlErrMemory(ctxt, NULL);
3407
39.7M
      return(ret);
3408
39.7M
  }
3409
40.6M
    }
3410
    /* accelerator for special cases */
3411
4.66M
    return(xmlParseNameComplex(ctxt));
3412
44.3M
}
3413
3414
static const xmlChar *
3415
3.74M
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3416
3.74M
    int len = 0, l;
3417
3.74M
    int c;
3418
3.74M
    int count = 0;
3419
3.74M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3420
1.11M
                    XML_MAX_TEXT_LENGTH :
3421
3.74M
                    XML_MAX_NAME_LENGTH;
3422
3.74M
    size_t startPosition = 0;
3423
3424
#ifdef DEBUG
3425
    nbParseNCNameComplex++;
3426
#endif
3427
3428
    /*
3429
     * Handler for more complex cases
3430
     */
3431
3.74M
    GROW;
3432
3.74M
    startPosition = CUR_PTR - BASE_PTR;
3433
3.74M
    c = CUR_CHAR(l);
3434
3.74M
    if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3435
3.74M
  (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3436
2.47M
  return(NULL);
3437
2.47M
    }
3438
3439
23.1M
    while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3440
23.1M
     (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3441
21.8M
  if (count++ > XML_PARSER_CHUNK_SIZE) {
3442
65.0k
      count = 0;
3443
65.0k
      GROW;
3444
65.0k
            if (ctxt->instate == XML_PARSER_EOF)
3445
0
                return(NULL);
3446
65.0k
  }
3447
21.8M
        if (len <= INT_MAX - l)
3448
21.8M
      len += l;
3449
21.8M
  NEXTL(l);
3450
21.8M
  c = CUR_CHAR(l);
3451
21.8M
  if (c == 0) {
3452
87.2k
      count = 0;
3453
      /*
3454
       * when shrinking to extend the buffer we really need to preserve
3455
       * the part of the name we already parsed. Hence rolling back
3456
       * by current length.
3457
       */
3458
87.2k
      ctxt->input->cur -= l;
3459
87.2k
      GROW;
3460
87.2k
            if (ctxt->instate == XML_PARSER_EOF)
3461
0
                return(NULL);
3462
87.2k
      ctxt->input->cur += l;
3463
87.2k
      c = CUR_CHAR(l);
3464
87.2k
  }
3465
21.8M
    }
3466
1.26M
    if (len > maxLength) {
3467
4
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3468
4
        return(NULL);
3469
4
    }
3470
1.26M
    return(xmlDictLookup(ctxt->dict, (BASE_PTR + startPosition), len));
3471
1.26M
}
3472
3473
/**
3474
 * xmlParseNCName:
3475
 * @ctxt:  an XML parser context
3476
 * @len:  length of the string parsed
3477
 *
3478
 * parse an XML NCName, i.e. a name that contains no colon.
3479
 *
3480
 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3481
 *                      CombiningChar | Extender
3482
 *
3483
 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3484
 *
3485
 * Returns the Name parsed or NULL
3486
 */
3487
3488
static const xmlChar *
3489
25.0M
xmlParseNCName(xmlParserCtxtPtr ctxt) {
3490
25.0M
    const xmlChar *in, *e;
3491
25.0M
    const xmlChar *ret;
3492
25.0M
    size_t count = 0;
3493
25.0M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3494
5.41M
                       XML_MAX_TEXT_LENGTH :
3495
25.0M
                       XML_MAX_NAME_LENGTH;
3496
3497
#ifdef DEBUG
3498
    nbParseNCName++;
3499
#endif
3500
3501
    /*
3502
     * Accelerator for simple ASCII names
3503
     */
3504
25.0M
    in = ctxt->input->cur;
3505
25.0M
    e = ctxt->input->end;
3506
25.0M
    if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3507
25.0M
   ((*in >= 0x41) && (*in <= 0x5A)) ||
3508
25.0M
   (*in == '_')) && (in < e)) {
3509
21.8M
  in++;
3510
83.6M
  while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3511
83.6M
          ((*in >= 0x41) && (*in <= 0x5A)) ||
3512
83.6M
          ((*in >= 0x30) && (*in <= 0x39)) ||
3513
83.6M
          (*in == '_') || (*in == '-') ||
3514
83.6M
          (*in == '.')) && (in < e))
3515
61.7M
      in++;
3516
21.8M
  if (in >= e)
3517
4.52k
      goto complex;
3518
21.8M
  if ((*in > 0) && (*in < 0x80)) {
3519
21.2M
      count = in - ctxt->input->cur;
3520
21.2M
            if (count > maxLength) {
3521
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3522
0
                return(NULL);
3523
0
            }
3524
21.2M
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3525
21.2M
      ctxt->input->cur = in;
3526
21.2M
      ctxt->input->col += count;
3527
21.2M
      if (ret == NULL) {
3528
0
          xmlErrMemory(ctxt, NULL);
3529
0
      }
3530
21.2M
      return(ret);
3531
21.2M
  }
3532
21.8M
    }
3533
3.74M
complex:
3534
3.74M
    return(xmlParseNCNameComplex(ctxt));
3535
25.0M
}
3536
3537
/**
3538
 * xmlParseNameAndCompare:
3539
 * @ctxt:  an XML parser context
3540
 *
3541
 * parse an XML name and compares for match
3542
 * (specialized for endtag parsing)
3543
 *
3544
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3545
 * and the name for mismatch
3546
 */
3547
3548
static const xmlChar *
3549
3.85M
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3550
3.85M
    register const xmlChar *cmp = other;
3551
3.85M
    register const xmlChar *in;
3552
3.85M
    const xmlChar *ret;
3553
3554
3.85M
    GROW;
3555
3.85M
    if (ctxt->instate == XML_PARSER_EOF)
3556
0
        return(NULL);
3557
3558
3.85M
    in = ctxt->input->cur;
3559
18.5M
    while (*in != 0 && *in == *cmp) {
3560
14.6M
  ++in;
3561
14.6M
  ++cmp;
3562
14.6M
    }
3563
3.85M
    if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3564
  /* success */
3565
3.09M
  ctxt->input->col += in - ctxt->input->cur;
3566
3.09M
  ctxt->input->cur = in;
3567
3.09M
  return (const xmlChar*) 1;
3568
3.09M
    }
3569
    /* failure (or end of input buffer), check with full function */
3570
759k
    ret = xmlParseName (ctxt);
3571
    /* strings coming from the dictionary direct compare possible */
3572
759k
    if (ret == other) {
3573
42.0k
  return (const xmlChar*) 1;
3574
42.0k
    }
3575
717k
    return ret;
3576
759k
}
3577
3578
/**
3579
 * xmlParseStringName:
3580
 * @ctxt:  an XML parser context
3581
 * @str:  a pointer to the string pointer (IN/OUT)
3582
 *
3583
 * parse an XML name.
3584
 *
3585
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3586
 *                  CombiningChar | Extender
3587
 *
3588
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3589
 *
3590
 * [6] Names ::= Name (#x20 Name)*
3591
 *
3592
 * Returns the Name parsed or NULL. The @str pointer
3593
 * is updated to the current location in the string.
3594
 */
3595
3596
static xmlChar *
3597
29.2M
xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3598
29.2M
    xmlChar buf[XML_MAX_NAMELEN + 5];
3599
29.2M
    const xmlChar *cur = *str;
3600
29.2M
    int len = 0, l;
3601
29.2M
    int c;
3602
29.2M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3603
3.26M
                    XML_MAX_TEXT_LENGTH :
3604
29.2M
                    XML_MAX_NAME_LENGTH;
3605
3606
#ifdef DEBUG
3607
    nbParseStringName++;
3608
#endif
3609
3610
29.2M
    c = CUR_SCHAR(cur, l);
3611
29.2M
    if (!xmlIsNameStartChar(ctxt, c)) {
3612
16.6k
  return(NULL);
3613
16.6k
    }
3614
3615
29.2M
    COPY_BUF(l,buf,len,c);
3616
29.2M
    cur += l;
3617
29.2M
    c = CUR_SCHAR(cur, l);
3618
356M
    while (xmlIsNameChar(ctxt, c)) {
3619
329M
  COPY_BUF(l,buf,len,c);
3620
329M
  cur += l;
3621
329M
  c = CUR_SCHAR(cur, l);
3622
329M
  if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3623
      /*
3624
       * Okay someone managed to make a huge name, so he's ready to pay
3625
       * for the processing speed.
3626
       */
3627
2.29M
      xmlChar *buffer;
3628
2.29M
      int max = len * 2;
3629
3630
2.29M
      buffer = (xmlChar *) xmlMallocAtomic(max);
3631
2.29M
      if (buffer == NULL) {
3632
0
          xmlErrMemory(ctxt, NULL);
3633
0
    return(NULL);
3634
0
      }
3635
2.29M
      memcpy(buffer, buf, len);
3636
514M
      while (xmlIsNameChar(ctxt, c)) {
3637
511M
    if (len + 10 > max) {
3638
2.26M
        xmlChar *tmp;
3639
3640
2.26M
        max *= 2;
3641
2.26M
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3642
2.26M
        if (tmp == NULL) {
3643
0
      xmlErrMemory(ctxt, NULL);
3644
0
      xmlFree(buffer);
3645
0
      return(NULL);
3646
0
        }
3647
2.26M
        buffer = tmp;
3648
2.26M
    }
3649
511M
    COPY_BUF(l,buffer,len,c);
3650
511M
    cur += l;
3651
511M
    c = CUR_SCHAR(cur, l);
3652
511M
                if (len > maxLength) {
3653
0
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3654
0
                    xmlFree(buffer);
3655
0
                    return(NULL);
3656
0
                }
3657
511M
      }
3658
2.29M
      buffer[len] = 0;
3659
2.29M
      *str = cur;
3660
2.29M
      return(buffer);
3661
2.29M
  }
3662
329M
    }
3663
26.9M
    if (len > maxLength) {
3664
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3665
0
        return(NULL);
3666
0
    }
3667
26.9M
    *str = cur;
3668
26.9M
    return(xmlStrndup(buf, len));
3669
26.9M
}
3670
3671
/**
3672
 * xmlParseNmtoken:
3673
 * @ctxt:  an XML parser context
3674
 *
3675
 * DEPRECATED: Internal function, don't use.
3676
 *
3677
 * parse an XML Nmtoken.
3678
 *
3679
 * [7] Nmtoken ::= (NameChar)+
3680
 *
3681
 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3682
 *
3683
 * Returns the Nmtoken parsed or NULL
3684
 */
3685
3686
xmlChar *
3687
775k
xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3688
775k
    xmlChar buf[XML_MAX_NAMELEN + 5];
3689
775k
    int len = 0, l;
3690
775k
    int c;
3691
775k
    int count = 0;
3692
775k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3693
231k
                    XML_MAX_TEXT_LENGTH :
3694
775k
                    XML_MAX_NAME_LENGTH;
3695
3696
#ifdef DEBUG
3697
    nbParseNmToken++;
3698
#endif
3699
3700
775k
    GROW;
3701
775k
    if (ctxt->instate == XML_PARSER_EOF)
3702
6
        return(NULL);
3703
775k
    c = CUR_CHAR(l);
3704
3705
5.82M
    while (xmlIsNameChar(ctxt, c)) {
3706
5.06M
  if (count++ > XML_PARSER_CHUNK_SIZE) {
3707
0
      count = 0;
3708
0
      GROW;
3709
0
  }
3710
5.06M
  COPY_BUF(l,buf,len,c);
3711
5.06M
  NEXTL(l);
3712
5.06M
  c = CUR_CHAR(l);
3713
5.06M
  if (c == 0) {
3714
4.45k
      count = 0;
3715
4.45k
      GROW;
3716
4.45k
      if (ctxt->instate == XML_PARSER_EOF)
3717
0
    return(NULL);
3718
4.45k
            c = CUR_CHAR(l);
3719
4.45k
  }
3720
5.06M
  if (len >= XML_MAX_NAMELEN) {
3721
      /*
3722
       * Okay someone managed to make a huge token, so he's ready to pay
3723
       * for the processing speed.
3724
       */
3725
11.3k
      xmlChar *buffer;
3726
11.3k
      int max = len * 2;
3727
3728
11.3k
      buffer = (xmlChar *) xmlMallocAtomic(max);
3729
11.3k
      if (buffer == NULL) {
3730
0
          xmlErrMemory(ctxt, NULL);
3731
0
    return(NULL);
3732
0
      }
3733
11.3k
      memcpy(buffer, buf, len);
3734
2.22M
      while (xmlIsNameChar(ctxt, c)) {
3735
2.21M
    if (count++ > XML_PARSER_CHUNK_SIZE) {
3736
28.2k
        count = 0;
3737
28.2k
        GROW;
3738
28.2k
                    if (ctxt->instate == XML_PARSER_EOF) {
3739
0
                        xmlFree(buffer);
3740
0
                        return(NULL);
3741
0
                    }
3742
28.2k
    }
3743
2.21M
    if (len + 10 > max) {
3744
5.66k
        xmlChar *tmp;
3745
3746
5.66k
        max *= 2;
3747
5.66k
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3748
5.66k
        if (tmp == NULL) {
3749
0
      xmlErrMemory(ctxt, NULL);
3750
0
      xmlFree(buffer);
3751
0
      return(NULL);
3752
0
        }
3753
5.66k
        buffer = tmp;
3754
5.66k
    }
3755
2.21M
    COPY_BUF(l,buffer,len,c);
3756
2.21M
    NEXTL(l);
3757
2.21M
    c = CUR_CHAR(l);
3758
2.21M
                if (len > maxLength) {
3759
0
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3760
0
                    xmlFree(buffer);
3761
0
                    return(NULL);
3762
0
                }
3763
2.21M
      }
3764
11.3k
      buffer[len] = 0;
3765
11.3k
      return(buffer);
3766
11.3k
  }
3767
5.06M
    }
3768
764k
    if (len == 0)
3769
160k
        return(NULL);
3770
603k
    if (len > maxLength) {
3771
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3772
0
        return(NULL);
3773
0
    }
3774
603k
    return(xmlStrndup(buf, len));
3775
603k
}
3776
3777
/**
3778
 * xmlParseEntityValue:
3779
 * @ctxt:  an XML parser context
3780
 * @orig:  if non-NULL store a copy of the original entity value
3781
 *
3782
 * DEPRECATED: Internal function, don't use.
3783
 *
3784
 * parse a value for ENTITY declarations
3785
 *
3786
 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3787
 *                 "'" ([^%&'] | PEReference | Reference)* "'"
3788
 *
3789
 * Returns the EntityValue parsed with reference substituted or NULL
3790
 */
3791
3792
xmlChar *
3793
954k
xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3794
954k
    xmlChar *buf = NULL;
3795
954k
    int len = 0;
3796
954k
    int size = XML_PARSER_BUFFER_SIZE;
3797
954k
    int c, l;
3798
954k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3799
234k
                    XML_MAX_HUGE_LENGTH :
3800
954k
                    XML_MAX_TEXT_LENGTH;
3801
954k
    xmlChar stop;
3802
954k
    xmlChar *ret = NULL;
3803
954k
    const xmlChar *cur = NULL;
3804
954k
    xmlParserInputPtr input;
3805
3806
954k
    if (RAW == '"') stop = '"';
3807
201k
    else if (RAW == '\'') stop = '\'';
3808
0
    else {
3809
0
  xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
3810
0
  return(NULL);
3811
0
    }
3812
954k
    buf = (xmlChar *) xmlMallocAtomic(size);
3813
954k
    if (buf == NULL) {
3814
0
  xmlErrMemory(ctxt, NULL);
3815
0
  return(NULL);
3816
0
    }
3817
3818
    /*
3819
     * The content of the entity definition is copied in a buffer.
3820
     */
3821
3822
954k
    ctxt->instate = XML_PARSER_ENTITY_VALUE;
3823
954k
    input = ctxt->input;
3824
954k
    GROW;
3825
954k
    if (ctxt->instate == XML_PARSER_EOF)
3826
0
        goto error;
3827
954k
    NEXT;
3828
954k
    c = CUR_CHAR(l);
3829
    /*
3830
     * NOTE: 4.4.5 Included in Literal
3831
     * When a parameter entity reference appears in a literal entity
3832
     * value, ... a single or double quote character in the replacement
3833
     * text is always treated as a normal data character and will not
3834
     * terminate the literal.
3835
     * In practice it means we stop the loop only when back at parsing
3836
     * the initial entity and the quote is found
3837
     */
3838
52.0M
    while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3839
52.0M
      (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
3840
51.0M
  if (len + 5 >= size) {
3841
140k
      xmlChar *tmp;
3842
3843
140k
      size *= 2;
3844
140k
      tmp = (xmlChar *) xmlRealloc(buf, size);
3845
140k
      if (tmp == NULL) {
3846
0
    xmlErrMemory(ctxt, NULL);
3847
0
                goto error;
3848
0
      }
3849
140k
      buf = tmp;
3850
140k
  }
3851
51.0M
  COPY_BUF(l,buf,len,c);
3852
51.0M
  NEXTL(l);
3853
3854
51.0M
  GROW;
3855
51.0M
  c = CUR_CHAR(l);
3856
51.0M
  if (c == 0) {
3857
3.35k
      GROW;
3858
3.35k
      c = CUR_CHAR(l);
3859
3.35k
  }
3860
3861
51.0M
        if (len > maxLength) {
3862
0
            xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
3863
0
                           "entity value too long\n");
3864
0
            goto error;
3865
0
        }
3866
51.0M
    }
3867
954k
    buf[len] = 0;
3868
954k
    if (ctxt->instate == XML_PARSER_EOF)
3869
0
        goto error;
3870
954k
    if (c != stop) {
3871
5.32k
        xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3872
5.32k
        goto error;
3873
5.32k
    }
3874
949k
    NEXT;
3875
3876
    /*
3877
     * Raise problem w.r.t. '&' and '%' being used in non-entities
3878
     * reference constructs. Note Charref will be handled in
3879
     * xmlStringDecodeEntities()
3880
     */
3881
949k
    cur = buf;
3882
35.7M
    while (*cur != 0) { /* non input consuming */
3883
34.8M
  if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3884
637k
      xmlChar *name;
3885
637k
      xmlChar tmp = *cur;
3886
637k
            int nameOk = 0;
3887
3888
637k
      cur++;
3889
637k
      name = xmlParseStringName(ctxt, &cur);
3890
637k
            if (name != NULL) {
3891
627k
                nameOk = 1;
3892
627k
                xmlFree(name);
3893
627k
            }
3894
637k
            if ((nameOk == 0) || (*cur != ';')) {
3895
20.8k
    xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3896
20.8k
      "EntityValue: '%c' forbidden except for entities references\n",
3897
20.8k
                            tmp);
3898
20.8k
                goto error;
3899
20.8k
      }
3900
616k
      if ((tmp == '%') && (ctxt->inSubset == 1) &&
3901
616k
    (ctxt->inputNr == 1)) {
3902
6.36k
    xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3903
6.36k
                goto error;
3904
6.36k
      }
3905
610k
      if (*cur == 0)
3906
0
          break;
3907
610k
  }
3908
34.8M
  cur++;
3909
34.8M
    }
3910
3911
    /*
3912
     * Then PEReference entities are substituted.
3913
     *
3914
     * NOTE: 4.4.7 Bypassed
3915
     * When a general entity reference appears in the EntityValue in
3916
     * an entity declaration, it is bypassed and left as is.
3917
     * so XML_SUBSTITUTE_REF is not set here.
3918
     */
3919
921k
    ++ctxt->depth;
3920
921k
    ret = xmlStringDecodeEntitiesInt(ctxt, buf, len, XML_SUBSTITUTE_PEREF,
3921
921k
                                     0, 0, 0, /* check */ 1);
3922
921k
    --ctxt->depth;
3923
3924
921k
    if (orig != NULL) {
3925
921k
        *orig = buf;
3926
921k
        buf = NULL;
3927
921k
    }
3928
3929
954k
error:
3930
954k
    if (buf != NULL)
3931
32.5k
        xmlFree(buf);
3932
954k
    return(ret);
3933
921k
}
3934
3935
/**
3936
 * xmlParseAttValueComplex:
3937
 * @ctxt:  an XML parser context
3938
 * @attlen:   the resulting attribute len
3939
 * @normalize:  whether to apply the inner normalization
3940
 *
3941
 * parse a value for an attribute, this is the fallback function
3942
 * of xmlParseAttValue() when the attribute parsing requires handling
3943
 * of non-ASCII characters, or normalization compaction.
3944
 *
3945
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3946
 */
3947
static xmlChar *
3948
1.80M
xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
3949
1.80M
    xmlChar limit = 0;
3950
1.80M
    xmlChar *buf = NULL;
3951
1.80M
    xmlChar *rep = NULL;
3952
1.80M
    size_t len = 0;
3953
1.80M
    size_t buf_size = 0;
3954
1.80M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3955
624k
                       XML_MAX_HUGE_LENGTH :
3956
1.80M
                       XML_MAX_TEXT_LENGTH;
3957
1.80M
    int c, l, in_space = 0;
3958
1.80M
    xmlChar *current = NULL;
3959
1.80M
    xmlEntityPtr ent;
3960
3961
1.80M
    if (NXT(0) == '"') {
3962
1.44M
  ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3963
1.44M
  limit = '"';
3964
1.44M
        NEXT;
3965
1.44M
    } else if (NXT(0) == '\'') {
3966
357k
  limit = '\'';
3967
357k
  ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3968
357k
        NEXT;
3969
357k
    } else {
3970
0
  xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3971
0
  return(NULL);
3972
0
    }
3973
3974
    /*
3975
     * allocate a translation buffer.
3976
     */
3977
1.80M
    buf_size = XML_PARSER_BUFFER_SIZE;
3978
1.80M
    buf = (xmlChar *) xmlMallocAtomic(buf_size);
3979
1.80M
    if (buf == NULL) goto mem_error;
3980
3981
    /*
3982
     * OK loop until we reach one of the ending char or a size limit.
3983
     */
3984
1.80M
    c = CUR_CHAR(l);
3985
65.8M
    while (((NXT(0) != limit) && /* checked */
3986
65.8M
            (IS_CHAR(c)) && (c != '<')) &&
3987
65.8M
            (ctxt->instate != XML_PARSER_EOF)) {
3988
64.0M
  if (c == '&') {
3989
3.00M
      in_space = 0;
3990
3.00M
      if (NXT(1) == '#') {
3991
597k
    int val = xmlParseCharRef(ctxt);
3992
3993
597k
    if (val == '&') {
3994
55.9k
        if (ctxt->replaceEntities) {
3995
19.2k
      if (len + 10 > buf_size) {
3996
574
          growBuffer(buf, 10);
3997
574
      }
3998
19.2k
      buf[len++] = '&';
3999
36.6k
        } else {
4000
      /*
4001
       * The reparsing will be done in xmlStringGetNodeList()
4002
       * called by the attribute() function in SAX.c
4003
       */
4004
36.6k
      if (len + 10 > buf_size) {
4005
736
          growBuffer(buf, 10);
4006
736
      }
4007
36.6k
      buf[len++] = '&';
4008
36.6k
      buf[len++] = '#';
4009
36.6k
      buf[len++] = '3';
4010
36.6k
      buf[len++] = '8';
4011
36.6k
      buf[len++] = ';';
4012
36.6k
        }
4013
541k
    } else if (val != 0) {
4014
440k
        if (len + 10 > buf_size) {
4015
6.15k
      growBuffer(buf, 10);
4016
6.15k
        }
4017
440k
        len += xmlCopyChar(0, &buf[len], val);
4018
440k
    }
4019
2.40M
      } else {
4020
2.40M
    ent = xmlParseEntityRef(ctxt);
4021
2.40M
    if ((ent != NULL) &&
4022
2.40M
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
4023
166k
        if (len + 10 > buf_size) {
4024
972
      growBuffer(buf, 10);
4025
972
        }
4026
166k
        if ((ctxt->replaceEntities == 0) &&
4027
166k
            (ent->content[0] == '&')) {
4028
56.3k
      buf[len++] = '&';
4029
56.3k
      buf[len++] = '#';
4030
56.3k
      buf[len++] = '3';
4031
56.3k
      buf[len++] = '8';
4032
56.3k
      buf[len++] = ';';
4033
109k
        } else {
4034
109k
      buf[len++] = ent->content[0];
4035
109k
        }
4036
2.24M
    } else if ((ent != NULL) &&
4037
2.24M
               (ctxt->replaceEntities != 0)) {
4038
1.22M
        if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
4039
1.22M
                        if (xmlParserEntityCheck(ctxt, ent->length))
4040
0
                            goto error;
4041
4042
1.22M
      ++ctxt->depth;
4043
1.22M
      rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
4044
1.22M
                                ent->length, XML_SUBSTITUTE_REF, 0, 0, 0,
4045
1.22M
                                /* check */ 1);
4046
1.22M
      --ctxt->depth;
4047
1.22M
      if (rep != NULL) {
4048
1.21M
          current = rep;
4049
241M
          while (*current != 0) { /* non input consuming */
4050
240M
                                if ((*current == 0xD) || (*current == 0xA) ||
4051
240M
                                    (*current == 0x9)) {
4052
148k
                                    buf[len++] = 0x20;
4053
148k
                                    current++;
4054
148k
                                } else
4055
240M
                                    buf[len++] = *current++;
4056
240M
        if (len + 10 > buf_size) {
4057
46.3k
            growBuffer(buf, 10);
4058
46.3k
        }
4059
240M
          }
4060
1.21M
          xmlFree(rep);
4061
1.21M
          rep = NULL;
4062
1.21M
      }
4063
1.22M
        } else {
4064
0
      if (len + 10 > buf_size) {
4065
0
          growBuffer(buf, 10);
4066
0
      }
4067
0
      if (ent->content != NULL)
4068
0
          buf[len++] = ent->content[0];
4069
0
        }
4070
1.22M
    } else if (ent != NULL) {
4071
547k
        int i = xmlStrlen(ent->name);
4072
547k
        const xmlChar *cur = ent->name;
4073
4074
        /*
4075
                     * We also check for recursion and amplification
4076
                     * when entities are not substituted. They're
4077
                     * often expanded later.
4078
         */
4079
547k
        if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
4080
547k
      (ent->content != NULL)) {
4081
489k
                        if ((ent->flags & XML_ENT_CHECKED) == 0) {
4082
11.5k
                            unsigned long oldCopy = ctxt->sizeentcopy;
4083
4084
11.5k
                            ctxt->sizeentcopy = ent->length;
4085
4086
11.5k
                            ++ctxt->depth;
4087
11.5k
                            rep = xmlStringDecodeEntitiesInt(ctxt,
4088
11.5k
                                    ent->content, ent->length,
4089
11.5k
                                    XML_SUBSTITUTE_REF, 0, 0, 0,
4090
11.5k
                                    /* check */ 1);
4091
11.5k
                            --ctxt->depth;
4092
4093
                            /*
4094
                             * If we're parsing DTD content, the entity
4095
                             * might reference other entities which
4096
                             * weren't defined yet, so the check isn't
4097
                             * reliable.
4098
                             */
4099
11.5k
                            if (ctxt->inSubset == 0) {
4100
10.6k
                                ent->flags |= XML_ENT_CHECKED;
4101
10.6k
                                ent->expandedSize = ctxt->sizeentcopy;
4102
10.6k
                            }
4103
4104
11.5k
                            if (rep != NULL) {
4105
11.1k
                                xmlFree(rep);
4106
11.1k
                                rep = NULL;
4107
11.1k
                            } else {
4108
386
                                ent->content[0] = 0;
4109
386
                            }
4110
4111
11.5k
                            if (xmlParserEntityCheck(ctxt, oldCopy))
4112
24
                                goto error;
4113
477k
                        } else {
4114
477k
                            if (xmlParserEntityCheck(ctxt, ent->expandedSize))
4115
6
                                goto error;
4116
477k
                        }
4117
489k
        }
4118
4119
        /*
4120
         * Just output the reference
4121
         */
4122
547k
        buf[len++] = '&';
4123
552k
        while (len + i + 10 > buf_size) {
4124
9.17k
      growBuffer(buf, i + 10);
4125
9.17k
        }
4126
1.46M
        for (;i > 0;i--)
4127
914k
      buf[len++] = *cur++;
4128
547k
        buf[len++] = ';';
4129
547k
    }
4130
2.40M
      }
4131
61.0M
  } else {
4132
61.0M
      if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
4133
4.59M
          if ((len != 0) || (!normalize)) {
4134
4.47M
        if ((!normalize) || (!in_space)) {
4135
4.14M
      COPY_BUF(l,buf,len,0x20);
4136
4.15M
      while (len + 10 > buf_size) {
4137
22.4k
          growBuffer(buf, 10);
4138
22.4k
      }
4139
4.14M
        }
4140
4.47M
        in_space = 1;
4141
4.47M
    }
4142
56.4M
      } else {
4143
56.4M
          in_space = 0;
4144
56.4M
    COPY_BUF(l,buf,len,c);
4145
56.4M
    if (len + 10 > buf_size) {
4146
296k
        growBuffer(buf, 10);
4147
296k
    }
4148
56.4M
      }
4149
61.0M
      NEXTL(l);
4150
61.0M
  }
4151
64.0M
  GROW;
4152
64.0M
  c = CUR_CHAR(l);
4153
64.0M
        if (len > maxLength) {
4154
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4155
0
                           "AttValue length too long\n");
4156
0
            goto mem_error;
4157
0
        }
4158
64.0M
    }
4159
1.80M
    if (ctxt->instate == XML_PARSER_EOF)
4160
876
        goto error;
4161
4162
1.80M
    if ((in_space) && (normalize)) {
4163
81.1k
        while ((len > 0) && (buf[len - 1] == 0x20)) len--;
4164
36.9k
    }
4165
1.80M
    buf[len] = 0;
4166
1.80M
    if (RAW == '<') {
4167
648k
  xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4168
1.15M
    } else if (RAW != limit) {
4169
288k
  if ((c != 0) && (!IS_CHAR(c))) {
4170
156k
      xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4171
156k
         "invalid character in attribute value\n");
4172
156k
  } else {
4173
132k
      xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4174
132k
         "AttValue: ' expected\n");
4175
132k
        }
4176
288k
    } else
4177
867k
  NEXT;
4178
4179
1.80M
    if (attlen != NULL) *attlen = len;
4180
1.80M
    return(buf);
4181
4182
0
mem_error:
4183
0
    xmlErrMemory(ctxt, NULL);
4184
906
error:
4185
906
    if (buf != NULL)
4186
906
        xmlFree(buf);
4187
906
    if (rep != NULL)
4188
0
        xmlFree(rep);
4189
906
    return(NULL);
4190
0
}
4191
4192
/**
4193
 * xmlParseAttValue:
4194
 * @ctxt:  an XML parser context
4195
 *
4196
 * DEPRECATED: Internal function, don't use.
4197
 *
4198
 * parse a value for an attribute
4199
 * Note: the parser won't do substitution of entities here, this
4200
 * will be handled later in xmlStringGetNodeList
4201
 *
4202
 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4203
 *                   "'" ([^<&'] | Reference)* "'"
4204
 *
4205
 * 3.3.3 Attribute-Value Normalization:
4206
 * Before the value of an attribute is passed to the application or
4207
 * checked for validity, the XML processor must normalize it as follows:
4208
 * - a character reference is processed by appending the referenced
4209
 *   character to the attribute value
4210
 * - an entity reference is processed by recursively processing the
4211
 *   replacement text of the entity
4212
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4213
 *   appending #x20 to the normalized value, except that only a single
4214
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
4215
 *   parsed entity or the literal entity value of an internal parsed entity
4216
 * - other characters are processed by appending them to the normalized value
4217
 * If the declared value is not CDATA, then the XML processor must further
4218
 * process the normalized attribute value by discarding any leading and
4219
 * trailing space (#x20) characters, and by replacing sequences of space
4220
 * (#x20) characters by a single space (#x20) character.
4221
 * All attributes for which no declaration has been read should be treated
4222
 * by a non-validating parser as if declared CDATA.
4223
 *
4224
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4225
 */
4226
4227
4228
xmlChar *
4229
3.43M
xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4230
3.43M
    if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4231
3.43M
    return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
4232
3.43M
}
4233
4234
/**
4235
 * xmlParseSystemLiteral:
4236
 * @ctxt:  an XML parser context
4237
 *
4238
 * DEPRECATED: Internal function, don't use.
4239
 *
4240
 * parse an XML Literal
4241
 *
4242
 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4243
 *
4244
 * Returns the SystemLiteral parsed or NULL
4245
 */
4246
4247
xmlChar *
4248
364k
xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4249
364k
    xmlChar *buf = NULL;
4250
364k
    int len = 0;
4251
364k
    int size = XML_PARSER_BUFFER_SIZE;
4252
364k
    int cur, l;
4253
364k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4254
112k
                    XML_MAX_TEXT_LENGTH :
4255
364k
                    XML_MAX_NAME_LENGTH;
4256
364k
    xmlChar stop;
4257
364k
    int state = ctxt->instate;
4258
364k
    int count = 0;
4259
4260
364k
    SHRINK;
4261
364k
    if (RAW == '"') {
4262
287k
        NEXT;
4263
287k
  stop = '"';
4264
287k
    } else if (RAW == '\'') {
4265
59.7k
        NEXT;
4266
59.7k
  stop = '\'';
4267
59.7k
    } else {
4268
17.2k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4269
17.2k
  return(NULL);
4270
17.2k
    }
4271
4272
347k
    buf = (xmlChar *) xmlMallocAtomic(size);
4273
347k
    if (buf == NULL) {
4274
0
        xmlErrMemory(ctxt, NULL);
4275
0
  return(NULL);
4276
0
    }
4277
347k
    ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4278
347k
    cur = CUR_CHAR(l);
4279
15.7M
    while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4280
15.4M
  if (len + 5 >= size) {
4281
23.3k
      xmlChar *tmp;
4282
4283
23.3k
      size *= 2;
4284
23.3k
      tmp = (xmlChar *) xmlRealloc(buf, size);
4285
23.3k
      if (tmp == NULL) {
4286
0
          xmlFree(buf);
4287
0
    xmlErrMemory(ctxt, NULL);
4288
0
    ctxt->instate = (xmlParserInputState) state;
4289
0
    return(NULL);
4290
0
      }
4291
23.3k
      buf = tmp;
4292
23.3k
  }
4293
15.4M
  count++;
4294
15.4M
  if (count > 50) {
4295
191k
      SHRINK;
4296
191k
      GROW;
4297
191k
      count = 0;
4298
191k
            if (ctxt->instate == XML_PARSER_EOF) {
4299
0
          xmlFree(buf);
4300
0
    return(NULL);
4301
0
            }
4302
191k
  }
4303
15.4M
  COPY_BUF(l,buf,len,cur);
4304
15.4M
  NEXTL(l);
4305
15.4M
  cur = CUR_CHAR(l);
4306
15.4M
  if (cur == 0) {
4307
5.17k
      GROW;
4308
5.17k
      SHRINK;
4309
5.17k
      cur = CUR_CHAR(l);
4310
5.17k
  }
4311
15.4M
        if (len > maxLength) {
4312
44
            xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4313
44
            xmlFree(buf);
4314
44
            ctxt->instate = (xmlParserInputState) state;
4315
44
            return(NULL);
4316
44
        }
4317
15.4M
    }
4318
347k
    buf[len] = 0;
4319
347k
    ctxt->instate = (xmlParserInputState) state;
4320
347k
    if (!IS_CHAR(cur)) {
4321
8.62k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4322
338k
    } else {
4323
338k
  NEXT;
4324
338k
    }
4325
347k
    return(buf);
4326
347k
}
4327
4328
/**
4329
 * xmlParsePubidLiteral:
4330
 * @ctxt:  an XML parser context
4331
 *
4332
 * DEPRECATED: Internal function, don't use.
4333
 *
4334
 * parse an XML public literal
4335
 *
4336
 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4337
 *
4338
 * Returns the PubidLiteral parsed or NULL.
4339
 */
4340
4341
xmlChar *
4342
131k
xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4343
131k
    xmlChar *buf = NULL;
4344
131k
    int len = 0;
4345
131k
    int size = XML_PARSER_BUFFER_SIZE;
4346
131k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4347
42.1k
                    XML_MAX_TEXT_LENGTH :
4348
131k
                    XML_MAX_NAME_LENGTH;
4349
131k
    xmlChar cur;
4350
131k
    xmlChar stop;
4351
131k
    int count = 0;
4352
131k
    xmlParserInputState oldstate = ctxt->instate;
4353
4354
131k
    SHRINK;
4355
131k
    if (RAW == '"') {
4356
83.9k
        NEXT;
4357
83.9k
  stop = '"';
4358
83.9k
    } else if (RAW == '\'') {
4359
44.1k
        NEXT;
4360
44.1k
  stop = '\'';
4361
44.1k
    } else {
4362
3.00k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4363
3.00k
  return(NULL);
4364
3.00k
    }
4365
128k
    buf = (xmlChar *) xmlMallocAtomic(size);
4366
128k
    if (buf == NULL) {
4367
0
  xmlErrMemory(ctxt, NULL);
4368
0
  return(NULL);
4369
0
    }
4370
128k
    ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
4371
128k
    cur = CUR;
4372
4.45M
    while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
4373
4.32M
  if (len + 1 >= size) {
4374
6.87k
      xmlChar *tmp;
4375
4376
6.87k
      size *= 2;
4377
6.87k
      tmp = (xmlChar *) xmlRealloc(buf, size);
4378
6.87k
      if (tmp == NULL) {
4379
0
    xmlErrMemory(ctxt, NULL);
4380
0
    xmlFree(buf);
4381
0
    return(NULL);
4382
0
      }
4383
6.87k
      buf = tmp;
4384
6.87k
  }
4385
4.32M
  buf[len++] = cur;
4386
4.32M
  count++;
4387
4.32M
  if (count > 50) {
4388
39.9k
      SHRINK;
4389
39.9k
      GROW;
4390
39.9k
      count = 0;
4391
39.9k
            if (ctxt->instate == XML_PARSER_EOF) {
4392
0
    xmlFree(buf);
4393
0
    return(NULL);
4394
0
            }
4395
39.9k
  }
4396
4.32M
  NEXT;
4397
4.32M
  cur = CUR;
4398
4.32M
  if (cur == 0) {
4399
956
      GROW;
4400
956
      SHRINK;
4401
956
      cur = CUR;
4402
956
  }
4403
4.32M
        if (len > maxLength) {
4404
0
            xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4405
0
            xmlFree(buf);
4406
0
            return(NULL);
4407
0
        }
4408
4.32M
    }
4409
128k
    buf[len] = 0;
4410
128k
    if (cur != stop) {
4411
11.4k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4412
116k
    } else {
4413
116k
  NEXT;
4414
116k
    }
4415
128k
    ctxt->instate = oldstate;
4416
128k
    return(buf);
4417
128k
}
4418
4419
static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt);
4420
4421
/*
 * Lookup table used by the inner loop of the character data fast path.
 *
 * The table is indexed by an input byte. A non-zero entry means the byte
 * can be consumed by the loop without further inspection; a zero entry
 * stops the loop. Non-zero entries are TAB (0x09) and the bytes
 * 0x20..0x7F except '&' (0x26), '<' (0x3C) and ']' (0x5D). LF, CR, the
 * other control characters and every byte above 0x7F are zero.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 - 0x0F: only TAB is accepted, CR and LF are handled apart */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x10 - 0x1F: control characters */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20 - 0x2F: '&' (0x26) is excluded */
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27,
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30 - 0x3F: '<' (0x3C) is excluded */
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F,
    /* 0x40 - 0x4F */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50 - 0x5F: ']' (0x5D) is excluded */
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F,
    /* 0x60 - 0x6F */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 - 0x7F */
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F
    /*
     * 0x80 - 0xFF: non-ASCII bytes. The remaining 128 entries are left
     * out on purpose, they are implicitly initialized to zero.
     */
};
4458
4459
/**
4460
 * xmlParseCharData:
4461
 * @ctxt:  an XML parser context
4462
 * @cdata:  unused
4463
 *
4464
 * DEPRECATED: Internal function, don't use.
4465
 *
4466
 * Parse character data. Always makes progress if the first char isn't
4467
 * '<' or '&'.
4468
 *
4469
 * if we are within a CDATA section ']]>' marks an end of section.
4470
 *
4471
 * The right angle bracket (>) may be represented using the string "&gt;",
4472
 * and must, for compatibility, be escaped using "&gt;" or a character
4473
 * reference when it appears in the string "]]>" in content, when that
4474
 * string is not marking the end of a CDATA section.
4475
 *
4476
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4477
 */
4478
4479
void
4480
38.5M
xmlParseCharData(xmlParserCtxtPtr ctxt, ATTRIBUTE_UNUSED int cdata) {
4481
38.5M
    const xmlChar *in;
4482
38.5M
    int nbchar = 0;
4483
38.5M
    int line = ctxt->input->line;
4484
38.5M
    int col = ctxt->input->col;
4485
38.5M
    int ccol;
4486
4487
38.5M
    SHRINK;
4488
38.5M
    GROW;
4489
    /*
4490
     * Accelerated common case where input don't need to be
4491
     * modified before passing it to the handler.
4492
     */
4493
38.5M
    in = ctxt->input->cur;
4494
43.4M
    do {
4495
49.7M
get_more_space:
4496
70.9M
        while (*in == 0x20) { in++; ctxt->input->col++; }
4497
49.7M
        if (*in == 0xA) {
4498
7.27M
            do {
4499
7.27M
                ctxt->input->line++; ctxt->input->col = 1;
4500
7.27M
                in++;
4501
7.27M
            } while (*in == 0xA);
4502
6.27M
            goto get_more_space;
4503
6.27M
        }
4504
43.4M
        if (*in == '<') {
4505
6.71M
            nbchar = in - ctxt->input->cur;
4506
6.71M
            if (nbchar > 0) {
4507
6.71M
                const xmlChar *tmp = ctxt->input->cur;
4508
6.71M
                ctxt->input->cur = in;
4509
4510
6.71M
                if ((ctxt->sax != NULL) &&
4511
6.71M
                    (ctxt->sax->ignorableWhitespace !=
4512
6.71M
                     ctxt->sax->characters)) {
4513
2.96M
                    if (areBlanks(ctxt, tmp, nbchar, 1)) {
4514
1.83M
                        if (ctxt->sax->ignorableWhitespace != NULL)
4515
1.83M
                            ctxt->sax->ignorableWhitespace(ctxt->userData,
4516
1.83M
                                                   tmp, nbchar);
4517
1.83M
                    } else {
4518
1.12M
                        if (ctxt->sax->characters != NULL)
4519
1.12M
                            ctxt->sax->characters(ctxt->userData,
4520
1.12M
                                                  tmp, nbchar);
4521
1.12M
                        if (*ctxt->space == -1)
4522
330k
                            *ctxt->space = -2;
4523
1.12M
                    }
4524
3.75M
                } else if ((ctxt->sax != NULL) &&
4525
3.75M
                           (ctxt->sax->characters != NULL)) {
4526
3.75M
                    ctxt->sax->characters(ctxt->userData,
4527
3.75M
                                          tmp, nbchar);
4528
3.75M
                }
4529
6.71M
            }
4530
6.71M
            return;
4531
6.71M
        }
4532
4533
44.8M
get_more:
4534
44.8M
        ccol = ctxt->input->col;
4535
393M
        while (test_char_data[*in]) {
4536
349M
            in++;
4537
349M
            ccol++;
4538
349M
        }
4539
44.8M
        ctxt->input->col = ccol;
4540
44.8M
        if (*in == 0xA) {
4541
6.67M
            do {
4542
6.67M
                ctxt->input->line++; ctxt->input->col = 1;
4543
6.67M
                in++;
4544
6.67M
            } while (*in == 0xA);
4545
6.13M
            goto get_more;
4546
6.13M
        }
4547
38.7M
        if (*in == ']') {
4548
2.18M
            if ((in[1] == ']') && (in[2] == '>')) {
4549
195k
                xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4550
195k
                ctxt->input->cur = in + 1;
4551
195k
                return;
4552
195k
            }
4553
1.98M
            in++;
4554
1.98M
            ctxt->input->col++;
4555
1.98M
            goto get_more;
4556
2.18M
        }
4557
36.5M
        nbchar = in - ctxt->input->cur;
4558
36.5M
        if (nbchar > 0) {
4559
17.8M
            if ((ctxt->sax != NULL) &&
4560
17.8M
                (ctxt->sax->ignorableWhitespace !=
4561
17.8M
                 ctxt->sax->characters) &&
4562
17.8M
                (IS_BLANK_CH(*ctxt->input->cur))) {
4563
2.11M
                const xmlChar *tmp = ctxt->input->cur;
4564
2.11M
                ctxt->input->cur = in;
4565
4566
2.11M
                if (areBlanks(ctxt, tmp, nbchar, 0)) {
4567
616k
                    if (ctxt->sax->ignorableWhitespace != NULL)
4568
616k
                        ctxt->sax->ignorableWhitespace(ctxt->userData,
4569
616k
                                                       tmp, nbchar);
4570
1.49M
                } else {
4571
1.49M
                    if (ctxt->sax->characters != NULL)
4572
1.49M
                        ctxt->sax->characters(ctxt->userData,
4573
1.49M
                                              tmp, nbchar);
4574
1.49M
                    if (*ctxt->space == -1)
4575
635k
                        *ctxt->space = -2;
4576
1.49M
                }
4577
2.11M
                line = ctxt->input->line;
4578
2.11M
                col = ctxt->input->col;
4579
15.7M
            } else if (ctxt->sax != NULL) {
4580
15.7M
                if (ctxt->sax->characters != NULL)
4581
15.7M
                    ctxt->sax->characters(ctxt->userData,
4582
15.7M
                                          ctxt->input->cur, nbchar);
4583
15.7M
                line = ctxt->input->line;
4584
15.7M
                col = ctxt->input->col;
4585
15.7M
            }
4586
17.8M
        }
4587
36.5M
        ctxt->input->cur = in;
4588
36.5M
        if (*in == 0xD) {
4589
5.34M
            in++;
4590
5.34M
            if (*in == 0xA) {
4591
4.99M
                ctxt->input->cur = in;
4592
4.99M
                in++;
4593
4.99M
                ctxt->input->line++; ctxt->input->col = 1;
4594
4.99M
                continue; /* while */
4595
4.99M
            }
4596
351k
            in--;
4597
351k
        }
4598
31.5M
        if (*in == '<') {
4599
10.6M
            return;
4600
10.6M
        }
4601
20.9M
        if (*in == '&') {
4602
1.82M
            return;
4603
1.82M
        }
4604
19.0M
        SHRINK;
4605
19.0M
        GROW;
4606
19.0M
        if (ctxt->instate == XML_PARSER_EOF)
4607
0
            return;
4608
19.0M
        in = ctxt->input->cur;
4609
24.0M
    } while (((*in >= 0x20) && (*in <= 0x7F)) ||
4610
24.0M
             (*in == 0x09) || (*in == 0x0a));
4611
19.2M
    ctxt->input->line = line;
4612
19.2M
    ctxt->input->col = col;
4613
19.2M
    xmlParseCharDataComplex(ctxt);
4614
19.2M
}
4615
4616
/**
4617
 * xmlParseCharDataComplex:
4618
 * @ctxt:  an XML parser context
4619
 * @cdata:  int indicating whether we are within a CDATA section
4620
 *
4621
 * Always makes progress if the first char isn't '<' or '&'.
4622
 *
4623
 * parse a CharData section.this is the fallback function
4624
 * of xmlParseCharData() when the parsing requires handling
4625
 * of non-ASCII characters.
4626
 */
4627
static void
4628
19.2M
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt) {
4629
19.2M
    xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4630
19.2M
    int nbchar = 0;
4631
19.2M
    int cur, l;
4632
19.2M
    int count = 0;
4633
4634
19.2M
    SHRINK;
4635
19.2M
    GROW;
4636
19.2M
    cur = CUR_CHAR(l);
4637
154M
    while ((cur != '<') && /* checked */
4638
154M
           (cur != '&') &&
4639
154M
     (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
4640
135M
  if ((cur == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4641
70.4k
      xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4642
70.4k
  }
4643
135M
  COPY_BUF(l,buf,nbchar,cur);
4644
  /* move current position before possible calling of ctxt->sax->characters */
4645
135M
  NEXTL(l);
4646
135M
  cur = CUR_CHAR(l);
4647
135M
  if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4648
77.7k
      buf[nbchar] = 0;
4649
4650
      /*
4651
       * OK the segment is to be consumed as chars.
4652
       */
4653
77.7k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4654
69.4k
    if (areBlanks(ctxt, buf, nbchar, 0)) {
4655
76
        if (ctxt->sax->ignorableWhitespace != NULL)
4656
76
      ctxt->sax->ignorableWhitespace(ctxt->userData,
4657
76
                                     buf, nbchar);
4658
69.4k
    } else {
4659
69.4k
        if (ctxt->sax->characters != NULL)
4660
69.4k
      ctxt->sax->characters(ctxt->userData, buf, nbchar);
4661
69.4k
        if ((ctxt->sax->characters !=
4662
69.4k
             ctxt->sax->ignorableWhitespace) &&
4663
69.4k
      (*ctxt->space == -1))
4664
3.80k
      *ctxt->space = -2;
4665
69.4k
    }
4666
69.4k
      }
4667
77.7k
      nbchar = 0;
4668
            /* something really bad happened in the SAX callback */
4669
77.7k
            if (ctxt->instate != XML_PARSER_CONTENT)
4670
0
                return;
4671
77.7k
  }
4672
135M
  count++;
4673
135M
  if (count > 50) {
4674
1.25M
      SHRINK;
4675
1.25M
      GROW;
4676
1.25M
      count = 0;
4677
1.25M
            if (ctxt->instate == XML_PARSER_EOF)
4678
0
    return;
4679
1.25M
  }
4680
135M
    }
4681
19.2M
    if (nbchar != 0) {
4682
4.91M
        buf[nbchar] = 0;
4683
  /*
4684
   * OK the segment is to be consumed as chars.
4685
   */
4686
4.91M
  if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4687
4.54M
      if (areBlanks(ctxt, buf, nbchar, 0)) {
4688
25.8k
    if (ctxt->sax->ignorableWhitespace != NULL)
4689
25.8k
        ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4690
4.52M
      } else {
4691
4.52M
    if (ctxt->sax->characters != NULL)
4692
4.52M
        ctxt->sax->characters(ctxt->userData, buf, nbchar);
4693
4.52M
    if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4694
4.52M
        (*ctxt->space == -1))
4695
559k
        *ctxt->space = -2;
4696
4.52M
      }
4697
4.54M
  }
4698
4.91M
    }
4699
19.2M
    if ((ctxt->input->cur < ctxt->input->end) && (!IS_CHAR(cur))) {
4700
  /* Generate the error and skip the offending character */
4701
15.3M
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4702
15.3M
                          "PCDATA invalid Char value %d\n",
4703
15.3M
                    cur ? cur : CUR);
4704
15.3M
  NEXT;
4705
15.3M
    }
4706
19.2M
}
4707
4708
/**
4709
 * xmlParseExternalID:
4710
 * @ctxt:  an XML parser context
4711
 * @publicID:  a xmlChar** receiving PubidLiteral
4712
 * @strict: indicate whether we should restrict parsing to only
4713
 *          production [75], see NOTE below
4714
 *
4715
 * DEPRECATED: Internal function, don't use.
4716
 *
4717
 * Parse an External ID or a Public ID
4718
 *
4719
 * NOTE: Productions [75] and [83] interact badly since [75] can generate
4720
 *       'PUBLIC' S PubidLiteral S SystemLiteral
4721
 *
4722
 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4723
 *                   | 'PUBLIC' S PubidLiteral S SystemLiteral
4724
 *
4725
 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4726
 *
4727
 * Returns the function returns SystemLiteral and in the second
4728
 *                case publicID receives PubidLiteral, is strict is off
4729
 *                it is possible to return NULL and have publicID set.
4730
 */
4731
4732
xmlChar *
4733
672k
xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4734
672k
    xmlChar *URI = NULL;
4735
4736
672k
    SHRINK;
4737
4738
672k
    *publicID = NULL;
4739
672k
    if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4740
245k
        SKIP(6);
4741
245k
  if (SKIP_BLANKS == 0) {
4742
1.10k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4743
1.10k
                     "Space required after 'SYSTEM'\n");
4744
1.10k
  }
4745
245k
  URI = xmlParseSystemLiteral(ctxt);
4746
245k
  if (URI == NULL) {
4747
3.44k
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4748
3.44k
        }
4749
426k
    } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4750
131k
        SKIP(6);
4751
131k
  if (SKIP_BLANKS == 0) {
4752
2.18k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4753
2.18k
        "Space required after 'PUBLIC'\n");
4754
2.18k
  }
4755
131k
  *publicID = xmlParsePubidLiteral(ctxt);
4756
131k
  if (*publicID == NULL) {
4757
3.00k
      xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4758
3.00k
  }
4759
131k
  if (strict) {
4760
      /*
4761
       * We don't handle [83] so "S SystemLiteral" is required.
4762
       */
4763
118k
      if (SKIP_BLANKS == 0) {
4764
13.6k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4765
13.6k
      "Space required after the Public Identifier\n");
4766
13.6k
      }
4767
118k
  } else {
4768
      /*
4769
       * We handle [83] so we return immediately, if
4770
       * "S SystemLiteral" is not detected. We skip blanks if no
4771
             * system literal was found, but this is harmless since we must
4772
             * be at the end of a NotationDecl.
4773
       */
4774
12.4k
      if (SKIP_BLANKS == 0) return(NULL);
4775
625
      if ((CUR != '\'') && (CUR != '"')) return(NULL);
4776
625
  }
4777
118k
  URI = xmlParseSystemLiteral(ctxt);
4778
118k
  if (URI == NULL) {
4779
13.8k
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4780
13.8k
        }
4781
118k
    }
4782
660k
    return(URI);
4783
672k
}
4784
4785
/**
4786
 * xmlParseCommentComplex:
4787
 * @ctxt:  an XML parser context
4788
 * @buf:  the already parsed part of the buffer
4789
 * @len:  number of bytes in the buffer
4790
 * @size:  allocated size of the buffer
4791
 *
4792
 * Skip an XML (SGML) comment <!-- .... -->
4793
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4794
 *  must not occur within comments. "
4795
 * This is the slow routine in case the accelerator for ascii didn't work
4796
 *
4797
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4798
 */
4799
static void
4800
xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4801
290k
                       size_t len, size_t size) {
4802
290k
    int q, ql;
4803
290k
    int r, rl;
4804
290k
    int cur, l;
4805
290k
    size_t count = 0;
4806
290k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4807
91.8k
                       XML_MAX_HUGE_LENGTH :
4808
290k
                       XML_MAX_TEXT_LENGTH;
4809
290k
    int inputid;
4810
4811
290k
    inputid = ctxt->input->id;
4812
4813
290k
    if (buf == NULL) {
4814
21.2k
        len = 0;
4815
21.2k
  size = XML_PARSER_BUFFER_SIZE;
4816
21.2k
  buf = (xmlChar *) xmlMallocAtomic(size);
4817
21.2k
  if (buf == NULL) {
4818
0
      xmlErrMemory(ctxt, NULL);
4819
0
      return;
4820
0
  }
4821
21.2k
    }
4822
290k
    GROW; /* Assure there's enough input data */
4823
290k
    q = CUR_CHAR(ql);
4824
290k
    if (q == 0)
4825
45.7k
        goto not_terminated;
4826
245k
    if (!IS_CHAR(q)) {
4827
34.5k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4828
34.5k
                          "xmlParseComment: invalid xmlChar value %d\n",
4829
34.5k
                    q);
4830
34.5k
  xmlFree (buf);
4831
34.5k
  return;
4832
34.5k
    }
4833
210k
    NEXTL(ql);
4834
210k
    r = CUR_CHAR(rl);
4835
210k
    if (r == 0)
4836
4.59k
        goto not_terminated;
4837
205k
    if (!IS_CHAR(r)) {
4838
11.9k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4839
11.9k
                          "xmlParseComment: invalid xmlChar value %d\n",
4840
11.9k
                    r);
4841
11.9k
  xmlFree (buf);
4842
11.9k
  return;
4843
11.9k
    }
4844
193k
    NEXTL(rl);
4845
193k
    cur = CUR_CHAR(l);
4846
193k
    if (cur == 0)
4847
3.25k
        goto not_terminated;
4848
23.6M
    while (IS_CHAR(cur) && /* checked */
4849
23.6M
           ((cur != '>') ||
4850
23.5M
      (r != '-') || (q != '-'))) {
4851
23.4M
  if ((r == '-') && (q == '-')) {
4852
92.4k
      xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4853
92.4k
  }
4854
23.4M
  if (len + 5 >= size) {
4855
87.7k
      xmlChar *new_buf;
4856
87.7k
            size_t new_size;
4857
4858
87.7k
      new_size = size * 2;
4859
87.7k
      new_buf = (xmlChar *) xmlRealloc(buf, new_size);
4860
87.7k
      if (new_buf == NULL) {
4861
0
    xmlFree (buf);
4862
0
    xmlErrMemory(ctxt, NULL);
4863
0
    return;
4864
0
      }
4865
87.7k
      buf = new_buf;
4866
87.7k
            size = new_size;
4867
87.7k
  }
4868
23.4M
  COPY_BUF(ql,buf,len,q);
4869
23.4M
  q = r;
4870
23.4M
  ql = rl;
4871
23.4M
  r = cur;
4872
23.4M
  rl = l;
4873
4874
23.4M
  count++;
4875
23.4M
  if (count > 50) {
4876
386k
      SHRINK;
4877
386k
      GROW;
4878
386k
      count = 0;
4879
386k
            if (ctxt->instate == XML_PARSER_EOF) {
4880
0
    xmlFree(buf);
4881
0
    return;
4882
0
            }
4883
386k
  }
4884
23.4M
  NEXTL(l);
4885
23.4M
  cur = CUR_CHAR(l);
4886
23.4M
  if (cur == 0) {
4887
21.4k
      SHRINK;
4888
21.4k
      GROW;
4889
21.4k
      cur = CUR_CHAR(l);
4890
21.4k
  }
4891
4892
23.4M
        if (len > maxLength) {
4893
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4894
0
                         "Comment too big found", NULL);
4895
0
            xmlFree (buf);
4896
0
            return;
4897
0
        }
4898
23.4M
    }
4899
190k
    buf[len] = 0;
4900
190k
    if (cur == 0) {
4901
21.4k
  xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4902
21.4k
                       "Comment not terminated \n<!--%.50s\n", buf);
4903
169k
    } else if (!IS_CHAR(cur)) {
4904
34.1k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4905
34.1k
                          "xmlParseComment: invalid xmlChar value %d\n",
4906
34.1k
                    cur);
4907
135k
    } else {
4908
135k
  if (inputid != ctxt->input->id) {
4909
0
      xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4910
0
               "Comment doesn't start and stop in the same"
4911
0
                           " entity\n");
4912
0
  }
4913
135k
        NEXT;
4914
135k
  if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4915
135k
      (!ctxt->disableSAX))
4916
111k
      ctxt->sax->comment(ctxt->userData, buf);
4917
135k
    }
4918
190k
    xmlFree(buf);
4919
190k
    return;
4920
53.5k
not_terminated:
4921
53.5k
    xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4922
53.5k
       "Comment not terminated\n", NULL);
4923
53.5k
    xmlFree(buf);
4924
53.5k
    return;
4925
190k
}
4926
4927
/**
4928
 * xmlParseComment:
4929
 * @ctxt:  an XML parser context
4930
 *
4931
 * DEPRECATED: Internal function, don't use.
4932
 *
4933
 * Parse an XML (SGML) comment. Always consumes '<!'.
4934
 *
4935
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4936
 *  must not occur within comments. "
4937
 *
4938
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4939
 */
4940
void
4941
16.3M
xmlParseComment(xmlParserCtxtPtr ctxt) {
4942
16.3M
    xmlChar *buf = NULL;
4943
16.3M
    size_t size = XML_PARSER_BUFFER_SIZE;
4944
16.3M
    size_t len = 0;
4945
16.3M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4946
749k
                       XML_MAX_HUGE_LENGTH :
4947
16.3M
                       XML_MAX_TEXT_LENGTH;
4948
16.3M
    xmlParserInputState state;
4949
16.3M
    const xmlChar *in;
4950
16.3M
    size_t nbchar = 0;
4951
16.3M
    int ccol;
4952
16.3M
    int inputid;
4953
4954
    /*
4955
     * Check that there is a comment right here.
4956
     */
4957
16.3M
    if ((RAW != '<') || (NXT(1) != '!'))
4958
0
        return;
4959
16.3M
    SKIP(2);
4960
16.3M
    if ((RAW != '-') || (NXT(1) != '-'))
4961
387
        return;
4962
16.3M
    state = ctxt->instate;
4963
16.3M
    ctxt->instate = XML_PARSER_COMMENT;
4964
16.3M
    inputid = ctxt->input->id;
4965
16.3M
    SKIP(2);
4966
16.3M
    SHRINK;
4967
16.3M
    GROW;
4968
4969
    /*
4970
     * Accelerated common case where input don't need to be
4971
     * modified before passing it to the handler.
4972
     */
4973
16.3M
    in = ctxt->input->cur;
4974
16.3M
    do {
4975
16.3M
  if (*in == 0xA) {
4976
189k
      do {
4977
189k
    ctxt->input->line++; ctxt->input->col = 1;
4978
189k
    in++;
4979
189k
      } while (*in == 0xA);
4980
162k
  }
4981
21.0M
get_more:
4982
21.0M
        ccol = ctxt->input->col;
4983
178M
  while (((*in > '-') && (*in <= 0x7F)) ||
4984
178M
         ((*in >= 0x20) && (*in < '-')) ||
4985
178M
         (*in == 0x09)) {
4986
157M
        in++;
4987
157M
        ccol++;
4988
157M
  }
4989
21.0M
  ctxt->input->col = ccol;
4990
21.0M
  if (*in == 0xA) {
4991
1.70M
      do {
4992
1.70M
    ctxt->input->line++; ctxt->input->col = 1;
4993
1.70M
    in++;
4994
1.70M
      } while (*in == 0xA);
4995
1.58M
      goto get_more;
4996
1.58M
  }
4997
19.4M
  nbchar = in - ctxt->input->cur;
4998
  /*
4999
   * save current set of data
5000
   */
5001
19.4M
  if (nbchar > 0) {
5002
4.97M
      if ((ctxt->sax != NULL) &&
5003
4.97M
    (ctxt->sax->comment != NULL)) {
5004
4.97M
    if (buf == NULL) {
5005
1.94M
        if ((*in == '-') && (in[1] == '-'))
5006
695k
            size = nbchar + 1;
5007
1.24M
        else
5008
1.24M
            size = XML_PARSER_BUFFER_SIZE + nbchar;
5009
1.94M
        buf = (xmlChar *) xmlMallocAtomic(size);
5010
1.94M
        if (buf == NULL) {
5011
0
            xmlErrMemory(ctxt, NULL);
5012
0
      ctxt->instate = state;
5013
0
      return;
5014
0
        }
5015
1.94M
        len = 0;
5016
3.02M
    } else if (len + nbchar + 1 >= size) {
5017
305k
        xmlChar *new_buf;
5018
305k
        size  += len + nbchar + XML_PARSER_BUFFER_SIZE;
5019
305k
        new_buf = (xmlChar *) xmlRealloc(buf, size);
5020
305k
        if (new_buf == NULL) {
5021
0
            xmlFree (buf);
5022
0
      xmlErrMemory(ctxt, NULL);
5023
0
      ctxt->instate = state;
5024
0
      return;
5025
0
        }
5026
305k
        buf = new_buf;
5027
305k
    }
5028
4.97M
    memcpy(&buf[len], ctxt->input->cur, nbchar);
5029
4.97M
    len += nbchar;
5030
4.97M
    buf[len] = 0;
5031
4.97M
      }
5032
4.97M
  }
5033
19.4M
        if (len > maxLength) {
5034
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5035
0
                         "Comment too big found", NULL);
5036
0
            xmlFree (buf);
5037
0
            return;
5038
0
        }
5039
19.4M
  ctxt->input->cur = in;
5040
19.4M
  if (*in == 0xA) {
5041
0
      in++;
5042
0
      ctxt->input->line++; ctxt->input->col = 1;
5043
0
  }
5044
19.4M
  if (*in == 0xD) {
5045
1.07M
      in++;
5046
1.07M
      if (*in == 0xA) {
5047
1.05M
    ctxt->input->cur = in;
5048
1.05M
    in++;
5049
1.05M
    ctxt->input->line++; ctxt->input->col = 1;
5050
1.05M
    goto get_more;
5051
1.05M
      }
5052
19.5k
      in--;
5053
19.5k
  }
5054
18.3M
  SHRINK;
5055
18.3M
  GROW;
5056
18.3M
        if (ctxt->instate == XML_PARSER_EOF) {
5057
0
            xmlFree(buf);
5058
0
            return;
5059
0
        }
5060
18.3M
  in = ctxt->input->cur;
5061
18.3M
  if (*in == '-') {
5062
18.0M
      if (in[1] == '-') {
5063
16.1M
          if (in[2] == '>') {
5064
16.0M
        if (ctxt->input->id != inputid) {
5065
0
      xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5066
0
                     "comment doesn't start and stop in the"
5067
0
                                       " same entity\n");
5068
0
        }
5069
16.0M
        SKIP(3);
5070
16.0M
        if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5071
16.0M
            (!ctxt->disableSAX)) {
5072
12.8M
      if (buf != NULL)
5073
880k
          ctxt->sax->comment(ctxt->userData, buf);
5074
12.0M
      else
5075
12.0M
          ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5076
12.8M
        }
5077
16.0M
        if (buf != NULL)
5078
1.67M
            xmlFree(buf);
5079
16.0M
        if (ctxt->instate != XML_PARSER_EOF)
5080
16.0M
      ctxt->instate = state;
5081
16.0M
        return;
5082
16.0M
    }
5083
116k
    if (buf != NULL) {
5084
109k
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5085
109k
                          "Double hyphen within comment: "
5086
109k
                                      "<!--%.50s\n",
5087
109k
              buf);
5088
109k
    } else
5089
7.80k
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5090
7.80k
                          "Double hyphen within comment\n", NULL);
5091
116k
                if (ctxt->instate == XML_PARSER_EOF) {
5092
0
                    xmlFree(buf);
5093
0
                    return;
5094
0
                }
5095
116k
    in++;
5096
116k
    ctxt->input->col++;
5097
116k
      }
5098
2.01M
      in++;
5099
2.01M
      ctxt->input->col++;
5100
2.01M
      goto get_more;
5101
18.0M
  }
5102
18.3M
    } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
5103
290k
    xmlParseCommentComplex(ctxt, buf, len, size);
5104
290k
    ctxt->instate = state;
5105
290k
    return;
5106
16.3M
}
5107
5108
5109
/**
5110
 * xmlParsePITarget:
5111
 * @ctxt:  an XML parser context
5112
 *
5113
 * DEPRECATED: Internal function, don't use.
5114
 *
5115
 * parse the name of a PI
5116
 *
5117
 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5118
 *
5119
 * Returns the PITarget name or NULL
5120
 */
5121
5122
const xmlChar *
5123
820k
xmlParsePITarget(xmlParserCtxtPtr ctxt) {
5124
820k
    const xmlChar *name;
5125
5126
820k
    name = xmlParseName(ctxt);
5127
820k
    if ((name != NULL) &&
5128
820k
        ((name[0] == 'x') || (name[0] == 'X')) &&
5129
820k
        ((name[1] == 'm') || (name[1] == 'M')) &&
5130
820k
        ((name[2] == 'l') || (name[2] == 'L'))) {
5131
231k
  int i;
5132
231k
  if ((name[0] == 'x') && (name[1] == 'm') &&
5133
231k
      (name[2] == 'l') && (name[3] == 0)) {
5134
167k
      xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5135
167k
     "XML declaration allowed only at the start of the document\n");
5136
167k
      return(name);
5137
167k
  } else if (name[3] == 0) {
5138
16.9k
      xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5139
16.9k
      return(name);
5140
16.9k
  }
5141
122k
  for (i = 0;;i++) {
5142
122k
      if (xmlW3CPIs[i] == NULL) break;
5143
85.9k
      if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5144
10.1k
          return(name);
5145
85.9k
  }
5146
36.9k
  xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5147
36.9k
          "xmlParsePITarget: invalid name prefix 'xml'\n",
5148
36.9k
          NULL, NULL);
5149
36.9k
    }
5150
626k
    if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5151
24.6k
  xmlNsErr(ctxt, XML_NS_ERR_COLON,
5152
24.6k
     "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5153
24.6k
    }
5154
626k
    return(name);
5155
820k
}
5156
5157
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document
 *
 * The expected value is the word catalog, an equal sign and a quoted
 * URL, with optional blanks around each of them and nothing else after
 * the closing quote. A value where the word catalog is not followed by
 * an equal sign is ignored silently, any other deviation raises the
 * XML_WAR_CATALOG_PI warning.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *URL = NULL;
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar quote;

    for (; IS_BLANK_CH(*p); p++)
        ;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto error;
    p += 7;
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (*p != '=')
        return;
    p++;
    for (; IS_BLANK_CH(*p); p++)
        ;

    /* the URL is enclosed in a pair of identical quotes */
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto error;
    p++;
    start = p;
    for (; (*p != 0) && (*p != quote); p++)
        ;
    if (*p == 0)
        goto error;
    URL = xmlStrndup(start, p - start);
    p++;

    /* only blanks are allowed after the closing quote */
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (*p != 0)
        goto error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
5218
5219
/**
5220
 * xmlParsePI:
5221
 * @ctxt:  an XML parser context
5222
 *
5223
 * DEPRECATED: Internal function, don't use.
5224
 *
5225
 * parse an XML Processing Instruction.
5226
 *
5227
 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5228
 *
5229
 * The processing is transferred to SAX once parsed.
5230
 */
5231
5232
void
5233
820k
xmlParsePI(xmlParserCtxtPtr ctxt) {
5234
820k
    xmlChar *buf = NULL;
5235
820k
    size_t len = 0;
5236
820k
    size_t size = XML_PARSER_BUFFER_SIZE;
5237
820k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5238
290k
                       XML_MAX_HUGE_LENGTH :
5239
820k
                       XML_MAX_TEXT_LENGTH;
5240
820k
    int cur, l;
5241
820k
    const xmlChar *target;
5242
820k
    xmlParserInputState state;
5243
820k
    int count = 0;
5244
5245
820k
    if ((RAW == '<') && (NXT(1) == '?')) {
5246
820k
  int inputid = ctxt->input->id;
5247
820k
  state = ctxt->instate;
5248
820k
        ctxt->instate = XML_PARSER_PI;
5249
  /*
5250
   * this is a Processing Instruction.
5251
   */
5252
820k
  SKIP(2);
5253
820k
  SHRINK;
5254
5255
  /*
5256
   * Parse the target name and check for special support like
5257
   * namespace.
5258
   */
5259
820k
        target = xmlParsePITarget(ctxt);
5260
820k
  if (target != NULL) {
5261
676k
      if ((RAW == '?') && (NXT(1) == '>')) {
5262
148k
    if (inputid != ctxt->input->id) {
5263
0
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5264
0
                             "PI declaration doesn't start and stop in"
5265
0
                                   " the same entity\n");
5266
0
    }
5267
148k
    SKIP(2);
5268
5269
    /*
5270
     * SAX: PI detected.
5271
     */
5272
148k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5273
148k
        (ctxt->sax->processingInstruction != NULL))
5274
133k
        ctxt->sax->processingInstruction(ctxt->userData,
5275
133k
                                         target, NULL);
5276
148k
    if (ctxt->instate != XML_PARSER_EOF)
5277
148k
        ctxt->instate = state;
5278
148k
    return;
5279
148k
      }
5280
527k
      buf = (xmlChar *) xmlMallocAtomic(size);
5281
527k
      if (buf == NULL) {
5282
0
    xmlErrMemory(ctxt, NULL);
5283
0
    ctxt->instate = state;
5284
0
    return;
5285
0
      }
5286
527k
      if (SKIP_BLANKS == 0) {
5287
197k
    xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5288
197k
        "ParsePI: PI %s space expected\n", target);
5289
197k
      }
5290
527k
      cur = CUR_CHAR(l);
5291
39.0M
      while (IS_CHAR(cur) && /* checked */
5292
39.0M
       ((cur != '?') || (NXT(1) != '>'))) {
5293
38.5M
    if (len + 5 >= size) {
5294
88.1k
        xmlChar *tmp;
5295
88.1k
                    size_t new_size = size * 2;
5296
88.1k
        tmp = (xmlChar *) xmlRealloc(buf, new_size);
5297
88.1k
        if (tmp == NULL) {
5298
0
      xmlErrMemory(ctxt, NULL);
5299
0
      xmlFree(buf);
5300
0
      ctxt->instate = state;
5301
0
      return;
5302
0
        }
5303
88.1k
        buf = tmp;
5304
88.1k
                    size = new_size;
5305
88.1k
    }
5306
38.5M
    count++;
5307
38.5M
    if (count > 50) {
5308
582k
        SHRINK;
5309
582k
        GROW;
5310
582k
                    if (ctxt->instate == XML_PARSER_EOF) {
5311
0
                        xmlFree(buf);
5312
0
                        return;
5313
0
                    }
5314
582k
        count = 0;
5315
582k
    }
5316
38.5M
    COPY_BUF(l,buf,len,cur);
5317
38.5M
    NEXTL(l);
5318
38.5M
    cur = CUR_CHAR(l);
5319
38.5M
    if (cur == 0) {
5320
40.9k
        SHRINK;
5321
40.9k
        GROW;
5322
40.9k
        cur = CUR_CHAR(l);
5323
40.9k
    }
5324
38.5M
                if (len > maxLength) {
5325
0
                    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5326
0
                                      "PI %s too big found", target);
5327
0
                    xmlFree(buf);
5328
0
                    ctxt->instate = state;
5329
0
                    return;
5330
0
                }
5331
38.5M
      }
5332
527k
      buf[len] = 0;
5333
527k
      if (cur != '?') {
5334
117k
    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5335
117k
          "ParsePI: PI %s never end ...\n", target);
5336
409k
      } else {
5337
409k
    if (inputid != ctxt->input->id) {
5338
17
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5339
17
                             "PI declaration doesn't start and stop in"
5340
17
                                   " the same entity\n");
5341
17
    }
5342
409k
    SKIP(2);
5343
5344
409k
#ifdef LIBXML_CATALOG_ENABLED
5345
409k
    if (((state == XML_PARSER_MISC) ||
5346
409k
               (state == XML_PARSER_START)) &&
5347
409k
        (xmlStrEqual(target, XML_CATALOG_PI))) {
5348
460
        xmlCatalogAllow allow = xmlCatalogGetDefaults();
5349
460
        if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5350
460
      (allow == XML_CATA_ALLOW_ALL))
5351
460
      xmlParseCatalogPI(ctxt, buf);
5352
460
    }
5353
409k
#endif
5354
5355
5356
    /*
5357
     * SAX: PI detected.
5358
     */
5359
409k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5360
409k
        (ctxt->sax->processingInstruction != NULL))
5361
353k
        ctxt->sax->processingInstruction(ctxt->userData,
5362
353k
                                         target, buf);
5363
409k
      }
5364
527k
      xmlFree(buf);
5365
527k
  } else {
5366
144k
      xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5367
144k
  }
5368
671k
  if (ctxt->instate != XML_PARSER_EOF)
5369
671k
      ctxt->instate = state;
5370
671k
    }
5371
820k
}
5372
5373
/**
5374
 * xmlParseNotationDecl:
5375
 * @ctxt:  an XML parser context
5376
 *
5377
 * DEPRECATED: Internal function, don't use.
5378
 *
5379
 * Parse a notation declaration. Always consumes '<!'.
5380
 *
5381
 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID |  PublicID) S? '>'
5382
 *
5383
 * Hence there is actually 3 choices:
5384
 *     'PUBLIC' S PubidLiteral
5385
 *     'PUBLIC' S PubidLiteral S SystemLiteral
5386
 * and 'SYSTEM' S SystemLiteral
5387
 *
5388
 * See the NOTE on xmlParseExternalID().
5389
 */
5390
5391
void
5392
26.9k
xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5393
26.9k
    const xmlChar *name;
5394
26.9k
    xmlChar *Pubid;
5395
26.9k
    xmlChar *Systemid;
5396
5397
26.9k
    if ((CUR != '<') || (NXT(1) != '!'))
5398
0
        return;
5399
26.9k
    SKIP(2);
5400
5401
26.9k
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5402
25.6k
  int inputid = ctxt->input->id;
5403
25.6k
  SHRINK;
5404
25.6k
  SKIP(8);
5405
25.6k
  if (SKIP_BLANKS == 0) {
5406
1.32k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5407
1.32k
         "Space required after '<!NOTATION'\n");
5408
1.32k
      return;
5409
1.32k
  }
5410
5411
24.3k
        name = xmlParseName(ctxt);
5412
24.3k
  if (name == NULL) {
5413
680
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5414
680
      return;
5415
680
  }
5416
23.6k
  if (xmlStrchr(name, ':') != NULL) {
5417
636
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5418
636
         "colons are forbidden from notation names '%s'\n",
5419
636
         name, NULL, NULL);
5420
636
  }
5421
23.6k
  if (SKIP_BLANKS == 0) {
5422
1.26k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5423
1.26k
         "Space required after the NOTATION name'\n");
5424
1.26k
      return;
5425
1.26k
  }
5426
5427
  /*
5428
   * Parse the IDs.
5429
   */
5430
22.3k
  Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5431
22.3k
  SKIP_BLANKS;
5432
5433
22.3k
  if (RAW == '>') {
5434
14.3k
      if (inputid != ctxt->input->id) {
5435
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5436
0
                         "Notation declaration doesn't start and stop"
5437
0
                               " in the same entity\n");
5438
0
      }
5439
14.3k
      NEXT;
5440
14.3k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5441
14.3k
    (ctxt->sax->notationDecl != NULL))
5442
11.9k
    ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5443
14.3k
  } else {
5444
7.96k
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5445
7.96k
  }
5446
22.3k
  if (Systemid != NULL) xmlFree(Systemid);
5447
22.3k
  if (Pubid != NULL) xmlFree(Pubid);
5448
22.3k
    }
5449
26.9k
}
5450
5451
/**
5452
 * xmlParseEntityDecl:
5453
 * @ctxt:  an XML parser context
5454
 *
5455
 * DEPRECATED: Internal function, don't use.
5456
 *
5457
 * Parse an entity declaration. Always consumes '<!'.
5458
 *
5459
 * [70] EntityDecl ::= GEDecl | PEDecl
5460
 *
5461
 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5462
 *
5463
 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5464
 *
5465
 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5466
 *
5467
 * [74] PEDef ::= EntityValue | ExternalID
5468
 *
5469
 * [76] NDataDecl ::= S 'NDATA' S Name
5470
 *
5471
 * [ VC: Notation Declared ]
5472
 * The Name must match the declared name of a notation.
5473
 */
5474
5475
void
5476
1.11M
xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5477
1.11M
    const xmlChar *name = NULL;
5478
1.11M
    xmlChar *value = NULL;
5479
1.11M
    xmlChar *URI = NULL, *literal = NULL;
5480
1.11M
    const xmlChar *ndata = NULL;
5481
1.11M
    int isParameter = 0;
5482
1.11M
    xmlChar *orig = NULL;
5483
5484
1.11M
    if ((CUR != '<') || (NXT(1) != '!'))
5485
0
        return;
5486
1.11M
    SKIP(2);
5487
5488
    /* GROW; done in the caller */
5489
1.11M
    if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5490
1.11M
  int inputid = ctxt->input->id;
5491
1.11M
  SHRINK;
5492
1.11M
  SKIP(6);
5493
1.11M
  if (SKIP_BLANKS == 0) {
5494
9.05k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5495
9.05k
         "Space required after '<!ENTITY'\n");
5496
9.05k
  }
5497
5498
1.11M
  if (RAW == '%') {
5499
447k
      NEXT;
5500
447k
      if (SKIP_BLANKS == 0) {
5501
3.78k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5502
3.78k
             "Space required after '%%'\n");
5503
3.78k
      }
5504
447k
      isParameter = 1;
5505
447k
  }
5506
5507
1.11M
        name = xmlParseName(ctxt);
5508
1.11M
  if (name == NULL) {
5509
11.0k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5510
11.0k
                     "xmlParseEntityDecl: no name\n");
5511
11.0k
            return;
5512
11.0k
  }
5513
1.10M
  if (xmlStrchr(name, ':') != NULL) {
5514
3.49k
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5515
3.49k
         "colons are forbidden from entities names '%s'\n",
5516
3.49k
         name, NULL, NULL);
5517
3.49k
  }
5518
1.10M
  if (SKIP_BLANKS == 0) {
5519
17.3k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5520
17.3k
         "Space required after the entity name\n");
5521
17.3k
  }
5522
5523
1.10M
  ctxt->instate = XML_PARSER_ENTITY_DECL;
5524
  /*
5525
   * handle the various case of definitions...
5526
   */
5527
1.10M
  if (isParameter) {
5528
445k
      if ((RAW == '"') || (RAW == '\'')) {
5529
405k
          value = xmlParseEntityValue(ctxt, &orig);
5530
405k
    if (value) {
5531
381k
        if ((ctxt->sax != NULL) &&
5532
381k
      (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5533
349k
      ctxt->sax->entityDecl(ctxt->userData, name,
5534
349k
                        XML_INTERNAL_PARAMETER_ENTITY,
5535
349k
            NULL, NULL, value);
5536
381k
    }
5537
405k
      } else {
5538
39.8k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5539
39.8k
    if ((URI == NULL) && (literal == NULL)) {
5540
4.72k
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5541
4.72k
    }
5542
39.8k
    if (URI) {
5543
33.9k
        xmlURIPtr uri;
5544
5545
33.9k
        uri = xmlParseURI((const char *) URI);
5546
33.9k
        if (uri == NULL) {
5547
1.87k
            xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5548
1.87k
             "Invalid URI: %s\n", URI);
5549
      /*
5550
       * This really ought to be a well formedness error
5551
       * but the XML Core WG decided otherwise c.f. issue
5552
       * E26 of the XML erratas.
5553
       */
5554
32.0k
        } else {
5555
32.0k
      if (uri->fragment != NULL) {
5556
          /*
5557
           * Okay this is foolish to block those but not
5558
           * invalid URIs.
5559
           */
5560
143
          xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5561
31.9k
      } else {
5562
31.9k
          if ((ctxt->sax != NULL) &&
5563
31.9k
        (!ctxt->disableSAX) &&
5564
31.9k
        (ctxt->sax->entityDecl != NULL))
5565
30.1k
        ctxt->sax->entityDecl(ctxt->userData, name,
5566
30.1k
              XML_EXTERNAL_PARAMETER_ENTITY,
5567
30.1k
              literal, URI, NULL);
5568
31.9k
      }
5569
32.0k
      xmlFreeURI(uri);
5570
32.0k
        }
5571
33.9k
    }
5572
39.8k
      }
5573
657k
  } else {
5574
657k
      if ((RAW == '"') || (RAW == '\'')) {
5575
548k
          value = xmlParseEntityValue(ctxt, &orig);
5576
548k
    if ((ctxt->sax != NULL) &&
5577
548k
        (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5578
485k
        ctxt->sax->entityDecl(ctxt->userData, name,
5579
485k
        XML_INTERNAL_GENERAL_ENTITY,
5580
485k
        NULL, NULL, value);
5581
    /*
5582
     * For expat compatibility in SAX mode.
5583
     */
5584
548k
    if ((ctxt->myDoc == NULL) ||
5585
548k
        (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5586
12.7k
        if (ctxt->myDoc == NULL) {
5587
2.17k
      ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5588
2.17k
      if (ctxt->myDoc == NULL) {
5589
0
          xmlErrMemory(ctxt, "New Doc failed");
5590
0
          return;
5591
0
      }
5592
2.17k
      ctxt->myDoc->properties = XML_DOC_INTERNAL;
5593
2.17k
        }
5594
12.7k
        if (ctxt->myDoc->intSubset == NULL)
5595
2.17k
      ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5596
2.17k
              BAD_CAST "fake", NULL, NULL);
5597
5598
12.7k
        xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5599
12.7k
                    NULL, NULL, value);
5600
12.7k
    }
5601
548k
      } else {
5602
108k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5603
108k
    if ((URI == NULL) && (literal == NULL)) {
5604
15.7k
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5605
15.7k
    }
5606
108k
    if (URI) {
5607
88.2k
        xmlURIPtr uri;
5608
5609
88.2k
        uri = xmlParseURI((const char *)URI);
5610
88.2k
        if (uri == NULL) {
5611
7.67k
            xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5612
7.67k
             "Invalid URI: %s\n", URI);
5613
      /*
5614
       * This really ought to be a well formedness error
5615
       * but the XML Core WG decided otherwise c.f. issue
5616
       * E26 of the XML erratas.
5617
       */
5618
80.6k
        } else {
5619
80.6k
      if (uri->fragment != NULL) {
5620
          /*
5621
           * Okay this is foolish to block those but not
5622
           * invalid URIs.
5623
           */
5624
3.67k
          xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5625
3.67k
      }
5626
80.6k
      xmlFreeURI(uri);
5627
80.6k
        }
5628
88.2k
    }
5629
108k
    if ((RAW != '>') && (SKIP_BLANKS == 0)) {
5630
14.3k
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5631
14.3k
           "Space required before 'NDATA'\n");
5632
14.3k
    }
5633
108k
    if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5634
31.1k
        SKIP(5);
5635
31.1k
        if (SKIP_BLANKS == 0) {
5636
3.81k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5637
3.81k
               "Space required after 'NDATA'\n");
5638
3.81k
        }
5639
31.1k
        ndata = xmlParseName(ctxt);
5640
31.1k
        if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5641
31.1k
            (ctxt->sax->unparsedEntityDecl != NULL))
5642
27.0k
      ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5643
27.0k
            literal, URI, ndata);
5644
77.5k
    } else {
5645
77.5k
        if ((ctxt->sax != NULL) &&
5646
77.5k
            (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5647
65.0k
      ctxt->sax->entityDecl(ctxt->userData, name,
5648
65.0k
            XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5649
65.0k
            literal, URI, NULL);
5650
        /*
5651
         * For expat compatibility in SAX mode.
5652
         * assuming the entity replacement was asked for
5653
         */
5654
77.5k
        if ((ctxt->replaceEntities != 0) &&
5655
77.5k
      ((ctxt->myDoc == NULL) ||
5656
45.9k
      (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5657
2.31k
      if (ctxt->myDoc == NULL) {
5658
1.19k
          ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5659
1.19k
          if (ctxt->myDoc == NULL) {
5660
0
              xmlErrMemory(ctxt, "New Doc failed");
5661
0
        return;
5662
0
          }
5663
1.19k
          ctxt->myDoc->properties = XML_DOC_INTERNAL;
5664
1.19k
      }
5665
5666
2.31k
      if (ctxt->myDoc->intSubset == NULL)
5667
1.19k
          ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5668
1.19k
            BAD_CAST "fake", NULL, NULL);
5669
2.31k
      xmlSAX2EntityDecl(ctxt, name,
5670
2.31k
                  XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5671
2.31k
                  literal, URI, NULL);
5672
2.31k
        }
5673
77.5k
    }
5674
108k
      }
5675
657k
  }
5676
1.10M
  if (ctxt->instate == XML_PARSER_EOF)
5677
520
      goto done;
5678
1.10M
  SKIP_BLANKS;
5679
1.10M
  if (RAW != '>') {
5680
34.7k
      xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5681
34.7k
              "xmlParseEntityDecl: entity %s not terminated\n", name);
5682
34.7k
      xmlHaltParser(ctxt);
5683
1.06M
  } else {
5684
1.06M
      if (inputid != ctxt->input->id) {
5685
95
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5686
95
                         "Entity declaration doesn't start and stop in"
5687
95
                               " the same entity\n");
5688
95
      }
5689
1.06M
      NEXT;
5690
1.06M
  }
5691
1.10M
  if (orig != NULL) {
5692
      /*
5693
       * Ugly mechanism to save the raw entity value.
5694
       */
5695
921k
      xmlEntityPtr cur = NULL;
5696
5697
921k
      if (isParameter) {
5698
391k
          if ((ctxt->sax != NULL) &&
5699
391k
        (ctxt->sax->getParameterEntity != NULL))
5700
391k
        cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5701
529k
      } else {
5702
529k
          if ((ctxt->sax != NULL) &&
5703
529k
        (ctxt->sax->getEntity != NULL))
5704
529k
        cur = ctxt->sax->getEntity(ctxt->userData, name);
5705
529k
    if ((cur == NULL) && (ctxt->userData==ctxt)) {
5706
35.5k
        cur = xmlSAX2GetEntity(ctxt, name);
5707
35.5k
    }
5708
529k
      }
5709
921k
            if ((cur != NULL) && (cur->orig == NULL)) {
5710
741k
    cur->orig = orig;
5711
741k
                orig = NULL;
5712
741k
      }
5713
921k
  }
5714
5715
1.10M
done:
5716
1.10M
  if (value != NULL) xmlFree(value);
5717
1.10M
  if (URI != NULL) xmlFree(URI);
5718
1.10M
  if (literal != NULL) xmlFree(literal);
5719
1.10M
        if (orig != NULL) xmlFree(orig);
5720
1.10M
    }
5721
1.11M
}
5722
5723
/**
5724
 * xmlParseDefaultDecl:
5725
 * @ctxt:  an XML parser context
5726
 * @value:  Receive a possible fixed default value for the attribute
5727
 *
5728
 * DEPRECATED: Internal function, don't use.
5729
 *
5730
 * Parse an attribute default declaration
5731
 *
5732
 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5733
 *
5734
 * [ VC: Required Attribute ]
5735
 * if the default declaration is the keyword #REQUIRED, then the
5736
 * attribute must be specified for all elements of the type in the
5737
 * attribute-list declaration.
5738
 *
5739
 * [ VC: Attribute Default Legal ]
5740
 * The declared default value must meet the lexical constraints of
5741
 * the declared attribute type c.f. xmlValidateAttributeDecl()
5742
 *
5743
 * [ VC: Fixed Attribute Default ]
5744
 * if an attribute has a default value declared with the #FIXED
5745
 * keyword, instances of that attribute must match the default value.
5746
 *
5747
 * [ WFC: No < in Attribute Values ]
5748
 * handled in xmlParseAttValue()
5749
 *
5750
 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5751
 *          or XML_ATTRIBUTE_FIXED.
5752
 */
5753
5754
int
5755
1.86M
xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5756
1.86M
    int val;
5757
1.86M
    xmlChar *ret;
5758
5759
1.86M
    *value = NULL;
5760
1.86M
    if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5761
134k
  SKIP(9);
5762
134k
  return(XML_ATTRIBUTE_REQUIRED);
5763
134k
    }
5764
1.72M
    if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5765
1.39M
  SKIP(8);
5766
1.39M
  return(XML_ATTRIBUTE_IMPLIED);
5767
1.39M
    }
5768
332k
    val = XML_ATTRIBUTE_NONE;
5769
332k
    if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5770
128k
  SKIP(6);
5771
128k
  val = XML_ATTRIBUTE_FIXED;
5772
128k
  if (SKIP_BLANKS == 0) {
5773
704
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5774
704
         "Space required after '#FIXED'\n");
5775
704
  }
5776
128k
    }
5777
332k
    ret = xmlParseAttValue(ctxt);
5778
332k
    ctxt->instate = XML_PARSER_DTD;
5779
332k
    if (ret == NULL) {
5780
9.76k
  xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5781
9.76k
           "Attribute default value declaration error\n");
5782
9.76k
    } else
5783
322k
        *value = ret;
5784
332k
    return(val);
5785
1.72M
}
5786
5787
/**
5788
 * xmlParseNotationType:
5789
 * @ctxt:  an XML parser context
5790
 *
5791
 * DEPRECATED: Internal function, don't use.
5792
 *
5793
 * parse an Notation attribute type.
5794
 *
5795
 * Note: the leading 'NOTATION' S part has already being parsed...
5796
 *
5797
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5798
 *
5799
 * [ VC: Notation Attributes ]
5800
 * Values of this type must match one of the notation names included
5801
 * in the declaration; all notation names in the declaration must be declared.
5802
 *
5803
 * Returns: the notation attribute tree built while parsing
5804
 */
5805
5806
xmlEnumerationPtr
5807
18.0k
xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5808
18.0k
    const xmlChar *name;
5809
18.0k
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5810
5811
18.0k
    if (RAW != '(') {
5812
725
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5813
725
  return(NULL);
5814
725
    }
5815
17.3k
    SHRINK;
5816
22.7k
    do {
5817
22.7k
        NEXT;
5818
22.7k
  SKIP_BLANKS;
5819
22.7k
        name = xmlParseName(ctxt);
5820
22.7k
  if (name == NULL) {
5821
709
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5822
709
         "Name expected in NOTATION declaration\n");
5823
709
            xmlFreeEnumeration(ret);
5824
709
      return(NULL);
5825
709
  }
5826
22.0k
  tmp = ret;
5827
36.8k
  while (tmp != NULL) {
5828
16.4k
      if (xmlStrEqual(name, tmp->name)) {
5829
1.62k
    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5830
1.62k
    "standalone: attribute notation value token %s duplicated\n",
5831
1.62k
         name, NULL);
5832
1.62k
    if (!xmlDictOwns(ctxt->dict, name))
5833
0
        xmlFree((xmlChar *) name);
5834
1.62k
    break;
5835
1.62k
      }
5836
14.8k
      tmp = tmp->next;
5837
14.8k
  }
5838
22.0k
  if (tmp == NULL) {
5839
20.4k
      cur = xmlCreateEnumeration(name);
5840
20.4k
      if (cur == NULL) {
5841
0
                xmlFreeEnumeration(ret);
5842
0
                return(NULL);
5843
0
            }
5844
20.4k
      if (last == NULL) ret = last = cur;
5845
3.65k
      else {
5846
3.65k
    last->next = cur;
5847
3.65k
    last = cur;
5848
3.65k
      }
5849
20.4k
  }
5850
22.0k
  SKIP_BLANKS;
5851
22.0k
    } while (RAW == '|');
5852
16.6k
    if (RAW != ')') {
5853
3.51k
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5854
3.51k
        xmlFreeEnumeration(ret);
5855
3.51k
  return(NULL);
5856
3.51k
    }
5857
13.1k
    NEXT;
5858
13.1k
    return(ret);
5859
16.6k
}
5860
5861
/**
5862
 * xmlParseEnumerationType:
5863
 * @ctxt:  an XML parser context
5864
 *
5865
 * DEPRECATED: Internal function, don't use.
5866
 *
5867
 * parse an Enumeration attribute type.
5868
 *
5869
 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5870
 *
5871
 * [ VC: Enumeration ]
5872
 * Values of this type must match one of the Nmtoken tokens in
5873
 * the declaration
5874
 *
5875
 * Returns: the enumeration attribute tree built while parsing
5876
 */
5877
5878
xmlEnumerationPtr
5879
208k
xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5880
208k
    xmlChar *name;
5881
208k
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5882
5883
208k
    if (RAW != '(') {
5884
21.4k
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5885
21.4k
  return(NULL);
5886
21.4k
    }
5887
187k
    SHRINK;
5888
519k
    do {
5889
519k
        NEXT;
5890
519k
  SKIP_BLANKS;
5891
519k
        name = xmlParseNmtoken(ctxt);
5892
519k
  if (name == NULL) {
5893
2.59k
      xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5894
2.59k
      return(ret);
5895
2.59k
  }
5896
517k
  tmp = ret;
5897
1.31M
  while (tmp != NULL) {
5898
808k
      if (xmlStrEqual(name, tmp->name)) {
5899
10.2k
    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5900
10.2k
    "standalone: attribute enumeration value token %s duplicated\n",
5901
10.2k
         name, NULL);
5902
10.2k
    if (!xmlDictOwns(ctxt->dict, name))
5903
10.2k
        xmlFree(name);
5904
10.2k
    break;
5905
10.2k
      }
5906
798k
      tmp = tmp->next;
5907
798k
  }
5908
517k
  if (tmp == NULL) {
5909
506k
      cur = xmlCreateEnumeration(name);
5910
506k
      if (!xmlDictOwns(ctxt->dict, name))
5911
506k
    xmlFree(name);
5912
506k
      if (cur == NULL) {
5913
0
                xmlFreeEnumeration(ret);
5914
0
                return(NULL);
5915
0
            }
5916
506k
      if (last == NULL) ret = last = cur;
5917
322k
      else {
5918
322k
    last->next = cur;
5919
322k
    last = cur;
5920
322k
      }
5921
506k
  }
5922
517k
  SKIP_BLANKS;
5923
517k
    } while (RAW == '|');
5924
184k
    if (RAW != ')') {
5925
4.94k
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5926
4.94k
  return(ret);
5927
4.94k
    }
5928
179k
    NEXT;
5929
179k
    return(ret);
5930
184k
}
5931
5932
/**
5933
 * xmlParseEnumeratedType:
5934
 * @ctxt:  an XML parser context
5935
 * @tree:  the enumeration tree built while parsing
5936
 *
5937
 * DEPRECATED: Internal function, don't use.
5938
 *
5939
 * parse an Enumerated attribute type.
5940
 *
5941
 * [57] EnumeratedType ::= NotationType | Enumeration
5942
 *
5943
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5944
 *
5945
 *
5946
 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5947
 */
5948
5949
int
5950
226k
xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5951
226k
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5952
18.4k
  SKIP(8);
5953
18.4k
  if (SKIP_BLANKS == 0) {
5954
350
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5955
350
         "Space required after 'NOTATION'\n");
5956
350
      return(0);
5957
350
  }
5958
18.0k
  *tree = xmlParseNotationType(ctxt);
5959
18.0k
  if (*tree == NULL) return(0);
5960
13.1k
  return(XML_ATTRIBUTE_NOTATION);
5961
18.0k
    }
5962
208k
    *tree = xmlParseEnumerationType(ctxt);
5963
208k
    if (*tree == NULL) return(0);
5964
184k
    return(XML_ATTRIBUTE_ENUMERATION);
5965
208k
}
5966
5967
/**
5968
 * xmlParseAttributeType:
5969
 * @ctxt:  an XML parser context
5970
 * @tree:  the enumeration tree built while parsing
5971
 *
5972
 * DEPRECATED: Internal function, don't use.
5973
 *
5974
 * parse the Attribute list def for an element
5975
 *
5976
 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5977
 *
5978
 * [55] StringType ::= 'CDATA'
5979
 *
5980
 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5981
 *                        'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5982
 *
5983
 * Validity constraints for attribute values syntax are checked in
5984
 * xmlValidateAttributeValue()
5985
 *
5986
 * [ VC: ID ]
5987
 * Values of type ID must match the Name production. A name must not
5988
 * appear more than once in an XML document as a value of this type;
5989
 * i.e., ID values must uniquely identify the elements which bear them.
5990
 *
5991
 * [ VC: One ID per Element Type ]
5992
 * No element type may have more than one ID attribute specified.
5993
 *
5994
 * [ VC: ID Attribute Default ]
5995
 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5996
 *
5997
 * [ VC: IDREF ]
5998
 * Values of type IDREF must match the Name production, and values
5999
 * of type IDREFS must match Names; each IDREF Name must match the value
6000
 * of an ID attribute on some element in the XML document; i.e. IDREF
6001
 * values must match the value of some ID attribute.
6002
 *
6003
 * [ VC: Entity Name ]
6004
 * Values of type ENTITY must match the Name production, values
6005
 * of type ENTITIES must match Names; each Entity Name must match the
6006
 * name of an unparsed entity declared in the DTD.
6007
 *
6008
 * [ VC: Name Token ]
6009
 * Values of type NMTOKEN must match the Nmtoken production; values
6010
 * of type NMTOKENS must match Nmtokens.
6011
 *
6012
 * Returns the attribute type
6013
 */
6014
int
6015
1.90M
xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
6016
1.90M
    SHRINK;
6017
1.90M
    if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
6018
673k
  SKIP(5);
6019
673k
  return(XML_ATTRIBUTE_CDATA);
6020
1.22M
     } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
6021
31.1k
  SKIP(6);
6022
31.1k
  return(XML_ATTRIBUTE_IDREFS);
6023
1.19M
     } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
6024
51.1k
  SKIP(5);
6025
51.1k
  return(XML_ATTRIBUTE_IDREF);
6026
1.14M
     } else if ((RAW == 'I') && (NXT(1) == 'D')) {
6027
467k
        SKIP(2);
6028
467k
  return(XML_ATTRIBUTE_ID);
6029
676k
     } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
6030
12.3k
  SKIP(6);
6031
12.3k
  return(XML_ATTRIBUTE_ENTITY);
6032
664k
     } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
6033
6.06k
  SKIP(8);
6034
6.06k
  return(XML_ATTRIBUTE_ENTITIES);
6035
657k
     } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
6036
86.7k
  SKIP(8);
6037
86.7k
  return(XML_ATTRIBUTE_NMTOKENS);
6038
571k
     } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
6039
344k
  SKIP(7);
6040
344k
  return(XML_ATTRIBUTE_NMTOKEN);
6041
344k
     }
6042
226k
     return(xmlParseEnumeratedType(ctxt, tree));
6043
1.90M
}
6044
6045
/**
6046
 * xmlParseAttributeListDecl:
6047
 * @ctxt:  an XML parser context
6048
 *
6049
 * DEPRECATED: Internal function, don't use.
6050
 *
6051
 * Parse an attribute list declaration for an element. Always consumes '<!'.
6052
 *
6053
 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
6054
 *
6055
 * [53] AttDef ::= S Name S AttType S DefaultDecl
6056
 *
6057
 */
6058
void
6059
865k
xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
6060
865k
    const xmlChar *elemName;
6061
865k
    const xmlChar *attrName;
6062
865k
    xmlEnumerationPtr tree;
6063
6064
865k
    if ((CUR != '<') || (NXT(1) != '!'))
6065
0
        return;
6066
865k
    SKIP(2);
6067
6068
865k
    if (CMP7(CUR_PTR, 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
6069
861k
  int inputid = ctxt->input->id;
6070
6071
861k
  SKIP(7);
6072
861k
  if (SKIP_BLANKS == 0) {
6073
9.07k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6074
9.07k
                     "Space required after '<!ATTLIST'\n");
6075
9.07k
  }
6076
861k
        elemName = xmlParseName(ctxt);
6077
861k
  if (elemName == NULL) {
6078
4.23k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6079
4.23k
         "ATTLIST: no name for Element\n");
6080
4.23k
      return;
6081
4.23k
  }
6082
856k
  SKIP_BLANKS;
6083
856k
  GROW;
6084
2.65M
  while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
6085
1.94M
      int type;
6086
1.94M
      int def;
6087
1.94M
      xmlChar *defaultValue = NULL;
6088
6089
1.94M
      GROW;
6090
1.94M
            tree = NULL;
6091
1.94M
      attrName = xmlParseName(ctxt);
6092
1.94M
      if (attrName == NULL) {
6093
21.1k
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6094
21.1k
             "ATTLIST: no name for Attribute\n");
6095
21.1k
    break;
6096
21.1k
      }
6097
1.92M
      GROW;
6098
1.92M
      if (SKIP_BLANKS == 0) {
6099
20.5k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6100
20.5k
            "Space required after the attribute name\n");
6101
20.5k
    break;
6102
20.5k
      }
6103
6104
1.90M
      type = xmlParseAttributeType(ctxt, &tree);
6105
1.90M
      if (type <= 0) {
6106
29.1k
          break;
6107
29.1k
      }
6108
6109
1.87M
      GROW;
6110
1.87M
      if (SKIP_BLANKS == 0) {
6111
9.88k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6112
9.88k
             "Space required after the attribute type\n");
6113
9.88k
          if (tree != NULL)
6114
6.59k
        xmlFreeEnumeration(tree);
6115
9.88k
    break;
6116
9.88k
      }
6117
6118
1.86M
      def = xmlParseDefaultDecl(ctxt, &defaultValue);
6119
1.86M
      if (def <= 0) {
6120
0
                if (defaultValue != NULL)
6121
0
        xmlFree(defaultValue);
6122
0
          if (tree != NULL)
6123
0
        xmlFreeEnumeration(tree);
6124
0
          break;
6125
0
      }
6126
1.86M
      if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6127
177k
          xmlAttrNormalizeSpace(defaultValue, defaultValue);
6128
6129
1.86M
      GROW;
6130
1.86M
            if (RAW != '>') {
6131
1.61M
    if (SKIP_BLANKS == 0) {
6132
62.1k
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6133
62.1k
      "Space required after the attribute default value\n");
6134
62.1k
        if (defaultValue != NULL)
6135
52.0k
      xmlFree(defaultValue);
6136
62.1k
        if (tree != NULL)
6137
11.8k
      xmlFreeEnumeration(tree);
6138
62.1k
        break;
6139
62.1k
    }
6140
1.61M
      }
6141
1.79M
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6142
1.79M
    (ctxt->sax->attributeDecl != NULL))
6143
1.64M
    ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6144
1.64M
                          type, def, defaultValue, tree);
6145
155k
      else if (tree != NULL)
6146
14.3k
    xmlFreeEnumeration(tree);
6147
6148
1.79M
      if ((ctxt->sax2) && (defaultValue != NULL) &&
6149
1.79M
          (def != XML_ATTRIBUTE_IMPLIED) &&
6150
1.79M
    (def != XML_ATTRIBUTE_REQUIRED)) {
6151
188k
    xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6152
188k
      }
6153
1.79M
      if (ctxt->sax2) {
6154
1.28M
    xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6155
1.28M
      }
6156
1.79M
      if (defaultValue != NULL)
6157
270k
          xmlFree(defaultValue);
6158
1.79M
      GROW;
6159
1.79M
  }
6160
856k
  if (RAW == '>') {
6161
720k
      if (inputid != ctxt->input->id) {
6162
49
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6163
49
                               "Attribute list declaration doesn't start and"
6164
49
                               " stop in the same entity\n");
6165
49
      }
6166
720k
      NEXT;
6167
720k
  }
6168
856k
    }
6169
865k
}
6170
6171
/**
6172
 * xmlParseElementMixedContentDecl:
6173
 * @ctxt:  an XML parser context
6174
 * @inputchk:  the input used for the current entity, needed for boundary checks
6175
 *
6176
 * DEPRECATED: Internal function, don't use.
6177
 *
6178
 * parse the declaration for a Mixed Element content
6179
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6180
 *
6181
 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6182
 *                '(' S? '#PCDATA' S? ')'
6183
 *
6184
 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6185
 *
6186
 * [ VC: No Duplicate Types ]
6187
 * The same name must not appear more than once in a single
6188
 * mixed-content declaration.
6189
 *
6190
 * returns: the list of the xmlElementContentPtr describing the element choices
6191
 */
6192
xmlElementContentPtr
6193
378k
xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6194
378k
    xmlElementContentPtr ret = NULL, cur = NULL, n;
6195
378k
    const xmlChar *elem = NULL;
6196
6197
378k
    GROW;
6198
378k
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6199
378k
  SKIP(7);
6200
378k
  SKIP_BLANKS;
6201
378k
  SHRINK;
6202
378k
  if (RAW == ')') {
6203
247k
      if (ctxt->input->id != inputchk) {
6204
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6205
0
                               "Element content declaration doesn't start and"
6206
0
                               " stop in the same entity\n");
6207
0
      }
6208
247k
      NEXT;
6209
247k
      ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6210
247k
      if (ret == NULL)
6211
0
          return(NULL);
6212
247k
      if (RAW == '*') {
6213
365
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6214
365
    NEXT;
6215
365
      }
6216
247k
      return(ret);
6217
247k
  }
6218
131k
  if ((RAW == '(') || (RAW == '|')) {
6219
129k
      ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6220
129k
      if (ret == NULL) return(NULL);
6221
129k
  }
6222
1.29M
  while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
6223
1.16M
      NEXT;
6224
1.16M
      if (elem == NULL) {
6225
129k
          ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6226
129k
    if (ret == NULL) {
6227
0
        xmlFreeDocElementContent(ctxt->myDoc, cur);
6228
0
                    return(NULL);
6229
0
                }
6230
129k
    ret->c1 = cur;
6231
129k
    if (cur != NULL)
6232
129k
        cur->parent = ret;
6233
129k
    cur = ret;
6234
1.03M
      } else {
6235
1.03M
          n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6236
1.03M
    if (n == NULL) {
6237
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6238
0
                    return(NULL);
6239
0
                }
6240
1.03M
    n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6241
1.03M
    if (n->c1 != NULL)
6242
1.03M
        n->c1->parent = n;
6243
1.03M
          cur->c2 = n;
6244
1.03M
    if (n != NULL)
6245
1.03M
        n->parent = cur;
6246
1.03M
    cur = n;
6247
1.03M
      }
6248
1.16M
      SKIP_BLANKS;
6249
1.16M
      elem = xmlParseName(ctxt);
6250
1.16M
      if (elem == NULL) {
6251
1.00k
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6252
1.00k
      "xmlParseElementMixedContentDecl : Name expected\n");
6253
1.00k
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6254
1.00k
    return(NULL);
6255
1.00k
      }
6256
1.16M
      SKIP_BLANKS;
6257
1.16M
      GROW;
6258
1.16M
  }
6259
130k
  if ((RAW == ')') && (NXT(1) == '*')) {
6260
124k
      if (elem != NULL) {
6261
124k
    cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6262
124k
                                   XML_ELEMENT_CONTENT_ELEMENT);
6263
124k
    if (cur->c2 != NULL)
6264
124k
        cur->c2->parent = cur;
6265
124k
            }
6266
124k
            if (ret != NULL)
6267
124k
                ret->ocur = XML_ELEMENT_CONTENT_MULT;
6268
124k
      if (ctxt->input->id != inputchk) {
6269
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6270
0
                               "Element content declaration doesn't start and"
6271
0
                               " stop in the same entity\n");
6272
0
      }
6273
124k
      SKIP(2);
6274
124k
  } else {
6275
5.55k
      xmlFreeDocElementContent(ctxt->myDoc, ret);
6276
5.55k
      xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6277
5.55k
      return(NULL);
6278
5.55k
  }
6279
6280
130k
    } else {
6281
0
  xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6282
0
    }
6283
124k
    return(ret);
6284
378k
}
6285
6286
/**
6287
 * xmlParseElementChildrenContentDeclPriv:
6288
 * @ctxt:  an XML parser context
6289
 * @inputchk:  the input used for the current entity, needed for boundary checks
6290
 * @depth: the level of recursion
6291
 *
6292
 * parse the declaration for a Mixed Element content
6293
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6294
 *
6295
 *
6296
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6297
 *
6298
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6299
 *
6300
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6301
 *
6302
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6303
 *
6304
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6305
 * TODO Parameter-entity replacement text must be properly nested
6306
 *  with parenthesized groups. That is to say, if either of the
6307
 *  opening or closing parentheses in a choice, seq, or Mixed
6308
 *  construct is contained in the replacement text for a parameter
6309
 *  entity, both must be contained in the same replacement text. For
6310
 *  interoperability, if a parameter-entity reference appears in a
6311
 *  choice, seq, or Mixed construct, its replacement text should not
6312
 *  be empty, and neither the first nor last non-blank character of
6313
 *  the replacement text should be a connector (| or ,).
6314
 *
6315
 * Returns the tree of xmlElementContentPtr describing the element
6316
 *          hierarchy.
6317
 */
6318
static xmlElementContentPtr
6319
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6320
1.66M
                                       int depth) {
6321
1.66M
    xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6322
1.66M
    const xmlChar *elem;
6323
1.66M
    xmlChar type = 0;
6324
6325
1.66M
    if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6326
1.66M
        (depth >  2048)) {
6327
208
        xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6328
208
"xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6329
208
                          depth);
6330
208
  return(NULL);
6331
208
    }
6332
1.66M
    SKIP_BLANKS;
6333
1.66M
    GROW;
6334
1.66M
    if (RAW == '(') {
6335
1.20M
  int inputid = ctxt->input->id;
6336
6337
        /* Recurse on first child */
6338
1.20M
  NEXT;
6339
1.20M
  SKIP_BLANKS;
6340
1.20M
        cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6341
1.20M
                                                           depth + 1);
6342
1.20M
        if (cur == NULL)
6343
1.16M
            return(NULL);
6344
37.6k
  SKIP_BLANKS;
6345
37.6k
  GROW;
6346
463k
    } else {
6347
463k
  elem = xmlParseName(ctxt);
6348
463k
  if (elem == NULL) {
6349
9.20k
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6350
9.20k
      return(NULL);
6351
9.20k
  }
6352
454k
        cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6353
454k
  if (cur == NULL) {
6354
0
      xmlErrMemory(ctxt, NULL);
6355
0
      return(NULL);
6356
0
  }
6357
454k
  GROW;
6358
454k
  if (RAW == '?') {
6359
36.5k
      cur->ocur = XML_ELEMENT_CONTENT_OPT;
6360
36.5k
      NEXT;
6361
417k
  } else if (RAW == '*') {
6362
37.9k
      cur->ocur = XML_ELEMENT_CONTENT_MULT;
6363
37.9k
      NEXT;
6364
379k
  } else if (RAW == '+') {
6365
62.3k
      cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6366
62.3k
      NEXT;
6367
317k
  } else {
6368
317k
      cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6369
317k
  }
6370
454k
  GROW;
6371
454k
    }
6372
491k
    SKIP_BLANKS;
6373
491k
    SHRINK;
6374
1.77M
    while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
6375
        /*
6376
   * Each loop we parse one separator and one element.
6377
   */
6378
1.33M
        if (RAW == ',') {
6379
431k
      if (type == 0) type = CUR;
6380
6381
      /*
6382
       * Detect "Name | Name , Name" error
6383
       */
6384
258k
      else if (type != CUR) {
6385
164
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6386
164
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6387
164
                      type);
6388
164
    if ((last != NULL) && (last != ret))
6389
164
        xmlFreeDocElementContent(ctxt->myDoc, last);
6390
164
    if (ret != NULL)
6391
164
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6392
164
    return(NULL);
6393
164
      }
6394
431k
      NEXT;
6395
6396
431k
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6397
431k
      if (op == NULL) {
6398
0
    if ((last != NULL) && (last != ret))
6399
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6400
0
          xmlFreeDocElementContent(ctxt->myDoc, ret);
6401
0
    return(NULL);
6402
0
      }
6403
431k
      if (last == NULL) {
6404
173k
    op->c1 = ret;
6405
173k
    if (ret != NULL)
6406
173k
        ret->parent = op;
6407
173k
    ret = cur = op;
6408
258k
      } else {
6409
258k
          cur->c2 = op;
6410
258k
    if (op != NULL)
6411
258k
        op->parent = cur;
6412
258k
    op->c1 = last;
6413
258k
    if (last != NULL)
6414
258k
        last->parent = op;
6415
258k
    cur =op;
6416
258k
    last = NULL;
6417
258k
      }
6418
901k
  } else if (RAW == '|') {
6419
868k
      if (type == 0) type = CUR;
6420
6421
      /*
6422
       * Detect "Name , Name | Name" error
6423
       */
6424
739k
      else if (type != CUR) {
6425
167
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6426
167
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6427
167
          type);
6428
167
    if ((last != NULL) && (last != ret))
6429
167
        xmlFreeDocElementContent(ctxt->myDoc, last);
6430
167
    if (ret != NULL)
6431
167
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6432
167
    return(NULL);
6433
167
      }
6434
868k
      NEXT;
6435
6436
868k
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6437
868k
      if (op == NULL) {
6438
0
    if ((last != NULL) && (last != ret))
6439
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6440
0
    if (ret != NULL)
6441
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6442
0
    return(NULL);
6443
0
      }
6444
868k
      if (last == NULL) {
6445
128k
    op->c1 = ret;
6446
128k
    if (ret != NULL)
6447
128k
        ret->parent = op;
6448
128k
    ret = cur = op;
6449
739k
      } else {
6450
739k
          cur->c2 = op;
6451
739k
    if (op != NULL)
6452
739k
        op->parent = cur;
6453
739k
    op->c1 = last;
6454
739k
    if (last != NULL)
6455
739k
        last->parent = op;
6456
739k
    cur =op;
6457
739k
    last = NULL;
6458
739k
      }
6459
868k
  } else {
6460
32.4k
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6461
32.4k
      if ((last != NULL) && (last != ret))
6462
15.1k
          xmlFreeDocElementContent(ctxt->myDoc, last);
6463
32.4k
      if (ret != NULL)
6464
32.4k
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6465
32.4k
      return(NULL);
6466
32.4k
  }
6467
1.29M
  GROW;
6468
1.29M
  SKIP_BLANKS;
6469
1.29M
  GROW;
6470
1.29M
  if (RAW == '(') {
6471
72.1k
      int inputid = ctxt->input->id;
6472
      /* Recurse on second child */
6473
72.1k
      NEXT;
6474
72.1k
      SKIP_BLANKS;
6475
72.1k
      last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6476
72.1k
                                                          depth + 1);
6477
72.1k
            if (last == NULL) {
6478
12.5k
    if (ret != NULL)
6479
12.5k
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6480
12.5k
    return(NULL);
6481
12.5k
            }
6482
59.5k
      SKIP_BLANKS;
6483
1.22M
  } else {
6484
1.22M
      elem = xmlParseName(ctxt);
6485
1.22M
      if (elem == NULL) {
6486
4.10k
    xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6487
4.10k
    if (ret != NULL)
6488
4.10k
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6489
4.10k
    return(NULL);
6490
4.10k
      }
6491
1.22M
      last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6492
1.22M
      if (last == NULL) {
6493
0
    if (ret != NULL)
6494
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6495
0
    return(NULL);
6496
0
      }
6497
1.22M
      if (RAW == '?') {
6498
149k
    last->ocur = XML_ELEMENT_CONTENT_OPT;
6499
149k
    NEXT;
6500
1.07M
      } else if (RAW == '*') {
6501
85.6k
    last->ocur = XML_ELEMENT_CONTENT_MULT;
6502
85.6k
    NEXT;
6503
988k
      } else if (RAW == '+') {
6504
28.3k
    last->ocur = XML_ELEMENT_CONTENT_PLUS;
6505
28.3k
    NEXT;
6506
960k
      } else {
6507
960k
    last->ocur = XML_ELEMENT_CONTENT_ONCE;
6508
960k
      }
6509
1.22M
  }
6510
1.28M
  SKIP_BLANKS;
6511
1.28M
  GROW;
6512
1.28M
    }
6513
442k
    if ((cur != NULL) && (last != NULL)) {
6514
270k
        cur->c2 = last;
6515
270k
  if (last != NULL)
6516
270k
      last->parent = cur;
6517
270k
    }
6518
442k
    if (ctxt->input->id != inputchk) {
6519
45
  xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6520
45
                       "Element content declaration doesn't start and stop in"
6521
45
                       " the same entity\n");
6522
45
    }
6523
442k
    NEXT;
6524
442k
    if (RAW == '?') {
6525
10.5k
  if (ret != NULL) {
6526
10.5k
      if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6527
10.5k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6528
618
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6529
9.96k
      else
6530
9.96k
          ret->ocur = XML_ELEMENT_CONTENT_OPT;
6531
10.5k
  }
6532
10.5k
  NEXT;
6533
431k
    } else if (RAW == '*') {
6534
96.8k
  if (ret != NULL) {
6535
96.8k
      ret->ocur = XML_ELEMENT_CONTENT_MULT;
6536
96.8k
      cur = ret;
6537
      /*
6538
       * Some normalization:
6539
       * (a | b* | c?)* == (a | b | c)*
6540
       */
6541
627k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6542
530k
    if ((cur->c1 != NULL) &&
6543
530k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6544
530k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6545
25.3k
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6546
530k
    if ((cur->c2 != NULL) &&
6547
530k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6548
530k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6549
4.12k
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6550
530k
    cur = cur->c2;
6551
530k
      }
6552
96.8k
  }
6553
96.8k
  NEXT;
6554
335k
    } else if (RAW == '+') {
6555
58.4k
  if (ret != NULL) {
6556
58.4k
      int found = 0;
6557
6558
58.4k
      if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6559
58.4k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6560
856
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6561
57.5k
      else
6562
57.5k
          ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6563
      /*
6564
       * Some normalization:
6565
       * (a | b*)+ == (a | b)*
6566
       * (a | b?)+ == (a | b)*
6567
       */
6568
93.6k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6569
35.2k
    if ((cur->c1 != NULL) &&
6570
35.2k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6571
35.2k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6572
744
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6573
744
        found = 1;
6574
744
    }
6575
35.2k
    if ((cur->c2 != NULL) &&
6576
35.2k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6577
35.2k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6578
699
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6579
699
        found = 1;
6580
699
    }
6581
35.2k
    cur = cur->c2;
6582
35.2k
      }
6583
58.4k
      if (found)
6584
1.07k
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6585
58.4k
  }
6586
58.4k
  NEXT;
6587
58.4k
    }
6588
442k
    return(ret);
6589
491k
}
6590
6591
/**
6592
 * xmlParseElementChildrenContentDecl:
6593
 * @ctxt:  an XML parser context
6594
 * @inputchk:  the input used for the current entity, needed for boundary checks
6595
 *
6596
 * DEPRECATED: Internal function, don't use.
6597
 *
6598
 * parse the declaration for a Mixed Element content
6599
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6600
 *
6601
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6602
 *
6603
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6604
 *
6605
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6606
 *
6607
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6608
 *
6609
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6610
 * TODO Parameter-entity replacement text must be properly nested
6611
 *  with parenthesized groups. That is to say, if either of the
6612
 *  opening or closing parentheses in a choice, seq, or Mixed
6613
 *  construct is contained in the replacement text for a parameter
6614
 *  entity, both must be contained in the same replacement text. For
6615
 *  interoperability, if a parameter-entity reference appears in a
6616
 *  choice, seq, or Mixed construct, its replacement text should not
6617
 *  be empty, and neither the first nor last non-blank character of
6618
 *  the replacement text should be a connector (| or ,).
6619
 *
6620
 * Returns the tree of xmlElementContentPtr describing the element
6621
 *          hierarchy.
6622
 */
6623
xmlElementContentPtr
6624
0
xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6625
    /* stub left for API/ABI compat */
6626
0
    return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6627
0
}
6628
6629
/**
6630
 * xmlParseElementContentDecl:
6631
 * @ctxt:  an XML parser context
6632
 * @name:  the name of the element being defined.
6633
 * @result:  the Element Content pointer will be stored here if any
6634
 *
6635
 * DEPRECATED: Internal function, don't use.
6636
 *
6637
 * parse the declaration for an Element content either Mixed or Children,
6638
 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6639
 *
6640
 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6641
 *
6642
 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6643
 */
6644
6645
int
6646
xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6647
769k
                           xmlElementContentPtr *result) {
6648
6649
769k
    xmlElementContentPtr tree = NULL;
6650
769k
    int inputid = ctxt->input->id;
6651
769k
    int res;
6652
6653
769k
    *result = NULL;
6654
6655
769k
    if (RAW != '(') {
6656
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6657
0
    "xmlParseElementContentDecl : %s '(' expected\n", name);
6658
0
  return(-1);
6659
0
    }
6660
769k
    NEXT;
6661
769k
    GROW;
6662
769k
    if (ctxt->instate == XML_PARSER_EOF)
6663
0
        return(-1);
6664
769k
    SKIP_BLANKS;
6665
769k
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6666
378k
        tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6667
378k
  res = XML_ELEMENT_TYPE_MIXED;
6668
391k
    } else {
6669
391k
        tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6670
391k
  res = XML_ELEMENT_TYPE_ELEMENT;
6671
391k
    }
6672
769k
    SKIP_BLANKS;
6673
769k
    *result = tree;
6674
769k
    return(res);
6675
769k
}
6676
6677
/**
6678
 * xmlParseElementDecl:
6679
 * @ctxt:  an XML parser context
6680
 *
6681
 * DEPRECATED: Internal function, don't use.
6682
 *
6683
 * Parse an element declaration. Always consumes '<!'.
6684
 *
6685
 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6686
 *
6687
 * [ VC: Unique Element Type Declaration ]
6688
 * No element type may be declared more than once
6689
 *
6690
 * Returns the type of the element, or -1 in case of error
6691
 */
6692
int
6693
1.01M
xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6694
1.01M
    const xmlChar *name;
6695
1.01M
    int ret = -1;
6696
1.01M
    xmlElementContentPtr content  = NULL;
6697
6698
1.01M
    if ((CUR != '<') || (NXT(1) != '!'))
6699
0
        return(ret);
6700
1.01M
    SKIP(2);
6701
6702
    /* GROW; done in the caller */
6703
1.01M
    if (CMP7(CUR_PTR, 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6704
1.00M
  int inputid = ctxt->input->id;
6705
6706
1.00M
  SKIP(7);
6707
1.00M
  if (SKIP_BLANKS == 0) {
6708
3.91k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6709
3.91k
               "Space required after 'ELEMENT'\n");
6710
3.91k
      return(-1);
6711
3.91k
  }
6712
1.00M
        name = xmlParseName(ctxt);
6713
1.00M
  if (name == NULL) {
6714
4.90k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6715
4.90k
         "xmlParseElementDecl: no name for Element\n");
6716
4.90k
      return(-1);
6717
4.90k
  }
6718
1.00M
  if (SKIP_BLANKS == 0) {
6719
22.7k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6720
22.7k
         "Space required after the element name\n");
6721
22.7k
  }
6722
1.00M
  if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6723
195k
      SKIP(5);
6724
      /*
6725
       * Element must always be empty.
6726
       */
6727
195k
      ret = XML_ELEMENT_TYPE_EMPTY;
6728
804k
  } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6729
804k
             (NXT(2) == 'Y')) {
6730
5.78k
      SKIP(3);
6731
      /*
6732
       * Element is a generic container.
6733
       */
6734
5.78k
      ret = XML_ELEMENT_TYPE_ANY;
6735
798k
  } else if (RAW == '(') {
6736
769k
      ret = xmlParseElementContentDecl(ctxt, name, &content);
6737
769k
  } else {
6738
      /*
6739
       * [ WFC: PEs in Internal Subset ] error handling.
6740
       */
6741
29.2k
      if ((RAW == '%') && (ctxt->external == 0) &&
6742
29.2k
          (ctxt->inputNr == 1)) {
6743
2.48k
    xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
6744
2.48k
    "PEReference: forbidden within markup decl in internal subset\n");
6745
26.7k
      } else {
6746
26.7k
    xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6747
26.7k
          "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6748
26.7k
            }
6749
29.2k
      return(-1);
6750
29.2k
  }
6751
6752
971k
  SKIP_BLANKS;
6753
6754
971k
  if (RAW != '>') {
6755
46.7k
      xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6756
46.7k
      if (content != NULL) {
6757
4.43k
    xmlFreeDocElementContent(ctxt->myDoc, content);
6758
4.43k
      }
6759
924k
  } else {
6760
924k
      if (inputid != ctxt->input->id) {
6761
46
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6762
46
                               "Element declaration doesn't start and stop in"
6763
46
                               " the same entity\n");
6764
46
      }
6765
6766
924k
      NEXT;
6767
924k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6768
924k
    (ctxt->sax->elementDecl != NULL)) {
6769
813k
    if (content != NULL)
6770
622k
        content->parent = NULL;
6771
813k
          ctxt->sax->elementDecl(ctxt->userData, name, ret,
6772
813k
                           content);
6773
813k
    if ((content != NULL) && (content->parent == NULL)) {
6774
        /*
6775
         * this is a trick: if xmlAddElementDecl is called,
6776
         * instead of copying the full tree it is plugged directly
6777
         * if called from the parser. Avoid duplicating the
6778
         * interfaces or change the API/ABI
6779
         */
6780
57.3k
        xmlFreeDocElementContent(ctxt->myDoc, content);
6781
57.3k
    }
6782
813k
      } else if (content != NULL) {
6783
90.3k
    xmlFreeDocElementContent(ctxt->myDoc, content);
6784
90.3k
      }
6785
924k
  }
6786
971k
    }
6787
976k
    return(ret);
6788
1.01M
}
6789
6790
/**
6791
 * xmlParseConditionalSections
6792
 * @ctxt:  an XML parser context
6793
 *
6794
 * Parse a conditional section. Always consumes '<!['.
6795
 *
6796
 * [61] conditionalSect ::= includeSect | ignoreSect
6797
 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6798
 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6799
 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6800
 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6801
 */
6802
6803
static void
6804
11.2k
xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6805
11.2k
    int *inputIds = NULL;
6806
11.2k
    size_t inputIdsSize = 0;
6807
11.2k
    size_t depth = 0;
6808
6809
46.1k
    while (ctxt->instate != XML_PARSER_EOF) {
6810
45.7k
        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6811
23.9k
            int id = ctxt->input->id;
6812
6813
23.9k
            SKIP(3);
6814
23.9k
            SKIP_BLANKS;
6815
6816
23.9k
            if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6817
17.2k
                SKIP(7);
6818
17.2k
                SKIP_BLANKS;
6819
17.2k
                if (RAW != '[') {
6820
298
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6821
298
                    xmlHaltParser(ctxt);
6822
298
                    goto error;
6823
298
                }
6824
16.9k
                if (ctxt->input->id != id) {
6825
15
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6826
15
                                   "All markup of the conditional section is"
6827
15
                                   " not in the same entity\n");
6828
15
                }
6829
16.9k
                NEXT;
6830
6831
16.9k
                if (inputIdsSize <= depth) {
6832
6.30k
                    int *tmp;
6833
6834
6.30k
                    inputIdsSize = (inputIdsSize == 0 ? 4 : inputIdsSize * 2);
6835
6.30k
                    tmp = (int *) xmlRealloc(inputIds,
6836
6.30k
                            inputIdsSize * sizeof(int));
6837
6.30k
                    if (tmp == NULL) {
6838
0
                        xmlErrMemory(ctxt, NULL);
6839
0
                        goto error;
6840
0
                    }
6841
6.30k
                    inputIds = tmp;
6842
6.30k
                }
6843
16.9k
                inputIds[depth] = id;
6844
16.9k
                depth++;
6845
16.9k
            } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6846
4.64k
                size_t ignoreDepth = 0;
6847
6848
4.64k
                SKIP(6);
6849
4.64k
                SKIP_BLANKS;
6850
4.64k
                if (RAW != '[') {
6851
228
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6852
228
                    xmlHaltParser(ctxt);
6853
228
                    goto error;
6854
228
                }
6855
4.41k
                if (ctxt->input->id != id) {
6856
6
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6857
6
                                   "All markup of the conditional section is"
6858
6
                                   " not in the same entity\n");
6859
6
                }
6860
4.41k
                NEXT;
6861
6862
2.77M
                while (RAW != 0) {
6863
2.77M
                    if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6864
11.9k
                        SKIP(3);
6865
11.9k
                        ignoreDepth++;
6866
                        /* Check for integer overflow */
6867
11.9k
                        if (ignoreDepth == 0) {
6868
0
                            xmlErrMemory(ctxt, NULL);
6869
0
                            goto error;
6870
0
                        }
6871
2.75M
                    } else if ((RAW == ']') && (NXT(1) == ']') &&
6872
2.75M
                               (NXT(2) == '>')) {
6873
7.33k
                        if (ignoreDepth == 0)
6874
1.22k
                            break;
6875
6.10k
                        SKIP(3);
6876
6.10k
                        ignoreDepth--;
6877
2.75M
                    } else {
6878
2.75M
                        NEXT;
6879
2.75M
                    }
6880
2.77M
                }
6881
6882
4.41k
    if (RAW == 0) {
6883
3.18k
        xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6884
3.18k
                    goto error;
6885
3.18k
    }
6886
1.22k
                if (ctxt->input->id != id) {
6887
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6888
0
                                   "All markup of the conditional section is"
6889
0
                                   " not in the same entity\n");
6890
0
                }
6891
1.22k
                SKIP(3);
6892
2.06k
            } else {
6893
2.06k
                xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6894
2.06k
                xmlHaltParser(ctxt);
6895
2.06k
                goto error;
6896
2.06k
            }
6897
23.9k
        } else if ((depth > 0) &&
6898
21.8k
                   (RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6899
6.76k
            depth--;
6900
6.76k
            if (ctxt->input->id != inputIds[depth]) {
6901
116
                xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6902
116
                               "All markup of the conditional section is not"
6903
116
                               " in the same entity\n");
6904
116
            }
6905
6.76k
            SKIP(3);
6906
15.0k
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
6907
12.5k
            xmlParseMarkupDecl(ctxt);
6908
12.5k
        } else {
6909
2.51k
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6910
2.51k
            xmlHaltParser(ctxt);
6911
2.51k
            goto error;
6912
2.51k
        }
6913
6914
37.4k
        if (depth == 0)
6915
2.64k
            break;
6916
6917
34.8k
        SKIP_BLANKS;
6918
34.8k
        GROW;
6919
34.8k
    }
6920
6921
11.2k
error:
6922
11.2k
    xmlFree(inputIds);
6923
11.2k
}
6924
6925
/**
6926
 * xmlParseMarkupDecl:
6927
 * @ctxt:  an XML parser context
6928
 *
6929
 * DEPRECATED: Internal function, don't use.
6930
 *
6931
 * Parse markup declarations. Always consumes '<!' or '<?'.
6932
 *
6933
 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6934
 *                     NotationDecl | PI | Comment
6935
 *
6936
 * [ VC: Proper Declaration/PE Nesting ]
6937
 * Parameter-entity replacement text must be properly nested with
6938
 * markup declarations. That is to say, if either the first character
6939
 * or the last character of a markup declaration (markupdecl above) is
6940
 * contained in the replacement text for a parameter-entity reference,
6941
 * both must be contained in the same replacement text.
6942
 *
6943
 * [ WFC: PEs in Internal Subset ]
6944
 * In the internal DTD subset, parameter-entity references can occur
6945
 * only where markup declarations can occur, not within markup declarations.
6946
 * (This does not apply to references that occur in external parameter
6947
 * entities or to the external subset.)
6948
 */
6949
void
6950
18.8M
xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6951
18.8M
    GROW;
6952
18.8M
    if (CUR == '<') {
6953
18.8M
        if (NXT(1) == '!') {
6954
18.7M
      switch (NXT(2)) {
6955
2.13M
          case 'E':
6956
2.13M
        if (NXT(3) == 'L')
6957
1.01M
      xmlParseElementDecl(ctxt);
6958
1.12M
        else if (NXT(3) == 'N')
6959
1.11M
      xmlParseEntityDecl(ctxt);
6960
2.07k
                    else
6961
2.07k
                        SKIP(2);
6962
2.13M
        break;
6963
865k
          case 'A':
6964
865k
        xmlParseAttributeListDecl(ctxt);
6965
865k
        break;
6966
26.9k
          case 'N':
6967
26.9k
        xmlParseNotationDecl(ctxt);
6968
26.9k
        break;
6969
15.7M
          case '-':
6970
15.7M
        xmlParseComment(ctxt);
6971
15.7M
        break;
6972
34.5k
    default:
6973
        /* there is an error but it will be detected later */
6974
34.5k
                    SKIP(2);
6975
34.5k
        break;
6976
18.7M
      }
6977
18.7M
  } else if (NXT(1) == '?') {
6978
34.3k
      xmlParsePI(ctxt);
6979
34.3k
  }
6980
18.8M
    }
6981
6982
    /*
6983
     * detect requirement to exit there and act accordingly
6984
     * and avoid having instate overridden later on
6985
     */
6986
18.8M
    if (ctxt->instate == XML_PARSER_EOF)
6987
35.3k
        return;
6988
6989
18.7M
    ctxt->instate = XML_PARSER_DTD;
6990
18.7M
}
6991
6992
/**
6993
 * xmlParseTextDecl:
6994
 * @ctxt:  an XML parser context
6995
 *
6996
 * DEPRECATED: Internal function, don't use.
6997
 *
6998
 * parse an XML declaration header for external entities
6999
 *
7000
 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
7001
 */
7002
7003
void
7004
18.1k
xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
7005
18.1k
    xmlChar *version;
7006
18.1k
    const xmlChar *encoding;
7007
18.1k
    int oldstate;
7008
7009
    /*
7010
     * We know that '<?xml' is here.
7011
     */
7012
18.1k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
7013
17.5k
  SKIP(5);
7014
17.5k
    } else {
7015
577
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
7016
577
  return;
7017
577
    }
7018
7019
    /* Avoid expansion of parameter entities when skipping blanks. */
7020
17.5k
    oldstate = ctxt->instate;
7021
17.5k
    ctxt->instate = XML_PARSER_START;
7022
7023
17.5k
    if (SKIP_BLANKS == 0) {
7024
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7025
0
           "Space needed after '<?xml'\n");
7026
0
    }
7027
7028
    /*
7029
     * We may have the VersionInfo here.
7030
     */
7031
17.5k
    version = xmlParseVersionInfo(ctxt);
7032
17.5k
    if (version == NULL)
7033
4.94k
  version = xmlCharStrdup(XML_DEFAULT_VERSION);
7034
12.6k
    else {
7035
12.6k
  if (SKIP_BLANKS == 0) {
7036
1.40k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7037
1.40k
               "Space needed here\n");
7038
1.40k
  }
7039
12.6k
    }
7040
17.5k
    ctxt->input->version = version;
7041
7042
    /*
7043
     * We must have the encoding declaration
7044
     */
7045
17.5k
    encoding = xmlParseEncodingDecl(ctxt);
7046
17.5k
    if (ctxt->instate == XML_PARSER_EOF)
7047
0
        return;
7048
17.5k
    if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7049
  /*
7050
   * The XML REC instructs us to stop parsing right here
7051
   */
7052
298
        ctxt->instate = oldstate;
7053
298
        return;
7054
298
    }
7055
17.2k
    if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
7056
7.84k
  xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
7057
7.84k
           "Missing encoding in text declaration\n");
7058
7.84k
    }
7059
7060
17.2k
    SKIP_BLANKS;
7061
17.2k
    if ((RAW == '?') && (NXT(1) == '>')) {
7062
7.11k
        SKIP(2);
7063
10.1k
    } else if (RAW == '>') {
7064
        /* Deprecated old WD ... */
7065
162
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7066
162
  NEXT;
7067
10.0k
    } else {
7068
10.0k
        int c;
7069
7070
10.0k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7071
637k
        while ((c = CUR) != 0) {
7072
634k
            NEXT;
7073
634k
            if (c == '>')
7074
7.79k
                break;
7075
634k
        }
7076
10.0k
    }
7077
7078
17.2k
    ctxt->instate = oldstate;
7079
17.2k
}
7080
7081
/**
7082
 * xmlParseExternalSubset:
7083
 * @ctxt:  an XML parser context
7084
 * @ExternalID: the external identifier
7085
 * @SystemID: the system identifier (or URL)
7086
 *
7087
 * parse Markup declarations from an external subset
7088
 *
7089
 * [30] extSubset ::= textDecl? extSubsetDecl
7090
 *
7091
 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
7092
 */
7093
void
7094
xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
7095
63.7k
                       const xmlChar *SystemID) {
7096
63.7k
    xmlDetectSAX2(ctxt);
7097
63.7k
    GROW;
7098
7099
63.7k
    if ((ctxt->encoding == NULL) &&
7100
63.7k
        (ctxt->input->end - ctxt->input->cur >= 4)) {
7101
63.5k
        xmlChar start[4];
7102
63.5k
  xmlCharEncoding enc;
7103
7104
63.5k
  start[0] = RAW;
7105
63.5k
  start[1] = NXT(1);
7106
63.5k
  start[2] = NXT(2);
7107
63.5k
  start[3] = NXT(3);
7108
63.5k
  enc = xmlDetectCharEncoding(start, 4);
7109
63.5k
  if (enc != XML_CHAR_ENCODING_NONE)
7110
17.0k
      xmlSwitchEncoding(ctxt, enc);
7111
63.5k
    }
7112
7113
63.7k
    if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
7114
15.4k
  xmlParseTextDecl(ctxt);
7115
15.4k
  if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7116
      /*
7117
       * The XML REC instructs us to stop parsing right here
7118
       */
7119
269
      xmlHaltParser(ctxt);
7120
269
      return;
7121
269
  }
7122
15.4k
    }
7123
63.4k
    if (ctxt->myDoc == NULL) {
7124
0
        ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
7125
0
  if (ctxt->myDoc == NULL) {
7126
0
      xmlErrMemory(ctxt, "New Doc failed");
7127
0
      return;
7128
0
  }
7129
0
  ctxt->myDoc->properties = XML_DOC_INTERNAL;
7130
0
    }
7131
63.4k
    if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
7132
0
        xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
7133
7134
63.4k
    ctxt->instate = XML_PARSER_DTD;
7135
63.4k
    ctxt->external = 1;
7136
63.4k
    SKIP_BLANKS;
7137
2.22M
    while ((ctxt->instate != XML_PARSER_EOF) && (RAW != 0)) {
7138
2.18M
  GROW;
7139
2.18M
        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7140
11.2k
            xmlParseConditionalSections(ctxt);
7141
2.17M
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
7142
2.15M
            xmlParseMarkupDecl(ctxt);
7143
2.15M
        } else {
7144
20.5k
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7145
20.5k
            xmlHaltParser(ctxt);
7146
20.5k
            return;
7147
20.5k
        }
7148
2.16M
        SKIP_BLANKS;
7149
2.16M
    }
7150
7151
42.9k
    if (RAW != 0) {
7152
0
  xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7153
0
    }
7154
7155
42.9k
}
7156
7157
/**
7158
 * xmlParseReference:
7159
 * @ctxt:  an XML parser context
7160
 *
7161
 * DEPRECATED: Internal function, don't use.
7162
 *
7163
 * parse and handle entity references in content, depending on the SAX
7164
 * interface, this may end-up in a call to character() if this is a
7165
 * CharRef, a predefined entity, if there is no reference() callback.
7166
 * or if the parser was asked to switch to that mode.
7167
 *
7168
 * Always consumes '&'.
7169
 *
7170
 * [67] Reference ::= EntityRef | CharRef
7171
 */
7172
void
7173
6.01M
xmlParseReference(xmlParserCtxtPtr ctxt) {
7174
6.01M
    xmlEntityPtr ent;
7175
6.01M
    xmlChar *val;
7176
6.01M
    int was_checked;
7177
6.01M
    xmlNodePtr list = NULL;
7178
6.01M
    xmlParserErrors ret = XML_ERR_OK;
7179
7180
7181
6.01M
    if (RAW != '&')
7182
0
        return;
7183
7184
    /*
7185
     * Simple case of a CharRef
7186
     */
7187
6.01M
    if (NXT(1) == '#') {
7188
1.31M
  int i = 0;
7189
1.31M
  xmlChar out[16];
7190
1.31M
  int hex = NXT(2);
7191
1.31M
  int value = xmlParseCharRef(ctxt);
7192
7193
1.31M
  if (value == 0)
7194
353k
      return;
7195
959k
  if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
7196
      /*
7197
       * So we are using non-UTF-8 buffers
7198
       * Check that the char fit on 8bits, if not
7199
       * generate a CharRef.
7200
       */
7201
772k
      if (value <= 0xFF) {
7202
725k
    out[0] = value;
7203
725k
    out[1] = 0;
7204
725k
    if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7205
725k
        (!ctxt->disableSAX))
7206
638k
        ctxt->sax->characters(ctxt->userData, out, 1);
7207
725k
      } else {
7208
46.1k
    if ((hex == 'x') || (hex == 'X'))
7209
12.9k
        snprintf((char *)out, sizeof(out), "#x%X", value);
7210
33.2k
    else
7211
33.2k
        snprintf((char *)out, sizeof(out), "#%d", value);
7212
46.1k
    if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7213
46.1k
        (!ctxt->disableSAX))
7214
38.3k
        ctxt->sax->reference(ctxt->userData, out);
7215
46.1k
      }
7216
772k
  } else {
7217
      /*
7218
       * Just encode the value in UTF-8
7219
       */
7220
187k
      COPY_BUF(0 ,out, i, value);
7221
187k
      out[i] = 0;
7222
187k
      if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7223
187k
    (!ctxt->disableSAX))
7224
174k
    ctxt->sax->characters(ctxt->userData, out, i);
7225
187k
  }
7226
959k
  return;
7227
1.31M
    }
7228
7229
    /*
7230
     * We are seeing an entity reference
7231
     */
7232
4.69M
    ent = xmlParseEntityRef(ctxt);
7233
4.69M
    if (ent == NULL) return;
7234
3.19M
    if (!ctxt->wellFormed)
7235
1.30M
  return;
7236
1.88M
    was_checked = ent->flags & XML_ENT_PARSED;
7237
7238
    /* special case of predefined entities */
7239
1.88M
    if ((ent->name == NULL) ||
7240
1.88M
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7241
89.6k
  val = ent->content;
7242
89.6k
  if (val == NULL) return;
7243
  /*
7244
   * inline the entity.
7245
   */
7246
89.6k
  if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7247
89.6k
      (!ctxt->disableSAX))
7248
89.6k
      ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7249
89.6k
  return;
7250
89.6k
    }
7251
7252
    /*
7253
     * The first reference to the entity trigger a parsing phase
7254
     * where the ent->children is filled with the result from
7255
     * the parsing.
7256
     * Note: external parsed entities will not be loaded, it is not
7257
     * required for a non-validating parser, unless the parsing option
7258
     * of validating, or substituting entities were given. Doing so is
7259
     * far more secure as the parser will only process data coming from
7260
     * the document entity by default.
7261
     */
7262
1.79M
    if (((ent->flags & XML_ENT_PARSED) == 0) &&
7263
1.79M
        ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
7264
95.4k
         (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
7265
87.5k
  unsigned long oldsizeentcopy = ctxt->sizeentcopy;
7266
7267
  /*
7268
   * This is a bit hackish but this seems the best
7269
   * way to make sure both SAX and DOM entity support
7270
   * behaves okay.
7271
   */
7272
87.5k
  void *user_data;
7273
87.5k
  if (ctxt->userData == ctxt)
7274
87.5k
      user_data = NULL;
7275
0
  else
7276
0
      user_data = ctxt->userData;
7277
7278
        /* Avoid overflow as much as possible */
7279
87.5k
        ctxt->sizeentcopy = 0;
7280
7281
87.5k
        if (ent->flags & XML_ENT_EXPANDING) {
7282
379
            xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7283
379
            xmlHaltParser(ctxt);
7284
379
            return;
7285
379
        }
7286
7287
87.1k
        ent->flags |= XML_ENT_EXPANDING;
7288
7289
  /*
7290
   * Check that this entity is well formed
7291
   * 4.3.2: An internal general parsed entity is well-formed
7292
   * if its replacement text matches the production labeled
7293
   * content.
7294
   */
7295
87.1k
  if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7296
64.4k
      ctxt->depth++;
7297
64.4k
      ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
7298
64.4k
                                                user_data, &list);
7299
64.4k
      ctxt->depth--;
7300
7301
64.4k
  } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7302
22.6k
      ctxt->depth++;
7303
22.6k
      ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
7304
22.6k
                                     user_data, ctxt->depth, ent->URI,
7305
22.6k
             ent->ExternalID, &list);
7306
22.6k
      ctxt->depth--;
7307
22.6k
  } else {
7308
0
      ret = XML_ERR_ENTITY_PE_INTERNAL;
7309
0
      xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7310
0
       "invalid entity type found\n", NULL);
7311
0
  }
7312
7313
87.1k
        ent->flags &= ~XML_ENT_EXPANDING;
7314
87.1k
        ent->flags |= XML_ENT_PARSED | XML_ENT_CHECKED;
7315
87.1k
        ent->expandedSize = ctxt->sizeentcopy;
7316
87.1k
  if (ret == XML_ERR_ENTITY_LOOP) {
7317
3.52k
            xmlHaltParser(ctxt);
7318
3.52k
      xmlFreeNodeList(list);
7319
3.52k
      return;
7320
3.52k
  }
7321
83.6k
  if (xmlParserEntityCheck(ctxt, oldsizeentcopy)) {
7322
0
      xmlFreeNodeList(list);
7323
0
      return;
7324
0
  }
7325
7326
83.6k
  if ((ret == XML_ERR_OK) && (list != NULL)) {
7327
57.3k
            ent->children = list;
7328
            /*
7329
             * Prune it directly in the generated document
7330
             * except for single text nodes.
7331
             */
7332
57.3k
            if ((ctxt->replaceEntities == 0) ||
7333
57.3k
                (ctxt->parseMode == XML_PARSE_READER) ||
7334
57.3k
                ((list->type == XML_TEXT_NODE) &&
7335
46.9k
                 (list->next == NULL))) {
7336
46.9k
                ent->owner = 1;
7337
131k
                while (list != NULL) {
7338
84.1k
                    list->parent = (xmlNodePtr) ent;
7339
84.1k
                    if (list->doc != ent->doc)
7340
0
                        xmlSetTreeDoc(list, ent->doc);
7341
84.1k
                    if (list->next == NULL)
7342
46.9k
                        ent->last = list;
7343
84.1k
                    list = list->next;
7344
84.1k
                }
7345
46.9k
                list = NULL;
7346
46.9k
            } else {
7347
10.3k
                ent->owner = 0;
7348
64.9k
                while (list != NULL) {
7349
54.5k
                    list->parent = (xmlNodePtr) ctxt->node;
7350
54.5k
                    list->doc = ctxt->myDoc;
7351
54.5k
                    if (list->next == NULL)
7352
10.3k
                        ent->last = list;
7353
54.5k
                    list = list->next;
7354
54.5k
                }
7355
10.3k
                list = ent->children;
7356
#ifdef LIBXML_LEGACY_ENABLED
7357
                if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7358
                    xmlAddEntityReference(ent, list, NULL);
7359
#endif /* LIBXML_LEGACY_ENABLED */
7360
10.3k
            }
7361
57.3k
  } else if ((ret != XML_ERR_OK) &&
7362
26.3k
       (ret != XML_WAR_UNDECLARED_ENTITY)) {
7363
12.9k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7364
12.9k
         "Entity '%s' failed to parse\n", ent->name);
7365
12.9k
            if (ent->content != NULL)
7366
5.40k
                ent->content[0] = 0;
7367
13.3k
  } else if (list != NULL) {
7368
0
      xmlFreeNodeList(list);
7369
0
      list = NULL;
7370
0
  }
7371
7372
        /* Prevent entity from being parsed and expanded twice (Bug 760367). */
7373
83.6k
        was_checked = 0;
7374
83.6k
    }
7375
7376
    /*
7377
     * Now that the entity content has been gathered
7378
     * provide it to the application, this can take different forms based
7379
     * on the parsing modes.
7380
     */
7381
1.79M
    if (ent->children == NULL) {
7382
  /*
7383
   * Probably running in SAX mode and the callbacks don't
7384
   * build the entity content. So unless we already went
7385
   * though parsing for first checking go though the entity
7386
   * content to generate callbacks associated to the entity
7387
   */
7388
133k
  if (was_checked != 0) {
7389
99.1k
      void *user_data;
7390
      /*
7391
       * This is a bit hackish but this seems the best
7392
       * way to make sure both SAX and DOM entity support
7393
       * behaves okay.
7394
       */
7395
99.1k
      if (ctxt->userData == ctxt)
7396
99.1k
    user_data = NULL;
7397
0
      else
7398
0
    user_data = ctxt->userData;
7399
7400
99.1k
      if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7401
4.85k
    ctxt->depth++;
7402
4.85k
    ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7403
4.85k
           ent->content, user_data, NULL);
7404
4.85k
    ctxt->depth--;
7405
94.2k
      } else if (ent->etype ==
7406
94.2k
           XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7407
94.2k
          unsigned long oldsizeentities = ctxt->sizeentities;
7408
7409
94.2k
    ctxt->depth++;
7410
94.2k
    ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7411
94.2k
         ctxt->sax, user_data, ctxt->depth,
7412
94.2k
         ent->URI, ent->ExternalID, NULL);
7413
94.2k
    ctxt->depth--;
7414
7415
                /* Undo the change to sizeentities */
7416
94.2k
                ctxt->sizeentities = oldsizeentities;
7417
94.2k
      } else {
7418
0
    ret = XML_ERR_ENTITY_PE_INTERNAL;
7419
0
    xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7420
0
           "invalid entity type found\n", NULL);
7421
0
      }
7422
99.1k
      if (ret == XML_ERR_ENTITY_LOOP) {
7423
0
    xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7424
0
    return;
7425
0
      }
7426
99.1k
            if (xmlParserEntityCheck(ctxt, 0))
7427
0
                return;
7428
99.1k
  }
7429
133k
  if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7430
133k
      (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7431
      /*
7432
       * Entity reference callback comes second, it's somewhat
7433
       * superfluous but a compatibility to historical behaviour
7434
       */
7435
40.8k
      ctxt->sax->reference(ctxt->userData, ent->name);
7436
40.8k
  }
7437
133k
  return;
7438
133k
    }
7439
7440
    /*
7441
     * We also check for amplification if entities aren't substituted.
7442
     * They might be expanded later.
7443
     */
7444
1.65M
    if ((was_checked != 0) &&
7445
1.65M
        (xmlParserEntityCheck(ctxt, ent->expandedSize)))
7446
132
        return;
7447
7448
    /*
7449
     * If we didn't get any children for the entity being built
7450
     */
7451
1.65M
    if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7452
1.65M
  (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7453
  /*
7454
   * Create a node.
7455
   */
7456
381k
  ctxt->sax->reference(ctxt->userData, ent->name);
7457
381k
  return;
7458
381k
    }
7459
7460
1.27M
    if (ctxt->replaceEntities)  {
7461
  /*
7462
   * There is a problem on the handling of _private for entities
7463
   * (bug 155816): Should we copy the content of the field from
7464
   * the entity (possibly overwriting some value set by the user
7465
   * when a copy is created), should we leave it alone, or should
7466
   * we try to take care of different situations?  The problem
7467
   * is exacerbated by the usage of this field by the xmlReader.
7468
   * To fix this bug, we look at _private on the created node
7469
   * and, if it's NULL, we copy in whatever was in the entity.
7470
   * If it's not NULL we leave it alone.  This is somewhat of a
7471
   * hack - maybe we should have further tests to determine
7472
   * what to do.
7473
   */
7474
1.27M
  if (ctxt->node != NULL) {
7475
      /*
7476
       * Seems we are generating the DOM content, do
7477
       * a simple tree copy for all references except the first
7478
       * In the first occurrence list contains the replacement.
7479
       */
7480
1.27M
      if (((list == NULL) && (ent->owner == 0)) ||
7481
1.27M
    (ctxt->parseMode == XML_PARSE_READER)) {
7482
415k
    xmlNodePtr nw = NULL, cur, firstChild = NULL;
7483
7484
    /*
7485
     * when operating on a reader, the entities definitions
7486
     * are always owning the entities subtree.
7487
    if (ctxt->parseMode == XML_PARSE_READER)
7488
        ent->owner = 1;
7489
     */
7490
7491
415k
    cur = ent->children;
7492
519k
    while (cur != NULL) {
7493
519k
        nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7494
519k
        if (nw != NULL) {
7495
519k
      if (nw->_private == NULL)
7496
519k
          nw->_private = cur->_private;
7497
519k
      if (firstChild == NULL){
7498
415k
          firstChild = nw;
7499
415k
      }
7500
519k
      nw = xmlAddChild(ctxt->node, nw);
7501
519k
        }
7502
519k
        if (cur == ent->last) {
7503
      /*
7504
       * needed to detect some strange empty
7505
       * node cases in the reader tests
7506
       */
7507
415k
      if ((ctxt->parseMode == XML_PARSE_READER) &&
7508
415k
          (nw != NULL) &&
7509
415k
          (nw->type == XML_ELEMENT_NODE) &&
7510
415k
          (nw->children == NULL))
7511
5.85k
          nw->extra = 1;
7512
7513
415k
      break;
7514
415k
        }
7515
103k
        cur = cur->next;
7516
103k
    }
7517
#ifdef LIBXML_LEGACY_ENABLED
7518
    if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7519
      xmlAddEntityReference(ent, firstChild, nw);
7520
#endif /* LIBXML_LEGACY_ENABLED */
7521
862k
      } else if ((list == NULL) || (ctxt->inputNr > 0)) {
7522
862k
    xmlNodePtr nw = NULL, cur, next, last,
7523
862k
         firstChild = NULL;
7524
7525
    /*
7526
     * Copy the entity child list and make it the new
7527
     * entity child list. The goal is to make sure any
7528
     * ID or REF referenced will be the one from the
7529
     * document content and not the entity copy.
7530
     */
7531
862k
    cur = ent->children;
7532
862k
    ent->children = NULL;
7533
862k
    last = ent->last;
7534
862k
    ent->last = NULL;
7535
1.10M
    while (cur != NULL) {
7536
1.10M
        next = cur->next;
7537
1.10M
        cur->next = NULL;
7538
1.10M
        cur->parent = NULL;
7539
1.10M
        nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7540
1.10M
        if (nw != NULL) {
7541
1.10M
      if (nw->_private == NULL)
7542
1.10M
          nw->_private = cur->_private;
7543
1.10M
      if (firstChild == NULL){
7544
862k
          firstChild = cur;
7545
862k
      }
7546
1.10M
      xmlAddChild((xmlNodePtr) ent, nw);
7547
1.10M
        }
7548
1.10M
        xmlAddChild(ctxt->node, cur);
7549
1.10M
        if (cur == last)
7550
862k
      break;
7551
239k
        cur = next;
7552
239k
    }
7553
862k
    if (ent->owner == 0)
7554
10.3k
        ent->owner = 1;
7555
#ifdef LIBXML_LEGACY_ENABLED
7556
    if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7557
      xmlAddEntityReference(ent, firstChild, nw);
7558
#endif /* LIBXML_LEGACY_ENABLED */
7559
862k
      } else {
7560
0
    const xmlChar *nbktext;
7561
7562
    /*
7563
     * the name change is to avoid coalescing of the
7564
     * node with a possible previous text one which
7565
     * would make ent->children a dangling pointer
7566
     */
7567
0
    nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7568
0
          -1);
7569
0
    if (ent->children->type == XML_TEXT_NODE)
7570
0
        ent->children->name = nbktext;
7571
0
    if ((ent->last != ent->children) &&
7572
0
        (ent->last->type == XML_TEXT_NODE))
7573
0
        ent->last->name = nbktext;
7574
0
    xmlAddChildList(ctxt->node, ent->children);
7575
0
      }
7576
7577
      /*
7578
       * This is to avoid a nasty side effect, see
7579
       * characters() in SAX.c
7580
       */
7581
1.27M
      ctxt->nodemem = 0;
7582
1.27M
      ctxt->nodelen = 0;
7583
1.27M
      return;
7584
1.27M
  }
7585
1.27M
    }
7586
1.27M
}
7587
7588
/**
7589
 * xmlParseEntityRef:
7590
 * @ctxt:  an XML parser context
7591
 *
7592
 * DEPRECATED: Internal function, don't use.
7593
 *
7594
 * Parse an entitiy reference. Always consumes '&'.
7595
 *
7596
 * [68] EntityRef ::= '&' Name ';'
7597
 *
7598
 * [ WFC: Entity Declared ]
7599
 * In a document without any DTD, a document with only an internal DTD
7600
 * subset which contains no parameter entity references, or a document
7601
 * with "standalone='yes'", the Name given in the entity reference
7602
 * must match that in an entity declaration, except that well-formed
7603
 * documents need not declare any of the following entities: amp, lt,
7604
 * gt, apos, quot.  The declaration of a parameter entity must precede
7605
 * any reference to it.  Similarly, the declaration of a general entity
7606
 * must precede any reference to it which appears in a default value in an
7607
 * attribute-list declaration. Note that if entities are declared in the
7608
 * external subset or in external parameter entities, a non-validating
7609
 * processor is not obligated to read and process their declarations;
7610
 * for such documents, the rule that an entity must be declared is a
7611
 * well-formedness constraint only if standalone='yes'.
7612
 *
7613
 * [ WFC: Parsed Entity ]
7614
 * An entity reference must not contain the name of an unparsed entity
7615
 *
7616
 * Returns the xmlEntityPtr if found, or NULL otherwise.
7617
 */
7618
xmlEntityPtr
7619
7.10M
xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7620
7.10M
    const xmlChar *name;
7621
7.10M
    xmlEntityPtr ent = NULL;
7622
7623
7.10M
    GROW;
7624
7.10M
    if (ctxt->instate == XML_PARSER_EOF)
7625
0
        return(NULL);
7626
7627
7.10M
    if (RAW != '&')
7628
0
        return(NULL);
7629
7.10M
    NEXT;
7630
7.10M
    name = xmlParseName(ctxt);
7631
7.10M
    if (name == NULL) {
7632
521k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7633
521k
           "xmlParseEntityRef: no name\n");
7634
521k
        return(NULL);
7635
521k
    }
7636
6.58M
    if (RAW != ';') {
7637
387k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7638
387k
  return(NULL);
7639
387k
    }
7640
6.19M
    NEXT;
7641
7642
    /*
7643
     * Predefined entities override any extra definition
7644
     */
7645
6.19M
    if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7646
4.64M
        ent = xmlGetPredefinedEntity(name);
7647
4.64M
        if (ent != NULL)
7648
421k
            return(ent);
7649
4.64M
    }
7650
7651
    /*
7652
     * Ask first SAX for entity resolution, otherwise try the
7653
     * entities which may have stored in the parser context.
7654
     */
7655
5.77M
    if (ctxt->sax != NULL) {
7656
5.77M
  if (ctxt->sax->getEntity != NULL)
7657
5.77M
      ent = ctxt->sax->getEntity(ctxt->userData, name);
7658
5.77M
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7659
5.77M
      (ctxt->options & XML_PARSE_OLDSAX))
7660
54.6k
      ent = xmlGetPredefinedEntity(name);
7661
5.77M
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7662
5.77M
      (ctxt->userData==ctxt)) {
7663
125k
      ent = xmlSAX2GetEntity(ctxt, name);
7664
125k
  }
7665
5.77M
    }
7666
5.77M
    if (ctxt->instate == XML_PARSER_EOF)
7667
0
  return(NULL);
7668
    /*
7669
     * [ WFC: Entity Declared ]
7670
     * In a document without any DTD, a document with only an
7671
     * internal DTD subset which contains no parameter entity
7672
     * references, or a document with "standalone='yes'", the
7673
     * Name given in the entity reference must match that in an
7674
     * entity declaration, except that well-formed documents
7675
     * need not declare any of the following entities: amp, lt,
7676
     * gt, apos, quot.
7677
     * The declaration of a parameter entity must precede any
7678
     * reference to it.
7679
     * Similarly, the declaration of a general entity must
7680
     * precede any reference to it which appears in a default
7681
     * value in an attribute-list declaration. Note that if
7682
     * entities are declared in the external subset or in
7683
     * external parameter entities, a non-validating processor
7684
     * is not obligated to read and process their declarations;
7685
     * for such documents, the rule that an entity must be
7686
     * declared is a well-formedness constraint only if
7687
     * standalone='yes'.
7688
     */
7689
5.77M
    if (ent == NULL) {
7690
1.06M
  if ((ctxt->standalone == 1) ||
7691
1.06M
      ((ctxt->hasExternalSubset == 0) &&
7692
1.03M
       (ctxt->hasPErefs == 0))) {
7693
441k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7694
441k
         "Entity '%s' not defined\n", name);
7695
623k
  } else {
7696
623k
      xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7697
623k
         "Entity '%s' not defined\n", name);
7698
623k
      if ((ctxt->inSubset == 0) &&
7699
623k
    (ctxt->sax != NULL) &&
7700
623k
    (ctxt->sax->reference != NULL)) {
7701
615k
    ctxt->sax->reference(ctxt->userData, name);
7702
615k
      }
7703
623k
  }
7704
1.06M
  ctxt->valid = 0;
7705
1.06M
    }
7706
7707
    /*
7708
     * [ WFC: Parsed Entity ]
7709
     * An entity reference must not contain the name of an
7710
     * unparsed entity
7711
     */
7712
4.71M
    else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7713
2.77k
  xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7714
2.77k
     "Entity reference to unparsed entity %s\n", name);
7715
2.77k
    }
7716
7717
    /*
7718
     * [ WFC: No External Entity References ]
7719
     * Attribute values cannot contain direct or indirect
7720
     * entity references to external entities.
7721
     */
7722
4.70M
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7723
4.70M
       (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7724
15.9k
  xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7725
15.9k
       "Attribute references external entity '%s'\n", name);
7726
15.9k
    }
7727
    /*
7728
     * [ WFC: No < in Attribute Values ]
7729
     * The replacement text of any entity referred to directly or
7730
     * indirectly in an attribute value (other than "&lt;") must
7731
     * not contain a <.
7732
     */
7733
4.69M
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7734
4.69M
       (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7735
1.75M
  if ((ent->flags & XML_ENT_CHECKED_LT) == 0) {
7736
28.9k
            if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7737
3.09k
                ent->flags |= XML_ENT_CONTAINS_LT;
7738
28.9k
            ent->flags |= XML_ENT_CHECKED_LT;
7739
28.9k
        }
7740
1.75M
        if (ent->flags & XML_ENT_CONTAINS_LT)
7741
16.4k
            xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7742
16.4k
                    "'<' in entity '%s' is not allowed in attributes "
7743
16.4k
                    "values\n", name);
7744
1.75M
    }
7745
7746
    /*
7747
     * Internal check, no parameter entities here ...
7748
     */
7749
2.93M
    else {
7750
2.93M
  switch (ent->etype) {
7751
0
      case XML_INTERNAL_PARAMETER_ENTITY:
7752
0
      case XML_EXTERNAL_PARAMETER_ENTITY:
7753
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7754
0
       "Attempt to reference the parameter entity '%s'\n",
7755
0
            name);
7756
0
      break;
7757
2.93M
      default:
7758
2.93M
      break;
7759
2.93M
  }
7760
2.93M
    }
7761
7762
    /*
7763
     * [ WFC: No Recursion ]
7764
     * A parsed entity must not contain a recursive reference
7765
     * to itself, either directly or indirectly.
7766
     * Done somewhere else
7767
     */
7768
5.77M
    return(ent);
7769
5.77M
}
7770
7771
/**
7772
 * xmlParseStringEntityRef:
7773
 * @ctxt:  an XML parser context
7774
 * @str:  a pointer to an index in the string
7775
 *
7776
 * parse ENTITY references declarations, but this version parses it from
7777
 * a string value.
7778
 *
7779
 * [68] EntityRef ::= '&' Name ';'
7780
 *
7781
 * [ WFC: Entity Declared ]
7782
 * In a document without any DTD, a document with only an internal DTD
7783
 * subset which contains no parameter entity references, or a document
7784
 * with "standalone='yes'", the Name given in the entity reference
7785
 * must match that in an entity declaration, except that well-formed
7786
 * documents need not declare any of the following entities: amp, lt,
7787
 * gt, apos, quot.  The declaration of a parameter entity must precede
7788
 * any reference to it.  Similarly, the declaration of a general entity
7789
 * must precede any reference to it which appears in a default value in an
7790
 * attribute-list declaration. Note that if entities are declared in the
7791
 * external subset or in external parameter entities, a non-validating
7792
 * processor is not obligated to read and process their declarations;
7793
 * for such documents, the rule that an entity must be declared is a
7794
 * well-formedness constraint only if standalone='yes'.
7795
 *
7796
 * [ WFC: Parsed Entity ]
7797
 * An entity reference must not contain the name of an unparsed entity
7798
 *
7799
 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7800
 * is updated to the current location in the string.
7801
 */
7802
static xmlEntityPtr
7803
28.2M
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7804
28.2M
    xmlChar *name;
7805
28.2M
    const xmlChar *ptr;
7806
28.2M
    xmlChar cur;
7807
28.2M
    xmlEntityPtr ent = NULL;
7808
7809
28.2M
    if ((str == NULL) || (*str == NULL))
7810
0
        return(NULL);
7811
28.2M
    ptr = *str;
7812
28.2M
    cur = *ptr;
7813
28.2M
    if (cur != '&')
7814
0
  return(NULL);
7815
7816
28.2M
    ptr++;
7817
28.2M
    name = xmlParseStringName(ctxt, &ptr);
7818
28.2M
    if (name == NULL) {
7819
2.08k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7820
2.08k
           "xmlParseStringEntityRef: no name\n");
7821
2.08k
  *str = ptr;
7822
2.08k
  return(NULL);
7823
2.08k
    }
7824
28.2M
    if (*ptr != ';') {
7825
7.07k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7826
7.07k
        xmlFree(name);
7827
7.07k
  *str = ptr;
7828
7.07k
  return(NULL);
7829
7.07k
    }
7830
28.2M
    ptr++;
7831
7832
7833
    /*
7834
     * Predefined entities override any extra definition
7835
     */
7836
28.2M
    if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7837
25.9M
        ent = xmlGetPredefinedEntity(name);
7838
25.9M
        if (ent != NULL) {
7839
23.0k
            xmlFree(name);
7840
23.0k
            *str = ptr;
7841
23.0k
            return(ent);
7842
23.0k
        }
7843
25.9M
    }
7844
7845
    /*
7846
     * Ask first SAX for entity resolution, otherwise try the
7847
     * entities which may have stored in the parser context.
7848
     */
7849
28.1M
    if (ctxt->sax != NULL) {
7850
28.1M
  if (ctxt->sax->getEntity != NULL)
7851
28.1M
      ent = ctxt->sax->getEntity(ctxt->userData, name);
7852
28.1M
  if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7853
108k
      ent = xmlGetPredefinedEntity(name);
7854
28.1M
  if ((ent == NULL) && (ctxt->userData==ctxt)) {
7855
723k
      ent = xmlSAX2GetEntity(ctxt, name);
7856
723k
  }
7857
28.1M
    }
7858
28.1M
    if (ctxt->instate == XML_PARSER_EOF) {
7859
0
  xmlFree(name);
7860
0
  return(NULL);
7861
0
    }
7862
7863
    /*
7864
     * [ WFC: Entity Declared ]
7865
     * In a document without any DTD, a document with only an
7866
     * internal DTD subset which contains no parameter entity
7867
     * references, or a document with "standalone='yes'", the
7868
     * Name given in the entity reference must match that in an
7869
     * entity declaration, except that well-formed documents
7870
     * need not declare any of the following entities: amp, lt,
7871
     * gt, apos, quot.
7872
     * The declaration of a parameter entity must precede any
7873
     * reference to it.
7874
     * Similarly, the declaration of a general entity must
7875
     * precede any reference to it which appears in a default
7876
     * value in an attribute-list declaration. Note that if
7877
     * entities are declared in the external subset or in
7878
     * external parameter entities, a non-validating processor
7879
     * is not obligated to read and process their declarations;
7880
     * for such documents, the rule that an entity must be
7881
     * declared is a well-formedness constraint only if
7882
     * standalone='yes'.
7883
     */
7884
28.1M
    if (ent == NULL) {
7885
723k
  if ((ctxt->standalone == 1) ||
7886
723k
      ((ctxt->hasExternalSubset == 0) &&
7887
720k
       (ctxt->hasPErefs == 0))) {
7888
707k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7889
707k
         "Entity '%s' not defined\n", name);
7890
707k
  } else {
7891
16.7k
      xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7892
16.7k
        "Entity '%s' not defined\n",
7893
16.7k
        name);
7894
16.7k
  }
7895
  /* TODO ? check regressions ctxt->valid = 0; */
7896
723k
    }
7897
7898
    /*
7899
     * [ WFC: Parsed Entity ]
7900
     * An entity reference must not contain the name of an
7901
     * unparsed entity
7902
     */
7903
27.4M
    else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7904
395
  xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7905
395
     "Entity reference to unparsed entity %s\n", name);
7906
395
    }
7907
7908
    /*
7909
     * [ WFC: No External Entity References ]
7910
     * Attribute values cannot contain direct or indirect
7911
     * entity references to external entities.
7912
     */
7913
27.4M
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7914
27.4M
       (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7915
1.08k
  xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7916
1.08k
   "Attribute references external entity '%s'\n", name);
7917
1.08k
    }
7918
    /*
7919
     * [ WFC: No < in Attribute Values ]
7920
     * The replacement text of any entity referred to directly or
7921
     * indirectly in an attribute value (other than "&lt;") must
7922
     * not contain a <.
7923
     */
7924
27.4M
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7925
27.4M
       (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7926
27.0M
  if ((ent->flags & XML_ENT_CHECKED_LT) == 0) {
7927
12.6k
            if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7928
1.12k
                ent->flags |= XML_ENT_CONTAINS_LT;
7929
12.6k
            ent->flags |= XML_ENT_CHECKED_LT;
7930
12.6k
        }
7931
27.0M
        if (ent->flags & XML_ENT_CONTAINS_LT)
7932
34.2k
            xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7933
34.2k
                    "'<' in entity '%s' is not allowed in attributes "
7934
34.2k
                    "values\n", name);
7935
27.0M
    }
7936
7937
    /*
7938
     * Internal check, no parameter entities here ...
7939
     */
7940
430k
    else {
7941
430k
  switch (ent->etype) {
7942
0
      case XML_INTERNAL_PARAMETER_ENTITY:
7943
0
      case XML_EXTERNAL_PARAMETER_ENTITY:
7944
0
    xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7945
0
       "Attempt to reference the parameter entity '%s'\n",
7946
0
          name);
7947
0
      break;
7948
430k
      default:
7949
430k
      break;
7950
430k
  }
7951
430k
    }
7952
7953
    /*
7954
     * [ WFC: No Recursion ]
7955
     * A parsed entity must not contain a recursive reference
7956
     * to itself, either directly or indirectly.
7957
     * Done somewhere else
7958
     */
7959
7960
28.1M
    xmlFree(name);
7961
28.1M
    *str = ptr;
7962
28.1M
    return(ent);
7963
28.1M
}
7964
7965
/**
7966
 * xmlParsePEReference:
7967
 * @ctxt:  an XML parser context
7968
 *
7969
 * DEPRECATED: Internal function, don't use.
7970
 *
7971
 * Parse a parameter entity reference. Always consumes '%'.
7972
 *
7973
 * The entity content is handled directly by pushing it's content as
7974
 * a new input stream.
7975
 *
7976
 * [69] PEReference ::= '%' Name ';'
7977
 *
7978
 * [ WFC: No Recursion ]
7979
 * A parsed entity must not contain a recursive
7980
 * reference to itself, either directly or indirectly.
7981
 *
7982
 * [ WFC: Entity Declared ]
7983
 * In a document without any DTD, a document with only an internal DTD
7984
 * subset which contains no parameter entity references, or a document
7985
 * with "standalone='yes'", ...  ... The declaration of a parameter
7986
 * entity must precede any reference to it...
7987
 *
7988
 * [ VC: Entity Declared ]
7989
 * In a document with an external subset or external parameter entities
7990
 * with "standalone='no'", ...  ... The declaration of a parameter entity
7991
 * must precede any reference to it...
7992
 *
7993
 * [ WFC: In DTD ]
7994
 * Parameter-entity references may only appear in the DTD.
7995
 * NOTE: misleading but this is handled.
7996
 */
7997
void
7998
xmlParsePEReference(xmlParserCtxtPtr ctxt)
7999
16.0M
{
8000
16.0M
    const xmlChar *name;
8001
16.0M
    xmlEntityPtr entity = NULL;
8002
16.0M
    xmlParserInputPtr input;
8003
8004
16.0M
    if (RAW != '%')
8005
0
        return;
8006
16.0M
    NEXT;
8007
16.0M
    name = xmlParseName(ctxt);
8008
16.0M
    if (name == NULL) {
8009
94.7k
  xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
8010
94.7k
  return;
8011
94.7k
    }
8012
15.9M
    if (xmlParserDebugEntities)
8013
0
  xmlGenericError(xmlGenericErrorContext,
8014
0
    "PEReference: %s\n", name);
8015
15.9M
    if (RAW != ';') {
8016
16.1k
  xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
8017
16.1k
        return;
8018
16.1k
    }
8019
8020
15.8M
    NEXT;
8021
8022
    /*
8023
     * Request the entity from SAX
8024
     */
8025
15.8M
    if ((ctxt->sax != NULL) &&
8026
15.8M
  (ctxt->sax->getParameterEntity != NULL))
8027
15.8M
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8028
15.8M
    if (ctxt->instate == XML_PARSER_EOF)
8029
0
  return;
8030
15.8M
    if (entity == NULL) {
8031
  /*
8032
   * [ WFC: Entity Declared ]
8033
   * In a document without any DTD, a document with only an
8034
   * internal DTD subset which contains no parameter entity
8035
   * references, or a document with "standalone='yes'", ...
8036
   * ... The declaration of a parameter entity must precede
8037
   * any reference to it...
8038
   */
8039
802k
  if ((ctxt->standalone == 1) ||
8040
802k
      ((ctxt->hasExternalSubset == 0) &&
8041
802k
       (ctxt->hasPErefs == 0))) {
8042
4.80k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8043
4.80k
            "PEReference: %%%s; not found\n",
8044
4.80k
            name);
8045
798k
  } else {
8046
      /*
8047
       * [ VC: Entity Declared ]
8048
       * In a document with an external subset or external
8049
       * parameter entities with "standalone='no'", ...
8050
       * ... The declaration of a parameter entity must
8051
       * precede any reference to it...
8052
       */
8053
798k
            if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
8054
16.0k
                xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
8055
16.0k
                                 "PEReference: %%%s; not found\n",
8056
16.0k
                                 name, NULL);
8057
16.0k
            } else
8058
781k
                xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8059
781k
                              "PEReference: %%%s; not found\n",
8060
781k
                              name, NULL);
8061
798k
            ctxt->valid = 0;
8062
798k
  }
8063
15.0M
    } else {
8064
  /*
8065
   * Internal checking in case the entity quest barfed
8066
   */
8067
15.0M
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8068
15.0M
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8069
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8070
0
      "Internal: %%%s; is not a parameter entity\n",
8071
0
        name, NULL);
8072
15.0M
  } else {
8073
15.0M
            xmlChar start[4];
8074
15.0M
            xmlCharEncoding enc;
8075
15.0M
            unsigned long parentConsumed;
8076
15.0M
            xmlEntityPtr oldEnt;
8077
8078
15.0M
      if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
8079
15.0M
          ((ctxt->options & XML_PARSE_NOENT) == 0) &&
8080
15.0M
    ((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
8081
15.0M
    ((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
8082
15.0M
    ((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
8083
15.0M
    (ctxt->replaceEntities == 0) &&
8084
15.0M
    (ctxt->validate == 0))
8085
774
    return;
8086
8087
15.0M
            if (entity->flags & XML_ENT_EXPANDING) {
8088
241
                xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
8089
241
                xmlHaltParser(ctxt);
8090
241
                return;
8091
241
            }
8092
8093
            /* Must be computed from old input before pushing new input. */
8094
15.0M
            parentConsumed = ctxt->input->parentConsumed;
8095
15.0M
            oldEnt = ctxt->input->entity;
8096
15.0M
            if ((oldEnt == NULL) ||
8097
15.0M
                ((oldEnt->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
8098
14.6M
                 ((oldEnt->flags & XML_ENT_PARSED) == 0))) {
8099
701k
                xmlSaturatedAdd(&parentConsumed, ctxt->input->consumed);
8100
701k
                xmlSaturatedAddSizeT(&parentConsumed,
8101
701k
                                     ctxt->input->cur - ctxt->input->base);
8102
701k
            }
8103
8104
15.0M
      input = xmlNewEntityInputStream(ctxt, entity);
8105
15.0M
      if (xmlPushInput(ctxt, input) < 0) {
8106
11.6k
                xmlFreeInputStream(input);
8107
11.6k
    return;
8108
11.6k
            }
8109
8110
15.0M
            entity->flags |= XML_ENT_EXPANDING;
8111
8112
15.0M
            input->parentConsumed = parentConsumed;
8113
8114
15.0M
      if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
8115
                /*
8116
                 * Get the 4 first bytes and decode the charset
8117
                 * if enc != XML_CHAR_ENCODING_NONE
8118
                 * plug some encoding conversion routines.
8119
                 * Note that, since we may have some non-UTF8
8120
                 * encoding (like UTF16, bug 135229), the 'length'
8121
                 * is not known, but we can calculate based upon
8122
                 * the amount of data in the buffer.
8123
                 */
8124
6.81k
                GROW
8125
6.81k
                if (ctxt->instate == XML_PARSER_EOF)
8126
0
                    return;
8127
6.81k
                if ((ctxt->input->end - ctxt->input->cur)>=4) {
8128
6.76k
                    start[0] = RAW;
8129
6.76k
                    start[1] = NXT(1);
8130
6.76k
                    start[2] = NXT(2);
8131
6.76k
                    start[3] = NXT(3);
8132
6.76k
                    enc = xmlDetectCharEncoding(start, 4);
8133
6.76k
                    if (enc != XML_CHAR_ENCODING_NONE) {
8134
626
                        xmlSwitchEncoding(ctxt, enc);
8135
626
                    }
8136
6.76k
                }
8137
8138
6.81k
                if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
8139
6.81k
                    (IS_BLANK_CH(NXT(5)))) {
8140
447
                    xmlParseTextDecl(ctxt);
8141
447
                }
8142
6.81k
            }
8143
15.0M
  }
8144
15.0M
    }
8145
15.8M
    ctxt->hasPErefs = 1;
8146
15.8M
}
8147
8148
/**
8149
 * xmlLoadEntityContent:
8150
 * @ctxt:  an XML parser context
8151
 * @entity: an unloaded system entity
8152
 *
8153
 * Load the original content of the given system entity from the
8154
 * ExternalID/SystemID given. This is to be used for Included in Literal
8155
 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
8156
 *
8157
 * Returns 0 in case of success and -1 in case of failure
8158
 */
8159
static int
8160
2.91k
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
8161
2.91k
    xmlParserInputPtr input;
8162
2.91k
    xmlBufferPtr buf;
8163
2.91k
    int l, c;
8164
2.91k
    int count = 0;
8165
8166
2.91k
    if ((ctxt == NULL) || (entity == NULL) ||
8167
2.91k
        ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
8168
2.91k
   (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
8169
2.91k
  (entity->content != NULL)) {
8170
0
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8171
0
              "xmlLoadEntityContent parameter error");
8172
0
        return(-1);
8173
0
    }
8174
8175
2.91k
    if (xmlParserDebugEntities)
8176
0
  xmlGenericError(xmlGenericErrorContext,
8177
0
    "Reading %s entity content input\n", entity->name);
8178
8179
2.91k
    buf = xmlBufferCreate();
8180
2.91k
    if (buf == NULL) {
8181
0
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8182
0
              "xmlLoadEntityContent parameter error");
8183
0
        return(-1);
8184
0
    }
8185
2.91k
    xmlBufferSetAllocationScheme(buf, XML_BUFFER_ALLOC_DOUBLEIT);
8186
8187
2.91k
    input = xmlNewEntityInputStream(ctxt, entity);
8188
2.91k
    if (input == NULL) {
8189
684
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8190
684
              "xmlLoadEntityContent input error");
8191
684
  xmlBufferFree(buf);
8192
684
        return(-1);
8193
684
    }
8194
8195
    /*
8196
     * Push the entity as the current input, read char by char
8197
     * saving to the buffer until the end of the entity or an error
8198
     */
8199
2.23k
    if (xmlPushInput(ctxt, input) < 0) {
8200
0
        xmlBufferFree(buf);
8201
0
  xmlFreeInputStream(input);
8202
0
  return(-1);
8203
0
    }
8204
8205
2.23k
    GROW;
8206
2.23k
    c = CUR_CHAR(l);
8207
2.27M
    while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
8208
2.27M
           (IS_CHAR(c))) {
8209
2.26M
        xmlBufferAdd(buf, ctxt->input->cur, l);
8210
2.26M
  if (count++ > XML_PARSER_CHUNK_SIZE) {
8211
21.1k
      count = 0;
8212
21.1k
      GROW;
8213
21.1k
            if (ctxt->instate == XML_PARSER_EOF) {
8214
0
                xmlBufferFree(buf);
8215
0
                return(-1);
8216
0
            }
8217
21.1k
  }
8218
2.26M
  NEXTL(l);
8219
2.26M
  c = CUR_CHAR(l);
8220
2.26M
  if (c == 0) {
8221
1.92k
      count = 0;
8222
1.92k
      GROW;
8223
1.92k
            if (ctxt->instate == XML_PARSER_EOF) {
8224
0
                xmlBufferFree(buf);
8225
0
                return(-1);
8226
0
            }
8227
1.92k
      c = CUR_CHAR(l);
8228
1.92k
  }
8229
2.26M
    }
8230
8231
2.23k
    if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
8232
1.39k
        xmlSaturatedAdd(&ctxt->sizeentities, ctxt->input->consumed);
8233
1.39k
        xmlPopInput(ctxt);
8234
1.39k
    } else if (!IS_CHAR(c)) {
8235
842
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8236
842
                          "xmlLoadEntityContent: invalid char value %d\n",
8237
842
                    c);
8238
842
  xmlBufferFree(buf);
8239
842
  return(-1);
8240
842
    }
8241
1.39k
    entity->content = buf->content;
8242
1.39k
    entity->length = buf->use;
8243
1.39k
    buf->content = NULL;
8244
1.39k
    xmlBufferFree(buf);
8245
8246
1.39k
    return(0);
8247
2.23k
}
8248
8249
/**
8250
 * xmlParseStringPEReference:
8251
 * @ctxt:  an XML parser context
8252
 * @str:  a pointer to an index in the string
8253
 *
8254
 * parse PEReference declarations
8255
 *
8256
 * [69] PEReference ::= '%' Name ';'
8257
 *
8258
 * [ WFC: No Recursion ]
8259
 * A parsed entity must not contain a recursive
8260
 * reference to itself, either directly or indirectly.
8261
 *
8262
 * [ WFC: Entity Declared ]
8263
 * In a document without any DTD, a document with only an internal DTD
8264
 * subset which contains no parameter entity references, or a document
8265
 * with "standalone='yes'", ...  ... The declaration of a parameter
8266
 * entity must precede any reference to it...
8267
 *
8268
 * [ VC: Entity Declared ]
8269
 * In a document with an external subset or external parameter entities
8270
 * with "standalone='no'", ...  ... The declaration of a parameter entity
8271
 * must precede any reference to it...
8272
 *
8273
 * [ WFC: In DTD ]
8274
 * Parameter-entity references may only appear in the DTD.
8275
 * NOTE: misleading but this is handled.
8276
 *
8277
 * Returns the xmlEntityPtr if found, or NULL otherwise.
8278
 *         str is updated to the current value of the index
8279
 */
8280
static xmlEntityPtr
8281
353k
xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8282
353k
    const xmlChar *ptr;
8283
353k
    xmlChar cur;
8284
353k
    xmlChar *name;
8285
353k
    xmlEntityPtr entity = NULL;
8286
8287
353k
    if ((str == NULL) || (*str == NULL)) return(NULL);
8288
353k
    ptr = *str;
8289
353k
    cur = *ptr;
8290
353k
    if (cur != '%')
8291
0
        return(NULL);
8292
353k
    ptr++;
8293
353k
    name = xmlParseStringName(ctxt, &ptr);
8294
353k
    if (name == NULL) {
8295
4.23k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8296
4.23k
           "xmlParseStringPEReference: no name\n");
8297
4.23k
  *str = ptr;
8298
4.23k
  return(NULL);
8299
4.23k
    }
8300
349k
    cur = *ptr;
8301
349k
    if (cur != ';') {
8302
1.35k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8303
1.35k
  xmlFree(name);
8304
1.35k
  *str = ptr;
8305
1.35k
  return(NULL);
8306
1.35k
    }
8307
348k
    ptr++;
8308
8309
    /*
8310
     * Request the entity from SAX
8311
     */
8312
348k
    if ((ctxt->sax != NULL) &&
8313
348k
  (ctxt->sax->getParameterEntity != NULL))
8314
348k
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8315
348k
    if (ctxt->instate == XML_PARSER_EOF) {
8316
0
  xmlFree(name);
8317
0
  *str = ptr;
8318
0
  return(NULL);
8319
0
    }
8320
348k
    if (entity == NULL) {
8321
  /*
8322
   * [ WFC: Entity Declared ]
8323
   * In a document without any DTD, a document with only an
8324
   * internal DTD subset which contains no parameter entity
8325
   * references, or a document with "standalone='yes'", ...
8326
   * ... The declaration of a parameter entity must precede
8327
   * any reference to it...
8328
   */
8329
18.5k
  if ((ctxt->standalone == 1) ||
8330
18.5k
      ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8331
60
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8332
60
     "PEReference: %%%s; not found\n", name);
8333
18.4k
  } else {
8334
      /*
8335
       * [ VC: Entity Declared ]
8336
       * In a document with an external subset or external
8337
       * parameter entities with "standalone='no'", ...
8338
       * ... The declaration of a parameter entity must
8339
       * precede any reference to it...
8340
       */
8341
18.4k
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8342
18.4k
        "PEReference: %%%s; not found\n",
8343
18.4k
        name, NULL);
8344
18.4k
      ctxt->valid = 0;
8345
18.4k
  }
8346
329k
    } else {
8347
  /*
8348
   * Sanity check: the entity returned by the lookup must be a parameter entity
8349
   */
8350
329k
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8351
329k
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8352
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8353
0
        "%%%s; is not a parameter entity\n",
8354
0
        name, NULL);
8355
0
  }
8356
329k
    }
8357
348k
    ctxt->hasPErefs = 1;
8358
348k
    xmlFree(name);
8359
348k
    *str = ptr;
8360
348k
    return(entity);
8361
348k
}
8362
8363
/**
8364
 * xmlParseDocTypeDecl:
8365
 * @ctxt:  an XML parser context
8366
 *
8367
 * DEPRECATED: Internal function, don't use.
8368
 *
8369
 * parse a DOCTYPE declaration
8370
 *
8371
 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8372
 *                      ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8373
 *
8374
 * [ VC: Root Element Type ]
8375
 * The Name in the document type declaration must match the element
8376
 * type of the root element.
8377
 */
8378
8379
void
8380
501k
xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
8381
501k
    const xmlChar *name = NULL;
8382
501k
    xmlChar *ExternalID = NULL;
8383
501k
    xmlChar *URI = NULL;
8384
8385
    /*
8386
     * We know that '<!DOCTYPE' has been detected.
8387
     */
8388
501k
    SKIP(9);
8389
8390
501k
    SKIP_BLANKS;
8391
8392
    /*
8393
     * Parse the DOCTYPE name.
8394
     */
8395
501k
    name = xmlParseName(ctxt);
8396
501k
    if (name == NULL) {
8397
7.86k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8398
7.86k
           "xmlParseDocTypeDecl : no DOCTYPE name !\n");
8399
7.86k
    }
8400
501k
    ctxt->intSubName = name;
8401
8402
501k
    SKIP_BLANKS;
8403
8404
    /*
8405
     * Check for SystemID and ExternalID
8406
     */
8407
501k
    URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8408
8409
501k
    if ((URI != NULL) || (ExternalID != NULL)) {
8410
225k
        ctxt->hasExternalSubset = 1;
8411
225k
    }
8412
501k
    ctxt->extSubURI = URI;
8413
501k
    ctxt->extSubSystem = ExternalID;
8414
8415
501k
    SKIP_BLANKS;
8416
8417
    /*
8418
     * Create and update the internal subset.
8419
     */
8420
501k
    if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8421
501k
  (!ctxt->disableSAX))
8422
474k
  ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8423
501k
    if (ctxt->instate == XML_PARSER_EOF)
8424
0
  return;
8425
8426
    /*
8427
     * Are there any internal subset declarations ?
8428
     * they are handled separately in xmlParseInternalSubset()
8429
     */
8430
501k
    if (RAW == '[')
8431
316k
  return;
8432
8433
    /*
8434
     * We should be at the end of the DOCTYPE declaration.
8435
     */
8436
185k
    if (RAW != '>') {
8437
39.0k
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8438
39.0k
    }
8439
185k
    NEXT;
8440
185k
}
8441
8442
/**
8443
 * xmlParseInternalSubset:
8444
 * @ctxt:  an XML parser context
8445
 *
8446
 * parse the internal subset declaration
8447
 *
8448
 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8449
 */
8450
8451
static void
8452
316k
xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8453
    /*
8454
     * Is there any DTD definition ?
8455
     */
8456
316k
    if (RAW == '[') {
8457
316k
        int baseInputNr = ctxt->inputNr;
8458
316k
        ctxt->instate = XML_PARSER_DTD;
8459
316k
        NEXT;
8460
  /*
8461
   * Parse the succession of Markup declarations and
8462
   * PEReferences.
8463
   * Subsequence (markupdecl | PEReference | S)*
8464
   */
8465
316k
  SKIP_BLANKS;
8466
17.0M
  while (((RAW != ']') || (ctxt->inputNr > baseInputNr)) &&
8467
17.0M
               (ctxt->instate != XML_PARSER_EOF)) {
8468
8469
            /*
8470
             * Conditional sections are allowed from external entities included
8471
             * by PE References in the internal subset.
8472
             */
8473
16.8M
            if ((ctxt->inputNr > 1) && (ctxt->input->filename != NULL) &&
8474
16.8M
                (RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
8475
0
                xmlParseConditionalSections(ctxt);
8476
16.8M
            } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
8477
16.6M
          xmlParseMarkupDecl(ctxt);
8478
16.6M
            } else if (RAW == '%') {
8479
138k
          xmlParsePEReference(ctxt);
8480
138k
            } else {
8481
115k
    xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8482
115k
                        "xmlParseInternalSubset: error detected in"
8483
115k
                        " Markup declaration\n");
8484
115k
                xmlHaltParser(ctxt);
8485
115k
                return;
8486
115k
            }
8487
16.7M
      SKIP_BLANKS;
8488
16.7M
  }
8489
200k
  if (RAW == ']') {
8490
170k
      NEXT;
8491
170k
      SKIP_BLANKS;
8492
170k
  }
8493
200k
    }
8494
8495
    /*
8496
     * We should be at the end of the DOCTYPE declaration.
8497
     */
8498
200k
    if (RAW != '>') {
8499
33.0k
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8500
33.0k
  return;
8501
33.0k
    }
8502
167k
    NEXT;
8503
167k
}
8504
8505
#ifdef LIBXML_SAX1_ENABLED
8506
/**
8507
 * xmlParseAttribute:
8508
 * @ctxt:  an XML parser context
8509
 * @value:  a xmlChar ** used to store the value of the attribute
8510
 *
8511
 * DEPRECATED: Internal function, don't use.
8512
 *
8513
 * parse an attribute
8514
 *
8515
 * [41] Attribute ::= Name Eq AttValue
8516
 *
8517
 * [ WFC: No External Entity References ]
8518
 * Attribute values cannot contain direct or indirect entity references
8519
 * to external entities.
8520
 *
8521
 * [ WFC: No < in Attribute Values ]
8522
 * The replacement text of any entity referred to directly or indirectly in
8523
 * an attribute value (other than "&lt;") must not contain a <.
8524
 *
8525
 * [ VC: Attribute Value Type ]
8526
 * The attribute must have been declared; the value must be of the type
8527
 * declared for it.
8528
 *
8529
 * [25] Eq ::= S? '=' S?
8530
 *
8531
 * With namespace:
8532
 *
8533
 * [NS 11] Attribute ::= QName Eq AttValue
8534
 *
8535
 * Also the case QName == xmlns:??? is handled independently as a namespace
8536
 * definition.
8537
 *
8538
 * Returns the attribute name, and the value in *value.
8539
 */
8540
8541
const xmlChar *
8542
5.40M
xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8543
5.40M
    const xmlChar *name;
8544
5.40M
    xmlChar *val;
8545
8546
5.40M
    *value = NULL;
8547
5.40M
    GROW;
8548
5.40M
    name = xmlParseName(ctxt);
8549
5.40M
    if (name == NULL) {
8550
1.62M
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8551
1.62M
                 "error parsing attribute name\n");
8552
1.62M
        return(NULL);
8553
1.62M
    }
8554
8555
    /*
8556
     * read the value
8557
     */
8558
3.78M
    SKIP_BLANKS;
8559
3.78M
    if (RAW == '=') {
8560
3.10M
        NEXT;
8561
3.10M
  SKIP_BLANKS;
8562
3.10M
  val = xmlParseAttValue(ctxt);
8563
3.10M
  ctxt->instate = XML_PARSER_CONTENT;
8564
3.10M
    } else {
8565
681k
  xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8566
681k
         "Specification mandates value for attribute %s\n", name);
8567
681k
  return(name);
8568
681k
    }
8569
8570
    /*
8571
     * Check that xml:lang conforms to the specification
8572
     * No longer reported as an error, only a warning is generated now
8573
     * since this was deprecated in XML second edition
8574
     */
8575
3.10M
    if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8576
88.6k
  if (!xmlCheckLanguageID(val)) {
8577
73.5k
      xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8578
73.5k
              "Malformed value for xml:lang : %s\n",
8579
73.5k
        val, NULL);
8580
73.5k
  }
8581
88.6k
    }
8582
8583
    /*
8584
     * Check that xml:space conforms to the specification
8585
     */
8586
3.10M
    if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8587
4.83k
  if (xmlStrEqual(val, BAD_CAST "default"))
8588
44
      *(ctxt->space) = 0;
8589
4.79k
  else if (xmlStrEqual(val, BAD_CAST "preserve"))
8590
242
      *(ctxt->space) = 1;
8591
4.55k
  else {
8592
4.55k
    xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8593
4.55k
"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8594
4.55k
                                 val, NULL);
8595
4.55k
  }
8596
4.83k
    }
8597
8598
3.10M
    *value = val;
8599
3.10M
    return(name);
8600
3.78M
}
8601
8602
/**
8603
 * xmlParseStartTag:
8604
 * @ctxt:  an XML parser context
8605
 *
8606
 * DEPRECATED: Internal function, don't use.
8607
 *
8608
 * Parse a start tag. Always consumes '<'.
8609
 *
8610
 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8611
 *
8612
 * [ WFC: Unique Att Spec ]
8613
 * No attribute name may appear more than once in the same start-tag or
8614
 * empty-element tag.
8615
 *
8616
 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8617
 *
8618
 * [ WFC: Unique Att Spec ]
8619
 * No attribute name may appear more than once in the same start-tag or
8620
 * empty-element tag.
8621
 *
8622
 * With namespace:
8623
 *
8624
 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8625
 *
8626
 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8627
 *
8628
 * Returns the element name parsed
8629
 */
8630
8631
const xmlChar *
8632
5.72M
xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8633
5.72M
    const xmlChar *name;
8634
5.72M
    const xmlChar *attname;
8635
5.72M
    xmlChar *attvalue;
8636
5.72M
    const xmlChar **atts = ctxt->atts;
8637
5.72M
    int nbatts = 0;
8638
5.72M
    int maxatts = ctxt->maxatts;
8639
5.72M
    int i;
8640
8641
5.72M
    if (RAW != '<') return(NULL);
8642
5.72M
    NEXT1;
8643
8644
5.72M
    name = xmlParseName(ctxt);
8645
5.72M
    if (name == NULL) {
8646
529k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8647
529k
       "xmlParseStartTag: invalid element name\n");
8648
529k
        return(NULL);
8649
529k
    }
8650
8651
    /*
8652
     * Now parse the attributes, up to the end of the tag
8653
     *
8654
     * (S Attribute)* S?
8655
     */
8656
5.19M
    SKIP_BLANKS;
8657
5.19M
    GROW;
8658
8659
7.62M
    while (((RAW != '>') &&
8660
7.62M
     ((RAW != '/') || (NXT(1) != '>')) &&
8661
7.62M
     (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
8662
5.40M
  attname = xmlParseAttribute(ctxt, &attvalue);
8663
5.40M
        if (attname == NULL) {
8664
1.62M
      xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8665
1.62M
         "xmlParseStartTag: problem parsing attributes\n");
8666
1.62M
      break;
8667
1.62M
  }
8668
3.78M
        if (attvalue != NULL) {
8669
      /*
8670
       * [ WFC: Unique Att Spec ]
8671
       * No attribute name may appear more than once in the same
8672
       * start-tag or empty-element tag.
8673
       */
8674
4.34M
      for (i = 0; i < nbatts;i += 2) {
8675
1.34M
          if (xmlStrEqual(atts[i], attname)) {
8676
42.0k
        xmlErrAttributeDup(ctxt, NULL, attname);
8677
42.0k
        xmlFree(attvalue);
8678
42.0k
        goto failed;
8679
42.0k
    }
8680
1.34M
      }
8681
      /*
8682
       * Add the pair to atts
8683
       */
8684
2.99M
      if (atts == NULL) {
8685
138k
          maxatts = 22; /* allow for 10 attrs by default */
8686
138k
          atts = (const xmlChar **)
8687
138k
           xmlMalloc(maxatts * sizeof(xmlChar *));
8688
138k
    if (atts == NULL) {
8689
0
        xmlErrMemory(ctxt, NULL);
8690
0
        if (attvalue != NULL)
8691
0
      xmlFree(attvalue);
8692
0
        goto failed;
8693
0
    }
8694
138k
    ctxt->atts = atts;
8695
138k
    ctxt->maxatts = maxatts;
8696
2.85M
      } else if (nbatts + 4 > maxatts) {
8697
192
          const xmlChar **n;
8698
8699
192
          maxatts *= 2;
8700
192
          n = (const xmlChar **) xmlRealloc((void *) atts,
8701
192
               maxatts * sizeof(const xmlChar *));
8702
192
    if (n == NULL) {
8703
0
        xmlErrMemory(ctxt, NULL);
8704
0
        if (attvalue != NULL)
8705
0
      xmlFree(attvalue);
8706
0
        goto failed;
8707
0
    }
8708
192
    atts = n;
8709
192
    ctxt->atts = atts;
8710
192
    ctxt->maxatts = maxatts;
8711
192
      }
8712
2.99M
      atts[nbatts++] = attname;
8713
2.99M
      atts[nbatts++] = attvalue;
8714
2.99M
      atts[nbatts] = NULL;
8715
2.99M
      atts[nbatts + 1] = NULL;
8716
2.99M
  } else {
8717
743k
      if (attvalue != NULL)
8718
0
    xmlFree(attvalue);
8719
743k
  }
8720
8721
3.78M
failed:
8722
8723
3.78M
  GROW
8724
3.78M
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8725
1.35M
      break;
8726
2.42M
  if (SKIP_BLANKS == 0) {
8727
1.29M
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8728
1.29M
         "attributes construct error\n");
8729
1.29M
  }
8730
2.42M
  SHRINK;
8731
2.42M
        GROW;
8732
2.42M
    }
8733
8734
    /*
8735
     * SAX: Start of Element !
8736
     */
8737
5.19M
    if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8738
5.19M
  (!ctxt->disableSAX)) {
8739
4.84M
  if (nbatts > 0)
8740
1.88M
      ctxt->sax->startElement(ctxt->userData, name, atts);
8741
2.96M
  else
8742
2.96M
      ctxt->sax->startElement(ctxt->userData, name, NULL);
8743
4.84M
    }
8744
8745
5.19M
    if (atts != NULL) {
8746
        /* Free only the content strings */
8747
7.16M
        for (i = 1;i < nbatts;i+=2)
8748
2.99M
      if (atts[i] != NULL)
8749
2.99M
         xmlFree((xmlChar *) atts[i]);
8750
4.16M
    }
8751
5.19M
    return(name);
8752
5.19M
}
8753
8754
/**
8755
 * xmlParseEndTag1:
8756
 * @ctxt:  an XML parser context
8757
 * @line:  line of the start tag
8758
 *
8759
 *
8760
 * Parse an end tag. Always consumes '</'.
8761
 *
8762
 * [42] ETag ::= '</' Name S? '>'
8763
 *
8764
 * With namespace
8765
 *
8766
 * [NS 9] ETag ::= '</' QName S? '>'
8767
 */
8768
8769
static void
8770
1.43M
xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8771
1.43M
    const xmlChar *name;
8772
8773
1.43M
    GROW;
8774
1.43M
    if ((RAW != '<') || (NXT(1) != '/')) {
8775
0
  xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8776
0
           "xmlParseEndTag: '</' not found\n");
8777
0
  return;
8778
0
    }
8779
1.43M
    SKIP(2);
8780
8781
1.43M
    name = xmlParseNameAndCompare(ctxt,ctxt->name);
8782
8783
    /*
8784
     * We should definitely be at the ending "S? '>'" part
8785
     */
8786
1.43M
    GROW;
8787
1.43M
    SKIP_BLANKS;
8788
1.43M
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8789
216k
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8790
216k
    } else
8791
1.21M
  NEXT1;
8792
8793
    /*
8794
     * [ WFC: Element Type Match ]
8795
     * The Name in an element's end-tag must match the element type in the
8796
     * start-tag.
8797
     *
8798
     */
8799
1.43M
    if (name != (xmlChar*)1) {
8800
399k
        if (name == NULL) name = BAD_CAST "unparsable";
8801
399k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8802
399k
         "Opening and ending tag mismatch: %s line %d and %s\n",
8803
399k
                    ctxt->name, line, name);
8804
399k
    }
8805
8806
    /*
8807
     * SAX: End of Tag
8808
     */
8809
1.43M
    if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8810
1.43M
  (!ctxt->disableSAX))
8811
1.30M
        ctxt->sax->endElement(ctxt->userData, ctxt->name);
8812
8813
1.43M
    namePop(ctxt);
8814
1.43M
    spacePop(ctxt);
8815
1.43M
    return;
8816
1.43M
}
8817
8818
/**
8819
 * xmlParseEndTag:
8820
 * @ctxt:  an XML parser context
8821
 *
8822
 * DEPRECATED: Internal function, don't use.
8823
 *
8824
 * parse an end of tag
8825
 *
8826
 * [42] ETag ::= '</' Name S? '>'
8827
 *
8828
 * With namespace
8829
 *
8830
 * [NS 9] ETag ::= '</' QName S? '>'
8831
 */
8832
8833
void
8834
0
xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8835
0
    xmlParseEndTag1(ctxt, 0);
8836
0
}
8837
#endif /* LIBXML_SAX1_ENABLED */
8838
8839
/************************************************************************
8840
 *                  *
8841
 *          SAX 2 specific operations       *
8842
 *                  *
8843
 ************************************************************************/
8844
8845
/*
8846
 * xmlGetNamespace:
8847
 * @ctxt:  an XML parser context
8848
 * @prefix:  the prefix to lookup
8849
 *
8850
 * Lookup the namespace name for the @prefix (which can be NULL)
8851
 * The prefix must come from the @ctxt->dict dictionary
8852
 *
8853
 * Returns the namespace name or NULL if not bound
8854
 */
8855
static const xmlChar *
8856
10.9M
xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8857
10.9M
    int i;
8858
8859
10.9M
    if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
8860
28.7M
    for (i = ctxt->nsNr - 2;i >= 0;i-=2)
8861
19.8M
        if (ctxt->nsTab[i] == prefix) {
8862
1.64M
      if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8863
41.1k
          return(NULL);
8864
1.60M
      return(ctxt->nsTab[i + 1]);
8865
1.64M
  }
8866
8.90M
    return(NULL);
8867
10.5M
}
8868
8869
/**
8870
 * xmlParseQName:
8871
 * @ctxt:  an XML parser context
8872
 * @prefix:  pointer to store the prefix part
8873
 *
8874
 * parse an XML Namespace QName
8875
 *
8876
 * [6]  QName  ::= (Prefix ':')? LocalPart
8877
 * [7]  Prefix  ::= NCName
8878
 * [8]  LocalPart  ::= NCName
8879
 *
8880
 * Returns the Name parsed or NULL
8881
 */
8882
8883
static const xmlChar *
8884
20.7M
xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8885
20.7M
    const xmlChar *l, *p;
8886
8887
20.7M
    GROW;
8888
8889
20.7M
    l = xmlParseNCName(ctxt);
8890
20.7M
    if (l == NULL) {
8891
2.22M
        if (CUR == ':') {
8892
69.5k
      l = xmlParseName(ctxt);
8893
69.5k
      if (l != NULL) {
8894
69.5k
          xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8895
69.5k
             "Failed to parse QName '%s'\n", l, NULL, NULL);
8896
69.5k
    *prefix = NULL;
8897
69.5k
    return(l);
8898
69.5k
      }
8899
69.5k
  }
8900
2.15M
        return(NULL);
8901
2.22M
    }
8902
18.5M
    if (CUR == ':') {
8903
4.26M
        NEXT;
8904
4.26M
  p = l;
8905
4.26M
  l = xmlParseNCName(ctxt);
8906
4.26M
  if (l == NULL) {
8907
255k
      xmlChar *tmp;
8908
8909
255k
            if (ctxt->instate == XML_PARSER_EOF)
8910
0
                return(NULL);
8911
255k
            xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8912
255k
               "Failed to parse QName '%s:'\n", p, NULL, NULL);
8913
255k
      l = xmlParseNmtoken(ctxt);
8914
255k
      if (l == NULL) {
8915
157k
                if (ctxt->instate == XML_PARSER_EOF)
8916
0
                    return(NULL);
8917
157k
    tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8918
157k
            } else {
8919
98.1k
    tmp = xmlBuildQName(l, p, NULL, 0);
8920
98.1k
    xmlFree((char *)l);
8921
98.1k
      }
8922
255k
      p = xmlDictLookup(ctxt->dict, tmp, -1);
8923
255k
      if (tmp != NULL) xmlFree(tmp);
8924
255k
      *prefix = NULL;
8925
255k
      return(p);
8926
255k
  }
8927
4.00M
  if (CUR == ':') {
8928
138k
      xmlChar *tmp;
8929
8930
138k
            xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8931
138k
               "Failed to parse QName '%s:%s:'\n", p, l, NULL);
8932
138k
      NEXT;
8933
138k
      tmp = (xmlChar *) xmlParseName(ctxt);
8934
138k
      if (tmp != NULL) {
8935
113k
          tmp = xmlBuildQName(tmp, l, NULL, 0);
8936
113k
    l = xmlDictLookup(ctxt->dict, tmp, -1);
8937
113k
    if (tmp != NULL) xmlFree(tmp);
8938
113k
    *prefix = p;
8939
113k
    return(l);
8940
113k
      }
8941
25.0k
            if (ctxt->instate == XML_PARSER_EOF)
8942
0
                return(NULL);
8943
25.0k
      tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8944
25.0k
      l = xmlDictLookup(ctxt->dict, tmp, -1);
8945
25.0k
      if (tmp != NULL) xmlFree(tmp);
8946
25.0k
      *prefix = p;
8947
25.0k
      return(l);
8948
25.0k
  }
8949
3.87M
  *prefix = p;
8950
3.87M
    } else
8951
14.2M
        *prefix = NULL;
8952
18.1M
    return(l);
8953
18.5M
}
8954
8955
/**
8956
 * xmlParseQNameAndCompare:
8957
 * @ctxt:  an XML parser context
8958
 * @name:  the localname
8959
 * @prefix:  the prefix, if any.
8960
 *
8961
 * parse an XML QName and compare it against the expected prefix and name
8962
 * (specialized for endtag parsing)
8963
 *
8964
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8965
 * and the name for mismatch
8966
 */
8967
8968
static const xmlChar *
8969
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8970
740k
                        xmlChar const *prefix) {
8971
740k
    const xmlChar *cmp;
8972
740k
    const xmlChar *in;
8973
740k
    const xmlChar *ret;
8974
740k
    const xmlChar *prefix2;
8975
8976
740k
    if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8977
8978
740k
    GROW;
8979
740k
    in = ctxt->input->cur;
8980
8981
740k
    cmp = prefix;
8982
1.72M
    while (*in != 0 && *in == *cmp) {
8983
986k
  ++in;
8984
986k
  ++cmp;
8985
986k
    }
8986
740k
    if ((*cmp == 0) && (*in == ':')) {
8987
586k
        in++;
8988
586k
  cmp = name;
8989
3.00M
  while (*in != 0 && *in == *cmp) {
8990
2.41M
      ++in;
8991
2.41M
      ++cmp;
8992
2.41M
  }
8993
586k
  if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8994
      /* success */
8995
383k
            ctxt->input->col += in - ctxt->input->cur;
8996
383k
      ctxt->input->cur = in;
8997
383k
      return((const xmlChar*) 1);
8998
383k
  }
8999
586k
    }
9000
    /*
9001
     * all strings come from the dictionary, so pointer equality is sufficient
9002
     */
9003
357k
    ret = xmlParseQName (ctxt, &prefix2);
9004
357k
    if ((ret == name) && (prefix == prefix2))
9005
13.6k
  return((const xmlChar*) 1);
9006
343k
    return ret;
9007
357k
}
9008
9009
/**
9010
 * xmlParseAttValueInternal:
9011
 * @ctxt:  an XML parser context
9012
 * @len:  attribute len result
9013
 * @alloc:  whether the attribute was reallocated as a new string
9014
 * @normalize:  if 1 then further non-CDATA normalization must be done
9015
 *
9016
 * parse a value for an attribute.
9017
 * NOTE: if no normalization is needed, the routine will return pointers
9018
 *       directly from the data buffer.
9019
 *
9020
 * 3.3.3 Attribute-Value Normalization:
9021
 * Before the value of an attribute is passed to the application or
9022
 * checked for validity, the XML processor must normalize it as follows:
9023
 * - a character reference is processed by appending the referenced
9024
 *   character to the attribute value
9025
 * - an entity reference is processed by recursively processing the
9026
 *   replacement text of the entity
9027
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
9028
 *   appending #x20 to the normalized value, except that only a single
9029
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
9030
 *   parsed entity or the literal entity value of an internal parsed entity
9031
 * - other characters are processed by appending them to the normalized value
9032
 * If the declared value is not CDATA, then the XML processor must further
9033
 * process the normalized attribute value by discarding any leading and
9034
 * trailing space (#x20) characters, and by replacing sequences of space
9035
 * (#x20) characters by a single space (#x20) character.
9036
 * All attributes for which no declaration has been read should be treated
9037
 * by a non-validating parser as if declared CDATA.
9038
 *
9039
 * Returns the AttValue parsed or NULL. The value has to be freed by the
9040
 *     caller if it was copied, this can be detected by val[*len] == 0.
9041
 */
9042
9043
#define GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end) \
9044
5.40k
    const xmlChar *oldbase = ctxt->input->base;\
9045
5.40k
    GROW;\
9046
5.40k
    if (ctxt->instate == XML_PARSER_EOF)\
9047
5.40k
        return(NULL);\
9048
5.40k
    if (oldbase != ctxt->input->base) {\
9049
0
        ptrdiff_t delta = ctxt->input->base - oldbase;\
9050
0
        start = start + delta;\
9051
0
        in = in + delta;\
9052
0
    }\
9053
5.40k
    end = ctxt->input->end;
9054
9055
static xmlChar *
9056
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
9057
                         int normalize)
9058
10.8M
{
9059
10.8M
    xmlChar limit = 0;
9060
10.8M
    const xmlChar *in = NULL, *start, *end, *last;
9061
10.8M
    xmlChar *ret = NULL;
9062
10.8M
    int line, col;
9063
10.8M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9064
2.63M
                    XML_MAX_HUGE_LENGTH :
9065
10.8M
                    XML_MAX_TEXT_LENGTH;
9066
9067
10.8M
    GROW;
9068
10.8M
    in = (xmlChar *) CUR_PTR;
9069
10.8M
    line = ctxt->input->line;
9070
10.8M
    col = ctxt->input->col;
9071
10.8M
    if (*in != '"' && *in != '\'') {
9072
138k
        xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
9073
138k
        return (NULL);
9074
138k
    }
9075
10.7M
    ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
9076
9077
    /*
9078
     * try to handle in this routine the most common case where no
9079
     * allocation of a new string is required and where content is
9080
     * pure ASCII.
9081
     */
9082
10.7M
    limit = *in++;
9083
10.7M
    col++;
9084
10.7M
    end = ctxt->input->end;
9085
10.7M
    start = in;
9086
10.7M
    if (in >= end) {
9087
501
        GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9088
501
    }
9089
10.7M
    if (normalize) {
9090
        /*
9091
   * Skip any leading spaces
9092
   */
9093
488k
  while ((in < end) && (*in != limit) &&
9094
488k
         ((*in == 0x20) || (*in == 0x9) ||
9095
485k
          (*in == 0xA) || (*in == 0xD))) {
9096
147k
      if (*in == 0xA) {
9097
23.0k
          line++; col = 1;
9098
124k
      } else {
9099
124k
          col++;
9100
124k
      }
9101
147k
      in++;
9102
147k
      start = in;
9103
147k
      if (in >= end) {
9104
106
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9105
106
                if ((in - start) > maxLength) {
9106
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9107
0
                                   "AttValue length too long\n");
9108
0
                    return(NULL);
9109
0
                }
9110
106
      }
9111
147k
  }
9112
3.05M
  while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9113
3.05M
         (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9114
2.72M
      col++;
9115
2.72M
      if ((*in++ == 0x20) && (*in == 0x20)) break;
9116
2.71M
      if (in >= end) {
9117
485
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9118
485
                if ((in - start) > maxLength) {
9119
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9120
0
                                   "AttValue length too long\n");
9121
0
                    return(NULL);
9122
0
                }
9123
485
      }
9124
2.71M
  }
9125
341k
  last = in;
9126
  /*
9127
   * skip the trailing blanks
9128
   */
9129
353k
  while ((last[-1] == 0x20) && (last > start)) last--;
9130
500k
  while ((in < end) && (*in != limit) &&
9131
500k
         ((*in == 0x20) || (*in == 0x9) ||
9132
234k
          (*in == 0xA) || (*in == 0xD))) {
9133
159k
      if (*in == 0xA) {
9134
30.4k
          line++, col = 1;
9135
128k
      } else {
9136
128k
          col++;
9137
128k
      }
9138
159k
      in++;
9139
159k
      if (in >= end) {
9140
171
    const xmlChar *oldbase = ctxt->input->base;
9141
171
    GROW;
9142
171
                if (ctxt->instate == XML_PARSER_EOF)
9143
0
                    return(NULL);
9144
171
    if (oldbase != ctxt->input->base) {
9145
0
        ptrdiff_t delta = ctxt->input->base - oldbase;
9146
0
        start = start + delta;
9147
0
        in = in + delta;
9148
0
        last = last + delta;
9149
0
    }
9150
171
    end = ctxt->input->end;
9151
171
                if ((in - start) > maxLength) {
9152
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9153
0
                                   "AttValue length too long\n");
9154
0
                    return(NULL);
9155
0
                }
9156
171
      }
9157
159k
  }
9158
341k
        if ((in - start) > maxLength) {
9159
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9160
0
                           "AttValue length too long\n");
9161
0
            return(NULL);
9162
0
        }
9163
341k
  if (*in != limit) goto need_complex;
9164
10.3M
    } else {
9165
144M
  while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9166
144M
         (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9167
133M
      in++;
9168
133M
      col++;
9169
133M
      if (in >= end) {
9170
4.31k
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9171
4.31k
                if ((in - start) > maxLength) {
9172
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9173
0
                                   "AttValue length too long\n");
9174
0
                    return(NULL);
9175
0
                }
9176
4.31k
      }
9177
133M
  }
9178
10.3M
  last = in;
9179
10.3M
        if ((in - start) > maxLength) {
9180
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9181
0
                           "AttValue length too long\n");
9182
0
            return(NULL);
9183
0
        }
9184
10.3M
  if (*in != limit) goto need_complex;
9185
10.3M
    }
9186
8.93M
    in++;
9187
8.93M
    col++;
9188
8.93M
    if (len != NULL) {
9189
6.36M
        if (alloc) *alloc = 0;
9190
6.36M
        *len = last - start;
9191
6.36M
        ret = (xmlChar *) start;
9192
6.36M
    } else {
9193
2.57M
        if (alloc) *alloc = 1;
9194
2.57M
        ret = xmlStrndup(start, last - start);
9195
2.57M
    }
9196
8.93M
    CUR_PTR = in;
9197
8.93M
    ctxt->input->line = line;
9198
8.93M
    ctxt->input->col = col;
9199
8.93M
    return ret;
9200
1.80M
need_complex:
9201
1.80M
    if (alloc) *alloc = 1;
9202
1.80M
    return xmlParseAttValueComplex(ctxt, len, normalize);
9203
10.7M
}
9204
9205
/**
9206
 * xmlParseAttribute2:
9207
 * @ctxt:  an XML parser context
9208
 * @pref:  the element prefix
9209
 * @elem:  the element name
9210
 * @prefix:  a xmlChar ** used to store the value of the attribute prefix
9211
 * @value:  a xmlChar ** used to store the value of the attribute
9212
 * @len:  an int * to save the length of the attribute
9213
 * @alloc:  an int * to indicate if the attribute was allocated
9214
 *
9215
 * parse an attribute in the new SAX2 framework.
9216
 *
9217
 * Returns the attribute name, and the value in *value.
9218
 */
9219
9220
static const xmlChar *
9221
xmlParseAttribute2(xmlParserCtxtPtr ctxt,
9222
                   const xmlChar * pref, const xmlChar * elem,
9223
                   const xmlChar ** prefix, xmlChar ** value,
9224
                   int *len, int *alloc)
9225
9.29M
{
9226
9.29M
    const xmlChar *name;
9227
9.29M
    xmlChar *val, *internal_val = NULL;
9228
9.29M
    int normalize = 0;
9229
9230
9.29M
    *value = NULL;
9231
9.29M
    GROW;
9232
9.29M
    name = xmlParseQName(ctxt, prefix);
9233
9.29M
    if (name == NULL) {
9234
1.29M
        xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9235
1.29M
                       "error parsing attribute name\n");
9236
1.29M
        return (NULL);
9237
1.29M
    }
9238
9239
    /*
9240
     * get the type if needed
9241
     */
9242
8.00M
    if (ctxt->attsSpecial != NULL) {
9243
737k
        int type;
9244
9245
737k
        type = (int) (ptrdiff_t) xmlHashQLookup2(ctxt->attsSpecial,
9246
737k
                                                 pref, elem, *prefix, name);
9247
737k
        if (type != 0)
9248
344k
            normalize = 1;
9249
737k
    }
9250
9251
    /*
9252
     * read the value
9253
     */
9254
8.00M
    SKIP_BLANKS;
9255
8.00M
    if (RAW == '=') {
9256
7.44M
        NEXT;
9257
7.44M
        SKIP_BLANKS;
9258
7.44M
        val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9259
7.44M
        if (val == NULL)
9260
67.8k
            return (NULL);
9261
7.37M
  if (normalize) {
9262
      /*
9263
       * Sometimes a second normalisation pass for spaces is needed
9264
       * but that only happens if charrefs or entity references
9265
       * have been used in the attribute value, i.e. the attribute
9266
       * value has been extracted in an allocated string already.
9267
       */
9268
341k
      if (*alloc) {
9269
76.0k
          const xmlChar *val2;
9270
9271
76.0k
          val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
9272
76.0k
    if ((val2 != NULL) && (val2 != val)) {
9273
9.28k
        xmlFree(val);
9274
9.28k
        val = (xmlChar *) val2;
9275
9.28k
    }
9276
76.0k
      }
9277
341k
  }
9278
7.37M
        ctxt->instate = XML_PARSER_CONTENT;
9279
7.37M
    } else {
9280
562k
        xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
9281
562k
                          "Specification mandates value for attribute %s\n",
9282
562k
                          name);
9283
562k
        return (name);
9284
562k
    }
9285
9286
7.37M
    if (*prefix == ctxt->str_xml) {
9287
        /*
9288
         * Check that xml:lang conforms to the specification
9289
         * No longer registered as an error, just generate a warning now
9290
         * since this was deprecated in XML second edition
9291
         */
9292
329k
        if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
9293
94.2k
            internal_val = xmlStrndup(val, *len);
9294
94.2k
            if (!xmlCheckLanguageID(internal_val)) {
9295
72.9k
                xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9296
72.9k
                              "Malformed value for xml:lang : %s\n",
9297
72.9k
                              internal_val, NULL);
9298
72.9k
            }
9299
94.2k
        }
9300
9301
        /*
9302
         * Check that xml:space conforms to the specification
9303
         */
9304
329k
        if (xmlStrEqual(name, BAD_CAST "space")) {
9305
6.32k
            internal_val = xmlStrndup(val, *len);
9306
6.32k
            if (xmlStrEqual(internal_val, BAD_CAST "default"))
9307
44
                *(ctxt->space) = 0;
9308
6.28k
            else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9309
344
                *(ctxt->space) = 1;
9310
5.94k
            else {
9311
5.94k
                xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9312
5.94k
                              "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9313
5.94k
                              internal_val, NULL);
9314
5.94k
            }
9315
6.32k
        }
9316
329k
        if (internal_val) {
9317
100k
            xmlFree(internal_val);
9318
100k
        }
9319
329k
    }
9320
9321
7.37M
    *value = val;
9322
7.37M
    return (name);
9323
8.00M
}
9324
/**
9325
 * xmlParseStartTag2:
9326
 * @ctxt:  an XML parser context
9327
 *
9328
 * Parse a start tag. Always consumes '<'.
9329
 *
9330
 * This routine is called when running SAX2 parsing
9331
 *
9332
 * [40] STag ::= '<' Name (S Attribute)* S? '>'
9333
 *
9334
 * [ WFC: Unique Att Spec ]
9335
 * No attribute name may appear more than once in the same start-tag or
9336
 * empty-element tag.
9337
 *
9338
 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9339
 *
9340
 * [ WFC: Unique Att Spec ]
9341
 * No attribute name may appear more than once in the same start-tag or
9342
 * empty-element tag.
9343
 *
9344
 * With namespace:
9345
 *
9346
 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9347
 *
9348
 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9349
 *
9350
 * Returns the element name parsed
9351
 */
9352
9353
static const xmlChar *
9354
xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
9355
11.0M
                  const xmlChar **URI, int *tlen) {
9356
11.0M
    const xmlChar *localname;
9357
11.0M
    const xmlChar *prefix;
9358
11.0M
    const xmlChar *attname;
9359
11.0M
    const xmlChar *aprefix;
9360
11.0M
    const xmlChar *nsname;
9361
11.0M
    xmlChar *attvalue;
9362
11.0M
    const xmlChar **atts = ctxt->atts;
9363
11.0M
    int maxatts = ctxt->maxatts;
9364
11.0M
    int nratts, nbatts, nbdef, inputid;
9365
11.0M
    int i, j, nbNs, attval;
9366
11.0M
    unsigned long cur;
9367
11.0M
    int nsNr = ctxt->nsNr;
9368
9369
11.0M
    if (RAW != '<') return(NULL);
9370
11.0M
    NEXT1;
9371
9372
    /*
9373
     * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
9374
     *       point since the attribute values may be stored as pointers to
9375
     *       the buffer and calling SHRINK would destroy them !
9376
     *       The Shrinking is only possible once the full set of attribute
9377
     *       callbacks have been done.
9378
     */
9379
11.0M
    SHRINK;
9380
11.0M
    cur = ctxt->input->cur - ctxt->input->base;
9381
11.0M
    inputid = ctxt->input->id;
9382
11.0M
    nbatts = 0;
9383
11.0M
    nratts = 0;
9384
11.0M
    nbdef = 0;
9385
11.0M
    nbNs = 0;
9386
11.0M
    attval = 0;
9387
    /* Forget any namespaces added during an earlier parse of this element. */
9388
11.0M
    ctxt->nsNr = nsNr;
9389
9390
11.0M
    localname = xmlParseQName(ctxt, &prefix);
9391
11.0M
    if (localname == NULL) {
9392
841k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9393
841k
           "StartTag: invalid element name\n");
9394
841k
        return(NULL);
9395
841k
    }
9396
10.2M
    *tlen = ctxt->input->cur - ctxt->input->base - cur;
9397
9398
    /*
9399
     * Now parse the attributes, up to the end of the tag:
9400
     *
9401
     * (S Attribute)* S?
9402
     */
9403
10.2M
    SKIP_BLANKS;
9404
10.2M
    GROW;
9405
9406
13.2M
    while (((RAW != '>') &&
9407
13.2M
     ((RAW != '/') || (NXT(1) != '>')) &&
9408
13.2M
     (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
9409
9.29M
  int len = -1, alloc = 0;
9410
9411
9.29M
  attname = xmlParseAttribute2(ctxt, prefix, localname,
9412
9.29M
                               &aprefix, &attvalue, &len, &alloc);
9413
9.29M
        if (attname == NULL) {
9414
1.36M
      xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9415
1.36M
           "xmlParseStartTag: problem parsing attributes\n");
9416
1.36M
      break;
9417
1.36M
  }
9418
7.93M
        if (attvalue == NULL)
9419
562k
            goto next_attr;
9420
7.37M
  if (len < 0) len = xmlStrlen(attvalue);
9421
9422
7.37M
        if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9423
228k
            const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9424
228k
            xmlURIPtr uri;
9425
9426
228k
            if (URL == NULL) {
9427
0
                xmlErrMemory(ctxt, "dictionary allocation failure");
9428
0
                if ((attvalue != NULL) && (alloc != 0))
9429
0
                    xmlFree(attvalue);
9430
0
                localname = NULL;
9431
0
                goto done;
9432
0
            }
9433
228k
            if (*URL != 0) {
9434
220k
                uri = xmlParseURI((const char *) URL);
9435
220k
                if (uri == NULL) {
9436
89.2k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
9437
89.2k
                             "xmlns: '%s' is not a valid URI\n",
9438
89.2k
                                       URL, NULL, NULL);
9439
130k
                } else {
9440
130k
                    if (uri->scheme == NULL) {
9441
39.9k
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9442
39.9k
                                  "xmlns: URI %s is not absolute\n",
9443
39.9k
                                  URL, NULL, NULL);
9444
39.9k
                    }
9445
130k
                    xmlFreeURI(uri);
9446
130k
                }
9447
220k
                if (URL == ctxt->str_xml_ns) {
9448
6
                    if (attname != ctxt->str_xml) {
9449
6
                        xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9450
6
                     "xml namespace URI cannot be the default namespace\n",
9451
6
                                 NULL, NULL, NULL);
9452
6
                    }
9453
6
                    goto next_attr;
9454
6
                }
9455
220k
                if ((len == 29) &&
9456
220k
                    (xmlStrEqual(URL,
9457
9.49k
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9458
533
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9459
533
                         "reuse of the xmlns namespace name is forbidden\n",
9460
533
                             NULL, NULL, NULL);
9461
533
                    goto next_attr;
9462
533
                }
9463
220k
            }
9464
            /*
9465
             * check that it's not a defined namespace
9466
             */
9467
275k
            for (j = 1;j <= nbNs;j++)
9468
63.0k
                if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9469
15.8k
                    break;
9470
227k
            if (j <= nbNs)
9471
15.8k
                xmlErrAttributeDup(ctxt, NULL, attname);
9472
212k
            else
9473
212k
                if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
9474
9475
7.14M
        } else if (aprefix == ctxt->str_xmlns) {
9476
616k
            const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9477
616k
            xmlURIPtr uri;
9478
9479
616k
            if (attname == ctxt->str_xml) {
9480
7.52k
                if (URL != ctxt->str_xml_ns) {
9481
7.41k
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9482
7.41k
                             "xml namespace prefix mapped to wrong URI\n",
9483
7.41k
                             NULL, NULL, NULL);
9484
7.41k
                }
9485
                /*
9486
                 * Do not keep a namespace definition node
9487
                 */
9488
7.52k
                goto next_attr;
9489
7.52k
            }
9490
609k
            if (URL == ctxt->str_xml_ns) {
9491
510
                if (attname != ctxt->str_xml) {
9492
510
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9493
510
                             "xml namespace URI mapped to wrong prefix\n",
9494
510
                             NULL, NULL, NULL);
9495
510
                }
9496
510
                goto next_attr;
9497
510
            }
9498
608k
            if (attname == ctxt->str_xmlns) {
9499
3.70k
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9500
3.70k
                         "redefinition of the xmlns prefix is forbidden\n",
9501
3.70k
                         NULL, NULL, NULL);
9502
3.70k
                goto next_attr;
9503
3.70k
            }
9504
605k
            if ((len == 29) &&
9505
605k
                (xmlStrEqual(URL,
9506
19.6k
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9507
2.86k
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9508
2.86k
                         "reuse of the xmlns namespace name is forbidden\n",
9509
2.86k
                         NULL, NULL, NULL);
9510
2.86k
                goto next_attr;
9511
2.86k
            }
9512
602k
            if ((URL == NULL) || (URL[0] == 0)) {
9513
15.3k
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9514
15.3k
                         "xmlns:%s: Empty XML namespace is not allowed\n",
9515
15.3k
                              attname, NULL, NULL);
9516
15.3k
                goto next_attr;
9517
586k
            } else {
9518
586k
                uri = xmlParseURI((const char *) URL);
9519
586k
                if (uri == NULL) {
9520
179k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
9521
179k
                         "xmlns:%s: '%s' is not a valid URI\n",
9522
179k
                                       attname, URL, NULL);
9523
407k
                } else {
9524
407k
                    if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9525
46.6k
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9526
46.6k
                                  "xmlns:%s: URI %s is not absolute\n",
9527
46.6k
                                  attname, URL, NULL);
9528
46.6k
                    }
9529
407k
                    xmlFreeURI(uri);
9530
407k
                }
9531
586k
            }
9532
9533
            /*
9534
             * check that it's not a defined namespace
9535
             */
9536
721k
            for (j = 1;j <= nbNs;j++)
9537
151k
                if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9538
16.2k
                    break;
9539
586k
            if (j <= nbNs)
9540
16.2k
                xmlErrAttributeDup(ctxt, aprefix, attname);
9541
570k
            else
9542
570k
                if (nsPush(ctxt, attname, URL) > 0) nbNs++;
9543
9544
6.53M
        } else {
9545
            /*
9546
             * Add the pair to atts
9547
             */
9548
6.53M
            if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9549
174k
                if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9550
0
                    goto next_attr;
9551
0
                }
9552
174k
                maxatts = ctxt->maxatts;
9553
174k
                atts = ctxt->atts;
9554
174k
            }
9555
6.53M
            ctxt->attallocs[nratts++] = alloc;
9556
6.53M
            atts[nbatts++] = attname;
9557
6.53M
            atts[nbatts++] = aprefix;
9558
            /*
9559
             * The namespace URI field is used temporarily to point at the
9560
             * base of the current input buffer for non-alloced attributes.
9561
             * When the input buffer is reallocated, all the pointers become
9562
             * invalid, but they can be reconstructed later.
9563
             */
9564
6.53M
            if (alloc)
9565
751k
                atts[nbatts++] = NULL;
9566
5.77M
            else
9567
5.77M
                atts[nbatts++] = ctxt->input->base;
9568
6.53M
            atts[nbatts++] = attvalue;
9569
6.53M
            attvalue += len;
9570
6.53M
            atts[nbatts++] = attvalue;
9571
            /*
9572
             * tag if some deallocation is needed
9573
             */
9574
6.53M
            if (alloc != 0) attval = 1;
9575
6.53M
            attvalue = NULL; /* moved into atts */
9576
6.53M
        }
9577
9578
7.93M
next_attr:
9579
7.93M
        if ((attvalue != NULL) && (alloc != 0)) {
9580
263k
            xmlFree(attvalue);
9581
263k
            attvalue = NULL;
9582
263k
        }
9583
9584
7.93M
  GROW
9585
7.93M
        if (ctxt->instate == XML_PARSER_EOF)
9586
0
            break;
9587
7.93M
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9588
3.65M
      break;
9589
4.28M
  if (SKIP_BLANKS == 0) {
9590
1.31M
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9591
1.31M
         "attributes construct error\n");
9592
1.31M
      break;
9593
1.31M
  }
9594
2.96M
        GROW;
9595
2.96M
    }
9596
9597
10.2M
    if (ctxt->input->id != inputid) {
9598
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9599
0
                    "Unexpected change of input\n");
9600
0
        localname = NULL;
9601
0
        goto done;
9602
0
    }
9603
9604
    /* Reconstruct attribute value pointers. */
9605
16.7M
    for (i = 0, j = 0; j < nratts; i += 5, j++) {
9606
6.53M
        if (atts[i+2] != NULL) {
9607
            /*
9608
             * Arithmetic on dangling pointers is technically undefined
9609
             * behavior, but well...
9610
             */
9611
5.77M
            const xmlChar *old = atts[i+2];
9612
5.77M
            atts[i+2]  = NULL;    /* Reset repurposed namespace URI */
9613
5.77M
            atts[i+3] = ctxt->input->base + (atts[i+3] - old);  /* value */
9614
5.77M
            atts[i+4] = ctxt->input->base + (atts[i+4] - old);  /* valuend */
9615
5.77M
        }
9616
6.53M
    }
9617
9618
    /*
9619
     * Attribute defaulting
9620
     */
9621
10.2M
    if (ctxt->attsDefault != NULL) {
9622
1.02M
        xmlDefAttrsPtr defaults;
9623
9624
1.02M
  defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9625
1.02M
  if (defaults != NULL) {
9626
328k
      for (i = 0;i < defaults->nbAttrs;i++) {
9627
210k
          attname = defaults->values[5 * i];
9628
210k
    aprefix = defaults->values[5 * i + 1];
9629
9630
                /*
9631
     * special handling for defaulted namespace declarations
9632
     */
9633
210k
    if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9634
        /*
9635
         * check that it's not a defined namespace
9636
         */
9637
7.22k
        for (j = 1;j <= nbNs;j++)
9638
3.05k
            if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9639
1.66k
          break;
9640
5.82k
              if (j <= nbNs) continue;
9641
9642
4.16k
        nsname = xmlGetNamespace(ctxt, NULL);
9643
4.16k
        if (nsname != defaults->values[5 * i + 2]) {
9644
1.35k
      if (nsPush(ctxt, NULL,
9645
1.35k
                 defaults->values[5 * i + 2]) > 0)
9646
1.32k
          nbNs++;
9647
1.35k
        }
9648
204k
    } else if (aprefix == ctxt->str_xmlns) {
9649
        /*
9650
         * check that it's not a defined namespace
9651
         */
9652
16.3k
        for (j = 1;j <= nbNs;j++)
9653
4.76k
            if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9654
2.29k
          break;
9655
13.8k
              if (j <= nbNs) continue;
9656
9657
11.5k
        nsname = xmlGetNamespace(ctxt, attname);
9658
11.5k
        if (nsname != defaults->values[5 * i + 2]) {
9659
4.38k
      if (nsPush(ctxt, attname,
9660
4.38k
                 defaults->values[5 * i + 2]) > 0)
9661
4.31k
          nbNs++;
9662
4.38k
        }
9663
190k
    } else {
9664
        /*
9665
         * check that it's not a defined attribute
9666
         */
9667
451k
        for (j = 0;j < nbatts;j+=5) {
9668
268k
      if ((attname == atts[j]) && (aprefix == atts[j+1]))
9669
8.60k
          break;
9670
268k
        }
9671
190k
        if (j < nbatts) continue;
9672
9673
182k
        if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9674
5.85k
      if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9675
0
                            localname = NULL;
9676
0
                            goto done;
9677
0
      }
9678
5.85k
      maxatts = ctxt->maxatts;
9679
5.85k
      atts = ctxt->atts;
9680
5.85k
        }
9681
182k
        atts[nbatts++] = attname;
9682
182k
        atts[nbatts++] = aprefix;
9683
182k
        if (aprefix == NULL)
9684
140k
      atts[nbatts++] = NULL;
9685
41.2k
        else
9686
41.2k
            atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
9687
182k
        atts[nbatts++] = defaults->values[5 * i + 2];
9688
182k
        atts[nbatts++] = defaults->values[5 * i + 3];
9689
182k
        if ((ctxt->standalone == 1) &&
9690
182k
            (defaults->values[5 * i + 4] != NULL)) {
9691
0
      xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9692
0
    "standalone: attribute %s on %s defaulted from external subset\n",
9693
0
                                   attname, localname);
9694
0
        }
9695
182k
        nbdef++;
9696
182k
    }
9697
210k
      }
9698
118k
  }
9699
1.02M
    }
9700
9701
    /*
9702
     * Attribute checks
9703
     */
9704
16.9M
    for (i = 0; i < nbatts;i += 5) {
9705
        /*
9706
  * The default namespace does not apply to attribute names.
9707
  */
9708
6.71M
  if (atts[i + 1] != NULL) {
9709
630k
      nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9710
630k
      if (nsname == NULL) {
9711
207k
    xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9712
207k
        "Namespace prefix %s for %s on %s is not defined\n",
9713
207k
        atts[i + 1], atts[i], localname);
9714
207k
      }
9715
630k
      atts[i + 2] = nsname;
9716
630k
  } else
9717
6.08M
      nsname = NULL;
9718
  /*
9719
   * [ WFC: Unique Att Spec ]
9720
   * No attribute name may appear more than once in the same
9721
   * start-tag or empty-element tag.
9722
   * As extended by the Namespaces in XML REC.
9723
   */
9724
9.72M
        for (j = 0; j < i;j += 5) {
9725
3.05M
      if (atts[i] == atts[j]) {
9726
71.7k
          if (atts[i+1] == atts[j+1]) {
9727
45.2k
        xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9728
45.2k
        break;
9729
45.2k
    }
9730
26.5k
    if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9731
74
        xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9732
74
           "Namespaced Attribute %s in '%s' redefined\n",
9733
74
           atts[i], nsname, NULL);
9734
74
        break;
9735
74
    }
9736
26.5k
      }
9737
3.05M
  }
9738
6.71M
    }
9739
9740
10.2M
    nsname = xmlGetNamespace(ctxt, prefix);
9741
10.2M
    if ((prefix != NULL) && (nsname == NULL)) {
9742
1.44M
  xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9743
1.44M
           "Namespace prefix %s on %s is not defined\n",
9744
1.44M
     prefix, localname, NULL);
9745
1.44M
    }
9746
10.2M
    *pref = prefix;
9747
10.2M
    *URI = nsname;
9748
9749
    /*
9750
     * SAX: Start of Element !
9751
     */
9752
10.2M
    if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9753
10.2M
  (!ctxt->disableSAX)) {
9754
9.14M
  if (nbNs > 0)
9755
503k
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9756
503k
        nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9757
503k
        nbatts / 5, nbdef, atts);
9758
8.64M
  else
9759
8.64M
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9760
8.64M
                    nsname, 0, NULL, nbatts / 5, nbdef, atts);
9761
9.14M
    }
9762
9763
10.2M
done:
9764
    /*
9765
     * Free up attribute allocated strings if needed
9766
     */
9767
10.2M
    if (attval != 0) {
9768
1.63M
  for (i = 3,j = 0; j < nratts;i += 5,j++)
9769
958k
      if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9770
751k
          xmlFree((xmlChar *) atts[i]);
9771
676k
    }
9772
9773
10.2M
    return(localname);
9774
10.2M
}
9775
9776
/**
9777
 * xmlParseEndTag2:
9778
 * @ctxt:  an XML parser context
9779
 * @tag:  the start tag data; its prefix, URI, line and nsNr fields
9780
 *        are used when matching and closing the element
9781
 *
9782
 * Parse an end tag. Always consumes '</'.
9783
 *
9784
 * [42] ETag ::= '</' Name S? '>'
9785
 *
9786
 * With namespace
9787
 *
9788
 * [NS 9] ETag ::= '</' QName S? '>'
9789
 */
9790
9791
static void
9792
3.15M
xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlStartTag *tag) {
9793
3.15M
    const xmlChar *name;
9794
9795
3.15M
    GROW;
9796
3.15M
    if ((RAW != '<') || (NXT(1) != '/')) {
9797
0
  xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9798
0
  return;
9799
0
    }
9800
3.15M
    SKIP(2);
9801
9802
3.15M
    if (tag->prefix == NULL)
9803
2.41M
        name = xmlParseNameAndCompare(ctxt, ctxt->name);
9804
740k
    else
9805
740k
        name = xmlParseQNameAndCompare(ctxt, ctxt->name, tag->prefix);
9806
9807
    /*
9808
     * We should definitely be at the ending "S? '>'" part
9809
     */
9810
3.15M
    GROW;
9811
3.15M
    if (ctxt->instate == XML_PARSER_EOF)
9812
0
        return;
9813
3.15M
    SKIP_BLANKS;
9814
3.15M
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9815
337k
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9816
337k
    } else
9817
2.82M
  NEXT1;
9818
9819
    /*
9820
     * [ WFC: Element Type Match ]
9821
     * The Name in an element's end-tag must match the element type in the
9822
     * start-tag.
9823
     *
9824
     */
9825
3.15M
    if (name != (xmlChar*)1) {
9826
661k
        if (name == NULL) name = BAD_CAST "unparsable";
9827
661k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9828
661k
         "Opening and ending tag mismatch: %s line %d and %s\n",
9829
661k
                    ctxt->name, tag->line, name);
9830
661k
    }
9831
9832
    /*
9833
     * SAX: End of Tag
9834
     */
9835
3.15M
    if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9836
3.15M
  (!ctxt->disableSAX))
9837
2.70M
  ctxt->sax->endElementNs(ctxt->userData, ctxt->name, tag->prefix,
9838
2.70M
                                tag->URI);
9839
9840
3.15M
    spacePop(ctxt);
9841
3.15M
    if (tag->nsNr != 0)
9842
88.8k
  nsPop(ctxt, tag->nsNr);
9843
3.15M
}
9844
9845
/**
9846
 * xmlParseCDSect:
9847
 * @ctxt:  an XML parser context
9848
 *
9849
 * DEPRECATED: Internal function, don't use.
9850
 *
9851
 * Parse escaped pure raw content. Always consumes '<!['.
9852
 *
9853
 * [18] CDSect ::= CDStart CData CDEnd
9854
 *
9855
 * [19] CDStart ::= '<![CDATA['
9856
 *
9857
 * [20] CData ::= (Char* - (Char* ']]>' Char*))
9858
 *
9859
 * [21] CDEnd ::= ']]>'
9860
 */
9861
void
9862
168k
xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9863
168k
    xmlChar *buf = NULL;
9864
168k
    int len = 0;
9865
168k
    int size = XML_PARSER_BUFFER_SIZE;
9866
168k
    int r, rl;
9867
168k
    int s, sl;
9868
168k
    int cur, l;
9869
168k
    int count = 0;
9870
168k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9871
55.8k
                    XML_MAX_HUGE_LENGTH :
9872
168k
                    XML_MAX_TEXT_LENGTH;
9873
9874
168k
    if ((CUR != '<') || (NXT(1) != '!') || (NXT(2) != '['))
9875
0
        return;
9876
168k
    SKIP(3);
9877
9878
168k
    if (!CMP6(CUR_PTR, 'C', 'D', 'A', 'T', 'A', '['))
9879
0
        return;
9880
168k
    SKIP(6);
9881
9882
168k
    ctxt->instate = XML_PARSER_CDATA_SECTION;
9883
168k
    r = CUR_CHAR(rl);
9884
168k
    if (!IS_CHAR(r)) {
9885
11.7k
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9886
11.7k
        goto out;
9887
11.7k
    }
9888
156k
    NEXTL(rl);
9889
156k
    s = CUR_CHAR(sl);
9890
156k
    if (!IS_CHAR(s)) {
9891
12.5k
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9892
12.5k
        goto out;
9893
12.5k
    }
9894
143k
    NEXTL(sl);
9895
143k
    cur = CUR_CHAR(l);
9896
143k
    buf = (xmlChar *) xmlMallocAtomic(size);
9897
143k
    if (buf == NULL) {
9898
0
  xmlErrMemory(ctxt, NULL);
9899
0
        goto out;
9900
0
    }
9901
19.1M
    while (IS_CHAR(cur) &&
9902
19.1M
           ((r != ']') || (s != ']') || (cur != '>'))) {
9903
19.0M
  if (len + 5 >= size) {
9904
53.3k
      xmlChar *tmp;
9905
9906
53.3k
      tmp = (xmlChar *) xmlRealloc(buf, size * 2);
9907
53.3k
      if (tmp == NULL) {
9908
0
    xmlErrMemory(ctxt, NULL);
9909
0
                goto out;
9910
0
      }
9911
53.3k
      buf = tmp;
9912
53.3k
      size *= 2;
9913
53.3k
  }
9914
19.0M
  COPY_BUF(rl,buf,len,r);
9915
19.0M
  r = s;
9916
19.0M
  rl = sl;
9917
19.0M
  s = cur;
9918
19.0M
  sl = l;
9919
19.0M
  count++;
9920
19.0M
  if (count > 50) {
9921
337k
      SHRINK;
9922
337k
      GROW;
9923
337k
            if (ctxt->instate == XML_PARSER_EOF) {
9924
0
                goto out;
9925
0
            }
9926
337k
      count = 0;
9927
337k
  }
9928
19.0M
  NEXTL(l);
9929
19.0M
  cur = CUR_CHAR(l);
9930
19.0M
        if (len > maxLength) {
9931
0
            xmlFatalErrMsg(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9932
0
                           "CData section too big found\n");
9933
0
            goto out;
9934
0
        }
9935
19.0M
    }
9936
143k
    buf[len] = 0;
9937
143k
    if (cur != '>') {
9938
30.7k
  xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9939
30.7k
                       "CData section not finished\n%.50s\n", buf);
9940
30.7k
        goto out;
9941
30.7k
    }
9942
113k
    NEXTL(l);
9943
9944
    /*
9945
     * OK the buffer is to be consumed as cdata.
9946
     */
9947
113k
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9948
83.6k
  if (ctxt->sax->cdataBlock != NULL)
9949
55.5k
      ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9950
28.0k
  else if (ctxt->sax->characters != NULL)
9951
28.0k
      ctxt->sax->characters(ctxt->userData, buf, len);
9952
83.6k
    }
9953
9954
168k
out:
9955
168k
    if (ctxt->instate != XML_PARSER_EOF)
9956
168k
        ctxt->instate = XML_PARSER_CONTENT;
9957
168k
    xmlFree(buf);
9958
168k
}
9959
9960
/**
9961
 * xmlParseContentInternal:
9962
 * @ctxt:  an XML parser context
9963
 *
9964
 * Parse a content sequence. Stops at EOF or '</'. Leaves checking of
9965
 * unexpected EOF to the caller.
9966
 */
9967
9968
static void
9969
235k
xmlParseContentInternal(xmlParserCtxtPtr ctxt) {
9970
235k
    int nameNr = ctxt->nameNr;
9971
9972
235k
    GROW;
9973
23.1M
    while ((RAW != 0) &&
9974
23.1M
     (ctxt->instate != XML_PARSER_EOF)) {
9975
22.9M
  const xmlChar *cur = ctxt->input->cur;
9976
9977
  /*
9978
   * First case : a Processing Instruction.
9979
   */
9980
22.9M
  if ((*cur == '<') && (cur[1] == '?')) {
9981
202k
      xmlParsePI(ctxt);
9982
202k
  }
9983
9984
  /*
9985
   * Second case : a CDSection
9986
   */
9987
  /* 2.6.0 test was *cur not RAW */
9988
22.7M
  else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9989
168k
      xmlParseCDSect(ctxt);
9990
168k
  }
9991
9992
  /*
9993
   * Third case :  a comment
9994
   */
9995
22.5M
  else if ((*cur == '<') && (NXT(1) == '!') &&
9996
22.5M
     (NXT(2) == '-') && (NXT(3) == '-')) {
9997
230k
      xmlParseComment(ctxt);
9998
230k
      ctxt->instate = XML_PARSER_CONTENT;
9999
230k
  }
10000
10001
  /*
10002
   * Fourth case :  a sub-element.
10003
   */
10004
22.3M
  else if (*cur == '<') {
10005
8.89M
            if (NXT(1) == '/') {
10006
1.91M
                if (ctxt->nameNr <= nameNr)
10007
35.3k
                    break;
10008
1.88M
          xmlParseElementEnd(ctxt);
10009
6.97M
            } else {
10010
6.97M
          xmlParseElementStart(ctxt);
10011
6.97M
            }
10012
8.89M
  }
10013
10014
  /*
10015
   * Fifth case : a reference. If it has not been resolved,
10016
   *    parsing returns its Name, create the node
10017
   */
10018
10019
13.4M
  else if (*cur == '&') {
10020
2.40M
      xmlParseReference(ctxt);
10021
2.40M
  }
10022
10023
  /*
10024
   * Last case, text. Note that References are handled directly.
10025
   */
10026
11.0M
  else {
10027
11.0M
      xmlParseCharData(ctxt, 0);
10028
11.0M
  }
10029
10030
22.8M
  GROW;
10031
22.8M
  SHRINK;
10032
22.8M
    }
10033
235k
}
10034
10035
/**
10036
 * xmlParseContent:
10037
 * @ctxt:  an XML parser context
10038
 *
10039
 * Parse a content sequence. Stops at EOF or '</'.
10040
 *
10041
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10042
 */
10043
10044
void
10045
87.5k
xmlParseContent(xmlParserCtxtPtr ctxt) {
10046
87.5k
    int nameNr = ctxt->nameNr;
10047
10048
87.5k
    xmlParseContentInternal(ctxt);
10049
10050
87.5k
    if ((ctxt->instate != XML_PARSER_EOF) && (ctxt->nameNr > nameNr)) {
10051
3.16k
        const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
10052
3.16k
        int line = ctxt->pushTab[ctxt->nameNr - 1].line;
10053
3.16k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
10054
3.16k
                "Premature end of data in tag %s line %d\n",
10055
3.16k
    name, line, NULL);
10056
3.16k
    }
10057
87.5k
}
10058
10059
/**
10060
 * xmlParseElement:
10061
 * @ctxt:  an XML parser context
10062
 *
10063
 * DEPRECATED: Internal function, don't use.
10064
 *
10065
 * parse an XML element
10066
 *
10067
 * [39] element ::= EmptyElemTag | STag content ETag
10068
 *
10069
 * [ WFC: Element Type Match ]
10070
 * The Name in an element's end-tag must match the element type in the
10071
 * start-tag.
10072
 *
10073
 */
10074
10075
void
10076
247k
xmlParseElement(xmlParserCtxtPtr ctxt) {
10077
247k
    if (xmlParseElementStart(ctxt) != 0)
10078
99.9k
        return;
10079
10080
147k
    xmlParseContentInternal(ctxt);
10081
147k
    if (ctxt->instate == XML_PARSER_EOF)
10082
393
  return;
10083
10084
147k
    if (CUR == 0) {
10085
112k
        const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
10086
112k
        int line = ctxt->pushTab[ctxt->nameNr - 1].line;
10087
112k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
10088
112k
                "Premature end of data in tag %s line %d\n",
10089
112k
    name, line, NULL);
10090
112k
        return;
10091
112k
    }
10092
10093
34.4k
    xmlParseElementEnd(ctxt);
10094
34.4k
}
10095
10096
/**
10097
 * xmlParseElementStart:
10098
 * @ctxt:  an XML parser context
10099
 *
10100
 * Parse the start of an XML element. Returns -1 in case of error, 0 if an
10101
 * opening tag was parsed, 1 if an empty element was parsed.
10102
 *
10103
 * Always consumes '<'.
10104
 */
10105
static int
10106
7.22M
xmlParseElementStart(xmlParserCtxtPtr ctxt) {
10107
7.22M
    const xmlChar *name;
10108
7.22M
    const xmlChar *prefix = NULL;
10109
7.22M
    const xmlChar *URI = NULL;
10110
7.22M
    xmlParserNodeInfo node_info;
10111
7.22M
    int line, tlen = 0;
10112
7.22M
    xmlNodePtr ret;
10113
7.22M
    int nsNr = ctxt->nsNr;
10114
10115
7.22M
    if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
10116
7.22M
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
10117
159
  xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
10118
159
     "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
10119
159
        xmlParserMaxDepth);
10120
159
  xmlHaltParser(ctxt);
10121
159
  return(-1);
10122
159
    }
10123
10124
    /* Capture start position */
10125
7.22M
    if (ctxt->record_info) {
10126
0
        node_info.begin_pos = ctxt->input->consumed +
10127
0
                          (CUR_PTR - ctxt->input->base);
10128
0
  node_info.begin_line = ctxt->input->line;
10129
0
    }
10130
10131
7.22M
    if (ctxt->spaceNr == 0)
10132
0
  spacePush(ctxt, -1);
10133
7.22M
    else if (*ctxt->space == -2)
10134
1.66M
  spacePush(ctxt, -1);
10135
5.55M
    else
10136
5.55M
  spacePush(ctxt, *ctxt->space);
10137
10138
7.22M
    line = ctxt->input->line;
10139
7.22M
#ifdef LIBXML_SAX1_ENABLED
10140
7.22M
    if (ctxt->sax2)
10141
4.80M
#endif /* LIBXML_SAX1_ENABLED */
10142
4.80M
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
10143
2.41M
#ifdef LIBXML_SAX1_ENABLED
10144
2.41M
    else
10145
2.41M
  name = xmlParseStartTag(ctxt);
10146
7.22M
#endif /* LIBXML_SAX1_ENABLED */
10147
7.22M
    if (ctxt->instate == XML_PARSER_EOF)
10148
291
  return(-1);
10149
7.22M
    if (name == NULL) {
10150
1.32M
  spacePop(ctxt);
10151
1.32M
        return(-1);
10152
1.32M
    }
10153
5.89M
    nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
10154
5.89M
    ret = ctxt->node;
10155
10156
5.89M
#ifdef LIBXML_VALID_ENABLED
10157
    /*
10158
     * [ VC: Root Element Type ]
10159
     * The Name in the document type declaration must match the element
10160
     * type of the root element.
10161
     */
10162
5.89M
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10163
5.89M
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
10164
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
10165
5.89M
#endif /* LIBXML_VALID_ENABLED */
10166
10167
    /*
10168
     * Check for an Empty Element.
10169
     */
10170
5.89M
    if ((RAW == '/') && (NXT(1) == '>')) {
10171
1.28M
        SKIP(2);
10172
1.28M
  if (ctxt->sax2) {
10173
987k
      if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
10174
987k
    (!ctxt->disableSAX))
10175
777k
    ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
10176
987k
#ifdef LIBXML_SAX1_ENABLED
10177
987k
  } else {
10178
299k
      if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
10179
299k
    (!ctxt->disableSAX))
10180
259k
    ctxt->sax->endElement(ctxt->userData, name);
10181
299k
#endif /* LIBXML_SAX1_ENABLED */
10182
299k
  }
10183
1.28M
  namePop(ctxt);
10184
1.28M
  spacePop(ctxt);
10185
1.28M
  if (nsNr != ctxt->nsNr)
10186
22.6k
      nsPop(ctxt, ctxt->nsNr - nsNr);
10187
1.28M
  if ( ret != NULL && ctxt->record_info ) {
10188
0
     node_info.end_pos = ctxt->input->consumed +
10189
0
            (CUR_PTR - ctxt->input->base);
10190
0
     node_info.end_line = ctxt->input->line;
10191
0
     node_info.node = ret;
10192
0
     xmlParserAddNodeInfo(ctxt, &node_info);
10193
0
  }
10194
1.28M
  return(1);
10195
1.28M
    }
10196
4.61M
    if (RAW == '>') {
10197
3.13M
        NEXT1;
10198
3.13M
    } else {
10199
1.47M
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
10200
1.47M
         "Couldn't find end of Start Tag %s line %d\n",
10201
1.47M
                    name, line, NULL);
10202
10203
  /*
10204
   * end of parsing of this node.
10205
   */
10206
1.47M
  nodePop(ctxt);
10207
1.47M
  namePop(ctxt);
10208
1.47M
  spacePop(ctxt);
10209
1.47M
  if (nsNr != ctxt->nsNr)
10210
104k
      nsPop(ctxt, ctxt->nsNr - nsNr);
10211
10212
  /*
10213
   * Capture end position and add node
10214
   */
10215
1.47M
  if ( ret != NULL && ctxt->record_info ) {
10216
0
     node_info.end_pos = ctxt->input->consumed +
10217
0
            (CUR_PTR - ctxt->input->base);
10218
0
     node_info.end_line = ctxt->input->line;
10219
0
     node_info.node = ret;
10220
0
     xmlParserAddNodeInfo(ctxt, &node_info);
10221
0
  }
10222
1.47M
  return(-1);
10223
1.47M
    }
10224
10225
3.13M
    return(0);
10226
4.61M
}
10227
10228
/**
10229
 * xmlParseElementEnd:
10230
 * @ctxt:  an XML parser context
10231
 *
10232
 * Parse the end of an XML element. Always consumes '</'.
10233
 */
10234
static void
10235
1.91M
xmlParseElementEnd(xmlParserCtxtPtr ctxt) {
10236
1.91M
    xmlParserNodeInfo node_info;
10237
1.91M
    xmlNodePtr ret = ctxt->node;
10238
10239
1.91M
    if (ctxt->nameNr <= 0) {
10240
0
        if ((RAW == '<') && (NXT(1) == '/'))
10241
0
            SKIP(2);
10242
0
        return;
10243
0
    }
10244
10245
    /*
10246
     * parse the end of tag: '</' should be here.
10247
     */
10248
1.91M
    if (ctxt->sax2) {
10249
1.32M
  xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
10250
1.32M
  namePop(ctxt);
10251
1.32M
    }
10252
587k
#ifdef LIBXML_SAX1_ENABLED
10253
587k
    else
10254
587k
  xmlParseEndTag1(ctxt, 0);
10255
1.91M
#endif /* LIBXML_SAX1_ENABLED */
10256
10257
    /*
10258
     * Capture end position and add node
10259
     */
10260
1.91M
    if ( ret != NULL && ctxt->record_info ) {
10261
0
       node_info.end_pos = ctxt->input->consumed +
10262
0
                          (CUR_PTR - ctxt->input->base);
10263
0
       node_info.end_line = ctxt->input->line;
10264
0
       node_info.node = ret;
10265
0
       xmlParserAddNodeInfo(ctxt, &node_info);
10266
0
    }
10267
1.91M
}
10268
10269
/**
10270
 * xmlParseVersionNum:
10271
 * @ctxt:  an XML parser context
10272
 *
10273
 * DEPRECATED: Internal function, don't use.
10274
 *
10275
 * parse the XML version value.
10276
 *
10277
 * [26] VersionNum ::= '1.' [0-9]+
10278
 *
10279
 * In practice allow [0-9].[0-9]+ at that level
10280
 *
10281
 * Returns the string giving the XML version number, or NULL
10282
 */
10283
xmlChar *
10284
405k
xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10285
405k
    xmlChar *buf = NULL;
10286
405k
    int len = 0;
10287
405k
    int size = 10;
10288
405k
    xmlChar cur;
10289
10290
405k
    buf = (xmlChar *) xmlMallocAtomic(size);
10291
405k
    if (buf == NULL) {
10292
0
  xmlErrMemory(ctxt, NULL);
10293
0
  return(NULL);
10294
0
    }
10295
405k
    cur = CUR;
10296
405k
    if (!((cur >= '0') && (cur <= '9'))) {
10297
8.63k
  xmlFree(buf);
10298
8.63k
  return(NULL);
10299
8.63k
    }
10300
396k
    buf[len++] = cur;
10301
396k
    NEXT;
10302
396k
    cur=CUR;
10303
396k
    if (cur != '.') {
10304
7.49k
  xmlFree(buf);
10305
7.49k
  return(NULL);
10306
7.49k
    }
10307
388k
    buf[len++] = cur;
10308
388k
    NEXT;
10309
388k
    cur=CUR;
10310
922k
    while ((cur >= '0') && (cur <= '9')) {
10311
533k
  if (len + 1 >= size) {
10312
1.61k
      xmlChar *tmp;
10313
10314
1.61k
      size *= 2;
10315
1.61k
      tmp = (xmlChar *) xmlRealloc(buf, size);
10316
1.61k
      if (tmp == NULL) {
10317
0
          xmlFree(buf);
10318
0
    xmlErrMemory(ctxt, NULL);
10319
0
    return(NULL);
10320
0
      }
10321
1.61k
      buf = tmp;
10322
1.61k
  }
10323
533k
  buf[len++] = cur;
10324
533k
  NEXT;
10325
533k
  cur=CUR;
10326
533k
    }
10327
388k
    buf[len] = 0;
10328
388k
    return(buf);
10329
388k
}
10330
10331
/**
10332
 * xmlParseVersionInfo:
10333
 * @ctxt:  an XML parser context
10334
 *
10335
 * DEPRECATED: Internal function, don't use.
10336
 *
10337
 * parse the XML version.
10338
 *
10339
 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
10340
 *
10341
 * [25] Eq ::= S? '=' S?
10342
 *
10343
 * Returns the version string, e.g. "1.0"
10344
 */
10345
10346
xmlChar *
10347
515k
xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10348
515k
    xmlChar *version = NULL;
10349
10350
515k
    if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
10351
420k
  SKIP(7);
10352
420k
  SKIP_BLANKS;
10353
420k
  if (RAW != '=') {
10354
7.93k
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10355
7.93k
      return(NULL);
10356
7.93k
        }
10357
412k
  NEXT;
10358
412k
  SKIP_BLANKS;
10359
412k
  if (RAW == '"') {
10360
359k
      NEXT;
10361
359k
      version = xmlParseVersionNum(ctxt);
10362
359k
      if (RAW != '"') {
10363
31.5k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10364
31.5k
      } else
10365
328k
          NEXT;
10366
359k
  } else if (RAW == '\''){
10367
45.0k
      NEXT;
10368
45.0k
      version = xmlParseVersionNum(ctxt);
10369
45.0k
      if (RAW != '\'') {
10370
2.46k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10371
2.46k
      } else
10372
42.5k
          NEXT;
10373
45.0k
  } else {
10374
7.28k
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10375
7.28k
  }
10376
412k
    }
10377
507k
    return(version);
10378
515k
}
10379
10380
/**
10381
 * xmlParseEncName:
10382
 * @ctxt:  an XML parser context
10383
 *
10384
 * DEPRECATED: Internal function, don't use.
10385
 *
10386
 * parse the XML encoding name
10387
 *
10388
 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10389
 *
10390
 * Returns the encoding name value or NULL
10391
 */
10392
xmlChar *
10393
146k
xmlParseEncName(xmlParserCtxtPtr ctxt) {
10394
146k
    xmlChar *buf = NULL;
10395
146k
    int len = 0;
10396
146k
    int size = 10;
10397
146k
    xmlChar cur;
10398
10399
146k
    cur = CUR;
10400
146k
    if (((cur >= 'a') && (cur <= 'z')) ||
10401
146k
        ((cur >= 'A') && (cur <= 'Z'))) {
10402
145k
  buf = (xmlChar *) xmlMallocAtomic(size);
10403
145k
  if (buf == NULL) {
10404
0
      xmlErrMemory(ctxt, NULL);
10405
0
      return(NULL);
10406
0
  }
10407
10408
145k
  buf[len++] = cur;
10409
145k
  NEXT;
10410
145k
  cur = CUR;
10411
1.54M
  while (((cur >= 'a') && (cur <= 'z')) ||
10412
1.54M
         ((cur >= 'A') && (cur <= 'Z')) ||
10413
1.54M
         ((cur >= '0') && (cur <= '9')) ||
10414
1.54M
         (cur == '.') || (cur == '_') ||
10415
1.54M
         (cur == '-')) {
10416
1.39M
      if (len + 1 >= size) {
10417
64.1k
          xmlChar *tmp;
10418
10419
64.1k
    size *= 2;
10420
64.1k
    tmp = (xmlChar *) xmlRealloc(buf, size);
10421
64.1k
    if (tmp == NULL) {
10422
0
        xmlErrMemory(ctxt, NULL);
10423
0
        xmlFree(buf);
10424
0
        return(NULL);
10425
0
    }
10426
64.1k
    buf = tmp;
10427
64.1k
      }
10428
1.39M
      buf[len++] = cur;
10429
1.39M
      NEXT;
10430
1.39M
      cur = CUR;
10431
1.39M
      if (cur == 0) {
10432
708
          SHRINK;
10433
708
    GROW;
10434
708
    cur = CUR;
10435
708
      }
10436
1.39M
        }
10437
145k
  buf[len] = 0;
10438
145k
    } else {
10439
1.30k
  xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10440
1.30k
    }
10441
146k
    return(buf);
10442
146k
}
10443
10444
/**
10445
 * xmlParseEncodingDecl:
10446
 * @ctxt:  an XML parser context
10447
 *
10448
 * DEPRECATED: Internal function, don't use.
10449
 *
10450
 * parse the XML encoding declaration
10451
 *
10452
 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' |  "'" EncName "'")
10453
 *
10454
 * this setups the conversion filters.
10455
 *
10456
 * Returns the encoding value or NULL
10457
 */
10458
10459
const xmlChar *
10460
391k
xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10461
391k
    xmlChar *encoding = NULL;
10462
10463
391k
    SKIP_BLANKS;
10464
391k
    if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
10465
149k
  SKIP(8);
10466
149k
  SKIP_BLANKS;
10467
149k
  if (RAW != '=') {
10468
1.85k
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10469
1.85k
      return(NULL);
10470
1.85k
        }
10471
147k
  NEXT;
10472
147k
  SKIP_BLANKS;
10473
147k
  if (RAW == '"') {
10474
120k
      NEXT;
10475
120k
      encoding = xmlParseEncName(ctxt);
10476
120k
      if (RAW != '"') {
10477
6.39k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10478
6.39k
    xmlFree((xmlChar *) encoding);
10479
6.39k
    return(NULL);
10480
6.39k
      } else
10481
114k
          NEXT;
10482
120k
  } else if (RAW == '\''){
10483
25.7k
      NEXT;
10484
25.7k
      encoding = xmlParseEncName(ctxt);
10485
25.7k
      if (RAW != '\'') {
10486
1.82k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10487
1.82k
    xmlFree((xmlChar *) encoding);
10488
1.82k
    return(NULL);
10489
1.82k
      } else
10490
23.9k
          NEXT;
10491
25.7k
  } else {
10492
1.11k
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10493
1.11k
  }
10494
10495
        /*
10496
         * Non standard parsing, allowing the user to ignore encoding
10497
         */
10498
139k
        if (ctxt->options & XML_PARSE_IGNORE_ENC) {
10499
46.1k
      xmlFree((xmlChar *) encoding);
10500
46.1k
            return(NULL);
10501
46.1k
  }
10502
10503
  /*
10504
   * UTF-16 encoding switch has already taken place at this stage,
10505
   * more over the little-endian/big-endian selection is already done
10506
   */
10507
93.3k
        if ((encoding != NULL) &&
10508
93.3k
      ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10509
92.7k
       (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
10510
      /*
10511
       * If no encoding was passed to the parser, that we are
10512
       * using UTF-16 and no decoder is present i.e. the
10513
       * document is apparently UTF-8 compatible, then raise an
10514
       * encoding mismatch fatal error
10515
       */
10516
3.64k
      if ((ctxt->encoding == NULL) &&
10517
3.64k
          (ctxt->input->buf != NULL) &&
10518
3.64k
          (ctxt->input->buf->encoder == NULL)) {
10519
3.64k
    xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10520
3.64k
      "Document labelled UTF-16 but has UTF-8 content\n");
10521
3.64k
      }
10522
3.64k
      if (ctxt->encoding != NULL)
10523
0
    xmlFree((xmlChar *) ctxt->encoding);
10524
3.64k
      ctxt->encoding = encoding;
10525
3.64k
  }
10526
  /*
10527
   * UTF-8 encoding is handled natively
10528
   */
10529
89.7k
        else if ((encoding != NULL) &&
10530
89.7k
      ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10531
89.0k
       (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
10532
35.2k
      if (ctxt->encoding != NULL)
10533
0
    xmlFree((xmlChar *) ctxt->encoding);
10534
35.2k
      ctxt->encoding = encoding;
10535
35.2k
  }
10536
54.5k
  else if (encoding != NULL) {
10537
53.8k
      xmlCharEncodingHandlerPtr handler;
10538
10539
53.8k
      if (ctxt->input->encoding != NULL)
10540
0
    xmlFree((xmlChar *) ctxt->input->encoding);
10541
53.8k
      ctxt->input->encoding = encoding;
10542
10543
53.8k
            handler = xmlFindCharEncodingHandler((const char *) encoding);
10544
53.8k
      if (handler != NULL) {
10545
52.6k
    if (xmlSwitchToEncoding(ctxt, handler) < 0) {
10546
        /* failed to convert */
10547
253
        ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
10548
253
        return(NULL);
10549
253
    }
10550
52.6k
      } else {
10551
1.15k
    xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
10552
1.15k
      "Unsupported encoding %s\n", encoding);
10553
1.15k
    return(NULL);
10554
1.15k
      }
10555
53.8k
  }
10556
93.3k
    }
10557
333k
    return(encoding);
10558
391k
}
10559
10560
/**
10561
 * xmlParseSDDecl:
10562
 * @ctxt:  an XML parser context
10563
 *
10564
 * DEPRECATED: Internal function, don't use.
10565
 *
10566
 * parse the XML standalone declaration
10567
 *
10568
 * [32] SDDecl ::= S 'standalone' Eq
10569
 *                 (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10570
 *
10571
 * [ VC: Standalone Document Declaration ]
10572
 * TODO The standalone document declaration must have the value "no"
10573
 * if any external markup declarations contain declarations of:
10574
 *  - attributes with default values, if elements to which these
10575
 *    attributes apply appear in the document without specifications
10576
 *    of values for these attributes, or
10577
 *  - entities (other than amp, lt, gt, apos, quot), if references
10578
 *    to those entities appear in the document, or
10579
 *  - attributes with values subject to normalization, where the
10580
 *    attribute appears in the document with a value which will change
10581
 *    as a result of normalization, or
10582
 *  - element types with element content, if white space occurs directly
10583
 *    within any instance of those types.
10584
 *
10585
 * Returns:
10586
 *   1 if standalone="yes"
10587
 *   0 if standalone="no"
10588
 *  -2 if standalone attribute is missing or invalid
10589
 *    (A standalone value of -2 means that the XML declaration was found,
10590
 *     but no value was specified for the standalone attribute).
10591
 */
10592
10593
int
10594
342k
xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10595
342k
    int standalone = -2;
10596
10597
342k
    SKIP_BLANKS;
10598
342k
    if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10599
70.2k
  SKIP(10);
10600
70.2k
        SKIP_BLANKS;
10601
70.2k
  if (RAW != '=') {
10602
747
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10603
747
      return(standalone);
10604
747
        }
10605
69.4k
  NEXT;
10606
69.4k
  SKIP_BLANKS;
10607
69.4k
        if (RAW == '\''){
10608
13.1k
      NEXT;
10609
13.1k
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10610
10.4k
          standalone = 0;
10611
10.4k
                SKIP(2);
10612
10.4k
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10613
2.66k
                 (NXT(2) == 's')) {
10614
2.03k
          standalone = 1;
10615
2.03k
    SKIP(3);
10616
2.03k
            } else {
10617
630
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10618
630
      }
10619
13.1k
      if (RAW != '\'') {
10620
984
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10621
984
      } else
10622
12.1k
          NEXT;
10623
56.3k
  } else if (RAW == '"'){
10624
55.4k
      NEXT;
10625
55.4k
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10626
33.3k
          standalone = 0;
10627
33.3k
    SKIP(2);
10628
33.3k
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10629
22.0k
                 (NXT(2) == 's')) {
10630
19.5k
          standalone = 1;
10631
19.5k
                SKIP(3);
10632
19.5k
            } else {
10633
2.50k
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10634
2.50k
      }
10635
55.4k
      if (RAW != '"') {
10636
3.18k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10637
3.18k
      } else
10638
52.2k
          NEXT;
10639
55.4k
  } else {
10640
891
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10641
891
        }
10642
69.4k
    }
10643
341k
    return(standalone);
10644
342k
}
10645
10646
/**
10647
 * xmlParseXMLDecl:
10648
 * @ctxt:  an XML parser context
10649
 *
10650
 * DEPRECATED: Internal function, don't use.
10651
 *
10652
 * parse an XML declaration header
10653
 *
10654
 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10655
 */
10656
10657
void
10658
497k
xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10659
497k
    xmlChar *version;
10660
10661
    /*
10662
     * This value for standalone indicates that the document has an
10663
     * XML declaration but it does not have a standalone attribute.
10664
     * It will be overwritten later if a standalone attribute is found.
10665
     */
10666
497k
    ctxt->input->standalone = -2;
10667
10668
    /*
10669
     * We know that '<?xml' is here.
10670
     */
10671
497k
    SKIP(5);
10672
10673
497k
    if (!IS_BLANK_CH(RAW)) {
10674
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10675
0
                 "Blank needed after '<?xml'\n");
10676
0
    }
10677
497k
    SKIP_BLANKS;
10678
10679
    /*
10680
     * We must have the VersionInfo here.
10681
     */
10682
497k
    version = xmlParseVersionInfo(ctxt);
10683
497k
    if (version == NULL) {
10684
121k
  xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10685
376k
    } else {
10686
376k
  if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10687
      /*
10688
       * Changed here for XML-1.0 5th edition
10689
       */
10690
43.8k
      if (ctxt->options & XML_PARSE_OLD10) {
10691
15.8k
    xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10692
15.8k
                "Unsupported version '%s'\n",
10693
15.8k
                version);
10694
28.0k
      } else {
10695
28.0k
          if ((version[0] == '1') && ((version[1] == '.'))) {
10696
6.34k
        xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10697
6.34k
                      "Unsupported version '%s'\n",
10698
6.34k
          version, NULL);
10699
21.7k
    } else {
10700
21.7k
        xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10701
21.7k
              "Unsupported version '%s'\n",
10702
21.7k
              version);
10703
21.7k
    }
10704
28.0k
      }
10705
43.8k
  }
10706
376k
  if (ctxt->version != NULL)
10707
0
      xmlFree((void *) ctxt->version);
10708
376k
  ctxt->version = version;
10709
376k
    }
10710
10711
    /*
10712
     * We may have the encoding declaration
10713
     */
10714
497k
    if (!IS_BLANK_CH(RAW)) {
10715
265k
        if ((RAW == '?') && (NXT(1) == '>')) {
10716
123k
      SKIP(2);
10717
123k
      return;
10718
123k
  }
10719
142k
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10720
142k
    }
10721
373k
    xmlParseEncodingDecl(ctxt);
10722
373k
    if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10723
373k
         (ctxt->instate == XML_PARSER_EOF)) {
10724
  /*
10725
   * The XML REC instructs us to stop parsing right here
10726
   */
10727
1.11k
        return;
10728
1.11k
    }
10729
10730
    /*
10731
     * We may have the standalone status.
10732
     */
10733
372k
    if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10734
36.4k
        if ((RAW == '?') && (NXT(1) == '>')) {
10735
30.1k
      SKIP(2);
10736
30.1k
      return;
10737
30.1k
  }
10738
6.31k
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10739
6.31k
    }
10740
10741
    /*
10742
     * We can grow the input buffer freely at that point
10743
     */
10744
342k
    GROW;
10745
10746
342k
    SKIP_BLANKS;
10747
342k
    ctxt->input->standalone = xmlParseSDDecl(ctxt);
10748
10749
342k
    SKIP_BLANKS;
10750
342k
    if ((RAW == '?') && (NXT(1) == '>')) {
10751
144k
        SKIP(2);
10752
198k
    } else if (RAW == '>') {
10753
        /* Deprecated old WD ... */
10754
906
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10755
906
  NEXT;
10756
197k
    } else {
10757
197k
        int c;
10758
10759
197k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10760
6.90M
        while ((c = CUR) != 0) {
10761
6.89M
            NEXT;
10762
6.89M
            if (c == '>')
10763
187k
                break;
10764
6.89M
        }
10765
197k
    }
10766
342k
}
10767
10768
/**
10769
 * xmlParseMisc:
10770
 * @ctxt:  an XML parser context
10771
 *
10772
 * DEPRECATED: Internal function, don't use.
10773
 *
10774
 * parse an XML Misc* optional field.
10775
 *
10776
 * [27] Misc ::= Comment | PI |  S
10777
 */
10778
10779
void
10780
714k
xmlParseMisc(xmlParserCtxtPtr ctxt) {
10781
839k
    while (ctxt->instate != XML_PARSER_EOF) {
10782
839k
        SKIP_BLANKS;
10783
839k
        GROW;
10784
839k
        if ((RAW == '<') && (NXT(1) == '?')) {
10785
96.6k
      xmlParsePI(ctxt);
10786
743k
        } else if (CMP4(CUR_PTR, '<', '!', '-', '-')) {
10787
29.0k
      xmlParseComment(ctxt);
10788
714k
        } else {
10789
714k
            break;
10790
714k
        }
10791
839k
    }
10792
714k
}
10793
10794
/**
10795
 * xmlParseDocument:
10796
 * @ctxt:  an XML parser context
10797
 *
10798
 * parse an XML document (and build a tree if using the standard SAX
10799
 * interface).
10800
 *
10801
 * [1] document ::= prolog element Misc*
10802
 *
10803
 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10804
 *
10805
 * Returns 0, -1 in case of error. the parser context is augmented
10806
 *                as a result of the parsing.
10807
 */
10808
10809
int
10810
363k
xmlParseDocument(xmlParserCtxtPtr ctxt) {
10811
363k
    xmlChar start[4];
10812
363k
    xmlCharEncoding enc;
10813
10814
363k
    xmlInitParser();
10815
10816
363k
    if ((ctxt == NULL) || (ctxt->input == NULL))
10817
0
        return(-1);
10818
10819
363k
    GROW;
10820
10821
    /*
10822
     * SAX: detecting the level.
10823
     */
10824
363k
    xmlDetectSAX2(ctxt);
10825
10826
    /*
10827
     * SAX: beginning of the document processing.
10828
     */
10829
363k
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10830
363k
        ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10831
363k
    if (ctxt->instate == XML_PARSER_EOF)
10832
0
  return(-1);
10833
10834
363k
    if ((ctxt->encoding == NULL) &&
10835
363k
        ((ctxt->input->end - ctxt->input->cur) >= 4)) {
10836
  /*
10837
   * Get the 4 first bytes and decode the charset
10838
   * if enc != XML_CHAR_ENCODING_NONE
10839
   * plug some encoding conversion routines.
10840
   */
10841
356k
  start[0] = RAW;
10842
356k
  start[1] = NXT(1);
10843
356k
  start[2] = NXT(2);
10844
356k
  start[3] = NXT(3);
10845
356k
  enc = xmlDetectCharEncoding(&start[0], 4);
10846
356k
  if (enc != XML_CHAR_ENCODING_NONE) {
10847
200k
      xmlSwitchEncoding(ctxt, enc);
10848
200k
  }
10849
356k
    }
10850
10851
10852
363k
    if (CUR == 0) {
10853
1.44k
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10854
1.44k
  return(-1);
10855
1.44k
    }
10856
10857
    /*
10858
     * Check for the XMLDecl in the Prolog.
10859
     * do not GROW here to avoid the detected encoder to decode more
10860
     * than just the first line, unless the amount of data is really
10861
     * too small to hold "<?xml version="1.0" encoding="foo"
10862
     */
10863
361k
    if ((ctxt->input->end - ctxt->input->cur) < 35) {
10864
22.4k
       GROW;
10865
22.4k
    }
10866
361k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10867
10868
  /*
10869
   * Note that we will switch encoding on the fly.
10870
   */
10871
165k
  xmlParseXMLDecl(ctxt);
10872
165k
  if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10873
165k
      (ctxt->instate == XML_PARSER_EOF)) {
10874
      /*
10875
       * The XML REC instructs us to stop parsing right here
10876
       */
10877
371
      return(-1);
10878
371
  }
10879
165k
  ctxt->standalone = ctxt->input->standalone;
10880
165k
  SKIP_BLANKS;
10881
195k
    } else {
10882
195k
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10883
195k
    }
10884
361k
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10885
337k
        ctxt->sax->startDocument(ctxt->userData);
10886
361k
    if (ctxt->instate == XML_PARSER_EOF)
10887
0
  return(-1);
10888
361k
    if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10889
361k
        (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10890
0
  ctxt->myDoc->compression = ctxt->input->buf->compressed;
10891
0
    }
10892
10893
    /*
10894
     * The Misc part of the Prolog
10895
     */
10896
361k
    xmlParseMisc(ctxt);
10897
10898
    /*
10899
     * Then possibly doc type declaration(s) and more Misc
10900
     * (doctypedecl Misc*)?
10901
     */
10902
361k
    GROW;
10903
361k
    if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10904
10905
177k
  ctxt->inSubset = 1;
10906
177k
  xmlParseDocTypeDecl(ctxt);
10907
177k
  if (RAW == '[') {
10908
121k
      ctxt->instate = XML_PARSER_DTD;
10909
121k
      xmlParseInternalSubset(ctxt);
10910
121k
      if (ctxt->instate == XML_PARSER_EOF)
10911
62.1k
    return(-1);
10912
121k
  }
10913
10914
  /*
10915
   * Create and update the external subset.
10916
   */
10917
115k
  ctxt->inSubset = 2;
10918
115k
  if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10919
115k
      (!ctxt->disableSAX))
10920
104k
      ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10921
104k
                                ctxt->extSubSystem, ctxt->extSubURI);
10922
115k
  if (ctxt->instate == XML_PARSER_EOF)
10923
10.5k
      return(-1);
10924
105k
  ctxt->inSubset = 0;
10925
10926
105k
        xmlCleanSpecialAttr(ctxt);
10927
10928
105k
  ctxt->instate = XML_PARSER_PROLOG;
10929
105k
  xmlParseMisc(ctxt);
10930
105k
    }
10931
10932
    /*
10933
     * Time to start parsing the tree itself
10934
     */
10935
288k
    GROW;
10936
288k
    if (RAW != '<') {
10937
40.7k
  xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10938
40.7k
           "Start tag expected, '<' not found\n");
10939
247k
    } else {
10940
247k
  ctxt->instate = XML_PARSER_CONTENT;
10941
247k
  xmlParseElement(ctxt);
10942
247k
  ctxt->instate = XML_PARSER_EPILOG;
10943
10944
10945
  /*
10946
   * The Misc part at the end
10947
   */
10948
247k
  xmlParseMisc(ctxt);
10949
10950
247k
  if (RAW != 0) {
10951
96.0k
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10952
96.0k
  }
10953
247k
  ctxt->instate = XML_PARSER_EOF;
10954
247k
    }
10955
10956
    /*
10957
     * SAX: end of the document processing.
10958
     */
10959
288k
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10960
288k
        ctxt->sax->endDocument(ctxt->userData);
10961
10962
    /*
10963
     * Remove locally kept entity definitions if the tree was not built
10964
     */
10965
288k
    if ((ctxt->myDoc != NULL) &&
10966
288k
  (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10967
1.18k
  xmlFreeDoc(ctxt->myDoc);
10968
1.18k
  ctxt->myDoc = NULL;
10969
1.18k
    }
10970
10971
288k
    if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10972
10.6k
        ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10973
10.6k
  if (ctxt->valid)
10974
7.74k
      ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10975
10.6k
  if (ctxt->nsWellFormed)
10976
9.92k
      ctxt->myDoc->properties |= XML_DOC_NSVALID;
10977
10.6k
  if (ctxt->options & XML_PARSE_OLD10)
10978
1.45k
      ctxt->myDoc->properties |= XML_DOC_OLD10;
10979
10.6k
    }
10980
288k
    if (! ctxt->wellFormed) {
10981
277k
  ctxt->valid = 0;
10982
277k
  return(-1);
10983
277k
    }
10984
10.6k
    return(0);
10985
288k
}
10986
10987
/**
10988
 * xmlParseExtParsedEnt:
10989
 * @ctxt:  an XML parser context
10990
 *
10991
 * parse a general parsed entity
10992
 * An external general parsed entity is well-formed if it matches the
10993
 * production labeled extParsedEnt.
10994
 *
10995
 * [78] extParsedEnt ::= TextDecl? content
10996
 *
10997
 * Returns 0, -1 in case of error. the parser context is augmented
10998
 *                as a result of the parsing.
10999
 */
11000
11001
int
11002
0
xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
11003
0
    xmlChar start[4];
11004
0
    xmlCharEncoding enc;
11005
11006
0
    if ((ctxt == NULL) || (ctxt->input == NULL))
11007
0
        return(-1);
11008
11009
0
    xmlDetectSAX2(ctxt);
11010
11011
0
    GROW;
11012
11013
    /*
11014
     * SAX: beginning of the document processing.
11015
     */
11016
0
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11017
0
        ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
11018
11019
    /*
11020
     * Get the 4 first bytes and decode the charset
11021
     * if enc != XML_CHAR_ENCODING_NONE
11022
     * plug some encoding conversion routines.
11023
     */
11024
0
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11025
0
  start[0] = RAW;
11026
0
  start[1] = NXT(1);
11027
0
  start[2] = NXT(2);
11028
0
  start[3] = NXT(3);
11029
0
  enc = xmlDetectCharEncoding(start, 4);
11030
0
  if (enc != XML_CHAR_ENCODING_NONE) {
11031
0
      xmlSwitchEncoding(ctxt, enc);
11032
0
  }
11033
0
    }
11034
11035
11036
0
    if (CUR == 0) {
11037
0
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11038
0
    }
11039
11040
    /*
11041
     * Check for the XMLDecl in the Prolog.
11042
     */
11043
0
    GROW;
11044
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
11045
11046
  /*
11047
   * Note that we will switch encoding on the fly.
11048
   */
11049
0
  xmlParseXMLDecl(ctxt);
11050
0
  if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11051
      /*
11052
       * The XML REC instructs us to stop parsing right here
11053
       */
11054
0
      return(-1);
11055
0
  }
11056
0
  SKIP_BLANKS;
11057
0
    } else {
11058
0
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11059
0
    }
11060
0
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
11061
0
        ctxt->sax->startDocument(ctxt->userData);
11062
0
    if (ctxt->instate == XML_PARSER_EOF)
11063
0
  return(-1);
11064
11065
    /*
11066
     * Doing validity checking on chunk doesn't make sense
11067
     */
11068
0
    ctxt->instate = XML_PARSER_CONTENT;
11069
0
    ctxt->validate = 0;
11070
0
    ctxt->loadsubset = 0;
11071
0
    ctxt->depth = 0;
11072
11073
0
    xmlParseContent(ctxt);
11074
0
    if (ctxt->instate == XML_PARSER_EOF)
11075
0
  return(-1);
11076
11077
0
    if ((RAW == '<') && (NXT(1) == '/')) {
11078
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11079
0
    } else if (RAW != 0) {
11080
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
11081
0
    }
11082
11083
    /*
11084
     * SAX: end of the document processing.
11085
     */
11086
0
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11087
0
        ctxt->sax->endDocument(ctxt->userData);
11088
11089
0
    if (! ctxt->wellFormed) return(-1);
11090
0
    return(0);
11091
0
}
11092
11093
#ifdef LIBXML_PUSH_ENABLED
11094
/************************************************************************
11095
 *                  *
11096
 *    Progressive parsing interfaces        *
11097
 *                  *
11098
 ************************************************************************/
11099
11100
/**
11101
 * xmlParseLookupChar:
11102
 * @ctxt:  an XML parser context
11103
 * @c:  character
11104
 *
11105
 * Check whether the input buffer contains a character.
11106
 */
11107
static int
11108
5.25M
xmlParseLookupChar(xmlParserCtxtPtr ctxt, int c) {
11109
5.25M
    const xmlChar *cur;
11110
11111
5.25M
    if (ctxt->checkIndex == 0) {
11112
4.86M
        cur = ctxt->input->cur + 1;
11113
4.86M
    } else {
11114
395k
        cur = ctxt->input->cur + ctxt->checkIndex;
11115
395k
    }
11116
11117
5.25M
    if (memchr(cur, c, ctxt->input->end - cur) == NULL) {
11118
425k
        ctxt->checkIndex = ctxt->input->end - ctxt->input->cur;
11119
425k
        return(0);
11120
4.83M
    } else {
11121
4.83M
        ctxt->checkIndex = 0;
11122
4.83M
        return(1);
11123
4.83M
    }
11124
5.25M
}
11125
11126
/**
11127
 * xmlParseLookupString:
11128
 * @ctxt:  an XML parser context
11129
 * @startDelta: delta to apply at the start
11130
 * @str:  string
11131
 * @strLen:  length of string
11132
 *
11133
 * Check whether the input buffer contains a string.
11134
 */
11135
static const xmlChar *
11136
xmlParseLookupString(xmlParserCtxtPtr ctxt, size_t startDelta,
11137
2.82M
                     const char *str, size_t strLen) {
11138
2.82M
    const xmlChar *cur, *term;
11139
11140
2.82M
    if (ctxt->checkIndex == 0) {
11141
1.34M
        cur = ctxt->input->cur + startDelta;
11142
1.48M
    } else {
11143
1.48M
        cur = ctxt->input->cur + ctxt->checkIndex;
11144
1.48M
    }
11145
11146
2.82M
    term = BAD_CAST strstr((const char *) cur, str);
11147
2.82M
    if (term == NULL) {
11148
1.71M
        const xmlChar *end = ctxt->input->end;
11149
11150
        /* Rescan (strLen - 1) characters. */
11151
1.71M
        if ((size_t) (end - cur) < strLen)
11152
67.4k
            end = cur;
11153
1.64M
        else
11154
1.64M
            end -= strLen - 1;
11155
1.71M
        ctxt->checkIndex = end - ctxt->input->cur;
11156
1.71M
    } else {
11157
1.11M
        ctxt->checkIndex = 0;
11158
1.11M
    }
11159
11160
2.82M
    return(term);
11161
2.82M
}
11162
11163
/**
11164
 * xmlParseLookupCharData:
11165
 * @ctxt:  an XML parser context
11166
 *
11167
 * Check whether the input buffer contains terminated char data.
11168
 */
11169
static int
11170
11.2M
xmlParseLookupCharData(xmlParserCtxtPtr ctxt) {
11171
11.2M
    const xmlChar *cur = ctxt->input->cur + ctxt->checkIndex;
11172
11.2M
    const xmlChar *end = ctxt->input->end;
11173
11174
299M
    while (cur < end) {
11175
298M
        if ((*cur == '<') || (*cur == '&')) {
11176
10.4M
            ctxt->checkIndex = 0;
11177
10.4M
            return(1);
11178
10.4M
        }
11179
287M
        cur++;
11180
287M
    }
11181
11182
826k
    ctxt->checkIndex = cur - ctxt->input->cur;
11183
826k
    return(0);
11184
11.2M
}
11185
11186
/**
11187
 * xmlParseLookupGt:
11188
 * @ctxt:  an XML parser context
11189
 *
11190
 * Check whether there's enough data in the input buffer to finish parsing
11191
 * a start tag. This has to take quotes into account.
11192
 */
11193
static int
11194
9.67M
xmlParseLookupGt(xmlParserCtxtPtr ctxt) {
11195
9.67M
    const xmlChar *cur;
11196
9.67M
    const xmlChar *end = ctxt->input->end;
11197
9.67M
    int state = ctxt->endCheckState;
11198
11199
9.67M
    if (ctxt->checkIndex == 0)
11200
7.34M
        cur = ctxt->input->cur + 1;
11201
2.33M
    else
11202
2.33M
        cur = ctxt->input->cur + ctxt->checkIndex;
11203
11204
659M
    while (cur < end) {
11205
656M
        if (state) {
11206
349M
            if (*cur == state)
11207
9.57M
                state = 0;
11208
349M
        } else if (*cur == '\'' || *cur == '"') {
11209
9.69M
            state = *cur;
11210
297M
        } else if (*cur == '>') {
11211
7.15M
            ctxt->checkIndex = 0;
11212
7.15M
            ctxt->endCheckState = 0;
11213
7.15M
            return(1);
11214
7.15M
        }
11215
649M
        cur++;
11216
649M
    }
11217
11218
2.52M
    ctxt->checkIndex = cur - ctxt->input->cur;
11219
2.52M
    ctxt->endCheckState = state;
11220
2.52M
    return(0);
11221
9.67M
}
11222
11223
/**
11224
 * xmlParseLookupInternalSubset:
11225
 * @ctxt:  an XML parser context
11226
 *
11227
 * Check whether there's enough data in the input buffer to finish parsing
11228
 * the internal subset.
11229
 */
11230
static int
11231
660k
xmlParseLookupInternalSubset(xmlParserCtxtPtr ctxt) {
11232
    /*
11233
     * Sorry, but progressive parsing of the internal subset is not
11234
     * supported. We first check that the full content of the internal
11235
     * subset is available and parsing is launched only at that point.
11236
     * Internal subset ends with "']' S? '>'" in an unescaped section and
11237
     * not in a ']]>' sequence which are conditional sections.
11238
     */
11239
660k
    const xmlChar *cur, *start;
11240
660k
    const xmlChar *end = ctxt->input->end;
11241
660k
    int state = ctxt->endCheckState;
11242
11243
660k
    if (ctxt->checkIndex == 0) {
11244
197k
        cur = ctxt->input->cur + 1;
11245
463k
    } else {
11246
463k
        cur = ctxt->input->cur + ctxt->checkIndex;
11247
463k
    }
11248
660k
    start = cur;
11249
11250
111M
    while (cur < end) {
11251
110M
        if (state == '-') {
11252
20.3M
            if ((*cur == '-') &&
11253
20.3M
                (cur[1] == '-') &&
11254
20.3M
                (cur[2] == '>')) {
11255
155k
                state = 0;
11256
155k
                cur += 3;
11257
155k
                start = cur;
11258
155k
                continue;
11259
155k
            }
11260
20.3M
        }
11261
90.3M
        else if (state == ']') {
11262
480k
            if (*cur == '>') {
11263
125k
                ctxt->checkIndex = 0;
11264
125k
                ctxt->endCheckState = 0;
11265
125k
                return(1);
11266
125k
            }
11267
355k
            if (IS_BLANK_CH(*cur)) {
11268
27.0k
                state = ' ';
11269
328k
            } else if (*cur != ']') {
11270
32.7k
                state = 0;
11271
32.7k
                start = cur;
11272
32.7k
                continue;
11273
32.7k
            }
11274
355k
        }
11275
89.8M
        else if (state == ' ') {
11276
212k
            if (*cur == '>') {
11277
4.00k
                ctxt->checkIndex = 0;
11278
4.00k
                ctxt->endCheckState = 0;
11279
4.00k
                return(1);
11280
4.00k
            }
11281
208k
            if (!IS_BLANK_CH(*cur)) {
11282
22.9k
                state = 0;
11283
22.9k
                start = cur;
11284
22.9k
                continue;
11285
22.9k
            }
11286
208k
        }
11287
89.6M
        else if (state != 0) {
11288
41.5M
            if (*cur == state) {
11289
921k
                state = 0;
11290
921k
                start = cur + 1;
11291
921k
            }
11292
41.5M
        }
11293
48.1M
        else if (*cur == '<') {
11294
1.47M
            if ((cur[1] == '!') &&
11295
1.47M
                (cur[2] == '-') &&
11296
1.47M
                (cur[3] == '-')) {
11297
158k
                state = '-';
11298
158k
                cur += 4;
11299
                /* Don't treat <!--> as comment */
11300
158k
                start = cur;
11301
158k
                continue;
11302
158k
            }
11303
1.47M
        }
11304
46.6M
        else if ((*cur == '"') || (*cur == '\'') || (*cur == ']')) {
11305
1.13M
            state = *cur;
11306
1.13M
        }
11307
11308
110M
        cur++;
11309
110M
    }
11310
11311
    /*
11312
     * Rescan the three last characters to detect "<!--" and "-->"
11313
     * split across chunks.
11314
     */
11315
531k
    if ((state == 0) || (state == '-')) {
11316
323k
        if (cur - start < 3)
11317
23.6k
            cur = start;
11318
299k
        else
11319
299k
            cur -= 3;
11320
323k
    }
11321
531k
    ctxt->checkIndex = cur - ctxt->input->cur;
11322
531k
    ctxt->endCheckState = state;
11323
531k
    return(0);
11324
660k
}
11325
11326
/**
11327
 * xmlCheckCdataPush:
11328
 * @cur: pointer to the block of characters
11329
 * @len: length of the block in bytes
11330
 * @complete: 1 if complete CDATA block is passed in, 0 if partial block
11331
 *
11332
 * Check that the block of characters is okay as SCdata content [20]
11333
 *
11334
 * Returns the number of bytes to pass if okay, a negative index where an
11335
 *         UTF-8 error occurred otherwise
11336
 */
11337
static int
11338
385k
xmlCheckCdataPush(const xmlChar *utf, int len, int complete) {
11339
385k
    int ix;
11340
385k
    unsigned char c;
11341
385k
    int codepoint;
11342
11343
385k
    if ((utf == NULL) || (len <= 0))
11344
15.3k
        return(0);
11345
11346
16.4M
    for (ix = 0; ix < len;) {      /* string is 0-terminated */
11347
16.2M
        c = utf[ix];
11348
16.2M
        if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
11349
14.5M
      if (c >= 0x20)
11350
13.2M
    ix++;
11351
1.35M
      else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11352
1.31M
          ix++;
11353
40.8k
      else
11354
40.8k
          return(-ix);
11355
14.5M
  } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
11356
1.02M
      if (ix + 2 > len) return(complete ? -ix : ix);
11357
1.01M
      if ((utf[ix+1] & 0xc0 ) != 0x80)
11358
30.4k
          return(-ix);
11359
984k
      codepoint = (utf[ix] & 0x1f) << 6;
11360
984k
      codepoint |= utf[ix+1] & 0x3f;
11361
984k
      if (!xmlIsCharQ(codepoint))
11362
10.4k
          return(-ix);
11363
974k
      ix += 2;
11364
974k
  } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
11365
381k
      if (ix + 3 > len) return(complete ? -ix : ix);
11366
373k
      if (((utf[ix+1] & 0xc0) != 0x80) ||
11367
373k
          ((utf[ix+2] & 0xc0) != 0x80))
11368
25.5k
        return(-ix);
11369
348k
      codepoint = (utf[ix] & 0xf) << 12;
11370
348k
      codepoint |= (utf[ix+1] & 0x3f) << 6;
11371
348k
      codepoint |= utf[ix+2] & 0x3f;
11372
348k
      if (!xmlIsCharQ(codepoint))
11373
13.7k
          return(-ix);
11374
334k
      ix += 3;
11375
334k
  } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
11376
232k
      if (ix + 4 > len) return(complete ? -ix : ix);
11377
226k
      if (((utf[ix+1] & 0xc0) != 0x80) ||
11378
226k
          ((utf[ix+2] & 0xc0) != 0x80) ||
11379
226k
    ((utf[ix+3] & 0xc0) != 0x80))
11380
24.9k
        return(-ix);
11381
201k
      codepoint = (utf[ix] & 0x7) << 18;
11382
201k
      codepoint |= (utf[ix+1] & 0x3f) << 12;
11383
201k
      codepoint |= (utf[ix+2] & 0x3f) << 6;
11384
201k
      codepoint |= utf[ix+3] & 0x3f;
11385
201k
      if (!xmlIsCharQ(codepoint))
11386
8.52k
          return(-ix);
11387
193k
      ix += 4;
11388
193k
  } else       /* unknown encoding */
11389
48.8k
      return(-ix);
11390
16.2M
      }
11391
146k
      return(ix);
11392
369k
}
11393
11394
/**
11395
 * xmlParseTryOrFinish:
11396
 * @ctxt:  an XML parser context
11397
 * @terminate:  last chunk indicator
11398
 *
11399
 * Try to progress on parsing
11400
 *
11401
 * Returns zero if no parsing was possible
11402
 */
11403
static int
11404
7.01M
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11405
7.01M
    int ret = 0;
11406
7.01M
    int avail, tlen;
11407
7.01M
    xmlChar cur, next;
11408
11409
7.01M
    if (ctxt->input == NULL)
11410
0
        return(0);
11411
11412
#ifdef DEBUG_PUSH
11413
    switch (ctxt->instate) {
11414
  case XML_PARSER_EOF:
11415
      xmlGenericError(xmlGenericErrorContext,
11416
        "PP: try EOF\n"); break;
11417
  case XML_PARSER_START:
11418
      xmlGenericError(xmlGenericErrorContext,
11419
        "PP: try START\n"); break;
11420
  case XML_PARSER_MISC:
11421
      xmlGenericError(xmlGenericErrorContext,
11422
        "PP: try MISC\n");break;
11423
  case XML_PARSER_COMMENT:
11424
      xmlGenericError(xmlGenericErrorContext,
11425
        "PP: try COMMENT\n");break;
11426
  case XML_PARSER_PROLOG:
11427
      xmlGenericError(xmlGenericErrorContext,
11428
        "PP: try PROLOG\n");break;
11429
  case XML_PARSER_START_TAG:
11430
      xmlGenericError(xmlGenericErrorContext,
11431
        "PP: try START_TAG\n");break;
11432
  case XML_PARSER_CONTENT:
11433
      xmlGenericError(xmlGenericErrorContext,
11434
        "PP: try CONTENT\n");break;
11435
  case XML_PARSER_CDATA_SECTION:
11436
      xmlGenericError(xmlGenericErrorContext,
11437
        "PP: try CDATA_SECTION\n");break;
11438
  case XML_PARSER_END_TAG:
11439
      xmlGenericError(xmlGenericErrorContext,
11440
        "PP: try END_TAG\n");break;
11441
  case XML_PARSER_ENTITY_DECL:
11442
      xmlGenericError(xmlGenericErrorContext,
11443
        "PP: try ENTITY_DECL\n");break;
11444
  case XML_PARSER_ENTITY_VALUE:
11445
      xmlGenericError(xmlGenericErrorContext,
11446
        "PP: try ENTITY_VALUE\n");break;
11447
  case XML_PARSER_ATTRIBUTE_VALUE:
11448
      xmlGenericError(xmlGenericErrorContext,
11449
        "PP: try ATTRIBUTE_VALUE\n");break;
11450
  case XML_PARSER_DTD:
11451
      xmlGenericError(xmlGenericErrorContext,
11452
        "PP: try DTD\n");break;
11453
  case XML_PARSER_EPILOG:
11454
      xmlGenericError(xmlGenericErrorContext,
11455
        "PP: try EPILOG\n");break;
11456
  case XML_PARSER_PI:
11457
      xmlGenericError(xmlGenericErrorContext,
11458
        "PP: try PI\n");break;
11459
        case XML_PARSER_IGNORE:
11460
            xmlGenericError(xmlGenericErrorContext,
11461
        "PP: try IGNORE\n");break;
11462
    }
11463
#endif
11464
11465
7.01M
    if ((ctxt->input != NULL) &&
11466
7.01M
        (ctxt->input->cur - ctxt->input->base > 4096)) {
11467
77.6k
        xmlParserInputShrink(ctxt->input);
11468
77.6k
    }
11469
11470
67.1M
    while (ctxt->instate != XML_PARSER_EOF) {
11471
67.1M
  if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11472
203k
      return(0);
11473
11474
66.9M
  if (ctxt->input == NULL) break;
11475
66.9M
  if (ctxt->input->buf == NULL)
11476
0
      avail = ctxt->input->length -
11477
0
              (ctxt->input->cur - ctxt->input->base);
11478
66.9M
  else {
11479
      /*
11480
       * If we are operating on converted input, try to flush
11481
       * remaining chars to avoid them stalling in the non-converted
11482
       * buffer. But do not do this in document start where
11483
       * encoding="..." may not have been read and we work on a
11484
       * guessed encoding.
11485
       */
11486
66.9M
      if ((ctxt->instate != XML_PARSER_START) &&
11487
66.9M
          (ctxt->input->buf->raw != NULL) &&
11488
66.9M
    (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
11489
1.39M
                size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11490
1.39M
                                                 ctxt->input);
11491
1.39M
    size_t current = ctxt->input->cur - ctxt->input->base;
11492
11493
1.39M
    xmlParserInputBufferPush(ctxt->input->buf, 0, "");
11494
1.39M
                xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11495
1.39M
                                      base, current);
11496
1.39M
      }
11497
66.9M
      avail = xmlBufUse(ctxt->input->buf->buffer) -
11498
66.9M
        (ctxt->input->cur - ctxt->input->base);
11499
66.9M
  }
11500
66.9M
        if (avail < 1)
11501
281k
      goto done;
11502
66.6M
        switch (ctxt->instate) {
11503
0
            case XML_PARSER_EOF:
11504
          /*
11505
     * Document parsing is done !
11506
     */
11507
0
          goto done;
11508
2.29M
            case XML_PARSER_START:
11509
2.29M
    if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11510
544k
        xmlChar start[4];
11511
544k
        xmlCharEncoding enc;
11512
11513
        /*
11514
         * Very first chars read from the document flow.
11515
         */
11516
544k
        if (avail < 4)
11517
32.0k
      goto done;
11518
11519
        /*
11520
         * Get the 4 first bytes and decode the charset
11521
         * if enc != XML_CHAR_ENCODING_NONE
11522
         * plug some encoding conversion routines,
11523
         * else xmlSwitchEncoding will set to (default)
11524
         * UTF8.
11525
         */
11526
512k
        start[0] = RAW;
11527
512k
        start[1] = NXT(1);
11528
512k
        start[2] = NXT(2);
11529
512k
        start[3] = NXT(3);
11530
512k
        enc = xmlDetectCharEncoding(start, 4);
11531
512k
        xmlSwitchEncoding(ctxt, enc);
11532
512k
        break;
11533
544k
    }
11534
11535
1.74M
    if (avail < 2)
11536
331
        goto done;
11537
1.74M
    cur = ctxt->input->cur[0];
11538
1.74M
    next = ctxt->input->cur[1];
11539
1.74M
    if (cur == 0) {
11540
2.30k
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11541
2.30k
      ctxt->sax->setDocumentLocator(ctxt->userData,
11542
2.30k
                  &xmlDefaultSAXLocator);
11543
2.30k
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11544
2.30k
        xmlHaltParser(ctxt);
11545
#ifdef DEBUG_PUSH
11546
        xmlGenericError(xmlGenericErrorContext,
11547
          "PP: entering EOF\n");
11548
#endif
11549
2.30k
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11550
2.30k
      ctxt->sax->endDocument(ctxt->userData);
11551
2.30k
        goto done;
11552
2.30k
    }
11553
1.74M
          if ((cur == '<') && (next == '?')) {
11554
        /* PI or XML decl */
11555
1.45M
        if (avail < 5) goto done;
11556
1.45M
        if ((!terminate) &&
11557
1.45M
                        (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11558
1.03M
      goto done;
11559
417k
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11560
417k
      ctxt->sax->setDocumentLocator(ctxt->userData,
11561
417k
                  &xmlDefaultSAXLocator);
11562
417k
        if ((ctxt->input->cur[2] == 'x') &&
11563
417k
      (ctxt->input->cur[3] == 'm') &&
11564
417k
      (ctxt->input->cur[4] == 'l') &&
11565
417k
      (IS_BLANK_CH(ctxt->input->cur[5]))) {
11566
331k
      ret += 5;
11567
#ifdef DEBUG_PUSH
11568
      xmlGenericError(xmlGenericErrorContext,
11569
        "PP: Parsing XML Decl\n");
11570
#endif
11571
331k
      xmlParseXMLDecl(ctxt);
11572
331k
      if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11573
          /*
11574
           * The XML REC instructs us to stop parsing right
11575
           * here
11576
           */
11577
742
          xmlHaltParser(ctxt);
11578
742
          return(0);
11579
742
      }
11580
330k
      ctxt->standalone = ctxt->input->standalone;
11581
330k
      if ((ctxt->encoding == NULL) &&
11582
330k
          (ctxt->input->encoding != NULL))
11583
34.3k
          ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11584
330k
      if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11585
330k
          (!ctxt->disableSAX))
11586
283k
          ctxt->sax->startDocument(ctxt->userData);
11587
330k
      ctxt->instate = XML_PARSER_MISC;
11588
#ifdef DEBUG_PUSH
11589
      xmlGenericError(xmlGenericErrorContext,
11590
        "PP: entering MISC\n");
11591
#endif
11592
330k
        } else {
11593
85.4k
      ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11594
85.4k
      if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11595
85.4k
          (!ctxt->disableSAX))
11596
85.4k
          ctxt->sax->startDocument(ctxt->userData);
11597
85.4k
      ctxt->instate = XML_PARSER_MISC;
11598
#ifdef DEBUG_PUSH
11599
      xmlGenericError(xmlGenericErrorContext,
11600
        "PP: entering MISC\n");
11601
#endif
11602
85.4k
        }
11603
417k
    } else {
11604
290k
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11605
290k
      ctxt->sax->setDocumentLocator(ctxt->userData,
11606
290k
                  &xmlDefaultSAXLocator);
11607
290k
        ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11608
290k
        if (ctxt->version == NULL) {
11609
0
            xmlErrMemory(ctxt, NULL);
11610
0
      break;
11611
0
        }
11612
290k
        if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11613
290k
            (!ctxt->disableSAX))
11614
290k
      ctxt->sax->startDocument(ctxt->userData);
11615
290k
        ctxt->instate = XML_PARSER_MISC;
11616
#ifdef DEBUG_PUSH
11617
        xmlGenericError(xmlGenericErrorContext,
11618
          "PP: entering MISC\n");
11619
#endif
11620
290k
    }
11621
707k
    break;
11622
11.9M
            case XML_PARSER_START_TAG: {
11623
11.9M
          const xmlChar *name;
11624
11.9M
    const xmlChar *prefix = NULL;
11625
11.9M
    const xmlChar *URI = NULL;
11626
11.9M
                int line = ctxt->input->line;
11627
11.9M
    int nsNr = ctxt->nsNr;
11628
11629
11.9M
    if ((avail < 2) && (ctxt->inputNr == 1))
11630
0
        goto done;
11631
11.9M
    cur = ctxt->input->cur[0];
11632
11.9M
          if (cur != '<') {
11633
30.4k
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11634
30.4k
        xmlHaltParser(ctxt);
11635
30.4k
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11636
30.4k
      ctxt->sax->endDocument(ctxt->userData);
11637
30.4k
        goto done;
11638
30.4k
    }
11639
11.9M
    if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11640
2.32M
                    goto done;
11641
9.58M
    if (ctxt->spaceNr == 0)
11642
408k
        spacePush(ctxt, -1);
11643
9.17M
    else if (*ctxt->space == -2)
11644
1.96M
        spacePush(ctxt, -1);
11645
7.21M
    else
11646
7.21M
        spacePush(ctxt, *ctxt->space);
11647
9.58M
#ifdef LIBXML_SAX1_ENABLED
11648
9.58M
    if (ctxt->sax2)
11649
6.27M
#endif /* LIBXML_SAX1_ENABLED */
11650
6.27M
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
11651
3.31M
#ifdef LIBXML_SAX1_ENABLED
11652
3.31M
    else
11653
3.31M
        name = xmlParseStartTag(ctxt);
11654
9.58M
#endif /* LIBXML_SAX1_ENABLED */
11655
9.58M
    if (ctxt->instate == XML_PARSER_EOF)
11656
743
        goto done;
11657
9.58M
    if (name == NULL) {
11658
47.7k
        spacePop(ctxt);
11659
47.7k
        xmlHaltParser(ctxt);
11660
47.7k
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11661
47.7k
      ctxt->sax->endDocument(ctxt->userData);
11662
47.7k
        goto done;
11663
47.7k
    }
11664
9.53M
#ifdef LIBXML_VALID_ENABLED
11665
    /*
11666
     * [ VC: Root Element Type ]
11667
     * The Name in the document type declaration must match
11668
     * the element type of the root element.
11669
     */
11670
9.53M
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11671
9.53M
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
11672
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11673
9.53M
#endif /* LIBXML_VALID_ENABLED */
11674
11675
    /*
11676
     * Check for an Empty Element.
11677
     */
11678
9.53M
    if ((RAW == '/') && (NXT(1) == '>')) {
11679
2.03M
        SKIP(2);
11680
11681
2.03M
        if (ctxt->sax2) {
11682
1.53M
      if ((ctxt->sax != NULL) &&
11683
1.53M
          (ctxt->sax->endElementNs != NULL) &&
11684
1.53M
          (!ctxt->disableSAX))
11685
1.53M
          ctxt->sax->endElementNs(ctxt->userData, name,
11686
1.53M
                                  prefix, URI);
11687
1.53M
      if (ctxt->nsNr - nsNr > 0)
11688
32.4k
          nsPop(ctxt, ctxt->nsNr - nsNr);
11689
1.53M
#ifdef LIBXML_SAX1_ENABLED
11690
1.53M
        } else {
11691
499k
      if ((ctxt->sax != NULL) &&
11692
499k
          (ctxt->sax->endElement != NULL) &&
11693
499k
          (!ctxt->disableSAX))
11694
498k
          ctxt->sax->endElement(ctxt->userData, name);
11695
499k
#endif /* LIBXML_SAX1_ENABLED */
11696
499k
        }
11697
2.03M
        if (ctxt->instate == XML_PARSER_EOF)
11698
0
      goto done;
11699
2.03M
        spacePop(ctxt);
11700
2.03M
        if (ctxt->nameNr == 0) {
11701
6.49k
      ctxt->instate = XML_PARSER_EPILOG;
11702
2.02M
        } else {
11703
2.02M
      ctxt->instate = XML_PARSER_CONTENT;
11704
2.02M
        }
11705
2.03M
        break;
11706
2.03M
    }
11707
7.50M
    if (RAW == '>') {
11708
4.23M
        NEXT;
11709
4.23M
    } else {
11710
3.27M
        xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11711
3.27M
           "Couldn't find end of Start Tag %s\n",
11712
3.27M
           name);
11713
3.27M
        nodePop(ctxt);
11714
3.27M
        spacePop(ctxt);
11715
3.27M
    }
11716
7.50M
                nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
11717
11718
7.50M
    ctxt->instate = XML_PARSER_CONTENT;
11719
7.50M
                break;
11720
9.53M
      }
11721
47.0M
            case XML_PARSER_CONTENT: {
11722
47.0M
    if ((avail < 2) && (ctxt->inputNr == 1))
11723
103k
        goto done;
11724
46.9M
    cur = ctxt->input->cur[0];
11725
46.9M
    next = ctxt->input->cur[1];
11726
11727
46.9M
    if ((cur == '<') && (next == '/')) {
11728
2.68M
        ctxt->instate = XML_PARSER_END_TAG;
11729
2.68M
        break;
11730
44.2M
          } else if ((cur == '<') && (next == '?')) {
11731
475k
        if ((!terminate) &&
11732
475k
            (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11733
130k
      goto done;
11734
345k
        xmlParsePI(ctxt);
11735
345k
        ctxt->instate = XML_PARSER_CONTENT;
11736
43.7M
    } else if ((cur == '<') && (next != '!')) {
11737
9.19M
        ctxt->instate = XML_PARSER_START_TAG;
11738
9.19M
        break;
11739
34.5M
    } else if ((cur == '<') && (next == '!') &&
11740
34.5M
               (ctxt->input->cur[2] == '-') &&
11741
34.5M
         (ctxt->input->cur[3] == '-')) {
11742
550k
        if ((!terminate) &&
11743
550k
            (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11744
192k
      goto done;
11745
358k
        xmlParseComment(ctxt);
11746
358k
        ctxt->instate = XML_PARSER_CONTENT;
11747
34.0M
    } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11748
34.0M
        (ctxt->input->cur[2] == '[') &&
11749
34.0M
        (ctxt->input->cur[3] == 'C') &&
11750
34.0M
        (ctxt->input->cur[4] == 'D') &&
11751
34.0M
        (ctxt->input->cur[5] == 'A') &&
11752
34.0M
        (ctxt->input->cur[6] == 'T') &&
11753
34.0M
        (ctxt->input->cur[7] == 'A') &&
11754
34.0M
        (ctxt->input->cur[8] == '[')) {
11755
142k
        SKIP(9);
11756
142k
        ctxt->instate = XML_PARSER_CDATA_SECTION;
11757
142k
        break;
11758
33.8M
    } else if ((cur == '<') && (next == '!') &&
11759
33.8M
               (avail < 9)) {
11760
31.7k
        goto done;
11761
33.8M
    } else if (cur == '<') {
11762
1.53M
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11763
1.53M
                    "detected an error in element content\n");
11764
1.53M
                    SKIP(1);
11765
32.3M
    } else if (cur == '&') {
11766
3.93M
        if ((!terminate) && (!xmlParseLookupChar(ctxt, ';')))
11767
330k
      goto done;
11768
3.60M
        xmlParseReference(ctxt);
11769
28.3M
    } else {
11770
        /* TODO Avoid the extra copy, handle directly !!! */
11771
        /*
11772
         * Goal of the following test is:
11773
         *  - minimize calls to the SAX 'character' callback
11774
         *    when they are mergeable
11775
         *  - handle a problem for isBlank when we only parse
11776
         *    a sequence of blank chars and the next one is
11777
         *    not available to check against '<' presence.
11778
         *  - tries to homogenize the differences in SAX
11779
         *    callbacks between the push and pull versions
11780
         *    of the parser.
11781
         */
11782
28.3M
        if ((ctxt->inputNr == 1) &&
11783
28.3M
            (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11784
13.2M
      if ((!terminate) && (!xmlParseLookupCharData(ctxt)))
11785
826k
          goto done;
11786
13.2M
                    }
11787
27.5M
                    ctxt->checkIndex = 0;
11788
27.5M
        xmlParseCharData(ctxt, 0);
11789
27.5M
    }
11790
33.3M
    break;
11791
46.9M
      }
11792
33.3M
            case XML_PARSER_END_TAG:
11793
2.77M
    if (avail < 2)
11794
0
        goto done;
11795
2.77M
    if ((!terminate) && (!xmlParseLookupChar(ctxt, '>')))
11796
95.3k
        goto done;
11797
2.67M
    if (ctxt->sax2) {
11798
1.82M
              xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
11799
1.82M
        nameNsPop(ctxt);
11800
1.82M
    }
11801
849k
#ifdef LIBXML_SAX1_ENABLED
11802
849k
      else
11803
849k
        xmlParseEndTag1(ctxt, 0);
11804
2.67M
#endif /* LIBXML_SAX1_ENABLED */
11805
2.67M
    if (ctxt->instate == XML_PARSER_EOF) {
11806
        /* Nothing */
11807
2.67M
    } else if (ctxt->nameNr == 0) {
11808
36.3k
        ctxt->instate = XML_PARSER_EPILOG;
11809
2.64M
    } else {
11810
2.64M
        ctxt->instate = XML_PARSER_CONTENT;
11811
2.64M
    }
11812
2.67M
    break;
11813
500k
            case XML_PARSER_CDATA_SECTION: {
11814
          /*
11815
     * The Push mode need to have the SAX callback for
11816
     * cdataBlock merge back contiguous callbacks.
11817
     */
11818
500k
    const xmlChar *term;
11819
11820
500k
                if (terminate) {
11821
                    /*
11822
                     * Don't call xmlParseLookupString. If 'terminate'
11823
                     * is set, checkIndex is invalid.
11824
                     */
11825
49.1k
                    term = BAD_CAST strstr((const char *) ctxt->input->cur,
11826
49.1k
                                           "]]>");
11827
451k
                } else {
11828
451k
        term = xmlParseLookupString(ctxt, 0, "]]>", 3);
11829
451k
                }
11830
11831
500k
    if (term == NULL) {
11832
261k
        int tmp, size;
11833
11834
261k
                    if (terminate) {
11835
                        /* Unfinished CDATA section */
11836
8.69k
                        size = ctxt->input->end - ctxt->input->cur;
11837
253k
                    } else {
11838
253k
                        if (avail < XML_PARSER_BIG_BUFFER_SIZE + 2)
11839
115k
                            goto done;
11840
138k
                        ctxt->checkIndex = 0;
11841
                        /* XXX: Why don't we pass the full buffer? */
11842
138k
                        size = XML_PARSER_BIG_BUFFER_SIZE;
11843
138k
                    }
11844
146k
                    tmp = xmlCheckCdataPush(ctxt->input->cur, size, 0);
11845
146k
                    if (tmp <= 0) {
11846
102k
                        tmp = -tmp;
11847
102k
                        ctxt->input->cur += tmp;
11848
102k
                        goto encoding_error;
11849
102k
                    }
11850
44.6k
                    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11851
44.6k
                        if (ctxt->sax->cdataBlock != NULL)
11852
26.2k
                            ctxt->sax->cdataBlock(ctxt->userData,
11853
26.2k
                                                  ctxt->input->cur, tmp);
11854
18.4k
                        else if (ctxt->sax->characters != NULL)
11855
18.4k
                            ctxt->sax->characters(ctxt->userData,
11856
18.4k
                                                  ctxt->input->cur, tmp);
11857
44.6k
                    }
11858
44.6k
                    if (ctxt->instate == XML_PARSER_EOF)
11859
0
                        goto done;
11860
44.6k
                    SKIPL(tmp);
11861
238k
    } else {
11862
238k
                    int base = term - CUR_PTR;
11863
238k
        int tmp;
11864
11865
238k
        tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1);
11866
238k
        if ((tmp < 0) || (tmp != base)) {
11867
116k
      tmp = -tmp;
11868
116k
      ctxt->input->cur += tmp;
11869
116k
      goto encoding_error;
11870
116k
        }
11871
121k
        if ((ctxt->sax != NULL) && (base == 0) &&
11872
121k
            (ctxt->sax->cdataBlock != NULL) &&
11873
121k
            (!ctxt->disableSAX)) {
11874
      /*
11875
       * Special case to provide identical behaviour
11876
       * between pull and push parsers on empty CDATA
11877
       * sections
11878
       */
11879
9.59k
       if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11880
9.59k
           (!strncmp((const char *)&ctxt->input->cur[-9],
11881
9.59k
                     "<![CDATA[", 9)))
11882
9.58k
           ctxt->sax->cdataBlock(ctxt->userData,
11883
9.58k
                                 BAD_CAST "", 0);
11884
112k
        } else if ((ctxt->sax != NULL) && (base > 0) &&
11885
112k
      (!ctxt->disableSAX)) {
11886
106k
      if (ctxt->sax->cdataBlock != NULL)
11887
72.6k
          ctxt->sax->cdataBlock(ctxt->userData,
11888
72.6k
              ctxt->input->cur, base);
11889
33.9k
      else if (ctxt->sax->characters != NULL)
11890
33.9k
          ctxt->sax->characters(ctxt->userData,
11891
33.9k
              ctxt->input->cur, base);
11892
106k
        }
11893
121k
        if (ctxt->instate == XML_PARSER_EOF)
11894
0
      goto done;
11895
121k
        SKIPL(base + 3);
11896
121k
        ctxt->instate = XML_PARSER_CONTENT;
11897
#ifdef DEBUG_PUSH
11898
        xmlGenericError(xmlGenericErrorContext,
11899
          "PP: entering CONTENT\n");
11900
#endif
11901
121k
    }
11902
166k
    break;
11903
500k
      }
11904
1.11M
            case XML_PARSER_MISC:
11905
1.33M
            case XML_PARSER_PROLOG:
11906
1.37M
            case XML_PARSER_EPILOG:
11907
1.37M
    SKIP_BLANKS;
11908
1.37M
    if (ctxt->input->buf == NULL)
11909
0
        avail = ctxt->input->length -
11910
0
                (ctxt->input->cur - ctxt->input->base);
11911
1.37M
    else
11912
1.37M
        avail = xmlBufUse(ctxt->input->buf->buffer) -
11913
1.37M
                (ctxt->input->cur - ctxt->input->base);
11914
1.37M
    if (avail < 2)
11915
35.2k
        goto done;
11916
1.34M
    cur = ctxt->input->cur[0];
11917
1.34M
    next = ctxt->input->cur[1];
11918
1.34M
          if ((cur == '<') && (next == '?')) {
11919
191k
        if ((!terminate) &&
11920
191k
                        (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11921
49.4k
      goto done;
11922
#ifdef DEBUG_PUSH
11923
        xmlGenericError(xmlGenericErrorContext,
11924
          "PP: Parsing PI\n");
11925
#endif
11926
141k
        xmlParsePI(ctxt);
11927
141k
        if (ctxt->instate == XML_PARSER_EOF)
11928
0
      goto done;
11929
1.15M
    } else if ((cur == '<') && (next == '!') &&
11930
1.15M
        (ctxt->input->cur[2] == '-') &&
11931
1.15M
        (ctxt->input->cur[3] == '-')) {
11932
97.3k
        if ((!terminate) &&
11933
97.3k
                        (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11934
53.3k
      goto done;
11935
#ifdef DEBUG_PUSH
11936
        xmlGenericError(xmlGenericErrorContext,
11937
          "PP: Parsing Comment\n");
11938
#endif
11939
44.0k
        xmlParseComment(ctxt);
11940
44.0k
        if (ctxt->instate == XML_PARSER_EOF)
11941
0
      goto done;
11942
1.05M
    } else if ((ctxt->instate == XML_PARSER_MISC) &&
11943
1.05M
                    (cur == '<') && (next == '!') &&
11944
1.05M
        (ctxt->input->cur[2] == 'D') &&
11945
1.05M
        (ctxt->input->cur[3] == 'O') &&
11946
1.05M
        (ctxt->input->cur[4] == 'C') &&
11947
1.05M
        (ctxt->input->cur[5] == 'T') &&
11948
1.05M
        (ctxt->input->cur[6] == 'Y') &&
11949
1.05M
        (ctxt->input->cur[7] == 'P') &&
11950
1.05M
        (ctxt->input->cur[8] == 'E')) {
11951
521k
        if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11952
197k
                        goto done;
11953
#ifdef DEBUG_PUSH
11954
        xmlGenericError(xmlGenericErrorContext,
11955
          "PP: Parsing internal subset\n");
11956
#endif
11957
324k
        ctxt->inSubset = 1;
11958
324k
        xmlParseDocTypeDecl(ctxt);
11959
324k
        if (ctxt->instate == XML_PARSER_EOF)
11960
0
      goto done;
11961
324k
        if (RAW == '[') {
11962
219k
      ctxt->instate = XML_PARSER_DTD;
11963
#ifdef DEBUG_PUSH
11964
      xmlGenericError(xmlGenericErrorContext,
11965
        "PP: entering DTD\n");
11966
#endif
11967
219k
        } else {
11968
      /*
11969
       * Create and update the external subset.
11970
       */
11971
104k
      ctxt->inSubset = 2;
11972
104k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11973
104k
          (ctxt->sax->externalSubset != NULL))
11974
98.1k
          ctxt->sax->externalSubset(ctxt->userData,
11975
98.1k
            ctxt->intSubName, ctxt->extSubSystem,
11976
98.1k
            ctxt->extSubURI);
11977
104k
      ctxt->inSubset = 0;
11978
104k
      xmlCleanSpecialAttr(ctxt);
11979
104k
      ctxt->instate = XML_PARSER_PROLOG;
11980
#ifdef DEBUG_PUSH
11981
      xmlGenericError(xmlGenericErrorContext,
11982
        "PP: entering PROLOG\n");
11983
#endif
11984
104k
        }
11985
532k
    } else if ((cur == '<') && (next == '!') &&
11986
532k
               (avail <
11987
47.9k
                            (ctxt->instate == XML_PARSER_MISC ? 9 : 4))) {
11988
43.5k
        goto done;
11989
489k
    } else if (ctxt->instate == XML_PARSER_EPILOG) {
11990
11.4k
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11991
11.4k
        xmlHaltParser(ctxt);
11992
#ifdef DEBUG_PUSH
11993
        xmlGenericError(xmlGenericErrorContext,
11994
          "PP: entering EOF\n");
11995
#endif
11996
11.4k
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11997
11.4k
      ctxt->sax->endDocument(ctxt->userData);
11998
11.4k
        goto done;
11999
477k
                } else {
12000
477k
        ctxt->instate = XML_PARSER_START_TAG;
12001
#ifdef DEBUG_PUSH
12002
        xmlGenericError(xmlGenericErrorContext,
12003
          "PP: entering START_TAG\n");
12004
#endif
12005
477k
    }
12006
987k
    break;
12007
987k
            case XML_PARSER_DTD: {
12008
727k
                if ((!terminate) && (!xmlParseLookupInternalSubset(ctxt)))
12009
531k
                    goto done;
12010
195k
    xmlParseInternalSubset(ctxt);
12011
195k
    if (ctxt->instate == XML_PARSER_EOF)
12012
83.4k
        goto done;
12013
111k
    ctxt->inSubset = 2;
12014
111k
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
12015
111k
        (ctxt->sax->externalSubset != NULL))
12016
108k
        ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
12017
108k
          ctxt->extSubSystem, ctxt->extSubURI);
12018
111k
    ctxt->inSubset = 0;
12019
111k
    xmlCleanSpecialAttr(ctxt);
12020
111k
    if (ctxt->instate == XML_PARSER_EOF)
12021
5.22k
        goto done;
12022
106k
    ctxt->instate = XML_PARSER_PROLOG;
12023
#ifdef DEBUG_PUSH
12024
    xmlGenericError(xmlGenericErrorContext,
12025
      "PP: entering PROLOG\n");
12026
#endif
12027
106k
                break;
12028
111k
      }
12029
0
            case XML_PARSER_COMMENT:
12030
0
    xmlGenericError(xmlGenericErrorContext,
12031
0
      "PP: internal error, state == COMMENT\n");
12032
0
    ctxt->instate = XML_PARSER_CONTENT;
12033
#ifdef DEBUG_PUSH
12034
    xmlGenericError(xmlGenericErrorContext,
12035
      "PP: entering CONTENT\n");
12036
#endif
12037
0
    break;
12038
0
            case XML_PARSER_IGNORE:
12039
0
    xmlGenericError(xmlGenericErrorContext,
12040
0
      "PP: internal error, state == IGNORE");
12041
0
          ctxt->instate = XML_PARSER_DTD;
12042
#ifdef DEBUG_PUSH
12043
    xmlGenericError(xmlGenericErrorContext,
12044
      "PP: entering DTD\n");
12045
#endif
12046
0
          break;
12047
0
            case XML_PARSER_PI:
12048
0
    xmlGenericError(xmlGenericErrorContext,
12049
0
      "PP: internal error, state == PI\n");
12050
0
    ctxt->instate = XML_PARSER_CONTENT;
12051
#ifdef DEBUG_PUSH
12052
    xmlGenericError(xmlGenericErrorContext,
12053
      "PP: entering CONTENT\n");
12054
#endif
12055
0
    break;
12056
0
            case XML_PARSER_ENTITY_DECL:
12057
0
    xmlGenericError(xmlGenericErrorContext,
12058
0
      "PP: internal error, state == ENTITY_DECL\n");
12059
0
    ctxt->instate = XML_PARSER_DTD;
12060
#ifdef DEBUG_PUSH
12061
    xmlGenericError(xmlGenericErrorContext,
12062
      "PP: entering DTD\n");
12063
#endif
12064
0
    break;
12065
0
            case XML_PARSER_ENTITY_VALUE:
12066
0
    xmlGenericError(xmlGenericErrorContext,
12067
0
      "PP: internal error, state == ENTITY_VALUE\n");
12068
0
    ctxt->instate = XML_PARSER_CONTENT;
12069
#ifdef DEBUG_PUSH
12070
    xmlGenericError(xmlGenericErrorContext,
12071
      "PP: entering DTD\n");
12072
#endif
12073
0
    break;
12074
0
            case XML_PARSER_ATTRIBUTE_VALUE:
12075
0
    xmlGenericError(xmlGenericErrorContext,
12076
0
      "PP: internal error, state == ATTRIBUTE_VALUE\n");
12077
0
    ctxt->instate = XML_PARSER_START_TAG;
12078
#ifdef DEBUG_PUSH
12079
    xmlGenericError(xmlGenericErrorContext,
12080
      "PP: entering START_TAG\n");
12081
#endif
12082
0
    break;
12083
0
            case XML_PARSER_SYSTEM_LITERAL:
12084
0
    xmlGenericError(xmlGenericErrorContext,
12085
0
      "PP: internal error, state == SYSTEM_LITERAL\n");
12086
0
    ctxt->instate = XML_PARSER_START_TAG;
12087
#ifdef DEBUG_PUSH
12088
    xmlGenericError(xmlGenericErrorContext,
12089
      "PP: entering START_TAG\n");
12090
#endif
12091
0
    break;
12092
0
            case XML_PARSER_PUBLIC_LITERAL:
12093
0
    xmlGenericError(xmlGenericErrorContext,
12094
0
      "PP: internal error, state == PUBLIC_LITERAL\n");
12095
0
    ctxt->instate = XML_PARSER_START_TAG;
12096
#ifdef DEBUG_PUSH
12097
    xmlGenericError(xmlGenericErrorContext,
12098
      "PP: entering START_TAG\n");
12099
#endif
12100
0
    break;
12101
66.6M
  }
12102
66.6M
    }
12103
6.59M
done:
12104
#ifdef DEBUG_PUSH
12105
    xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
12106
#endif
12107
6.59M
    return(ret);
12108
218k
encoding_error:
12109
218k
    {
12110
218k
        char buffer[150];
12111
12112
218k
  snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12113
218k
      ctxt->input->cur[0], ctxt->input->cur[1],
12114
218k
      ctxt->input->cur[2], ctxt->input->cur[3]);
12115
218k
  __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12116
218k
         "Input is not proper UTF-8, indicate encoding !\n%s",
12117
218k
         BAD_CAST buffer, NULL);
12118
218k
    }
12119
218k
    return(0);
12120
7.01M
}
12121
12122
/**
12123
 * xmlParseChunk:
12124
 * @ctxt:  an XML parser context
12125
 * @chunk:  an char array
12126
 * @size:  the size in byte of the chunk
12127
 * @terminate:  last chunk indicator
12128
 *
12129
 * Parse a Chunk of memory
12130
 *
12131
 * Returns zero if no error, the xmlParserErrors otherwise.
12132
 */
12133
int
12134
xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
12135
8.50M
              int terminate) {
12136
8.50M
    int end_in_lf = 0;
12137
8.50M
    int remain = 0;
12138
12139
8.50M
    if (ctxt == NULL)
12140
0
        return(XML_ERR_INTERNAL_ERROR);
12141
8.50M
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12142
1.69M
        return(ctxt->errNo);
12143
6.81M
    if (ctxt->instate == XML_PARSER_EOF)
12144
1.79k
        return(-1);
12145
6.81M
    if (ctxt->input == NULL)
12146
0
        return(-1);
12147
12148
6.81M
    ctxt->progressive = 1;
12149
6.81M
    if (ctxt->instate == XML_PARSER_START)
12150
1.57M
        xmlDetectSAX2(ctxt);
12151
6.81M
    if ((size > 0) && (chunk != NULL) && (!terminate) &&
12152
6.81M
        (chunk[size - 1] == '\r')) {
12153
48.3k
  end_in_lf = 1;
12154
48.3k
  size--;
12155
48.3k
    }
12156
12157
7.02M
xmldecl_done:
12158
12159
7.02M
    if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12160
7.02M
        (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF))  {
12161
6.53M
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12162
6.53M
  size_t cur = ctxt->input->cur - ctxt->input->base;
12163
6.53M
  int res;
12164
12165
        /*
12166
         * Specific handling if we autodetected an encoding, we should not
12167
         * push more than the first line ... which depend on the encoding
12168
         * And only push the rest once the final encoding was detected
12169
         */
12170
6.53M
        if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
12171
6.53M
            (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
12172
338k
            unsigned int len = 45;
12173
12174
338k
            if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12175
338k
                               BAD_CAST "UTF-16")) ||
12176
338k
                (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12177
2.89k
                               BAD_CAST "UTF16")))
12178
335k
                len = 90;
12179
2.89k
            else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12180
2.89k
                                    BAD_CAST "UCS-4")) ||
12181
2.89k
                     (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12182
477
                                    BAD_CAST "UCS4")))
12183
2.41k
                len = 180;
12184
12185
338k
            if (ctxt->input->buf->rawconsumed < len)
12186
14.4k
                len -= ctxt->input->buf->rawconsumed;
12187
12188
            /*
12189
             * Change size for reading the initial declaration only
12190
             * if size is greater than len. Otherwise, memmove in xmlBufferAdd
12191
             * will blindly copy extra bytes from memory.
12192
             */
12193
338k
            if ((unsigned int) size > len) {
12194
214k
                remain = size - len;
12195
214k
                size = len;
12196
214k
            } else {
12197
124k
                remain = 0;
12198
124k
            }
12199
338k
        }
12200
6.53M
  res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12201
6.53M
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12202
6.53M
  if (res < 0) {
12203
2.01k
      ctxt->errNo = XML_PARSER_EOF;
12204
2.01k
      xmlHaltParser(ctxt);
12205
2.01k
      return (XML_PARSER_EOF);
12206
2.01k
  }
12207
#ifdef DEBUG_PUSH
12208
  xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12209
#endif
12210
12211
6.53M
    } else if (ctxt->instate != XML_PARSER_EOF) {
12212
487k
  if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
12213
487k
      xmlParserInputBufferPtr in = ctxt->input->buf;
12214
487k
      if ((in->encoder != NULL) && (in->buffer != NULL) &&
12215
487k
        (in->raw != NULL)) {
12216
51.2k
    int nbchars;
12217
51.2k
    size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
12218
51.2k
    size_t current = ctxt->input->cur - ctxt->input->base;
12219
12220
51.2k
    nbchars = xmlCharEncInput(in, terminate);
12221
51.2k
    xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
12222
51.2k
    if (nbchars < 0) {
12223
        /* TODO 2.6.0 */
12224
3.18k
        xmlGenericError(xmlGenericErrorContext,
12225
3.18k
            "xmlParseChunk: encoder error\n");
12226
3.18k
                    xmlHaltParser(ctxt);
12227
3.18k
        return(XML_ERR_INVALID_ENCODING);
12228
3.18k
    }
12229
51.2k
      }
12230
487k
  }
12231
487k
    }
12232
12233
7.01M
    if (remain != 0) {
12234
214k
        xmlParseTryOrFinish(ctxt, 0);
12235
6.80M
    } else {
12236
6.80M
        xmlParseTryOrFinish(ctxt, terminate);
12237
6.80M
    }
12238
7.01M
    if (ctxt->instate == XML_PARSER_EOF)
12239
182k
        return(ctxt->errNo);
12240
12241
6.83M
    if ((ctxt->input != NULL) &&
12242
6.83M
         (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
12243
6.83M
         ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
12244
6.83M
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
12245
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
12246
0
        xmlHaltParser(ctxt);
12247
0
    }
12248
6.83M
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12249
205k
        return(ctxt->errNo);
12250
12251
6.63M
    if (remain != 0) {
12252
213k
        chunk += size;
12253
213k
        size = remain;
12254
213k
        remain = 0;
12255
213k
        goto xmldecl_done;
12256
213k
    }
12257
6.41M
    if ((end_in_lf == 1) && (ctxt->input != NULL) &&
12258
6.41M
        (ctxt->input->buf != NULL)) {
12259
46.7k
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
12260
46.7k
           ctxt->input);
12261
46.7k
  size_t current = ctxt->input->cur - ctxt->input->base;
12262
12263
46.7k
  xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
12264
12265
46.7k
  xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
12266
46.7k
            base, current);
12267
46.7k
    }
12268
6.41M
    if (terminate) {
12269
  /*
12270
   * Check for termination
12271
   */
12272
208k
  int cur_avail = 0;
12273
12274
208k
  if (ctxt->input != NULL) {
12275
208k
      if (ctxt->input->buf == NULL)
12276
0
    cur_avail = ctxt->input->length -
12277
0
          (ctxt->input->cur - ctxt->input->base);
12278
208k
      else
12279
208k
    cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
12280
208k
                    (ctxt->input->cur - ctxt->input->base);
12281
208k
  }
12282
12283
208k
  if ((ctxt->instate != XML_PARSER_EOF) &&
12284
208k
      (ctxt->instate != XML_PARSER_EPILOG)) {
12285
183k
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12286
183k
  }
12287
208k
  if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) {
12288
499
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12289
499
  }
12290
208k
  if (ctxt->instate != XML_PARSER_EOF) {
12291
208k
      if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
12292
208k
    ctxt->sax->endDocument(ctxt->userData);
12293
208k
  }
12294
208k
  ctxt->instate = XML_PARSER_EOF;
12295
208k
    }
12296
6.41M
    if (ctxt->wellFormed == 0)
12297
3.17M
  return((xmlParserErrors) ctxt->errNo);
12298
3.23M
    else
12299
3.23M
        return(0);
12300
6.41M
}
12301
12302
/************************************************************************
12303
 *                  *
12304
 *    I/O front end functions to the parser     *
12305
 *                  *
12306
 ************************************************************************/
12307
12308
/**
12309
 * xmlCreatePushParserCtxt:
12310
 * @sax:  a SAX handler
12311
 * @user_data:  The user data returned on SAX callbacks
12312
 * @chunk:  a pointer to an array of chars
12313
 * @size:  number of chars in the array
12314
 * @filename:  an optional file name or URI
12315
 *
12316
 * Create a parser context for using the XML parser in push mode.
12317
 * If @buffer and @size are non-NULL, the data is used to detect
12318
 * the encoding.  The remaining characters will be parsed so they
12319
 * don't need to be fed in again through xmlParseChunk.
12320
 * To allow content encoding detection, @size should be >= 4
12321
 * The value of @filename is used for fetching external entities
12322
 * and error/warning reports.
12323
 *
12324
 * Returns the new parser context or NULL
12325
 */
12326
12327
xmlParserCtxtPtr
12328
xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12329
728k
                        const char *chunk, int size, const char *filename) {
12330
728k
    xmlParserCtxtPtr ctxt;
12331
728k
    xmlParserInputPtr inputStream;
12332
728k
    xmlParserInputBufferPtr buf;
12333
728k
    xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12334
12335
    /*
12336
     * plug some encoding conversion routines
12337
     */
12338
728k
    if ((chunk != NULL) && (size >= 4))
12339
356k
  enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12340
12341
728k
    buf = xmlAllocParserInputBuffer(enc);
12342
728k
    if (buf == NULL) return(NULL);
12343
12344
728k
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
12345
728k
    if (ctxt == NULL) {
12346
0
        xmlErrMemory(NULL, "creating parser: out of memory\n");
12347
0
  xmlFreeParserInputBuffer(buf);
12348
0
  return(NULL);
12349
0
    }
12350
728k
    ctxt->dictNames = 1;
12351
728k
    if (filename == NULL) {
12352
364k
  ctxt->directory = NULL;
12353
364k
    } else {
12354
364k
        ctxt->directory = xmlParserGetDirectory(filename);
12355
364k
    }
12356
12357
728k
    inputStream = xmlNewInputStream(ctxt);
12358
728k
    if (inputStream == NULL) {
12359
0
  xmlFreeParserCtxt(ctxt);
12360
0
  xmlFreeParserInputBuffer(buf);
12361
0
  return(NULL);
12362
0
    }
12363
12364
728k
    if (filename == NULL)
12365
364k
  inputStream->filename = NULL;
12366
364k
    else {
12367
364k
  inputStream->filename = (char *)
12368
364k
      xmlCanonicPath((const xmlChar *) filename);
12369
364k
  if (inputStream->filename == NULL) {
12370
0
            xmlFreeInputStream(inputStream);
12371
0
      xmlFreeParserCtxt(ctxt);
12372
0
      xmlFreeParserInputBuffer(buf);
12373
0
      return(NULL);
12374
0
  }
12375
364k
    }
12376
728k
    inputStream->buf = buf;
12377
728k
    xmlBufResetInput(inputStream->buf->buffer, inputStream);
12378
728k
    inputPush(ctxt, inputStream);
12379
12380
    /*
12381
     * If the caller didn't provide an initial 'chunk' for determining
12382
     * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12383
     * that it can be automatically determined later
12384
     */
12385
728k
    ctxt->charset = XML_CHAR_ENCODING_NONE;
12386
12387
728k
    if ((size != 0) && (chunk != NULL) &&
12388
728k
        (ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
12389
356k
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12390
356k
  size_t cur = ctxt->input->cur - ctxt->input->base;
12391
12392
356k
  xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12393
12394
356k
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12395
#ifdef DEBUG_PUSH
12396
  xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12397
#endif
12398
356k
    }
12399
12400
728k
    if (enc != XML_CHAR_ENCODING_NONE) {
12401
200k
        xmlSwitchEncoding(ctxt, enc);
12402
200k
    }
12403
12404
728k
    return(ctxt);
12405
728k
}
12406
#endif /* LIBXML_PUSH_ENABLED */
12407
12408
/**
12409
 * xmlHaltParser:
12410
 * @ctxt:  an XML parser context
12411
 *
12412
 * Blocks further parser processing don't override error
12413
 * for internal use
12414
 */
12415
static void
12416
645k
xmlHaltParser(xmlParserCtxtPtr ctxt) {
12417
645k
    if (ctxt == NULL)
12418
0
        return;
12419
645k
    ctxt->instate = XML_PARSER_EOF;
12420
645k
    ctxt->disableSAX = 1;
12421
655k
    while (ctxt->inputNr > 1)
12422
9.98k
        xmlFreeInputStream(inputPop(ctxt));
12423
645k
    if (ctxt->input != NULL) {
12424
        /*
12425
   * in case there was a specific allocation deallocate before
12426
   * overriding base
12427
   */
12428
645k
        if (ctxt->input->free != NULL) {
12429
0
      ctxt->input->free((xmlChar *) ctxt->input->base);
12430
0
      ctxt->input->free = NULL;
12431
0
  }
12432
645k
        if (ctxt->input->buf != NULL) {
12433
572k
            xmlFreeParserInputBuffer(ctxt->input->buf);
12434
572k
            ctxt->input->buf = NULL;
12435
572k
        }
12436
645k
  ctxt->input->cur = BAD_CAST"";
12437
645k
        ctxt->input->length = 0;
12438
645k
  ctxt->input->base = ctxt->input->cur;
12439
645k
        ctxt->input->end = ctxt->input->cur;
12440
645k
    }
12441
645k
}
12442
12443
/**
12444
 * xmlStopParser:
12445
 * @ctxt:  an XML parser context
12446
 *
12447
 * Blocks further parser processing
12448
 */
12449
void
12450
364k
xmlStopParser(xmlParserCtxtPtr ctxt) {
12451
364k
    if (ctxt == NULL)
12452
0
        return;
12453
364k
    xmlHaltParser(ctxt);
12454
364k
    ctxt->errNo = XML_ERR_USER_STOP;
12455
364k
}
12456
12457
/**
12458
 * xmlCreateIOParserCtxt:
12459
 * @sax:  a SAX handler
12460
 * @user_data:  The user data returned on SAX callbacks
12461
 * @ioread:  an I/O read function
12462
 * @ioclose:  an I/O close function
12463
 * @ioctx:  an I/O handler
12464
 * @enc:  the charset encoding if known
12465
 *
12466
 * Create a parser context for using the XML parser with an existing
12467
 * I/O stream
12468
 *
12469
 * Returns the new parser context or NULL
12470
 */
12471
xmlParserCtxtPtr
12472
xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12473
  xmlInputReadCallback   ioread, xmlInputCloseCallback  ioclose,
12474
0
  void *ioctx, xmlCharEncoding enc) {
12475
0
    xmlParserCtxtPtr ctxt;
12476
0
    xmlParserInputPtr inputStream;
12477
0
    xmlParserInputBufferPtr buf;
12478
12479
0
    if (ioread == NULL) return(NULL);
12480
12481
0
    buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
12482
0
    if (buf == NULL) {
12483
0
        if (ioclose != NULL)
12484
0
            ioclose(ioctx);
12485
0
        return (NULL);
12486
0
    }
12487
12488
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
12489
0
    if (ctxt == NULL) {
12490
0
  xmlFreeParserInputBuffer(buf);
12491
0
  return(NULL);
12492
0
    }
12493
12494
0
    inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12495
0
    if (inputStream == NULL) {
12496
0
  xmlFreeParserCtxt(ctxt);
12497
0
  return(NULL);
12498
0
    }
12499
0
    inputPush(ctxt, inputStream);
12500
12501
0
    return(ctxt);
12502
0
}
12503
12504
#ifdef LIBXML_VALID_ENABLED
12505
/************************************************************************
12506
 *                  *
12507
 *    Front ends when parsing a DTD       *
12508
 *                  *
12509
 ************************************************************************/
12510
12511
/**
12512
 * xmlIOParseDTD:
12513
 * @sax:  the SAX handler block or NULL
12514
 * @input:  an Input Buffer
12515
 * @enc:  the charset encoding if known
12516
 *
12517
 * Load and parse a DTD
12518
 *
12519
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12520
 * @input will be freed by the function in any case.
12521
 */
12522
12523
xmlDtdPtr
12524
xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12525
0
        xmlCharEncoding enc) {
12526
0
    xmlDtdPtr ret = NULL;
12527
0
    xmlParserCtxtPtr ctxt;
12528
0
    xmlParserInputPtr pinput = NULL;
12529
0
    xmlChar start[4];
12530
12531
0
    if (input == NULL)
12532
0
  return(NULL);
12533
12534
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12535
0
    if (ctxt == NULL) {
12536
0
        xmlFreeParserInputBuffer(input);
12537
0
  return(NULL);
12538
0
    }
12539
12540
    /* We are loading a DTD */
12541
0
    ctxt->options |= XML_PARSE_DTDLOAD;
12542
12543
0
    xmlDetectSAX2(ctxt);
12544
12545
    /*
12546
     * generate a parser input from the I/O handler
12547
     */
12548
12549
0
    pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12550
0
    if (pinput == NULL) {
12551
0
        xmlFreeParserInputBuffer(input);
12552
0
  xmlFreeParserCtxt(ctxt);
12553
0
  return(NULL);
12554
0
    }
12555
12556
    /*
12557
     * plug some encoding conversion routines here.
12558
     */
12559
0
    if (xmlPushInput(ctxt, pinput) < 0) {
12560
0
  xmlFreeParserCtxt(ctxt);
12561
0
  return(NULL);
12562
0
    }
12563
0
    if (enc != XML_CHAR_ENCODING_NONE) {
12564
0
        xmlSwitchEncoding(ctxt, enc);
12565
0
    }
12566
12567
0
    pinput->filename = NULL;
12568
0
    pinput->line = 1;
12569
0
    pinput->col = 1;
12570
0
    pinput->base = ctxt->input->cur;
12571
0
    pinput->cur = ctxt->input->cur;
12572
0
    pinput->free = NULL;
12573
12574
    /*
12575
     * let's parse that entity knowing it's an external subset.
12576
     */
12577
0
    ctxt->inSubset = 2;
12578
0
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12579
0
    if (ctxt->myDoc == NULL) {
12580
0
  xmlErrMemory(ctxt, "New Doc failed");
12581
0
  return(NULL);
12582
0
    }
12583
0
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
12584
0
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12585
0
                                 BAD_CAST "none", BAD_CAST "none");
12586
12587
0
    if ((enc == XML_CHAR_ENCODING_NONE) &&
12588
0
        ((ctxt->input->end - ctxt->input->cur) >= 4)) {
12589
  /*
12590
   * Get the 4 first bytes and decode the charset
12591
   * if enc != XML_CHAR_ENCODING_NONE
12592
   * plug some encoding conversion routines.
12593
   */
12594
0
  start[0] = RAW;
12595
0
  start[1] = NXT(1);
12596
0
  start[2] = NXT(2);
12597
0
  start[3] = NXT(3);
12598
0
  enc = xmlDetectCharEncoding(start, 4);
12599
0
  if (enc != XML_CHAR_ENCODING_NONE) {
12600
0
      xmlSwitchEncoding(ctxt, enc);
12601
0
  }
12602
0
    }
12603
12604
0
    xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12605
12606
0
    if (ctxt->myDoc != NULL) {
12607
0
  if (ctxt->wellFormed) {
12608
0
      ret = ctxt->myDoc->extSubset;
12609
0
      ctxt->myDoc->extSubset = NULL;
12610
0
      if (ret != NULL) {
12611
0
    xmlNodePtr tmp;
12612
12613
0
    ret->doc = NULL;
12614
0
    tmp = ret->children;
12615
0
    while (tmp != NULL) {
12616
0
        tmp->doc = NULL;
12617
0
        tmp = tmp->next;
12618
0
    }
12619
0
      }
12620
0
  } else {
12621
0
      ret = NULL;
12622
0
  }
12623
0
        xmlFreeDoc(ctxt->myDoc);
12624
0
        ctxt->myDoc = NULL;
12625
0
    }
12626
0
    xmlFreeParserCtxt(ctxt);
12627
12628
0
    return(ret);
12629
0
}
12630
12631
/**
12632
 * xmlSAXParseDTD:
12633
 * @sax:  the SAX handler block
12634
 * @ExternalID:  a NAME* containing the External ID of the DTD
12635
 * @SystemID:  a NAME* containing the URL to the DTD
12636
 *
12637
 * DEPRECATED: Don't use.
12638
 *
12639
 * Load and parse an external subset.
12640
 *
12641
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12642
 */
12643
12644
xmlDtdPtr
12645
xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12646
0
                          const xmlChar *SystemID) {
12647
0
    xmlDtdPtr ret = NULL;
12648
0
    xmlParserCtxtPtr ctxt;
12649
0
    xmlParserInputPtr input = NULL;
12650
0
    xmlCharEncoding enc;
12651
0
    xmlChar* systemIdCanonic;
12652
12653
0
    if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12654
12655
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12656
0
    if (ctxt == NULL) {
12657
0
  return(NULL);
12658
0
    }
12659
12660
    /* We are loading a DTD */
12661
0
    ctxt->options |= XML_PARSE_DTDLOAD;
12662
12663
    /*
12664
     * Canonicalise the system ID
12665
     */
12666
0
    systemIdCanonic = xmlCanonicPath(SystemID);
12667
0
    if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
12668
0
  xmlFreeParserCtxt(ctxt);
12669
0
  return(NULL);
12670
0
    }
12671
12672
    /*
12673
     * Ask the Entity resolver to load the damn thing
12674
     */
12675
12676
0
    if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
12677
0
  input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12678
0
                                   systemIdCanonic);
12679
0
    if (input == NULL) {
12680
0
  xmlFreeParserCtxt(ctxt);
12681
0
  if (systemIdCanonic != NULL)
12682
0
      xmlFree(systemIdCanonic);
12683
0
  return(NULL);
12684
0
    }
12685
12686
    /*
12687
     * plug some encoding conversion routines here.
12688
     */
12689
0
    if (xmlPushInput(ctxt, input) < 0) {
12690
0
  xmlFreeParserCtxt(ctxt);
12691
0
  if (systemIdCanonic != NULL)
12692
0
      xmlFree(systemIdCanonic);
12693
0
  return(NULL);
12694
0
    }
12695
0
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12696
0
  enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12697
0
  xmlSwitchEncoding(ctxt, enc);
12698
0
    }
12699
12700
0
    if (input->filename == NULL)
12701
0
  input->filename = (char *) systemIdCanonic;
12702
0
    else
12703
0
  xmlFree(systemIdCanonic);
12704
0
    input->line = 1;
12705
0
    input->col = 1;
12706
0
    input->base = ctxt->input->cur;
12707
0
    input->cur = ctxt->input->cur;
12708
0
    input->free = NULL;
12709
12710
    /*
12711
     * let's parse that entity knowing it's an external subset.
12712
     */
12713
0
    ctxt->inSubset = 2;
12714
0
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12715
0
    if (ctxt->myDoc == NULL) {
12716
0
  xmlErrMemory(ctxt, "New Doc failed");
12717
0
  xmlFreeParserCtxt(ctxt);
12718
0
  return(NULL);
12719
0
    }
12720
0
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
12721
0
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12722
0
                                 ExternalID, SystemID);
12723
0
    xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12724
12725
0
    if (ctxt->myDoc != NULL) {
12726
0
  if (ctxt->wellFormed) {
12727
0
      ret = ctxt->myDoc->extSubset;
12728
0
      ctxt->myDoc->extSubset = NULL;
12729
0
      if (ret != NULL) {
12730
0
    xmlNodePtr tmp;
12731
12732
0
    ret->doc = NULL;
12733
0
    tmp = ret->children;
12734
0
    while (tmp != NULL) {
12735
0
        tmp->doc = NULL;
12736
0
        tmp = tmp->next;
12737
0
    }
12738
0
      }
12739
0
  } else {
12740
0
      ret = NULL;
12741
0
  }
12742
0
        xmlFreeDoc(ctxt->myDoc);
12743
0
        ctxt->myDoc = NULL;
12744
0
    }
12745
0
    xmlFreeParserCtxt(ctxt);
12746
12747
0
    return(ret);
12748
0
}
12749
12750
12751
/**
12752
 * xmlParseDTD:
12753
 * @ExternalID:  a NAME* containing the External ID of the DTD
12754
 * @SystemID:  a NAME* containing the URL to the DTD
12755
 *
12756
 * Load and parse an external subset.
12757
 *
12758
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12759
 */
12760
12761
xmlDtdPtr
12762
0
xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12763
0
    return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12764
0
}
12765
#endif /* LIBXML_VALID_ENABLED */
12766
12767
/************************************************************************
12768
 *                  *
12769
 *    Front ends when parsing an Entity     *
12770
 *                  *
12771
 ************************************************************************/
12772
12773
/**
12774
 * xmlParseCtxtExternalEntity:
12775
 * @ctx:  the existing parsing context
12776
 * @URL:  the URL for the entity to load
12777
 * @ID:  the System ID for the entity to load
12778
 * @lst:  the return value for the set of parsed nodes
12779
 *
12780
 * Parse an external general entity within an existing parsing context
12781
 * An external general parsed entity is well-formed if it matches the
12782
 * production labeled extParsedEnt.
12783
 *
12784
 * [78] extParsedEnt ::= TextDecl? content
12785
 *
12786
 * Returns 0 if the entity is well formed, -1 in case of args problem and
12787
 *    the parser error code otherwise
12788
 */
12789
12790
int
12791
xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
12792
0
                 const xmlChar *ID, xmlNodePtr *lst) {
12793
0
    void *userData;
12794
12795
0
    if (ctx == NULL) return(-1);
12796
    /*
12797
     * If the user provided their own SAX callbacks, then reuse the
12798
     * userData callback field, otherwise the expected setup in a
12799
     * DOM builder is to have userData == ctxt
12800
     */
12801
0
    if (ctx->userData == ctx)
12802
0
        userData = NULL;
12803
0
    else
12804
0
        userData = ctx->userData;
12805
0
    return xmlParseExternalEntityPrivate(ctx->myDoc, ctx, ctx->sax,
12806
0
                                         userData, ctx->depth + 1,
12807
0
                                         URL, ID, lst);
12808
0
}
12809
12810
/**
12811
 * xmlParseExternalEntityPrivate:
12812
 * @doc:  the document the chunk pertains to
12813
 * @oldctxt:  the previous parser context if available
12814
 * @sax:  the SAX handler block (possibly NULL)
12815
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12816
 * @depth:  Used for loop detection, use 0
12817
 * @URL:  the URL for the entity to load
12818
 * @ID:  the System ID for the entity to load
12819
 * @list:  the return value for the set of parsed nodes
12820
 *
12821
 * Private version of xmlParseExternalEntity()
12822
 *
12823
 * Returns 0 if the entity is well formed, -1 in case of args problem and
12824
 *    the parser error code otherwise
12825
 */
12826
12827
static xmlParserErrors
12828
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
12829
                xmlSAXHandlerPtr sax,
12830
          void *user_data, int depth, const xmlChar *URL,
12831
116k
          const xmlChar *ID, xmlNodePtr *list) {
12832
116k
    xmlParserCtxtPtr ctxt;
12833
116k
    xmlDocPtr newDoc;
12834
116k
    xmlNodePtr newRoot;
12835
116k
    xmlParserErrors ret = XML_ERR_OK;
12836
116k
    xmlChar start[4];
12837
116k
    xmlCharEncoding enc;
12838
12839
116k
    if (((depth > 40) &&
12840
116k
  ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12841
116k
  (depth > 100)) {
12842
0
  xmlFatalErrMsg(oldctxt, XML_ERR_ENTITY_LOOP,
12843
0
                       "Maximum entity nesting depth exceeded");
12844
0
        return(XML_ERR_ENTITY_LOOP);
12845
0
    }
12846
12847
116k
    if (list != NULL)
12848
22.6k
        *list = NULL;
12849
116k
    if ((URL == NULL) && (ID == NULL))
12850
115
  return(XML_ERR_INTERNAL_ERROR);
12851
116k
    if (doc == NULL)
12852
0
  return(XML_ERR_INTERNAL_ERROR);
12853
12854
116k
    ctxt = xmlCreateEntityParserCtxtInternal(sax, user_data, URL, ID, NULL,
12855
116k
                                             oldctxt);
12856
116k
    if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
12857
21.9k
    if (oldctxt != NULL) {
12858
21.9k
        ctxt->nbErrors = oldctxt->nbErrors;
12859
21.9k
        ctxt->nbWarnings = oldctxt->nbWarnings;
12860
21.9k
    }
12861
21.9k
    xmlDetectSAX2(ctxt);
12862
12863
21.9k
    newDoc = xmlNewDoc(BAD_CAST "1.0");
12864
21.9k
    if (newDoc == NULL) {
12865
0
  xmlFreeParserCtxt(ctxt);
12866
0
  return(XML_ERR_INTERNAL_ERROR);
12867
0
    }
12868
21.9k
    newDoc->properties = XML_DOC_INTERNAL;
12869
21.9k
    if (doc) {
12870
21.9k
        newDoc->intSubset = doc->intSubset;
12871
21.9k
        newDoc->extSubset = doc->extSubset;
12872
21.9k
        if (doc->dict) {
12873
13.2k
            newDoc->dict = doc->dict;
12874
13.2k
            xmlDictReference(newDoc->dict);
12875
13.2k
        }
12876
21.9k
        if (doc->URL != NULL) {
12877
13.6k
            newDoc->URL = xmlStrdup(doc->URL);
12878
13.6k
        }
12879
21.9k
    }
12880
21.9k
    newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12881
21.9k
    if (newRoot == NULL) {
12882
0
  if (sax != NULL)
12883
0
  xmlFreeParserCtxt(ctxt);
12884
0
  newDoc->intSubset = NULL;
12885
0
  newDoc->extSubset = NULL;
12886
0
        xmlFreeDoc(newDoc);
12887
0
  return(XML_ERR_INTERNAL_ERROR);
12888
0
    }
12889
21.9k
    xmlAddChild((xmlNodePtr) newDoc, newRoot);
12890
21.9k
    nodePush(ctxt, newDoc->children);
12891
21.9k
    if (doc == NULL) {
12892
0
        ctxt->myDoc = newDoc;
12893
21.9k
    } else {
12894
21.9k
        ctxt->myDoc = doc;
12895
21.9k
        newRoot->doc = doc;
12896
21.9k
    }
12897
12898
    /*
12899
     * Get the 4 first bytes and decode the charset
12900
     * if enc != XML_CHAR_ENCODING_NONE
12901
     * plug some encoding conversion routines.
12902
     */
12903
21.9k
    GROW;
12904
21.9k
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12905
19.1k
  start[0] = RAW;
12906
19.1k
  start[1] = NXT(1);
12907
19.1k
  start[2] = NXT(2);
12908
19.1k
  start[3] = NXT(3);
12909
19.1k
  enc = xmlDetectCharEncoding(start, 4);
12910
19.1k
  if (enc != XML_CHAR_ENCODING_NONE) {
12911
3.25k
      xmlSwitchEncoding(ctxt, enc);
12912
3.25k
  }
12913
19.1k
    }
12914
12915
    /*
12916
     * Parse a possible text declaration first
12917
     */
12918
21.9k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
12919
2.26k
  xmlParseTextDecl(ctxt);
12920
        /*
12921
         * An XML-1.0 document can't reference an entity not XML-1.0
12922
         */
12923
2.26k
        if ((xmlStrEqual(oldctxt->version, BAD_CAST "1.0")) &&
12924
2.26k
            (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
12925
361
            xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
12926
361
                           "Version mismatch between document and entity\n");
12927
361
        }
12928
2.26k
    }
12929
12930
21.9k
    ctxt->instate = XML_PARSER_CONTENT;
12931
21.9k
    ctxt->depth = depth;
12932
21.9k
    if (oldctxt != NULL) {
12933
21.9k
  ctxt->_private = oldctxt->_private;
12934
21.9k
  ctxt->loadsubset = oldctxt->loadsubset;
12935
21.9k
  ctxt->validate = oldctxt->validate;
12936
21.9k
  ctxt->valid = oldctxt->valid;
12937
21.9k
  ctxt->replaceEntities = oldctxt->replaceEntities;
12938
21.9k
        if (oldctxt->validate) {
12939
10.7k
            ctxt->vctxt.error = oldctxt->vctxt.error;
12940
10.7k
            ctxt->vctxt.warning = oldctxt->vctxt.warning;
12941
10.7k
            ctxt->vctxt.userData = oldctxt->vctxt.userData;
12942
10.7k
            ctxt->vctxt.flags = oldctxt->vctxt.flags;
12943
10.7k
        }
12944
21.9k
  ctxt->external = oldctxt->external;
12945
21.9k
        if (ctxt->dict) xmlDictFree(ctxt->dict);
12946
21.9k
        ctxt->dict = oldctxt->dict;
12947
21.9k
        ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12948
21.9k
        ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12949
21.9k
        ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12950
21.9k
        ctxt->dictNames = oldctxt->dictNames;
12951
21.9k
        ctxt->attsDefault = oldctxt->attsDefault;
12952
21.9k
        ctxt->attsSpecial = oldctxt->attsSpecial;
12953
21.9k
        ctxt->linenumbers = oldctxt->linenumbers;
12954
21.9k
  ctxt->record_info = oldctxt->record_info;
12955
21.9k
  ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
12956
21.9k
  ctxt->node_seq.length = oldctxt->node_seq.length;
12957
21.9k
  ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
12958
21.9k
    } else {
12959
  /*
12960
   * Doing validity checking on chunk without context
12961
   * doesn't make sense
12962
   */
12963
0
  ctxt->_private = NULL;
12964
0
  ctxt->validate = 0;
12965
0
  ctxt->external = 2;
12966
0
  ctxt->loadsubset = 0;
12967
0
    }
12968
12969
21.9k
    xmlParseContent(ctxt);
12970
12971
21.9k
    if ((RAW == '<') && (NXT(1) == '/')) {
12972
462
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12973
21.4k
    } else if (RAW != 0) {
12974
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12975
0
    }
12976
21.9k
    if (ctxt->node != newDoc->children) {
12977
3.54k
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12978
3.54k
    }
12979
12980
21.9k
    if (!ctxt->wellFormed) {
12981
7.69k
  ret = (xmlParserErrors)ctxt->errNo;
12982
7.69k
        if (oldctxt != NULL) {
12983
7.69k
            oldctxt->errNo = ctxt->errNo;
12984
7.69k
            oldctxt->wellFormed = 0;
12985
7.69k
            xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12986
7.69k
        }
12987
14.2k
    } else {
12988
14.2k
  if (list != NULL) {
12989
3.07k
      xmlNodePtr cur;
12990
12991
      /*
12992
       * Return the newly created nodeset after unlinking it from
12993
       * they pseudo parent.
12994
       */
12995
3.07k
      cur = newDoc->children->children;
12996
3.07k
      *list = cur;
12997
13.5k
      while (cur != NULL) {
12998
10.4k
    cur->parent = NULL;
12999
10.4k
    cur = cur->next;
13000
10.4k
      }
13001
3.07k
            newDoc->children->children = NULL;
13002
3.07k
  }
13003
14.2k
  ret = XML_ERR_OK;
13004
14.2k
    }
13005
13006
    /*
13007
     * Also record the size of the entity parsed
13008
     */
13009
21.9k
    if (ctxt->input != NULL && oldctxt != NULL) {
13010
21.9k
        unsigned long consumed = ctxt->input->consumed;
13011
13012
21.9k
        xmlSaturatedAddSizeT(&consumed, ctxt->input->cur - ctxt->input->base);
13013
13014
21.9k
        xmlSaturatedAdd(&oldctxt->sizeentities, consumed);
13015
21.9k
        xmlSaturatedAdd(&oldctxt->sizeentities, ctxt->sizeentities);
13016
13017
21.9k
        xmlSaturatedAdd(&oldctxt->sizeentcopy, consumed);
13018
21.9k
        xmlSaturatedAdd(&oldctxt->sizeentcopy, ctxt->sizeentcopy);
13019
21.9k
    }
13020
13021
21.9k
    if (oldctxt != NULL) {
13022
21.9k
        ctxt->dict = NULL;
13023
21.9k
        ctxt->attsDefault = NULL;
13024
21.9k
        ctxt->attsSpecial = NULL;
13025
21.9k
        oldctxt->nbErrors = ctxt->nbErrors;
13026
21.9k
        oldctxt->nbWarnings = ctxt->nbWarnings;
13027
21.9k
        oldctxt->validate = ctxt->validate;
13028
21.9k
        oldctxt->valid = ctxt->valid;
13029
21.9k
        oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
13030
21.9k
        oldctxt->node_seq.length = ctxt->node_seq.length;
13031
21.9k
        oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
13032
21.9k
    }
13033
21.9k
    ctxt->node_seq.maximum = 0;
13034
21.9k
    ctxt->node_seq.length = 0;
13035
21.9k
    ctxt->node_seq.buffer = NULL;
13036
21.9k
    xmlFreeParserCtxt(ctxt);
13037
21.9k
    newDoc->intSubset = NULL;
13038
21.9k
    newDoc->extSubset = NULL;
13039
21.9k
    xmlFreeDoc(newDoc);
13040
13041
21.9k
    return(ret);
13042
21.9k
}
13043
13044
#ifdef LIBXML_SAX1_ENABLED
13045
/**
13046
 * xmlParseExternalEntity:
13047
 * @doc:  the document the chunk pertains to
13048
 * @sax:  the SAX handler block (possibly NULL)
13049
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13050
 * @depth:  Used for loop detection, use 0
13051
 * @URL:  the URL for the entity to load
13052
 * @ID:  the System ID for the entity to load
13053
 * @lst:  the return value for the set of parsed nodes
13054
 *
13055
 * Parse an external general entity
13056
 * An external general parsed entity is well-formed if it matches the
13057
 * production labeled extParsedEnt.
13058
 *
13059
 * [78] extParsedEnt ::= TextDecl? content
13060
 *
13061
 * Returns 0 if the entity is well formed, -1 in case of args problem and
13062
 *    the parser error code otherwise
13063
 */
13064
13065
int
13066
xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
13067
0
    int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
13068
0
    return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
13069
0
                           ID, lst));
13070
0
}
13071
13072
/**
13073
 * xmlParseBalancedChunkMemory:
13074
 * @doc:  the document the chunk pertains to (must not be NULL)
13075
 * @sax:  the SAX handler block (possibly NULL)
13076
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13077
 * @depth:  Used for loop detection, use 0
13078
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13079
 * @lst:  the return value for the set of parsed nodes
13080
 *
13081
 * Parse a well-balanced chunk of an XML document
13082
 * called by the parser
13083
 * The allowed sequence for the Well Balanced Chunk is the one defined by
13084
 * the content production in the XML grammar:
13085
 *
13086
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13087
 *
13088
 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13089
 *    the parser error code otherwise
13090
 */
13091
13092
int
13093
xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13094
0
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
13095
0
    return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
13096
0
                                                depth, string, lst, 0 );
13097
0
}
13098
#endif /* LIBXML_SAX1_ENABLED */
13099
13100
/**
13101
 * xmlParseBalancedChunkMemoryInternal:
13102
 * @oldctxt:  the existing parsing context
13103
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13104
 * @user_data:  the user data field for the parser context
13105
 * @lst:  the return value for the set of parsed nodes
13106
 *
13107
 *
13108
 * Parse a well-balanced chunk of an XML document
13109
 * called by the parser
13110
 * The allowed sequence for the Well Balanced Chunk is the one defined by
13111
 * the content production in the XML grammar:
13112
 *
13113
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13114
 *
13115
 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13116
 * error code otherwise
13117
 *
13118
 * In case recover is set to 1, the nodelist will not be empty even if
13119
 * the parsed chunk is not well balanced.
13120
 */
13121
static xmlParserErrors
13122
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
13123
69.3k
  const xmlChar *string, void *user_data, xmlNodePtr *lst) {
13124
69.3k
    xmlParserCtxtPtr ctxt;
13125
69.3k
    xmlDocPtr newDoc = NULL;
13126
69.3k
    xmlNodePtr newRoot;
13127
69.3k
    xmlSAXHandlerPtr oldsax = NULL;
13128
69.3k
    xmlNodePtr content = NULL;
13129
69.3k
    xmlNodePtr last = NULL;
13130
69.3k
    int size;
13131
69.3k
    xmlParserErrors ret = XML_ERR_OK;
13132
69.3k
#ifdef SAX2
13133
69.3k
    int i;
13134
69.3k
#endif
13135
13136
69.3k
    if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13137
69.3k
        (oldctxt->depth >  100)) {
13138
63
  xmlFatalErrMsg(oldctxt, XML_ERR_ENTITY_LOOP,
13139
63
                       "Maximum entity nesting depth exceeded");
13140
63
  return(XML_ERR_ENTITY_LOOP);
13141
63
    }
13142
13143
13144
69.2k
    if (lst != NULL)
13145
64.4k
        *lst = NULL;
13146
69.2k
    if (string == NULL)
13147
48
        return(XML_ERR_INTERNAL_ERROR);
13148
13149
69.2k
    size = xmlStrlen(string);
13150
13151
69.2k
    ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13152
69.2k
    if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13153
65.6k
    ctxt->nbErrors = oldctxt->nbErrors;
13154
65.6k
    ctxt->nbWarnings = oldctxt->nbWarnings;
13155
65.6k
    if (user_data != NULL)
13156
0
  ctxt->userData = user_data;
13157
65.6k
    else
13158
65.6k
  ctxt->userData = ctxt;
13159
65.6k
    if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13160
65.6k
    ctxt->dict = oldctxt->dict;
13161
65.6k
    ctxt->input_id = oldctxt->input_id;
13162
65.6k
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13163
65.6k
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13164
65.6k
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13165
13166
65.6k
#ifdef SAX2
13167
    /* propagate namespaces down the entity */
13168
66.1k
    for (i = 0;i < oldctxt->nsNr;i += 2) {
13169
571
        nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
13170
571
    }
13171
65.6k
#endif
13172
13173
65.6k
    oldsax = ctxt->sax;
13174
65.6k
    ctxt->sax = oldctxt->sax;
13175
65.6k
    xmlDetectSAX2(ctxt);
13176
65.6k
    ctxt->replaceEntities = oldctxt->replaceEntities;
13177
65.6k
    ctxt->options = oldctxt->options;
13178
13179
65.6k
    ctxt->_private = oldctxt->_private;
13180
65.6k
    if (oldctxt->myDoc == NULL) {
13181
0
  newDoc = xmlNewDoc(BAD_CAST "1.0");
13182
0
  if (newDoc == NULL) {
13183
0
      ctxt->sax = oldsax;
13184
0
      ctxt->dict = NULL;
13185
0
      xmlFreeParserCtxt(ctxt);
13186
0
      return(XML_ERR_INTERNAL_ERROR);
13187
0
  }
13188
0
  newDoc->properties = XML_DOC_INTERNAL;
13189
0
  newDoc->dict = ctxt->dict;
13190
0
  xmlDictReference(newDoc->dict);
13191
0
  ctxt->myDoc = newDoc;
13192
65.6k
    } else {
13193
65.6k
  ctxt->myDoc = oldctxt->myDoc;
13194
65.6k
        content = ctxt->myDoc->children;
13195
65.6k
  last = ctxt->myDoc->last;
13196
65.6k
    }
13197
65.6k
    newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
13198
65.6k
    if (newRoot == NULL) {
13199
0
  ctxt->sax = oldsax;
13200
0
  ctxt->dict = NULL;
13201
0
  xmlFreeParserCtxt(ctxt);
13202
0
  if (newDoc != NULL) {
13203
0
      xmlFreeDoc(newDoc);
13204
0
  }
13205
0
  return(XML_ERR_INTERNAL_ERROR);
13206
0
    }
13207
65.6k
    ctxt->myDoc->children = NULL;
13208
65.6k
    ctxt->myDoc->last = NULL;
13209
65.6k
    xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
13210
65.6k
    nodePush(ctxt, ctxt->myDoc->children);
13211
65.6k
    ctxt->instate = XML_PARSER_CONTENT;
13212
65.6k
    ctxt->depth = oldctxt->depth;
13213
13214
65.6k
    ctxt->validate = 0;
13215
65.6k
    ctxt->loadsubset = oldctxt->loadsubset;
13216
65.6k
    if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
13217
  /*
13218
   * ID/IDREF registration will be done in xmlValidateElement below
13219
   */
13220
56.8k
  ctxt->loadsubset |= XML_SKIP_IDS;
13221
56.8k
    }
13222
65.6k
    ctxt->dictNames = oldctxt->dictNames;
13223
65.6k
    ctxt->attsDefault = oldctxt->attsDefault;
13224
65.6k
    ctxt->attsSpecial = oldctxt->attsSpecial;
13225
13226
65.6k
    xmlParseContent(ctxt);
13227
65.6k
    if ((RAW == '<') && (NXT(1) == '/')) {
13228
402
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13229
65.1k
    } else if (RAW != 0) {
13230
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13231
0
    }
13232
65.6k
    if (ctxt->node != ctxt->myDoc->children) {
13233
2.00k
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13234
2.00k
    }
13235
13236
65.6k
    if (!ctxt->wellFormed) {
13237
8.57k
  ret = (xmlParserErrors)ctxt->errNo;
13238
8.57k
        oldctxt->errNo = ctxt->errNo;
13239
8.57k
        oldctxt->wellFormed = 0;
13240
8.57k
        xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13241
57.0k
    } else {
13242
57.0k
        ret = XML_ERR_OK;
13243
57.0k
    }
13244
13245
65.6k
    if ((lst != NULL) && (ret == XML_ERR_OK)) {
13246
55.4k
  xmlNodePtr cur;
13247
13248
  /*
13249
   * Return the newly created nodeset after unlinking it from
13250
   * they pseudo parent.
13251
   */
13252
55.4k
  cur = ctxt->myDoc->children->children;
13253
55.4k
  *lst = cur;
13254
183k
  while (cur != NULL) {
13255
128k
#ifdef LIBXML_VALID_ENABLED
13256
128k
      if ((oldctxt->validate) && (oldctxt->wellFormed) &&
13257
128k
    (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
13258
128k
    (cur->type == XML_ELEMENT_NODE)) {
13259
14.6k
    oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
13260
14.6k
      oldctxt->myDoc, cur);
13261
14.6k
      }
13262
128k
#endif /* LIBXML_VALID_ENABLED */
13263
128k
      cur->parent = NULL;
13264
128k
      cur = cur->next;
13265
128k
  }
13266
55.4k
  ctxt->myDoc->children->children = NULL;
13267
55.4k
    }
13268
65.6k
    if (ctxt->myDoc != NULL) {
13269
65.6k
  xmlFreeNode(ctxt->myDoc->children);
13270
65.6k
        ctxt->myDoc->children = content;
13271
65.6k
        ctxt->myDoc->last = last;
13272
65.6k
    }
13273
13274
    /*
13275
     * Also record the size of the entity parsed
13276
     */
13277
65.6k
    if (ctxt->input != NULL && oldctxt != NULL) {
13278
65.6k
        unsigned long consumed = ctxt->input->consumed;
13279
13280
65.6k
        xmlSaturatedAddSizeT(&consumed, ctxt->input->cur - ctxt->input->base);
13281
13282
65.6k
        xmlSaturatedAdd(&oldctxt->sizeentcopy, consumed);
13283
65.6k
        xmlSaturatedAdd(&oldctxt->sizeentcopy, ctxt->sizeentcopy);
13284
65.6k
    }
13285
13286
65.6k
    oldctxt->nbErrors = ctxt->nbErrors;
13287
65.6k
    oldctxt->nbWarnings = ctxt->nbWarnings;
13288
65.6k
    ctxt->sax = oldsax;
13289
65.6k
    ctxt->dict = NULL;
13290
65.6k
    ctxt->attsDefault = NULL;
13291
65.6k
    ctxt->attsSpecial = NULL;
13292
65.6k
    xmlFreeParserCtxt(ctxt);
13293
65.6k
    if (newDoc != NULL) {
13294
0
  xmlFreeDoc(newDoc);
13295
0
    }
13296
13297
65.6k
    return(ret);
13298
65.6k
}
13299
13300
/**
13301
 * xmlParseInNodeContext:
13302
 * @node:  the context node
13303
 * @data:  the input string
13304
 * @datalen:  the input string length in bytes
13305
 * @options:  a combination of xmlParserOption
13306
 * @lst:  the return value for the set of parsed nodes
13307
 *
13308
 * Parse a well-balanced chunk of an XML document
13309
 * within the context (DTD, namespaces, etc ...) of the given node.
13310
 *
13311
 * The allowed sequence for the data is a Well Balanced Chunk defined by
13312
 * the content production in the XML grammar:
13313
 *
13314
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13315
 *
13316
 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13317
 * error code otherwise
13318
 */
13319
xmlParserErrors
13320
xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13321
0
                      int options, xmlNodePtr *lst) {
13322
0
#ifdef SAX2
13323
0
    xmlParserCtxtPtr ctxt;
13324
0
    xmlDocPtr doc = NULL;
13325
0
    xmlNodePtr fake, cur;
13326
0
    int nsnr = 0;
13327
13328
0
    xmlParserErrors ret = XML_ERR_OK;
13329
13330
    /*
13331
     * check all input parameters, grab the document
13332
     */
13333
0
    if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13334
0
        return(XML_ERR_INTERNAL_ERROR);
13335
0
    switch (node->type) {
13336
0
        case XML_ELEMENT_NODE:
13337
0
        case XML_ATTRIBUTE_NODE:
13338
0
        case XML_TEXT_NODE:
13339
0
        case XML_CDATA_SECTION_NODE:
13340
0
        case XML_ENTITY_REF_NODE:
13341
0
        case XML_PI_NODE:
13342
0
        case XML_COMMENT_NODE:
13343
0
        case XML_DOCUMENT_NODE:
13344
0
        case XML_HTML_DOCUMENT_NODE:
13345
0
      break;
13346
0
  default:
13347
0
      return(XML_ERR_INTERNAL_ERROR);
13348
13349
0
    }
13350
0
    while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13351
0
           (node->type != XML_DOCUMENT_NODE) &&
13352
0
     (node->type != XML_HTML_DOCUMENT_NODE))
13353
0
  node = node->parent;
13354
0
    if (node == NULL)
13355
0
  return(XML_ERR_INTERNAL_ERROR);
13356
0
    if (node->type == XML_ELEMENT_NODE)
13357
0
  doc = node->doc;
13358
0
    else
13359
0
        doc = (xmlDocPtr) node;
13360
0
    if (doc == NULL)
13361
0
  return(XML_ERR_INTERNAL_ERROR);
13362
13363
    /*
13364
     * allocate a context and set-up everything not related to the
13365
     * node position in the tree
13366
     */
13367
0
    if (doc->type == XML_DOCUMENT_NODE)
13368
0
  ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13369
0
#ifdef LIBXML_HTML_ENABLED
13370
0
    else if (doc->type == XML_HTML_DOCUMENT_NODE) {
13371
0
  ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
13372
        /*
13373
         * When parsing in context, it makes no sense to add implied
13374
         * elements like html/body/etc...
13375
         */
13376
0
        options |= HTML_PARSE_NOIMPLIED;
13377
0
    }
13378
0
#endif
13379
0
    else
13380
0
        return(XML_ERR_INTERNAL_ERROR);
13381
13382
0
    if (ctxt == NULL)
13383
0
        return(XML_ERR_NO_MEMORY);
13384
13385
    /*
13386
     * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13387
     * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13388
     * we must wait until the last moment to free the original one.
13389
     */
13390
0
    if (doc->dict != NULL) {
13391
0
        if (ctxt->dict != NULL)
13392
0
      xmlDictFree(ctxt->dict);
13393
0
  ctxt->dict = doc->dict;
13394
0
    } else
13395
0
        options |= XML_PARSE_NODICT;
13396
13397
0
    if (doc->encoding != NULL) {
13398
0
        xmlCharEncodingHandlerPtr hdlr;
13399
13400
0
        if (ctxt->encoding != NULL)
13401
0
      xmlFree((xmlChar *) ctxt->encoding);
13402
0
        ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13403
13404
0
        hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding);
13405
0
        if (hdlr != NULL) {
13406
0
            xmlSwitchToEncoding(ctxt, hdlr);
13407
0
  } else {
13408
0
            return(XML_ERR_UNSUPPORTED_ENCODING);
13409
0
        }
13410
0
    }
13411
13412
0
    xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13413
0
    xmlDetectSAX2(ctxt);
13414
0
    ctxt->myDoc = doc;
13415
    /* parsing in context, i.e. as within existing content */
13416
0
    ctxt->input_id = 2;
13417
0
    ctxt->instate = XML_PARSER_CONTENT;
13418
13419
0
    fake = xmlNewDocComment(node->doc, NULL);
13420
0
    if (fake == NULL) {
13421
0
        xmlFreeParserCtxt(ctxt);
13422
0
  return(XML_ERR_NO_MEMORY);
13423
0
    }
13424
0
    xmlAddChild(node, fake);
13425
13426
0
    if (node->type == XML_ELEMENT_NODE) {
13427
0
  nodePush(ctxt, node);
13428
  /*
13429
   * initialize the SAX2 namespaces stack
13430
   */
13431
0
  cur = node;
13432
0
  while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13433
0
      xmlNsPtr ns = cur->nsDef;
13434
0
      const xmlChar *iprefix, *ihref;
13435
13436
0
      while (ns != NULL) {
13437
0
    if (ctxt->dict) {
13438
0
        iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13439
0
        ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13440
0
    } else {
13441
0
        iprefix = ns->prefix;
13442
0
        ihref = ns->href;
13443
0
    }
13444
13445
0
          if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13446
0
        nsPush(ctxt, iprefix, ihref);
13447
0
        nsnr++;
13448
0
    }
13449
0
    ns = ns->next;
13450
0
      }
13451
0
      cur = cur->parent;
13452
0
  }
13453
0
    }
13454
13455
0
    if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13456
  /*
13457
   * ID/IDREF registration will be done in xmlValidateElement below
13458
   */
13459
0
  ctxt->loadsubset |= XML_SKIP_IDS;
13460
0
    }
13461
13462
0
#ifdef LIBXML_HTML_ENABLED
13463
0
    if (doc->type == XML_HTML_DOCUMENT_NODE)
13464
0
        __htmlParseContent(ctxt);
13465
0
    else
13466
0
#endif
13467
0
  xmlParseContent(ctxt);
13468
13469
0
    nsPop(ctxt, nsnr);
13470
0
    if ((RAW == '<') && (NXT(1) == '/')) {
13471
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13472
0
    } else if (RAW != 0) {
13473
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13474
0
    }
13475
0
    if ((ctxt->node != NULL) && (ctxt->node != node)) {
13476
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13477
0
  ctxt->wellFormed = 0;
13478
0
    }
13479
13480
0
    if (!ctxt->wellFormed) {
13481
0
        if (ctxt->errNo == 0)
13482
0
      ret = XML_ERR_INTERNAL_ERROR;
13483
0
  else
13484
0
      ret = (xmlParserErrors)ctxt->errNo;
13485
0
    } else {
13486
0
        ret = XML_ERR_OK;
13487
0
    }
13488
13489
    /*
13490
     * Return the newly created nodeset after unlinking it from
13491
     * the pseudo sibling.
13492
     */
13493
13494
0
    cur = fake->next;
13495
0
    fake->next = NULL;
13496
0
    node->last = fake;
13497
13498
0
    if (cur != NULL) {
13499
0
  cur->prev = NULL;
13500
0
    }
13501
13502
0
    *lst = cur;
13503
13504
0
    while (cur != NULL) {
13505
0
  cur->parent = NULL;
13506
0
  cur = cur->next;
13507
0
    }
13508
13509
0
    xmlUnlinkNode(fake);
13510
0
    xmlFreeNode(fake);
13511
13512
13513
0
    if (ret != XML_ERR_OK) {
13514
0
        xmlFreeNodeList(*lst);
13515
0
  *lst = NULL;
13516
0
    }
13517
13518
0
    if (doc->dict != NULL)
13519
0
        ctxt->dict = NULL;
13520
0
    xmlFreeParserCtxt(ctxt);
13521
13522
0
    return(ret);
13523
#else /* !SAX2 */
13524
    return(XML_ERR_INTERNAL_ERROR);
13525
#endif
13526
0
}
13527
13528
#ifdef LIBXML_SAX1_ENABLED
13529
/**
13530
 * xmlParseBalancedChunkMemoryRecover:
13531
 * @doc:  the document the chunk pertains to (must not be NULL)
13532
 * @sax:  the SAX handler block (possibly NULL)
13533
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13534
 * @depth:  Used for loop detection, use 0
13535
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13536
 * @lst:  the return value for the set of parsed nodes
13537
 * @recover: return nodes even if the data is broken (use 0)
13538
 *
13539
 *
13540
 * Parse a well-balanced chunk of an XML document
13541
 * called by the parser
13542
 * The allowed sequence for the Well Balanced Chunk is the one defined by
13543
 * the content production in the XML grammar:
13544
 *
13545
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13546
 *
13547
 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13548
 *    the parser error code otherwise
13549
 *
13550
 * In case recover is set to 1, the nodelist will not be empty even if
13551
 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13552
 * some extent.
13553
 */
13554
int
13555
xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13556
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
13557
0
     int recover) {
13558
0
    xmlParserCtxtPtr ctxt;
13559
0
    xmlDocPtr newDoc;
13560
0
    xmlSAXHandlerPtr oldsax = NULL;
13561
0
    xmlNodePtr content, newRoot;
13562
0
    int size;
13563
0
    int ret = 0;
13564
13565
0
    if (depth > 40) {
13566
0
  return(XML_ERR_ENTITY_LOOP);
13567
0
    }
13568
13569
13570
0
    if (lst != NULL)
13571
0
        *lst = NULL;
13572
0
    if (string == NULL)
13573
0
        return(-1);
13574
13575
0
    size = xmlStrlen(string);
13576
13577
0
    ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13578
0
    if (ctxt == NULL) return(-1);
13579
0
    ctxt->userData = ctxt;
13580
0
    if (sax != NULL) {
13581
0
  oldsax = ctxt->sax;
13582
0
        ctxt->sax = sax;
13583
0
  if (user_data != NULL)
13584
0
      ctxt->userData = user_data;
13585
0
    }
13586
0
    newDoc = xmlNewDoc(BAD_CAST "1.0");
13587
0
    if (newDoc == NULL) {
13588
0
  xmlFreeParserCtxt(ctxt);
13589
0
  return(-1);
13590
0
    }
13591
0
    newDoc->properties = XML_DOC_INTERNAL;
13592
0
    if ((doc != NULL) && (doc->dict != NULL)) {
13593
0
        xmlDictFree(ctxt->dict);
13594
0
  ctxt->dict = doc->dict;
13595
0
  xmlDictReference(ctxt->dict);
13596
0
  ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13597
0
  ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13598
0
  ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13599
0
  ctxt->dictNames = 1;
13600
0
    } else {
13601
0
  xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
13602
0
    }
13603
    /* doc == NULL is only supported for historic reasons */
13604
0
    if (doc != NULL) {
13605
0
  newDoc->intSubset = doc->intSubset;
13606
0
  newDoc->extSubset = doc->extSubset;
13607
0
    }
13608
0
    newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13609
0
    if (newRoot == NULL) {
13610
0
  if (sax != NULL)
13611
0
      ctxt->sax = oldsax;
13612
0
  xmlFreeParserCtxt(ctxt);
13613
0
  newDoc->intSubset = NULL;
13614
0
  newDoc->extSubset = NULL;
13615
0
        xmlFreeDoc(newDoc);
13616
0
  return(-1);
13617
0
    }
13618
0
    xmlAddChild((xmlNodePtr) newDoc, newRoot);
13619
0
    nodePush(ctxt, newRoot);
13620
    /* doc == NULL is only supported for historic reasons */
13621
0
    if (doc == NULL) {
13622
0
  ctxt->myDoc = newDoc;
13623
0
    } else {
13624
0
  ctxt->myDoc = newDoc;
13625
0
  newDoc->children->doc = doc;
13626
  /* Ensure that doc has XML spec namespace */
13627
0
  xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13628
0
  newDoc->oldNs = doc->oldNs;
13629
0
    }
13630
0
    ctxt->instate = XML_PARSER_CONTENT;
13631
0
    ctxt->input_id = 2;
13632
0
    ctxt->depth = depth;
13633
13634
    /*
13635
     * Doing validity checking on chunk doesn't make sense
13636
     */
13637
0
    ctxt->validate = 0;
13638
0
    ctxt->loadsubset = 0;
13639
0
    xmlDetectSAX2(ctxt);
13640
13641
0
    if ( doc != NULL ){
13642
0
        content = doc->children;
13643
0
        doc->children = NULL;
13644
0
        xmlParseContent(ctxt);
13645
0
        doc->children = content;
13646
0
    }
13647
0
    else {
13648
0
        xmlParseContent(ctxt);
13649
0
    }
13650
0
    if ((RAW == '<') && (NXT(1) == '/')) {
13651
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13652
0
    } else if (RAW != 0) {
13653
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13654
0
    }
13655
0
    if (ctxt->node != newDoc->children) {
13656
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13657
0
    }
13658
13659
0
    if (!ctxt->wellFormed) {
13660
0
        if (ctxt->errNo == 0)
13661
0
      ret = 1;
13662
0
  else
13663
0
      ret = ctxt->errNo;
13664
0
    } else {
13665
0
      ret = 0;
13666
0
    }
13667
13668
0
    if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13669
0
  xmlNodePtr cur;
13670
13671
  /*
13672
   * Return the newly created nodeset after unlinking it from
13673
   * they pseudo parent.
13674
   */
13675
0
  cur = newDoc->children->children;
13676
0
  *lst = cur;
13677
0
  while (cur != NULL) {
13678
0
      xmlSetTreeDoc(cur, doc);
13679
0
      cur->parent = NULL;
13680
0
      cur = cur->next;
13681
0
  }
13682
0
  newDoc->children->children = NULL;
13683
0
    }
13684
13685
0
    if (sax != NULL)
13686
0
  ctxt->sax = oldsax;
13687
0
    xmlFreeParserCtxt(ctxt);
13688
0
    newDoc->intSubset = NULL;
13689
0
    newDoc->extSubset = NULL;
13690
    /* This leaks the namespace list if doc == NULL */
13691
0
    newDoc->oldNs = NULL;
13692
0
    xmlFreeDoc(newDoc);
13693
13694
0
    return(ret);
13695
0
}
13696
13697
/**
13698
 * xmlSAXParseEntity:
13699
 * @sax:  the SAX handler block
13700
 * @filename:  the filename
13701
 *
13702
 * DEPRECATED: Don't use.
13703
 *
13704
 * parse an XML external entity out of context and build a tree.
13705
 * It use the given SAX function block to handle the parsing callback.
13706
 * If sax is NULL, fallback to the default DOM tree building routines.
13707
 *
13708
 * [78] extParsedEnt ::= TextDecl? content
13709
 *
13710
 * This correspond to a "Well Balanced" chunk
13711
 *
13712
 * Returns the resulting document tree
13713
 */
13714
13715
xmlDocPtr
13716
0
xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13717
0
    xmlDocPtr ret;
13718
0
    xmlParserCtxtPtr ctxt;
13719
13720
0
    ctxt = xmlCreateFileParserCtxt(filename);
13721
0
    if (ctxt == NULL) {
13722
0
  return(NULL);
13723
0
    }
13724
0
    if (sax != NULL) {
13725
0
  if (ctxt->sax != NULL)
13726
0
      xmlFree(ctxt->sax);
13727
0
        ctxt->sax = sax;
13728
0
        ctxt->userData = NULL;
13729
0
    }
13730
13731
0
    xmlParseExtParsedEnt(ctxt);
13732
13733
0
    if (ctxt->wellFormed)
13734
0
  ret = ctxt->myDoc;
13735
0
    else {
13736
0
        ret = NULL;
13737
0
        xmlFreeDoc(ctxt->myDoc);
13738
0
        ctxt->myDoc = NULL;
13739
0
    }
13740
0
    if (sax != NULL)
13741
0
        ctxt->sax = NULL;
13742
0
    xmlFreeParserCtxt(ctxt);
13743
13744
0
    return(ret);
13745
0
}
13746
13747
/**
13748
 * xmlParseEntity:
13749
 * @filename:  the filename
13750
 *
13751
 * parse an XML external entity out of context and build a tree.
13752
 *
13753
 * [78] extParsedEnt ::= TextDecl? content
13754
 *
13755
 * This correspond to a "Well Balanced" chunk
13756
 *
13757
 * Returns the resulting document tree
13758
 */
13759
13760
xmlDocPtr
13761
0
xmlParseEntity(const char *filename) {
13762
0
    return(xmlSAXParseEntity(NULL, filename));
13763
0
}
13764
#endif /* LIBXML_SAX1_ENABLED */
13765
13766
/**
13767
 * xmlCreateEntityParserCtxtInternal:
13768
 * @URL:  the entity URL
13769
 * @ID:  the entity PUBLIC ID
13770
 * @base:  a possible base for the target URI
13771
 * @pctx:  parser context used to set options on new context
13772
 *
13773
 * Create a parser context for an external entity
13774
 * Automatic support for ZLIB/Compress compressed document is provided
13775
 * by default if found at compile-time.
13776
 *
13777
 * Returns the new parser context or NULL
13778
 */
13779
static xmlParserCtxtPtr
13780
xmlCreateEntityParserCtxtInternal(xmlSAXHandlerPtr sax, void *userData,
13781
        const xmlChar *URL, const xmlChar *ID, const xmlChar *base,
13782
116k
        xmlParserCtxtPtr pctx) {
13783
116k
    xmlParserCtxtPtr ctxt;
13784
116k
    xmlParserInputPtr inputStream;
13785
116k
    char *directory = NULL;
13786
116k
    xmlChar *uri;
13787
13788
116k
    ctxt = xmlNewSAXParserCtxt(sax, userData);
13789
116k
    if (ctxt == NULL) {
13790
0
  return(NULL);
13791
0
    }
13792
13793
116k
    if (pctx != NULL) {
13794
116k
        ctxt->options = pctx->options;
13795
116k
        ctxt->_private = pctx->_private;
13796
116k
  ctxt->input_id = pctx->input_id;
13797
116k
    }
13798
13799
    /* Don't read from stdin. */
13800
116k
    if (xmlStrcmp(URL, BAD_CAST "-") == 0)
13801
7
        URL = BAD_CAST "./-";
13802
13803
116k
    uri = xmlBuildURI(URL, base);
13804
13805
116k
    if (uri == NULL) {
13806
5.94k
  inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
13807
5.94k
  if (inputStream == NULL) {
13808
5.92k
      xmlFreeParserCtxt(ctxt);
13809
5.92k
      return(NULL);
13810
5.92k
  }
13811
13812
26
  inputPush(ctxt, inputStream);
13813
13814
26
  if ((ctxt->directory == NULL) && (directory == NULL))
13815
26
      directory = xmlParserGetDirectory((char *)URL);
13816
26
  if ((ctxt->directory == NULL) && (directory != NULL))
13817
26
      ctxt->directory = directory;
13818
110k
    } else {
13819
110k
  inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
13820
110k
  if (inputStream == NULL) {
13821
88.9k
      xmlFree(uri);
13822
88.9k
      xmlFreeParserCtxt(ctxt);
13823
88.9k
      return(NULL);
13824
88.9k
  }
13825
13826
21.9k
  inputPush(ctxt, inputStream);
13827
13828
21.9k
  if ((ctxt->directory == NULL) && (directory == NULL))
13829
21.9k
      directory = xmlParserGetDirectory((char *)uri);
13830
21.9k
  if ((ctxt->directory == NULL) && (directory != NULL))
13831
21.9k
      ctxt->directory = directory;
13832
21.9k
  xmlFree(uri);
13833
21.9k
    }
13834
21.9k
    return(ctxt);
13835
116k
}
13836
13837
/**
13838
 * xmlCreateEntityParserCtxt:
13839
 * @URL:  the entity URL
13840
 * @ID:  the entity PUBLIC ID
13841
 * @base:  a possible base for the target URI
13842
 *
13843
 * Create a parser context for an external entity
13844
 * Automatic support for ZLIB/Compress compressed document is provided
13845
 * by default if found at compile-time.
13846
 *
13847
 * Returns the new parser context or NULL
13848
 */
13849
xmlParserCtxtPtr
13850
xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
13851
0
                    const xmlChar *base) {
13852
0
    return xmlCreateEntityParserCtxtInternal(NULL, NULL, URL, ID, base, NULL);
13853
13854
0
}
13855
13856
/************************************************************************
13857
 *                  *
13858
 *    Front ends when parsing from a file     *
13859
 *                  *
13860
 ************************************************************************/
13861
13862
/**
13863
 * xmlCreateURLParserCtxt:
13864
 * @filename:  the filename or URL
13865
 * @options:  a combination of xmlParserOption
13866
 *
13867
 * Create a parser context for a file or URL content.
13868
 * Automatic support for ZLIB/Compress compressed document is provided
13869
 * by default if found at compile-time and for file accesses
13870
 *
13871
 * Returns the new parser context or NULL
13872
 */
13873
xmlParserCtxtPtr
13874
xmlCreateURLParserCtxt(const char *filename, int options)
13875
0
{
13876
0
    xmlParserCtxtPtr ctxt;
13877
0
    xmlParserInputPtr inputStream;
13878
0
    char *directory = NULL;
13879
13880
0
    ctxt = xmlNewParserCtxt();
13881
0
    if (ctxt == NULL) {
13882
0
  xmlErrMemory(NULL, "cannot allocate parser context");
13883
0
  return(NULL);
13884
0
    }
13885
13886
0
    if (options)
13887
0
  xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13888
0
    ctxt->linenumbers = 1;
13889
13890
0
    inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
13891
0
    if (inputStream == NULL) {
13892
0
  xmlFreeParserCtxt(ctxt);
13893
0
  return(NULL);
13894
0
    }
13895
13896
0
    inputPush(ctxt, inputStream);
13897
0
    if ((ctxt->directory == NULL) && (directory == NULL))
13898
0
        directory = xmlParserGetDirectory(filename);
13899
0
    if ((ctxt->directory == NULL) && (directory != NULL))
13900
0
        ctxt->directory = directory;
13901
13902
0
    return(ctxt);
13903
0
}
13904
13905
/**
13906
 * xmlCreateFileParserCtxt:
13907
 * @filename:  the filename
13908
 *
13909
 * Create a parser context for a file content.
13910
 * Automatic support for ZLIB/Compress compressed document is provided
13911
 * by default if found at compile-time.
13912
 *
13913
 * Returns the new parser context or NULL
13914
 */
13915
xmlParserCtxtPtr
13916
xmlCreateFileParserCtxt(const char *filename)
13917
0
{
13918
0
    return(xmlCreateURLParserCtxt(filename, 0));
13919
0
}
13920
13921
#ifdef LIBXML_SAX1_ENABLED
13922
/**
13923
 * xmlSAXParseFileWithData:
13924
 * @sax:  the SAX handler block
13925
 * @filename:  the filename
13926
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
13927
 *             documents
13928
 * @data:  the userdata
13929
 *
13930
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
13931
 *
13932
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13933
 * compressed document is provided by default if found at compile-time.
13934
 * It use the given SAX function block to handle the parsing callback.
13935
 * If sax is NULL, fallback to the default DOM tree building routines.
13936
 *
13937
 * User data (void *) is stored within the parser context in the
13938
 * context's _private member, so it is available nearly everywhere in libxml
13939
 *
13940
 * Returns the resulting document tree
13941
 */
13942
13943
xmlDocPtr
13944
xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
13945
0
                        int recovery, void *data) {
13946
0
    xmlDocPtr ret;
13947
0
    xmlParserCtxtPtr ctxt;
13948
13949
0
    xmlInitParser();
13950
13951
0
    ctxt = xmlCreateFileParserCtxt(filename);
13952
0
    if (ctxt == NULL) {
13953
0
  return(NULL);
13954
0
    }
13955
0
    if (sax != NULL) {
13956
0
  if (ctxt->sax != NULL)
13957
0
      xmlFree(ctxt->sax);
13958
0
        ctxt->sax = sax;
13959
0
    }
13960
0
    xmlDetectSAX2(ctxt);
13961
0
    if (data!=NULL) {
13962
0
  ctxt->_private = data;
13963
0
    }
13964
13965
0
    if (ctxt->directory == NULL)
13966
0
        ctxt->directory = xmlParserGetDirectory(filename);
13967
13968
0
    ctxt->recovery = recovery;
13969
13970
0
    xmlParseDocument(ctxt);
13971
13972
0
    if ((ctxt->wellFormed) || recovery) {
13973
0
        ret = ctxt->myDoc;
13974
0
  if ((ret != NULL) && (ctxt->input->buf != NULL)) {
13975
0
      if (ctxt->input->buf->compressed > 0)
13976
0
    ret->compression = 9;
13977
0
      else
13978
0
    ret->compression = ctxt->input->buf->compressed;
13979
0
  }
13980
0
    }
13981
0
    else {
13982
0
       ret = NULL;
13983
0
       xmlFreeDoc(ctxt->myDoc);
13984
0
       ctxt->myDoc = NULL;
13985
0
    }
13986
0
    if (sax != NULL)
13987
0
        ctxt->sax = NULL;
13988
0
    xmlFreeParserCtxt(ctxt);
13989
13990
0
    return(ret);
13991
0
}
13992
13993
/**
13994
 * xmlSAXParseFile:
13995
 * @sax:  the SAX handler block
13996
 * @filename:  the filename
13997
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
13998
 *             documents
13999
 *
14000
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
14001
 *
14002
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14003
 * compressed document is provided by default if found at compile-time.
14004
 * It use the given SAX function block to handle the parsing callback.
14005
 * If sax is NULL, fallback to the default DOM tree building routines.
14006
 *
14007
 * Returns the resulting document tree
14008
 */
14009
14010
xmlDocPtr
14011
xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
14012
0
                          int recovery) {
14013
0
    return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
14014
0
}
14015
14016
/**
14017
 * xmlRecoverDoc:
14018
 * @cur:  a pointer to an array of xmlChar
14019
 *
14020
 * DEPRECATED: Use xmlReadDoc with XML_PARSE_RECOVER.
14021
 *
14022
 * parse an XML in-memory document and build a tree.
14023
 * In the case the document is not Well Formed, a attempt to build a
14024
 * tree is tried anyway
14025
 *
14026
 * Returns the resulting document tree or NULL in case of failure
14027
 */
14028
14029
xmlDocPtr
14030
0
xmlRecoverDoc(const xmlChar *cur) {
14031
0
    return(xmlSAXParseDoc(NULL, cur, 1));
14032
0
}
14033
14034
/**
14035
 * xmlParseFile:
14036
 * @filename:  the filename
14037
 *
14038
 * DEPRECATED: Use xmlReadFile.
14039
 *
14040
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14041
 * compressed document is provided by default if found at compile-time.
14042
 *
14043
 * Returns the resulting document tree if the file was wellformed,
14044
 * NULL otherwise.
14045
 */
14046
14047
xmlDocPtr
14048
0
xmlParseFile(const char *filename) {
14049
0
    return(xmlSAXParseFile(NULL, filename, 0));
14050
0
}
14051
14052
/**
14053
 * xmlRecoverFile:
14054
 * @filename:  the filename
14055
 *
14056
 * DEPRECATED: Use xmlReadFile with XML_PARSE_RECOVER.
14057
 *
14058
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14059
 * compressed document is provided by default if found at compile-time.
14060
 * In the case the document is not Well Formed, it attempts to build
14061
 * a tree anyway
14062
 *
14063
 * Returns the resulting document tree or NULL in case of failure
14064
 */
14065
14066
xmlDocPtr
14067
0
xmlRecoverFile(const char *filename) {
14068
0
    return(xmlSAXParseFile(NULL, filename, 1));
14069
0
}
14070
14071
14072
/**
14073
 * xmlSetupParserForBuffer:
14074
 * @ctxt:  an XML parser context
14075
 * @buffer:  a xmlChar * buffer
14076
 * @filename:  a file name
14077
 *
14078
 * DEPRECATED: Don't use.
14079
 *
14080
 * Setup the parser context to parse a new buffer; Clears any prior
14081
 * contents from the parser context. The buffer parameter must not be
14082
 * NULL, but the filename parameter can be
14083
 */
14084
void
14085
xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
14086
                             const char* filename)
14087
0
{
14088
0
    xmlParserInputPtr input;
14089
14090
0
    if ((ctxt == NULL) || (buffer == NULL))
14091
0
        return;
14092
14093
0
    input = xmlNewInputStream(ctxt);
14094
0
    if (input == NULL) {
14095
0
        xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
14096
0
        xmlClearParserCtxt(ctxt);
14097
0
        return;
14098
0
    }
14099
14100
0
    xmlClearParserCtxt(ctxt);
14101
0
    if (filename != NULL)
14102
0
        input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
14103
0
    input->base = buffer;
14104
0
    input->cur = buffer;
14105
0
    input->end = &buffer[xmlStrlen(buffer)];
14106
0
    inputPush(ctxt, input);
14107
0
}
14108
14109
/**
14110
 * xmlSAXUserParseFile:
14111
 * @sax:  a SAX handler
14112
 * @user_data:  The user data returned on SAX callbacks
14113
 * @filename:  a file name
14114
 *
14115
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
14116
 *
14117
 * parse an XML file and call the given SAX handler routines.
14118
 * Automatic support for ZLIB/Compress compressed document is provided
14119
 *
14120
 * Returns 0 in case of success or a error number otherwise
14121
 */
14122
int
14123
xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
14124
0
                    const char *filename) {
14125
0
    int ret = 0;
14126
0
    xmlParserCtxtPtr ctxt;
14127
14128
0
    ctxt = xmlCreateFileParserCtxt(filename);
14129
0
    if (ctxt == NULL) return -1;
14130
0
    if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14131
0
  xmlFree(ctxt->sax);
14132
0
    ctxt->sax = sax;
14133
0
    xmlDetectSAX2(ctxt);
14134
14135
0
    if (user_data != NULL)
14136
0
  ctxt->userData = user_data;
14137
14138
0
    xmlParseDocument(ctxt);
14139
14140
0
    if (ctxt->wellFormed)
14141
0
  ret = 0;
14142
0
    else {
14143
0
        if (ctxt->errNo != 0)
14144
0
      ret = ctxt->errNo;
14145
0
  else
14146
0
      ret = -1;
14147
0
    }
14148
0
    if (sax != NULL)
14149
0
  ctxt->sax = NULL;
14150
0
    if (ctxt->myDoc != NULL) {
14151
0
        xmlFreeDoc(ctxt->myDoc);
14152
0
  ctxt->myDoc = NULL;
14153
0
    }
14154
0
    xmlFreeParserCtxt(ctxt);
14155
14156
0
    return ret;
14157
0
}
14158
#endif /* LIBXML_SAX1_ENABLED */
14159
14160
/************************************************************************
14161
 *                  *
14162
 *    Front ends when parsing from memory     *
14163
 *                  *
14164
 ************************************************************************/
14165
14166
/**
14167
 * xmlCreateMemoryParserCtxt:
14168
 * @buffer:  a pointer to a char array
14169
 * @size:  the size of the array
14170
 *
14171
 * Create a parser context for an XML in-memory document.
14172
 *
14173
 * Returns the new parser context or NULL
14174
 */
14175
xmlParserCtxtPtr
14176
433k
xmlCreateMemoryParserCtxt(const char *buffer, int size) {
14177
433k
    xmlParserCtxtPtr ctxt;
14178
433k
    xmlParserInputPtr input;
14179
433k
    xmlParserInputBufferPtr buf;
14180
14181
433k
    if (buffer == NULL)
14182
0
  return(NULL);
14183
433k
    if (size <= 0)
14184
4.81k
  return(NULL);
14185
14186
428k
    ctxt = xmlNewParserCtxt();
14187
428k
    if (ctxt == NULL)
14188
0
  return(NULL);
14189
14190
428k
    buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14191
428k
    if (buf == NULL) {
14192
0
  xmlFreeParserCtxt(ctxt);
14193
0
  return(NULL);
14194
0
    }
14195
14196
428k
    input = xmlNewInputStream(ctxt);
14197
428k
    if (input == NULL) {
14198
0
  xmlFreeParserInputBuffer(buf);
14199
0
  xmlFreeParserCtxt(ctxt);
14200
0
  return(NULL);
14201
0
    }
14202
14203
428k
    input->filename = NULL;
14204
428k
    input->buf = buf;
14205
428k
    xmlBufResetInput(input->buf->buffer, input);
14206
14207
428k
    inputPush(ctxt, input);
14208
428k
    return(ctxt);
14209
428k
}
14210
14211
#ifdef LIBXML_SAX1_ENABLED
14212
/**
14213
 * xmlSAXParseMemoryWithData:
14214
 * @sax:  the SAX handler block
14215
 * @buffer:  an pointer to a char array
14216
 * @size:  the size of the array
14217
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14218
 *             documents
14219
 * @data:  the userdata
14220
 *
14221
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14222
 *
14223
 * parse an XML in-memory block and use the given SAX function block
14224
 * to handle the parsing callback. If sax is NULL, fallback to the default
14225
 * DOM tree building routines.
14226
 *
14227
 * User data (void *) is stored within the parser context in the
14228
 * context's _private member, so it is available nearly everywhere in libxml
14229
 *
14230
 * Returns the resulting document tree
14231
 */
14232
14233
xmlDocPtr
14234
xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14235
0
            int size, int recovery, void *data) {
14236
0
    xmlDocPtr ret;
14237
0
    xmlParserCtxtPtr ctxt;
14238
14239
0
    xmlInitParser();
14240
14241
0
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14242
0
    if (ctxt == NULL) return(NULL);
14243
0
    if (sax != NULL) {
14244
0
  if (ctxt->sax != NULL)
14245
0
      xmlFree(ctxt->sax);
14246
0
        ctxt->sax = sax;
14247
0
    }
14248
0
    xmlDetectSAX2(ctxt);
14249
0
    if (data!=NULL) {
14250
0
  ctxt->_private=data;
14251
0
    }
14252
14253
0
    ctxt->recovery = recovery;
14254
14255
0
    xmlParseDocument(ctxt);
14256
14257
0
    if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14258
0
    else {
14259
0
       ret = NULL;
14260
0
       xmlFreeDoc(ctxt->myDoc);
14261
0
       ctxt->myDoc = NULL;
14262
0
    }
14263
0
    if (sax != NULL)
14264
0
  ctxt->sax = NULL;
14265
0
    xmlFreeParserCtxt(ctxt);
14266
14267
0
    return(ret);
14268
0
}
14269
14270
/**
14271
 * xmlSAXParseMemory:
14272
 * @sax:  the SAX handler block
14273
 * @buffer:  an pointer to a char array
14274
 * @size:  the size of the array
14275
 * @recovery:  work in recovery mode, i.e. tries to read not Well Formed
14276
 *             documents
14277
 *
14278
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14279
 *
14280
 * parse an XML in-memory block and use the given SAX function block
14281
 * to handle the parsing callback. If sax is NULL, fallback to the default
14282
 * DOM tree building routines.
14283
 *
14284
 * Returns the resulting document tree
14285
 */
14286
xmlDocPtr
14287
xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14288
0
            int size, int recovery) {
14289
0
    return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
14290
0
}
14291
14292
/**
14293
 * xmlParseMemory:
14294
 * @buffer:  an pointer to a char array
14295
 * @size:  the size of the array
14296
 *
14297
 * DEPRECATED: Use xmlReadMemory.
14298
 *
14299
 * parse an XML in-memory block and build a tree.
14300
 *
14301
 * Returns the resulting document tree
14302
 */
14303
14304
0
xmlDocPtr xmlParseMemory(const char *buffer, int size) {
14305
0
   return(xmlSAXParseMemory(NULL, buffer, size, 0));
14306
0
}
14307
14308
/**
14309
 * xmlRecoverMemory:
14310
 * @buffer:  an pointer to a char array
14311
 * @size:  the size of the array
14312
 *
14313
 * DEPRECATED: Use xmlReadMemory with XML_PARSE_RECOVER.
14314
 *
14315
 * parse an XML in-memory block and build a tree.
14316
 * In the case the document is not Well Formed, an attempt to
14317
 * build a tree is tried anyway
14318
 *
14319
 * Returns the resulting document tree or NULL in case of error
14320
 */
14321
14322
0
xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
14323
0
   return(xmlSAXParseMemory(NULL, buffer, size, 1));
14324
0
}
14325
14326
/**
14327
 * xmlSAXUserParseMemory:
14328
 * @sax:  a SAX handler
14329
 * @user_data:  The user data returned on SAX callbacks
14330
 * @buffer:  an in-memory XML document input
14331
 * @size:  the length of the XML document in bytes
14332
 *
14333
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14334
 *
14335
 * parse an XML in-memory buffer and call the given SAX handler routines.
14336
 *
14337
 * Returns 0 in case of success or a error number otherwise
14338
 */
14339
int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
14340
0
        const char *buffer, int size) {
14341
0
    int ret = 0;
14342
0
    xmlParserCtxtPtr ctxt;
14343
14344
0
    xmlInitParser();
14345
14346
0
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14347
0
    if (ctxt == NULL) return -1;
14348
0
    if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14349
0
        xmlFree(ctxt->sax);
14350
0
    ctxt->sax = sax;
14351
0
    xmlDetectSAX2(ctxt);
14352
14353
0
    if (user_data != NULL)
14354
0
  ctxt->userData = user_data;
14355
14356
0
    xmlParseDocument(ctxt);
14357
14358
0
    if (ctxt->wellFormed)
14359
0
  ret = 0;
14360
0
    else {
14361
0
        if (ctxt->errNo != 0)
14362
0
      ret = ctxt->errNo;
14363
0
  else
14364
0
      ret = -1;
14365
0
    }
14366
0
    if (sax != NULL)
14367
0
        ctxt->sax = NULL;
14368
0
    if (ctxt->myDoc != NULL) {
14369
0
        xmlFreeDoc(ctxt->myDoc);
14370
0
  ctxt->myDoc = NULL;
14371
0
    }
14372
0
    xmlFreeParserCtxt(ctxt);
14373
14374
0
    return ret;
14375
0
}
14376
#endif /* LIBXML_SAX1_ENABLED */
14377
14378
/**
14379
 * xmlCreateDocParserCtxt:
14380
 * @cur:  a pointer to an array of xmlChar
14381
 *
14382
 * Creates a parser context for an XML in-memory document.
14383
 *
14384
 * Returns the new parser context or NULL
14385
 */
14386
xmlParserCtxtPtr
14387
0
xmlCreateDocParserCtxt(const xmlChar *cur) {
14388
0
    int len;
14389
14390
0
    if (cur == NULL)
14391
0
  return(NULL);
14392
0
    len = xmlStrlen(cur);
14393
0
    return(xmlCreateMemoryParserCtxt((const char *)cur, len));
14394
0
}
14395
14396
#ifdef LIBXML_SAX1_ENABLED
14397
/**
14398
 * xmlSAXParseDoc:
14399
 * @sax:  the SAX handler block
14400
 * @cur:  a pointer to an array of xmlChar
14401
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14402
 *             documents
14403
 *
14404
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadDoc.
14405
 *
14406
 * parse an XML in-memory document and build a tree.
14407
 * It use the given SAX function block to handle the parsing callback.
14408
 * If sax is NULL, fallback to the default DOM tree building routines.
14409
 *
14410
 * Returns the resulting document tree
14411
 */
14412
14413
xmlDocPtr
14414
0
xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
14415
0
    xmlDocPtr ret;
14416
0
    xmlParserCtxtPtr ctxt;
14417
0
    xmlSAXHandlerPtr oldsax = NULL;
14418
14419
0
    if (cur == NULL) return(NULL);
14420
14421
14422
0
    ctxt = xmlCreateDocParserCtxt(cur);
14423
0
    if (ctxt == NULL) return(NULL);
14424
0
    if (sax != NULL) {
14425
0
        oldsax = ctxt->sax;
14426
0
        ctxt->sax = sax;
14427
0
        ctxt->userData = NULL;
14428
0
    }
14429
0
    xmlDetectSAX2(ctxt);
14430
14431
0
    xmlParseDocument(ctxt);
14432
0
    if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14433
0
    else {
14434
0
       ret = NULL;
14435
0
       xmlFreeDoc(ctxt->myDoc);
14436
0
       ctxt->myDoc = NULL;
14437
0
    }
14438
0
    if (sax != NULL)
14439
0
  ctxt->sax = oldsax;
14440
0
    xmlFreeParserCtxt(ctxt);
14441
14442
0
    return(ret);
14443
0
}
14444
14445
/**
14446
 * xmlParseDoc:
14447
 * @cur:  a pointer to an array of xmlChar
14448
 *
14449
 * DEPRECATED: Use xmlReadDoc.
14450
 *
14451
 * parse an XML in-memory document and build a tree.
14452
 *
14453
 * Returns the resulting document tree
14454
 */
14455
14456
xmlDocPtr
14457
0
xmlParseDoc(const xmlChar *cur) {
14458
0
    return(xmlSAXParseDoc(NULL, cur, 0));
14459
0
}
14460
#endif /* LIBXML_SAX1_ENABLED */
14461
14462
#ifdef LIBXML_LEGACY_ENABLED
14463
/************************************************************************
14464
 *                  *
14465
 *  Specific function to keep track of entities references    *
14466
 *  and used by the XSLT debugger         *
14467
 *                  *
14468
 ************************************************************************/
14469
14470
static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14471
14472
/**
14473
 * xmlAddEntityReference:
14474
 * @ent : A valid entity
14475
 * @firstNode : A valid first node for children of entity
14476
 * @lastNode : A valid last node of children entity
14477
 *
14478
 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14479
 */
14480
static void
14481
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14482
                      xmlNodePtr lastNode)
14483
{
14484
    if (xmlEntityRefFunc != NULL) {
14485
        (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14486
    }
14487
}
14488
14489
14490
/**
14491
 * xmlSetEntityReferenceFunc:
14492
 * @func: A valid function
14493
 *
14494
 * Set the function to call call back when a xml reference has been made
14495
 */
14496
void
14497
xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14498
{
14499
    xmlEntityRefFunc = func;
14500
}
14501
#endif /* LIBXML_LEGACY_ENABLED */
14502
14503
/************************************************************************
14504
 *                  *
14505
 *        Miscellaneous       *
14506
 *                  *
14507
 ************************************************************************/
14508
14509
/* Non-zero once xmlInitParser() has completed; cleared by xmlCleanupParser(). */
static int xmlParserInitialized = 0;

/**
 * xmlInitParser:
 *
 * Initialization function for the XML parser.
 * This is not reentrant. Call once before processing in case of
 * use in multithreaded programs.
 *
 * Calls after the first successful one return immediately.
 */

void
xmlInitParser(void) {
    /*
     * Note that the initialization code must not make memory allocations.
     */
    if (xmlParserInitialized != 0)
        return;

#ifdef LIBXML_THREAD_ENABLED
    /*
     * Re-test the flag while holding the global init mutex: another
     * thread may have finished the initialization in the meantime.
     */
    __xmlGlobalInitMutexLock();
    if (xmlParserInitialized != 0) {
        __xmlGlobalInitMutexUnlock();
        return;
    }
#endif

#if defined(_WIN32) && (!defined(LIBXML_STATIC) || defined(LIBXML_STATIC_FOR_DLL))
    /* Only register the exit-time cleanup with the default deallocator. */
    if (xmlFree == free)
        atexit(xmlCleanupParser);
#endif

    /* Bring up the individual modules; the call order is kept as is. */
    xmlInitThreadsInternal();
    xmlInitGlobalsInternal();
    xmlInitMemoryInternal();
    __xmlInitializeDict();
    xmlInitEncodingInternal();
    xmlRegisterDefaultInputCallbacks();
#ifdef LIBXML_OUTPUT_ENABLED
    xmlRegisterDefaultOutputCallbacks();
#endif /* LIBXML_OUTPUT_ENABLED */
#if defined(LIBXML_XPATH_ENABLED) || defined(LIBXML_SCHEMAS_ENABLED)
    xmlInitXPathInternal();
#endif

    xmlParserInitialized = 1;

#ifdef LIBXML_THREAD_ENABLED
    __xmlGlobalInitMutexUnlock();
#endif
}
14554
14555
/**
14556
 * xmlCleanupParser:
14557
 *
14558
 * This function name is somewhat misleading. It does not clean up
14559
 * parser state, it cleans up memory allocated by the library itself.
14560
 * It is a cleanup function for the XML library. It tries to reclaim all
14561
 * related global memory allocated for the library processing.
14562
 * It doesn't deallocate any document related memory. One should
14563
 * call xmlCleanupParser() only when the process has finished using
14564
 * the library and all XML/HTML documents built with it.
14565
 * See also xmlInitParser() which has the opposite function of preparing
14566
 * the library for operations.
14567
 *
14568
 * WARNING: if your application is multithreaded or has plugin support
14569
 *          calling this may crash the application if another thread or
14570
 *          a plugin is still using libxml2. It's sometimes very hard to
14571
 *          guess if libxml2 is in use in the application, some libraries
14572
 *          or plugins may use it without notice. In case of doubt abstain
14573
 *          from calling this function or do it just before calling exit()
14574
 *          to avoid leak reports from valgrind !
14575
 */
14576
14577
void
14578
0
xmlCleanupParser(void) {
14579
0
    if (!xmlParserInitialized)
14580
0
  return;
14581
14582
0
    xmlCleanupCharEncodingHandlers();
14583
0
#ifdef LIBXML_CATALOG_ENABLED
14584
0
    xmlCatalogCleanup();
14585
0
#endif
14586
0
    xmlCleanupDictInternal();
14587
0
    xmlCleanupInputCallbacks();
14588
0
#ifdef LIBXML_OUTPUT_ENABLED
14589
0
    xmlCleanupOutputCallbacks();
14590
0
#endif
14591
0
#ifdef LIBXML_SCHEMAS_ENABLED
14592
0
    xmlSchemaCleanupTypes();
14593
0
    xmlRelaxNGCleanupTypes();
14594
0
#endif
14595
0
    xmlCleanupGlobalsInternal();
14596
0
    xmlCleanupThreadsInternal();
14597
0
    xmlCleanupMemoryInternal();
14598
0
    xmlParserInitialized = 0;
14599
0
}
14600
14601
#if defined(HAVE_ATTRIBUTE_DESTRUCTOR) && !defined(LIBXML_STATIC) && \
    !defined(_WIN32)
/*
 * xmlDestructor:
 *
 * Destructor hook (see ATTRIBUTE_DESTRUCTOR) that runs the library
 * cleanup, but only while the default deallocator is installed.
 */
static void
ATTRIBUTE_DESTRUCTOR
xmlDestructor(void) {
    /*
     * Calling custom deallocation functions in a destructor can cause
     * problems, for example with Nokogiri.
     */
    if (xmlFree != free)
        return;

    xmlCleanupParser();
}
#endif
14614
14615
/************************************************************************
14616
 *                  *
14617
 *  New set (2.6.0) of simpler and more flexible APIs   *
14618
 *                  *
14619
 ************************************************************************/
14620
14621
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A NULL @str is ignored; when "dict" is NULL the
 * string is always freed.
 *
 * The expansion is wrapped in do { } while (0) so that it behaves as a
 * single statement: it must be followed by a semicolon and can safely be
 * used as the body of an unbraced if/else.
 */
#define DICT_FREE(str)						\
    do {							\
        if ((str) && ((!dict) ||				\
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
            xmlFree((char *)(str));				\
    } while (0)
14632
14633
/**
14634
 * xmlCtxtReset:
14635
 * @ctxt: an XML parser context
14636
 *
14637
 * Reset a parser context
14638
 */
14639
void
14640
xmlCtxtReset(xmlParserCtxtPtr ctxt)
14641
0
{
14642
0
    xmlParserInputPtr input;
14643
0
    xmlDictPtr dict;
14644
14645
0
    if (ctxt == NULL)
14646
0
        return;
14647
14648
0
    dict = ctxt->dict;
14649
14650
0
    while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14651
0
        xmlFreeInputStream(input);
14652
0
    }
14653
0
    ctxt->inputNr = 0;
14654
0
    ctxt->input = NULL;
14655
14656
0
    ctxt->spaceNr = 0;
14657
0
    if (ctxt->spaceTab != NULL) {
14658
0
  ctxt->spaceTab[0] = -1;
14659
0
  ctxt->space = &ctxt->spaceTab[0];
14660
0
    } else {
14661
0
        ctxt->space = NULL;
14662
0
    }
14663
14664
14665
0
    ctxt->nodeNr = 0;
14666
0
    ctxt->node = NULL;
14667
14668
0
    ctxt->nameNr = 0;
14669
0
    ctxt->name = NULL;
14670
14671
0
    ctxt->nsNr = 0;
14672
14673
0
    DICT_FREE(ctxt->version);
14674
0
    ctxt->version = NULL;
14675
0
    DICT_FREE(ctxt->encoding);
14676
0
    ctxt->encoding = NULL;
14677
0
    DICT_FREE(ctxt->directory);
14678
0
    ctxt->directory = NULL;
14679
0
    DICT_FREE(ctxt->extSubURI);
14680
0
    ctxt->extSubURI = NULL;
14681
0
    DICT_FREE(ctxt->extSubSystem);
14682
0
    ctxt->extSubSystem = NULL;
14683
0
    if (ctxt->myDoc != NULL)
14684
0
        xmlFreeDoc(ctxt->myDoc);
14685
0
    ctxt->myDoc = NULL;
14686
14687
0
    ctxt->standalone = -1;
14688
0
    ctxt->hasExternalSubset = 0;
14689
0
    ctxt->hasPErefs = 0;
14690
0
    ctxt->html = 0;
14691
0
    ctxt->external = 0;
14692
0
    ctxt->instate = XML_PARSER_START;
14693
0
    ctxt->token = 0;
14694
14695
0
    ctxt->wellFormed = 1;
14696
0
    ctxt->nsWellFormed = 1;
14697
0
    ctxt->disableSAX = 0;
14698
0
    ctxt->valid = 1;
14699
#if 0
14700
    ctxt->vctxt.userData = ctxt;
14701
    ctxt->vctxt.error = xmlParserValidityError;
14702
    ctxt->vctxt.warning = xmlParserValidityWarning;
14703
#endif
14704
0
    ctxt->record_info = 0;
14705
0
    ctxt->checkIndex = 0;
14706
0
    ctxt->endCheckState = 0;
14707
0
    ctxt->inSubset = 0;
14708
0
    ctxt->errNo = XML_ERR_OK;
14709
0
    ctxt->depth = 0;
14710
0
    ctxt->charset = XML_CHAR_ENCODING_UTF8;
14711
0
    ctxt->catalogs = NULL;
14712
0
    ctxt->sizeentities = 0;
14713
0
    ctxt->sizeentcopy = 0;
14714
0
    xmlInitNodeInfoSeq(&ctxt->node_seq);
14715
14716
0
    if (ctxt->attsDefault != NULL) {
14717
0
        xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
14718
0
        ctxt->attsDefault = NULL;
14719
0
    }
14720
0
    if (ctxt->attsSpecial != NULL) {
14721
0
        xmlHashFree(ctxt->attsSpecial, NULL);
14722
0
        ctxt->attsSpecial = NULL;
14723
0
    }
14724
14725
0
#ifdef LIBXML_CATALOG_ENABLED
14726
0
    if (ctxt->catalogs != NULL)
14727
0
  xmlCatalogFreeLocal(ctxt->catalogs);
14728
0
#endif
14729
0
    ctxt->nbErrors = 0;
14730
0
    ctxt->nbWarnings = 0;
14731
0
    if (ctxt->lastError.code != XML_ERR_OK)
14732
0
        xmlResetError(&ctxt->lastError);
14733
0
}
14734
14735
/**
14736
 * xmlCtxtResetPush:
14737
 * @ctxt: an XML parser context
14738
 * @chunk:  a pointer to an array of chars
14739
 * @size:  number of chars in the array
14740
 * @filename:  an optional file name or URI
14741
 * @encoding:  the document encoding, or NULL
14742
 *
14743
 * Reset a push parser context
14744
 *
14745
 * Returns 0 in case of success and 1 in case of error
14746
 */
14747
int
14748
xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14749
                 int size, const char *filename, const char *encoding)
14750
0
{
14751
0
    xmlParserInputPtr inputStream;
14752
0
    xmlParserInputBufferPtr buf;
14753
0
    xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14754
14755
0
    if (ctxt == NULL)
14756
0
        return(1);
14757
14758
0
    if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14759
0
        enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14760
14761
0
    buf = xmlAllocParserInputBuffer(enc);
14762
0
    if (buf == NULL)
14763
0
        return(1);
14764
14765
0
    if (ctxt == NULL) {
14766
0
        xmlFreeParserInputBuffer(buf);
14767
0
        return(1);
14768
0
    }
14769
14770
0
    xmlCtxtReset(ctxt);
14771
14772
0
    if (filename == NULL) {
14773
0
        ctxt->directory = NULL;
14774
0
    } else {
14775
0
        ctxt->directory = xmlParserGetDirectory(filename);
14776
0
    }
14777
14778
0
    inputStream = xmlNewInputStream(ctxt);
14779
0
    if (inputStream == NULL) {
14780
0
        xmlFreeParserInputBuffer(buf);
14781
0
        return(1);
14782
0
    }
14783
14784
0
    if (filename == NULL)
14785
0
        inputStream->filename = NULL;
14786
0
    else
14787
0
        inputStream->filename = (char *)
14788
0
            xmlCanonicPath((const xmlChar *) filename);
14789
0
    inputStream->buf = buf;
14790
0
    xmlBufResetInput(buf->buffer, inputStream);
14791
14792
0
    inputPush(ctxt, inputStream);
14793
14794
0
    if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14795
0
        (ctxt->input->buf != NULL)) {
14796
0
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
14797
0
        size_t cur = ctxt->input->cur - ctxt->input->base;
14798
14799
0
        xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14800
14801
0
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
14802
#ifdef DEBUG_PUSH
14803
        xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14804
#endif
14805
0
    }
14806
14807
0
    if (encoding != NULL) {
14808
0
        xmlCharEncodingHandlerPtr hdlr;
14809
14810
0
        if (ctxt->encoding != NULL)
14811
0
      xmlFree((xmlChar *) ctxt->encoding);
14812
0
        ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14813
14814
0
        hdlr = xmlFindCharEncodingHandler(encoding);
14815
0
        if (hdlr != NULL) {
14816
0
            xmlSwitchToEncoding(ctxt, hdlr);
14817
0
  } else {
14818
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14819
0
            "Unsupported encoding %s\n", BAD_CAST encoding);
14820
0
        }
14821
0
    } else if (enc != XML_CHAR_ENCODING_NONE) {
14822
0
        xmlSwitchEncoding(ctxt, enc);
14823
0
    }
14824
14825
0
    return(0);
14826
0
}
14827
14828
14829
/**
14830
 * xmlCtxtUseOptionsInternal:
14831
 * @ctxt: an XML parser context
14832
 * @options:  a combination of xmlParserOption
14833
 * @encoding:  the user provided encoding to use
14834
 *
14835
 * Applies the options to the parser context
14836
 *
14837
 * Returns 0 in case of success, the set of unknown or unimplemented options
14838
 *         in case of error.
14839
 */
14840
static int
14841
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
14842
1.09M
{
14843
1.09M
    if (ctxt == NULL)
14844
0
        return(-1);
14845
1.09M
    if (encoding != NULL) {
14846
0
        if (ctxt->encoding != NULL)
14847
0
      xmlFree((xmlChar *) ctxt->encoding);
14848
0
        ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14849
0
    }
14850
1.09M
    if (options & XML_PARSE_RECOVER) {
14851
646k
        ctxt->recovery = 1;
14852
646k
        options -= XML_PARSE_RECOVER;
14853
646k
  ctxt->options |= XML_PARSE_RECOVER;
14854
646k
    } else
14855
445k
        ctxt->recovery = 0;
14856
1.09M
    if (options & XML_PARSE_DTDLOAD) {
14857
770k
        ctxt->loadsubset = XML_DETECT_IDS;
14858
770k
        options -= XML_PARSE_DTDLOAD;
14859
770k
  ctxt->options |= XML_PARSE_DTDLOAD;
14860
770k
    } else
14861
320k
        ctxt->loadsubset = 0;
14862
1.09M
    if (options & XML_PARSE_DTDATTR) {
14863
468k
        ctxt->loadsubset |= XML_COMPLETE_ATTRS;
14864
468k
        options -= XML_PARSE_DTDATTR;
14865
468k
  ctxt->options |= XML_PARSE_DTDATTR;
14866
468k
    }
14867
1.09M
    if (options & XML_PARSE_NOENT) {
14868
663k
        ctxt->replaceEntities = 1;
14869
        /* ctxt->loadsubset |= XML_DETECT_IDS; */
14870
663k
        options -= XML_PARSE_NOENT;
14871
663k
  ctxt->options |= XML_PARSE_NOENT;
14872
663k
    } else
14873
428k
        ctxt->replaceEntities = 0;
14874
1.09M
    if (options & XML_PARSE_PEDANTIC) {
14875
206k
        ctxt->pedantic = 1;
14876
206k
        options -= XML_PARSE_PEDANTIC;
14877
206k
  ctxt->options |= XML_PARSE_PEDANTIC;
14878
206k
    } else
14879
884k
        ctxt->pedantic = 0;
14880
1.09M
    if (options & XML_PARSE_NOBLANKS) {
14881
432k
        ctxt->keepBlanks = 0;
14882
432k
        ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
14883
432k
        options -= XML_PARSE_NOBLANKS;
14884
432k
  ctxt->options |= XML_PARSE_NOBLANKS;
14885
432k
    } else
14886
658k
        ctxt->keepBlanks = 1;
14887
1.09M
    if (options & XML_PARSE_DTDVALID) {
14888
401k
        ctxt->validate = 1;
14889
401k
        if (options & XML_PARSE_NOWARNING)
14890
225k
            ctxt->vctxt.warning = NULL;
14891
401k
        if (options & XML_PARSE_NOERROR)
14892
332k
            ctxt->vctxt.error = NULL;
14893
401k
        options -= XML_PARSE_DTDVALID;
14894
401k
  ctxt->options |= XML_PARSE_DTDVALID;
14895
401k
    } else
14896
690k
        ctxt->validate = 0;
14897
1.09M
    if (options & XML_PARSE_NOWARNING) {
14898
359k
        ctxt->sax->warning = NULL;
14899
359k
        options -= XML_PARSE_NOWARNING;
14900
359k
    }
14901
1.09M
    if (options & XML_PARSE_NOERROR) {
14902
561k
        ctxt->sax->error = NULL;
14903
561k
        ctxt->sax->fatalError = NULL;
14904
561k
        options -= XML_PARSE_NOERROR;
14905
561k
    }
14906
1.09M
#ifdef LIBXML_SAX1_ENABLED
14907
1.09M
    if (options & XML_PARSE_SAX1) {
14908
372k
        ctxt->sax->startElement = xmlSAX2StartElement;
14909
372k
        ctxt->sax->endElement = xmlSAX2EndElement;
14910
372k
        ctxt->sax->startElementNs = NULL;
14911
372k
        ctxt->sax->endElementNs = NULL;
14912
372k
        ctxt->sax->initialized = 1;
14913
372k
        options -= XML_PARSE_SAX1;
14914
372k
  ctxt->options |= XML_PARSE_SAX1;
14915
372k
    }
14916
1.09M
#endif /* LIBXML_SAX1_ENABLED */
14917
1.09M
    if (options & XML_PARSE_NODICT) {
14918
395k
        ctxt->dictNames = 0;
14919
395k
        options -= XML_PARSE_NODICT;
14920
395k
  ctxt->options |= XML_PARSE_NODICT;
14921
695k
    } else {
14922
695k
        ctxt->dictNames = 1;
14923
695k
    }
14924
1.09M
    if (options & XML_PARSE_NOCDATA) {
14925
420k
        ctxt->sax->cdataBlock = NULL;
14926
420k
        options -= XML_PARSE_NOCDATA;
14927
420k
  ctxt->options |= XML_PARSE_NOCDATA;
14928
420k
    }
14929
1.09M
    if (options & XML_PARSE_NSCLEAN) {
14930
569k
  ctxt->options |= XML_PARSE_NSCLEAN;
14931
569k
        options -= XML_PARSE_NSCLEAN;
14932
569k
    }
14933
1.09M
    if (options & XML_PARSE_NONET) {
14934
485k
  ctxt->options |= XML_PARSE_NONET;
14935
485k
        options -= XML_PARSE_NONET;
14936
485k
    }
14937
1.09M
    if (options & XML_PARSE_COMPACT) {
14938
661k
  ctxt->options |= XML_PARSE_COMPACT;
14939
661k
        options -= XML_PARSE_COMPACT;
14940
661k
    }
14941
1.09M
    if (options & XML_PARSE_OLD10) {
14942
344k
  ctxt->options |= XML_PARSE_OLD10;
14943
344k
        options -= XML_PARSE_OLD10;
14944
344k
    }
14945
1.09M
    if (options & XML_PARSE_NOBASEFIX) {
14946
408k
  ctxt->options |= XML_PARSE_NOBASEFIX;
14947
408k
        options -= XML_PARSE_NOBASEFIX;
14948
408k
    }
14949
1.09M
    if (options & XML_PARSE_HUGE) {
14950
333k
  ctxt->options |= XML_PARSE_HUGE;
14951
333k
        options -= XML_PARSE_HUGE;
14952
333k
        if (ctxt->dict != NULL)
14953
333k
            xmlDictSetLimit(ctxt->dict, 0);
14954
333k
    }
14955
1.09M
    if (options & XML_PARSE_OLDSAX) {
14956
361k
  ctxt->options |= XML_PARSE_OLDSAX;
14957
361k
        options -= XML_PARSE_OLDSAX;
14958
361k
    }
14959
1.09M
    if (options & XML_PARSE_IGNORE_ENC) {
14960
522k
  ctxt->options |= XML_PARSE_IGNORE_ENC;
14961
522k
        options -= XML_PARSE_IGNORE_ENC;
14962
522k
    }
14963
1.09M
    if (options & XML_PARSE_BIG_LINES) {
14964
446k
  ctxt->options |= XML_PARSE_BIG_LINES;
14965
446k
        options -= XML_PARSE_BIG_LINES;
14966
446k
    }
14967
1.09M
    ctxt->linenumbers = 1;
14968
1.09M
    return (options);
14969
1.09M
}
14970
14971
/**
14972
 * xmlCtxtUseOptions:
14973
 * @ctxt: an XML parser context
14974
 * @options:  a combination of xmlParserOption
14975
 *
14976
 * Applies the options to the parser context
14977
 *
14978
 * Returns 0 in case of success, the set of unknown or unimplemented options
14979
 *         in case of error.
14980
 */
14981
int
14982
xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
14983
728k
{
14984
728k
   return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
14985
728k
}
14986
14987
/**
14988
 * xmlDoRead:
14989
 * @ctxt:  an XML parser context
14990
 * @URL:  the base URL to use for the document
14991
 * @encoding:  the document encoding, or NULL
14992
 * @options:  a combination of xmlParserOption
14993
 * @reuse:  keep the context for reuse
14994
 *
14995
 * Common front-end for the xmlRead functions
14996
 *
14997
 * Returns the resulting document tree or NULL
14998
 */
14999
static xmlDocPtr
15000
xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
15001
          int options, int reuse)
15002
363k
{
15003
363k
    xmlDocPtr ret;
15004
15005
363k
    xmlCtxtUseOptionsInternal(ctxt, options, encoding);
15006
363k
    if (encoding != NULL) {
15007
0
        xmlCharEncodingHandlerPtr hdlr;
15008
15009
0
  hdlr = xmlFindCharEncodingHandler(encoding);
15010
0
  if (hdlr != NULL)
15011
0
      xmlSwitchToEncoding(ctxt, hdlr);
15012
0
    }
15013
363k
    if ((URL != NULL) && (ctxt->input != NULL) &&
15014
363k
        (ctxt->input->filename == NULL))
15015
363k
        ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
15016
363k
    xmlParseDocument(ctxt);
15017
363k
    if ((ctxt->wellFormed) || ctxt->recovery)
15018
223k
        ret = ctxt->myDoc;
15019
139k
    else {
15020
139k
        ret = NULL;
15021
139k
  if (ctxt->myDoc != NULL) {
15022
116k
      xmlFreeDoc(ctxt->myDoc);
15023
116k
  }
15024
139k
    }
15025
363k
    ctxt->myDoc = NULL;
15026
363k
    if (!reuse) {
15027
363k
  xmlFreeParserCtxt(ctxt);
15028
363k
    }
15029
15030
363k
    return (ret);
15031
363k
}
15032
15033
/**
15034
 * xmlReadDoc:
15035
 * @cur:  a pointer to a zero terminated string
15036
 * @URL:  the base URL to use for the document
15037
 * @encoding:  the document encoding, or NULL
15038
 * @options:  a combination of xmlParserOption
15039
 *
15040
 * parse an XML in-memory document and build a tree.
15041
 *
15042
 * Returns the resulting document tree
15043
 */
15044
xmlDocPtr
15045
xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
15046
0
{
15047
0
    xmlParserCtxtPtr ctxt;
15048
15049
0
    if (cur == NULL)
15050
0
        return (NULL);
15051
0
    xmlInitParser();
15052
15053
0
    ctxt = xmlCreateDocParserCtxt(cur);
15054
0
    if (ctxt == NULL)
15055
0
        return (NULL);
15056
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15057
0
}
15058
15059
/**
15060
 * xmlReadFile:
15061
 * @filename:  a file or URL
15062
 * @encoding:  the document encoding, or NULL
15063
 * @options:  a combination of xmlParserOption
15064
 *
15065
 * parse an XML file from the filesystem or the network.
15066
 *
15067
 * Returns the resulting document tree
15068
 */
15069
xmlDocPtr
15070
xmlReadFile(const char *filename, const char *encoding, int options)
15071
0
{
15072
0
    xmlParserCtxtPtr ctxt;
15073
15074
0
    xmlInitParser();
15075
0
    ctxt = xmlCreateURLParserCtxt(filename, options);
15076
0
    if (ctxt == NULL)
15077
0
        return (NULL);
15078
0
    return (xmlDoRead(ctxt, NULL, encoding, options, 0));
15079
0
}
15080
15081
/**
15082
 * xmlReadMemory:
15083
 * @buffer:  a pointer to a char array
15084
 * @size:  the size of the array
15085
 * @URL:  the base URL to use for the document
15086
 * @encoding:  the document encoding, or NULL
15087
 * @options:  a combination of xmlParserOption
15088
 *
15089
 * parse an XML in-memory document and build a tree.
15090
 *
15091
 * Returns the resulting document tree
15092
 */
15093
xmlDocPtr
15094
xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
15095
364k
{
15096
364k
    xmlParserCtxtPtr ctxt;
15097
15098
364k
    xmlInitParser();
15099
364k
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
15100
364k
    if (ctxt == NULL)
15101
1.18k
        return (NULL);
15102
363k
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15103
364k
}
15104
15105
/**
15106
 * xmlReadFd:
15107
 * @fd:  an open file descriptor
15108
 * @URL:  the base URL to use for the document
15109
 * @encoding:  the document encoding, or NULL
15110
 * @options:  a combination of xmlParserOption
15111
 *
15112
 * parse an XML from a file descriptor and build a tree.
15113
 * NOTE that the file descriptor will not be closed when the
15114
 *      reader is closed or reset.
15115
 *
15116
 * Returns the resulting document tree
15117
 */
15118
xmlDocPtr
15119
xmlReadFd(int fd, const char *URL, const char *encoding, int options)
15120
0
{
15121
0
    xmlParserCtxtPtr ctxt;
15122
0
    xmlParserInputBufferPtr input;
15123
0
    xmlParserInputPtr stream;
15124
15125
0
    if (fd < 0)
15126
0
        return (NULL);
15127
0
    xmlInitParser();
15128
15129
0
    input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15130
0
    if (input == NULL)
15131
0
        return (NULL);
15132
0
    input->closecallback = NULL;
15133
0
    ctxt = xmlNewParserCtxt();
15134
0
    if (ctxt == NULL) {
15135
0
        xmlFreeParserInputBuffer(input);
15136
0
        return (NULL);
15137
0
    }
15138
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15139
0
    if (stream == NULL) {
15140
0
        xmlFreeParserInputBuffer(input);
15141
0
  xmlFreeParserCtxt(ctxt);
15142
0
        return (NULL);
15143
0
    }
15144
0
    inputPush(ctxt, stream);
15145
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15146
0
}
15147
15148
/**
15149
 * xmlReadIO:
15150
 * @ioread:  an I/O read function
15151
 * @ioclose:  an I/O close function
15152
 * @ioctx:  an I/O handler
15153
 * @URL:  the base URL to use for the document
15154
 * @encoding:  the document encoding, or NULL
15155
 * @options:  a combination of xmlParserOption
15156
 *
15157
 * parse an XML document from I/O functions and source and build a tree.
15158
 *
15159
 * Returns the resulting document tree
15160
 */
15161
xmlDocPtr
15162
xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
15163
          void *ioctx, const char *URL, const char *encoding, int options)
15164
0
{
15165
0
    xmlParserCtxtPtr ctxt;
15166
0
    xmlParserInputBufferPtr input;
15167
0
    xmlParserInputPtr stream;
15168
15169
0
    if (ioread == NULL)
15170
0
        return (NULL);
15171
0
    xmlInitParser();
15172
15173
0
    input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15174
0
                                         XML_CHAR_ENCODING_NONE);
15175
0
    if (input == NULL) {
15176
0
        if (ioclose != NULL)
15177
0
            ioclose(ioctx);
15178
0
        return (NULL);
15179
0
    }
15180
0
    ctxt = xmlNewParserCtxt();
15181
0
    if (ctxt == NULL) {
15182
0
        xmlFreeParserInputBuffer(input);
15183
0
        return (NULL);
15184
0
    }
15185
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15186
0
    if (stream == NULL) {
15187
0
        xmlFreeParserInputBuffer(input);
15188
0
  xmlFreeParserCtxt(ctxt);
15189
0
        return (NULL);
15190
0
    }
15191
0
    inputPush(ctxt, stream);
15192
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15193
0
}
15194
15195
/**
15196
 * xmlCtxtReadDoc:
15197
 * @ctxt:  an XML parser context
15198
 * @cur:  a pointer to a zero terminated string
15199
 * @URL:  the base URL to use for the document
15200
 * @encoding:  the document encoding, or NULL
15201
 * @options:  a combination of xmlParserOption
15202
 *
15203
 * parse an XML in-memory document and build a tree.
15204
 * This reuses the existing @ctxt parser context
15205
 *
15206
 * Returns the resulting document tree
15207
 */
15208
xmlDocPtr
15209
xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
15210
               const char *URL, const char *encoding, int options)
15211
0
{
15212
0
    if (cur == NULL)
15213
0
        return (NULL);
15214
0
    return (xmlCtxtReadMemory(ctxt, (const char *) cur, xmlStrlen(cur), URL,
15215
0
                              encoding, options));
15216
0
}
15217
15218
/**
15219
 * xmlCtxtReadFile:
15220
 * @ctxt:  an XML parser context
15221
 * @filename:  a file or URL
15222
 * @encoding:  the document encoding, or NULL
15223
 * @options:  a combination of xmlParserOption
15224
 *
15225
 * parse an XML file from the filesystem or the network.
15226
 * This reuses the existing @ctxt parser context
15227
 *
15228
 * Returns the resulting document tree
15229
 */
15230
xmlDocPtr
15231
xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15232
                const char *encoding, int options)
15233
0
{
15234
0
    xmlParserInputPtr stream;
15235
15236
0
    if (filename == NULL)
15237
0
        return (NULL);
15238
0
    if (ctxt == NULL)
15239
0
        return (NULL);
15240
0
    xmlInitParser();
15241
15242
0
    xmlCtxtReset(ctxt);
15243
15244
0
    stream = xmlLoadExternalEntity(filename, NULL, ctxt);
15245
0
    if (stream == NULL) {
15246
0
        return (NULL);
15247
0
    }
15248
0
    inputPush(ctxt, stream);
15249
0
    return (xmlDoRead(ctxt, NULL, encoding, options, 1));
15250
0
}
15251
15252
/**
15253
 * xmlCtxtReadMemory:
15254
 * @ctxt:  an XML parser context
15255
 * @buffer:  a pointer to a char array
15256
 * @size:  the size of the array
15257
 * @URL:  the base URL to use for the document
15258
 * @encoding:  the document encoding, or NULL
15259
 * @options:  a combination of xmlParserOption
15260
 *
15261
 * parse an XML in-memory document and build a tree.
15262
 * This reuses the existing @ctxt parser context
15263
 *
15264
 * Returns the resulting document tree
15265
 */
15266
xmlDocPtr
15267
xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
15268
                  const char *URL, const char *encoding, int options)
15269
0
{
15270
0
    xmlParserInputBufferPtr input;
15271
0
    xmlParserInputPtr stream;
15272
15273
0
    if (ctxt == NULL)
15274
0
        return (NULL);
15275
0
    if (buffer == NULL)
15276
0
        return (NULL);
15277
0
    xmlInitParser();
15278
15279
0
    xmlCtxtReset(ctxt);
15280
15281
0
    input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
15282
0
    if (input == NULL) {
15283
0
  return(NULL);
15284
0
    }
15285
15286
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15287
0
    if (stream == NULL) {
15288
0
  xmlFreeParserInputBuffer(input);
15289
0
  return(NULL);
15290
0
    }
15291
15292
0
    inputPush(ctxt, stream);
15293
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15294
0
}
15295
15296
/**
15297
 * xmlCtxtReadFd:
15298
 * @ctxt:  an XML parser context
15299
 * @fd:  an open file descriptor
15300
 * @URL:  the base URL to use for the document
15301
 * @encoding:  the document encoding, or NULL
15302
 * @options:  a combination of xmlParserOption
15303
 *
15304
 * parse an XML from a file descriptor and build a tree.
15305
 * This reuses the existing @ctxt parser context
15306
 * NOTE that the file descriptor will not be closed when the
15307
 *      reader is closed or reset.
15308
 *
15309
 * Returns the resulting document tree
15310
 */
15311
xmlDocPtr
15312
xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15313
              const char *URL, const char *encoding, int options)
15314
0
{
15315
0
    xmlParserInputBufferPtr input;
15316
0
    xmlParserInputPtr stream;
15317
15318
0
    if (fd < 0)
15319
0
        return (NULL);
15320
0
    if (ctxt == NULL)
15321
0
        return (NULL);
15322
0
    xmlInitParser();
15323
15324
0
    xmlCtxtReset(ctxt);
15325
15326
15327
0
    input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15328
0
    if (input == NULL)
15329
0
        return (NULL);
15330
0
    input->closecallback = NULL;
15331
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15332
0
    if (stream == NULL) {
15333
0
        xmlFreeParserInputBuffer(input);
15334
0
        return (NULL);
15335
0
    }
15336
0
    inputPush(ctxt, stream);
15337
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15338
0
}
15339
15340
/**
15341
 * xmlCtxtReadIO:
15342
 * @ctxt:  an XML parser context
15343
 * @ioread:  an I/O read function
15344
 * @ioclose:  an I/O close function
15345
 * @ioctx:  an I/O handler
15346
 * @URL:  the base URL to use for the document
15347
 * @encoding:  the document encoding, or NULL
15348
 * @options:  a combination of xmlParserOption
15349
 *
15350
 * parse an XML document from I/O functions and source and build a tree.
15351
 * This reuses the existing @ctxt parser context
15352
 *
15353
 * Returns the resulting document tree
15354
 */
15355
xmlDocPtr
15356
xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15357
              xmlInputCloseCallback ioclose, void *ioctx,
15358
        const char *URL,
15359
              const char *encoding, int options)
15360
0
{
15361
0
    xmlParserInputBufferPtr input;
15362
0
    xmlParserInputPtr stream;
15363
15364
0
    if (ioread == NULL)
15365
0
        return (NULL);
15366
0
    if (ctxt == NULL)
15367
0
        return (NULL);
15368
0
    xmlInitParser();
15369
15370
0
    xmlCtxtReset(ctxt);
15371
15372
0
    input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15373
0
                                         XML_CHAR_ENCODING_NONE);
15374
0
    if (input == NULL) {
15375
0
        if (ioclose != NULL)
15376
0
            ioclose(ioctx);
15377
0
        return (NULL);
15378
0
    }
15379
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15380
0
    if (stream == NULL) {
15381
0
        xmlFreeParserInputBuffer(input);
15382
0
        return (NULL);
15383
0
    }
15384
0
    inputPush(ctxt, stream);
15385
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15386
0
}
15387