Coverage Report

Created: 2024-01-17 17:01

/src/libxml2/parser.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3
 *            implemented on top of the SAX interfaces
4
 *
5
 * References:
6
 *   The XML specification:
7
 *     http://www.w3.org/TR/REC-xml
8
 *   Original 1.0 version:
9
 *     http://www.w3.org/TR/1998/REC-xml-19980210
10
 *   XML second edition working draft
11
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
12
 *
13
 * Okay this is a big file, the parser core is around 7000 lines, then it
14
 * is followed by the progressive parser top routines, then the various
15
 * high level APIs to call the parser and a few miscellaneous functions.
16
 * A number of helper functions and deprecated ones have been moved to
17
 * parserInternals.c to reduce this file size.
18
 * As much as possible the functions are associated with their relative
19
 * production in the XML specification. A few productions defining the
20
 * different ranges of character are actually implemented either in
21
 * parserInternals.h or parserInternals.c
22
 * The DOM tree build is realized from the default SAX callbacks in
23
 * the module SAX.c.
24
 * The routines doing the validation checks are in valid.c and called either
25
 * from the SAX callbacks or as standalone functions using a preparsed
26
 * document.
27
 *
28
 * See Copyright for the status of this software.
29
 *
30
 * daniel@veillard.com
31
 */
32
33
/* To avoid EBCDIC trouble when parsing on zOS */
34
#if defined(__MVS__)
35
#pragma convert("ISO8859-1")
36
#endif
37
38
#define IN_LIBXML
39
#include "libxml.h"
40
41
#if defined(_WIN32)
42
#define XML_DIR_SEP '\\'
43
#else
44
#define XML_DIR_SEP '/'
45
#endif
46
47
#include <stdlib.h>
48
#include <limits.h>
49
#include <string.h>
50
#include <stdarg.h>
51
#include <stddef.h>
52
#include <ctype.h>
53
#include <stdlib.h>
54
#include <libxml/xmlmemory.h>
55
#include <libxml/threads.h>
56
#include <libxml/globals.h>
57
#include <libxml/tree.h>
58
#include <libxml/parser.h>
59
#include <libxml/parserInternals.h>
60
#include <libxml/HTMLparser.h>
61
#include <libxml/valid.h>
62
#include <libxml/entities.h>
63
#include <libxml/xmlerror.h>
64
#include <libxml/encoding.h>
65
#include <libxml/xmlIO.h>
66
#include <libxml/uri.h>
67
#ifdef LIBXML_CATALOG_ENABLED
68
#include <libxml/catalog.h>
69
#endif
70
#ifdef LIBXML_SCHEMAS_ENABLED
71
#include <libxml/xmlschemastypes.h>
72
#include <libxml/relaxng.h>
73
#endif
74
#if defined(LIBXML_XPATH_ENABLED) || defined(LIBXML_SCHEMAS_ENABLED)
75
#include <libxml/xpath.h>
76
#endif
77
78
#include "private/buf.h"
79
#include "private/dict.h"
80
#include "private/enc.h"
81
#include "private/entities.h"
82
#include "private/error.h"
83
#include "private/globals.h"
84
#include "private/html.h"
85
#include "private/io.h"
86
#include "private/memory.h"
87
#include "private/parser.h"
88
#include "private/threads.h"
89
#include "private/xpath.h"
90
91
struct _xmlStartTag {
92
    const xmlChar *prefix;
93
    const xmlChar *URI;
94
    int line;
95
    int nsNr;
96
};
97
98
static xmlParserCtxtPtr
99
xmlCreateEntityParserCtxtInternal(xmlSAXHandlerPtr sax, void *userData,
100
        const xmlChar *URL, const xmlChar *ID, const xmlChar *base,
101
        xmlParserCtxtPtr pctx);
102
103
static void xmlHaltParser(xmlParserCtxtPtr ctxt);
104
105
static int
106
xmlParseElementStart(xmlParserCtxtPtr ctxt);
107
108
static void
109
xmlParseElementEnd(xmlParserCtxtPtr ctxt);
110
111
/************************************************************************
112
 *                  *
113
 *  Arbitrary limits set in the parser. See XML_PARSE_HUGE    *
114
 *                  *
115
 ************************************************************************/
116
117
13.3M
#define XML_MAX_HUGE_LENGTH 1000000000
118
119
#define XML_PARSER_BIG_ENTITY 1000
120
#define XML_PARSER_LOT_ENTITY 5000
121
122
/*
123
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
124
 *    replacement over the size in byte of the input indicates that you have
125
 *    an exponential behaviour. A value of 10 corresponds to at least 3 entity
126
 *    replacement per byte of input.
127
 */
128
714
#define XML_PARSER_NON_LINEAR 10
129
130
67.6M
#define XML_ENT_FIXED_COST 50
131
132
/**
133
 * xmlParserMaxDepth:
134
 *
135
 * arbitrary depth limit for the XML documents that we allow to
136
 * process. This is not a limitation of the parser but a safety
137
 * boundary feature. It can be disabled with the XML_PARSE_HUGE
138
 * parser option.
139
 */
140
unsigned int xmlParserMaxDepth = 256;
141
142
143
144
#define SAX2 1
145
65.3M
#define XML_PARSER_BIG_BUFFER_SIZE 300
146
6.04G
#define XML_PARSER_BUFFER_SIZE 100
147
572k
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
148
149
/**
150
 * XML_PARSER_CHUNK_SIZE
151
 *
152
 * When calling GROW that's the minimal amount of data
153
 * the parser expected to have received. It is not a hard
154
 * limit but an optimization when reading strings like Names
155
 * It is not strictly needed as long as the input's available characters
156
 * are followed by 0, which should be provided by the I/O level
157
 */
158
27.9M
#define XML_PARSER_CHUNK_SIZE 100
159
160
/*
161
 * List of XML prefixed PI allowed by W3C specs
162
 */
163
164
static const char* const xmlW3CPIs[] = {
165
    "xml-stylesheet",
166
    "xml-model",
167
    NULL
168
};
169
170
171
/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
172
static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
173
                                              const xmlChar **str);
174
175
static xmlParserErrors
176
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
177
                xmlSAXHandlerPtr sax,
178
          void *user_data, int depth, const xmlChar *URL,
179
          const xmlChar *ID, xmlNodePtr *list);
180
181
static int
182
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
183
                          const char *encoding);
184
#ifdef LIBXML_LEGACY_ENABLED
185
static void
186
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
187
                      xmlNodePtr lastNode);
188
#endif /* LIBXML_LEGACY_ENABLED */
189
190
static xmlParserErrors
191
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
192
          const xmlChar *string, void *user_data, xmlNodePtr *lst);
193
194
static int
195
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
196
197
/************************************************************************
198
 *                  *
199
 *    Some factorized error routines        *
200
 *                  *
201
 ************************************************************************/
202
203
/**
204
 * xmlErrAttributeDup:
205
 * @ctxt:  an XML parser context
206
 * @prefix:  the attribute prefix
207
 * @localname:  the attribute localname
208
 *
209
 * Handle a redefinition of attribute error
210
 */
211
static void
212
xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
213
                   const xmlChar * localname)
214
29.9k
{
215
29.9k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
216
29.9k
        (ctxt->instate == XML_PARSER_EOF))
217
18
  return;
218
29.9k
    if (ctxt != NULL)
219
29.9k
  ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
220
221
29.9k
    if (prefix == NULL)
222
15.9k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
223
15.9k
                        XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
224
15.9k
                        (const char *) localname, NULL, NULL, 0, 0,
225
15.9k
                        "Attribute %s redefined\n", localname);
226
13.9k
    else
227
13.9k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
228
13.9k
                        XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
229
13.9k
                        (const char *) prefix, (const char *) localname,
230
13.9k
                        NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
231
13.9k
                        localname);
232
29.9k
    if (ctxt != NULL) {
233
29.9k
  ctxt->wellFormed = 0;
234
29.9k
  if (ctxt->recovery == 0)
235
12.7k
      ctxt->disableSAX = 1;
236
29.9k
    }
237
29.9k
}
238
239
/**
240
 * xmlFatalErr:
241
 * @ctxt:  an XML parser context
242
 * @error:  the error number
243
 * @extra:  extra information string
244
 *
245
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
246
 */
247
static void
248
xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
249
1.47M
{
250
1.47M
    const char *errmsg;
251
252
1.47M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
253
1.47M
        (ctxt->instate == XML_PARSER_EOF))
254
12.8k
  return;
255
1.45M
    switch (error) {
256
16.1k
        case XML_ERR_INVALID_HEX_CHARREF:
257
16.1k
            errmsg = "CharRef: invalid hexadecimal value";
258
16.1k
            break;
259
27.9k
        case XML_ERR_INVALID_DEC_CHARREF:
260
27.9k
            errmsg = "CharRef: invalid decimal value";
261
27.9k
            break;
262
0
        case XML_ERR_INVALID_CHARREF:
263
0
            errmsg = "CharRef: invalid value";
264
0
            break;
265
367k
        case XML_ERR_INTERNAL_ERROR:
266
367k
            errmsg = "internal error";
267
367k
            break;
268
0
        case XML_ERR_PEREF_AT_EOF:
269
0
            errmsg = "PEReference at end of document";
270
0
            break;
271
0
        case XML_ERR_PEREF_IN_PROLOG:
272
0
            errmsg = "PEReference in prolog";
273
0
            break;
274
0
        case XML_ERR_PEREF_IN_EPILOG:
275
0
            errmsg = "PEReference in epilog";
276
0
            break;
277
0
        case XML_ERR_PEREF_NO_NAME:
278
0
            errmsg = "PEReference: no name";
279
0
            break;
280
414k
        case XML_ERR_PEREF_SEMICOL_MISSING:
281
414k
            errmsg = "PEReference: expecting ';'";
282
414k
            break;
283
1.38k
        case XML_ERR_ENTITY_LOOP:
284
1.38k
            errmsg = "Detected an entity reference loop";
285
1.38k
            break;
286
0
        case XML_ERR_ENTITY_NOT_STARTED:
287
0
            errmsg = "EntityValue: \" or ' expected";
288
0
            break;
289
5.28k
        case XML_ERR_ENTITY_PE_INTERNAL:
290
5.28k
            errmsg = "PEReferences forbidden in internal subset";
291
5.28k
            break;
292
1.82k
        case XML_ERR_ENTITY_NOT_FINISHED:
293
1.82k
            errmsg = "EntityValue: \" or ' expected";
294
1.82k
            break;
295
29.5k
        case XML_ERR_ATTRIBUTE_NOT_STARTED:
296
29.5k
            errmsg = "AttValue: \" or ' expected";
297
29.5k
            break;
298
66.9k
        case XML_ERR_LT_IN_ATTRIBUTE:
299
66.9k
            errmsg = "Unescaped '<' not allowed in attributes values";
300
66.9k
            break;
301
9.33k
        case XML_ERR_LITERAL_NOT_STARTED:
302
9.33k
            errmsg = "SystemLiteral \" or ' expected";
303
9.33k
            break;
304
11.5k
        case XML_ERR_LITERAL_NOT_FINISHED:
305
11.5k
            errmsg = "Unfinished System or Public ID \" or ' expected";
306
11.5k
            break;
307
12.3k
        case XML_ERR_MISPLACED_CDATA_END:
308
12.3k
            errmsg = "Sequence ']]>' not allowed in content";
309
12.3k
            break;
310
7.68k
        case XML_ERR_URI_REQUIRED:
311
7.68k
            errmsg = "SYSTEM or PUBLIC, the URI is missing";
312
7.68k
            break;
313
1.75k
        case XML_ERR_PUBID_REQUIRED:
314
1.75k
            errmsg = "PUBLIC, the Public Identifier is missing";
315
1.75k
            break;
316
14.6k
        case XML_ERR_HYPHEN_IN_COMMENT:
317
14.6k
            errmsg = "Comment must not contain '--' (double-hyphen)";
318
14.6k
            break;
319
8.02k
        case XML_ERR_PI_NOT_STARTED:
320
8.02k
            errmsg = "xmlParsePI : no target name";
321
8.02k
            break;
322
2.18k
        case XML_ERR_RESERVED_XML_NAME:
323
2.18k
            errmsg = "Invalid PI name";
324
2.18k
            break;
325
3.32k
        case XML_ERR_NOTATION_NOT_STARTED:
326
3.32k
            errmsg = "NOTATION: Name expected here";
327
3.32k
            break;
328
11.5k
        case XML_ERR_NOTATION_NOT_FINISHED:
329
11.5k
            errmsg = "'>' required to close NOTATION declaration";
330
11.5k
            break;
331
9.19k
        case XML_ERR_VALUE_REQUIRED:
332
9.19k
            errmsg = "Entity value required";
333
9.19k
            break;
334
2.59k
        case XML_ERR_URI_FRAGMENT:
335
2.59k
            errmsg = "Fragment not allowed";
336
2.59k
            break;
337
5.56k
        case XML_ERR_ATTLIST_NOT_STARTED:
338
5.56k
            errmsg = "'(' required to start ATTLIST enumeration";
339
5.56k
            break;
340
959
        case XML_ERR_NMTOKEN_REQUIRED:
341
959
            errmsg = "NmToken expected in ATTLIST enumeration";
342
959
            break;
343
2.38k
        case XML_ERR_ATTLIST_NOT_FINISHED:
344
2.38k
            errmsg = "')' required to finish ATTLIST enumeration";
345
2.38k
            break;
346
2.94k
        case XML_ERR_MIXED_NOT_STARTED:
347
2.94k
            errmsg = "MixedContentDecl : '|' or ')*' expected";
348
2.94k
            break;
349
0
        case XML_ERR_PCDATA_REQUIRED:
350
0
            errmsg = "MixedContentDecl : '#PCDATA' expected";
351
0
            break;
352
5.35k
        case XML_ERR_ELEMCONTENT_NOT_STARTED:
353
5.35k
            errmsg = "ContentDecl : Name or '(' expected";
354
5.35k
            break;
355
6.51k
        case XML_ERR_ELEMCONTENT_NOT_FINISHED:
356
6.51k
            errmsg = "ContentDecl : ',' '|' or ')' expected";
357
6.51k
            break;
358
0
        case XML_ERR_PEREF_IN_INT_SUBSET:
359
0
            errmsg =
360
0
                "PEReference: forbidden within markup decl in internal subset";
361
0
            break;
362
75.3k
        case XML_ERR_GT_REQUIRED:
363
75.3k
            errmsg = "expected '>'";
364
75.3k
            break;
365
291
        case XML_ERR_CONDSEC_INVALID:
366
291
            errmsg = "XML conditional section '[' expected";
367
291
            break;
368
8.16k
        case XML_ERR_EXT_SUBSET_NOT_FINISHED:
369
8.16k
            errmsg = "Content error in the external subset";
370
8.16k
            break;
371
770
        case XML_ERR_CONDSEC_INVALID_KEYWORD:
372
770
            errmsg =
373
770
                "conditional section INCLUDE or IGNORE keyword expected";
374
770
            break;
375
1.11k
        case XML_ERR_CONDSEC_NOT_FINISHED:
376
1.11k
            errmsg = "XML conditional section not closed";
377
1.11k
            break;
378
179
        case XML_ERR_XMLDECL_NOT_STARTED:
379
179
            errmsg = "Text declaration '<?xml' required";
380
179
            break;
381
66.8k
        case XML_ERR_XMLDECL_NOT_FINISHED:
382
66.8k
            errmsg = "parsing XML declaration: '?>' expected";
383
66.8k
            break;
384
0
        case XML_ERR_EXT_ENTITY_STANDALONE:
385
0
            errmsg = "external parsed entities cannot be standalone";
386
0
            break;
387
75.3k
        case XML_ERR_ENTITYREF_SEMICOL_MISSING:
388
75.3k
            errmsg = "EntityRef: expecting ';'";
389
75.3k
            break;
390
6.41k
        case XML_ERR_DOCTYPE_NOT_FINISHED:
391
6.41k
            errmsg = "DOCTYPE improperly terminated";
392
6.41k
            break;
393
0
        case XML_ERR_LTSLASH_REQUIRED:
394
0
            errmsg = "EndTag: '</' not found";
395
0
            break;
396
4.75k
        case XML_ERR_EQUAL_REQUIRED:
397
4.75k
            errmsg = "expected '='";
398
4.75k
            break;
399
19.6k
        case XML_ERR_STRING_NOT_CLOSED:
400
19.6k
            errmsg = "String not closed expecting \" or '";
401
19.6k
            break;
402
4.99k
        case XML_ERR_STRING_NOT_STARTED:
403
4.99k
            errmsg = "String not started expecting ' or \"";
404
4.99k
            break;
405
1.22k
        case XML_ERR_ENCODING_NAME:
406
1.22k
            errmsg = "Invalid XML encoding name";
407
1.22k
            break;
408
1.51k
        case XML_ERR_STANDALONE_VALUE:
409
1.51k
            errmsg = "standalone accepts only 'yes' or 'no'";
410
1.51k
            break;
411
15.5k
        case XML_ERR_DOCUMENT_EMPTY:
412
15.5k
            errmsg = "Document is empty";
413
15.5k
            break;
414
92.2k
        case XML_ERR_DOCUMENT_END:
415
92.2k
            errmsg = "Extra content at the end of the document";
416
92.2k
            break;
417
14.1k
        case XML_ERR_NOT_WELL_BALANCED:
418
14.1k
            errmsg = "chunk is not well balanced";
419
14.1k
            break;
420
0
        case XML_ERR_EXTRA_CONTENT:
421
0
            errmsg = "extra content at the end of well balanced chunk";
422
0
            break;
423
19.7k
        case XML_ERR_VERSION_MISSING:
424
19.7k
            errmsg = "Malformed declaration expecting version";
425
19.7k
            break;
426
103
        case XML_ERR_NAME_TOO_LONG:
427
103
            errmsg = "Name too long";
428
103
            break;
429
#if 0
430
        case:
431
            errmsg = "";
432
            break;
433
#endif
434
2.91k
        default:
435
2.91k
            errmsg = "Unregistered error message";
436
1.45M
    }
437
1.45M
    if (ctxt != NULL)
438
1.45M
  ctxt->errNo = error;
439
1.45M
    if (info == NULL) {
440
1.09M
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
441
1.09M
                        XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s\n",
442
1.09M
                        errmsg);
443
1.09M
    } else {
444
367k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
445
367k
                        XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s: %s\n",
446
367k
                        errmsg, info);
447
367k
    }
448
1.45M
    if (ctxt != NULL) {
449
1.45M
  ctxt->wellFormed = 0;
450
1.45M
  if (ctxt->recovery == 0)
451
644k
      ctxt->disableSAX = 1;
452
1.45M
    }
453
1.45M
}
454
455
/**
456
 * xmlFatalErrMsg:
457
 * @ctxt:  an XML parser context
458
 * @error:  the error number
459
 * @msg:  the error message
460
 *
461
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
462
 */
463
static void LIBXML_ATTR_FORMAT(3,0)
464
xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
465
               const char *msg)
466
1.31M
{
467
1.31M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
468
1.31M
        (ctxt->instate == XML_PARSER_EOF))
469
90
  return;
470
1.31M
    if (ctxt != NULL)
471
1.31M
  ctxt->errNo = error;
472
1.31M
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
473
1.31M
                    XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
474
1.31M
    if (ctxt != NULL) {
475
1.31M
  ctxt->wellFormed = 0;
476
1.31M
  if (ctxt->recovery == 0)
477
394k
      ctxt->disableSAX = 1;
478
1.31M
    }
479
1.31M
}
480
481
/**
482
 * xmlWarningMsg:
483
 * @ctxt:  an XML parser context
484
 * @error:  the error number
485
 * @msg:  the error message
486
 * @str1:  extra data
487
 * @str2:  extra data
488
 *
489
 * Handle a warning.
490
 */
491
static void LIBXML_ATTR_FORMAT(3,0)
492
xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
493
              const char *msg, const xmlChar *str1, const xmlChar *str2)
494
10.8M
{
495
10.8M
    xmlStructuredErrorFunc schannel = NULL;
496
497
10.8M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
498
10.8M
        (ctxt->instate == XML_PARSER_EOF))
499
0
  return;
500
10.8M
    if ((ctxt != NULL) && (ctxt->sax != NULL) &&
501
10.8M
        (ctxt->sax->initialized == XML_SAX2_MAGIC))
502
7.76M
        schannel = ctxt->sax->serror;
503
10.8M
    if (ctxt != NULL) {
504
10.8M
        __xmlRaiseError(schannel,
505
10.8M
                    (ctxt->sax) ? ctxt->sax->warning : NULL,
506
10.8M
                    ctxt->userData,
507
10.8M
                    ctxt, NULL, XML_FROM_PARSER, error,
508
10.8M
                    XML_ERR_WARNING, NULL, 0,
509
10.8M
        (const char *) str1, (const char *) str2, NULL, 0, 0,
510
10.8M
        msg, (const char *) str1, (const char *) str2);
511
10.8M
    } else {
512
0
        __xmlRaiseError(schannel, NULL, NULL,
513
0
                    ctxt, NULL, XML_FROM_PARSER, error,
514
0
                    XML_ERR_WARNING, NULL, 0,
515
0
        (const char *) str1, (const char *) str2, NULL, 0, 0,
516
0
        msg, (const char *) str1, (const char *) str2);
517
0
    }
518
10.8M
}
519
520
/**
521
 * xmlValidityError:
522
 * @ctxt:  an XML parser context
523
 * @error:  the error number
524
 * @msg:  the error message
525
 * @str1:  extra data
526
 *
527
 * Handle a validity error.
528
 */
529
static void LIBXML_ATTR_FORMAT(3,0)
530
xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
531
              const char *msg, const xmlChar *str1, const xmlChar *str2)
532
460k
{
533
460k
    xmlStructuredErrorFunc schannel = NULL;
534
535
460k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
536
460k
        (ctxt->instate == XML_PARSER_EOF))
537
0
  return;
538
460k
    if (ctxt != NULL) {
539
460k
  ctxt->errNo = error;
540
460k
  if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
541
11.3k
      schannel = ctxt->sax->serror;
542
460k
    }
543
460k
    if (ctxt != NULL) {
544
460k
        __xmlRaiseError(schannel,
545
460k
                    ctxt->vctxt.error, ctxt->vctxt.userData,
546
460k
                    ctxt, NULL, XML_FROM_DTD, error,
547
460k
                    XML_ERR_ERROR, NULL, 0, (const char *) str1,
548
460k
        (const char *) str2, NULL, 0, 0,
549
460k
        msg, (const char *) str1, (const char *) str2);
550
460k
  ctxt->valid = 0;
551
460k
    } else {
552
0
        __xmlRaiseError(schannel, NULL, NULL,
553
0
                    ctxt, NULL, XML_FROM_DTD, error,
554
0
                    XML_ERR_ERROR, NULL, 0, (const char *) str1,
555
0
        (const char *) str2, NULL, 0, 0,
556
0
        msg, (const char *) str1, (const char *) str2);
557
0
    }
558
460k
}
559
560
/**
561
 * xmlFatalErrMsgInt:
562
 * @ctxt:  an XML parser context
563
 * @error:  the error number
564
 * @msg:  the error message
565
 * @val:  an integer value
566
 *
567
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
568
 */
569
static void LIBXML_ATTR_FORMAT(3,0)
570
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
571
                  const char *msg, int val)
572
1.19M
{
573
1.19M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
574
1.19M
        (ctxt->instate == XML_PARSER_EOF))
575
0
  return;
576
1.19M
    if (ctxt != NULL)
577
1.19M
  ctxt->errNo = error;
578
1.19M
    __xmlRaiseError(NULL, NULL, NULL,
579
1.19M
                    ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
580
1.19M
                    NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
581
1.19M
    if (ctxt != NULL) {
582
1.19M
  ctxt->wellFormed = 0;
583
1.19M
  if (ctxt->recovery == 0)
584
233k
      ctxt->disableSAX = 1;
585
1.19M
    }
586
1.19M
}
587
588
/**
589
 * xmlFatalErrMsgStrIntStr:
590
 * @ctxt:  an XML parser context
591
 * @error:  the error number
592
 * @msg:  the error message
593
 * @str1:  an string info
594
 * @val:  an integer value
595
 * @str2:  an string info
596
 *
597
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
598
 */
599
static void LIBXML_ATTR_FORMAT(3,0)
600
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
601
                  const char *msg, const xmlChar *str1, int val,
602
      const xmlChar *str2)
603
420k
{
604
420k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
605
420k
        (ctxt->instate == XML_PARSER_EOF))
606
0
  return;
607
420k
    if (ctxt != NULL)
608
420k
  ctxt->errNo = error;
609
420k
    __xmlRaiseError(NULL, NULL, NULL,
610
420k
                    ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
611
420k
                    NULL, 0, (const char *) str1, (const char *) str2,
612
420k
        NULL, val, 0, msg, str1, val, str2);
613
420k
    if (ctxt != NULL) {
614
420k
  ctxt->wellFormed = 0;
615
420k
  if (ctxt->recovery == 0)
616
146k
      ctxt->disableSAX = 1;
617
420k
    }
618
420k
}
619
620
/**
621
 * xmlFatalErrMsgStr:
622
 * @ctxt:  an XML parser context
623
 * @error:  the error number
624
 * @msg:  the error message
625
 * @val:  a string value
626
 *
627
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
628
 */
629
static void LIBXML_ATTR_FORMAT(3,0)
630
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
631
                  const char *msg, const xmlChar * val)
632
4.49M
{
633
4.49M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
634
4.49M
        (ctxt->instate == XML_PARSER_EOF))
635
43
  return;
636
4.49M
    if (ctxt != NULL)
637
4.49M
  ctxt->errNo = error;
638
4.49M
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
639
4.49M
                    XML_FROM_PARSER, error, XML_ERR_FATAL,
640
4.49M
                    NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
641
4.49M
                    val);
642
4.49M
    if (ctxt != NULL) {
643
4.49M
  ctxt->wellFormed = 0;
644
4.49M
  if (ctxt->recovery == 0)
645
2.17M
      ctxt->disableSAX = 1;
646
4.49M
    }
647
4.49M
}
648
649
/**
650
 * xmlErrMsgStr:
651
 * @ctxt:  an XML parser context
652
 * @error:  the error number
653
 * @msg:  the error message
654
 * @val:  a string value
655
 *
656
 * Handle a non fatal parser error
657
 */
658
static void LIBXML_ATTR_FORMAT(3,0)
659
xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
660
                  const char *msg, const xmlChar * val)
661
311k
{
662
311k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
663
311k
        (ctxt->instate == XML_PARSER_EOF))
664
0
  return;
665
311k
    if (ctxt != NULL)
666
311k
  ctxt->errNo = error;
667
311k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
668
311k
                    XML_FROM_PARSER, error, XML_ERR_ERROR,
669
311k
                    NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
670
311k
                    val);
671
311k
}
672
673
/**
674
 * xmlNsErr:
675
 * @ctxt:  an XML parser context
676
 * @error:  the error number
677
 * @msg:  the message
678
 * @info1:  extra information string
679
 * @info2:  extra information string
680
 *
681
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
682
 */
683
static void LIBXML_ATTR_FORMAT(3,0)
684
xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
685
         const char *msg,
686
         const xmlChar * info1, const xmlChar * info2,
687
         const xmlChar * info3)
688
319k
{
689
319k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
690
319k
        (ctxt->instate == XML_PARSER_EOF))
691
131
  return;
692
318k
    if (ctxt != NULL)
693
318k
  ctxt->errNo = error;
694
318k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
695
318k
                    XML_ERR_ERROR, NULL, 0, (const char *) info1,
696
318k
                    (const char *) info2, (const char *) info3, 0, 0, msg,
697
318k
                    info1, info2, info3);
698
318k
    if (ctxt != NULL)
699
318k
  ctxt->nsWellFormed = 0;
700
318k
}
701
702
/**
703
 * xmlNsWarn
704
 * @ctxt:  an XML parser context
705
 * @error:  the error number
706
 * @msg:  the message
707
 * @info1:  extra information string
708
 * @info2:  extra information string
709
 *
710
 * Handle a namespace warning error
711
 */
712
static void LIBXML_ATTR_FORMAT(3,0)
713
xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
714
         const char *msg,
715
         const xmlChar * info1, const xmlChar * info2,
716
         const xmlChar * info3)
717
6.53k
{
718
6.53k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
719
6.53k
        (ctxt->instate == XML_PARSER_EOF))
720
0
  return;
721
6.53k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
722
6.53k
                    XML_ERR_WARNING, NULL, 0, (const char *) info1,
723
6.53k
                    (const char *) info2, (const char *) info3, 0, 0, msg,
724
6.53k
                    info1, info2, info3);
725
6.53k
}
726
727
/**
 * xmlSaturatedAdd:
 * @dst:  accumulator, updated in place
 * @val:  amount to add
 *
 * Add @val to *@dst, clamping the result to ULONG_MAX instead of
 * wrapping around on overflow.
 */
static void
xmlSaturatedAdd(unsigned long *dst, unsigned long val) {
    /* Largest amount that can still be added without wrapping. */
    unsigned long room = ULONG_MAX - *dst;

    *dst = (val > room) ? ULONG_MAX : (*dst + val);
}
734
735
/**
 * xmlSaturatedAddSizeT:
 * @dst:  accumulator, updated in place
 * @val:  amount to add, as a size_t
 *
 * Add @val to *@dst, clamping the result to ULONG_MAX instead of
 * wrapping around on overflow.
 *
 * @val is taken as size_t so that, on platforms where size_t is wider
 * than unsigned long, a large value saturates the accumulator instead of
 * being truncated at the call boundary before the overflow check.
 */
static void
xmlSaturatedAddSizeT(unsigned long *dst, size_t val) {
    /*
     * The comparison is done in the wider of the two types, so a @val
     * above ULONG_MAX always takes the saturating branch.
     */
    if (val > ULONG_MAX - *dst)
        *dst = ULONG_MAX;
    else
        *dst += (unsigned long) val;
}
742
743
/**
744
 * xmlParserEntityCheck:
745
 * @ctxt:  parser context
746
 * @extra:  sum of unexpanded entity sizes
747
 *
748
 * Check for non-linear entity expansion behaviour.
749
 *
750
 * In some cases like xmlStringDecodeEntities, this function is called
751
 * for each, possibly nested entity and its unexpanded content length.
752
 *
753
 * In other cases like xmlParseReference, it's only called for each
754
 * top-level entity with its unexpanded content length plus the sum of
755
 * the unexpanded content lengths (plus fixed cost) of all nested
756
 * entities.
757
 *
758
 * Summing the unexpanded lengths also adds the length of the reference.
759
 * This is by design. Taking the length of the entity name into account
760
 * discourages attacks that try to waste CPU time with abusively long
761
 * entity names. See test/recurse/lol6.xml for example. Each call also
762
 * adds some fixed cost XML_ENT_FIXED_COST to discourage attacks with
763
 * short entities.
764
 *
765
 * Returns 1 on error, 0 on success.
766
 */
767
static int
768
xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long extra)
769
67.6M
{
770
67.6M
    unsigned long consumed;
771
67.6M
    xmlParserInputPtr input = ctxt->input;
772
67.6M
    xmlEntityPtr entity = input->entity;
773
774
    /*
775
     * Compute total consumed bytes so far, including input streams of
776
     * external entities.
777
     */
778
67.6M
    consumed = input->parentConsumed;
779
67.6M
    if ((entity == NULL) ||
780
67.6M
        ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
781
38.8M
         ((entity->flags & XML_ENT_PARSED) == 0))) {
782
28.8M
        xmlSaturatedAdd(&consumed, input->consumed);
783
28.8M
        xmlSaturatedAddSizeT(&consumed, input->cur - input->base);
784
28.8M
    }
785
67.6M
    xmlSaturatedAdd(&consumed, ctxt->sizeentities);
786
787
    /*
788
     * Add extra cost and some fixed cost.
789
     */
790
67.6M
    xmlSaturatedAdd(&ctxt->sizeentcopy, extra);
791
67.6M
    xmlSaturatedAdd(&ctxt->sizeentcopy, XML_ENT_FIXED_COST);
792
793
    /*
794
     * It's important to always use saturation arithmetic when tracking
795
     * entity sizes to make the size checks reliable. If "sizeentcopy"
796
     * overflows, we have to abort.
797
     */
798
67.6M
    if ((ctxt->sizeentcopy > XML_MAX_TEXT_LENGTH) &&
799
67.6M
        ((ctxt->sizeentcopy >= ULONG_MAX) ||
800
714
         (ctxt->sizeentcopy / XML_PARSER_NON_LINEAR > consumed))) {
801
714
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_LOOP,
802
714
                       "Maximum entity amplification factor exceeded");
803
714
        xmlHaltParser(ctxt);
804
714
        return(1);
805
714
    }
806
807
67.6M
    return(0);
808
67.6M
}
809
810
/************************************************************************
811
 *                  *
812
 *    Library wide options          *
813
 *                  *
814
 ************************************************************************/
815
816
/**
817
  * xmlHasFeature:
818
  * @feature: the feature to be examined
819
  *
820
  * Examines if the library has been compiled with a given feature.
821
  *
822
  * Returns a non-zero value if the feature exist, otherwise zero.
823
  * Returns zero (0) if the feature does not exist or an unknown
824
  * feature is requested, non-zero otherwise.
825
  */
826
int
827
xmlHasFeature(xmlFeature feature)
828
0
{
829
0
    switch (feature) {
830
0
  case XML_WITH_THREAD:
831
0
#ifdef LIBXML_THREAD_ENABLED
832
0
      return(1);
833
#else
834
      return(0);
835
#endif
836
0
        case XML_WITH_TREE:
837
0
#ifdef LIBXML_TREE_ENABLED
838
0
            return(1);
839
#else
840
            return(0);
841
#endif
842
0
        case XML_WITH_OUTPUT:
843
0
#ifdef LIBXML_OUTPUT_ENABLED
844
0
            return(1);
845
#else
846
            return(0);
847
#endif
848
0
        case XML_WITH_PUSH:
849
0
#ifdef LIBXML_PUSH_ENABLED
850
0
            return(1);
851
#else
852
            return(0);
853
#endif
854
0
        case XML_WITH_READER:
855
0
#ifdef LIBXML_READER_ENABLED
856
0
            return(1);
857
#else
858
            return(0);
859
#endif
860
0
        case XML_WITH_PATTERN:
861
0
#ifdef LIBXML_PATTERN_ENABLED
862
0
            return(1);
863
#else
864
            return(0);
865
#endif
866
0
        case XML_WITH_WRITER:
867
0
#ifdef LIBXML_WRITER_ENABLED
868
0
            return(1);
869
#else
870
            return(0);
871
#endif
872
0
        case XML_WITH_SAX1:
873
0
#ifdef LIBXML_SAX1_ENABLED
874
0
            return(1);
875
#else
876
            return(0);
877
#endif
878
0
        case XML_WITH_FTP:
879
#ifdef LIBXML_FTP_ENABLED
880
            return(1);
881
#else
882
0
            return(0);
883
0
#endif
884
0
        case XML_WITH_HTTP:
885
#ifdef LIBXML_HTTP_ENABLED
886
            return(1);
887
#else
888
0
            return(0);
889
0
#endif
890
0
        case XML_WITH_VALID:
891
0
#ifdef LIBXML_VALID_ENABLED
892
0
            return(1);
893
#else
894
            return(0);
895
#endif
896
0
        case XML_WITH_HTML:
897
0
#ifdef LIBXML_HTML_ENABLED
898
0
            return(1);
899
#else
900
            return(0);
901
#endif
902
0
        case XML_WITH_LEGACY:
903
#ifdef LIBXML_LEGACY_ENABLED
904
            return(1);
905
#else
906
0
            return(0);
907
0
#endif
908
0
        case XML_WITH_C14N:
909
0
#ifdef LIBXML_C14N_ENABLED
910
0
            return(1);
911
#else
912
            return(0);
913
#endif
914
0
        case XML_WITH_CATALOG:
915
0
#ifdef LIBXML_CATALOG_ENABLED
916
0
            return(1);
917
#else
918
            return(0);
919
#endif
920
0
        case XML_WITH_XPATH:
921
0
#ifdef LIBXML_XPATH_ENABLED
922
0
            return(1);
923
#else
924
            return(0);
925
#endif
926
0
        case XML_WITH_XPTR:
927
0
#ifdef LIBXML_XPTR_ENABLED
928
0
            return(1);
929
#else
930
            return(0);
931
#endif
932
0
        case XML_WITH_XINCLUDE:
933
0
#ifdef LIBXML_XINCLUDE_ENABLED
934
0
            return(1);
935
#else
936
            return(0);
937
#endif
938
0
        case XML_WITH_ICONV:
939
0
#ifdef LIBXML_ICONV_ENABLED
940
0
            return(1);
941
#else
942
            return(0);
943
#endif
944
0
        case XML_WITH_ISO8859X:
945
0
#ifdef LIBXML_ISO8859X_ENABLED
946
0
            return(1);
947
#else
948
            return(0);
949
#endif
950
0
        case XML_WITH_UNICODE:
951
0
#ifdef LIBXML_UNICODE_ENABLED
952
0
            return(1);
953
#else
954
            return(0);
955
#endif
956
0
        case XML_WITH_REGEXP:
957
0
#ifdef LIBXML_REGEXP_ENABLED
958
0
            return(1);
959
#else
960
            return(0);
961
#endif
962
0
        case XML_WITH_AUTOMATA:
963
0
#ifdef LIBXML_AUTOMATA_ENABLED
964
0
            return(1);
965
#else
966
            return(0);
967
#endif
968
0
        case XML_WITH_EXPR:
969
#ifdef LIBXML_EXPR_ENABLED
970
            return(1);
971
#else
972
0
            return(0);
973
0
#endif
974
0
        case XML_WITH_SCHEMAS:
975
0
#ifdef LIBXML_SCHEMAS_ENABLED
976
0
            return(1);
977
#else
978
            return(0);
979
#endif
980
0
        case XML_WITH_SCHEMATRON:
981
0
#ifdef LIBXML_SCHEMATRON_ENABLED
982
0
            return(1);
983
#else
984
            return(0);
985
#endif
986
0
        case XML_WITH_MODULES:
987
0
#ifdef LIBXML_MODULES_ENABLED
988
0
            return(1);
989
#else
990
            return(0);
991
#endif
992
0
        case XML_WITH_DEBUG:
993
#ifdef LIBXML_DEBUG_ENABLED
994
            return(1);
995
#else
996
0
            return(0);
997
0
#endif
998
0
        case XML_WITH_DEBUG_MEM:
999
#ifdef DEBUG_MEMORY_LOCATION
1000
            return(1);
1001
#else
1002
0
            return(0);
1003
0
#endif
1004
0
        case XML_WITH_DEBUG_RUN:
1005
0
            return(0);
1006
0
        case XML_WITH_ZLIB:
1007
0
#ifdef LIBXML_ZLIB_ENABLED
1008
0
            return(1);
1009
#else
1010
            return(0);
1011
#endif
1012
0
        case XML_WITH_LZMA:
1013
0
#ifdef LIBXML_LZMA_ENABLED
1014
0
            return(1);
1015
#else
1016
            return(0);
1017
#endif
1018
0
        case XML_WITH_ICU:
1019
#ifdef LIBXML_ICU_ENABLED
1020
            return(1);
1021
#else
1022
0
            return(0);
1023
0
#endif
1024
0
        default:
1025
0
      break;
1026
0
     }
1027
0
     return(0);
1028
0
}
1029
1030
/************************************************************************
1031
 *                  *
1032
 *    SAX2 defaulted attributes handling      *
1033
 *                  *
1034
 ************************************************************************/
1035
1036
/**
1037
 * xmlDetectSAX2:
1038
 * @ctxt:  an XML parser context
1039
 *
1040
 * Do the SAX2 detection and specific initialization
1041
 */
1042
static void
1043
763k
xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1044
763k
    xmlSAXHandlerPtr sax;
1045
1046
    /* Avoid unused variable warning if features are disabled. */
1047
763k
    (void) sax;
1048
1049
763k
    if (ctxt == NULL) return;
1050
763k
    sax = ctxt->sax;
1051
763k
#ifdef LIBXML_SAX1_ENABLED
1052
763k
    if ((sax) &&  (sax->initialized == XML_SAX2_MAGIC) &&
1053
763k
        ((sax->startElementNs != NULL) ||
1054
476k
         (sax->endElementNs != NULL) ||
1055
476k
         ((sax->startElement == NULL) && (sax->endElement == NULL))))
1056
476k
        ctxt->sax2 = 1;
1057
#else
1058
    ctxt->sax2 = 1;
1059
#endif /* LIBXML_SAX1_ENABLED */
1060
1061
763k
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1062
763k
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1063
763k
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
1064
763k
    if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1065
763k
    (ctxt->str_xml_ns == NULL)) {
1066
0
        xmlErrMemory(ctxt, NULL);
1067
0
    }
1068
763k
}
1069
1070
typedef struct _xmlDefAttrs xmlDefAttrs;
1071
typedef xmlDefAttrs *xmlDefAttrsPtr;
1072
struct _xmlDefAttrs {
1073
    int nbAttrs;  /* number of defaulted attributes on that element */
1074
    int maxAttrs;       /* the size of the array */
1075
#if __STDC_VERSION__ >= 199901L
1076
    /* Using a C99 flexible array member avoids UBSan errors. */
1077
    const xmlChar *values[]; /* array of localname/prefix/values/external */
1078
#else
1079
    const xmlChar *values[5];
1080
#endif
1081
};
1082
1083
/**
1084
 * xmlAttrNormalizeSpace:
1085
 * @src: the source string
1086
 * @dst: the target string
1087
 *
1088
 * Normalize the space in non CDATA attribute values:
1089
 * If the attribute type is not CDATA, then the XML processor MUST further
1090
 * process the normalized attribute value by discarding any leading and
1091
 * trailing space (#x20) characters, and by replacing sequences of space
1092
 * (#x20) characters by a single space (#x20) character.
1093
 * Note that the size of dst need to be at least src, and if one doesn't need
1094
 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1095
 * passing src as dst is just fine.
1096
 *
1097
 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1098
 *         is needed.
1099
 */
1100
static xmlChar *
1101
xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1102
120k
{
1103
120k
    if ((src == NULL) || (dst == NULL))
1104
0
        return(NULL);
1105
1106
150k
    while (*src == 0x20) src++;
1107
2.03M
    while (*src != 0) {
1108
1.91M
  if (*src == 0x20) {
1109
360k
      while (*src == 0x20) src++;
1110
125k
      if (*src != 0)
1111
110k
    *dst++ = 0x20;
1112
1.78M
  } else {
1113
1.78M
      *dst++ = *src++;
1114
1.78M
  }
1115
1.91M
    }
1116
120k
    *dst = 0;
1117
120k
    if (dst == src)
1118
101k
       return(NULL);
1119
19.2k
    return(dst);
1120
120k
}
1121
1122
/**
1123
 * xmlAttrNormalizeSpace2:
1124
 * @src: the source string
1125
 *
1126
 * Normalize the space in non CDATA attribute values, a slightly more complex
1127
 * front end to avoid allocation problems when running on attribute values
1128
 * coming from the input.
1129
 *
1130
 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1131
 *         is needed.
1132
 */
1133
static const xmlChar *
1134
xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
1135
22.8k
{
1136
22.8k
    int i;
1137
22.8k
    int remove_head = 0;
1138
22.8k
    int need_realloc = 0;
1139
22.8k
    const xmlChar *cur;
1140
1141
22.8k
    if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1142
0
        return(NULL);
1143
22.8k
    i = *len;
1144
22.8k
    if (i <= 0)
1145
789
        return(NULL);
1146
1147
22.0k
    cur = src;
1148
31.9k
    while (*cur == 0x20) {
1149
9.86k
        cur++;
1150
9.86k
  remove_head++;
1151
9.86k
    }
1152
638k
    while (*cur != 0) {
1153
621k
  if (*cur == 0x20) {
1154
52.4k
      cur++;
1155
52.4k
      if ((*cur == 0x20) || (*cur == 0)) {
1156
4.58k
          need_realloc = 1;
1157
4.58k
    break;
1158
4.58k
      }
1159
52.4k
  } else
1160
568k
      cur++;
1161
621k
    }
1162
22.0k
    if (need_realloc) {
1163
4.58k
        xmlChar *ret;
1164
1165
4.58k
  ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1166
4.58k
  if (ret == NULL) {
1167
0
      xmlErrMemory(ctxt, NULL);
1168
0
      return(NULL);
1169
0
  }
1170
4.58k
  xmlAttrNormalizeSpace(ret, ret);
1171
4.58k
  *len = strlen((const char *)ret);
1172
4.58k
        return(ret);
1173
17.4k
    } else if (remove_head) {
1174
611
        *len -= remove_head;
1175
611
        memmove(src, src + remove_head, 1 + *len);
1176
611
  return(src);
1177
611
    }
1178
16.8k
    return(NULL);
1179
22.0k
}
1180
1181
/**
1182
 * xmlAddDefAttrs:
1183
 * @ctxt:  an XML parser context
1184
 * @fullname:  the element fullname
1185
 * @fullattr:  the attribute fullname
1186
 * @value:  the attribute value
1187
 *
1188
 * Add a defaulted attribute for an element
1189
 */
1190
static void
1191
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1192
               const xmlChar *fullname,
1193
               const xmlChar *fullattr,
1194
138k
               const xmlChar *value) {
1195
138k
    xmlDefAttrsPtr defaults;
1196
138k
    int len;
1197
138k
    const xmlChar *name;
1198
138k
    const xmlChar *prefix;
1199
1200
    /*
1201
     * Allows to detect attribute redefinitions
1202
     */
1203
138k
    if (ctxt->attsSpecial != NULL) {
1204
120k
        if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1205
26.5k
      return;
1206
120k
    }
1207
1208
112k
    if (ctxt->attsDefault == NULL) {
1209
24.0k
        ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1210
24.0k
  if (ctxt->attsDefault == NULL)
1211
0
      goto mem_error;
1212
24.0k
    }
1213
1214
    /*
1215
     * split the element name into prefix:localname , the string found
1216
     * are within the DTD and then not associated to namespace names.
1217
     */
1218
112k
    name = xmlSplitQName3(fullname, &len);
1219
112k
    if (name == NULL) {
1220
104k
        name = xmlDictLookup(ctxt->dict, fullname, -1);
1221
104k
  prefix = NULL;
1222
104k
    } else {
1223
7.70k
        name = xmlDictLookup(ctxt->dict, name, -1);
1224
7.70k
  prefix = xmlDictLookup(ctxt->dict, fullname, len);
1225
7.70k
    }
1226
1227
    /*
1228
     * make sure there is some storage
1229
     */
1230
112k
    defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1231
112k
    if (defaults == NULL) {
1232
62.2k
        defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
1233
62.2k
                     (4 * 5) * sizeof(const xmlChar *));
1234
62.2k
  if (defaults == NULL)
1235
0
      goto mem_error;
1236
62.2k
  defaults->nbAttrs = 0;
1237
62.2k
  defaults->maxAttrs = 4;
1238
62.2k
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1239
62.2k
                          defaults, NULL) < 0) {
1240
0
      xmlFree(defaults);
1241
0
      goto mem_error;
1242
0
  }
1243
62.2k
    } else if (defaults->nbAttrs >= defaults->maxAttrs) {
1244
3.09k
        xmlDefAttrsPtr temp;
1245
1246
3.09k
        temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
1247
3.09k
           (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1248
3.09k
  if (temp == NULL)
1249
0
      goto mem_error;
1250
3.09k
  defaults = temp;
1251
3.09k
  defaults->maxAttrs *= 2;
1252
3.09k
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1253
3.09k
                          defaults, NULL) < 0) {
1254
0
      xmlFree(defaults);
1255
0
      goto mem_error;
1256
0
  }
1257
3.09k
    }
1258
1259
    /*
1260
     * Split the element name into prefix:localname , the string found
1261
     * are within the DTD and hen not associated to namespace names.
1262
     */
1263
112k
    name = xmlSplitQName3(fullattr, &len);
1264
112k
    if (name == NULL) {
1265
94.6k
        name = xmlDictLookup(ctxt->dict, fullattr, -1);
1266
94.6k
  prefix = NULL;
1267
94.6k
    } else {
1268
17.7k
        name = xmlDictLookup(ctxt->dict, name, -1);
1269
17.7k
  prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1270
17.7k
    }
1271
1272
112k
    defaults->values[5 * defaults->nbAttrs] = name;
1273
112k
    defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1274
    /* intern the string and precompute the end */
1275
112k
    len = xmlStrlen(value);
1276
112k
    value = xmlDictLookup(ctxt->dict, value, len);
1277
112k
    if (value == NULL)
1278
0
        goto mem_error;
1279
112k
    defaults->values[5 * defaults->nbAttrs + 2] = value;
1280
112k
    defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1281
112k
    if (ctxt->external)
1282
24.4k
        defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1283
88.0k
    else
1284
88.0k
        defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1285
112k
    defaults->nbAttrs++;
1286
1287
112k
    return;
1288
1289
0
mem_error:
1290
0
    xmlErrMemory(ctxt, NULL);
1291
0
    return;
1292
112k
}
1293
1294
/**
1295
 * xmlAddSpecialAttr:
1296
 * @ctxt:  an XML parser context
1297
 * @fullname:  the element fullname
1298
 * @fullattr:  the attribute fullname
1299
 * @type:  the attribute type
1300
 *
1301
 * Register this attribute type
1302
 */
1303
static void
1304
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1305
      const xmlChar *fullname,
1306
      const xmlChar *fullattr,
1307
      int type)
1308
1.67M
{
1309
1.67M
    if (ctxt->attsSpecial == NULL) {
1310
42.4k
        ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1311
42.4k
  if (ctxt->attsSpecial == NULL)
1312
0
      goto mem_error;
1313
42.4k
    }
1314
1315
1.67M
    if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1316
365k
        return;
1317
1318
1.31M
    xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1319
1.31M
                     (void *) (ptrdiff_t) type);
1320
1.31M
    return;
1321
1322
0
mem_error:
1323
0
    xmlErrMemory(ctxt, NULL);
1324
0
    return;
1325
1.67M
}
1326
1327
/**
1328
 * xmlCleanSpecialAttrCallback:
1329
 *
1330
 * Removes CDATA attributes from the special attribute table
1331
 */
1332
static void
1333
xmlCleanSpecialAttrCallback(void *payload, void *data,
1334
                            const xmlChar *fullname, const xmlChar *fullattr,
1335
864k
                            const xmlChar *unused ATTRIBUTE_UNUSED) {
1336
864k
    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1337
1338
864k
    if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) {
1339
352k
        xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1340
352k
    }
1341
864k
}
1342
1343
/**
1344
 * xmlCleanSpecialAttr:
1345
 * @ctxt:  an XML parser context
1346
 *
1347
 * Trim the list of attributes defined to remove all those of type
1348
 * CDATA as they are not special. This call should be done when finishing
1349
 * to parse the DTD and before starting to parse the document root.
1350
 */
1351
static void
1352
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1353
195k
{
1354
195k
    if (ctxt->attsSpecial == NULL)
1355
161k
        return;
1356
1357
33.6k
    xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1358
1359
33.6k
    if (xmlHashSize(ctxt->attsSpecial) == 0) {
1360
10.1k
        xmlHashFree(ctxt->attsSpecial, NULL);
1361
10.1k
        ctxt->attsSpecial = NULL;
1362
10.1k
    }
1363
33.6k
    return;
1364
195k
}
1365
1366
/**
1367
 * xmlCheckLanguageID:
1368
 * @lang:  pointer to the string value
1369
 *
1370
 * Checks that the value conforms to the LanguageID production:
1371
 *
1372
 * NOTE: this is somewhat deprecated, those productions were removed from
1373
 *       the XML Second edition.
1374
 *
1375
 * [33] LanguageID ::= Langcode ('-' Subcode)*
1376
 * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
1377
 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1378
 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1379
 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1380
 * [38] Subcode ::= ([a-z] | [A-Z])+
1381
 *
1382
 * The current REC reference the successors of RFC 1766, currently 5646
1383
 *
1384
 * http://www.rfc-editor.org/rfc/rfc5646.txt
1385
 * langtag       = language
1386
 *                 ["-" script]
1387
 *                 ["-" region]
1388
 *                 *("-" variant)
1389
 *                 *("-" extension)
1390
 *                 ["-" privateuse]
1391
 * language      = 2*3ALPHA            ; shortest ISO 639 code
1392
 *                 ["-" extlang]       ; sometimes followed by
1393
 *                                     ; extended language subtags
1394
 *               / 4ALPHA              ; or reserved for future use
1395
 *               / 5*8ALPHA            ; or registered language subtag
1396
 *
1397
 * extlang       = 3ALPHA              ; selected ISO 639 codes
1398
 *                 *2("-" 3ALPHA)      ; permanently reserved
1399
 *
1400
 * script        = 4ALPHA              ; ISO 15924 code
1401
 *
1402
 * region        = 2ALPHA              ; ISO 3166-1 code
1403
 *               / 3DIGIT              ; UN M.49 code
1404
 *
1405
 * variant       = 5*8alphanum         ; registered variants
1406
 *               / (DIGIT 3alphanum)
1407
 *
1408
 * extension     = singleton 1*("-" (2*8alphanum))
1409
 *
1410
 *                                     ; Single alphanumerics
1411
 *                                     ; "x" reserved for private use
1412
 * singleton     = DIGIT               ; 0 - 9
1413
 *               / %x41-57             ; A - W
1414
 *               / %x59-5A             ; Y - Z
1415
 *               / %x61-77             ; a - w
1416
 *               / %x79-7A             ; y - z
1417
 *
1418
 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1419
 * The parser below doesn't try to cope with extension or privateuse
1420
 * that could be added but that's not interoperable anyway
1421
 *
1422
 * Returns 1 if correct 0 otherwise
1423
 **/
1424
int
1425
xmlCheckLanguageID(const xmlChar * lang)
1426
18.5k
{
1427
18.5k
    const xmlChar *cur = lang, *nxt;
1428
1429
18.5k
    if (cur == NULL)
1430
374
        return (0);
1431
18.1k
    if (((cur[0] == 'i') && (cur[1] == '-')) ||
1432
18.1k
        ((cur[0] == 'I') && (cur[1] == '-')) ||
1433
18.1k
        ((cur[0] == 'x') && (cur[1] == '-')) ||
1434
18.1k
        ((cur[0] == 'X') && (cur[1] == '-'))) {
1435
        /*
1436
         * Still allow IANA code and user code which were coming
1437
         * from the previous version of the XML-1.0 specification
1438
         * it's deprecated but we should not fail
1439
         */
1440
1.17k
        cur += 2;
1441
18.4k
        while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1442
18.4k
               ((cur[0] >= 'a') && (cur[0] <= 'z')))
1443
17.2k
            cur++;
1444
1.17k
        return(cur[0] == 0);
1445
1.17k
    }
1446
16.9k
    nxt = cur;
1447
74.8k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1448
74.8k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1449
57.8k
           nxt++;
1450
16.9k
    if (nxt - cur >= 4) {
1451
        /*
1452
         * Reserved
1453
         */
1454
2.08k
        if ((nxt - cur > 8) || (nxt[0] != 0))
1455
1.78k
            return(0);
1456
298
        return(1);
1457
2.08k
    }
1458
14.9k
    if (nxt - cur < 2)
1459
1.30k
        return(0);
1460
    /* we got an ISO 639 code */
1461
13.5k
    if (nxt[0] == 0)
1462
3.86k
        return(1);
1463
9.73k
    if (nxt[0] != '-')
1464
1.02k
        return(0);
1465
1466
8.70k
    nxt++;
1467
8.70k
    cur = nxt;
1468
    /* now we can have extlang or script or region or variant */
1469
8.70k
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1470
869
        goto region_m49;
1471
1472
38.9k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1473
38.9k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1474
31.1k
           nxt++;
1475
7.83k
    if (nxt - cur == 4)
1476
2.08k
        goto script;
1477
5.75k
    if (nxt - cur == 2)
1478
1.79k
        goto region;
1479
3.95k
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1480
519
        goto variant;
1481
3.43k
    if (nxt - cur != 3)
1482
549
        return(0);
1483
    /* we parsed an extlang */
1484
2.88k
    if (nxt[0] == 0)
1485
232
        return(1);
1486
2.65k
    if (nxt[0] != '-')
1487
344
        return(0);
1488
1489
2.30k
    nxt++;
1490
2.30k
    cur = nxt;
1491
    /* now we can have script or region or variant */
1492
2.30k
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1493
329
        goto region_m49;
1494
1495
13.8k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1496
13.8k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1497
11.8k
           nxt++;
1498
1.97k
    if (nxt - cur == 2)
1499
376
        goto region;
1500
1.60k
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1501
189
        goto variant;
1502
1.41k
    if (nxt - cur != 4)
1503
834
        return(0);
1504
    /* we parsed a script */
1505
2.66k
script:
1506
2.66k
    if (nxt[0] == 0)
1507
449
        return(1);
1508
2.21k
    if (nxt[0] != '-')
1509
410
        return(0);
1510
1511
1.80k
    nxt++;
1512
1.80k
    cur = nxt;
1513
    /* now we can have region or variant */
1514
1.80k
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1515
191
        goto region_m49;
1516
1517
14.6k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1518
14.6k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1519
13.0k
           nxt++;
1520
1521
1.61k
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1522
334
        goto variant;
1523
1.27k
    if (nxt - cur != 2)
1524
930
        return(0);
1525
    /* we parsed a region */
1526
3.33k
region:
1527
3.33k
    if (nxt[0] == 0)
1528
1.44k
        return(1);
1529
1.88k
    if (nxt[0] != '-')
1530
1.27k
        return(0);
1531
1532
608
    nxt++;
1533
608
    cur = nxt;
1534
    /* now we can just have a variant */
1535
6.30k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1536
6.30k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1537
5.70k
           nxt++;
1538
1539
608
    if ((nxt - cur < 5) || (nxt - cur > 8))
1540
355
        return(0);
1541
1542
    /* we parsed a variant */
1543
1.29k
variant:
1544
1.29k
    if (nxt[0] == 0)
1545
413
        return(1);
1546
882
    if (nxt[0] != '-')
1547
611
        return(0);
1548
    /* extensions and private use subtags not checked */
1549
271
    return (1);
1550
1551
1.38k
region_m49:
1552
1.38k
    if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1553
1.38k
        ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1554
811
        nxt += 3;
1555
811
        goto region;
1556
811
    }
1557
578
    return(0);
1558
1.38k
}
1559
1560
/************************************************************************
1561
 *                  *
1562
 *    Parser stacks related functions and macros    *
1563
 *                  *
1564
 ************************************************************************/
1565
1566
static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1567
                                            const xmlChar ** str);
1568
1569
#ifdef SAX2
1570
/**
1571
 * nsPush:
1572
 * @ctxt:  an XML parser context
1573
 * @prefix:  the namespace prefix or NULL
1574
 * @URL:  the namespace name
1575
 *
1576
 * Pushes a new parser namespace on top of the ns stack
1577
 *
1578
 * Returns -1 in case of error, -2 if the namespace should be discarded
1579
 *     and the index in the stack otherwise.
1580
 */
1581
static int
1582
nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1583
122k
{
1584
122k
    if (ctxt->options & XML_PARSE_NSCLEAN) {
1585
67.7k
        int i;
1586
143k
  for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
1587
96.2k
      if (ctxt->nsTab[i] == prefix) {
1588
    /* in scope */
1589
20.6k
          if (ctxt->nsTab[i + 1] == URL)
1590
6.60k
        return(-2);
1591
    /* out of scope keep it */
1592
14.0k
    break;
1593
20.6k
      }
1594
96.2k
  }
1595
67.7k
    }
1596
116k
    if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1597
41.6k
  ctxt->nsMax = 10;
1598
41.6k
  ctxt->nsNr = 0;
1599
41.6k
  ctxt->nsTab = (const xmlChar **)
1600
41.6k
                xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1601
41.6k
  if (ctxt->nsTab == NULL) {
1602
0
      xmlErrMemory(ctxt, NULL);
1603
0
      ctxt->nsMax = 0;
1604
0
            return (-1);
1605
0
  }
1606
74.4k
    } else if (ctxt->nsNr >= ctxt->nsMax) {
1607
3.74k
        const xmlChar ** tmp;
1608
3.74k
        ctxt->nsMax *= 2;
1609
3.74k
        tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1610
3.74k
            ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1611
3.74k
        if (tmp == NULL) {
1612
0
            xmlErrMemory(ctxt, NULL);
1613
0
      ctxt->nsMax /= 2;
1614
0
            return (-1);
1615
0
        }
1616
3.74k
  ctxt->nsTab = tmp;
1617
3.74k
    }
1618
116k
    ctxt->nsTab[ctxt->nsNr++] = prefix;
1619
116k
    ctxt->nsTab[ctxt->nsNr++] = URL;
1620
116k
    return (ctxt->nsNr);
1621
116k
}
1622
/**
1623
 * nsPop:
1624
 * @ctxt: an XML parser context
1625
 * @nr:  the number to pop
1626
 *
1627
 * Pops the top @nr parser prefix/namespace from the ns stack
1628
 *
1629
 * Returns the number of namespaces removed
1630
 */
1631
static int
1632
nsPop(xmlParserCtxtPtr ctxt, int nr)
1633
37.9k
{
1634
37.9k
    int i;
1635
1636
37.9k
    if (ctxt->nsTab == NULL) return(0);
1637
37.9k
    if (ctxt->nsNr < nr) {
1638
0
        xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1639
0
        nr = ctxt->nsNr;
1640
0
    }
1641
37.9k
    if (ctxt->nsNr <= 0)
1642
0
        return (0);
1643
1644
153k
    for (i = 0;i < nr;i++) {
1645
115k
         ctxt->nsNr--;
1646
115k
   ctxt->nsTab[ctxt->nsNr] = NULL;
1647
115k
    }
1648
37.9k
    return(nr);
1649
37.9k
}
1650
#endif
1651
1652
static int
1653
96.0k
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1654
96.0k
    const xmlChar **atts;
1655
96.0k
    int *attallocs;
1656
96.0k
    int maxatts;
1657
1658
96.0k
    if (nr + 5 > ctxt->maxatts) {
1659
96.0k
  maxatts = ctxt->maxatts == 0 ? 55 : (nr + 5) * 2;
1660
96.0k
  atts = (const xmlChar **) xmlMalloc(
1661
96.0k
             maxatts * sizeof(const xmlChar *));
1662
96.0k
  if (atts == NULL) goto mem_error;
1663
96.0k
  attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1664
96.0k
                               (maxatts / 5) * sizeof(int));
1665
96.0k
  if (attallocs == NULL) {
1666
0
            xmlFree(atts);
1667
0
            goto mem_error;
1668
0
        }
1669
96.0k
        if (ctxt->maxatts > 0)
1670
527
            memcpy(atts, ctxt->atts, ctxt->maxatts * sizeof(const xmlChar *));
1671
96.0k
        xmlFree(ctxt->atts);
1672
96.0k
  ctxt->atts = atts;
1673
96.0k
  ctxt->attallocs = attallocs;
1674
96.0k
  ctxt->maxatts = maxatts;
1675
96.0k
    }
1676
96.0k
    return(ctxt->maxatts);
1677
0
mem_error:
1678
0
    xmlErrMemory(ctxt, NULL);
1679
0
    return(-1);
1680
96.0k
}
1681
1682
/**
1683
 * inputPush:
1684
 * @ctxt:  an XML parser context
1685
 * @value:  the parser input
1686
 *
1687
 * Pushes a new parser input on top of the input stack
1688
 *
1689
 * Returns -1 in case of error, the index in the stack otherwise
1690
 */
1691
int
1692
inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1693
39.3M
{
1694
39.3M
    if ((ctxt == NULL) || (value == NULL))
1695
0
        return(-1);
1696
39.3M
    if (ctxt->inputNr >= ctxt->inputMax) {
1697
1.82k
        size_t newSize = ctxt->inputMax * 2;
1698
1.82k
        xmlParserInputPtr *tmp;
1699
1700
1.82k
        tmp = (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1701
1.82k
                                               newSize * sizeof(*tmp));
1702
1.82k
        if (tmp == NULL) {
1703
0
            xmlErrMemory(ctxt, NULL);
1704
0
            return (-1);
1705
0
        }
1706
1.82k
        ctxt->inputTab = tmp;
1707
1.82k
        ctxt->inputMax = newSize;
1708
1.82k
    }
1709
39.3M
    ctxt->inputTab[ctxt->inputNr] = value;
1710
39.3M
    ctxt->input = value;
1711
39.3M
    return (ctxt->inputNr++);
1712
39.3M
}
1713
/**
1714
 * inputPop:
1715
 * @ctxt: an XML parser context
1716
 *
1717
 * Pops the top parser input from the input stack
1718
 *
1719
 * Returns the input just removed
1720
 */
1721
xmlParserInputPtr
1722
inputPop(xmlParserCtxtPtr ctxt)
1723
41.3M
{
1724
41.3M
    xmlParserInputPtr ret;
1725
1726
41.3M
    if (ctxt == NULL)
1727
0
        return(NULL);
1728
41.3M
    if (ctxt->inputNr <= 0)
1729
2.04M
        return (NULL);
1730
39.3M
    ctxt->inputNr--;
1731
39.3M
    if (ctxt->inputNr > 0)
1732
38.7M
        ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1733
547k
    else
1734
547k
        ctxt->input = NULL;
1735
39.3M
    ret = ctxt->inputTab[ctxt->inputNr];
1736
39.3M
    ctxt->inputTab[ctxt->inputNr] = NULL;
1737
39.3M
    return (ret);
1738
41.3M
}
1739
/**
1740
 * nodePush:
1741
 * @ctxt:  an XML parser context
1742
 * @value:  the element node
1743
 *
1744
 * Pushes a new element node on top of the node stack
1745
 *
1746
 * Returns -1 in case of error, the index in the stack otherwise
1747
 */
1748
int
1749
nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1750
7.94M
{
1751
7.94M
    if (ctxt == NULL) return(0);
1752
7.94M
    if (ctxt->nodeNr >= ctxt->nodeMax) {
1753
7.61k
        xmlNodePtr *tmp;
1754
1755
7.61k
  tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1756
7.61k
                                      ctxt->nodeMax * 2 *
1757
7.61k
                                      sizeof(ctxt->nodeTab[0]));
1758
7.61k
        if (tmp == NULL) {
1759
0
            xmlErrMemory(ctxt, NULL);
1760
0
            return (-1);
1761
0
        }
1762
7.61k
        ctxt->nodeTab = tmp;
1763
7.61k
  ctxt->nodeMax *= 2;
1764
7.61k
    }
1765
7.94M
    if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1766
7.94M
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
1767
0
  xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1768
0
     "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1769
0
        xmlParserMaxDepth);
1770
0
  xmlHaltParser(ctxt);
1771
0
  return(-1);
1772
0
    }
1773
7.94M
    ctxt->nodeTab[ctxt->nodeNr] = value;
1774
7.94M
    ctxt->node = value;
1775
7.94M
    return (ctxt->nodeNr++);
1776
7.94M
}
1777
1778
/**
1779
 * nodePop:
1780
 * @ctxt: an XML parser context
1781
 *
1782
 * Pops the top element node from the node stack
1783
 *
1784
 * Returns the node just removed
1785
 */
1786
xmlNodePtr
1787
nodePop(xmlParserCtxtPtr ctxt)
1788
7.52M
{
1789
7.52M
    xmlNodePtr ret;
1790
1791
7.52M
    if (ctxt == NULL) return(NULL);
1792
7.52M
    if (ctxt->nodeNr <= 0)
1793
98.2k
        return (NULL);
1794
7.42M
    ctxt->nodeNr--;
1795
7.42M
    if (ctxt->nodeNr > 0)
1796
7.11M
        ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1797
307k
    else
1798
307k
        ctxt->node = NULL;
1799
7.42M
    ret = ctxt->nodeTab[ctxt->nodeNr];
1800
7.42M
    ctxt->nodeTab[ctxt->nodeNr] = NULL;
1801
7.42M
    return (ret);
1802
7.52M
}
1803
1804
/**
1805
 * nameNsPush:
1806
 * @ctxt:  an XML parser context
1807
 * @value:  the element name
1808
 * @prefix:  the element prefix
1809
 * @URI:  the element namespace name
1810
 * @line:  the current line number for error messages
1811
 * @nsNr:  the number of namespaces pushed on the namespace table
1812
 *
1813
 * Pushes a new element name/prefix/URL on top of the name stack
1814
 *
1815
 * Returns -1 in case of error, the index in the stack otherwise
1816
 */
1817
static int
1818
nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1819
           const xmlChar *prefix, const xmlChar *URI, int line, int nsNr)
1820
6.64M
{
1821
6.64M
    xmlStartTag *tag;
1822
1823
6.64M
    if (ctxt->nameNr >= ctxt->nameMax) {
1824
17.3k
        const xmlChar * *tmp;
1825
17.3k
        xmlStartTag *tmp2;
1826
17.3k
        ctxt->nameMax *= 2;
1827
17.3k
        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1828
17.3k
                                    ctxt->nameMax *
1829
17.3k
                                    sizeof(ctxt->nameTab[0]));
1830
17.3k
        if (tmp == NULL) {
1831
0
      ctxt->nameMax /= 2;
1832
0
      goto mem_error;
1833
0
        }
1834
17.3k
  ctxt->nameTab = tmp;
1835
17.3k
        tmp2 = (xmlStartTag *) xmlRealloc((void * *)ctxt->pushTab,
1836
17.3k
                                    ctxt->nameMax *
1837
17.3k
                                    sizeof(ctxt->pushTab[0]));
1838
17.3k
        if (tmp2 == NULL) {
1839
0
      ctxt->nameMax /= 2;
1840
0
      goto mem_error;
1841
0
        }
1842
17.3k
  ctxt->pushTab = tmp2;
1843
6.62M
    } else if (ctxt->pushTab == NULL) {
1844
295k
        ctxt->pushTab = (xmlStartTag *) xmlMalloc(ctxt->nameMax *
1845
295k
                                            sizeof(ctxt->pushTab[0]));
1846
295k
        if (ctxt->pushTab == NULL)
1847
0
            goto mem_error;
1848
295k
    }
1849
6.64M
    ctxt->nameTab[ctxt->nameNr] = value;
1850
6.64M
    ctxt->name = value;
1851
6.64M
    tag = &ctxt->pushTab[ctxt->nameNr];
1852
6.64M
    tag->prefix = prefix;
1853
6.64M
    tag->URI = URI;
1854
6.64M
    tag->line = line;
1855
6.64M
    tag->nsNr = nsNr;
1856
6.64M
    return (ctxt->nameNr++);
1857
0
mem_error:
1858
0
    xmlErrMemory(ctxt, NULL);
1859
0
    return (-1);
1860
6.64M
}
1861
#ifdef LIBXML_PUSH_ENABLED
/**
 * nameNsPop:
 * @ctxt: an XML parser context
 *
 * Pops the top element name from the name stack. ctxt->name is set to
 * the entry below it, or to NULL when the stack becomes empty.
 *
 * Returns the name just removed, or NULL if the stack was empty
 */
static const xmlChar *
nameNsPop(xmlParserCtxtPtr ctxt)
{
    const xmlChar *popped;
    int top;

    if (ctxt->nameNr <= 0)
        return(NULL);

    top = --ctxt->nameNr;
    ctxt->name = (top > 0) ? ctxt->nameTab[top - 1] : NULL;

    popped = ctxt->nameTab[top];
    ctxt->nameTab[top] = NULL;
    return(popped);
}
#endif /* LIBXML_PUSH_ENABLED */
1887
1888
/**
1889
 * namePush:
1890
 * @ctxt:  an XML parser context
1891
 * @value:  the element name
1892
 *
1893
 * Pushes a new element name on top of the name stack
1894
 *
1895
 * Returns -1 in case of error, the index in the stack otherwise
1896
 */
1897
int
1898
namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1899
0
{
1900
0
    if (ctxt == NULL) return (-1);
1901
1902
0
    if (ctxt->nameNr >= ctxt->nameMax) {
1903
0
        const xmlChar * *tmp;
1904
0
        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1905
0
                                    ctxt->nameMax * 2 *
1906
0
                                    sizeof(ctxt->nameTab[0]));
1907
0
        if (tmp == NULL) {
1908
0
      goto mem_error;
1909
0
        }
1910
0
  ctxt->nameTab = tmp;
1911
0
        ctxt->nameMax *= 2;
1912
0
    }
1913
0
    ctxt->nameTab[ctxt->nameNr] = value;
1914
0
    ctxt->name = value;
1915
0
    return (ctxt->nameNr++);
1916
0
mem_error:
1917
0
    xmlErrMemory(ctxt, NULL);
1918
0
    return (-1);
1919
0
}
1920
/**
1921
 * namePop:
1922
 * @ctxt: an XML parser context
1923
 *
1924
 * Pops the top element name from the name stack
1925
 *
1926
 * Returns the name just removed
1927
 */
1928
const xmlChar *
1929
namePop(xmlParserCtxtPtr ctxt)
1930
4.53M
{
1931
4.53M
    const xmlChar *ret;
1932
1933
4.53M
    if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1934
0
        return (NULL);
1935
4.53M
    ctxt->nameNr--;
1936
4.53M
    if (ctxt->nameNr > 0)
1937
4.28M
        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1938
257k
    else
1939
257k
        ctxt->name = NULL;
1940
4.53M
    ret = ctxt->nameTab[ctxt->nameNr];
1941
4.53M
    ctxt->nameTab[ctxt->nameNr] = NULL;
1942
4.53M
    return (ret);
1943
4.53M
}
1944
1945
9.03M
/*
 * spacePush:
 * Pushes @val on the space stack and points ctxt->space at the new top
 * entry. The table is doubled when full.
 *
 * Returns the index of the new entry, or -1 on allocation failure (the
 * previous capacity is restored in that case).
 */
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
    int slot;

    if (ctxt->spaceNr >= ctxt->spaceMax) {
        int *grown;

        ctxt->spaceMax *= 2;
        grown = (int *) xmlRealloc(ctxt->spaceTab,
                                   ctxt->spaceMax *
                                   sizeof(ctxt->spaceTab[0]));
        if (grown == NULL) {
            xmlErrMemory(ctxt, NULL);
            ctxt->spaceMax /=2;
            return(-1);
        }
        ctxt->spaceTab = grown;
    }

    slot = ctxt->spaceNr++;
    ctxt->spaceTab[slot] = val;
    ctxt->space = &ctxt->spaceTab[slot];
    return(slot);
}
1963
1964
8.63M
/*
 * spacePop:
 * Pops the top value of the space stack. ctxt->space is moved to the
 * entry below, or to slot 0 when the stack becomes empty, and the
 * vacated slot is overwritten with -1.
 *
 * Returns the popped value, or 0 if the stack was already empty.
 */
static int spacePop(xmlParserCtxtPtr ctxt) {
    int top;
    int popped;

    if (ctxt->spaceNr <= 0)
        return(0);

    top = --ctxt->spaceNr;
    ctxt->space = &ctxt->spaceTab[(top > 0) ? (top - 1) : 0];

    popped = ctxt->spaceTab[top];
    ctxt->spaceTab[top] = -1;
    return(popped);
}
1976
1977
/*
1978
 * Macros for accessing the content. Those should be used only by the parser,
1979
 * and not exported.
1980
 *
1981
 * Dirty macros, i.e. one often need to make assumption on the context to
1982
 * use them
1983
 *
1984
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
1985
 *           To be used with extreme caution since operations consuming
1986
 *           characters may move the input buffer to a different location !
1987
 *   CUR     returns the current xmlChar value, i.e. a 8 bit value if compiled
1988
 *           This should be used internally by the parser
1989
 *           only to compare to ASCII values otherwise it would break when
1990
 *           running with UTF-8 encoding.
1991
 *   RAW     same as CUR but in the input buffer, bypass any token
1992
 *           extraction that may have been done
1993
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used only
1994
 *           to compare on ASCII based substring.
1995
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
1996
 *           strings without newlines within the parser.
1997
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
1998
 *           defined char within the parser.
1999
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
2000
 *
2001
 *   NEXT    Skip to the next character, this does the proper decoding
2002
 *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
2003
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
2004
 *   CUR_CHAR(l) returns the current unicode character (int), set l
2005
 *           to the number of xmlChars used for the encoding [0-5].
2006
 *   CUR_SCHAR  same but operate on a string instead of the context
2007
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
2008
 *            the index
2009
 *   GROW, SHRINK  handling of input buffers
2010
 */
2011
2012
428M
/* Byte at the current position of the active input. */
#define RAW (*ctxt->input->cur)
2013
374M
/* Expands to exactly the same expression as RAW. */
#define CUR (*ctxt->input->cur)
2014
313M
/* Byte located val positions after the current one (no bounds check). */
#define NXT(val) ctxt->input->cur[(val)]
2015
18.5M
/* The current-position pointer itself; usable as an lvalue. */
#define CUR_PTR ctxt->input->cur
2016
432k
/* The base pointer of the active input. */
#define BASE_PTR ctxt->input->base
2017
2018
/*
 * CMPn(s, c1, ..., cn): true when the first n bytes at s equal c1..cn.
 * The bytes are read as unsigned char and compared left to right with
 * short-circuit evaluation, so s may be evaluated several times.
 * Every argument is parenthesized so that pointer expressions such as
 * "p + 1" and compound character expressions expand correctly.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )
2035
2036
135M
/*
 * SKIP(val): advance the current pointer and the column counter by val
 * bytes, then grow the input if the new current byte is 0. val is
 * evaluated twice. Line numbers are not updated.
 */
#define SKIP(val) do {							\
    ctxt->input->cur += (val),ctxt->input->col+=(val);			\
    if (*ctxt->input->cur == 0)						\
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
  } while (0)

/*
 * SKIPL(val): advance val bytes one at a time, keeping line and column
 * in sync (a '\n' starts a new line at column 1), then grow the input
 * if the new current byte is 0. val is parenthesized so that compound
 * expressions (e.g. a conditional) are compared as a whole.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == 0)						\
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
  } while (0)
2053
2054
126M
#define SHRINK if ((ctxt->progressive == 0) &&       \
2055
126M
       (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
2056
126M
       (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
2057
126M
  xmlSHRINK (ctxt);
2058
2059
1.37M
static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
2060
    /* Don't shrink memory buffers. */
2061
1.37M
    if ((ctxt->input->buf) &&
2062
1.37M
        ((ctxt->input->buf->encoder) || (ctxt->input->buf->readcallback)))
2063
7.57k
        xmlParserInputShrink(ctxt->input);
2064
1.37M
    if (*ctxt->input->cur == 0)
2065
59.9k
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2066
1.37M
}
2067
2068
425M
#define GROW if ((ctxt->progressive == 0) &&       \
2069
425M
     (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
2070
425M
  xmlGROW (ctxt);
2071
2072
88.2M
static void xmlGROW (xmlParserCtxtPtr ctxt) {
2073
88.2M
    ptrdiff_t curEnd = ctxt->input->end - ctxt->input->cur;
2074
88.2M
    ptrdiff_t curBase = ctxt->input->cur - ctxt->input->base;
2075
2076
88.2M
    if (((curEnd > XML_MAX_LOOKUP_LIMIT) ||
2077
88.2M
         (curBase > XML_MAX_LOOKUP_LIMIT)) &&
2078
88.2M
         ((ctxt->input->buf) &&
2079
0
          (ctxt->input->buf->readcallback != NULL)) &&
2080
88.2M
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
2081
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
2082
0
        xmlHaltParser(ctxt);
2083
0
  return;
2084
0
    }
2085
88.2M
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2086
88.2M
    if ((ctxt->input->cur > ctxt->input->end) ||
2087
88.2M
        (ctxt->input->cur < ctxt->input->base)) {
2088
0
        xmlHaltParser(ctxt);
2089
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "cur index out of bound");
2090
0
  return;
2091
0
    }
2092
88.2M
    if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0))
2093
1.69M
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2094
88.2M
}
2095
2096
108M
/* Skip blanks at the current position; evaluates to the count returned
 * by xmlSkipBlankChars(). */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)
2097
2098
231M
/* Advance to the next character through xmlNextChar(). */
#define NEXT xmlNextChar(ctxt)
2099
2100
15.3M
/* Advance column and current pointer by one byte, then grow the input
 * if the new current byte is 0. Expands to a brace block, not a
 * do/while statement. */
#define NEXT1 {               \
2101
15.3M
  ctxt->input->col++;           \
2102
15.3M
  ctxt->input->cur++;           \
2103
15.3M
  if (*ctxt->input->cur == 0)         \
2104
15.3M
      xmlParserInputGrow(ctxt->input, INPUT_CHUNK);   \
2105
15.3M
    }
2106
2107
191M
/* Update line/column from the byte at the current position (a '\n'
 * starts a new line at column 1), then advance the pointer by l bytes.
 * Unlike SKIP and NEXT1 this never grows the input. */
#define NEXTL(l) do {             \
2108
191M
    if (*(ctxt->input->cur) == '\n') {         \
2109
3.71M
  ctxt->input->line++; ctxt->input->col = 1;      \
2110
187M
    } else ctxt->input->col++;           \
2111
191M
    ctxt->input->cur += l;        \
2112
191M
  } while (0)
2113
2114
201M
/* Current character of the input via xmlCurrentChar(); l must be an
 * lvalue because its address is passed. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
2115
2.04G
/* Same, but reads from the string s via xmlStringCurrentChar(). */
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)
2116
2117
/* Append character v to buffer b at index i: a single store when
 * l == 1, otherwise xmlCopyCharMultiByte() whose return value is added
 * to i. Expands to a bare if/else without a trailing ';', so it must be
 * used as a complete statement. */
#define COPY_BUF(l,b,i,v)           \
2118
2.16G
    if (l == 1) b[i++] = v;           \
2119
2.16G
    else i += xmlCopyCharMultiByte(&b[i],v)
2120
2121
/**
2122
 * xmlSkipBlankChars:
2123
 * @ctxt:  the XML parser context
2124
 *
2125
 * skip all blanks character found at that point in the input streams.
2126
 * It pops up finished entities in the process if allowable at that point.
2127
 *
2128
 * Returns the number of space chars skipped
2129
 */
2130
2131
int
2132
108M
xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2133
108M
    int res = 0;
2134
2135
    /*
2136
     * It's Okay to use CUR/NEXT here since all the blanks are on
2137
     * the ASCII range.
2138
     */
2139
108M
    if (((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) ||
2140
108M
        (ctxt->instate == XML_PARSER_START)) {
2141
39.6M
  const xmlChar *cur;
2142
  /*
2143
   * if we are in the document content, go really fast
2144
   */
2145
39.6M
  cur = ctxt->input->cur;
2146
39.6M
  while (IS_BLANK_CH(*cur)) {
2147
16.7M
      if (*cur == '\n') {
2148
830k
    ctxt->input->line++; ctxt->input->col = 1;
2149
15.9M
      } else {
2150
15.9M
    ctxt->input->col++;
2151
15.9M
      }
2152
16.7M
      cur++;
2153
16.7M
      if (res < INT_MAX)
2154
16.7M
    res++;
2155
16.7M
      if (*cur == 0) {
2156
64.1k
    ctxt->input->cur = cur;
2157
64.1k
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2158
64.1k
    cur = ctxt->input->cur;
2159
64.1k
      }
2160
16.7M
  }
2161
39.6M
  ctxt->input->cur = cur;
2162
68.6M
    } else {
2163
68.6M
        int expandPE = ((ctxt->external != 0) || (ctxt->inputNr != 1));
2164
2165
233M
  while (ctxt->instate != XML_PARSER_EOF) {
2166
233M
            if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2167
76.3M
    NEXT;
2168
157M
      } else if (CUR == '%') {
2169
                /*
2170
                 * Need to handle support of entities branching here
2171
                 */
2172
50.3M
          if ((expandPE == 0) || (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2173
449k
                    break;
2174
49.9M
          xmlParsePEReference(ctxt);
2175
106M
            } else if (CUR == 0) {
2176
38.8M
                unsigned long consumed;
2177
38.8M
                xmlEntityPtr ent;
2178
2179
38.8M
                if (ctxt->inputNr <= 1)
2180
28.4k
                    break;
2181
2182
38.7M
                consumed = ctxt->input->consumed;
2183
38.7M
                xmlSaturatedAddSizeT(&consumed,
2184
38.7M
                                     ctxt->input->cur - ctxt->input->base);
2185
2186
                /*
2187
                 * Add to sizeentities when parsing an external entity
2188
                 * for the first time.
2189
                 */
2190
38.7M
                ent = ctxt->input->entity;
2191
38.7M
                if ((ent->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
2192
38.7M
                    ((ent->flags & XML_ENT_PARSED) == 0)) {
2193
13.1k
                    ent->flags |= XML_ENT_PARSED;
2194
2195
13.1k
                    xmlSaturatedAdd(&ctxt->sizeentities, consumed);
2196
13.1k
                }
2197
2198
38.7M
                xmlParserEntityCheck(ctxt, consumed);
2199
2200
38.7M
                xmlPopInput(ctxt);
2201
68.1M
            } else {
2202
68.1M
                break;
2203
68.1M
            }
2204
2205
            /*
2206
             * Also increase the counter when entering or exiting a PERef.
2207
             * The spec says: "When a parameter-entity reference is recognized
2208
             * in the DTD and included, its replacement text MUST be enlarged
2209
             * by the attachment of one leading and one following space (#x20)
2210
             * character."
2211
             */
2212
165M
      if (res < INT_MAX)
2213
165M
    res++;
2214
165M
        }
2215
68.6M
    }
2216
108M
    return(res);
2217
108M
}
2218
2219
/************************************************************************
2220
 *                  *
2221
 *    Commodity functions to handle entities      *
2222
 *                  *
2223
 ************************************************************************/
2224
2225
/**
2226
 * xmlPopInput:
2227
 * @ctxt:  an XML parser context
2228
 *
2229
 * xmlPopInput: the current input pointed by ctxt->input came to an end
2230
 *          pop it and return the next char.
2231
 *
2232
 * Returns the current xmlChar in the parser context
2233
 */
2234
xmlChar
2235
38.7M
xmlPopInput(xmlParserCtxtPtr ctxt) {
2236
38.7M
    xmlParserInputPtr input;
2237
2238
38.7M
    if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2239
38.7M
    if (xmlParserDebugEntities)
2240
0
  xmlGenericError(xmlGenericErrorContext,
2241
0
    "Popping input %d\n", ctxt->inputNr);
2242
38.7M
    if ((ctxt->inputNr > 1) && (ctxt->inSubset == 0) &&
2243
38.7M
        (ctxt->instate != XML_PARSER_EOF))
2244
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2245
0
                    "Unfinished entity outside the DTD");
2246
38.7M
    input = inputPop(ctxt);
2247
38.7M
    if (input->entity != NULL)
2248
38.7M
        input->entity->flags &= ~XML_ENT_EXPANDING;
2249
38.7M
    xmlFreeInputStream(input);
2250
38.7M
    if (*ctxt->input->cur == 0)
2251
18.3M
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2252
38.7M
    return(CUR);
2253
38.7M
}
2254
2255
/**
2256
 * xmlPushInput:
2257
 * @ctxt:  an XML parser context
2258
 * @input:  an XML parser input fragment (entity, XML fragment ...).
2259
 *
2260
 * xmlPushInput: switch to a new input stream which is stacked on top
2261
 *               of the previous one(s).
2262
 * Returns -1 in case of error or the index in the input stack
2263
 */
2264
int
2265
38.8M
xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2266
38.8M
    int ret;
2267
38.8M
    if (input == NULL) return(-1);
2268
2269
38.8M
    if (xmlParserDebugEntities) {
2270
0
  if ((ctxt->input != NULL) && (ctxt->input->filename))
2271
0
      xmlGenericError(xmlGenericErrorContext,
2272
0
        "%s(%d): ", ctxt->input->filename,
2273
0
        ctxt->input->line);
2274
0
  xmlGenericError(xmlGenericErrorContext,
2275
0
    "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2276
0
    }
2277
38.8M
    if (((ctxt->inputNr > 40) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2278
38.8M
        (ctxt->inputNr > 100)) {
2279
0
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2280
0
        while (ctxt->inputNr > 1)
2281
0
            xmlFreeInputStream(inputPop(ctxt));
2282
0
  return(-1);
2283
0
    }
2284
38.8M
    ret = inputPush(ctxt, input);
2285
38.8M
    if (ctxt->instate == XML_PARSER_EOF)
2286
0
        return(-1);
2287
38.8M
    GROW;
2288
38.8M
    return(ret);
2289
38.8M
}
2290
2291
/**
2292
 * xmlParseCharRef:
2293
 * @ctxt:  an XML parser context
2294
 *
2295
 * DEPRECATED: Internal function, don't use.
2296
 *
2297
 * Parse a numeric character reference. Always consumes '&'.
2298
 *
2299
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2300
 *                  '&#x' [0-9a-fA-F]+ ';'
2301
 *
2302
 * [ WFC: Legal Character ]
2303
 * Characters referred to using character references must match the
2304
 * production for Char.
2305
 *
2306
 * Returns the value parsed (as an int), 0 in case of error
2307
 */
2308
int
2309
358k
xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2310
358k
    int val = 0;
2311
358k
    int count = 0;
2312
2313
    /*
2314
     * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2315
     */
2316
358k
    if ((RAW == '&') && (NXT(1) == '#') &&
2317
358k
        (NXT(2) == 'x')) {
2318
140k
  SKIP(3);
2319
140k
  GROW;
2320
510k
  while (RAW != ';') { /* loop blocked by count */
2321
383k
      if (count++ > 20) {
2322
18.0k
    count = 0;
2323
18.0k
    GROW;
2324
18.0k
                if (ctxt->instate == XML_PARSER_EOF)
2325
0
                    return(0);
2326
18.0k
      }
2327
383k
      if ((RAW >= '0') && (RAW <= '9'))
2328
253k
          val = val * 16 + (CUR - '0');
2329
130k
      else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2330
100k
          val = val * 16 + (CUR - 'a') + 10;
2331
29.1k
      else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2332
14.8k
          val = val * 16 + (CUR - 'A') + 10;
2333
14.2k
      else {
2334
14.2k
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2335
14.2k
    val = 0;
2336
14.2k
    break;
2337
14.2k
      }
2338
369k
      if (val > 0x110000)
2339
199k
          val = 0x110000;
2340
2341
369k
      NEXT;
2342
369k
      count++;
2343
369k
  }
2344
140k
  if (RAW == ';') {
2345
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2346
126k
      ctxt->input->col++;
2347
126k
      ctxt->input->cur++;
2348
126k
  }
2349
217k
    } else if  ((RAW == '&') && (NXT(1) == '#')) {
2350
217k
  SKIP(2);
2351
217k
  GROW;
2352
1.02M
  while (RAW != ';') { /* loop blocked by count */
2353
833k
      if (count++ > 20) {
2354
23.1k
    count = 0;
2355
23.1k
    GROW;
2356
23.1k
                if (ctxt->instate == XML_PARSER_EOF)
2357
0
                    return(0);
2358
23.1k
      }
2359
833k
      if ((RAW >= '0') && (RAW <= '9'))
2360
807k
          val = val * 10 + (CUR - '0');
2361
25.4k
      else {
2362
25.4k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2363
25.4k
    val = 0;
2364
25.4k
    break;
2365
25.4k
      }
2366
807k
      if (val > 0x110000)
2367
257k
          val = 0x110000;
2368
2369
807k
      NEXT;
2370
807k
      count++;
2371
807k
  }
2372
217k
  if (RAW == ';') {
2373
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2374
192k
      ctxt->input->col++;
2375
192k
      ctxt->input->cur++;
2376
192k
  }
2377
217k
    } else {
2378
0
        if (RAW == '&')
2379
0
            SKIP(1);
2380
0
        xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2381
0
    }
2382
2383
    /*
2384
     * [ WFC: Legal Character ]
2385
     * Characters referred to using character references must match the
2386
     * production for Char.
2387
     */
2388
358k
    if (val >= 0x110000) {
2389
1.71k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2390
1.71k
                "xmlParseCharRef: character reference out of bounds\n",
2391
1.71k
          val);
2392
356k
    } else if (IS_CHAR(val)) {
2393
313k
        return(val);
2394
313k
    } else {
2395
42.9k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2396
42.9k
                          "xmlParseCharRef: invalid xmlChar value %d\n",
2397
42.9k
                    val);
2398
42.9k
    }
2399
44.6k
    return(0);
2400
358k
}
2401
2402
/**
2403
 * xmlParseStringCharRef:
2404
 * @ctxt:  an XML parser context
2405
 * @str:  a pointer to an index in the string
2406
 *
2407
 * parse Reference declarations, variant parsing from a string rather
2408
 * than an an input flow.
2409
 *
2410
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2411
 *                  '&#x' [0-9a-fA-F]+ ';'
2412
 *
2413
 * [ WFC: Legal Character ]
2414
 * Characters referred to using character references must match the
2415
 * production for Char.
2416
 *
2417
 * Returns the value parsed (as an int), 0 in case of error, str will be
2418
 *         updated to the current value of the index
2419
 */
2420
static int
2421
221k
xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2422
221k
    const xmlChar *ptr;
2423
221k
    xmlChar cur;
2424
221k
    int val = 0;
2425
2426
221k
    if ((str == NULL) || (*str == NULL)) return(0);
2427
221k
    ptr = *str;
2428
221k
    cur = *ptr;
2429
221k
    if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2430
39.7k
  ptr += 3;
2431
39.7k
  cur = *ptr;
2432
118k
  while (cur != ';') { /* Non input consuming loop */
2433
80.4k
      if ((cur >= '0') && (cur <= '9'))
2434
42.2k
          val = val * 16 + (cur - '0');
2435
38.2k
      else if ((cur >= 'a') && (cur <= 'f'))
2436
8.87k
          val = val * 16 + (cur - 'a') + 10;
2437
29.3k
      else if ((cur >= 'A') && (cur <= 'F'))
2438
27.4k
          val = val * 16 + (cur - 'A') + 10;
2439
1.88k
      else {
2440
1.88k
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2441
1.88k
    val = 0;
2442
1.88k
    break;
2443
1.88k
      }
2444
78.5k
      if (val > 0x110000)
2445
30.7k
          val = 0x110000;
2446
2447
78.5k
      ptr++;
2448
78.5k
      cur = *ptr;
2449
78.5k
  }
2450
39.7k
  if (cur == ';')
2451
37.8k
      ptr++;
2452
181k
    } else if  ((cur == '&') && (ptr[1] == '#')){
2453
181k
  ptr += 2;
2454
181k
  cur = *ptr;
2455
602k
  while (cur != ';') { /* Non input consuming loops */
2456
423k
      if ((cur >= '0') && (cur <= '9'))
2457
421k
          val = val * 10 + (cur - '0');
2458
2.52k
      else {
2459
2.52k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2460
2.52k
    val = 0;
2461
2.52k
    break;
2462
2.52k
      }
2463
421k
      if (val > 0x110000)
2464
7.68k
          val = 0x110000;
2465
2466
421k
      ptr++;
2467
421k
      cur = *ptr;
2468
421k
  }
2469
181k
  if (cur == ';')
2470
178k
      ptr++;
2471
181k
    } else {
2472
0
  xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2473
0
  return(0);
2474
0
    }
2475
221k
    *str = ptr;
2476
2477
    /*
2478
     * [ WFC: Legal Character ]
2479
     * Characters referred to using character references must match the
2480
     * production for Char.
2481
     */
2482
221k
    if (val >= 0x110000) {
2483
345
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2484
345
                "xmlParseStringCharRef: character reference out of bounds\n",
2485
345
                val);
2486
220k
    } else if (IS_CHAR(val)) {
2487
214k
        return(val);
2488
214k
    } else {
2489
5.92k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2490
5.92k
        "xmlParseStringCharRef: invalid xmlChar value %d\n",
2491
5.92k
        val);
2492
5.92k
    }
2493
6.27k
    return(0);
2494
221k
}
2495
2496
/**
2497
 * xmlParserHandlePEReference:
2498
 * @ctxt:  the parser context
2499
 *
2500
 * [69] PEReference ::= '%' Name ';'
2501
 *
2502
 * [ WFC: No Recursion ]
2503
 * A parsed entity must not contain a recursive
2504
 * reference to itself, either directly or indirectly.
2505
 *
2506
 * [ WFC: Entity Declared ]
2507
 * In a document without any DTD, a document with only an internal DTD
2508
 * subset which contains no parameter entity references, or a document
2509
 * with "standalone='yes'", ...  ... The declaration of a parameter
2510
 * entity must precede any reference to it...
2511
 *
2512
 * [ VC: Entity Declared ]
2513
 * In a document with an external subset or external parameter entities
2514
 * with "standalone='no'", ...  ... The declaration of a parameter entity
2515
 * must precede any reference to it...
2516
 *
2517
 * [ WFC: In DTD ]
2518
 * Parameter-entity references may only appear in the DTD.
2519
 * NOTE: misleading but this is handled.
2520
 *
2521
 * A PEReference may have been detected in the current input stream
2522
 * the handling is done accordingly to
2523
 *      http://www.w3.org/TR/REC-xml#entproc
2524
 * i.e.
2525
 *   - Included in literal in entity values
2526
 *   - Included as Parameter Entity reference within DTDs
2527
 */
2528
void
2529
0
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2530
0
    switch(ctxt->instate) {
2531
0
  case XML_PARSER_CDATA_SECTION:
2532
0
      return;
2533
0
        case XML_PARSER_COMMENT:
2534
0
      return;
2535
0
  case XML_PARSER_START_TAG:
2536
0
      return;
2537
0
  case XML_PARSER_END_TAG:
2538
0
      return;
2539
0
        case XML_PARSER_EOF:
2540
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2541
0
      return;
2542
0
        case XML_PARSER_PROLOG:
2543
0
  case XML_PARSER_START:
2544
0
  case XML_PARSER_MISC:
2545
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2546
0
      return;
2547
0
  case XML_PARSER_ENTITY_DECL:
2548
0
        case XML_PARSER_CONTENT:
2549
0
        case XML_PARSER_ATTRIBUTE_VALUE:
2550
0
        case XML_PARSER_PI:
2551
0
  case XML_PARSER_SYSTEM_LITERAL:
2552
0
  case XML_PARSER_PUBLIC_LITERAL:
2553
      /* we just ignore it there */
2554
0
      return;
2555
0
        case XML_PARSER_EPILOG:
2556
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2557
0
      return;
2558
0
  case XML_PARSER_ENTITY_VALUE:
2559
      /*
2560
       * NOTE: in the case of entity values, we don't do the
2561
       *       substitution here since we need the literal
2562
       *       entity value to be able to save the internal
2563
       *       subset of the document.
2564
       *       This will be handled by xmlStringDecodeEntities
2565
       */
2566
0
      return;
2567
0
        case XML_PARSER_DTD:
2568
      /*
2569
       * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2570
       * In the internal DTD subset, parameter-entity references
2571
       * can occur only where markup declarations can occur, not
2572
       * within markup declarations.
2573
       * In that case this is handled in xmlParseMarkupDecl
2574
       */
2575
0
      if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2576
0
    return;
2577
0
      if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2578
0
    return;
2579
0
            break;
2580
0
        case XML_PARSER_IGNORE:
2581
0
            return;
2582
0
    }
2583
2584
0
    xmlParsePEReference(ctxt);
2585
0
}
2586
2587
/*
 * Macro used to grow the current buffer to twice its size plus n bytes.
 * buffer##_size is expected to be a size_t
 * mem_error: is expected to handle memory allocation failures; it is
 * also the target when the new size wraps around. On failure buffer and
 * buffer##_size are left unchanged.
 * n is parenthesized so that compound expressions are added as a whole.
 */
#define growBuffer(buffer, n) {						\
    xmlChar *tmp;							\
    size_t new_size = buffer##_size * 2 + (n);                          \
    if (new_size < buffer##_size) goto mem_error;                       \
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);                     \
    if (tmp == NULL) goto mem_error;					\
    buffer = tmp;							\
    buffer##_size = new_size;                                           \
}
2601
2602
/**
2603
 * xmlStringDecodeEntitiesInt:
2604
 * @ctxt:  the parser context
2605
 * @str:  the input string
2606
 * @len: the string length
2607
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2608
 * @end:  an end marker xmlChar, 0 if none
2609
 * @end2:  an end marker xmlChar, 0 if none
2610
 * @end3:  an end marker xmlChar, 0 if none
2611
 * @check:  whether to perform entity checks
2612
 *
 * Worker shared by xmlStringDecodeEntities and xmlStringLenDecodeEntities.
 * Copies @str into a freshly allocated buffer, replacing character
 * references and, depending on @what, general entity and parameter entity
 * references by their content, which is decoded by a recursive call.
 * Decoding stops at a zero character, at one of @end, @end2, @end3, once
 * @len bytes have been consumed, or when the parser reaches the
 * XML_PARSER_EOF state.
 *
 * Returns the zero-terminated decoded string, or NULL if @str is NULL,
 * if the nesting depth limit is exceeded, or on any decoding or memory
 * allocation error.
 */
2613
static xmlChar *
2614
xmlStringDecodeEntitiesInt(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2615
               int what, xmlChar end, xmlChar  end2, xmlChar end3,
2616
27.4M
                           int check) {
2617
27.4M
    xmlChar *buffer = NULL;
2618
27.4M
    size_t buffer_size = 0;
2619
27.4M
    size_t nbchars = 0;
2620
2621
27.4M
    xmlChar *current = NULL;
2622
27.4M
    xmlChar *rep = NULL;
2623
27.4M
    const xmlChar *last;
2624
27.4M
    xmlEntityPtr ent;
2625
27.4M
    int c,l;
2626
2627
27.4M
    if (str == NULL)
2628
17.2k
        return(NULL);
2629
27.4M
    last = str + len;
2630
2631
27.4M
    /* Nesting limit: 40 levels, or 100 when XML_PARSE_HUGE is set. */
    if (((ctxt->depth > 40) &&
2632
27.4M
         ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2633
27.4M
  (ctxt->depth > 100)) {
2634
0
  xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_LOOP,
2635
0
                       "Maximum entity nesting depth exceeded");
2636
0
  return(NULL);
2637
0
    }
2638
2639
    /*
2640
     * allocate a translation buffer.
2641
     */
2642
27.4M
    buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2643
27.4M
    buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
2644
27.4M
    if (buffer == NULL) goto mem_error;
2645
2646
    /*
2647
     * OK loop until we reach one of the ending char or a size limit.
2648
     * we are operating on already parsed values.
2649
     */
2650
27.4M
    /* An empty range (str >= last) is treated like a terminating zero. */
    if (str < last)
2651
27.3M
  c = CUR_SCHAR(str, l);
2652
89.3k
    else
2653
89.3k
        c = 0;
2654
1.53G
    while ((c != 0) && (c != end) && /* non input consuming loop */
2655
1.53G
           (c != end2) && (c != end3) &&
2656
1.53G
           (ctxt->instate != XML_PARSER_EOF)) {
2657
2658
1.50G
  if (c == 0) break;
2659
1.50G
        /* Character reference: a decoded value of 0 is an error. */
        if ((c == '&') && (str[1] == '#')) {
2660
221k
      int val = xmlParseStringCharRef(ctxt, &str);
2661
221k
      if (val == 0)
2662
6.27k
                goto int_error;
2663
214k
      COPY_BUF(0,buffer,nbchars,val);
2664
214k
      /* Grow as soon as fewer than XML_PARSER_BUFFER_SIZE bytes remain. */
      if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2665
460
          growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2666
460
      }
2667
1.50G
  } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2668
25.2M
      if (xmlParserDebugEntities)
2669
0
    xmlGenericError(xmlGenericErrorContext,
2670
0
      "String decoding Entity Reference: %.30s\n",
2671
0
      str);
2672
25.2M
      ent = xmlParseStringEntityRef(ctxt, &str);
2673
25.2M
      if ((ent != NULL) &&
2674
25.2M
    (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2675
74.4k
    if (ent->content != NULL) {
2676
74.4k
        /* Only the first byte of a predefined entity's content is used. */
        COPY_BUF(0,buffer,nbchars,ent->content[0]);
2677
74.4k
        if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2678
3.03k
      growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2679
3.03k
        }
2680
74.4k
    } else {
2681
0
        xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2682
0
          "predefined entity has no content\n");
2683
0
                    goto int_error;
2684
0
    }
2685
25.1M
      } else if ((ent != NULL) && (ent->content != NULL)) {
2686
24.5M
          if ((check) && (xmlParserEntityCheck(ctxt, ent->length)))
2687
181
                    goto int_error;
2688
2689
24.5M
                /*
                 * XML_ENT_EXPANDING is set around the recursive call below,
                 * so finding it set here means the entity refers to itself:
                 * report the loop, halt the parser and empty the content.
                 */
                if (ent->flags & XML_ENT_EXPANDING) {
2690
399
              xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2691
399
                    xmlHaltParser(ctxt);
2692
399
                    ent->content[0] = 0;
2693
399
                    goto int_error;
2694
399
                }
2695
2696
24.5M
                ent->flags |= XML_ENT_EXPANDING;
2697
24.5M
    ctxt->depth++;
2698
24.5M
    rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
2699
24.5M
                        ent->length, what, 0, 0, 0, check);
2700
24.5M
    ctxt->depth--;
2701
24.5M
                ent->flags &= ~XML_ENT_EXPANDING;
2702
2703
24.5M
    /* A failed expansion also empties the entity content. */
    if (rep == NULL) {
2704
3.43k
                    ent->content[0] = 0;
2705
3.43k
                    goto int_error;
2706
3.43k
                }
2707
2708
24.5M
                /* Append the decoded replacement text byte by byte. */
                current = rep;
2709
3.75G
                while (*current != 0) { /* non input consuming loop */
2710
3.72G
                    buffer[nbchars++] = *current++;
2711
3.72G
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2712
2.85M
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2713
2.85M
                    }
2714
3.72G
                }
2715
24.5M
                xmlFree(rep);
2716
24.5M
                rep = NULL;
2717
24.5M
      } else if (ent != NULL) {
2718
67.8k
    /* Entity without content: emit the reference itself as "&name;". */
    int i = xmlStrlen(ent->name);
2719
67.8k
    const xmlChar *cur = ent->name;
2720
2721
67.8k
    buffer[nbchars++] = '&';
2722
67.8k
    if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
2723
2.17k
        growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
2724
2.17k
    }
2725
168k
    for (;i > 0;i--)
2726
100k
        buffer[nbchars++] = *cur++;
2727
67.8k
    buffer[nbchars++] = ';';
2728
67.8k
      }
2729
1.48G
  } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2730
1.20M
      if (xmlParserDebugEntities)
2731
0
    xmlGenericError(xmlGenericErrorContext,
2732
0
      "String decoding PE Reference: %.30s\n", str);
2733
1.20M
      ent = xmlParseStringPEReference(ctxt, &str);
2734
1.20M
      if (ent != NULL) {
2735
686k
                if (ent->content == NULL) {
2736
        /*
2737
         * Note: external parsed entities will not be loaded,
2738
         * it is not required for a non-validating parser to
2739
         * complete external PEReferences coming from the
2740
         * internal subset
2741
         */
2742
5.09k
        if (((ctxt->options & XML_PARSE_NOENT) != 0) ||
2743
5.09k
      ((ctxt->options & XML_PARSE_DTDVALID) != 0) ||
2744
5.09k
      (ctxt->validate != 0)) {
2745
4.38k
      xmlLoadEntityContent(ctxt, ent);
2746
4.38k
        } else {
2747
707
      xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
2748
707
      "not validating will not read content for PE entity %s\n",
2749
707
                          ent->name, NULL);
2750
707
        }
2751
5.09k
    }
2752
2753
686k
          if ((check) && (xmlParserEntityCheck(ctxt, ent->length)))
2754
87
                    goto int_error;
2755
2756
686k
                /* Same loop detection as for general entities above. */
                if (ent->flags & XML_ENT_EXPANDING) {
2757
240
              xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2758
240
                    xmlHaltParser(ctxt);
2759
240
                    if (ent->content != NULL)
2760
147
                        ent->content[0] = 0;
2761
240
                    goto int_error;
2762
240
                }
2763
2764
685k
                ent->flags |= XML_ENT_EXPANDING;
2765
685k
    ctxt->depth++;
2766
685k
    /* content may still be NULL here; the callee then returns NULL. */
    rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
2767
685k
                        ent->length, what, 0, 0, 0, check);
2768
685k
    ctxt->depth--;
2769
685k
                ent->flags &= ~XML_ENT_EXPANDING;
2770
2771
685k
    if (rep == NULL) {
2772
3.70k
                    if (ent->content != NULL)
2773
396
                        ent->content[0] = 0;
2774
3.70k
                    goto int_error;
2775
3.70k
                }
2776
682k
                current = rep;
2777
795M
                while (*current != 0) { /* non input consuming loop */
2778
794M
                    buffer[nbchars++] = *current++;
2779
794M
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2780
150k
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2781
150k
                    }
2782
794M
                }
2783
682k
                xmlFree(rep);
2784
682k
                rep = NULL;
2785
682k
      }
2786
1.48G
  } else {
2787
1.48G
      /* Any other character is copied through and skipped in the input. */
      COPY_BUF(l,buffer,nbchars,c);
2788
1.48G
      str += l;
2789
1.48G
      if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2790
306k
          growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2791
306k
      }
2792
1.48G
  }
2793
1.50G
  /* Fetch the next character, or 0 once @last has been reached. */
  if (str < last)
2794
1.48G
      c = CUR_SCHAR(str, l);
2795
27.3M
  else
2796
27.3M
      c = 0;
2797
1.50G
    }
2798
27.4M
    buffer[nbchars] = 0;
2799
27.4M
    return(buffer);
2800
2801
0
mem_error:
2802
0
    xmlErrMemory(ctxt, NULL);
2803
14.3k
    /* mem_error falls through into the common cleanup below. */
int_error:
2804
14.3k
    if (rep != NULL)
2805
0
        xmlFree(rep);
2806
14.3k
    if (buffer != NULL)
2807
14.3k
        xmlFree(buffer);
2808
14.3k
    return(NULL);
2809
0
}
2810
2811
/**
2812
 * xmlStringLenDecodeEntities:
2813
 * @ctxt:  the parser context
2814
 * @str:  the input string
2815
 * @len: the string length
2816
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2817
 * @end:  an end marker xmlChar, 0 if none
2818
 * @end2:  an end marker xmlChar, 0 if none
2819
 * @end3:  an end marker xmlChar, 0 if none
2820
 *
2821
 * DEPRECATED: Internal function, don't use.
2822
 *
2823
 * Takes a entity string content and process to do the adequate substitutions.
2824
 *
2825
 * [67] Reference ::= EntityRef | CharRef
2826
 *
2827
 * [69] PEReference ::= '%' Name ';'
2828
 *
2829
 * Returns A newly allocated string with the substitution done. The caller
2830
 *      must deallocate it !
2831
 */
2832
xmlChar *
2833
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2834
                           int what, xmlChar end, xmlChar  end2,
2835
10.3k
                           xmlChar end3) {
2836
10.3k
    if ((ctxt == NULL) || (str == NULL) || (len < 0))
2837
0
        return(NULL);
2838
10.3k
    return(xmlStringDecodeEntitiesInt(ctxt, str, len, what,
2839
10.3k
                                      end, end2, end3, 0));
2840
10.3k
}
2841
2842
/**
2843
 * xmlStringDecodeEntities:
2844
 * @ctxt:  the parser context
2845
 * @str:  the input string
2846
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2847
 * @end:  an end marker xmlChar, 0 if none
2848
 * @end2:  an end marker xmlChar, 0 if none
2849
 * @end3:  an end marker xmlChar, 0 if none
2850
 *
2851
 * DEPRECATED: Internal function, don't use.
2852
 *
2853
 * Takes a entity string content and process to do the adequate substitutions.
2854
 *
2855
 * [67] Reference ::= EntityRef | CharRef
2856
 *
2857
 * [69] PEReference ::= '%' Name ';'
2858
 *
2859
 * Returns A newly allocated string with the substitution done. The caller
2860
 *      must deallocate it !
2861
 */
2862
xmlChar *
2863
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2864
186k
            xmlChar end, xmlChar  end2, xmlChar end3) {
2865
186k
    if ((ctxt == NULL) || (str == NULL)) return(NULL);
2866
186k
    return(xmlStringDecodeEntitiesInt(ctxt, str, xmlStrlen(str), what,
2867
186k
                                      end, end2, end3, 0));
2868
186k
}
2869
2870
/************************************************************************
2871
 *                  *
2872
 *    Commodity functions, cleanup needed ?     *
2873
 *                  *
2874
 ************************************************************************/
2875
2876
/**
2877
 * areBlanks:
2878
 * @ctxt:  an XML parser context
2879
 * @str:  a xmlChar *
2880
 * @len:  the size of @str
2881
 * @blank_chars: we know the chars are blanks
2882
 *
2883
 * Is this a sequence of blank chars that one can ignore ?
2884
 *
2885
 * Returns 1 if ignorable 0 otherwise.
2886
 */
2887
2888
static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2889
4.48M
                     int blank_chars) {
2890
4.48M
    int i, ret;
2891
4.48M
    xmlNodePtr lastChild;
2892
2893
    /*
2894
     * Don't spend time trying to differentiate them, the same callback is
2895
     * used !
2896
     */
2897
4.48M
    if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2898
415k
  return(0);
2899
2900
    /*
2901
     * Check for xml:space value.
2902
     */
2903
4.07M
    if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2904
4.07M
        (*(ctxt->space) == -2))
2905
1.60M
  return(0);
2906
2907
    /*
2908
     * Check that the string is made of blanks
2909
     */
2910
2.46M
    if (blank_chars == 0) {
2911
5.93M
  for (i = 0;i < len;i++)
2912
5.10M
      if (!(IS_BLANK_CH(str[i]))) return(0);
2913
1.13M
    }
2914
2915
    /*
2916
     * Look if the element is mixed content in the DTD if available
2917
     */
2918
2.16M
    if (ctxt->node == NULL) return(0);
2919
2.11M
    if (ctxt->myDoc != NULL) {
2920
2.11M
  ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2921
2.11M
        if (ret == 0) return(1);
2922
1.94M
        if (ret == 1) return(0);
2923
1.94M
    }
2924
2925
    /*
2926
     * Otherwise, heuristic :-\
2927
     */
2928
1.92M
    if ((RAW != '<') && (RAW != 0xD)) return(0);
2929
1.89M
    if ((ctxt->node->children == NULL) &&
2930
1.89M
  (RAW == '<') && (NXT(1) == '/')) return(0);
2931
2932
1.89M
    lastChild = xmlGetLastChild(ctxt->node);
2933
1.89M
    if (lastChild == NULL) {
2934
350k
        if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2935
350k
            (ctxt->node->content != NULL)) return(0);
2936
1.54M
    } else if (xmlNodeIsText(lastChild))
2937
28.6k
        return(0);
2938
1.51M
    else if ((ctxt->node->children != NULL) &&
2939
1.51M
             (xmlNodeIsText(ctxt->node->children)))
2940
17.0k
        return(0);
2941
1.85M
    return(1);
2942
1.89M
}
2943
2944
/************************************************************************
2945
 *                  *
2946
 *    Extra stuff for namespace support     *
2947
 *  Relates to http://www.w3.org/TR/WD-xml-names      *
2948
 *                  *
2949
 ************************************************************************/
2950
2951
/**
2952
 * xmlSplitQName:
2953
 * @ctxt:  an XML parser context
2954
 * @name:  an XML parser context
2955
 * @prefix:  a xmlChar **
2956
 *
2957
 * parse an UTF8 encoded XML qualified name string
2958
 *
2959
 * [NS 5] QName ::= (Prefix ':')? LocalPart
2960
 *
2961
 * [NS 6] Prefix ::= NCName
2962
 *
2963
 * [NS 7] LocalPart ::= NCName
2964
 *
2965
 * Returns the local part, and prefix is updated
2966
 *   to get the Prefix if any.
2967
 */
2968
2969
xmlChar *
2970
7.82M
xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2971
7.82M
    xmlChar buf[XML_MAX_NAMELEN + 5];
2972
7.82M
    xmlChar *buffer = NULL;
2973
7.82M
    int len = 0;
2974
7.82M
    int max = XML_MAX_NAMELEN;
2975
7.82M
    xmlChar *ret = NULL;
2976
7.82M
    const xmlChar *cur = name;
2977
7.82M
    int c;
2978
2979
7.82M
    if (prefix == NULL) return(NULL);
2980
7.82M
    *prefix = NULL;
2981
2982
7.82M
    if (cur == NULL) return(NULL);
2983
2984
#ifndef XML_XML_NAMESPACE
2985
    /* xml: prefix is not really a namespace */
2986
    if ((cur[0] == 'x') && (cur[1] == 'm') &&
2987
        (cur[2] == 'l') && (cur[3] == ':'))
2988
  return(xmlStrdup(name));
2989
#endif
2990
2991
    /* nasty but well=formed */
2992
7.82M
    if (cur[0] == ':')
2993
4.33k
  return(xmlStrdup(name));
2994
2995
7.81M
    c = *cur++;
2996
37.1M
    while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2997
29.3M
  buf[len++] = c;
2998
29.3M
  c = *cur++;
2999
29.3M
    }
3000
7.81M
    if (len >= max) {
3001
  /*
3002
   * Okay someone managed to make a huge name, so he's ready to pay
3003
   * for the processing speed.
3004
   */
3005
3.15k
  max = len * 2;
3006
3007
3.15k
  buffer = (xmlChar *) xmlMallocAtomic(max);
3008
3.15k
  if (buffer == NULL) {
3009
0
      xmlErrMemory(ctxt, NULL);
3010
0
      return(NULL);
3011
0
  }
3012
3.15k
  memcpy(buffer, buf, len);
3013
3.15M
  while ((c != 0) && (c != ':')) { /* tested bigname.xml */
3014
3.15M
      if (len + 10 > max) {
3015
4.01k
          xmlChar *tmp;
3016
3017
4.01k
    max *= 2;
3018
4.01k
    tmp = (xmlChar *) xmlRealloc(buffer, max);
3019
4.01k
    if (tmp == NULL) {
3020
0
        xmlFree(buffer);
3021
0
        xmlErrMemory(ctxt, NULL);
3022
0
        return(NULL);
3023
0
    }
3024
4.01k
    buffer = tmp;
3025
4.01k
      }
3026
3.15M
      buffer[len++] = c;
3027
3.15M
      c = *cur++;
3028
3.15M
  }
3029
3.15k
  buffer[len] = 0;
3030
3.15k
    }
3031
3032
7.81M
    if ((c == ':') && (*cur == 0)) {
3033
5.01k
        if (buffer != NULL)
3034
215
      xmlFree(buffer);
3035
5.01k
  *prefix = NULL;
3036
5.01k
  return(xmlStrdup(name));
3037
5.01k
    }
3038
3039
7.81M
    if (buffer == NULL)
3040
7.80M
  ret = xmlStrndup(buf, len);
3041
2.93k
    else {
3042
2.93k
  ret = buffer;
3043
2.93k
  buffer = NULL;
3044
2.93k
  max = XML_MAX_NAMELEN;
3045
2.93k
    }
3046
3047
3048
7.81M
    if (c == ':') {
3049
503k
  c = *cur;
3050
503k
        *prefix = ret;
3051
503k
  if (c == 0) {
3052
0
      return(xmlStrndup(BAD_CAST "", 0));
3053
0
  }
3054
503k
  len = 0;
3055
3056
  /*
3057
   * Check that the first character is proper to start
3058
   * a new name
3059
   */
3060
503k
  if (!(((c >= 0x61) && (c <= 0x7A)) ||
3061
503k
        ((c >= 0x41) && (c <= 0x5A)) ||
3062
503k
        (c == '_') || (c == ':'))) {
3063
7.20k
      int l;
3064
7.20k
      int first = CUR_SCHAR(cur, l);
3065
3066
7.20k
      if (!IS_LETTER(first) && (first != '_')) {
3067
3.44k
    xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3068
3.44k
          "Name %s is not XML Namespace compliant\n",
3069
3.44k
          name);
3070
3.44k
      }
3071
7.20k
  }
3072
503k
  cur++;
3073
3074
3.22M
  while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3075
2.71M
      buf[len++] = c;
3076
2.71M
      c = *cur++;
3077
2.71M
  }
3078
503k
  if (len >= max) {
3079
      /*
3080
       * Okay someone managed to make a huge name, so he's ready to pay
3081
       * for the processing speed.
3082
       */
3083
2.85k
      max = len * 2;
3084
3085
2.85k
      buffer = (xmlChar *) xmlMallocAtomic(max);
3086
2.85k
      if (buffer == NULL) {
3087
0
          xmlErrMemory(ctxt, NULL);
3088
0
    return(NULL);
3089
0
      }
3090
2.85k
      memcpy(buffer, buf, len);
3091
2.57M
      while (c != 0) { /* tested bigname2.xml */
3092
2.57M
    if (len + 10 > max) {
3093
2.90k
        xmlChar *tmp;
3094
3095
2.90k
        max *= 2;
3096
2.90k
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3097
2.90k
        if (tmp == NULL) {
3098
0
      xmlErrMemory(ctxt, NULL);
3099
0
      xmlFree(buffer);
3100
0
      return(NULL);
3101
0
        }
3102
2.90k
        buffer = tmp;
3103
2.90k
    }
3104
2.57M
    buffer[len++] = c;
3105
2.57M
    c = *cur++;
3106
2.57M
      }
3107
2.85k
      buffer[len] = 0;
3108
2.85k
  }
3109
3110
503k
  if (buffer == NULL)
3111
500k
      ret = xmlStrndup(buf, len);
3112
2.85k
  else {
3113
2.85k
      ret = buffer;
3114
2.85k
  }
3115
503k
    }
3116
3117
7.81M
    return(ret);
3118
7.81M
}
3119
3120
/************************************************************************
3121
 *                  *
3122
 *      The parser itself       *
3123
 *  Relates to http://www.w3.org/TR/REC-xml       *
3124
 *                  *
3125
 ************************************************************************/
3126
3127
/************************************************************************
3128
 *                  *
3129
 *  Routines to parse Name, NCName and NmToken      *
3130
 *                  *
3131
 ************************************************************************/
3132
#ifdef DEBUG
/*
 * Counters that exist only in DEBUG builds. The name parsing routines
 * below increment the one matching their own name on entry (for example
 * xmlParseName increments nbParseName).
 * NOTE(review): nbParseNmToken and nbParseStringName are presumably
 * incremented by routines further down the file - confirm.
 */
3133
static unsigned long nbParseName = 0;
3134
static unsigned long nbParseNmToken = 0;
3135
static unsigned long nbParseNCName = 0;
3136
static unsigned long nbParseNCNameComplex = 0;
3137
static unsigned long nbParseNameComplex = 0;
3138
static unsigned long nbParseStringName = 0;
3139
#endif
3140
3141
/*
3142
 * The two following functions are related to the change of accepted
3143
 * characters for Name and NmToken in the Revision 5 of XML-1.0
3144
 * They correspond to the modified production [4] and the new production [4a]
3145
 * changes in that revision. Also note that the macros used for the
3146
 * productions Letter, Digit, CombiningChar and Extender are not needed
3147
 * anymore.
3148
 * We still keep compatibility with the pre-revision5 parsing semantics if the
3149
 * new XML_PARSE_OLD10 option is given to the parser.
3150
 */
3151
static int
3152
27.2M
xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3153
27.2M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3154
        /*
3155
   * Use the new checks of production [4] [4a] amd [5] of the
3156
   * Update 5 of XML-1.0
3157
   */
3158
25.5M
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3159
25.5M
      (((c >= 'a') && (c <= 'z')) ||
3160
25.5M
       ((c >= 'A') && (c <= 'Z')) ||
3161
25.5M
       (c == '_') || (c == ':') ||
3162
25.5M
       ((c >= 0xC0) && (c <= 0xD6)) ||
3163
25.5M
       ((c >= 0xD8) && (c <= 0xF6)) ||
3164
25.5M
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3165
25.5M
       ((c >= 0x370) && (c <= 0x37D)) ||
3166
25.5M
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3167
25.5M
       ((c >= 0x200C) && (c <= 0x200D)) ||
3168
25.5M
       ((c >= 0x2070) && (c <= 0x218F)) ||
3169
25.5M
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3170
25.5M
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3171
25.5M
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3172
25.5M
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3173
25.5M
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3174
25.4M
      return(1);
3175
25.5M
    } else {
3176
1.64M
        if (IS_LETTER(c) || (c == '_') || (c == ':'))
3177
1.56M
      return(1);
3178
1.64M
    }
3179
258k
    return(0);
3180
27.2M
}
3181
3182
static int
3183
518M
xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3184
518M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3185
        /*
3186
   * Use the new checks of production [4] [4a] amd [5] of the
3187
   * Update 5 of XML-1.0
3188
   */
3189
510M
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3190
510M
      (((c >= 'a') && (c <= 'z')) ||
3191
509M
       ((c >= 'A') && (c <= 'Z')) ||
3192
509M
       ((c >= '0') && (c <= '9')) || /* !start */
3193
509M
       (c == '_') || (c == ':') ||
3194
509M
       (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3195
509M
       ((c >= 0xC0) && (c <= 0xD6)) ||
3196
509M
       ((c >= 0xD8) && (c <= 0xF6)) ||
3197
509M
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3198
509M
       ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3199
509M
       ((c >= 0x370) && (c <= 0x37D)) ||
3200
509M
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3201
509M
       ((c >= 0x200C) && (c <= 0x200D)) ||
3202
509M
       ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3203
509M
       ((c >= 0x2070) && (c <= 0x218F)) ||
3204
509M
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3205
509M
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3206
509M
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3207
509M
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3208
509M
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3209
484M
       return(1);
3210
510M
    } else {
3211
8.76M
        if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3212
8.76M
            (c == '.') || (c == '-') ||
3213
8.76M
      (c == '_') || (c == ':') ||
3214
8.76M
      (IS_COMBINING(c)) ||
3215
8.76M
      (IS_EXTENDER(c)))
3216
6.82M
      return(1);
3217
8.76M
    }
3218
27.9M
    return(0);
3219
518M
}
3220
3221
static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
3222
                                          int *len, int *alloc, int normalize);
3223
3224
static const xmlChar *
3225
864k
xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3226
864k
    int len = 0, l;
3227
864k
    int c;
3228
864k
    int count = 0;
3229
864k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3230
268k
                    XML_MAX_TEXT_LENGTH :
3231
864k
                    XML_MAX_NAME_LENGTH;
3232
3233
#ifdef DEBUG
3234
    nbParseNameComplex++;
3235
#endif
3236
3237
    /*
3238
     * Handler for more complex cases
3239
     */
3240
864k
    GROW;
3241
864k
    if (ctxt->instate == XML_PARSER_EOF)
3242
42
        return(NULL);
3243
864k
    c = CUR_CHAR(l);
3244
864k
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3245
        /*
3246
   * Use the new checks of production [4] [4a] amd [5] of the
3247
   * Update 5 of XML-1.0
3248
   */
3249
576k
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3250
576k
      (!(((c >= 'a') && (c <= 'z')) ||
3251
556k
         ((c >= 'A') && (c <= 'Z')) ||
3252
556k
         (c == '_') || (c == ':') ||
3253
556k
         ((c >= 0xC0) && (c <= 0xD6)) ||
3254
556k
         ((c >= 0xD8) && (c <= 0xF6)) ||
3255
556k
         ((c >= 0xF8) && (c <= 0x2FF)) ||
3256
556k
         ((c >= 0x370) && (c <= 0x37D)) ||
3257
556k
         ((c >= 0x37F) && (c <= 0x1FFF)) ||
3258
556k
         ((c >= 0x200C) && (c <= 0x200D)) ||
3259
556k
         ((c >= 0x2070) && (c <= 0x218F)) ||
3260
556k
         ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3261
556k
         ((c >= 0x3001) && (c <= 0xD7FF)) ||
3262
556k
         ((c >= 0xF900) && (c <= 0xFDCF)) ||
3263
556k
         ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3264
556k
         ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3265
231k
      return(NULL);
3266
231k
  }
3267
345k
  len += l;
3268
345k
  NEXTL(l);
3269
345k
  c = CUR_CHAR(l);
3270
4.64M
  while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3271
4.64M
         (((c >= 'a') && (c <= 'z')) ||
3272
4.61M
          ((c >= 'A') && (c <= 'Z')) ||
3273
4.61M
          ((c >= '0') && (c <= '9')) || /* !start */
3274
4.61M
          (c == '_') || (c == ':') ||
3275
4.61M
          (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3276
4.61M
          ((c >= 0xC0) && (c <= 0xD6)) ||
3277
4.61M
          ((c >= 0xD8) && (c <= 0xF6)) ||
3278
4.61M
          ((c >= 0xF8) && (c <= 0x2FF)) ||
3279
4.61M
          ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3280
4.61M
          ((c >= 0x370) && (c <= 0x37D)) ||
3281
4.61M
          ((c >= 0x37F) && (c <= 0x1FFF)) ||
3282
4.61M
          ((c >= 0x200C) && (c <= 0x200D)) ||
3283
4.61M
          ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3284
4.61M
          ((c >= 0x2070) && (c <= 0x218F)) ||
3285
4.61M
          ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3286
4.61M
          ((c >= 0x3001) && (c <= 0xD7FF)) ||
3287
4.61M
          ((c >= 0xF900) && (c <= 0xFDCF)) ||
3288
4.61M
          ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3289
4.61M
          ((c >= 0x10000) && (c <= 0xEFFFF))
3290
4.61M
    )) {
3291
4.29M
      if (count++ > XML_PARSER_CHUNK_SIZE) {
3292
27.1k
    count = 0;
3293
27.1k
    GROW;
3294
27.1k
                if (ctxt->instate == XML_PARSER_EOF)
3295
0
                    return(NULL);
3296
27.1k
      }
3297
4.29M
            if (len <= INT_MAX - l)
3298
4.29M
          len += l;
3299
4.29M
      NEXTL(l);
3300
4.29M
      c = CUR_CHAR(l);
3301
4.29M
  }
3302
345k
    } else {
3303
288k
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3304
288k
      (!IS_LETTER(c) && (c != '_') &&
3305
273k
       (c != ':'))) {
3306
181k
      return(NULL);
3307
181k
  }
3308
107k
  len += l;
3309
107k
  NEXTL(l);
3310
107k
  c = CUR_CHAR(l);
3311
3312
3.17M
  while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3313
3.17M
         ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3314
3.15M
    (c == '.') || (c == '-') ||
3315
3.15M
    (c == '_') || (c == ':') ||
3316
3.15M
    (IS_COMBINING(c)) ||
3317
3.15M
    (IS_EXTENDER(c)))) {
3318
3.06M
      if (count++ > XML_PARSER_CHUNK_SIZE) {
3319
24.6k
    count = 0;
3320
24.6k
    GROW;
3321
24.6k
                if (ctxt->instate == XML_PARSER_EOF)
3322
0
                    return(NULL);
3323
24.6k
      }
3324
3.06M
            if (len <= INT_MAX - l)
3325
3.06M
          len += l;
3326
3.06M
      NEXTL(l);
3327
3.06M
      c = CUR_CHAR(l);
3328
3.06M
  }
3329
107k
    }
3330
452k
    if (len > maxLength) {
3331
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3332
0
        return(NULL);
3333
0
    }
3334
452k
    if (ctxt->input->cur - ctxt->input->base < len) {
3335
        /*
3336
         * There were a couple of bugs where PERefs lead to to a change
3337
         * of the buffer. Check the buffer size to avoid passing an invalid
3338
         * pointer to xmlDictLookup.
3339
         */
3340
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3341
0
                    "unexpected change of input buffer");
3342
0
        return (NULL);
3343
0
    }
3344
452k
    if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3345
1.76k
        return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3346
450k
    return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3347
452k
}
3348
3349
/**
3350
 * xmlParseName:
3351
 * @ctxt:  an XML parser context
3352
 *
3353
 * DEPRECATED: Internal function, don't use.
3354
 *
3355
 * parse an XML name.
3356
 *
3357
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3358
 *                  CombiningChar | Extender
3359
 *
3360
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3361
 *
3362
 * [6] Names ::= Name (#x20 Name)*
3363
 *
3364
 * Returns the Name parsed or NULL
3365
 */
3366
3367
const xmlChar *
3368
71.7M
xmlParseName(xmlParserCtxtPtr ctxt) {
3369
71.7M
    const xmlChar *in;
3370
71.7M
    const xmlChar *ret;
3371
71.7M
    size_t count = 0;
3372
71.7M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3373
23.5M
                       XML_MAX_TEXT_LENGTH :
3374
71.7M
                       XML_MAX_NAME_LENGTH;
3375
3376
71.7M
    GROW;
3377
3378
#ifdef DEBUG
3379
    nbParseName++;
3380
#endif
3381
3382
    /*
3383
     * Accelerator for simple ASCII names
3384
     */
3385
71.7M
    in = ctxt->input->cur;
3386
71.7M
    if (((*in >= 0x61) && (*in <= 0x7A)) ||
3387
71.7M
  ((*in >= 0x41) && (*in <= 0x5A)) ||
3388
71.7M
  (*in == '_') || (*in == ':')) {
3389
71.3M
  in++;
3390
306M
  while (((*in >= 0x61) && (*in <= 0x7A)) ||
3391
306M
         ((*in >= 0x41) && (*in <= 0x5A)) ||
3392
306M
         ((*in >= 0x30) && (*in <= 0x39)) ||
3393
306M
         (*in == '_') || (*in == '-') ||
3394
306M
         (*in == ':') || (*in == '.'))
3395
235M
      in++;
3396
71.3M
  if ((*in > 0) && (*in < 0x80)) {
3397
70.9M
      count = in - ctxt->input->cur;
3398
70.9M
            if (count > maxLength) {
3399
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3400
0
                return(NULL);
3401
0
            }
3402
70.9M
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3403
70.9M
      ctxt->input->cur = in;
3404
70.9M
      ctxt->input->col += count;
3405
70.9M
      if (ret == NULL)
3406
0
          xmlErrMemory(ctxt, NULL);
3407
70.9M
      return(ret);
3408
70.9M
  }
3409
71.3M
    }
3410
    /* accelerator for special cases */
3411
864k
    return(xmlParseNameComplex(ctxt));
3412
71.7M
}
3413
3414
static const xmlChar *
3415
351k
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3416
351k
    int len = 0, l;
3417
351k
    int c;
3418
351k
    int count = 0;
3419
351k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3420
133k
                    XML_MAX_TEXT_LENGTH :
3421
351k
                    XML_MAX_NAME_LENGTH;
3422
351k
    size_t startPosition = 0;
3423
3424
#ifdef DEBUG
3425
    nbParseNCNameComplex++;
3426
#endif
3427
3428
    /*
3429
     * Handler for more complex cases
3430
     */
3431
351k
    GROW;
3432
351k
    startPosition = CUR_PTR - BASE_PTR;
3433
351k
    c = CUR_CHAR(l);
3434
351k
    if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3435
351k
  (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3436
269k
  return(NULL);
3437
269k
    }
3438
3439
2.97M
    while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3440
2.97M
     (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3441
2.89M
  if (count++ > XML_PARSER_CHUNK_SIZE) {
3442
23.5k
      count = 0;
3443
23.5k
      GROW;
3444
23.5k
            if (ctxt->instate == XML_PARSER_EOF)
3445
0
                return(NULL);
3446
23.5k
  }
3447
2.89M
        if (len <= INT_MAX - l)
3448
2.89M
      len += l;
3449
2.89M
  NEXTL(l);
3450
2.89M
  c = CUR_CHAR(l);
3451
2.89M
  if (c == 0) {
3452
10.5k
      count = 0;
3453
      /*
3454
       * when shrinking to extend the buffer we really need to preserve
3455
       * the part of the name we already parsed. Hence rolling back
3456
       * by current length.
3457
       */
3458
10.5k
      ctxt->input->cur -= l;
3459
10.5k
      GROW;
3460
10.5k
            if (ctxt->instate == XML_PARSER_EOF)
3461
0
                return(NULL);
3462
10.5k
      ctxt->input->cur += l;
3463
10.5k
      c = CUR_CHAR(l);
3464
10.5k
  }
3465
2.89M
    }
3466
81.7k
    if (len > maxLength) {
3467
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3468
0
        return(NULL);
3469
0
    }
3470
81.7k
    return(xmlDictLookup(ctxt->dict, (BASE_PTR + startPosition), len));
3471
81.7k
}
3472
3473
/**
3474
 * xmlParseNCName:
3475
 * @ctxt:  an XML parser context
3476
 * @len:  length of the string parsed
3477
 *
3478
 * parse an XML name.
3479
 *
3480
 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3481
 *                      CombiningChar | Extender
3482
 *
3483
 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3484
 *
3485
 * Returns the Name parsed or NULL
3486
 */
3487
3488
static const xmlChar *
3489
12.8M
xmlParseNCName(xmlParserCtxtPtr ctxt) {
3490
12.8M
    const xmlChar *in, *e;
3491
12.8M
    const xmlChar *ret;
3492
12.8M
    size_t count = 0;
3493
12.8M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3494
3.31M
                       XML_MAX_TEXT_LENGTH :
3495
12.8M
                       XML_MAX_NAME_LENGTH;
3496
3497
#ifdef DEBUG
3498
    nbParseNCName++;
3499
#endif
3500
3501
    /*
3502
     * Accelerator for simple ASCII names
3503
     */
3504
12.8M
    in = ctxt->input->cur;
3505
12.8M
    e = ctxt->input->end;
3506
12.8M
    if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3507
12.8M
   ((*in >= 0x41) && (*in <= 0x5A)) ||
3508
12.8M
   (*in == '_')) && (in < e)) {
3509
12.5M
  in++;
3510
47.2M
  while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3511
47.2M
          ((*in >= 0x41) && (*in <= 0x5A)) ||
3512
47.2M
          ((*in >= 0x30) && (*in <= 0x39)) ||
3513
47.2M
          (*in == '_') || (*in == '-') ||
3514
47.2M
          (*in == '.')) && (in < e))
3515
34.6M
      in++;
3516
12.5M
  if (in >= e)
3517
2.70k
      goto complex;
3518
12.5M
  if ((*in > 0) && (*in < 0x80)) {
3519
12.5M
      count = in - ctxt->input->cur;
3520
12.5M
            if (count > maxLength) {
3521
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3522
0
                return(NULL);
3523
0
            }
3524
12.5M
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3525
12.5M
      ctxt->input->cur = in;
3526
12.5M
      ctxt->input->col += count;
3527
12.5M
      if (ret == NULL) {
3528
0
          xmlErrMemory(ctxt, NULL);
3529
0
      }
3530
12.5M
      return(ret);
3531
12.5M
  }
3532
12.5M
    }
3533
351k
complex:
3534
351k
    return(xmlParseNCNameComplex(ctxt));
3535
12.8M
}
3536
3537
/**
3538
 * xmlParseNameAndCompare:
3539
 * @ctxt:  an XML parser context
3540
 *
3541
 * parse an XML name and compares for match
3542
 * (specialized for endtag parsing)
3543
 *
3544
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3545
 * and the name for mismatch
3546
 */
3547
3548
static const xmlChar *
3549
4.14M
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3550
4.14M
    register const xmlChar *cmp = other;
3551
4.14M
    register const xmlChar *in;
3552
4.14M
    const xmlChar *ret;
3553
3554
4.14M
    GROW;
3555
4.14M
    if (ctxt->instate == XML_PARSER_EOF)
3556
0
        return(NULL);
3557
3558
4.14M
    in = ctxt->input->cur;
3559
21.4M
    while (*in != 0 && *in == *cmp) {
3560
17.2M
  ++in;
3561
17.2M
  ++cmp;
3562
17.2M
    }
3563
4.14M
    if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3564
  /* success */
3565
3.98M
  ctxt->input->col += in - ctxt->input->cur;
3566
3.98M
  ctxt->input->cur = in;
3567
3.98M
  return (const xmlChar*) 1;
3568
3.98M
    }
3569
    /* failure (or end of input buffer), check with full function */
3570
161k
    ret = xmlParseName (ctxt);
3571
    /* strings coming from the dictionary direct compare possible */
3572
161k
    if (ret == other) {
3573
8.59k
  return (const xmlChar*) 1;
3574
8.59k
    }
3575
153k
    return ret;
3576
161k
}
3577
3578
/**
3579
 * xmlParseStringName:
3580
 * @ctxt:  an XML parser context
3581
 * @str:  a pointer to the string pointer (IN/OUT)
3582
 *
3583
 * parse an XML name.
3584
 *
3585
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3586
 *                  CombiningChar | Extender
3587
 *
3588
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3589
 *
3590
 * [6] Names ::= Name (#x20 Name)*
3591
 *
3592
 * Returns the Name parsed or NULL. The @str pointer
3593
 * is updated to the current location in the string.
3594
 */
3595
3596
static xmlChar *
3597
26.9M
xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3598
26.9M
    xmlChar buf[XML_MAX_NAMELEN + 5];
3599
26.9M
    const xmlChar *cur = *str;
3600
26.9M
    int len = 0, l;
3601
26.9M
    int c;
3602
26.9M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3603
1.55M
                    XML_MAX_TEXT_LENGTH :
3604
26.9M
                    XML_MAX_NAME_LENGTH;
3605
3606
#ifdef DEBUG
3607
    nbParseStringName++;
3608
#endif
3609
3610
26.9M
    c = CUR_SCHAR(cur, l);
3611
26.9M
    if (!xmlIsNameStartChar(ctxt, c)) {
3612
12.5k
  return(NULL);
3613
12.5k
    }
3614
3615
26.8M
    COPY_BUF(l,buf,len,c);
3616
26.8M
    cur += l;
3617
26.8M
    c = CUR_SCHAR(cur, l);
3618
236M
    while (xmlIsNameChar(ctxt, c)) {
3619
210M
  COPY_BUF(l,buf,len,c);
3620
210M
  cur += l;
3621
210M
  c = CUR_SCHAR(cur, l);
3622
210M
  if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3623
      /*
3624
       * Okay someone managed to make a huge name, so he's ready to pay
3625
       * for the processing speed.
3626
       */
3627
1.19M
      xmlChar *buffer;
3628
1.19M
      int max = len * 2;
3629
3630
1.19M
      buffer = (xmlChar *) xmlMallocAtomic(max);
3631
1.19M
      if (buffer == NULL) {
3632
0
          xmlErrMemory(ctxt, NULL);
3633
0
    return(NULL);
3634
0
      }
3635
1.19M
      memcpy(buffer, buf, len);
3636
272M
      while (xmlIsNameChar(ctxt, c)) {
3637
270M
    if (len + 10 > max) {
3638
1.19M
        xmlChar *tmp;
3639
3640
1.19M
        max *= 2;
3641
1.19M
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3642
1.19M
        if (tmp == NULL) {
3643
0
      xmlErrMemory(ctxt, NULL);
3644
0
      xmlFree(buffer);
3645
0
      return(NULL);
3646
0
        }
3647
1.19M
        buffer = tmp;
3648
1.19M
    }
3649
270M
    COPY_BUF(l,buffer,len,c);
3650
270M
    cur += l;
3651
270M
    c = CUR_SCHAR(cur, l);
3652
270M
                if (len > maxLength) {
3653
0
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3654
0
                    xmlFree(buffer);
3655
0
                    return(NULL);
3656
0
                }
3657
270M
      }
3658
1.19M
      buffer[len] = 0;
3659
1.19M
      *str = cur;
3660
1.19M
      return(buffer);
3661
1.19M
  }
3662
210M
    }
3663
25.6M
    if (len > maxLength) {
3664
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3665
0
        return(NULL);
3666
0
    }
3667
25.6M
    *str = cur;
3668
25.6M
    return(xmlStrndup(buf, len));
3669
25.6M
}
3670
3671
/**
3672
 * xmlParseNmtoken:
3673
 * @ctxt:  an XML parser context
3674
 *
3675
 * DEPRECATED: Internal function, don't use.
3676
 *
3677
 * parse an XML Nmtoken.
3678
 *
3679
 * [7] Nmtoken ::= (NameChar)+
3680
 *
3681
 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3682
 *
3683
 * Returns the Nmtoken parsed or NULL
3684
 */
3685
3686
xmlChar *
3687
1.01M
xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3688
1.01M
    xmlChar buf[XML_MAX_NAMELEN + 5];
3689
1.01M
    int len = 0, l;
3690
1.01M
    int c;
3691
1.01M
    int count = 0;
3692
1.01M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3693
444k
                    XML_MAX_TEXT_LENGTH :
3694
1.01M
                    XML_MAX_NAME_LENGTH;
3695
3696
#ifdef DEBUG
3697
    nbParseNmToken++;
3698
#endif
3699
3700
1.01M
    GROW;
3701
1.01M
    if (ctxt->instate == XML_PARSER_EOF)
3702
6
        return(NULL);
3703
1.01M
    c = CUR_CHAR(l);
3704
3705
5.96M
    while (xmlIsNameChar(ctxt, c)) {
3706
4.95M
  if (count++ > XML_PARSER_CHUNK_SIZE) {
3707
0
      count = 0;
3708
0
      GROW;
3709
0
  }
3710
4.95M
  COPY_BUF(l,buf,len,c);
3711
4.95M
  NEXTL(l);
3712
4.95M
  c = CUR_CHAR(l);
3713
4.95M
  if (c == 0) {
3714
1.65k
      count = 0;
3715
1.65k
      GROW;
3716
1.65k
      if (ctxt->instate == XML_PARSER_EOF)
3717
0
    return(NULL);
3718
1.65k
            c = CUR_CHAR(l);
3719
1.65k
  }
3720
4.95M
  if (len >= XML_MAX_NAMELEN) {
3721
      /*
3722
       * Okay someone managed to make a huge token, so he's ready to pay
3723
       * for the processing speed.
3724
       */
3725
975
      xmlChar *buffer;
3726
975
      int max = len * 2;
3727
3728
975
      buffer = (xmlChar *) xmlMallocAtomic(max);
3729
975
      if (buffer == NULL) {
3730
0
          xmlErrMemory(ctxt, NULL);
3731
0
    return(NULL);
3732
0
      }
3733
975
      memcpy(buffer, buf, len);
3734
1.58M
      while (xmlIsNameChar(ctxt, c)) {
3735
1.58M
    if (count++ > XML_PARSER_CHUNK_SIZE) {
3736
16.0k
        count = 0;
3737
16.0k
        GROW;
3738
16.0k
                    if (ctxt->instate == XML_PARSER_EOF) {
3739
0
                        xmlFree(buffer);
3740
0
                        return(NULL);
3741
0
                    }
3742
16.0k
    }
3743
1.58M
    if (len + 10 > max) {
3744
1.79k
        xmlChar *tmp;
3745
3746
1.79k
        max *= 2;
3747
1.79k
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3748
1.79k
        if (tmp == NULL) {
3749
0
      xmlErrMemory(ctxt, NULL);
3750
0
      xmlFree(buffer);
3751
0
      return(NULL);
3752
0
        }
3753
1.79k
        buffer = tmp;
3754
1.79k
    }
3755
1.58M
    COPY_BUF(l,buffer,len,c);
3756
1.58M
    NEXTL(l);
3757
1.58M
    c = CUR_CHAR(l);
3758
1.58M
                if (len > maxLength) {
3759
0
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3760
0
                    xmlFree(buffer);
3761
0
                    return(NULL);
3762
0
                }
3763
1.58M
      }
3764
975
      buffer[len] = 0;
3765
975
      return(buffer);
3766
975
  }
3767
4.95M
    }
3768
1.01M
    if (len == 0)
3769
7.41k
        return(NULL);
3770
1.00M
    if (len > maxLength) {
3771
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3772
0
        return(NULL);
3773
0
    }
3774
1.00M
    return(xmlStrndup(buf, len));
3775
1.00M
}
3776
3777
/**
3778
 * xmlParseEntityValue:
3779
 * @ctxt:  an XML parser context
3780
 * @orig:  if non-NULL store a copy of the original entity value
3781
 *
3782
 * DEPRECATED: Internal function, don't use.
3783
 *
3784
 * parse a value for ENTITY declarations
3785
 *
3786
 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3787
 *                 "'" ([^%&'] | PEReference | Reference)* "'"
3788
 *
3789
 * Returns the EntityValue parsed with reference substituted or NULL
3790
 */
3791
3792
xmlChar *
3793
710k
xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3794
710k
    xmlChar *buf = NULL;
3795
710k
    int len = 0;
3796
710k
    int size = XML_PARSER_BUFFER_SIZE;
3797
710k
    int c, l;
3798
710k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3799
266k
                    XML_MAX_HUGE_LENGTH :
3800
710k
                    XML_MAX_TEXT_LENGTH;
3801
710k
    xmlChar stop;
3802
710k
    xmlChar *ret = NULL;
3803
710k
    const xmlChar *cur = NULL;
3804
710k
    xmlParserInputPtr input;
3805
3806
710k
    if (RAW == '"') stop = '"';
3807
181k
    else if (RAW == '\'') stop = '\'';
3808
0
    else {
3809
0
  xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
3810
0
  return(NULL);
3811
0
    }
3812
710k
    buf = (xmlChar *) xmlMallocAtomic(size);
3813
710k
    if (buf == NULL) {
3814
0
  xmlErrMemory(ctxt, NULL);
3815
0
  return(NULL);
3816
0
    }
3817
3818
    /*
3819
     * The content of the entity definition is copied in a buffer.
3820
     */
3821
3822
710k
    ctxt->instate = XML_PARSER_ENTITY_VALUE;
3823
710k
    input = ctxt->input;
3824
710k
    GROW;
3825
710k
    if (ctxt->instate == XML_PARSER_EOF)
3826
0
        goto error;
3827
710k
    NEXT;
3828
710k
    c = CUR_CHAR(l);
3829
    /*
3830
     * NOTE: 4.4.5 Included in Literal
3831
     * When a parameter entity reference appears in a literal entity
3832
     * value, ... a single or double quote character in the replacement
3833
     * text is always treated as a normal data character and will not
3834
     * terminate the literal.
3835
     * In practice it means we stop the loop only when back at parsing
3836
     * the initial entity and the quote is found
3837
     */
3838
40.3M
    while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3839
40.3M
      (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
3840
39.6M
  if (len + 5 >= size) {
3841
99.3k
      xmlChar *tmp;
3842
3843
99.3k
      size *= 2;
3844
99.3k
      tmp = (xmlChar *) xmlRealloc(buf, size);
3845
99.3k
      if (tmp == NULL) {
3846
0
    xmlErrMemory(ctxt, NULL);
3847
0
                goto error;
3848
0
      }
3849
99.3k
      buf = tmp;
3850
99.3k
  }
3851
39.6M
  COPY_BUF(l,buf,len,c);
3852
39.6M
  NEXTL(l);
3853
3854
39.6M
  GROW;
3855
39.6M
  c = CUR_CHAR(l);
3856
39.6M
  if (c == 0) {
3857
1.17k
      GROW;
3858
1.17k
      c = CUR_CHAR(l);
3859
1.17k
  }
3860
3861
39.6M
        if (len > maxLength) {
3862
0
            xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
3863
0
                           "entity value too long\n");
3864
0
            goto error;
3865
0
        }
3866
39.6M
    }
3867
710k
    buf[len] = 0;
3868
710k
    if (ctxt->instate == XML_PARSER_EOF)
3869
0
        goto error;
3870
710k
    if (c != stop) {
3871
1.82k
        xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3872
1.82k
        goto error;
3873
1.82k
    }
3874
708k
    NEXT;
3875
3876
    /*
3877
     * Raise problem w.r.t. '&' and '%' being used in non-entities
3878
     * reference constructs. Note Charref will be handled in
3879
     * xmlStringDecodeEntities()
3880
     */
3881
708k
    cur = buf;
3882
31.2M
    while (*cur != 0) { /* non input consuming */
3883
30.5M
  if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3884
429k
      xmlChar *name;
3885
429k
      xmlChar tmp = *cur;
3886
429k
            int nameOk = 0;
3887
3888
429k
      cur++;
3889
429k
      name = xmlParseStringName(ctxt, &cur);
3890
429k
            if (name != NULL) {
3891
427k
                nameOk = 1;
3892
427k
                xmlFree(name);
3893
427k
            }
3894
429k
            if ((nameOk == 0) || (*cur != ';')) {
3895
5.81k
    xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3896
5.81k
      "EntityValue: '%c' forbidden except for entities references\n",
3897
5.81k
                            tmp);
3898
5.81k
                goto error;
3899
5.81k
      }
3900
423k
      if ((tmp == '%') && (ctxt->inSubset == 1) &&
3901
423k
    (ctxt->inputNr == 1)) {
3902
5.28k
    xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3903
5.28k
                goto error;
3904
5.28k
      }
3905
418k
      if (*cur == 0)
3906
0
          break;
3907
418k
  }
3908
30.5M
  cur++;
3909
30.5M
    }
3910
3911
    /*
3912
     * Then PEReference entities are substituted.
3913
     *
3914
     * NOTE: 4.4.7 Bypassed
3915
     * When a general entity reference appears in the EntityValue in
3916
     * an entity declaration, it is bypassed and left as is.
3917
     * so XML_SUBSTITUTE_REF is not set here.
3918
     */
3919
697k
    ++ctxt->depth;
3920
697k
    ret = xmlStringDecodeEntitiesInt(ctxt, buf, len, XML_SUBSTITUTE_PEREF,
3921
697k
                                     0, 0, 0, /* check */ 1);
3922
697k
    --ctxt->depth;
3923
3924
697k
    if (orig != NULL) {
3925
697k
        *orig = buf;
3926
697k
        buf = NULL;
3927
697k
    }
3928
3929
710k
error:
3930
710k
    if (buf != NULL)
3931
12.9k
        xmlFree(buf);
3932
710k
    return(ret);
3933
697k
}
3934
3935
/**
3936
 * xmlParseAttValueComplex:
3937
 * @ctxt:  an XML parser context
3938
 * @len:   the resulting attribute len
3939
 * @normalize:  whether to apply the inner normalization
3940
 *
3941
 * parse a value for an attribute, this is the fallback function
3942
 * of xmlParseAttValue() when the attribute parsing requires handling
3943
 * of non-ASCII characters, or normalization compaction.
3944
 *
3945
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3946
 */
3947
static xmlChar *
3948
372k
xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
3949
372k
    xmlChar limit = 0;
3950
372k
    xmlChar *buf = NULL;
3951
372k
    xmlChar *rep = NULL;
3952
372k
    size_t len = 0;
3953
372k
    size_t buf_size = 0;
3954
372k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3955
140k
                       XML_MAX_HUGE_LENGTH :
3956
372k
                       XML_MAX_TEXT_LENGTH;
3957
372k
    int c, l, in_space = 0;
3958
372k
    xmlChar *current = NULL;
3959
372k
    xmlEntityPtr ent;
3960
3961
372k
    if (NXT(0) == '"') {
3962
254k
  ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3963
254k
  limit = '"';
3964
254k
        NEXT;
3965
254k
    } else if (NXT(0) == '\'') {
3966
117k
  limit = '\'';
3967
117k
  ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3968
117k
        NEXT;
3969
117k
    } else {
3970
0
  xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3971
0
  return(NULL);
3972
0
    }
3973
3974
    /*
3975
     * allocate a translation buffer.
3976
     */
3977
372k
    buf_size = XML_PARSER_BUFFER_SIZE;
3978
372k
    buf = (xmlChar *) xmlMallocAtomic(buf_size);
3979
372k
    if (buf == NULL) goto mem_error;
3980
3981
    /*
3982
     * OK loop until we reach one of the ending char or a size limit.
3983
     */
3984
372k
    c = CUR_CHAR(l);
3985
19.9M
    while (((NXT(0) != limit) && /* checked */
3986
19.9M
            (IS_CHAR(c)) && (c != '<')) &&
3987
19.9M
            (ctxt->instate != XML_PARSER_EOF)) {
3988
19.6M
  if (c == '&') {
3989
2.47M
      in_space = 0;
3990
2.47M
      if (NXT(1) == '#') {
3991
184k
    int val = xmlParseCharRef(ctxt);
3992
3993
184k
    if (val == '&') {
3994
2.75k
        if (ctxt->replaceEntities) {
3995
1.22k
      if (len + 10 > buf_size) {
3996
118
          growBuffer(buf, 10);
3997
118
      }
3998
1.22k
      buf[len++] = '&';
3999
1.52k
        } else {
4000
      /*
4001
       * The reparsing will be done in xmlStringGetNodeList()
4002
       * called by the attribute() function in SAX.c
4003
       */
4004
1.52k
      if (len + 10 > buf_size) {
4005
112
          growBuffer(buf, 10);
4006
112
      }
4007
1.52k
      buf[len++] = '&';
4008
1.52k
      buf[len++] = '#';
4009
1.52k
      buf[len++] = '3';
4010
1.52k
      buf[len++] = '8';
4011
1.52k
      buf[len++] = ';';
4012
1.52k
        }
4013
181k
    } else if (val != 0) {
4014
163k
        if (len + 10 > buf_size) {
4015
1.97k
      growBuffer(buf, 10);
4016
1.97k
        }
4017
163k
        len += xmlCopyChar(0, &buf[len], val);
4018
163k
    }
4019
2.29M
      } else {
4020
2.29M
    ent = xmlParseEntityRef(ctxt);
4021
2.29M
    if ((ent != NULL) &&
4022
2.29M
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
4023
41.4k
        if (len + 10 > buf_size) {
4024
232
      growBuffer(buf, 10);
4025
232
        }
4026
41.4k
        if ((ctxt->replaceEntities == 0) &&
4027
41.4k
            (ent->content[0] == '&')) {
4028
13.6k
      buf[len++] = '&';
4029
13.6k
      buf[len++] = '#';
4030
13.6k
      buf[len++] = '3';
4031
13.6k
      buf[len++] = '8';
4032
13.6k
      buf[len++] = ';';
4033
27.7k
        } else {
4034
27.7k
      buf[len++] = ent->content[0];
4035
27.7k
        }
4036
2.25M
    } else if ((ent != NULL) &&
4037
2.25M
               (ctxt->replaceEntities != 0)) {
4038
1.33M
        if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
4039
1.33M
                        if (xmlParserEntityCheck(ctxt, ent->length))
4040
0
                            goto error;
4041
4042
1.33M
      ++ctxt->depth;
4043
1.33M
      rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
4044
1.33M
                                ent->length, XML_SUBSTITUTE_REF, 0, 0, 0,
4045
1.33M
                                /* check */ 1);
4046
1.33M
      --ctxt->depth;
4047
1.33M
      if (rep != NULL) {
4048
1.31M
          current = rep;
4049
259M
          while (*current != 0) { /* non input consuming */
4050
257M
                                if ((*current == 0xD) || (*current == 0xA) ||
4051
257M
                                    (*current == 0x9)) {
4052
390k
                                    buf[len++] = 0x20;
4053
390k
                                    current++;
4054
390k
                                } else
4055
257M
                                    buf[len++] = *current++;
4056
257M
        if (len + 10 > buf_size) {
4057
31.7k
            growBuffer(buf, 10);
4058
31.7k
        }
4059
257M
          }
4060
1.31M
          xmlFree(rep);
4061
1.31M
          rep = NULL;
4062
1.31M
      }
4063
1.33M
        } else {
4064
0
      if (len + 10 > buf_size) {
4065
0
          growBuffer(buf, 10);
4066
0
      }
4067
0
      if (ent->content != NULL)
4068
0
          buf[len++] = ent->content[0];
4069
0
        }
4070
1.33M
    } else if (ent != NULL) {
4071
637k
        int i = xmlStrlen(ent->name);
4072
637k
        const xmlChar *cur = ent->name;
4073
4074
        /*
4075
                     * We also check for recursion and amplification
4076
                     * when entities are not substituted. They're
4077
                     * often expanded later.
4078
         */
4079
637k
        if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
4080
637k
      (ent->content != NULL)) {
4081
617k
                        if ((ent->flags & XML_ENT_CHECKED) == 0) {
4082
12.4k
                            unsigned long oldCopy = ctxt->sizeentcopy;
4083
4084
12.4k
                            ctxt->sizeentcopy = ent->length;
4085
4086
12.4k
                            ++ctxt->depth;
4087
12.4k
                            rep = xmlStringDecodeEntitiesInt(ctxt,
4088
12.4k
                                    ent->content, ent->length,
4089
12.4k
                                    XML_SUBSTITUTE_REF, 0, 0, 0,
4090
12.4k
                                    /* check */ 1);
4091
12.4k
                            --ctxt->depth;
4092
4093
                            /*
4094
                             * If we're parsing DTD content, the entity
4095
                             * might reference other entities which
4096
                             * weren't defined yet, so the check isn't
4097
                             * reliable.
4098
                             */
4099
12.4k
                            if (ctxt->inSubset == 0) {
4100
8.73k
                                ent->flags |= XML_ENT_CHECKED;
4101
8.73k
                                ent->expandedSize = ctxt->sizeentcopy;
4102
8.73k
                            }
4103
4104
12.4k
                            if (rep != NULL) {
4105
12.2k
                                xmlFree(rep);
4106
12.2k
                                rep = NULL;
4107
12.2k
                            } else {
4108
176
                                ent->content[0] = 0;
4109
176
                            }
4110
4111
12.4k
                            if (xmlParserEntityCheck(ctxt, oldCopy))
4112
5
                                goto error;
4113
604k
                        } else {
4114
604k
                            if (xmlParserEntityCheck(ctxt, ent->expandedSize))
4115
11
                                goto error;
4116
604k
                        }
4117
617k
        }
4118
4119
        /*
4120
         * Just output the reference
4121
         */
4122
637k
        buf[len++] = '&';
4123
641k
        while (len + i + 10 > buf_size) {
4124
7.35k
      growBuffer(buf, i + 10);
4125
7.35k
        }
4126
1.53M
        for (;i > 0;i--)
4127
899k
      buf[len++] = *cur++;
4128
637k
        buf[len++] = ';';
4129
637k
    }
4130
2.29M
      }
4131
17.1M
  } else {
4132
17.1M
      if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
4133
2.43M
          if ((len != 0) || (!normalize)) {
4134
2.24M
        if ((!normalize) || (!in_space)) {
4135
1.96M
      COPY_BUF(l,buf,len,0x20);
4136
1.97M
      while (len + 10 > buf_size) {
4137
11.1k
          growBuffer(buf, 10);
4138
11.1k
      }
4139
1.96M
        }
4140
2.24M
        in_space = 1;
4141
2.24M
    }
4142
14.7M
      } else {
4143
14.7M
          in_space = 0;
4144
14.7M
    COPY_BUF(l,buf,len,c);
4145
14.7M
    if (len + 10 > buf_size) {
4146
69.4k
        growBuffer(buf, 10);
4147
69.4k
    }
4148
14.7M
      }
4149
17.1M
      NEXTL(l);
4150
17.1M
  }
4151
19.6M
  GROW;
4152
19.6M
  c = CUR_CHAR(l);
4153
19.6M
        if (len > maxLength) {
4154
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4155
0
                           "AttValue length too long\n");
4156
0
            goto mem_error;
4157
0
        }
4158
19.6M
    }
4159
372k
    if (ctxt->instate == XML_PARSER_EOF)
4160
575
        goto error;
4161
4162
372k
    if ((in_space) && (normalize)) {
4163
24.1k
        while ((len > 0) && (buf[len - 1] == 0x20)) len--;
4164
9.28k
    }
4165
372k
    buf[len] = 0;
4166
372k
    if (RAW == '<') {
4167
66.9k
  xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4168
305k
    } else if (RAW != limit) {
4169
63.2k
  if ((c != 0) && (!IS_CHAR(c))) {
4170
28.5k
      xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4171
28.5k
         "invalid character in attribute value\n");
4172
34.7k
  } else {
4173
34.7k
      xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4174
34.7k
         "AttValue: ' expected\n");
4175
34.7k
        }
4176
63.2k
    } else
4177
241k
  NEXT;
4178
4179
372k
    if (attlen != NULL) *attlen = len;
4180
372k
    return(buf);
4181
4182
0
mem_error:
4183
0
    xmlErrMemory(ctxt, NULL);
4184
591
error:
4185
591
    if (buf != NULL)
4186
591
        xmlFree(buf);
4187
591
    if (rep != NULL)
4188
0
        xmlFree(rep);
4189
591
    return(NULL);
4190
0
}
4191
4192
/**
4193
 * xmlParseAttValue:
4194
 * @ctxt:  an XML parser context
4195
 *
4196
 * DEPRECATED: Internal function, don't use.
4197
 *
4198
 * parse a value for an attribute
4199
 * Note: the parser won't do substitution of entities here, this
4200
 * will be handled later in xmlStringGetNodeList
4201
 *
4202
 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4203
 *                   "'" ([^<&'] | Reference)* "'"
4204
 *
4205
 * 3.3.3 Attribute-Value Normalization:
4206
 * Before the value of an attribute is passed to the application or
4207
 * checked for validity, the XML processor must normalize it as follows:
4208
 * - a character reference is processed by appending the referenced
4209
 *   character to the attribute value
4210
 * - an entity reference is processed by recursively processing the
4211
 *   replacement text of the entity
4212
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4213
 *   appending #x20 to the normalized value, except that only a single
4214
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
4215
 *   parsed entity or the literal entity value of an internal parsed entity
4216
 * - other characters are processed by appending them to the normalized value
4217
 * If the declared value is not CDATA, then the XML processor must further
4218
 * process the normalized attribute value by discarding any leading and
4219
 * trailing space (#x20) characters, and by replacing sequences of space
4220
 * (#x20) characters by a single space (#x20) character.
4221
 * All attributes for which no declaration has been read should be treated
4222
 * by a non-validating parser as if declared CDATA.
4223
 *
4224
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4225
 */
4226
4227
4228
xmlChar *
4229
2.96M
xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4230
2.96M
    if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4231
2.96M
    return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
4232
2.96M
}
4233
4234
/**
4235
 * xmlParseSystemLiteral:
4236
 * @ctxt:  an XML parser context
4237
 *
4238
 * DEPRECATED: Internal function, don't use.
4239
 *
4240
 * parse an XML Literal
4241
 *
4242
 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4243
 *
4244
 * Returns the SystemLiteral parsed or NULL
4245
 */
4246
4247
xmlChar *
4248
361k
xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4249
361k
    xmlChar *buf = NULL;
4250
361k
    int len = 0;
4251
361k
    int size = XML_PARSER_BUFFER_SIZE;
4252
361k
    int cur, l;
4253
361k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4254
154k
                    XML_MAX_TEXT_LENGTH :
4255
361k
                    XML_MAX_NAME_LENGTH;
4256
361k
    xmlChar stop;
4257
361k
    int state = ctxt->instate;
4258
361k
    int count = 0;
4259
4260
361k
    SHRINK;
4261
361k
    if (RAW == '"') {
4262
271k
        NEXT;
4263
271k
  stop = '"';
4264
271k
    } else if (RAW == '\'') {
4265
82.4k
        NEXT;
4266
82.4k
  stop = '\'';
4267
82.4k
    } else {
4268
7.60k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4269
7.60k
  return(NULL);
4270
7.60k
    }
4271
4272
353k
    buf = (xmlChar *) xmlMallocAtomic(size);
4273
353k
    if (buf == NULL) {
4274
0
        xmlErrMemory(ctxt, NULL);
4275
0
  return(NULL);
4276
0
    }
4277
353k
    ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4278
353k
    cur = CUR_CHAR(l);
4279
15.7M
    while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4280
15.4M
  if (len + 5 >= size) {
4281
10.8k
      xmlChar *tmp;
4282
4283
10.8k
      size *= 2;
4284
10.8k
      tmp = (xmlChar *) xmlRealloc(buf, size);
4285
10.8k
      if (tmp == NULL) {
4286
0
          xmlFree(buf);
4287
0
    xmlErrMemory(ctxt, NULL);
4288
0
    ctxt->instate = (xmlParserInputState) state;
4289
0
    return(NULL);
4290
0
      }
4291
10.8k
      buf = tmp;
4292
10.8k
  }
4293
15.4M
  count++;
4294
15.4M
  if (count > 50) {
4295
226k
      SHRINK;
4296
226k
      GROW;
4297
226k
      count = 0;
4298
226k
            if (ctxt->instate == XML_PARSER_EOF) {
4299
0
          xmlFree(buf);
4300
0
    return(NULL);
4301
0
            }
4302
226k
  }
4303
15.4M
  COPY_BUF(l,buf,len,cur);
4304
15.4M
  NEXTL(l);
4305
15.4M
  cur = CUR_CHAR(l);
4306
15.4M
  if (cur == 0) {
4307
3.04k
      GROW;
4308
3.04k
      SHRINK;
4309
3.04k
      cur = CUR_CHAR(l);
4310
3.04k
  }
4311
15.4M
        if (len > maxLength) {
4312
89
            xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4313
89
            xmlFree(buf);
4314
89
            ctxt->instate = (xmlParserInputState) state;
4315
89
            return(NULL);
4316
89
        }
4317
15.4M
    }
4318
353k
    buf[len] = 0;
4319
353k
    ctxt->instate = (xmlParserInputState) state;
4320
353k
    if (!IS_CHAR(cur)) {
4321
4.06k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4322
349k
    } else {
4323
349k
  NEXT;
4324
349k
    }
4325
353k
    return(buf);
4326
353k
}
4327
4328
/**
4329
 * xmlParsePubidLiteral:
4330
 * @ctxt:  an XML parser context
4331
 *
4332
 * DEPRECATED: Internal function, don't use.
4333
 *
4334
 * parse an XML public literal
4335
 *
4336
 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4337
 *
4338
 * Returns the PubidLiteral parsed or NULL.
4339
 */
4340
4341
xmlChar *
4342
51.1k
xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4343
51.1k
    xmlChar *buf = NULL;
4344
51.1k
    int len = 0;
4345
51.1k
    int size = XML_PARSER_BUFFER_SIZE;
4346
51.1k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4347
19.1k
                    XML_MAX_TEXT_LENGTH :
4348
51.1k
                    XML_MAX_NAME_LENGTH;
4349
51.1k
    xmlChar cur;
4350
51.1k
    xmlChar stop;
4351
51.1k
    int count = 0;
4352
51.1k
    xmlParserInputState oldstate = ctxt->instate;
4353
4354
51.1k
    SHRINK;
4355
51.1k
    if (RAW == '"') {
4356
36.9k
        NEXT;
4357
36.9k
  stop = '"';
4358
36.9k
    } else if (RAW == '\'') {
4359
12.4k
        NEXT;
4360
12.4k
  stop = '\'';
4361
12.4k
    } else {
4362
1.74k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4363
1.74k
  return(NULL);
4364
1.74k
    }
4365
49.4k
    buf = (xmlChar *) xmlMallocAtomic(size);
4366
49.4k
    if (buf == NULL) {
4367
0
  xmlErrMemory(ctxt, NULL);
4368
0
  return(NULL);
4369
0
    }
4370
49.4k
    ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
4371
49.4k
    cur = CUR;
4372
3.72M
    while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
4373
3.67M
  if (len + 1 >= size) {
4374
4.66k
      xmlChar *tmp;
4375
4376
4.66k
      size *= 2;
4377
4.66k
      tmp = (xmlChar *) xmlRealloc(buf, size);
4378
4.66k
      if (tmp == NULL) {
4379
0
    xmlErrMemory(ctxt, NULL);
4380
0
    xmlFree(buf);
4381
0
    return(NULL);
4382
0
      }
4383
4.66k
      buf = tmp;
4384
4.66k
  }
4385
3.67M
  buf[len++] = cur;
4386
3.67M
  count++;
4387
3.67M
  if (count > 50) {
4388
52.3k
      SHRINK;
4389
52.3k
      GROW;
4390
52.3k
      count = 0;
4391
52.3k
            if (ctxt->instate == XML_PARSER_EOF) {
4392
0
    xmlFree(buf);
4393
0
    return(NULL);
4394
0
            }
4395
52.3k
  }
4396
3.67M
  NEXT;
4397
3.67M
  cur = CUR;
4398
3.67M
  if (cur == 0) {
4399
913
      GROW;
4400
913
      SHRINK;
4401
913
      cur = CUR;
4402
913
  }
4403
3.67M
        if (len > maxLength) {
4404
14
            xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4405
14
            xmlFree(buf);
4406
14
            return(NULL);
4407
14
        }
4408
3.67M
    }
4409
49.4k
    buf[len] = 0;
4410
49.4k
    if (cur != stop) {
4411
7.48k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4412
41.9k
    } else {
4413
41.9k
  NEXT;
4414
41.9k
    }
4415
49.4k
    ctxt->instate = oldstate;
4416
49.4k
    return(buf);
4417
49.4k
}
4418
4419
static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt);
4420
4421
/*
 * Byte classification table for the fast scanning loop of
 * xmlParseCharData(): a non-zero entry marks a byte the loop may step
 * over, a zero entry makes it stop. Every accepted byte maps to itself.
 * Rejected are the control bytes other than TAB (0x09), the characters
 * '&' (0x26), '<' (0x3C) and ']' (0x5D), and every byte >= 0x80. The
 * upper half of the table is left out of the initializer and is
 * therefore zero.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x08 */ 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* TAB only */
    /* 0x10 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x18 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20 */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* no '&' */
    /* 0x28 */ 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30 */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    /* 0x38 */ 0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* no '<' */
    /* 0x40 */ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    /* 0x48 */ 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50 */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    /* 0x58 */ 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* no ']' */
    /* 0x60 */ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    /* 0x68 */ 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    /* 0x78 */ 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F
    /* 0x80 .. 0xFF: non-ASCII bytes, implicitly zero */
};
4458
4459
/**
4460
 * xmlParseCharData:
4461
 * @ctxt:  an XML parser context
4462
 * @cdata:  unused
4463
 *
4464
 * DEPRECATED: Internal function, don't use.
4465
 *
4466
 * Parse character data. Always makes progress if the first char isn't
4467
 * '<' or '&'.
4468
 *
4469
 * if we are within a CDATA section ']]>' marks an end of section.
4470
 *
4471
 * The right angle bracket (>) may be represented using the string "&gt;",
4472
 * and must, for compatibility, be escaped using "&gt;" or a character
4473
 * reference when it appears in the string "]]>" in content, when that
4474
 * string is not marking the end of a CDATA section.
4475
 *
4476
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4477
 */
4478
4479
void
4480
14.2M
xmlParseCharData(xmlParserCtxtPtr ctxt, ATTRIBUTE_UNUSED int cdata) {
4481
14.2M
    const xmlChar *in;
4482
14.2M
    int nbchar = 0;
4483
14.2M
    int line = ctxt->input->line;
4484
14.2M
    int col = ctxt->input->col;
4485
14.2M
    int ccol;
4486
4487
14.2M
    SHRINK;
4488
14.2M
    GROW;
4489
    /*
4490
     * Accelerated common case where input don't need to be
4491
     * modified before passing it to the handler.
4492
     */
4493
14.2M
    in = ctxt->input->cur;
4494
18.1M
    do {
4495
22.6M
get_more_space:
4496
30.8M
        while (*in == 0x20) { in++; ctxt->input->col++; }
4497
22.6M
        if (*in == 0xA) {
4498
4.73M
            do {
4499
4.73M
                ctxt->input->line++; ctxt->input->col = 1;
4500
4.73M
                in++;
4501
4.73M
            } while (*in == 0xA);
4502
4.56M
            goto get_more_space;
4503
4.56M
        }
4504
18.1M
        if (*in == '<') {
4505
3.83M
            nbchar = in - ctxt->input->cur;
4506
3.83M
            if (nbchar > 0) {
4507
3.83M
                const xmlChar *tmp = ctxt->input->cur;
4508
3.83M
                ctxt->input->cur = in;
4509
4510
3.83M
                if ((ctxt->sax != NULL) &&
4511
3.83M
                    (ctxt->sax->ignorableWhitespace !=
4512
3.83M
                     ctxt->sax->characters)) {
4513
1.62M
                    if (areBlanks(ctxt, tmp, nbchar, 1)) {
4514
1.24M
                        if (ctxt->sax->ignorableWhitespace != NULL)
4515
1.24M
                            ctxt->sax->ignorableWhitespace(ctxt->userData,
4516
1.24M
                                                   tmp, nbchar);
4517
1.24M
                    } else {
4518
381k
                        if (ctxt->sax->characters != NULL)
4519
381k
                            ctxt->sax->characters(ctxt->userData,
4520
381k
                                                  tmp, nbchar);
4521
381k
                        if (*ctxt->space == -1)
4522
88.5k
                            *ctxt->space = -2;
4523
381k
                    }
4524
2.20M
                } else if ((ctxt->sax != NULL) &&
4525
2.20M
                           (ctxt->sax->characters != NULL)) {
4526
2.20M
                    ctxt->sax->characters(ctxt->userData,
4527
2.20M
                                          tmp, nbchar);
4528
2.20M
                }
4529
3.83M
            }
4530
3.83M
            return;
4531
3.83M
        }
4532
4533
18.4M
get_more:
4534
18.4M
        ccol = ctxt->input->col;
4535
254M
        while (test_char_data[*in]) {
4536
236M
            in++;
4537
236M
            ccol++;
4538
236M
        }
4539
18.4M
        ctxt->input->col = ccol;
4540
18.4M
        if (*in == 0xA) {
4541
3.91M
            do {
4542
3.91M
                ctxt->input->line++; ctxt->input->col = 1;
4543
3.91M
                in++;
4544
3.91M
            } while (*in == 0xA);
4545
3.82M
            goto get_more;
4546
3.82M
        }
4547
14.6M
        if (*in == ']') {
4548
331k
            if ((in[1] == ']') && (in[2] == '>')) {
4549
7.08k
                xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4550
7.08k
                ctxt->input->cur = in + 1;
4551
7.08k
                return;
4552
7.08k
            }
4553
324k
            in++;
4554
324k
            ctxt->input->col++;
4555
324k
            goto get_more;
4556
331k
        }
4557
14.2M
        nbchar = in - ctxt->input->cur;
4558
14.2M
        if (nbchar > 0) {
4559
10.0M
            if ((ctxt->sax != NULL) &&
4560
10.0M
                (ctxt->sax->ignorableWhitespace !=
4561
10.0M
                 ctxt->sax->characters) &&
4562
10.0M
                (IS_BLANK_CH(*ctxt->input->cur))) {
4563
2.08M
                const xmlChar *tmp = ctxt->input->cur;
4564
2.08M
                ctxt->input->cur = in;
4565
4566
2.08M
                if (areBlanks(ctxt, tmp, nbchar, 0)) {
4567
778k
                    if (ctxt->sax->ignorableWhitespace != NULL)
4568
778k
                        ctxt->sax->ignorableWhitespace(ctxt->userData,
4569
778k
                                                       tmp, nbchar);
4570
1.30M
                } else {
4571
1.30M
                    if (ctxt->sax->characters != NULL)
4572
1.30M
                        ctxt->sax->characters(ctxt->userData,
4573
1.30M
                                              tmp, nbchar);
4574
1.30M
                    if (*ctxt->space == -1)
4575
291k
                        *ctxt->space = -2;
4576
1.30M
                }
4577
2.08M
                line = ctxt->input->line;
4578
2.08M
                col = ctxt->input->col;
4579
7.99M
            } else if (ctxt->sax != NULL) {
4580
7.99M
                if (ctxt->sax->characters != NULL)
4581
7.99M
                    ctxt->sax->characters(ctxt->userData,
4582
7.99M
                                          ctxt->input->cur, nbchar);
4583
7.99M
                line = ctxt->input->line;
4584
7.99M
                col = ctxt->input->col;
4585
7.99M
            }
4586
10.0M
        }
4587
14.2M
        ctxt->input->cur = in;
4588
14.2M
        if (*in == 0xD) {
4589
3.88M
            in++;
4590
3.88M
            if (*in == 0xA) {
4591
3.85M
                ctxt->input->cur = in;
4592
3.85M
                in++;
4593
3.85M
                ctxt->input->line++; ctxt->input->col = 1;
4594
3.85M
                continue; /* while */
4595
3.85M
            }
4596
29.7k
            in--;
4597
29.7k
        }
4598
10.4M
        if (*in == '<') {
4599
8.12M
            return;
4600
8.12M
        }
4601
2.30M
        if (*in == '&') {
4602
737k
            return;
4603
737k
        }
4604
1.56M
        SHRINK;
4605
1.56M
        GROW;
4606
1.56M
        if (ctxt->instate == XML_PARSER_EOF)
4607
0
            return;
4608
1.56M
        in = ctxt->input->cur;
4609
5.42M
    } while (((*in >= 0x20) && (*in <= 0x7F)) ||
4610
5.42M
             (*in == 0x09) || (*in == 0x0a));
4611
1.59M
    ctxt->input->line = line;
4612
1.59M
    ctxt->input->col = col;
4613
1.59M
    xmlParseCharDataComplex(ctxt);
4614
1.59M
}
4615
4616
/**
4617
 * xmlParseCharDataComplex:
4618
 * @ctxt:  an XML parser context
4619
 * @cdata:  int indicating whether we are within a CDATA section
4620
 *
4621
 * Always makes progress if the first char isn't '<' or '&'.
4622
 *
4623
 * parse a CharData section.this is the fallback function
4624
 * of xmlParseCharData() when the parsing requires handling
4625
 * of non-ASCII characters.
4626
 */
4627
static void
4628
1.59M
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt) {
4629
1.59M
    xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4630
1.59M
    int nbchar = 0;
4631
1.59M
    int cur, l;
4632
1.59M
    int count = 0;
4633
4634
1.59M
    SHRINK;
4635
1.59M
    GROW;
4636
1.59M
    cur = CUR_CHAR(l);
4637
30.6M
    while ((cur != '<') && /* checked */
4638
30.6M
           (cur != '&') &&
4639
30.6M
     (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
4640
29.0M
  if ((cur == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4641
5.23k
      xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4642
5.23k
  }
4643
29.0M
  COPY_BUF(l,buf,nbchar,cur);
4644
  /* move current position before possible calling of ctxt->sax->characters */
4645
29.0M
  NEXTL(l);
4646
29.0M
  cur = CUR_CHAR(l);
4647
29.0M
  if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4648
75.8k
      buf[nbchar] = 0;
4649
4650
      /*
4651
       * OK the segment is to be consumed as chars.
4652
       */
4653
75.8k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4654
60.5k
    if (areBlanks(ctxt, buf, nbchar, 0)) {
4655
234
        if (ctxt->sax->ignorableWhitespace != NULL)
4656
234
      ctxt->sax->ignorableWhitespace(ctxt->userData,
4657
234
                                     buf, nbchar);
4658
60.3k
    } else {
4659
60.3k
        if (ctxt->sax->characters != NULL)
4660
60.3k
      ctxt->sax->characters(ctxt->userData, buf, nbchar);
4661
60.3k
        if ((ctxt->sax->characters !=
4662
60.3k
             ctxt->sax->ignorableWhitespace) &&
4663
60.3k
      (*ctxt->space == -1))
4664
1.77k
      *ctxt->space = -2;
4665
60.3k
    }
4666
60.5k
      }
4667
75.8k
      nbchar = 0;
4668
            /* something really bad happened in the SAX callback */
4669
75.8k
            if (ctxt->instate != XML_PARSER_CONTENT)
4670
0
                return;
4671
75.8k
  }
4672
29.0M
  count++;
4673
29.0M
  if (count > 50) {
4674
397k
      SHRINK;
4675
397k
      GROW;
4676
397k
      count = 0;
4677
397k
            if (ctxt->instate == XML_PARSER_EOF)
4678
0
    return;
4679
397k
  }
4680
29.0M
    }
4681
1.59M
    if (nbchar != 0) {
4682
848k
        buf[nbchar] = 0;
4683
  /*
4684
   * OK the segment is to be consumed as chars.
4685
   */
4686
848k
  if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4687
711k
      if (areBlanks(ctxt, buf, nbchar, 0)) {
4688
1.93k
    if (ctxt->sax->ignorableWhitespace != NULL)
4689
1.93k
        ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4690
709k
      } else {
4691
709k
    if (ctxt->sax->characters != NULL)
4692
709k
        ctxt->sax->characters(ctxt->userData, buf, nbchar);
4693
709k
    if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4694
709k
        (*ctxt->space == -1))
4695
58.0k
        *ctxt->space = -2;
4696
709k
      }
4697
711k
  }
4698
848k
    }
4699
1.59M
    if ((ctxt->input->cur < ctxt->input->end) && (!IS_CHAR(cur))) {
4700
  /* Generate the error and skip the offending character */
4701
1.12M
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4702
1.12M
                          "PCDATA invalid Char value %d\n",
4703
1.12M
                    cur ? cur : CUR);
4704
1.12M
  NEXT;
4705
1.12M
    }
4706
1.59M
}
4707
4708
/**
4709
 * xmlParseExternalID:
4710
 * @ctxt:  an XML parser context
4711
 * @publicID:  a xmlChar** receiving PubidLiteral
4712
 * @strict: indicate whether we should restrict parsing to only
4713
 *          production [75], see NOTE below
4714
 *
4715
 * DEPRECATED: Internal function, don't use.
4716
 *
4717
 * Parse an External ID or a Public ID
4718
 *
4719
 * NOTE: Productions [75] and [83] interact badly since [75] can generate
4720
 *       'PUBLIC' S PubidLiteral S SystemLiteral
4721
 *
4722
 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4723
 *                   | 'PUBLIC' S PubidLiteral S SystemLiteral
4724
 *
4725
 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4726
 *
4727
 * Returns the function returns SystemLiteral and in the second
4728
 *                case publicID receives PubidLiteral, is strict is off
4729
 *                it is possible to return NULL and have publicID set.
4730
 */
4731
4732
xmlChar *
4733
532k
xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4734
532k
    xmlChar *URI = NULL;
4735
4736
532k
    SHRINK;
4737
4738
532k
    *publicID = NULL;
4739
532k
    if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4740
319k
        SKIP(6);
4741
319k
  if (SKIP_BLANKS == 0) {
4742
1.31k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4743
1.31k
                     "Space required after 'SYSTEM'\n");
4744
1.31k
  }
4745
319k
  URI = xmlParseSystemLiteral(ctxt);
4746
319k
  if (URI == NULL) {
4747
1.72k
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4748
1.72k
        }
4749
319k
    } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4750
51.1k
        SKIP(6);
4751
51.1k
  if (SKIP_BLANKS == 0) {
4752
1.64k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4753
1.64k
        "Space required after 'PUBLIC'\n");
4754
1.64k
  }
4755
51.1k
  *publicID = xmlParsePubidLiteral(ctxt);
4756
51.1k
  if (*publicID == NULL) {
4757
1.76k
      xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4758
1.76k
  }
4759
51.1k
  if (strict) {
4760
      /*
4761
       * We don't handle [83] so "S SystemLiteral" is required.
4762
       */
4763
41.7k
      if (SKIP_BLANKS == 0) {
4764
5.66k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4765
5.66k
      "Space required after the Public Identifier\n");
4766
5.66k
      }
4767
41.7k
  } else {
4768
      /*
4769
       * We handle [83] so we return immediately, if
4770
       * "S SystemLiteral" is not detected. We skip blanks if no
4771
             * system literal was found, but this is harmless since we must
4772
             * be at the end of a NotationDecl.
4773
       */
4774
9.41k
      if (SKIP_BLANKS == 0) return(NULL);
4775
1.03k
      if ((CUR != '\'') && (CUR != '"')) return(NULL);
4776
1.03k
  }
4777
42.1k
  URI = xmlParseSystemLiteral(ctxt);
4778
42.1k
  if (URI == NULL) {
4779
5.97k
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4780
5.97k
        }
4781
42.1k
    }
4782
522k
    return(URI);
4783
532k
}
4784
4785
/**
4786
 * xmlParseCommentComplex:
4787
 * @ctxt:  an XML parser context
4788
 * @buf:  the already parsed part of the buffer
4789
 * @len:  number of bytes in the buffer
4790
 * @size:  allocated size of the buffer
4791
 *
4792
 * Skip an XML (SGML) comment <!-- .... -->
4793
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4794
 *  must not occur within comments. "
4795
 * This is the slow routine in case the accelerator for ascii didn't work
4796
 *
4797
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4798
 */
4799
static void
4800
xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4801
1.68M
                       size_t len, size_t size) {
4802
1.68M
    int q, ql;
4803
1.68M
    int r, rl;
4804
1.68M
    int cur, l;
4805
1.68M
    size_t count = 0;
4806
1.68M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4807
690k
                       XML_MAX_HUGE_LENGTH :
4808
1.68M
                       XML_MAX_TEXT_LENGTH;
4809
1.68M
    int inputid;
4810
4811
1.68M
    inputid = ctxt->input->id;
4812
4813
1.68M
    if (buf == NULL) {
4814
45.6k
        len = 0;
4815
45.6k
  size = XML_PARSER_BUFFER_SIZE;
4816
45.6k
  buf = (xmlChar *) xmlMallocAtomic(size);
4817
45.6k
  if (buf == NULL) {
4818
0
      xmlErrMemory(ctxt, NULL);
4819
0
      return;
4820
0
  }
4821
45.6k
    }
4822
1.68M
    GROW; /* Assure there's enough input data */
4823
1.68M
    q = CUR_CHAR(ql);
4824
1.68M
    if (q == 0)
4825
1.52M
        goto not_terminated;
4826
152k
    if (!IS_CHAR(q)) {
4827
4.97k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4828
4.97k
                          "xmlParseComment: invalid xmlChar value %d\n",
4829
4.97k
                    q);
4830
4.97k
  xmlFree (buf);
4831
4.97k
  return;
4832
4.97k
    }
4833
147k
    NEXTL(ql);
4834
147k
    r = CUR_CHAR(rl);
4835
147k
    if (r == 0)
4836
1.45k
        goto not_terminated;
4837
145k
    if (!IS_CHAR(r)) {
4838
1.35k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4839
1.35k
                          "xmlParseComment: invalid xmlChar value %d\n",
4840
1.35k
                    r);
4841
1.35k
  xmlFree (buf);
4842
1.35k
  return;
4843
1.35k
    }
4844
144k
    NEXTL(rl);
4845
144k
    cur = CUR_CHAR(l);
4846
144k
    if (cur == 0)
4847
1.65k
        goto not_terminated;
4848
21.4M
    while (IS_CHAR(cur) && /* checked */
4849
21.4M
           ((cur != '>') ||
4850
21.3M
      (r != '-') || (q != '-'))) {
4851
21.3M
  if ((r == '-') && (q == '-')) {
4852
14.6k
      xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4853
14.6k
  }
4854
21.3M
  if (len + 5 >= size) {
4855
39.7k
      xmlChar *new_buf;
4856
39.7k
            size_t new_size;
4857
4858
39.7k
      new_size = size * 2;
4859
39.7k
      new_buf = (xmlChar *) xmlRealloc(buf, new_size);
4860
39.7k
      if (new_buf == NULL) {
4861
0
    xmlFree (buf);
4862
0
    xmlErrMemory(ctxt, NULL);
4863
0
    return;
4864
0
      }
4865
39.7k
      buf = new_buf;
4866
39.7k
            size = new_size;
4867
39.7k
  }
4868
21.3M
  COPY_BUF(ql,buf,len,q);
4869
21.3M
  q = r;
4870
21.3M
  ql = rl;
4871
21.3M
  r = cur;
4872
21.3M
  rl = l;
4873
4874
21.3M
  count++;
4875
21.3M
  if (count > 50) {
4876
384k
      SHRINK;
4877
384k
      GROW;
4878
384k
      count = 0;
4879
384k
            if (ctxt->instate == XML_PARSER_EOF) {
4880
0
    xmlFree(buf);
4881
0
    return;
4882
0
            }
4883
384k
  }
4884
21.3M
  NEXTL(l);
4885
21.3M
  cur = CUR_CHAR(l);
4886
21.3M
  if (cur == 0) {
4887
107k
      SHRINK;
4888
107k
      GROW;
4889
107k
      cur = CUR_CHAR(l);
4890
107k
  }
4891
4892
21.3M
        if (len > maxLength) {
4893
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4894
0
                         "Comment too big found", NULL);
4895
0
            xmlFree (buf);
4896
0
            return;
4897
0
        }
4898
21.3M
    }
4899
142k
    buf[len] = 0;
4900
142k
    if (cur == 0) {
4901
107k
  xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4902
107k
                       "Comment not terminated \n<!--%.50s\n", buf);
4903
107k
    } else if (!IS_CHAR(cur)) {
4904
4.39k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4905
4.39k
                          "xmlParseComment: invalid xmlChar value %d\n",
4906
4.39k
                    cur);
4907
31.2k
    } else {
4908
31.2k
  if (inputid != ctxt->input->id) {
4909
0
      xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4910
0
               "Comment doesn't start and stop in the same"
4911
0
                           " entity\n");
4912
0
  }
4913
31.2k
        NEXT;
4914
31.2k
  if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4915
31.2k
      (!ctxt->disableSAX))
4916
22.7k
      ctxt->sax->comment(ctxt->userData, buf);
4917
31.2k
    }
4918
142k
    xmlFree(buf);
4919
142k
    return;
4920
1.53M
not_terminated:
4921
1.53M
    xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4922
1.53M
       "Comment not terminated\n", NULL);
4923
1.53M
    xmlFree(buf);
4924
1.53M
    return;
4925
142k
}
4926
4927
/**
4928
 * xmlParseComment:
4929
 * @ctxt:  an XML parser context
4930
 *
4931
 * DEPRECATED: Internal function, don't use.
4932
 *
4933
 * Parse an XML (SGML) comment. Always consumes '<!'.
4934
 *
4935
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4936
 *  must not occur within comments. "
4937
 *
4938
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4939
 */
4940
void
4941
38.4M
xmlParseComment(xmlParserCtxtPtr ctxt) {
4942
38.4M
    xmlChar *buf = NULL;
4943
38.4M
    size_t size = XML_PARSER_BUFFER_SIZE;
4944
38.4M
    size_t len = 0;
4945
38.4M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4946
9.32M
                       XML_MAX_HUGE_LENGTH :
4947
38.4M
                       XML_MAX_TEXT_LENGTH;
4948
38.4M
    xmlParserInputState state;
4949
38.4M
    const xmlChar *in;
4950
38.4M
    size_t nbchar = 0;
4951
38.4M
    int ccol;
4952
38.4M
    int inputid;
4953
4954
    /*
4955
     * Check that there is a comment right here.
4956
     */
4957
38.4M
    if ((RAW != '<') || (NXT(1) != '!'))
4958
0
        return;
4959
38.4M
    SKIP(2);
4960
38.4M
    if ((RAW != '-') || (NXT(1) != '-'))
4961
162
        return;
4962
38.4M
    state = ctxt->instate;
4963
38.4M
    ctxt->instate = XML_PARSER_COMMENT;
4964
38.4M
    inputid = ctxt->input->id;
4965
38.4M
    SKIP(2);
4966
38.4M
    SHRINK;
4967
38.4M
    GROW;
4968
4969
    /*
4970
     * Accelerated common case where input don't need to be
4971
     * modified before passing it to the handler.
4972
     */
4973
38.4M
    in = ctxt->input->cur;
4974
38.4M
    do {
4975
38.4M
  if (*in == 0xA) {
4976
91.0k
      do {
4977
91.0k
    ctxt->input->line++; ctxt->input->col = 1;
4978
91.0k
    in++;
4979
91.0k
      } while (*in == 0xA);
4980
87.8k
  }
4981
43.4M
get_more:
4982
43.4M
        ccol = ctxt->input->col;
4983
185M
  while (((*in > '-') && (*in <= 0x7F)) ||
4984
185M
         ((*in >= 0x20) && (*in < '-')) ||
4985
185M
         (*in == 0x09)) {
4986
142M
        in++;
4987
142M
        ccol++;
4988
142M
  }
4989
43.4M
  ctxt->input->col = ccol;
4990
43.4M
  if (*in == 0xA) {
4991
1.09M
      do {
4992
1.09M
    ctxt->input->line++; ctxt->input->col = 1;
4993
1.09M
    in++;
4994
1.09M
      } while (*in == 0xA);
4995
1.05M
      goto get_more;
4996
1.05M
  }
4997
42.3M
  nbchar = in - ctxt->input->cur;
4998
  /*
4999
   * save current set of data
5000
   */
5001
42.3M
  if (nbchar > 0) {
5002
5.09M
      if ((ctxt->sax != NULL) &&
5003
5.09M
    (ctxt->sax->comment != NULL)) {
5004
5.09M
    if (buf == NULL) {
5005
2.24M
        if ((*in == '-') && (in[1] == '-'))
5006
340k
            size = nbchar + 1;
5007
1.90M
        else
5008
1.90M
            size = XML_PARSER_BUFFER_SIZE + nbchar;
5009
2.24M
        buf = (xmlChar *) xmlMallocAtomic(size);
5010
2.24M
        if (buf == NULL) {
5011
0
            xmlErrMemory(ctxt, NULL);
5012
0
      ctxt->instate = state;
5013
0
      return;
5014
0
        }
5015
2.24M
        len = 0;
5016
2.84M
    } else if (len + nbchar + 1 >= size) {
5017
297k
        xmlChar *new_buf;
5018
297k
        size  += len + nbchar + XML_PARSER_BUFFER_SIZE;
5019
297k
        new_buf = (xmlChar *) xmlRealloc(buf, size);
5020
297k
        if (new_buf == NULL) {
5021
0
            xmlFree (buf);
5022
0
      xmlErrMemory(ctxt, NULL);
5023
0
      ctxt->instate = state;
5024
0
      return;
5025
0
        }
5026
297k
        buf = new_buf;
5027
297k
    }
5028
5.09M
    memcpy(&buf[len], ctxt->input->cur, nbchar);
5029
5.09M
    len += nbchar;
5030
5.09M
    buf[len] = 0;
5031
5.09M
      }
5032
5.09M
  }
5033
42.3M
        if (len > maxLength) {
5034
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5035
0
                         "Comment too big found", NULL);
5036
0
            xmlFree (buf);
5037
0
            return;
5038
0
        }
5039
42.3M
  ctxt->input->cur = in;
5040
42.3M
  if (*in == 0xA) {
5041
0
      in++;
5042
0
      ctxt->input->line++; ctxt->input->col = 1;
5043
0
  }
5044
42.3M
  if (*in == 0xD) {
5045
1.54M
      in++;
5046
1.54M
      if (*in == 0xA) {
5047
1.54M
    ctxt->input->cur = in;
5048
1.54M
    in++;
5049
1.54M
    ctxt->input->line++; ctxt->input->col = 1;
5050
1.54M
    goto get_more;
5051
1.54M
      }
5052
5.68k
      in--;
5053
5.68k
  }
5054
40.8M
  SHRINK;
5055
40.8M
  GROW;
5056
40.8M
        if (ctxt->instate == XML_PARSER_EOF) {
5057
0
            xmlFree(buf);
5058
0
            return;
5059
0
        }
5060
40.8M
  in = ctxt->input->cur;
5061
40.8M
  if (*in == '-') {
5062
39.1M
      if (in[1] == '-') {
5063
37.8M
          if (in[2] == '>') {
5064
36.7M
        if (ctxt->input->id != inputid) {
5065
0
      xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5066
0
                     "comment doesn't start and stop in the"
5067
0
                                       " same entity\n");
5068
0
        }
5069
36.7M
        SKIP(3);
5070
36.7M
        if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5071
36.7M
            (!ctxt->disableSAX)) {
5072
27.3M
      if (buf != NULL)
5073
544k
          ctxt->sax->comment(ctxt->userData, buf);
5074
26.7M
      else
5075
26.7M
          ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5076
27.3M
        }
5077
36.7M
        if (buf != NULL)
5078
611k
            xmlFree(buf);
5079
36.7M
        if (ctxt->instate != XML_PARSER_EOF)
5080
36.7M
      ctxt->instate = state;
5081
36.7M
        return;
5082
36.7M
    }
5083
1.05M
    if (buf != NULL) {
5084
14.3k
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5085
14.3k
                          "Double hyphen within comment: "
5086
14.3k
                                      "<!--%.50s\n",
5087
14.3k
              buf);
5088
14.3k
    } else
5089
1.04M
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5090
1.04M
                          "Double hyphen within comment\n", NULL);
5091
1.05M
                if (ctxt->instate == XML_PARSER_EOF) {
5092
0
                    xmlFree(buf);
5093
0
                    return;
5094
0
                }
5095
1.05M
    in++;
5096
1.05M
    ctxt->input->col++;
5097
1.05M
      }
5098
2.40M
      in++;
5099
2.40M
      ctxt->input->col++;
5100
2.40M
      goto get_more;
5101
39.1M
  }
5102
40.8M
    } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
5103
1.68M
    xmlParseCommentComplex(ctxt, buf, len, size);
5104
1.68M
    ctxt->instate = state;
5105
1.68M
    return;
5106
38.4M
}
5107
5108
5109
/**
5110
 * xmlParsePITarget:
5111
 * @ctxt:  an XML parser context
5112
 *
5113
 * DEPRECATED: Internal function, don't use.
5114
 *
5115
 * parse the name of a PI
5116
 *
5117
 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5118
 *
5119
 * Returns the PITarget name or NULL
5120
 */
5121
5122
const xmlChar *
5123
163k
xmlParsePITarget(xmlParserCtxtPtr ctxt) {
5124
163k
    const xmlChar *name;
5125
5126
163k
    name = xmlParseName(ctxt);
5127
163k
    if ((name != NULL) &&
5128
163k
        ((name[0] == 'x') || (name[0] == 'X')) &&
5129
163k
        ((name[1] == 'm') || (name[1] == 'M')) &&
5130
163k
        ((name[2] == 'l') || (name[2] == 'L'))) {
5131
45.5k
  int i;
5132
45.5k
  if ((name[0] == 'x') && (name[1] == 'm') &&
5133
45.5k
      (name[2] == 'l') && (name[3] == 0)) {
5134
8.13k
      xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5135
8.13k
     "XML declaration allowed only at the start of the document\n");
5136
8.13k
      return(name);
5137
37.3k
  } else if (name[3] == 0) {
5138
2.18k
      xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5139
2.18k
      return(name);
5140
2.18k
  }
5141
76.1k
  for (i = 0;;i++) {
5142
76.1k
      if (xmlW3CPIs[i] == NULL) break;
5143
55.7k
      if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5144
14.7k
          return(name);
5145
55.7k
  }
5146
20.4k
  xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5147
20.4k
          "xmlParsePITarget: invalid name prefix 'xml'\n",
5148
20.4k
          NULL, NULL);
5149
20.4k
    }
5150
138k
    if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5151
2.31k
  xmlNsErr(ctxt, XML_NS_ERR_COLON,
5152
2.31k
     "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5153
2.31k
    }
5154
138k
    return(name);
5155
163k
}
5156
5157
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document
 *
 * The value must have the form  catalog = "URL"  or  catalog = 'URL'
 * with optional blanks around the tokens and nothing after the closing
 * quote. Anything else produces an XML_WAR_CATALOG_PI warning, except
 * a missing '=' after the keyword, which is ignored silently.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    const xmlChar *p = catalog;
    const xmlChar *valueStart;
    xmlChar *URL = NULL;
    xmlChar quote;

    /* keyword */
    while (IS_BLANK_CH(*p)) {
        p++;
    }
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto syntax_error;
    p += 7;

    /* '=' */
    while (IS_BLANK_CH(*p)) {
        p++;
    }
    if (*p != '=')
        return;
    p++;

    /* quoted value */
    while (IS_BLANK_CH(*p)) {
        p++;
    }
    quote = *p;
    if (!((quote == '\'') || (quote == '"')))
        goto syntax_error;
    p++;
    valueStart = p;
    while ((*p != 0) && (*p != quote)) {
        p++;
    }
    if (*p == 0)
        goto syntax_error;
    URL = xmlStrndup(valueStart, p - valueStart);
    p++;

    /* only blanks may follow the closing quote */
    while (IS_BLANK_CH(*p)) {
        p++;
    }
    if (*p != 0)
        goto syntax_error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

syntax_error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
5218
5219
/**
5220
 * xmlParsePI:
5221
 * @ctxt:  an XML parser context
5222
 *
5223
 * DEPRECATED: Internal function, don't use.
5224
 *
5225
 * parse an XML Processing Instruction.
5226
 *
5227
 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5228
 *
5229
 * The processing is transferred to SAX once parsed.
5230
 */
5231
5232
void
5233
163k
xmlParsePI(xmlParserCtxtPtr ctxt) {
5234
163k
    xmlChar *buf = NULL;
5235
163k
    size_t len = 0;
5236
163k
    size_t size = XML_PARSER_BUFFER_SIZE;
5237
163k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5238
61.9k
                       XML_MAX_HUGE_LENGTH :
5239
163k
                       XML_MAX_TEXT_LENGTH;
5240
163k
    int cur, l;
5241
163k
    const xmlChar *target;
5242
163k
    xmlParserInputState state;
5243
163k
    int count = 0;
5244
5245
163k
    if ((RAW == '<') && (NXT(1) == '?')) {
5246
163k
  int inputid = ctxt->input->id;
5247
163k
  state = ctxt->instate;
5248
163k
        ctxt->instate = XML_PARSER_PI;
5249
  /*
5250
   * this is a Processing Instruction.
5251
   */
5252
163k
  SKIP(2);
5253
163k
  SHRINK;
5254
5255
  /*
5256
   * Parse the target name and check for special support like
5257
   * namespace.
5258
   */
5259
163k
        target = xmlParsePITarget(ctxt);
5260
163k
  if (target != NULL) {
5261
155k
      if ((RAW == '?') && (NXT(1) == '>')) {
5262
19.2k
    if (inputid != ctxt->input->id) {
5263
0
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5264
0
                             "PI declaration doesn't start and stop in"
5265
0
                                   " the same entity\n");
5266
0
    }
5267
19.2k
    SKIP(2);
5268
5269
    /*
5270
     * SAX: PI detected.
5271
     */
5272
19.2k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5273
19.2k
        (ctxt->sax->processingInstruction != NULL))
5274
14.9k
        ctxt->sax->processingInstruction(ctxt->userData,
5275
14.9k
                                         target, NULL);
5276
19.2k
    if (ctxt->instate != XML_PARSER_EOF)
5277
19.2k
        ctxt->instate = state;
5278
19.2k
    return;
5279
19.2k
      }
5280
136k
      buf = (xmlChar *) xmlMallocAtomic(size);
5281
136k
      if (buf == NULL) {
5282
0
    xmlErrMemory(ctxt, NULL);
5283
0
    ctxt->instate = state;
5284
0
    return;
5285
0
      }
5286
136k
      if (SKIP_BLANKS == 0) {
5287
39.7k
    xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5288
39.7k
        "ParsePI: PI %s space expected\n", target);
5289
39.7k
      }
5290
136k
      cur = CUR_CHAR(l);
5291
27.8M
      while (IS_CHAR(cur) && /* checked */
5292
27.8M
       ((cur != '?') || (NXT(1) != '>'))) {
5293
27.6M
    if (len + 5 >= size) {
5294
37.7k
        xmlChar *tmp;
5295
37.7k
                    size_t new_size = size * 2;
5296
37.7k
        tmp = (xmlChar *) xmlRealloc(buf, new_size);
5297
37.7k
        if (tmp == NULL) {
5298
0
      xmlErrMemory(ctxt, NULL);
5299
0
      xmlFree(buf);
5300
0
      ctxt->instate = state;
5301
0
      return;
5302
0
        }
5303
37.7k
        buf = tmp;
5304
37.7k
                    size = new_size;
5305
37.7k
    }
5306
27.6M
    count++;
5307
27.6M
    if (count > 50) {
5308
501k
        SHRINK;
5309
501k
        GROW;
5310
501k
                    if (ctxt->instate == XML_PARSER_EOF) {
5311
0
                        xmlFree(buf);
5312
0
                        return;
5313
0
                    }
5314
501k
        count = 0;
5315
501k
    }
5316
27.6M
    COPY_BUF(l,buf,len,cur);
5317
27.6M
    NEXTL(l);
5318
27.6M
    cur = CUR_CHAR(l);
5319
27.6M
    if (cur == 0) {
5320
23.7k
        SHRINK;
5321
23.7k
        GROW;
5322
23.7k
        cur = CUR_CHAR(l);
5323
23.7k
    }
5324
27.6M
                if (len > maxLength) {
5325
0
                    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5326
0
                                      "PI %s too big found", target);
5327
0
                    xmlFree(buf);
5328
0
                    ctxt->instate = state;
5329
0
                    return;
5330
0
                }
5331
27.6M
      }
5332
136k
      buf[len] = 0;
5333
136k
      if (cur != '?') {
5334
28.6k
    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5335
28.6k
          "ParsePI: PI %s never end ...\n", target);
5336
108k
      } else {
5337
108k
    if (inputid != ctxt->input->id) {
5338
184
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5339
184
                             "PI declaration doesn't start and stop in"
5340
184
                                   " the same entity\n");
5341
184
    }
5342
108k
    SKIP(2);
5343
5344
108k
#ifdef LIBXML_CATALOG_ENABLED
5345
108k
    if (((state == XML_PARSER_MISC) ||
5346
108k
               (state == XML_PARSER_START)) &&
5347
108k
        (xmlStrEqual(target, XML_CATALOG_PI))) {
5348
97
        xmlCatalogAllow allow = xmlCatalogGetDefaults();
5349
97
        if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5350
97
      (allow == XML_CATA_ALLOW_ALL))
5351
97
      xmlParseCatalogPI(ctxt, buf);
5352
97
    }
5353
108k
#endif
5354
5355
5356
    /*
5357
     * SAX: PI detected.
5358
     */
5359
108k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5360
108k
        (ctxt->sax->processingInstruction != NULL))
5361
85.5k
        ctxt->sax->processingInstruction(ctxt->userData,
5362
85.5k
                                         target, buf);
5363
108k
      }
5364
136k
      xmlFree(buf);
5365
136k
  } else {
5366
8.02k
      xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5367
8.02k
  }
5368
144k
  if (ctxt->instate != XML_PARSER_EOF)
5369
144k
      ctxt->instate = state;
5370
144k
    }
5371
163k
}
5372
5373
/**
5374
 * xmlParseNotationDecl:
5375
 * @ctxt:  an XML parser context
5376
 *
5377
 * DEPRECATED: Internal function, don't use.
5378
 *
5379
 * Parse a notation declaration. Always consumes '<!'.
5380
 *
5381
 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID |  PublicID) S? '>'
5382
 *
5383
 * Hence there is actually 3 choices:
5384
 *     'PUBLIC' S PubidLiteral
5385
 *     'PUBLIC' S PubidLiteral S SystemLiteral
5386
 * and 'SYSTEM' S SystemLiteral
5387
 *
5388
 * See the NOTE on xmlParseExternalID().
5389
 */
5390
5391
void
5392
26.1k
xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5393
26.1k
    const xmlChar *name;
5394
26.1k
    xmlChar *Pubid;
5395
26.1k
    xmlChar *Systemid;
5396
5397
26.1k
    if ((CUR != '<') || (NXT(1) != '!'))
5398
0
        return;
5399
26.1k
    SKIP(2);
5400
5401
26.1k
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5402
25.4k
  int inputid = ctxt->input->id;
5403
25.4k
  SHRINK;
5404
25.4k
  SKIP(8);
5405
25.4k
  if (SKIP_BLANKS == 0) {
5406
532
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5407
532
         "Space required after '<!NOTATION'\n");
5408
532
      return;
5409
532
  }
5410
5411
24.9k
        name = xmlParseName(ctxt);
5412
24.9k
  if (name == NULL) {
5413
2.51k
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5414
2.51k
      return;
5415
2.51k
  }
5416
22.3k
  if (xmlStrchr(name, ':') != NULL) {
5417
1.89k
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5418
1.89k
         "colons are forbidden from notation names '%s'\n",
5419
1.89k
         name, NULL, NULL);
5420
1.89k
  }
5421
22.3k
  if (SKIP_BLANKS == 0) {
5422
1.98k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5423
1.98k
         "Space required after the NOTATION name'\n");
5424
1.98k
      return;
5425
1.98k
  }
5426
5427
  /*
5428
   * Parse the IDs.
5429
   */
5430
20.4k
  Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5431
20.4k
  SKIP_BLANKS;
5432
5433
20.4k
  if (RAW == '>') {
5434
11.3k
      if (inputid != ctxt->input->id) {
5435
23
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5436
23
                         "Notation declaration doesn't start and stop"
5437
23
                               " in the same entity\n");
5438
23
      }
5439
11.3k
      NEXT;
5440
11.3k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5441
11.3k
    (ctxt->sax->notationDecl != NULL))
5442
8.91k
    ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5443
11.3k
  } else {
5444
9.07k
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5445
9.07k
  }
5446
20.4k
  if (Systemid != NULL) xmlFree(Systemid);
5447
20.4k
  if (Pubid != NULL) xmlFree(Pubid);
5448
20.4k
    }
5449
26.1k
}
5450
5451
/**
5452
 * xmlParseEntityDecl:
5453
 * @ctxt:  an XML parser context
5454
 *
5455
 * DEPRECATED: Internal function, don't use.
5456
 *
5457
 * Parse an entity declaration. Always consumes '<!'.
5458
 *
5459
 * [70] EntityDecl ::= GEDecl | PEDecl
5460
 *
5461
 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5462
 *
5463
 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5464
 *
5465
 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5466
 *
5467
 * [74] PEDef ::= EntityValue | ExternalID
5468
 *
5469
 * [76] NDataDecl ::= S 'NDATA' S Name
5470
 *
5471
 * [ VC: Notation Declared ]
5472
 * The Name must match the declared name of a notation.
5473
 */
5474
5475
void
5476
975k
xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5477
975k
    const xmlChar *name = NULL;
5478
975k
    xmlChar *value = NULL;
5479
975k
    xmlChar *URI = NULL, *literal = NULL;
5480
975k
    const xmlChar *ndata = NULL;
5481
975k
    int isParameter = 0;
5482
975k
    xmlChar *orig = NULL;
5483
5484
975k
    if ((CUR != '<') || (NXT(1) != '!'))
5485
0
        return;
5486
975k
    SKIP(2);
5487
5488
    /* GROW; done in the caller */
5489
975k
    if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5490
974k
  int inputid = ctxt->input->id;
5491
974k
  SHRINK;
5492
974k
  SKIP(6);
5493
974k
  if (SKIP_BLANKS == 0) {
5494
3.07k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5495
3.07k
         "Space required after '<!ENTITY'\n");
5496
3.07k
  }
5497
5498
974k
  if (RAW == '%') {
5499
401k
      NEXT;
5500
401k
      if (SKIP_BLANKS == 0) {
5501
1.04k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5502
1.04k
             "Space required after '%%'\n");
5503
1.04k
      }
5504
401k
      isParameter = 1;
5505
401k
  }
5506
5507
974k
        name = xmlParseName(ctxt);
5508
974k
  if (name == NULL) {
5509
3.35k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5510
3.35k
                     "xmlParseEntityDecl: no name\n");
5511
3.35k
            return;
5512
3.35k
  }
5513
971k
  if (xmlStrchr(name, ':') != NULL) {
5514
1.95k
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5515
1.95k
         "colons are forbidden from entities names '%s'\n",
5516
1.95k
         name, NULL, NULL);
5517
1.95k
  }
5518
971k
  if (SKIP_BLANKS == 0) {
5519
5.53k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5520
5.53k
         "Space required after the entity name\n");
5521
5.53k
  }
5522
5523
971k
  ctxt->instate = XML_PARSER_ENTITY_DECL;
5524
  /*
5525
   * handle the various case of definitions...
5526
   */
5527
971k
  if (isParameter) {
5528
400k
      if ((RAW == '"') || (RAW == '\'')) {
5529
358k
          value = xmlParseEntityValue(ctxt, &orig);
5530
358k
    if (value) {
5531
350k
        if ((ctxt->sax != NULL) &&
5532
350k
      (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5533
308k
      ctxt->sax->entityDecl(ctxt->userData, name,
5534
308k
                        XML_INTERNAL_PARAMETER_ENTITY,
5535
308k
            NULL, NULL, value);
5536
350k
    }
5537
358k
      } else {
5538
42.0k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5539
42.0k
    if ((URI == NULL) && (literal == NULL)) {
5540
2.41k
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5541
2.41k
    }
5542
42.0k
    if (URI) {
5543
39.4k
        xmlURIPtr uri;
5544
5545
39.4k
        uri = xmlParseURI((const char *) URI);
5546
39.4k
        if (uri == NULL) {
5547
2.06k
            xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5548
2.06k
             "Invalid URI: %s\n", URI);
5549
      /*
5550
       * This really ought to be a well formedness error
5551
       * but the XML Core WG decided otherwise c.f. issue
5552
       * E26 of the XML erratas.
5553
       */
5554
37.3k
        } else {
5555
37.3k
      if (uri->fragment != NULL) {
5556
          /*
5557
           * Okay this is foolish to block those but not
5558
           * invalid URIs.
5559
           */
5560
281
          xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5561
37.0k
      } else {
5562
37.0k
          if ((ctxt->sax != NULL) &&
5563
37.0k
        (!ctxt->disableSAX) &&
5564
37.0k
        (ctxt->sax->entityDecl != NULL))
5565
35.5k
        ctxt->sax->entityDecl(ctxt->userData, name,
5566
35.5k
              XML_EXTERNAL_PARAMETER_ENTITY,
5567
35.5k
              literal, URI, NULL);
5568
37.0k
      }
5569
37.3k
      xmlFreeURI(uri);
5570
37.3k
        }
5571
39.4k
    }
5572
42.0k
      }
5573
570k
  } else {
5574
570k
      if ((RAW == '"') || (RAW == '\'')) {
5575
351k
          value = xmlParseEntityValue(ctxt, &orig);
5576
351k
    if ((ctxt->sax != NULL) &&
5577
351k
        (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5578
319k
        ctxt->sax->entityDecl(ctxt->userData, name,
5579
319k
        XML_INTERNAL_GENERAL_ENTITY,
5580
319k
        NULL, NULL, value);
5581
    /*
5582
     * For expat compatibility in SAX mode.
5583
     */
5584
351k
    if ((ctxt->myDoc == NULL) ||
5585
351k
        (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5586
3.66k
        if (ctxt->myDoc == NULL) {
5587
357
      ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5588
357
      if (ctxt->myDoc == NULL) {
5589
0
          xmlErrMemory(ctxt, "New Doc failed");
5590
0
          return;
5591
0
      }
5592
357
      ctxt->myDoc->properties = XML_DOC_INTERNAL;
5593
357
        }
5594
3.66k
        if (ctxt->myDoc->intSubset == NULL)
5595
357
      ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5596
357
              BAD_CAST "fake", NULL, NULL);
5597
5598
3.66k
        xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5599
3.66k
                    NULL, NULL, value);
5600
3.66k
    }
5601
351k
      } else {
5602
218k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5603
218k
    if ((URI == NULL) && (literal == NULL)) {
5604
6.77k
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5605
6.77k
    }
5606
218k
    if (URI) {
5607
207k
        xmlURIPtr uri;
5608
5609
207k
        uri = xmlParseURI((const char *)URI);
5610
207k
        if (uri == NULL) {
5611
5.71k
            xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5612
5.71k
             "Invalid URI: %s\n", URI);
5613
      /*
5614
       * This really ought to be a well formedness error
5615
       * but the XML Core WG decided otherwise c.f. issue
5616
       * E26 of the XML erratas.
5617
       */
5618
202k
        } else {
5619
202k
      if (uri->fragment != NULL) {
5620
          /*
5621
           * Okay this is foolish to block those but not
5622
           * invalid URIs.
5623
           */
5624
2.31k
          xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5625
2.31k
      }
5626
202k
      xmlFreeURI(uri);
5627
202k
        }
5628
207k
    }
5629
218k
    if ((RAW != '>') && (SKIP_BLANKS == 0)) {
5630
7.73k
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5631
7.73k
           "Space required before 'NDATA'\n");
5632
7.73k
    }
5633
218k
    if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5634
13.3k
        SKIP(5);
5635
13.3k
        if (SKIP_BLANKS == 0) {
5636
535
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5637
535
               "Space required after 'NDATA'\n");
5638
535
        }
5639
13.3k
        ndata = xmlParseName(ctxt);
5640
13.3k
        if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5641
13.3k
            (ctxt->sax->unparsedEntityDecl != NULL))
5642
11.8k
      ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5643
11.8k
            literal, URI, ndata);
5644
205k
    } else {
5645
205k
        if ((ctxt->sax != NULL) &&
5646
205k
            (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5647
195k
      ctxt->sax->entityDecl(ctxt->userData, name,
5648
195k
            XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5649
195k
            literal, URI, NULL);
5650
        /*
5651
         * For expat compatibility in SAX mode.
5652
         * assuming the entity replacement was asked for
5653
         */
5654
205k
        if ((ctxt->replaceEntities != 0) &&
5655
205k
      ((ctxt->myDoc == NULL) ||
5656
111k
      (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5657
653
      if (ctxt->myDoc == NULL) {
5658
131
          ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5659
131
          if (ctxt->myDoc == NULL) {
5660
0
              xmlErrMemory(ctxt, "New Doc failed");
5661
0
        return;
5662
0
          }
5663
131
          ctxt->myDoc->properties = XML_DOC_INTERNAL;
5664
131
      }
5665
5666
653
      if (ctxt->myDoc->intSubset == NULL)
5667
131
          ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5668
131
            BAD_CAST "fake", NULL, NULL);
5669
653
      xmlSAX2EntityDecl(ctxt, name,
5670
653
                  XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5671
653
                  literal, URI, NULL);
5672
653
        }
5673
205k
    }
5674
218k
      }
5675
570k
  }
5676
971k
  if (ctxt->instate == XML_PARSER_EOF)
5677
333
      goto done;
5678
970k
  SKIP_BLANKS;
5679
970k
  if (RAW != '>') {
5680
14.2k
      xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5681
14.2k
              "xmlParseEntityDecl: entity %s not terminated\n", name);
5682
14.2k
      xmlHaltParser(ctxt);
5683
956k
  } else {
5684
956k
      if (inputid != ctxt->input->id) {
5685
168
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5686
168
                         "Entity declaration doesn't start and stop in"
5687
168
                               " the same entity\n");
5688
168
      }
5689
956k
      NEXT;
5690
956k
  }
5691
970k
  if (orig != NULL) {
5692
      /*
5693
       * Ugly mechanism to save the raw entity value.
5694
       */
5695
696k
      xmlEntityPtr cur = NULL;
5696
5697
696k
      if (isParameter) {
5698
352k
          if ((ctxt->sax != NULL) &&
5699
352k
        (ctxt->sax->getParameterEntity != NULL))
5700
352k
        cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5701
352k
      } else {
5702
344k
          if ((ctxt->sax != NULL) &&
5703
344k
        (ctxt->sax->getEntity != NULL))
5704
344k
        cur = ctxt->sax->getEntity(ctxt->userData, name);
5705
344k
    if ((cur == NULL) && (ctxt->userData==ctxt)) {
5706
22.2k
        cur = xmlSAX2GetEntity(ctxt, name);
5707
22.2k
    }
5708
344k
      }
5709
696k
            if ((cur != NULL) && (cur->orig == NULL)) {
5710
532k
    cur->orig = orig;
5711
532k
                orig = NULL;
5712
532k
      }
5713
696k
  }
5714
5715
971k
done:
5716
971k
  if (value != NULL) xmlFree(value);
5717
971k
  if (URI != NULL) xmlFree(URI);
5718
971k
  if (literal != NULL) xmlFree(literal);
5719
971k
        if (orig != NULL) xmlFree(orig);
5720
971k
    }
5721
975k
}
5722
5723
/**
5724
 * xmlParseDefaultDecl:
5725
 * @ctxt:  an XML parser context
5726
 * @value:  Receive a possible fixed default value for the attribute
5727
 *
5728
 * DEPRECATED: Internal function, don't use.
5729
 *
5730
 * Parse an attribute default declaration
5731
 *
5732
 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5733
 *
5734
 * [ VC: Required Attribute ]
5735
 * if the default declaration is the keyword #REQUIRED, then the
5736
 * attribute must be specified for all elements of the type in the
5737
 * attribute-list declaration.
5738
 *
5739
 * [ VC: Attribute Default Legal ]
5740
 * The declared default value must meet the lexical constraints of
5741
 * the declared attribute type c.f. xmlValidateAttributeDecl()
5742
 *
5743
 * [ VC: Fixed Attribute Default ]
5744
 * if an attribute has a default value declared with the #FIXED
5745
 * keyword, instances of that attribute must match the default value.
5746
 *
5747
 * [ WFC: No < in Attribute Values ]
5748
 * handled in xmlParseAttValue()
5749
 *
5750
 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5751
 *          or XML_ATTRIBUTE_FIXED.
5752
 */
5753
5754
int
5755
2.81M
xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5756
2.81M
    int val;
5757
2.81M
    xmlChar *ret;
5758
5759
2.81M
    *value = NULL;
5760
2.81M
    if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5761
198k
  SKIP(9);
5762
198k
  return(XML_ATTRIBUTE_REQUIRED);
5763
198k
    }
5764
2.61M
    if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5765
2.38M
  SKIP(8);
5766
2.38M
  return(XML_ATTRIBUTE_IMPLIED);
5767
2.38M
    }
5768
237k
    val = XML_ATTRIBUTE_NONE;
5769
237k
    if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5770
93.3k
  SKIP(6);
5771
93.3k
  val = XML_ATTRIBUTE_FIXED;
5772
93.3k
  if (SKIP_BLANKS == 0) {
5773
266
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5774
266
         "Space required after '#FIXED'\n");
5775
266
  }
5776
93.3k
    }
5777
237k
    ret = xmlParseAttValue(ctxt);
5778
237k
    ctxt->instate = XML_PARSER_DTD;
5779
237k
    if (ret == NULL) {
5780
4.65k
  xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5781
4.65k
           "Attribute default value declaration error\n");
5782
4.65k
    } else
5783
232k
        *value = ret;
5784
237k
    return(val);
5785
2.61M
}
5786
5787
/**
5788
 * xmlParseNotationType:
5789
 * @ctxt:  an XML parser context
5790
 *
5791
 * DEPRECATED: Internal function, don't use.
5792
 *
5793
 * parse an Notation attribute type.
5794
 *
5795
 * Note: the leading 'NOTATION' S part has already being parsed...
5796
 *
5797
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5798
 *
5799
 * [ VC: Notation Attributes ]
5800
 * Values of this type must match one of the notation names included
5801
 * in the declaration; all notation names in the declaration must be declared.
5802
 *
5803
 * Returns: the notation attribute tree built while parsing
5804
 */
5805
5806
xmlEnumerationPtr
5807
6.41k
xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5808
6.41k
    const xmlChar *name;
5809
6.41k
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5810
5811
6.41k
    if (RAW != '(') {
5812
810
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5813
810
  return(NULL);
5814
810
    }
5815
5.60k
    SHRINK;
5816
17.2k
    do {
5817
17.2k
        NEXT;
5818
17.2k
  SKIP_BLANKS;
5819
17.2k
        name = xmlParseName(ctxt);
5820
17.2k
  if (name == NULL) {
5821
691
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5822
691
         "Name expected in NOTATION declaration\n");
5823
691
            xmlFreeEnumeration(ret);
5824
691
      return(NULL);
5825
691
  }
5826
16.5k
  tmp = ret;
5827
56.8k
  while (tmp != NULL) {
5828
43.0k
      if (xmlStrEqual(name, tmp->name)) {
5829
2.71k
    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5830
2.71k
    "standalone: attribute notation value token %s duplicated\n",
5831
2.71k
         name, NULL);
5832
2.71k
    if (!xmlDictOwns(ctxt->dict, name))
5833
0
        xmlFree((xmlChar *) name);
5834
2.71k
    break;
5835
2.71k
      }
5836
40.3k
      tmp = tmp->next;
5837
40.3k
  }
5838
16.5k
  if (tmp == NULL) {
5839
13.8k
      cur = xmlCreateEnumeration(name);
5840
13.8k
      if (cur == NULL) {
5841
0
                xmlFreeEnumeration(ret);
5842
0
                return(NULL);
5843
0
            }
5844
13.8k
      if (last == NULL) ret = last = cur;
5845
8.61k
      else {
5846
8.61k
    last->next = cur;
5847
8.61k
    last = cur;
5848
8.61k
      }
5849
13.8k
  }
5850
16.5k
  SKIP_BLANKS;
5851
16.5k
    } while (RAW == '|');
5852
4.91k
    if (RAW != ')') {
5853
2.45k
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5854
2.45k
        xmlFreeEnumeration(ret);
5855
2.45k
  return(NULL);
5856
2.45k
    }
5857
2.46k
    NEXT;
5858
2.46k
    return(ret);
5859
4.91k
}
5860
5861
/**
5862
 * xmlParseEnumerationType:
5863
 * @ctxt:  an XML parser context
5864
 *
5865
 * DEPRECATED: Internal function, don't use.
5866
 *
5867
 * parse an Enumeration attribute type.
5868
 *
5869
 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5870
 *
5871
 * [ VC: Enumeration ]
5872
 * Values of this type must match one of the Nmtoken tokens in
5873
 * the declaration
5874
 *
5875
 * Returns: the enumeration attribute tree built while parsing
5876
 */
5877
5878
xmlEnumerationPtr
5879
329k
xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5880
329k
    xmlChar *name;
5881
329k
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5882
5883
329k
    if (RAW != '(') {
5884
5.59k
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5885
5.59k
  return(NULL);
5886
5.59k
    }
5887
324k
    SHRINK;
5888
1.00M
    do {
5889
1.00M
        NEXT;
5890
1.00M
  SKIP_BLANKS;
5891
1.00M
        name = xmlParseNmtoken(ctxt);
5892
1.00M
  if (name == NULL) {
5893
965
      xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5894
965
      return(ret);
5895
965
  }
5896
999k
  tmp = ret;
5897
2.71M
  while (tmp != NULL) {
5898
1.71M
      if (xmlStrEqual(name, tmp->name)) {
5899
1.29k
    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5900
1.29k
    "standalone: attribute enumeration value token %s duplicated\n",
5901
1.29k
         name, NULL);
5902
1.29k
    if (!xmlDictOwns(ctxt->dict, name))
5903
1.29k
        xmlFree(name);
5904
1.29k
    break;
5905
1.29k
      }
5906
1.71M
      tmp = tmp->next;
5907
1.71M
  }
5908
999k
  if (tmp == NULL) {
5909
998k
      cur = xmlCreateEnumeration(name);
5910
998k
      if (!xmlDictOwns(ctxt->dict, name))
5911
998k
    xmlFree(name);
5912
998k
      if (cur == NULL) {
5913
0
                xmlFreeEnumeration(ret);
5914
0
                return(NULL);
5915
0
            }
5916
998k
      if (last == NULL) ret = last = cur;
5917
674k
      else {
5918
674k
    last->next = cur;
5919
674k
    last = cur;
5920
674k
      }
5921
998k
  }
5922
999k
  SKIP_BLANKS;
5923
999k
    } while (RAW == '|');
5924
323k
    if (RAW != ')') {
5925
2.39k
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5926
2.39k
  return(ret);
5927
2.39k
    }
5928
320k
    NEXT;
5929
320k
    return(ret);
5930
323k
}
5931
5932
/**
5933
 * xmlParseEnumeratedType:
5934
 * @ctxt:  an XML parser context
5935
 * @tree:  the enumeration tree built while parsing
5936
 *
5937
 * DEPRECATED: Internal function, don't use.
5938
 *
5939
 * parse an Enumerated attribute type.
5940
 *
5941
 * [57] EnumeratedType ::= NotationType | Enumeration
5942
 *
5943
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5944
 *
5945
 *
5946
 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5947
 */
5948
5949
int
5950
336k
xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5951
336k
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5952
6.75k
  SKIP(8);
5953
6.75k
  if (SKIP_BLANKS == 0) {
5954
341
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5955
341
         "Space required after 'NOTATION'\n");
5956
341
      return(0);
5957
341
  }
5958
6.41k
  *tree = xmlParseNotationType(ctxt);
5959
6.41k
  if (*tree == NULL) return(0);
5960
2.46k
  return(XML_ATTRIBUTE_NOTATION);
5961
6.41k
    }
5962
329k
    *tree = xmlParseEnumerationType(ctxt);
5963
329k
    if (*tree == NULL) return(0);
5964
323k
    return(XML_ATTRIBUTE_ENUMERATION);
5965
329k
}
5966
5967
/**
5968
 * xmlParseAttributeType:
5969
 * @ctxt:  an XML parser context
5970
 * @tree:  the enumeration tree built while parsing
5971
 *
5972
 * DEPRECATED: Internal function, don't use.
5973
 *
5974
 * parse the Attribute list def for an element
5975
 *
5976
 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5977
 *
5978
 * [55] StringType ::= 'CDATA'
5979
 *
5980
 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5981
 *                        'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5982
 *
5983
 * Validity constraints for attribute values syntax are checked in
5984
 * xmlValidateAttributeValue()
5985
 *
5986
 * [ VC: ID ]
5987
 * Values of type ID must match the Name production. A name must not
5988
 * appear more than once in an XML document as a value of this type;
5989
 * i.e., ID values must uniquely identify the elements which bear them.
5990
 *
5991
 * [ VC: One ID per Element Type ]
5992
 * No element type may have more than one ID attribute specified.
5993
 *
5994
 * [ VC: ID Attribute Default ]
5995
 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5996
 *
5997
 * [ VC: IDREF ]
5998
 * Values of type IDREF must match the Name production, and values
5999
 * of type IDREFS must match Names; each IDREF Name must match the value
6000
 * of an ID attribute on some element in the XML document; i.e. IDREF
6001
 * values must match the value of some ID attribute.
6002
 *
6003
 * [ VC: Entity Name ]
6004
 * Values of type ENTITY must match the Name production, values
6005
 * of type ENTITIES must match Names; each Entity Name must match the
6006
 * name of an unparsed entity declared in the DTD.
6007
 *
6008
 * [ VC: Name Token ]
6009
 * Values of type NMTOKEN must match the Nmtoken production; values
6010
 * of type NMTOKENS must match Nmtokens.
6011
 *
6012
 * Returns the attribute type
6013
 */
6014
int
6015
2.83M
xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
6016
2.83M
    SHRINK;
6017
2.83M
    if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
6018
1.18M
  SKIP(5);
6019
1.18M
  return(XML_ATTRIBUTE_CDATA);
6020
1.64M
     } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
6021
13.0k
  SKIP(6);
6022
13.0k
  return(XML_ATTRIBUTE_IDREFS);
6023
1.63M
     } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
6024
30.6k
  SKIP(5);
6025
30.6k
  return(XML_ATTRIBUTE_IDREF);
6026
1.60M
     } else if ((RAW == 'I') && (NXT(1) == 'D')) {
6027
675k
        SKIP(2);
6028
675k
  return(XML_ATTRIBUTE_ID);
6029
929k
     } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
6030
11.5k
  SKIP(6);
6031
11.5k
  return(XML_ATTRIBUTE_ENTITY);
6032
918k
     } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
6033
1.32k
  SKIP(8);
6034
1.32k
  return(XML_ATTRIBUTE_ENTITIES);
6035
916k
     } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
6036
160k
  SKIP(8);
6037
160k
  return(XML_ATTRIBUTE_NMTOKENS);
6038
756k
     } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
6039
419k
  SKIP(7);
6040
419k
  return(XML_ATTRIBUTE_NMTOKEN);
6041
419k
     }
6042
336k
     return(xmlParseEnumeratedType(ctxt, tree));
6043
2.83M
}
6044
6045
/**
6046
 * xmlParseAttributeListDecl:
6047
 * @ctxt:  an XML parser context
6048
 *
6049
 * DEPRECATED: Internal function, don't use.
6050
 *
6051
 * Parse an attribute list declaration for an element. Always consumes '<!'.
6052
 *
6053
 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
6054
 *
6055
 * [53] AttDef ::= S Name S AttType S DefaultDecl
6056
 *
6057
 */
6058
void
6059
918k
xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
6060
918k
    const xmlChar *elemName;
6061
918k
    const xmlChar *attrName;
6062
918k
    xmlEnumerationPtr tree;
6063
6064
918k
    if ((CUR != '<') || (NXT(1) != '!'))
6065
0
        return;
6066
918k
    SKIP(2);
6067
6068
918k
    if (CMP7(CUR_PTR, 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
6069
917k
  int inputid = ctxt->input->id;
6070
6071
917k
  SKIP(7);
6072
917k
  if (SKIP_BLANKS == 0) {
6073
2.81k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6074
2.81k
                     "Space required after '<!ATTLIST'\n");
6075
2.81k
  }
6076
917k
        elemName = xmlParseName(ctxt);
6077
917k
  if (elemName == NULL) {
6078
1.78k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6079
1.78k
         "ATTLIST: no name for Element\n");
6080
1.78k
      return;
6081
1.78k
  }
6082
915k
  SKIP_BLANKS;
6083
915k
  GROW;
6084
3.72M
  while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
6085
2.84M
      int type;
6086
2.84M
      int def;
6087
2.84M
      xmlChar *defaultValue = NULL;
6088
6089
2.84M
      GROW;
6090
2.84M
            tree = NULL;
6091
2.84M
      attrName = xmlParseName(ctxt);
6092
2.84M
      if (attrName == NULL) {
6093
6.27k
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6094
6.27k
             "ATTLIST: no name for Attribute\n");
6095
6.27k
    break;
6096
6.27k
      }
6097
2.83M
      GROW;
6098
2.83M
      if (SKIP_BLANKS == 0) {
6099
3.03k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6100
3.03k
            "Space required after the attribute name\n");
6101
3.03k
    break;
6102
3.03k
      }
6103
6104
2.83M
      type = xmlParseAttributeType(ctxt, &tree);
6105
2.83M
      if (type <= 0) {
6106
10.2k
          break;
6107
10.2k
      }
6108
6109
2.82M
      GROW;
6110
2.82M
      if (SKIP_BLANKS == 0) {
6111
4.45k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6112
4.45k
             "Space required after the attribute type\n");
6113
4.45k
          if (tree != NULL)
6114
3.13k
        xmlFreeEnumeration(tree);
6115
4.45k
    break;
6116
4.45k
      }
6117
6118
2.81M
      def = xmlParseDefaultDecl(ctxt, &defaultValue);
6119
2.81M
      if (def <= 0) {
6120
0
                if (defaultValue != NULL)
6121
0
        xmlFree(defaultValue);
6122
0
          if (tree != NULL)
6123
0
        xmlFreeEnumeration(tree);
6124
0
          break;
6125
0
      }
6126
2.81M
      if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6127
116k
          xmlAttrNormalizeSpace(defaultValue, defaultValue);
6128
6129
2.81M
      GROW;
6130
2.81M
            if (RAW != '>') {
6131
2.65M
    if (SKIP_BLANKS == 0) {
6132
8.60k
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6133
8.60k
      "Space required after the attribute default value\n");
6134
8.60k
        if (defaultValue != NULL)
6135
4.16k
      xmlFree(defaultValue);
6136
8.60k
        if (tree != NULL)
6137
1.02k
      xmlFreeEnumeration(tree);
6138
8.60k
        break;
6139
8.60k
    }
6140
2.65M
      }
6141
2.80M
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6142
2.80M
    (ctxt->sax->attributeDecl != NULL))
6143
2.50M
    ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6144
2.50M
                          type, def, defaultValue, tree);
6145
302k
      else if (tree != NULL)
6146
38.4k
    xmlFreeEnumeration(tree);
6147
6148
2.80M
      if ((ctxt->sax2) && (defaultValue != NULL) &&
6149
2.80M
          (def != XML_ATTRIBUTE_IMPLIED) &&
6150
2.80M
    (def != XML_ATTRIBUTE_REQUIRED)) {
6151
138k
    xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6152
138k
      }
6153
2.80M
      if (ctxt->sax2) {
6154
1.67M
    xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6155
1.67M
      }
6156
2.80M
      if (defaultValue != NULL)
6157
228k
          xmlFree(defaultValue);
6158
2.80M
      GROW;
6159
2.80M
  }
6160
915k
  if (RAW == '>') {
6161
885k
      if (inputid != ctxt->input->id) {
6162
395
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6163
395
                               "Attribute list declaration doesn't start and"
6164
395
                               " stop in the same entity\n");
6165
395
      }
6166
885k
      NEXT;
6167
885k
  }
6168
915k
    }
6169
918k
}
6170
6171
/**
6172
 * xmlParseElementMixedContentDecl:
6173
 * @ctxt:  an XML parser context
6174
 * @inputchk:  the input used for the current entity, needed for boundary checks
6175
 *
6176
 * DEPRECATED: Internal function, don't use.
6177
 *
6178
 * parse the declaration for a Mixed Element content
6179
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6180
 *
6181
 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6182
 *                '(' S? '#PCDATA' S? ')'
6183
 *
6184
 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6185
 *
6186
 * [ VC: No Duplicate Types ]
6187
 * The same name must not appear more than once in a single
6188
 * mixed-content declaration.
6189
 *
6190
 * returns: the list of the xmlElementContentPtr describing the element choices
6191
 */
6192
xmlElementContentPtr
6193
418k
xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6194
418k
    xmlElementContentPtr ret = NULL, cur = NULL, n;
6195
418k
    const xmlChar *elem = NULL;
6196
6197
418k
    GROW;
6198
418k
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6199
418k
  SKIP(7);
6200
418k
  SKIP_BLANKS;
6201
418k
  SHRINK;
6202
418k
  if (RAW == ')') {
6203
249k
      if (ctxt->input->id != inputchk) {
6204
42
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6205
42
                               "Element content declaration doesn't start and"
6206
42
                               " stop in the same entity\n");
6207
42
      }
6208
249k
      NEXT;
6209
249k
      ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6210
249k
      if (ret == NULL)
6211
0
          return(NULL);
6212
249k
      if (RAW == '*') {
6213
348
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6214
348
    NEXT;
6215
348
      }
6216
249k
      return(ret);
6217
249k
  }
6218
169k
  if ((RAW == '(') || (RAW == '|')) {
6219
167k
      ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6220
167k
      if (ret == NULL) return(NULL);
6221
167k
  }
6222
1.81M
  while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
6223
1.64M
      NEXT;
6224
1.64M
      if (elem == NULL) {
6225
167k
          ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6226
167k
    if (ret == NULL) {
6227
0
        xmlFreeDocElementContent(ctxt->myDoc, cur);
6228
0
                    return(NULL);
6229
0
                }
6230
167k
    ret->c1 = cur;
6231
167k
    if (cur != NULL)
6232
167k
        cur->parent = ret;
6233
167k
    cur = ret;
6234
1.47M
      } else {
6235
1.47M
          n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6236
1.47M
    if (n == NULL) {
6237
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6238
0
                    return(NULL);
6239
0
                }
6240
1.47M
    n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6241
1.47M
    if (n->c1 != NULL)
6242
1.47M
        n->c1->parent = n;
6243
1.47M
          cur->c2 = n;
6244
1.47M
    if (n != NULL)
6245
1.47M
        n->parent = cur;
6246
1.47M
    cur = n;
6247
1.47M
      }
6248
1.64M
      SKIP_BLANKS;
6249
1.64M
      elem = xmlParseName(ctxt);
6250
1.64M
      if (elem == NULL) {
6251
1.23k
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6252
1.23k
      "xmlParseElementMixedContentDecl : Name expected\n");
6253
1.23k
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6254
1.23k
    return(NULL);
6255
1.23k
      }
6256
1.64M
      SKIP_BLANKS;
6257
1.64M
      GROW;
6258
1.64M
  }
6259
168k
  if ((RAW == ')') && (NXT(1) == '*')) {
6260
165k
      if (elem != NULL) {
6261
165k
    cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6262
165k
                                   XML_ELEMENT_CONTENT_ELEMENT);
6263
165k
    if (cur->c2 != NULL)
6264
165k
        cur->c2->parent = cur;
6265
165k
            }
6266
165k
            if (ret != NULL)
6267
165k
                ret->ocur = XML_ELEMENT_CONTENT_MULT;
6268
165k
      if (ctxt->input->id != inputchk) {
6269
12
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6270
12
                               "Element content declaration doesn't start and"
6271
12
                               " stop in the same entity\n");
6272
12
      }
6273
165k
      SKIP(2);
6274
165k
  } else {
6275
2.95k
      xmlFreeDocElementContent(ctxt->myDoc, ret);
6276
2.95k
      xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6277
2.95k
      return(NULL);
6278
2.95k
  }
6279
6280
168k
    } else {
6281
0
  xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6282
0
    }
6283
165k
    return(ret);
6284
418k
}
6285
6286
/**
6287
 * xmlParseElementChildrenContentDeclPriv:
6288
 * @ctxt:  an XML parser context
6289
 * @inputchk:  the input used for the current entity, needed for boundary checks
6290
 * @depth: the level of recursion
6291
 *
6292
 * parse the declaration for a Mixed Element content
6293
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6294
 *
6295
 *
6296
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6297
 *
6298
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6299
 *
6300
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6301
 *
6302
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6303
 *
6304
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6305
 * TODO Parameter-entity replacement text must be properly nested
6306
 *  with parenthesized groups. That is to say, if either of the
6307
 *  opening or closing parentheses in a choice, seq, or Mixed
6308
 *  construct is contained in the replacement text for a parameter
6309
 *  entity, both must be contained in the same replacement text. For
6310
 *  interoperability, if a parameter-entity reference appears in a
6311
 *  choice, seq, or Mixed construct, its replacement text should not
6312
 *  be empty, and neither the first nor last non-blank character of
6313
 *  the replacement text should be a connector (| or ,).
6314
 *
6315
 * Returns the tree of xmlElementContentPtr describing the element
6316
 *          hierarchy.
6317
 */
6318
static xmlElementContentPtr
6319
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6320
434k
                                       int depth) {
6321
434k
    xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6322
434k
    const xmlChar *elem;
6323
434k
    xmlChar type = 0;
6324
6325
434k
    if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6326
434k
        (depth >  2048)) {
6327
0
        xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6328
0
"xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6329
0
                          depth);
6330
0
  return(NULL);
6331
0
    }
6332
434k
    SKIP_BLANKS;
6333
434k
    GROW;
6334
434k
    if (RAW == '(') {
6335
29.6k
  int inputid = ctxt->input->id;
6336
6337
        /* Recurse on first child */
6338
29.6k
  NEXT;
6339
29.6k
  SKIP_BLANKS;
6340
29.6k
        cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6341
29.6k
                                                           depth + 1);
6342
29.6k
        if (cur == NULL)
6343
6.04k
            return(NULL);
6344
23.6k
  SKIP_BLANKS;
6345
23.6k
  GROW;
6346
404k
    } else {
6347
404k
  elem = xmlParseName(ctxt);
6348
404k
  if (elem == NULL) {
6349
3.14k
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6350
3.14k
      return(NULL);
6351
3.14k
  }
6352
401k
        cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6353
401k
  if (cur == NULL) {
6354
0
      xmlErrMemory(ctxt, NULL);
6355
0
      return(NULL);
6356
0
  }
6357
401k
  GROW;
6358
401k
  if (RAW == '?') {
6359
47.4k
      cur->ocur = XML_ELEMENT_CONTENT_OPT;
6360
47.4k
      NEXT;
6361
354k
  } else if (RAW == '*') {
6362
50.8k
      cur->ocur = XML_ELEMENT_CONTENT_MULT;
6363
50.8k
      NEXT;
6364
303k
  } else if (RAW == '+') {
6365
50.0k
      cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6366
50.0k
      NEXT;
6367
253k
  } else {
6368
253k
      cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6369
253k
  }
6370
401k
  GROW;
6371
401k
    }
6372
425k
    SKIP_BLANKS;
6373
425k
    SHRINK;
6374
1.65M
    while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
6375
        /*
6376
   * Each loop we parse one separator and one element.
6377
   */
6378
1.23M
        if (RAW == ',') {
6379
432k
      if (type == 0) type = CUR;
6380
6381
      /*
6382
       * Detect "Name | Name , Name" error
6383
       */
6384
271k
      else if (type != CUR) {
6385
77
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6386
77
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6387
77
                      type);
6388
77
    if ((last != NULL) && (last != ret))
6389
77
        xmlFreeDocElementContent(ctxt->myDoc, last);
6390
77
    if (ret != NULL)
6391
77
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6392
77
    return(NULL);
6393
77
      }
6394
432k
      NEXT;
6395
6396
432k
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6397
432k
      if (op == NULL) {
6398
0
    if ((last != NULL) && (last != ret))
6399
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6400
0
          xmlFreeDocElementContent(ctxt->myDoc, ret);
6401
0
    return(NULL);
6402
0
      }
6403
432k
      if (last == NULL) {
6404
160k
    op->c1 = ret;
6405
160k
    if (ret != NULL)
6406
160k
        ret->parent = op;
6407
160k
    ret = cur = op;
6408
271k
      } else {
6409
271k
          cur->c2 = op;
6410
271k
    if (op != NULL)
6411
271k
        op->parent = cur;
6412
271k
    op->c1 = last;
6413
271k
    if (last != NULL)
6414
271k
        last->parent = op;
6415
271k
    cur =op;
6416
271k
    last = NULL;
6417
271k
      }
6418
805k
  } else if (RAW == '|') {
6419
798k
      if (type == 0) type = CUR;
6420
6421
      /*
6422
       * Detect "Name , Name | Name" error
6423
       */
6424
682k
      else if (type != CUR) {
6425
55
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6426
55
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6427
55
          type);
6428
55
    if ((last != NULL) && (last != ret))
6429
55
        xmlFreeDocElementContent(ctxt->myDoc, last);
6430
55
    if (ret != NULL)
6431
55
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6432
55
    return(NULL);
6433
55
      }
6434
798k
      NEXT;
6435
6436
798k
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6437
798k
      if (op == NULL) {
6438
0
    if ((last != NULL) && (last != ret))
6439
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6440
0
    if (ret != NULL)
6441
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6442
0
    return(NULL);
6443
0
      }
6444
798k
      if (last == NULL) {
6445
116k
    op->c1 = ret;
6446
116k
    if (ret != NULL)
6447
116k
        ret->parent = op;
6448
116k
    ret = cur = op;
6449
682k
      } else {
6450
682k
          cur->c2 = op;
6451
682k
    if (op != NULL)
6452
682k
        op->parent = cur;
6453
682k
    op->c1 = last;
6454
682k
    if (last != NULL)
6455
682k
        last->parent = op;
6456
682k
    cur =op;
6457
682k
    last = NULL;
6458
682k
      }
6459
798k
  } else {
6460
6.51k
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6461
6.51k
      if ((last != NULL) && (last != ret))
6462
2.65k
          xmlFreeDocElementContent(ctxt->myDoc, last);
6463
6.51k
      if (ret != NULL)
6464
6.51k
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6465
6.51k
      return(NULL);
6466
6.51k
  }
6467
1.23M
  GROW;
6468
1.23M
  SKIP_BLANKS;
6469
1.23M
  GROW;
6470
1.23M
  if (RAW == '(') {
6471
55.5k
      int inputid = ctxt->input->id;
6472
      /* Recurse on second child */
6473
55.5k
      NEXT;
6474
55.5k
      SKIP_BLANKS;
6475
55.5k
      last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6476
55.5k
                                                          depth + 1);
6477
55.5k
            if (last == NULL) {
6478
1.77k
    if (ret != NULL)
6479
1.77k
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6480
1.77k
    return(NULL);
6481
1.77k
            }
6482
53.7k
      SKIP_BLANKS;
6483
1.17M
  } else {
6484
1.17M
      elem = xmlParseName(ctxt);
6485
1.17M
      if (elem == NULL) {
6486
2.22k
    xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6487
2.22k
    if (ret != NULL)
6488
2.22k
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6489
2.22k
    return(NULL);
6490
2.22k
      }
6491
1.17M
      last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6492
1.17M
      if (last == NULL) {
6493
0
    if (ret != NULL)
6494
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6495
0
    return(NULL);
6496
0
      }
6497
1.17M
      if (RAW == '?') {
6498
184k
    last->ocur = XML_ELEMENT_CONTENT_OPT;
6499
184k
    NEXT;
6500
989k
      } else if (RAW == '*') {
6501
113k
    last->ocur = XML_ELEMENT_CONTENT_MULT;
6502
113k
    NEXT;
6503
875k
      } else if (RAW == '+') {
6504
20.3k
    last->ocur = XML_ELEMENT_CONTENT_PLUS;
6505
20.3k
    NEXT;
6506
855k
      } else {
6507
855k
    last->ocur = XML_ELEMENT_CONTENT_ONCE;
6508
855k
      }
6509
1.17M
  }
6510
1.22M
  SKIP_BLANKS;
6511
1.22M
  GROW;
6512
1.22M
    }
6513
414k
    if ((cur != NULL) && (last != NULL)) {
6514
270k
        cur->c2 = last;
6515
270k
  if (last != NULL)
6516
270k
      last->parent = cur;
6517
270k
    }
6518
414k
    if (ctxt->input->id != inputchk) {
6519
292
  xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6520
292
                       "Element content declaration doesn't start and stop in"
6521
292
                       " the same entity\n");
6522
292
    }
6523
414k
    NEXT;
6524
414k
    if (RAW == '?') {
6525
17.8k
  if (ret != NULL) {
6526
17.8k
      if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6527
17.8k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6528
117
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6529
17.7k
      else
6530
17.7k
          ret->ocur = XML_ELEMENT_CONTENT_OPT;
6531
17.8k
  }
6532
17.8k
  NEXT;
6533
396k
    } else if (RAW == '*') {
6534
111k
  if (ret != NULL) {
6535
111k
      ret->ocur = XML_ELEMENT_CONTENT_MULT;
6536
111k
      cur = ret;
6537
      /*
6538
       * Some normalization:
6539
       * (a | b* | c?)* == (a | b | c)*
6540
       */
6541
636k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6542
525k
    if ((cur->c1 != NULL) &&
6543
525k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6544
525k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6545
14.3k
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6546
525k
    if ((cur->c2 != NULL) &&
6547
525k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6548
525k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6549
2.36k
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6550
525k
    cur = cur->c2;
6551
525k
      }
6552
111k
  }
6553
111k
  NEXT;
6554
285k
    } else if (RAW == '+') {
6555
50.7k
  if (ret != NULL) {
6556
50.7k
      int found = 0;
6557
6558
50.7k
      if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6559
50.7k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6560
54
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6561
50.6k
      else
6562
50.6k
          ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6563
      /*
6564
       * Some normalization:
6565
       * (a | b*)+ == (a | b)*
6566
       * (a | b?)+ == (a | b)*
6567
       */
6568
81.8k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6569
31.1k
    if ((cur->c1 != NULL) &&
6570
31.1k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6571
31.1k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6572
234
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6573
234
        found = 1;
6574
234
    }
6575
31.1k
    if ((cur->c2 != NULL) &&
6576
31.1k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6577
31.1k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6578
222
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6579
222
        found = 1;
6580
222
    }
6581
31.1k
    cur = cur->c2;
6582
31.1k
      }
6583
50.7k
      if (found)
6584
243
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6585
50.7k
  }
6586
50.7k
  NEXT;
6587
50.7k
    }
6588
414k
    return(ret);
6589
425k
}
6590
6591
/**
6592
 * xmlParseElementChildrenContentDecl:
6593
 * @ctxt:  an XML parser context
6594
 * @inputchk:  the input used for the current entity, needed for boundary checks
6595
 *
6596
 * DEPRECATED: Internal function, don't use.
6597
 *
6598
 * parse the declaration for a Mixed Element content
6599
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6600
 *
6601
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6602
 *
6603
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6604
 *
6605
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6606
 *
6607
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6608
 *
6609
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6610
 * TODO Parameter-entity replacement text must be properly nested
6611
 *  with parenthesized groups. That is to say, if either of the
6612
 *  opening or closing parentheses in a choice, seq, or Mixed
6613
 *  construct is contained in the replacement text for a parameter
6614
 *  entity, both must be contained in the same replacement text. For
6615
 *  interoperability, if a parameter-entity reference appears in a
6616
 *  choice, seq, or Mixed construct, its replacement text should not
6617
 *  be empty, and neither the first nor last non-blank character of
6618
 *  the replacement text should be a connector (| or ,).
6619
 *
6620
 * Returns the tree of xmlElementContentPtr describing the element
6621
 *          hierarchy.
6622
 */
6623
xmlElementContentPtr
6624
0
xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6625
    /* stub left for API/ABI compat */
6626
0
    return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6627
0
}
6628
6629
/**
6630
 * xmlParseElementContentDecl:
6631
 * @ctxt:  an XML parser context
6632
 * @name:  the name of the element being defined.
6633
 * @result:  the Element Content pointer will be stored here if any
6634
 *
6635
 * DEPRECATED: Internal function, don't use.
6636
 *
6637
 * parse the declaration for an Element content either Mixed or Children,
6638
 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6639
 *
6640
 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6641
 *
6642
 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6643
 */
6644
6645
int
6646
xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6647
768k
                           xmlElementContentPtr *result) {
6648
6649
768k
    xmlElementContentPtr tree = NULL;
6650
768k
    int inputid = ctxt->input->id;
6651
768k
    int res;
6652
6653
768k
    *result = NULL;
6654
6655
768k
    if (RAW != '(') {
6656
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6657
0
    "xmlParseElementContentDecl : %s '(' expected\n", name);
6658
0
  return(-1);
6659
0
    }
6660
768k
    NEXT;
6661
768k
    GROW;
6662
768k
    if (ctxt->instate == XML_PARSER_EOF)
6663
0
        return(-1);
6664
768k
    SKIP_BLANKS;
6665
768k
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6666
418k
        tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6667
418k
  res = XML_ELEMENT_TYPE_MIXED;
6668
418k
    } else {
6669
349k
        tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6670
349k
  res = XML_ELEMENT_TYPE_ELEMENT;
6671
349k
    }
6672
768k
    SKIP_BLANKS;
6673
768k
    *result = tree;
6674
768k
    return(res);
6675
768k
}
6676
6677
/**
6678
 * xmlParseElementDecl:
6679
 * @ctxt:  an XML parser context
6680
 *
6681
 * DEPRECATED: Internal function, don't use.
6682
 *
6683
 * Parse an element declaration. Always consumes '<!'.
6684
 *
6685
 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6686
 *
6687
 * [ VC: Unique Element Type Declaration ]
6688
 * No element type may be declared more than once
6689
 *
6690
 * Returns the type of the element, or -1 in case of error
6691
 */
6692
int
6693
1.06M
xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6694
1.06M
    const xmlChar *name;
6695
1.06M
    int ret = -1;
6696
1.06M
    xmlElementContentPtr content  = NULL;
6697
6698
1.06M
    if ((CUR != '<') || (NXT(1) != '!'))
6699
0
        return(ret);
6700
1.06M
    SKIP(2);
6701
6702
    /* GROW; done in the caller */
6703
1.06M
    if (CMP7(CUR_PTR, 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6704
1.06M
  int inputid = ctxt->input->id;
6705
6706
1.06M
  SKIP(7);
6707
1.06M
  if (SKIP_BLANKS == 0) {
6708
1.30k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6709
1.30k
               "Space required after 'ELEMENT'\n");
6710
1.30k
      return(-1);
6711
1.30k
  }
6712
1.06M
        name = xmlParseName(ctxt);
6713
1.06M
  if (name == NULL) {
6714
1.57k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6715
1.57k
         "xmlParseElementDecl: no name for Element\n");
6716
1.57k
      return(-1);
6717
1.57k
  }
6718
1.06M
  if (SKIP_BLANKS == 0) {
6719
4.93k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6720
4.93k
         "Space required after the element name\n");
6721
4.93k
  }
6722
1.06M
  if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6723
281k
      SKIP(5);
6724
      /*
6725
       * Element must always be empty.
6726
       */
6727
281k
      ret = XML_ELEMENT_TYPE_EMPTY;
6728
778k
  } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6729
778k
             (NXT(2) == 'Y')) {
6730
4.79k
      SKIP(3);
6731
      /*
6732
       * Element is a generic container.
6733
       */
6734
4.79k
      ret = XML_ELEMENT_TYPE_ANY;
6735
774k
  } else if (RAW == '(') {
6736
768k
      ret = xmlParseElementContentDecl(ctxt, name, &content);
6737
768k
  } else {
6738
      /*
6739
       * [ WFC: PEs in Internal Subset ] error handling.
6740
       */
6741
6.00k
      if ((RAW == '%') && (ctxt->external == 0) &&
6742
6.00k
          (ctxt->inputNr == 1)) {
6743
460
    xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
6744
460
    "PEReference: forbidden within markup decl in internal subset\n");
6745
5.54k
      } else {
6746
5.54k
    xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6747
5.54k
          "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6748
5.54k
            }
6749
6.00k
      return(-1);
6750
6.00k
  }
6751
6752
1.05M
  SKIP_BLANKS;
6753
6754
1.05M
  if (RAW != '>') {
6755
14.8k
      xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6756
14.8k
      if (content != NULL) {
6757
2.27k
    xmlFreeDocElementContent(ctxt->myDoc, content);
6758
2.27k
      }
6759
1.03M
  } else {
6760
1.03M
      if (inputid != ctxt->input->id) {
6761
73
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6762
73
                               "Element declaration doesn't start and stop in"
6763
73
                               " the same entity\n");
6764
73
      }
6765
6766
1.03M
      NEXT;
6767
1.03M
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6768
1.03M
    (ctxt->sax->elementDecl != NULL)) {
6769
933k
    if (content != NULL)
6770
676k
        content->parent = NULL;
6771
933k
          ctxt->sax->elementDecl(ctxt->userData, name, ret,
6772
933k
                           content);
6773
933k
    if ((content != NULL) && (content->parent == NULL)) {
6774
        /*
6775
         * this is a trick: if xmlAddElementDecl is called,
6776
         * instead of copying the full tree it is plugged directly
6777
         * if called from the parser. Avoid duplicating the
6778
         * interfaces or change the API/ABI
6779
         */
6780
91.0k
        xmlFreeDocElementContent(ctxt->myDoc, content);
6781
91.0k
    }
6782
933k
      } else if (content != NULL) {
6783
73.4k
    xmlFreeDocElementContent(ctxt->myDoc, content);
6784
73.4k
      }
6785
1.03M
  }
6786
1.05M
    }
6787
1.05M
    return(ret);
6788
1.06M
}
6789
6790
/**
6791
 * xmlParseConditionalSections
6792
 * @ctxt:  an XML parser context
6793
 *
6794
 * Parse a conditional section. Always consumes '<!['.
6795
 *
6796
 * [61] conditionalSect ::= includeSect | ignoreSect
6797
 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6798
 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6799
 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6800
 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6801
 */
6802
6803
static void
6804
6.32k
xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6805
6.32k
    int *inputIds = NULL;
6806
6.32k
    size_t inputIdsSize = 0;
6807
6.32k
    size_t depth = 0;
6808
6809
40.6k
    while (ctxt->instate != XML_PARSER_EOF) {
6810
40.4k
        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6811
18.6k
            int id = ctxt->input->id;
6812
6813
18.6k
            SKIP(3);
6814
18.6k
            SKIP_BLANKS;
6815
6816
18.6k
            if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6817
15.3k
                SKIP(7);
6818
15.3k
                SKIP_BLANKS;
6819
15.3k
                if (RAW != '[') {
6820
147
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6821
147
                    xmlHaltParser(ctxt);
6822
147
                    goto error;
6823
147
                }
6824
15.1k
                if (ctxt->input->id != id) {
6825
51
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6826
51
                                   "All markup of the conditional section is"
6827
51
                                   " not in the same entity\n");
6828
51
                }
6829
15.1k
                NEXT;
6830
6831
15.1k
                if (inputIdsSize <= depth) {
6832
4.33k
                    int *tmp;
6833
6834
4.33k
                    inputIdsSize = (inputIdsSize == 0 ? 4 : inputIdsSize * 2);
6835
4.33k
                    tmp = (int *) xmlRealloc(inputIds,
6836
4.33k
                            inputIdsSize * sizeof(int));
6837
4.33k
                    if (tmp == NULL) {
6838
0
                        xmlErrMemory(ctxt, NULL);
6839
0
                        goto error;
6840
0
                    }
6841
4.33k
                    inputIds = tmp;
6842
4.33k
                }
6843
15.1k
                inputIds[depth] = id;
6844
15.1k
                depth++;
6845
15.1k
            } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6846
2.57k
                size_t ignoreDepth = 0;
6847
6848
2.57k
                SKIP(6);
6849
2.57k
                SKIP_BLANKS;
6850
2.57k
                if (RAW != '[') {
6851
144
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6852
144
                    xmlHaltParser(ctxt);
6853
144
                    goto error;
6854
144
                }
6855
2.42k
                if (ctxt->input->id != id) {
6856
18
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6857
18
                                   "All markup of the conditional section is"
6858
18
                                   " not in the same entity\n");
6859
18
                }
6860
2.42k
                NEXT;
6861
6862
4.40M
                while (RAW != 0) {
6863
4.40M
                    if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6864
6.93k
                        SKIP(3);
6865
6.93k
                        ignoreDepth++;
6866
                        /* Check for integer overflow */
6867
6.93k
                        if (ignoreDepth == 0) {
6868
0
                            xmlErrMemory(ctxt, NULL);
6869
0
                            goto error;
6870
0
                        }
6871
4.39M
                    } else if ((RAW == ']') && (NXT(1) == ']') &&
6872
4.39M
                               (NXT(2) == '>')) {
6873
5.85k
                        if (ignoreDepth == 0)
6874
1.31k
                            break;
6875
4.54k
                        SKIP(3);
6876
4.54k
                        ignoreDepth--;
6877
4.39M
                    } else {
6878
4.39M
                        NEXT;
6879
4.39M
                    }
6880
4.40M
                }
6881
6882
2.42k
    if (RAW == 0) {
6883
1.11k
        xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6884
1.11k
                    goto error;
6885
1.11k
    }
6886
1.31k
                if (ctxt->input->id != id) {
6887
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6888
0
                                   "All markup of the conditional section is"
6889
0
                                   " not in the same entity\n");
6890
0
                }
6891
1.31k
                SKIP(3);
6892
1.31k
            } else {
6893
773
                xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6894
773
                xmlHaltParser(ctxt);
6895
773
                goto error;
6896
773
            }
6897
21.8k
        } else if ((depth > 0) &&
6898
21.8k
                   (RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6899
8.41k
            depth--;
6900
8.41k
            if (ctxt->input->id != inputIds[depth]) {
6901
348
                xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6902
348
                               "All markup of the conditional section is not"
6903
348
                               " in the same entity\n");
6904
348
            }
6905
8.41k
            SKIP(3);
6906
13.4k
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
6907
12.1k
            xmlParseMarkupDecl(ctxt);
6908
12.1k
        } else {
6909
1.24k
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6910
1.24k
            xmlHaltParser(ctxt);
6911
1.24k
            goto error;
6912
1.24k
        }
6913
6914
37.0k
        if (depth == 0)
6915
2.73k
            break;
6916
6917
34.3k
        SKIP_BLANKS;
6918
34.3k
        GROW;
6919
34.3k
    }
6920
6921
6.32k
error:
6922
6.32k
    xmlFree(inputIds);
6923
6.32k
}
6924
6925
/**
6926
 * xmlParseMarkupDecl:
6927
 * @ctxt:  an XML parser context
6928
 *
6929
 * DEPRECATED: Internal function, don't use.
6930
 *
6931
 * Parse markup declarations. Always consumes '<!' or '<?'.
6932
 *
6933
 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6934
 *                     NotationDecl | PI | Comment
6935
 *
6936
 * [ VC: Proper Declaration/PE Nesting ]
6937
 * Parameter-entity replacement text must be properly nested with
6938
 * markup declarations. That is to say, if either the first character
6939
 * or the last character of a markup declaration (markupdecl above) is
6940
 * contained in the replacement text for a parameter-entity reference,
6941
 * both must be contained in the same replacement text.
6942
 *
6943
 * [ WFC: PEs in Internal Subset ]
6944
 * In the internal DTD subset, parameter-entity references can occur
6945
 * only where markup declarations can occur, not within markup declarations.
6946
 * (This does not apply to references that occur in external parameter
6947
 * entities or to the external subset.)
6948
 */
6949
void
6950
41.2M
xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6951
41.2M
    GROW;
6952
41.2M
    if (CUR == '<') {
6953
41.2M
        if (NXT(1) == '!') {
6954
41.1M
      switch (NXT(2)) {
6955
2.04M
          case 'E':
6956
2.04M
        if (NXT(3) == 'L')
6957
1.06M
      xmlParseElementDecl(ctxt);
6958
976k
        else if (NXT(3) == 'N')
6959
975k
      xmlParseEntityDecl(ctxt);
6960
469
                    else
6961
469
                        SKIP(2);
6962
2.04M
        break;
6963
918k
          case 'A':
6964
918k
        xmlParseAttributeListDecl(ctxt);
6965
918k
        break;
6966
26.1k
          case 'N':
6967
26.1k
        xmlParseNotationDecl(ctxt);
6968
26.1k
        break;
6969
38.1M
          case '-':
6970
38.1M
        xmlParseComment(ctxt);
6971
38.1M
        break;
6972
8.94k
    default:
6973
        /* there is an error but it will be detected later */
6974
8.94k
                    SKIP(2);
6975
8.94k
        break;
6976
41.1M
      }
6977
41.1M
  } else if (NXT(1) == '?') {
6978
44.3k
      xmlParsePI(ctxt);
6979
44.3k
  }
6980
41.2M
    }
6981
6982
    /*
6983
     * detect requirement to exit there and act accordingly
6984
     * and avoid having instate overridden later on
6985
     */
6986
41.2M
    if (ctxt->instate == XML_PARSER_EOF)
6987
14.8k
        return;
6988
6989
41.1M
    ctxt->instate = XML_PARSER_DTD;
6990
41.1M
}
6991
6992
/**
6993
 * xmlParseTextDecl:
6994
 * @ctxt:  an XML parser context
6995
 *
6996
 * DEPRECATED: Internal function, don't use.
6997
 *
6998
 * parse an XML declaration header for external entities
6999
 *
7000
 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
7001
 */
7002
7003
void
7004
35.6k
xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
7005
35.6k
    xmlChar *version;
7006
35.6k
    const xmlChar *encoding;
7007
35.6k
    int oldstate;
7008
7009
    /*
7010
     * We know that '<?xml' is here.
7011
     */
7012
35.6k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
7013
35.4k
  SKIP(5);
7014
35.4k
    } else {
7015
179
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
7016
179
  return;
7017
179
    }
7018
7019
    /* Avoid expansion of parameter entities when skipping blanks. */
7020
35.4k
    oldstate = ctxt->instate;
7021
35.4k
    ctxt->instate = XML_PARSER_START;
7022
7023
35.4k
    if (SKIP_BLANKS == 0) {
7024
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7025
0
           "Space needed after '<?xml'\n");
7026
0
    }
7027
7028
    /*
7029
     * We may have the VersionInfo here.
7030
     */
7031
35.4k
    version = xmlParseVersionInfo(ctxt);
7032
35.4k
    if (version == NULL)
7033
12.2k
  version = xmlCharStrdup(XML_DEFAULT_VERSION);
7034
23.2k
    else {
7035
23.2k
  if (SKIP_BLANKS == 0) {
7036
2.99k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7037
2.99k
               "Space needed here\n");
7038
2.99k
  }
7039
23.2k
    }
7040
35.4k
    ctxt->input->version = version;
7041
7042
    /*
7043
     * We must have the encoding declaration
7044
     */
7045
35.4k
    encoding = xmlParseEncodingDecl(ctxt);
7046
35.4k
    if (ctxt->instate == XML_PARSER_EOF)
7047
0
        return;
7048
35.4k
    if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7049
  /*
7050
   * The XML REC instructs us to stop parsing right here
7051
   */
7052
783
        ctxt->instate = oldstate;
7053
783
        return;
7054
783
    }
7055
34.6k
    if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
7056
4.13k
  xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
7057
4.13k
           "Missing encoding in text declaration\n");
7058
4.13k
    }
7059
7060
34.6k
    SKIP_BLANKS;
7061
34.6k
    if ((RAW == '?') && (NXT(1) == '>')) {
7062
7.25k
        SKIP(2);
7063
27.4k
    } else if (RAW == '>') {
7064
        /* Deprecated old WD ... */
7065
665
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7066
665
  NEXT;
7067
26.7k
    } else {
7068
26.7k
        int c;
7069
7070
26.7k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7071
1.79M
        while ((c = CUR) != 0) {
7072
1.77M
            NEXT;
7073
1.77M
            if (c == '>')
7074
11.0k
                break;
7075
1.77M
        }
7076
26.7k
    }
7077
7078
34.6k
    ctxt->instate = oldstate;
7079
34.6k
}
7080
7081
/**
7082
 * xmlParseExternalSubset:
7083
 * @ctxt:  an XML parser context
7084
 * @ExternalID: the external identifier
7085
 * @SystemID: the system identifier (or URL)
7086
 *
7087
 * parse Markup declarations from an external subset
7088
 *
7089
 * [30] extSubset ::= textDecl? extSubsetDecl
7090
 *
7091
 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
7092
 */
7093
void
7094
xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
7095
29.2k
                       const xmlChar *SystemID) {
7096
29.2k
    xmlDetectSAX2(ctxt);
7097
29.2k
    GROW;
7098
7099
29.2k
    if ((ctxt->encoding == NULL) &&
7100
29.2k
        (ctxt->input->end - ctxt->input->cur >= 4)) {
7101
29.1k
        xmlChar start[4];
7102
29.1k
  xmlCharEncoding enc;
7103
7104
29.1k
  start[0] = RAW;
7105
29.1k
  start[1] = NXT(1);
7106
29.1k
  start[2] = NXT(2);
7107
29.1k
  start[3] = NXT(3);
7108
29.1k
  enc = xmlDetectCharEncoding(start, 4);
7109
29.1k
  if (enc != XML_CHAR_ENCODING_NONE)
7110
5.51k
      xmlSwitchEncoding(ctxt, enc);
7111
29.1k
    }
7112
7113
29.2k
    if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
7114
5.05k
  xmlParseTextDecl(ctxt);
7115
5.05k
  if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7116
      /*
7117
       * The XML REC instructs us to stop parsing right here
7118
       */
7119
72
      xmlHaltParser(ctxt);
7120
72
      return;
7121
72
  }
7122
5.05k
    }
7123
29.1k
    if (ctxt->myDoc == NULL) {
7124
0
        ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
7125
0
  if (ctxt->myDoc == NULL) {
7126
0
      xmlErrMemory(ctxt, "New Doc failed");
7127
0
      return;
7128
0
  }
7129
0
  ctxt->myDoc->properties = XML_DOC_INTERNAL;
7130
0
    }
7131
29.1k
    if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
7132
0
        xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
7133
7134
29.1k
    ctxt->instate = XML_PARSER_DTD;
7135
29.1k
    ctxt->external = 1;
7136
29.1k
    SKIP_BLANKS;
7137
987k
    while ((ctxt->instate != XML_PARSER_EOF) && (RAW != 0)) {
7138
965k
  GROW;
7139
965k
        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7140
6.32k
            xmlParseConditionalSections(ctxt);
7141
959k
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
7142
952k
            xmlParseMarkupDecl(ctxt);
7143
952k
        } else {
7144
6.91k
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7145
6.91k
            xmlHaltParser(ctxt);
7146
6.91k
            return;
7147
6.91k
        }
7148
958k
        SKIP_BLANKS;
7149
958k
    }
7150
7151
22.2k
    if (RAW != 0) {
7152
0
  xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7153
0
    }
7154
7155
22.2k
}
7156
7157
/**
7158
 * xmlParseReference:
7159
 * @ctxt:  an XML parser context
7160
 *
7161
 * DEPRECATED: Internal function, don't use.
7162
 *
7163
 * parse and handle entity references in content, depending on the SAX
7164
 * interface, this may end-up in a call to character() if this is a
7165
 * CharRef, a predefined entity, if there is no reference() callback.
7166
 * or if the parser was asked to switch to that mode.
7167
 *
7168
 * Always consumes '&'.
7169
 *
7170
 * [67] Reference ::= EntityRef | CharRef
7171
 */
7172
void
7173
3.82M
xmlParseReference(xmlParserCtxtPtr ctxt) {
7174
3.82M
    xmlEntityPtr ent;
7175
3.82M
    xmlChar *val;
7176
3.82M
    int was_checked;
7177
3.82M
    xmlNodePtr list = NULL;
7178
3.82M
    xmlParserErrors ret = XML_ERR_OK;
7179
7180
7181
3.82M
    if (RAW != '&')
7182
0
        return;
7183
7184
    /*
7185
     * Simple case of a CharRef
7186
     */
7187
3.82M
    if (NXT(1) == '#') {
7188
174k
  int i = 0;
7189
174k
  xmlChar out[16];
7190
174k
  int hex = NXT(2);
7191
174k
  int value = xmlParseCharRef(ctxt);
7192
7193
174k
  if (value == 0)
7194
26.7k
      return;
7195
147k
  if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
7196
      /*
7197
       * So we are using non-UTF-8 buffers
7198
       * Check that the char fit on 8bits, if not
7199
       * generate a CharRef.
7200
       */
7201
94.5k
      if (value <= 0xFF) {
7202
91.2k
    out[0] = value;
7203
91.2k
    out[1] = 0;
7204
91.2k
    if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7205
91.2k
        (!ctxt->disableSAX))
7206
61.6k
        ctxt->sax->characters(ctxt->userData, out, 1);
7207
91.2k
      } else {
7208
3.24k
    if ((hex == 'x') || (hex == 'X'))
7209
560
        snprintf((char *)out, sizeof(out), "#x%X", value);
7210
2.68k
    else
7211
2.68k
        snprintf((char *)out, sizeof(out), "#%d", value);
7212
3.24k
    if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7213
3.24k
        (!ctxt->disableSAX))
7214
1.61k
        ctxt->sax->reference(ctxt->userData, out);
7215
3.24k
      }
7216
94.5k
  } else {
7217
      /*
7218
       * Just encode the value in UTF-8
7219
       */
7220
52.9k
      COPY_BUF(0 ,out, i, value);
7221
52.9k
      out[i] = 0;
7222
52.9k
      if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7223
52.9k
    (!ctxt->disableSAX))
7224
43.7k
    ctxt->sax->characters(ctxt->userData, out, i);
7225
52.9k
  }
7226
147k
  return;
7227
174k
    }
7228
7229
    /*
7230
     * We are seeing an entity reference
7231
     */
7232
3.65M
    ent = xmlParseEntityRef(ctxt);
7233
3.65M
    if (ent == NULL) return;
7234
2.97M
    if (!ctxt->wellFormed)
7235
880k
  return;
7236
2.09M
    was_checked = ent->flags & XML_ENT_PARSED;
7237
7238
    /* special case of predefined entities */
7239
2.09M
    if ((ent->name == NULL) ||
7240
2.09M
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7241
126k
  val = ent->content;
7242
126k
  if (val == NULL) return;
7243
  /*
7244
   * inline the entity.
7245
   */
7246
126k
  if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7247
126k
      (!ctxt->disableSAX))
7248
126k
      ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7249
126k
  return;
7250
126k
    }
7251
7252
    /*
7253
     * The first reference to the entity trigger a parsing phase
7254
     * where the ent->children is filled with the result from
7255
     * the parsing.
7256
     * Note: external parsed entities will not be loaded, it is not
7257
     * required for a non-validating parser, unless the parsing option
7258
     * of validating, or substituting entities were given. Doing so is
7259
     * far more secure as the parser will only process data coming from
7260
     * the document entity by default.
7261
     */
7262
1.96M
    if (((ent->flags & XML_ENT_PARSED) == 0) &&
7263
1.96M
        ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
7264
117k
         (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
7265
111k
  unsigned long oldsizeentcopy = ctxt->sizeentcopy;
7266
7267
  /*
7268
   * This is a bit hackish but this seems the best
7269
   * way to make sure both SAX and DOM entity support
7270
   * behaves okay.
7271
   */
7272
111k
  void *user_data;
7273
111k
  if (ctxt->userData == ctxt)
7274
111k
      user_data = NULL;
7275
0
  else
7276
0
      user_data = ctxt->userData;
7277
7278
        /* Avoid overflow as much as possible */
7279
111k
        ctxt->sizeentcopy = 0;
7280
7281
111k
        if (ent->flags & XML_ENT_EXPANDING) {
7282
353
            xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7283
353
            xmlHaltParser(ctxt);
7284
353
            return;
7285
353
        }
7286
7287
110k
        ent->flags |= XML_ENT_EXPANDING;
7288
7289
  /*
7290
   * Check that this entity is well formed
7291
   * 4.3.2: An internal general parsed entity is well-formed
7292
   * if its replacement text matches the production labeled
7293
   * content.
7294
   */
7295
110k
  if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7296
53.0k
      ctxt->depth++;
7297
53.0k
      ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
7298
53.0k
                                                user_data, &list);
7299
53.0k
      ctxt->depth--;
7300
7301
57.6k
  } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7302
57.6k
      ctxt->depth++;
7303
57.6k
      ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
7304
57.6k
                                     user_data, ctxt->depth, ent->URI,
7305
57.6k
             ent->ExternalID, &list);
7306
57.6k
      ctxt->depth--;
7307
57.6k
  } else {
7308
0
      ret = XML_ERR_ENTITY_PE_INTERNAL;
7309
0
      xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7310
0
       "invalid entity type found\n", NULL);
7311
0
  }
7312
7313
110k
        ent->flags &= ~XML_ENT_EXPANDING;
7314
110k
        ent->flags |= XML_ENT_PARSED | XML_ENT_CHECKED;
7315
110k
        ent->expandedSize = ctxt->sizeentcopy;
7316
110k
  if (ret == XML_ERR_ENTITY_LOOP) {
7317
4.75k
            xmlHaltParser(ctxt);
7318
4.75k
      xmlFreeNodeList(list);
7319
4.75k
      return;
7320
4.75k
  }
7321
105k
  if (xmlParserEntityCheck(ctxt, oldsizeentcopy)) {
7322
0
      xmlFreeNodeList(list);
7323
0
      return;
7324
0
  }
7325
7326
105k
  if ((ret == XML_ERR_OK) && (list != NULL)) {
7327
47.6k
            ent->children = list;
7328
            /*
7329
             * Prune it directly in the generated document
7330
             * except for single text nodes.
7331
             */
7332
47.6k
            if ((ctxt->replaceEntities == 0) ||
7333
47.6k
                (ctxt->parseMode == XML_PARSE_READER) ||
7334
47.6k
                ((list->type == XML_TEXT_NODE) &&
7335
39.5k
                 (list->next == NULL))) {
7336
39.5k
                ent->owner = 1;
7337
733k
                while (list != NULL) {
7338
694k
                    list->parent = (xmlNodePtr) ent;
7339
694k
                    if (list->doc != ent->doc)
7340
0
                        xmlSetTreeDoc(list, ent->doc);
7341
694k
                    if (list->next == NULL)
7342
39.5k
                        ent->last = list;
7343
694k
                    list = list->next;
7344
694k
                }
7345
39.5k
                list = NULL;
7346
39.5k
            } else {
7347
8.13k
                ent->owner = 0;
7348
1.20M
                while (list != NULL) {
7349
1.19M
                    list->parent = (xmlNodePtr) ctxt->node;
7350
1.19M
                    list->doc = ctxt->myDoc;
7351
1.19M
                    if (list->next == NULL)
7352
8.13k
                        ent->last = list;
7353
1.19M
                    list = list->next;
7354
1.19M
                }
7355
8.13k
                list = ent->children;
7356
#ifdef LIBXML_LEGACY_ENABLED
7357
                if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7358
                    xmlAddEntityReference(ent, list, NULL);
7359
#endif /* LIBXML_LEGACY_ENABLED */
7360
8.13k
            }
7361
58.2k
  } else if ((ret != XML_ERR_OK) &&
7362
58.2k
       (ret != XML_WAR_UNDECLARED_ENTITY)) {
7363
27.9k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7364
27.9k
         "Entity '%s' failed to parse\n", ent->name);
7365
27.9k
            if (ent->content != NULL)
7366
5.34k
                ent->content[0] = 0;
7367
30.2k
  } else if (list != NULL) {
7368
0
      xmlFreeNodeList(list);
7369
0
      list = NULL;
7370
0
  }
7371
7372
        /* Prevent entity from being parsed and expanded twice (Bug 760367). */
7373
105k
        was_checked = 0;
7374
105k
    }
7375
7376
    /*
7377
     * Now that the entity content has been gathered
7378
     * provide it to the application, this can take different forms based
7379
     * on the parsing modes.
7380
     */
7381
1.96M
    if (ent->children == NULL) {
7382
  /*
7383
   * Probably running in SAX mode and the callbacks don't
7384
   * build the entity content. So unless we already went
7385
   * though parsing for first checking go though the entity
7386
   * content to generate callbacks associated to the entity
7387
   */
7388
599k
  if (was_checked != 0) {
7389
534k
      void *user_data;
7390
      /*
7391
       * This is a bit hackish but this seems the best
7392
       * way to make sure both SAX and DOM entity support
7393
       * behaves okay.
7394
       */
7395
534k
      if (ctxt->userData == ctxt)
7396
534k
    user_data = NULL;
7397
0
      else
7398
0
    user_data = ctxt->userData;
7399
7400
534k
      if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7401
38.5k
    ctxt->depth++;
7402
38.5k
    ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7403
38.5k
           ent->content, user_data, NULL);
7404
38.5k
    ctxt->depth--;
7405
495k
      } else if (ent->etype ==
7406
495k
           XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7407
495k
          unsigned long oldsizeentities = ctxt->sizeentities;
7408
7409
495k
    ctxt->depth++;
7410
495k
    ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7411
495k
         ctxt->sax, user_data, ctxt->depth,
7412
495k
         ent->URI, ent->ExternalID, NULL);
7413
495k
    ctxt->depth--;
7414
7415
                /* Undo the change to sizeentities */
7416
495k
                ctxt->sizeentities = oldsizeentities;
7417
495k
      } else {
7418
0
    ret = XML_ERR_ENTITY_PE_INTERNAL;
7419
0
    xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7420
0
           "invalid entity type found\n", NULL);
7421
0
      }
7422
534k
      if (ret == XML_ERR_ENTITY_LOOP) {
7423
0
    xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7424
0
    return;
7425
0
      }
7426
534k
            if (xmlParserEntityCheck(ctxt, 0))
7427
0
                return;
7428
534k
  }
7429
599k
  if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7430
599k
      (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7431
      /*
7432
       * Entity reference callback comes second, it's somewhat
7433
       * superfluous but a compatibility to historical behaviour
7434
       */
7435
121k
      ctxt->sax->reference(ctxt->userData, ent->name);
7436
121k
  }
7437
599k
  return;
7438
599k
    }
7439
7440
    /*
7441
     * We also check for amplification if entities aren't substituted.
7442
     * They might be expanded later.
7443
     */
7444
1.36M
    if ((was_checked != 0) &&
7445
1.36M
        (xmlParserEntityCheck(ctxt, ent->expandedSize)))
7446
192
        return;
7447
7448
    /*
7449
     * If we didn't get any children for the entity being built
7450
     */
7451
1.36M
    if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7452
1.36M
  (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7453
  /*
7454
   * Create a node.
7455
   */
7456
281k
  ctxt->sax->reference(ctxt->userData, ent->name);
7457
281k
  return;
7458
281k
    }
7459
7460
1.08M
    if (ctxt->replaceEntities)  {
7461
  /*
7462
   * There is a problem on the handling of _private for entities
7463
   * (bug 155816): Should we copy the content of the field from
7464
   * the entity (possibly overwriting some value set by the user
7465
   * when a copy is created), should we leave it alone, or should
7466
   * we try to take care of different situations?  The problem
7467
   * is exacerbated by the usage of this field by the xmlReader.
7468
   * To fix this bug, we look at _private on the created node
7469
   * and, if it's NULL, we copy in whatever was in the entity.
7470
   * If it's not NULL we leave it alone.  This is somewhat of a
7471
   * hack - maybe we should have further tests to determine
7472
   * what to do.
7473
   */
7474
1.08M
  if (ctxt->node != NULL) {
7475
      /*
7476
       * Seems we are generating the DOM content, do
7477
       * a simple tree copy for all references except the first
7478
       * In the first occurrence list contains the replacement.
7479
       */
7480
1.08M
      if (((list == NULL) && (ent->owner == 0)) ||
7481
1.08M
    (ctxt->parseMode == XML_PARSE_READER)) {
7482
340k
    xmlNodePtr nw = NULL, cur, firstChild = NULL;
7483
7484
    /*
7485
     * when operating on a reader, the entities definitions
7486
     * are always owning the entities subtree.
7487
    if (ctxt->parseMode == XML_PARSE_READER)
7488
        ent->owner = 1;
7489
     */
7490
7491
340k
    cur = ent->children;
7492
1.03M
    while (cur != NULL) {
7493
1.03M
        nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7494
1.03M
        if (nw != NULL) {
7495
1.03M
      if (nw->_private == NULL)
7496
1.03M
          nw->_private = cur->_private;
7497
1.03M
      if (firstChild == NULL){
7498
340k
          firstChild = nw;
7499
340k
      }
7500
1.03M
      nw = xmlAddChild(ctxt->node, nw);
7501
1.03M
        }
7502
1.03M
        if (cur == ent->last) {
7503
      /*
7504
       * needed to detect some strange empty
7505
       * node cases in the reader tests
7506
       */
7507
340k
      if ((ctxt->parseMode == XML_PARSE_READER) &&
7508
340k
          (nw != NULL) &&
7509
340k
          (nw->type == XML_ELEMENT_NODE) &&
7510
340k
          (nw->children == NULL))
7511
3.52k
          nw->extra = 1;
7512
7513
340k
      break;
7514
340k
        }
7515
697k
        cur = cur->next;
7516
697k
    }
7517
#ifdef LIBXML_LEGACY_ENABLED
7518
    if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7519
      xmlAddEntityReference(ent, firstChild, nw);
7520
#endif /* LIBXML_LEGACY_ENABLED */
7521
739k
      } else if ((list == NULL) || (ctxt->inputNr > 0)) {
7522
739k
    xmlNodePtr nw = NULL, cur, next, last,
7523
739k
         firstChild = NULL;
7524
7525
    /*
7526
     * Copy the entity child list and make it the new
7527
     * entity child list. The goal is to make sure any
7528
     * ID or REF referenced will be the one from the
7529
     * document content and not the entity copy.
7530
     */
7531
739k
    cur = ent->children;
7532
739k
    ent->children = NULL;
7533
739k
    last = ent->last;
7534
739k
    ent->last = NULL;
7535
4.23M
    while (cur != NULL) {
7536
4.23M
        next = cur->next;
7537
4.23M
        cur->next = NULL;
7538
4.23M
        cur->parent = NULL;
7539
4.23M
        nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7540
4.23M
        if (nw != NULL) {
7541
4.23M
      if (nw->_private == NULL)
7542
4.23M
          nw->_private = cur->_private;
7543
4.23M
      if (firstChild == NULL){
7544
739k
          firstChild = cur;
7545
739k
      }
7546
4.23M
      xmlAddChild((xmlNodePtr) ent, nw);
7547
4.23M
        }
7548
4.23M
        xmlAddChild(ctxt->node, cur);
7549
4.23M
        if (cur == last)
7550
739k
      break;
7551
3.49M
        cur = next;
7552
3.49M
    }
7553
739k
    if (ent->owner == 0)
7554
8.13k
        ent->owner = 1;
7555
#ifdef LIBXML_LEGACY_ENABLED
7556
    if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7557
      xmlAddEntityReference(ent, firstChild, nw);
7558
#endif /* LIBXML_LEGACY_ENABLED */
7559
739k
      } else {
7560
0
    const xmlChar *nbktext;
7561
7562
    /*
7563
     * the name change is to avoid coalescing of the
7564
     * node with a possible previous text one which
7565
     * would make ent->children a dangling pointer
7566
     */
7567
0
    nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7568
0
          -1);
7569
0
    if (ent->children->type == XML_TEXT_NODE)
7570
0
        ent->children->name = nbktext;
7571
0
    if ((ent->last != ent->children) &&
7572
0
        (ent->last->type == XML_TEXT_NODE))
7573
0
        ent->last->name = nbktext;
7574
0
    xmlAddChildList(ctxt->node, ent->children);
7575
0
      }
7576
7577
      /*
7578
       * This is to avoid a nasty side effect, see
7579
       * characters() in SAX.c
7580
       */
7581
1.08M
      ctxt->nodemem = 0;
7582
1.08M
      ctxt->nodelen = 0;
7583
1.08M
      return;
7584
1.08M
  }
7585
1.08M
    }
7586
1.08M
}
7587
7588
/**
7589
 * xmlParseEntityRef:
7590
 * @ctxt:  an XML parser context
7591
 *
7592
 * DEPRECATED: Internal function, don't use.
7593
 *
7594
 * Parse an entity reference. Always consumes '&'.
7595
 *
7596
 * [68] EntityRef ::= '&' Name ';'
7597
 *
7598
 * [ WFC: Entity Declared ]
7599
 * In a document without any DTD, a document with only an internal DTD
7600
 * subset which contains no parameter entity references, or a document
7601
 * with "standalone='yes'", the Name given in the entity reference
7602
 * must match that in an entity declaration, except that well-formed
7603
 * documents need not declare any of the following entities: amp, lt,
7604
 * gt, apos, quot.  The declaration of a parameter entity must precede
7605
 * any reference to it.  Similarly, the declaration of a general entity
7606
 * must precede any reference to it which appears in a default value in an
7607
 * attribute-list declaration. Note that if entities are declared in the
7608
 * external subset or in external parameter entities, a non-validating
7609
 * processor is not obligated to read and process their declarations;
7610
 * for such documents, the rule that an entity must be declared is a
7611
 * well-formedness constraint only if standalone='yes'.
7612
 *
7613
 * [ WFC: Parsed Entity ]
7614
 * An entity reference must not contain the name of an unparsed entity
7615
 *
7616
 * Returns the xmlEntityPtr if found, or NULL otherwise.
7617
 */
7618
xmlEntityPtr
7619
5.94M
xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7620
5.94M
    const xmlChar *name;
7621
5.94M
    xmlEntityPtr ent = NULL;
7622
7623
5.94M
    GROW;
7624
5.94M
    if (ctxt->instate == XML_PARSER_EOF)
7625
0
        return(NULL);
7626
7627
5.94M
    if (RAW != '&')
7628
0
        return(NULL);
7629
5.94M
    NEXT;
7630
5.94M
    name = xmlParseName(ctxt);
7631
5.94M
    if (name == NULL) {
7632
84.7k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7633
84.7k
           "xmlParseEntityRef: no name\n");
7634
84.7k
        return(NULL);
7635
84.7k
    }
7636
5.86M
    if (RAW != ';') {
7637
65.7k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7638
65.7k
  return(NULL);
7639
65.7k
    }
7640
5.79M
    NEXT;
7641
7642
    /*
7643
     * Predefined entities override any extra definition
7644
     */
7645
5.79M
    if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7646
4.07M
        ent = xmlGetPredefinedEntity(name);
7647
4.07M
        if (ent != NULL)
7648
201k
            return(ent);
7649
4.07M
    }
7650
7651
    /*
7652
     * Ask first SAX for entity resolution, otherwise try the
7653
     * entities which may have been stored in the parser context.
7654
     */
7655
5.59M
    if (ctxt->sax != NULL) {
7656
5.59M
  if (ctxt->sax->getEntity != NULL)
7657
5.59M
      ent = ctxt->sax->getEntity(ctxt->userData, name);
7658
5.59M
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7659
5.59M
      (ctxt->options & XML_PARSE_OLDSAX))
7660
39.0k
      ent = xmlGetPredefinedEntity(name);
7661
5.59M
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7662
5.59M
      (ctxt->userData==ctxt)) {
7663
84.8k
      ent = xmlSAX2GetEntity(ctxt, name);
7664
84.8k
  }
7665
5.59M
    }
7666
5.59M
    if (ctxt->instate == XML_PARSER_EOF)
7667
0
  return(NULL);
7668
    /*
7669
     * [ WFC: Entity Declared ]
7670
     * In a document without any DTD, a document with only an
7671
     * internal DTD subset which contains no parameter entity
7672
     * references, or a document with "standalone='yes'", the
7673
     * Name given in the entity reference must match that in an
7674
     * entity declaration, except that well-formed documents
7675
     * need not declare any of the following entities: amp, lt,
7676
     * gt, apos, quot.
7677
     * The declaration of a parameter entity must precede any
7678
     * reference to it.
7679
     * Similarly, the declaration of a general entity must
7680
     * precede any reference to it which appears in a default
7681
     * value in an attribute-list declaration. Note that if
7682
     * entities are declared in the external subset or in
7683
     * external parameter entities, a non-validating processor
7684
     * is not obligated to read and process their declarations;
7685
     * for such documents, the rule that an entity must be
7686
     * declared is a well-formedness constraint only if
7687
     * standalone='yes'.
7688
     */
7689
5.59M
    if (ent == NULL) {
7690
808k
  if ((ctxt->standalone == 1) ||
7691
808k
      ((ctxt->hasExternalSubset == 0) &&
7692
762k
       (ctxt->hasPErefs == 0))) {
7693
522k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7694
522k
         "Entity '%s' not defined\n", name);
7695
522k
  } else {
7696
286k
      xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7697
286k
         "Entity '%s' not defined\n", name);
7698
286k
      if ((ctxt->inSubset == 0) &&
7699
286k
    (ctxt->sax != NULL) &&
7700
286k
    (ctxt->sax->reference != NULL)) {
7701
276k
    ctxt->sax->reference(ctxt->userData, name);
7702
276k
      }
7703
286k
  }
7704
808k
  ctxt->valid = 0;
7705
808k
    }
7706
7707
    /*
7708
     * [ WFC: Parsed Entity ]
7709
     * An entity reference must not contain the name of an
7710
     * unparsed entity
7711
     */
7712
4.78M
    else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7713
2.46k
  xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7714
2.46k
     "Entity reference to unparsed entity %s\n", name);
7715
2.46k
    }
7716
7717
    /*
7718
     * [ WFC: No External Entity References ]
7719
     * Attribute values cannot contain direct or indirect
7720
     * entity references to external entities.
7721
     */
7722
4.78M
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7723
4.78M
       (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7724
10.4k
  xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7725
10.4k
       "Attribute references external entity '%s'\n", name);
7726
10.4k
    }
7727
    /*
7728
     * [ WFC: No < in Attribute Values ]
7729
     * The replacement text of any entity referred to directly or
7730
     * indirectly in an attribute value (other than "&lt;") must
7731
     * not contain a <.
7732
     */
7733
4.77M
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7734
4.77M
       (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7735
1.95M
  if ((ent->flags & XML_ENT_CHECKED_LT) == 0) {
7736
24.2k
            if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7737
704
                ent->flags |= XML_ENT_CONTAINS_LT;
7738
24.2k
            ent->flags |= XML_ENT_CHECKED_LT;
7739
24.2k
        }
7740
1.95M
        if (ent->flags & XML_ENT_CONTAINS_LT)
7741
17.0k
            xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7742
17.0k
                    "'<' in entity '%s' is not allowed in attributes "
7743
17.0k
                    "values\n", name);
7744
1.95M
    }
7745
7746
    /*
7747
     * Internal check, no parameter entities here ...
7748
     */
7749
2.81M
    else {
7750
2.81M
  switch (ent->etype) {
7751
0
      case XML_INTERNAL_PARAMETER_ENTITY:
7752
0
      case XML_EXTERNAL_PARAMETER_ENTITY:
7753
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7754
0
       "Attempt to reference the parameter entity '%s'\n",
7755
0
            name);
7756
0
      break;
7757
2.81M
      default:
7758
2.81M
      break;
7759
2.81M
  }
7760
2.81M
    }
7761
7762
    /*
7763
     * [ WFC: No Recursion ]
7764
     * A parsed entity must not contain a recursive reference
7765
     * to itself, either directly or indirectly.
7766
     * Done somewhere else
7767
     */
7768
5.59M
    return(ent);
7769
5.59M
}
7770
7771
/**
7772
 * xmlParseStringEntityRef:
7773
 * @ctxt:  an XML parser context
7774
 * @str:  a pointer to an index in the string
7775
 *
7776
 * Parse an entity reference, but this version parses it from
7777
 * a string value.
7778
 *
7779
 * [68] EntityRef ::= '&' Name ';'
7780
 *
7781
 * [ WFC: Entity Declared ]
7782
 * In a document without any DTD, a document with only an internal DTD
7783
 * subset which contains no parameter entity references, or a document
7784
 * with "standalone='yes'", the Name given in the entity reference
7785
 * must match that in an entity declaration, except that well-formed
7786
 * documents need not declare any of the following entities: amp, lt,
7787
 * gt, apos, quot.  The declaration of a parameter entity must precede
7788
 * any reference to it.  Similarly, the declaration of a general entity
7789
 * must precede any reference to it which appears in a default value in an
7790
 * attribute-list declaration. Note that if entities are declared in the
7791
 * external subset or in external parameter entities, a non-validating
7792
 * processor is not obligated to read and process their declarations;
7793
 * for such documents, the rule that an entity must be declared is a
7794
 * well-formedness constraint only if standalone='yes'.
7795
 *
7796
 * [ WFC: Parsed Entity ]
7797
 * An entity reference must not contain the name of an unparsed entity
7798
 *
7799
 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7800
 * is updated to the current location in the string.
7801
 */
7802
static xmlEntityPtr
7803
25.2M
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7804
25.2M
    xmlChar *name;
7805
25.2M
    const xmlChar *ptr;
7806
25.2M
    xmlChar cur;
7807
25.2M
    xmlEntityPtr ent = NULL;
7808
7809
25.2M
    if ((str == NULL) || (*str == NULL))
7810
0
        return(NULL);
7811
25.2M
    ptr = *str;
7812
25.2M
    cur = *ptr;
7813
25.2M
    if (cur != '&')
7814
0
  return(NULL);
7815
7816
25.2M
    ptr++;
7817
25.2M
    name = xmlParseStringName(ctxt, &ptr);
7818
25.2M
    if (name == NULL) {
7819
2.36k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7820
2.36k
           "xmlParseStringEntityRef: no name\n");
7821
2.36k
  *str = ptr;
7822
2.36k
  return(NULL);
7823
2.36k
    }
7824
25.2M
    if (*ptr != ';') {
7825
6.98k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7826
6.98k
        xmlFree(name);
7827
6.98k
  *str = ptr;
7828
6.98k
  return(NULL);
7829
6.98k
    }
7830
25.2M
    ptr++;
7831
7832
7833
    /*
7834
     * Predefined entities override any extra definition
7835
     */
7836
25.2M
    if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7837
23.9M
        ent = xmlGetPredefinedEntity(name);
7838
23.9M
        if (ent != NULL) {
7839
33.5k
            xmlFree(name);
7840
33.5k
            *str = ptr;
7841
33.5k
            return(ent);
7842
33.5k
        }
7843
23.9M
    }
7844
7845
    /*
7846
     * Ask first SAX for entity resolution, otherwise try the
7847
     * entities which may have been stored in the parser context.
7848
     */
7849
25.2M
    if (ctxt->sax != NULL) {
7850
25.2M
  if (ctxt->sax->getEntity != NULL)
7851
25.2M
      ent = ctxt->sax->getEntity(ctxt->userData, name);
7852
25.2M
  if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7853
84.8k
      ent = xmlGetPredefinedEntity(name);
7854
25.2M
  if ((ent == NULL) && (ctxt->userData==ctxt)) {
7855
589k
      ent = xmlSAX2GetEntity(ctxt, name);
7856
589k
  }
7857
25.2M
    }
7858
25.2M
    if (ctxt->instate == XML_PARSER_EOF) {
7859
0
  xmlFree(name);
7860
0
  return(NULL);
7861
0
    }
7862
7863
    /*
7864
     * [ WFC: Entity Declared ]
7865
     * In a document without any DTD, a document with only an
7866
     * internal DTD subset which contains no parameter entity
7867
     * references, or a document with "standalone='yes'", the
7868
     * Name given in the entity reference must match that in an
7869
     * entity declaration, except that well-formed documents
7870
     * need not declare any of the following entities: amp, lt,
7871
     * gt, apos, quot.
7872
     * The declaration of a parameter entity must precede any
7873
     * reference to it.
7874
     * Similarly, the declaration of a general entity must
7875
     * precede any reference to it which appears in a default
7876
     * value in an attribute-list declaration. Note that if
7877
     * entities are declared in the external subset or in
7878
     * external parameter entities, a non-validating processor
7879
     * is not obligated to read and process their declarations;
7880
     * for such documents, the rule that an entity must be
7881
     * declared is a well-formedness constraint only if
7882
     * standalone='yes'.
7883
     */
7884
25.2M
    if (ent == NULL) {
7885
589k
  if ((ctxt->standalone == 1) ||
7886
589k
      ((ctxt->hasExternalSubset == 0) &&
7887
577k
       (ctxt->hasPErefs == 0))) {
7888
573k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7889
573k
         "Entity '%s' not defined\n", name);
7890
573k
  } else {
7891
16.8k
      xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7892
16.8k
        "Entity '%s' not defined\n",
7893
16.8k
        name);
7894
16.8k
  }
7895
  /* TODO ? check regressions ctxt->valid = 0; */
7896
589k
    }
7897
7898
    /*
7899
     * [ WFC: Parsed Entity ]
7900
     * An entity reference must not contain the name of an
7901
     * unparsed entity
7902
     */
7903
24.6M
    else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7904
967
  xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7905
967
     "Entity reference to unparsed entity %s\n", name);
7906
967
    }
7907
7908
    /*
7909
     * [ WFC: No External Entity References ]
7910
     * Attribute values cannot contain direct or indirect
7911
     * entity references to external entities.
7912
     */
7913
24.6M
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7914
24.6M
       (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7915
13.8k
  xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7916
13.8k
   "Attribute references external entity '%s'\n", name);
7917
13.8k
    }
7918
    /*
7919
     * [ WFC: No < in Attribute Values ]
7920
     * The replacement text of any entity referred to directly or
7921
     * indirectly in an attribute value (other than "&lt;") must
7922
     * not contain a <.
7923
     */
7924
24.6M
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7925
24.6M
       (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7926
24.3M
  if ((ent->flags & XML_ENT_CHECKED_LT) == 0) {
7927
11.4k
            if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7928
806
                ent->flags |= XML_ENT_CONTAINS_LT;
7929
11.4k
            ent->flags |= XML_ENT_CHECKED_LT;
7930
11.4k
        }
7931
24.3M
        if (ent->flags & XML_ENT_CONTAINS_LT)
7932
99.4k
            xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7933
99.4k
                    "'<' in entity '%s' is not allowed in attributes "
7934
99.4k
                    "values\n", name);
7935
24.3M
    }
7936
7937
    /*
7938
     * Internal check, no parameter entities here ...
7939
     */
7940
298k
    else {
7941
298k
  switch (ent->etype) {
7942
0
      case XML_INTERNAL_PARAMETER_ENTITY:
7943
0
      case XML_EXTERNAL_PARAMETER_ENTITY:
7944
0
    xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7945
0
       "Attempt to reference the parameter entity '%s'\n",
7946
0
          name);
7947
0
      break;
7948
298k
      default:
7949
298k
      break;
7950
298k
  }
7951
298k
    }
7952
7953
    /*
7954
     * [ WFC: No Recursion ]
7955
     * A parsed entity must not contain a recursive reference
7956
     * to itself, either directly or indirectly.
7957
     * Done somewhere else
7958
     */
7959
7960
25.2M
    xmlFree(name);
7961
25.2M
    *str = ptr;
7962
25.2M
    return(ent);
7963
25.2M
}
7964
7965
/**
7966
 * xmlParsePEReference:
7967
 * @ctxt:  an XML parser context
7968
 *
7969
 * DEPRECATED: Internal function, don't use.
7970
 *
7971
 * Parse a parameter entity reference. Always consumes '%'.
7972
 *
7973
 * The entity content is handled directly by pushing its content as
7974
 * a new input stream.
7975
 *
7976
 * [69] PEReference ::= '%' Name ';'
7977
 *
7978
 * [ WFC: No Recursion ]
7979
 * A parsed entity must not contain a recursive
7980
 * reference to itself, either directly or indirectly.
7981
 *
7982
 * [ WFC: Entity Declared ]
7983
 * In a document without any DTD, a document with only an internal DTD
7984
 * subset which contains no parameter entity references, or a document
7985
 * with "standalone='yes'", ...  ... The declaration of a parameter
7986
 * entity must precede any reference to it...
7987
 *
7988
 * [ VC: Entity Declared ]
7989
 * In a document with an external subset or external parameter entities
7990
 * with "standalone='no'", ...  ... The declaration of a parameter entity
7991
 * must precede any reference to it...
7992
 *
7993
 * [ WFC: In DTD ]
7994
 * Parameter-entity references may only appear in the DTD.
7995
 * NOTE: misleading but this is handled.
7996
 */
7997
void
7998
xmlParsePEReference(xmlParserCtxtPtr ctxt)
7999
49.9M
{
8000
49.9M
    const xmlChar *name;
8001
49.9M
    xmlEntityPtr entity = NULL;
8002
49.9M
    xmlParserInputPtr input;
8003
8004
49.9M
    if (RAW != '%')
8005
0
        return;
8006
49.9M
    NEXT;
8007
49.9M
    name = xmlParseName(ctxt);
8008
49.9M
    if (name == NULL) {
8009
7.70k
  xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
8010
7.70k
  return;
8011
7.70k
    }
8012
49.9M
    if (xmlParserDebugEntities)
8013
0
  xmlGenericError(xmlGenericErrorContext,
8014
0
    "PEReference: %s\n", name);
8015
49.9M
    if (RAW != ';') {
8016
414k
  xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
8017
414k
        return;
8018
414k
    }
8019
8020
49.5M
    NEXT;
8021
8022
    /*
8023
     * Request the entity from SAX
8024
     */
8025
49.5M
    if ((ctxt->sax != NULL) &&
8026
49.5M
  (ctxt->sax->getParameterEntity != NULL))
8027
49.5M
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8028
49.5M
    if (ctxt->instate == XML_PARSER_EOF)
8029
0
  return;
8030
49.5M
    if (entity == NULL) {
8031
  /*
8032
   * [ WFC: Entity Declared ]
8033
   * In a document without any DTD, a document with only an
8034
   * internal DTD subset which contains no parameter entity
8035
   * references, or a document with "standalone='yes'", ...
8036
   * ... The declaration of a parameter entity must precede
8037
   * any reference to it...
8038
   */
8039
10.7M
  if ((ctxt->standalone == 1) ||
8040
10.7M
      ((ctxt->hasExternalSubset == 0) &&
8041
10.7M
       (ctxt->hasPErefs == 0))) {
8042
1.77k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8043
1.77k
            "PEReference: %%%s; not found\n",
8044
1.77k
            name);
8045
10.7M
  } else {
8046
      /*
8047
       * [ VC: Entity Declared ]
8048
       * In a document with an external subset or external
8049
       * parameter entities with "standalone='no'", ...
8050
       * ... The declaration of a parameter entity must
8051
       * precede any reference to it...
8052
       */
8053
10.7M
            if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
8054
456k
                xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
8055
456k
                                 "PEReference: %%%s; not found\n",
8056
456k
                                 name, NULL);
8057
456k
            } else
8058
10.3M
                xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8059
10.3M
                              "PEReference: %%%s; not found\n",
8060
10.3M
                              name, NULL);
8061
10.7M
            ctxt->valid = 0;
8062
10.7M
  }
8063
38.8M
    } else {
8064
  /*
8065
   * Internal checking in case the entity quest barfed
8066
   */
8067
38.8M
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8068
38.8M
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8069
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8070
0
      "Internal: %%%s; is not a parameter entity\n",
8071
0
        name, NULL);
8072
38.8M
  } else {
8073
38.8M
            xmlChar start[4];
8074
38.8M
            xmlCharEncoding enc;
8075
38.8M
            unsigned long parentConsumed;
8076
38.8M
            xmlEntityPtr oldEnt;
8077
8078
38.8M
      if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
8079
38.8M
          ((ctxt->options & XML_PARSE_NOENT) == 0) &&
8080
38.8M
    ((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
8081
38.8M
    ((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
8082
38.8M
    ((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
8083
38.8M
    (ctxt->replaceEntities == 0) &&
8084
38.8M
    (ctxt->validate == 0))
8085
1.20k
    return;
8086
8087
38.7M
            if (entity->flags & XML_ENT_EXPANDING) {
8088
396
                xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
8089
396
                xmlHaltParser(ctxt);
8090
396
                return;
8091
396
            }
8092
8093
            /* Must be computed from old input before pushing new input. */
8094
38.7M
            parentConsumed = ctxt->input->parentConsumed;
8095
38.7M
            oldEnt = ctxt->input->entity;
8096
38.7M
            if ((oldEnt == NULL) ||
8097
38.7M
                ((oldEnt->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
8098
38.4M
                 ((oldEnt->flags & XML_ENT_PARSED) == 0))) {
8099
861k
                xmlSaturatedAdd(&parentConsumed, ctxt->input->consumed);
8100
861k
                xmlSaturatedAddSizeT(&parentConsumed,
8101
861k
                                     ctxt->input->cur - ctxt->input->base);
8102
861k
            }
8103
8104
38.7M
      input = xmlNewEntityInputStream(ctxt, entity);
8105
38.7M
      if (xmlPushInput(ctxt, input) < 0) {
8106
9.63k
                xmlFreeInputStream(input);
8107
9.63k
    return;
8108
9.63k
            }
8109
8110
38.7M
            entity->flags |= XML_ENT_EXPANDING;
8111
8112
38.7M
            input->parentConsumed = parentConsumed;
8113
8114
38.7M
      if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
8115
                /*
8116
                 * Get the 4 first bytes and decode the charset
8117
                 * if enc != XML_CHAR_ENCODING_NONE
8118
                 * plug some encoding conversion routines.
8119
                 * Note that, since we may have some non-UTF8
8120
                 * encoding (like UTF16, bug 135229), the 'length'
8121
                 * is not known, but we can calculate based upon
8122
                 * the amount of data in the buffer.
8123
                 */
8124
67.9k
                GROW
8125
67.9k
                if (ctxt->instate == XML_PARSER_EOF)
8126
0
                    return;
8127
67.9k
                if ((ctxt->input->end - ctxt->input->cur)>=4) {
8128
67.1k
                    start[0] = RAW;
8129
67.1k
                    start[1] = NXT(1);
8130
67.1k
                    start[2] = NXT(2);
8131
67.1k
                    start[3] = NXT(3);
8132
67.1k
                    enc = xmlDetectCharEncoding(start, 4);
8133
67.1k
                    if (enc != XML_CHAR_ENCODING_NONE) {
8134
31.3k
                        xmlSwitchEncoding(ctxt, enc);
8135
31.3k
                    }
8136
67.1k
                }
8137
8138
67.9k
                if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
8139
67.9k
                    (IS_BLANK_CH(NXT(5)))) {
8140
27.1k
                    xmlParseTextDecl(ctxt);
8141
27.1k
                }
8142
67.9k
            }
8143
38.7M
  }
8144
38.8M
    }
8145
49.5M
    ctxt->hasPErefs = 1;
8146
49.5M
}
8147
8148
/**
8149
 * xmlLoadEntityContent:
8150
 * @ctxt:  an XML parser context
8151
 * @entity: an unloaded system entity
8152
 *
8153
 * Load the original content of the given system entity from the
8154
 * ExternalID/SystemID given. This is to be used for Included in Literal
8155
 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
8156
 *
8157
 * Returns 0 in case of success and -1 in case of failure
8158
 */
8159
static int
8160
4.38k
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
8161
4.38k
    xmlParserInputPtr input;
8162
4.38k
    xmlBufferPtr buf;
8163
4.38k
    int l, c;
8164
4.38k
    int count = 0;
8165
8166
4.38k
    if ((ctxt == NULL) || (entity == NULL) ||
8167
4.38k
        ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
8168
4.38k
   (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
8169
4.38k
  (entity->content != NULL)) {
8170
0
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8171
0
              "xmlLoadEntityContent parameter error");
8172
0
        return(-1);
8173
0
    }
8174
8175
4.38k
    if (xmlParserDebugEntities)
8176
0
  xmlGenericError(xmlGenericErrorContext,
8177
0
    "Reading %s entity content input\n", entity->name);
8178
8179
4.38k
    buf = xmlBufferCreate();
8180
4.38k
    if (buf == NULL) {
8181
0
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8182
0
              "xmlLoadEntityContent parameter error");
8183
0
        return(-1);
8184
0
    }
8185
4.38k
    xmlBufferSetAllocationScheme(buf, XML_BUFFER_ALLOC_DOUBLEIT);
8186
8187
4.38k
    input = xmlNewEntityInputStream(ctxt, entity);
8188
4.38k
    if (input == NULL) {
8189
1.13k
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8190
1.13k
              "xmlLoadEntityContent input error");
8191
1.13k
  xmlBufferFree(buf);
8192
1.13k
        return(-1);
8193
1.13k
    }
8194
8195
    /*
8196
     * Push the entity as the current input, read char by char
8197
     * saving to the buffer until the end of the entity or an error
8198
     */
8199
3.25k
    if (xmlPushInput(ctxt, input) < 0) {
8200
0
        xmlBufferFree(buf);
8201
0
  xmlFreeInputStream(input);
8202
0
  return(-1);
8203
0
    }
8204
8205
3.25k
    GROW;
8206
3.25k
    c = CUR_CHAR(l);
8207
11.1M
    while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
8208
11.1M
           (IS_CHAR(c))) {
8209
11.1M
        xmlBufferAdd(buf, ctxt->input->cur, l);
8210
11.1M
  if (count++ > XML_PARSER_CHUNK_SIZE) {
8211
107k
      count = 0;
8212
107k
      GROW;
8213
107k
            if (ctxt->instate == XML_PARSER_EOF) {
8214
0
                xmlBufferFree(buf);
8215
0
                return(-1);
8216
0
            }
8217
107k
  }
8218
11.1M
  NEXTL(l);
8219
11.1M
  c = CUR_CHAR(l);
8220
11.1M
  if (c == 0) {
8221
2.77k
      count = 0;
8222
2.77k
      GROW;
8223
2.77k
            if (ctxt->instate == XML_PARSER_EOF) {
8224
0
                xmlBufferFree(buf);
8225
0
                return(-1);
8226
0
            }
8227
2.77k
      c = CUR_CHAR(l);
8228
2.77k
  }
8229
11.1M
    }
8230
8231
3.25k
    if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
8232
1.69k
        xmlSaturatedAdd(&ctxt->sizeentities, ctxt->input->consumed);
8233
1.69k
        xmlPopInput(ctxt);
8234
1.69k
    } else if (!IS_CHAR(c)) {
8235
1.56k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8236
1.56k
                          "xmlLoadEntityContent: invalid char value %d\n",
8237
1.56k
                    c);
8238
1.56k
  xmlBufferFree(buf);
8239
1.56k
  return(-1);
8240
1.56k
    }
8241
1.69k
    entity->content = buf->content;
8242
1.69k
    entity->length = buf->use;
8243
1.69k
    buf->content = NULL;
8244
1.69k
    xmlBufferFree(buf);
8245
8246
1.69k
    return(0);
8247
3.25k
}
8248
8249
/**
8250
 * xmlParseStringPEReference:
8251
 * @ctxt:  an XML parser context
8252
 * @str:  a pointer to an index in the string
8253
 *
8254
 * Parse a parameter entity reference from a string value.
8255
 *
8256
 * [69] PEReference ::= '%' Name ';'
8257
 *
8258
 * [ WFC: No Recursion ]
8259
 * A parsed entity must not contain a recursive
8260
 * reference to itself, either directly or indirectly.
8261
 *
8262
 * [ WFC: Entity Declared ]
8263
 * In a document without any DTD, a document with only an internal DTD
8264
 * subset which contains no parameter entity references, or a document
8265
 * with "standalone='yes'", ...  ... The declaration of a parameter
8266
 * entity must precede any reference to it...
8267
 *
8268
 * [ VC: Entity Declared ]
8269
 * In a document with an external subset or external parameter entities
8270
 * with "standalone='no'", ...  ... The declaration of a parameter entity
8271
 * must precede any reference to it...
8272
 *
8273
 * [ WFC: In DTD ]
8274
 * Parameter-entity references may only appear in the DTD.
8275
 * NOTE: misleading but this is handled.
8276
 *
8277
 * Returns the xmlEntityPtr if found, or NULL otherwise.
8278
 *         str is updated to the current value of the index
8279
 */
8280
static xmlEntityPtr
8281
1.20M
xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8282
1.20M
    const xmlChar *ptr;
8283
1.20M
    xmlChar cur;
8284
1.20M
    xmlChar *name;
8285
1.20M
    xmlEntityPtr entity = NULL;
8286
8287
1.20M
    if ((str == NULL) || (*str == NULL)) return(NULL);
8288
1.20M
    ptr = *str;
8289
1.20M
    cur = *ptr;
8290
1.20M
    if (cur != '%')
8291
0
        return(NULL);
8292
1.20M
    ptr++;
8293
1.20M
    name = xmlParseStringName(ctxt, &ptr);
8294
1.20M
    if (name == NULL) {
8295
8.23k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8296
8.23k
           "xmlParseStringPEReference: no name\n");
8297
8.23k
  *str = ptr;
8298
8.23k
  return(NULL);
8299
8.23k
    }
8300
1.19M
    cur = *ptr;
8301
1.19M
    if (cur != ';') {
8302
2.61k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8303
2.61k
  xmlFree(name);
8304
2.61k
  *str = ptr;
8305
2.61k
  return(NULL);
8306
2.61k
    }
8307
1.18M
    ptr++;
8308
8309
    /*
8310
     * Request the entity from SAX
8311
     */
8312
1.18M
    if ((ctxt->sax != NULL) &&
8313
1.18M
  (ctxt->sax->getParameterEntity != NULL))
8314
1.18M
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8315
1.18M
    if (ctxt->instate == XML_PARSER_EOF) {
8316
0
  xmlFree(name);
8317
0
  *str = ptr;
8318
0
  return(NULL);
8319
0
    }
8320
1.18M
    if (entity == NULL) {
8321
  /*
8322
   * [ WFC: Entity Declared ]
8323
   * In a document without any DTD, a document with only an
8324
   * internal DTD subset which contains no parameter entity
8325
   * references, or a document with "standalone='yes'", ...
8326
   * ... The declaration of a parameter entity must precede
8327
   * any reference to it...
8328
   */
8329
503k
  if ((ctxt->standalone == 1) ||
8330
503k
      ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8331
702
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8332
702
     "PEReference: %%%s; not found\n", name);
8333
502k
  } else {
8334
      /*
8335
       * [ VC: Entity Declared ]
8336
       * In a document with an external subset or external
8337
       * parameter entities with "standalone='no'", ...
8338
       * ... The declaration of a parameter entity must
8339
       * precede any reference to it...
8340
       */
8341
502k
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8342
502k
        "PEReference: %%%s; not found\n",
8343
502k
        name, NULL);
8344
502k
      ctxt->valid = 0;
8345
502k
  }
8346
686k
    } else {
8347
  /*
8348
   * Internal checking in case the entity quest barfed
8349
   */
8350
686k
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8351
686k
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8352
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8353
0
        "%%%s; is not a parameter entity\n",
8354
0
        name, NULL);
8355
0
  }
8356
686k
    }
8357
1.18M
    ctxt->hasPErefs = 1;
8358
1.18M
    xmlFree(name);
8359
1.18M
    *str = ptr;
8360
1.18M
    return(entity);
8361
1.18M
}
8362
8363
/**
8364
 * xmlParseDocTypeDecl:
8365
 * @ctxt:  an XML parser context
8366
 *
8367
 * DEPRECATED: Internal function, don't use.
8368
 *
8369
 * parse a DOCTYPE declaration
8370
 *
8371
 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8372
 *                      ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8373
 *
8374
 * [ VC: Root Element Type ]
8375
 * The Name in the document type declaration must match the element
8376
 * type of the root element.
8377
 */
8378
8379
void
8380
250k
xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
8381
250k
    const xmlChar *name = NULL;
8382
250k
    xmlChar *ExternalID = NULL;
8383
250k
    xmlChar *URI = NULL;
8384
8385
    /*
8386
     * We know that '<!DOCTYPE' has been detected.
8387
     */
8388
250k
    SKIP(9);
8389
8390
250k
    SKIP_BLANKS;
8391
8392
    /*
8393
     * Parse the DOCTYPE name.
8394
     */
8395
250k
    name = xmlParseName(ctxt);
8396
250k
    if (name == NULL) {
8397
559
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8398
559
           "xmlParseDocTypeDecl : no DOCTYPE name !\n");
8399
559
    }
8400
250k
    ctxt->intSubName = name;
8401
8402
250k
    SKIP_BLANKS;
8403
8404
    /*
8405
     * Check for SystemID and ExternalID
8406
     */
8407
250k
    URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8408
8409
250k
    if ((URI != NULL) || (ExternalID != NULL)) {
8410
100k
        ctxt->hasExternalSubset = 1;
8411
100k
    }
8412
250k
    ctxt->extSubURI = URI;
8413
250k
    ctxt->extSubSystem = ExternalID;
8414
8415
250k
    SKIP_BLANKS;
8416
8417
    /*
8418
     * Create and update the internal subset.
8419
     */
8420
250k
    if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8421
250k
  (!ctxt->disableSAX))
8422
247k
  ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8423
250k
    if (ctxt->instate == XML_PARSER_EOF)
8424
0
  return;
8425
8426
    /*
8427
     * Is there any internal subset declarations ?
8428
     * they are handled separately in xmlParseInternalSubset()
8429
     */
8430
250k
    if (RAW == '[')
8431
200k
  return;
8432
8433
    /*
8434
     * We should be at the end of the DOCTYPE declaration.
8435
     */
8436
50.0k
    if (RAW != '>') {
8437
5.42k
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8438
5.42k
    }
8439
50.0k
    NEXT;
8440
50.0k
}
8441
8442
/**
8443
 * xmlParseInternalSubset:
8444
 * @ctxt:  an XML parser context
8445
 *
8446
 * parse the internal subset declaration
8447
 *
8448
 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8449
 */
8450
8451
static void
8452
200k
xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8453
    /*
8454
     * Is there any DTD definition ?
8455
     */
8456
200k
    if (RAW == '[') {
8457
200k
        int baseInputNr = ctxt->inputNr;
8458
200k
        ctxt->instate = XML_PARSER_DTD;
8459
200k
        NEXT;
8460
  /*
8461
   * Parse the succession of Markup declarations and
8462
   * PEReferences.
8463
   * Subsequence (markupdecl | PEReference | S)*
8464
   */
8465
200k
  SKIP_BLANKS;
8466
40.4M
  while (((RAW != ']') || (ctxt->inputNr > baseInputNr)) &&
8467
40.4M
               (ctxt->instate != XML_PARSER_EOF)) {
8468
8469
            /*
8470
             * Conditional sections are allowed from external entities included
8471
             * by PE References in the internal subset.
8472
             */
8473
40.3M
            if ((ctxt->inputNr > 1) && (ctxt->input->filename != NULL) &&
8474
40.3M
                (RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
8475
0
                xmlParseConditionalSections(ctxt);
8476
40.3M
            } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
8477
40.2M
          xmlParseMarkupDecl(ctxt);
8478
40.2M
            } else if (RAW == '%') {
8479
43.1k
          xmlParsePEReference(ctxt);
8480
43.1k
            } else {
8481
38.0k
    xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8482
38.0k
                        "xmlParseInternalSubset: error detected in"
8483
38.0k
                        " Markup declaration\n");
8484
38.0k
                xmlHaltParser(ctxt);
8485
38.0k
                return;
8486
38.0k
            }
8487
40.2M
      SKIP_BLANKS;
8488
40.2M
  }
8489
162k
  if (RAW == ']') {
8490
150k
      NEXT;
8491
150k
      SKIP_BLANKS;
8492
150k
  }
8493
162k
    }
8494
8495
    /*
8496
     * We should be at the end of the DOCTYPE declaration.
8497
     */
8498
162k
    if (RAW != '>') {
8499
12.9k
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8500
12.9k
  return;
8501
12.9k
    }
8502
149k
    NEXT;
8503
149k
}
8504
8505
#ifdef LIBXML_SAX1_ENABLED
8506
/**
8507
 * xmlParseAttribute:
8508
 * @ctxt:  an XML parser context
8509
 * @value:  a xmlChar ** used to store the value of the attribute
8510
 *
8511
 * DEPRECATED: Internal function, don't use.
8512
 *
8513
 * parse an attribute
8514
 *
8515
 * [41] Attribute ::= Name Eq AttValue
8516
 *
8517
 * [ WFC: No External Entity References ]
8518
 * Attribute values cannot contain direct or indirect entity references
8519
 * to external entities.
8520
 *
8521
 * [ WFC: No < in Attribute Values ]
8522
 * The replacement text of any entity referred to directly or indirectly in
8523
 * an attribute value (other than "&lt;") must not contain a <.
8524
 *
8525
 * [ VC: Attribute Value Type ]
8526
 * The attribute must have been declared; the value must be of the type
8527
 * declared for it.
8528
 *
8529
 * [25] Eq ::= S? '=' S?
8530
 *
8531
 * With namespace:
8532
 *
8533
 * [NS 11] Attribute ::= QName Eq AttValue
8534
 *
8535
 * Also the case QName == xmlns:??? is handled independently as a namespace
8536
 * definition.
8537
 *
8538
 * Returns the attribute name, and the value in *value.
8539
 */
8540
8541
const xmlChar *
8542
3.00M
xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8543
3.00M
    const xmlChar *name;
8544
3.00M
    xmlChar *val;
8545
8546
3.00M
    *value = NULL;
8547
3.00M
    GROW;
8548
3.00M
    name = xmlParseName(ctxt);
8549
3.00M
    if (name == NULL) {
8550
164k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8551
164k
                 "error parsing attribute name\n");
8552
164k
        return(NULL);
8553
164k
    }
8554
8555
    /*
8556
     * read the value
8557
     */
8558
2.83M
    SKIP_BLANKS;
8559
2.83M
    if (RAW == '=') {
8560
2.73M
        NEXT;
8561
2.73M
  SKIP_BLANKS;
8562
2.73M
  val = xmlParseAttValue(ctxt);
8563
2.73M
  ctxt->instate = XML_PARSER_CONTENT;
8564
2.73M
    } else {
8565
106k
  xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8566
106k
         "Specification mandates value for attribute %s\n", name);
8567
106k
  return(name);
8568
106k
    }
8569
8570
    /*
8571
     * Check that xml:lang conforms to the specification
8572
     * No more registered as an error, just generate a warning now
8573
     * since this was deprecated in XML second edition
8574
     */
8575
2.73M
    if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8576
8.97k
  if (!xmlCheckLanguageID(val)) {
8577
5.79k
      xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8578
5.79k
              "Malformed value for xml:lang : %s\n",
8579
5.79k
        val, NULL);
8580
5.79k
  }
8581
8.97k
    }
8582
8583
    /*
8584
     * Check that xml:space conforms to the specification
8585
     */
8586
2.73M
    if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8587
1.86k
  if (xmlStrEqual(val, BAD_CAST "default"))
8588
129
      *(ctxt->space) = 0;
8589
1.74k
  else if (xmlStrEqual(val, BAD_CAST "preserve"))
8590
436
      *(ctxt->space) = 1;
8591
1.30k
  else {
8592
1.30k
    xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8593
1.30k
"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8594
1.30k
                                 val, NULL);
8595
1.30k
  }
8596
1.86k
    }
8597
8598
2.73M
    *value = val;
8599
2.73M
    return(name);
8600
2.83M
}
8601
8602
/**
8603
 * xmlParseStartTag:
8604
 * @ctxt:  an XML parser context
8605
 *
8606
 * DEPRECATED: Internal function, don't use.
8607
 *
8608
 * Parse a start tag. Always consumes '<'.
8609
 *
8610
 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8611
 *
8612
 * [ WFC: Unique Att Spec ]
8613
 * No attribute name may appear more than once in the same start-tag or
8614
 * empty-element tag.
8615
 *
8616
 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8617
 *
8618
 * [ WFC: Unique Att Spec ]
8619
 * No attribute name may appear more than once in the same start-tag or
8620
 * empty-element tag.
8621
 *
8622
 * With namespace:
8623
 *
8624
 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8625
 *
8626
 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8627
 *
8628
 * Returns the element name parsed
8629
 */
8630
8631
const xmlChar *
8632
3.20M
xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8633
3.20M
    const xmlChar *name;
8634
3.20M
    const xmlChar *attname;
8635
3.20M
    xmlChar *attvalue;
8636
3.20M
    const xmlChar **atts = ctxt->atts;
8637
3.20M
    int nbatts = 0;
8638
3.20M
    int maxatts = ctxt->maxatts;
8639
3.20M
    int i;
8640
8641
3.20M
    if (RAW != '<') return(NULL);
8642
3.20M
    NEXT1;
8643
8644
3.20M
    name = xmlParseName(ctxt);
8645
3.20M
    if (name == NULL) {
8646
114k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8647
114k
       "xmlParseStartTag: invalid element name\n");
8648
114k
        return(NULL);
8649
114k
    }
8650
8651
    /*
8652
     * Now parse the attributes, it ends up with the ending
8653
     *
8654
     * (S Attribute)* S?
8655
     */
8656
3.08M
    SKIP_BLANKS;
8657
3.08M
    GROW;
8658
8659
4.26M
    while (((RAW != '>') &&
8660
4.26M
     ((RAW != '/') || (NXT(1) != '>')) &&
8661
4.26M
     (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
8662
3.00M
  attname = xmlParseAttribute(ctxt, &attvalue);
8663
3.00M
        if (attname == NULL) {
8664
164k
      xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8665
164k
         "xmlParseStartTag: problem parsing attributes\n");
8666
164k
      break;
8667
164k
  }
8668
2.83M
        if (attvalue != NULL) {
8669
      /*
8670
       * [ WFC: Unique Att Spec ]
8671
       * No attribute name may appear more than once in the same
8672
       * start-tag or empty-element tag.
8673
       */
8674
3.77M
      for (i = 0; i < nbatts;i += 2) {
8675
1.06M
          if (xmlStrEqual(atts[i], attname)) {
8676
7.91k
        xmlErrAttributeDup(ctxt, NULL, attname);
8677
7.91k
        xmlFree(attvalue);
8678
7.91k
        goto failed;
8679
7.91k
    }
8680
1.06M
      }
8681
      /*
8682
       * Add the pair to atts
8683
       */
8684
2.71M
      if (atts == NULL) {
8685
69.4k
          maxatts = 22; /* allow for 10 attrs by default */
8686
69.4k
          atts = (const xmlChar **)
8687
69.4k
           xmlMalloc(maxatts * sizeof(xmlChar *));
8688
69.4k
    if (atts == NULL) {
8689
0
        xmlErrMemory(ctxt, NULL);
8690
0
        if (attvalue != NULL)
8691
0
      xmlFree(attvalue);
8692
0
        goto failed;
8693
0
    }
8694
69.4k
    ctxt->atts = atts;
8695
69.4k
    ctxt->maxatts = maxatts;
8696
2.64M
      } else if (nbatts + 4 > maxatts) {
8697
153
          const xmlChar **n;
8698
8699
153
          maxatts *= 2;
8700
153
          n = (const xmlChar **) xmlRealloc((void *) atts,
8701
153
               maxatts * sizeof(const xmlChar *));
8702
153
    if (n == NULL) {
8703
0
        xmlErrMemory(ctxt, NULL);
8704
0
        if (attvalue != NULL)
8705
0
      xmlFree(attvalue);
8706
0
        goto failed;
8707
0
    }
8708
153
    atts = n;
8709
153
    ctxt->atts = atts;
8710
153
    ctxt->maxatts = maxatts;
8711
153
      }
8712
2.71M
      atts[nbatts++] = attname;
8713
2.71M
      atts[nbatts++] = attvalue;
8714
2.71M
      atts[nbatts] = NULL;
8715
2.71M
      atts[nbatts + 1] = NULL;
8716
2.71M
  } else {
8717
118k
      if (attvalue != NULL)
8718
0
    xmlFree(attvalue);
8719
118k
  }
8720
8721
2.83M
failed:
8722
8723
2.83M
  GROW
8724
2.83M
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8725
1.66M
      break;
8726
1.17M
  if (SKIP_BLANKS == 0) {
8727
173k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8728
173k
         "attributes construct error\n");
8729
173k
  }
8730
1.17M
  SHRINK;
8731
1.17M
        GROW;
8732
1.17M
    }
8733
8734
    /*
8735
     * SAX: Start of Element !
8736
     */
8737
3.08M
    if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8738
3.08M
  (!ctxt->disableSAX)) {
8739
2.78M
  if (nbatts > 0)
8740
1.55M
      ctxt->sax->startElement(ctxt->userData, name, atts);
8741
1.23M
  else
8742
1.23M
      ctxt->sax->startElement(ctxt->userData, name, NULL);
8743
2.78M
    }
8744
8745
3.08M
    if (atts != NULL) {
8746
        /* Free only the content strings */
8747
5.53M
        for (i = 1;i < nbatts;i+=2)
8748
2.71M
      if (atts[i] != NULL)
8749
2.71M
         xmlFree((xmlChar *) atts[i]);
8750
2.82M
    }
8751
3.08M
    return(name);
8752
3.08M
}
8753
8754
/**
8755
 * xmlParseEndTag1:
8756
 * @ctxt:  an XML parser context
8757
 * @line:  line of the start tag
8758
 * @nsNr:  number of namespaces on the start tag
8759
 *
8760
 * Parse an end tag. Always consumes '</'.
8761
 *
8762
 * [42] ETag ::= '</' Name S? '>'
8763
 *
8764
 * With namespace
8765
 *
8766
 * [NS 9] ETag ::= '</' QName S? '>'
8767
 */
8768
8769
static void
8770
1.67M
xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8771
1.67M
    const xmlChar *name;
8772
8773
1.67M
    GROW;
8774
1.67M
    if ((RAW != '<') || (NXT(1) != '/')) {
8775
0
  xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8776
0
           "xmlParseEndTag: '</' not found\n");
8777
0
  return;
8778
0
    }
8779
1.67M
    SKIP(2);
8780
8781
1.67M
    name = xmlParseNameAndCompare(ctxt,ctxt->name);
8782
8783
    /*
8784
     * We should definitely be at the ending "S? '>'" part
8785
     */
8786
1.67M
    GROW;
8787
1.67M
    SKIP_BLANKS;
8788
1.67M
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8789
26.8k
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8790
26.8k
    } else
8791
1.64M
  NEXT1;
8792
8793
    /*
8794
     * [ WFC: Element Type Match ]
8795
     * The Name in an element's end-tag must match the element type in the
8796
     * start-tag.
8797
     *
8798
     */
8799
1.67M
    if (name != (xmlChar*)1) {
8800
76.1k
        if (name == NULL) name = BAD_CAST "unparsable";
8801
76.1k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8802
76.1k
         "Opening and ending tag mismatch: %s line %d and %s\n",
8803
76.1k
                    ctxt->name, line, name);
8804
76.1k
    }
8805
8806
    /*
8807
     * SAX: End of Tag
8808
     */
8809
1.67M
    if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8810
1.67M
  (!ctxt->disableSAX))
8811
1.52M
        ctxt->sax->endElement(ctxt->userData, ctxt->name);
8812
8813
1.67M
    namePop(ctxt);
8814
1.67M
    spacePop(ctxt);
8815
1.67M
    return;
8816
1.67M
}
8817
8818
/**
8819
 * xmlParseEndTag:
8820
 * @ctxt:  an XML parser context
8821
 *
8822
 * DEPRECATED: Internal function, don't use.
8823
 *
8824
 * parse an end of tag
8825
 *
8826
 * [42] ETag ::= '</' Name S? '>'
8827
 *
8828
 * With namespace
8829
 *
8830
 * [NS 9] ETag ::= '</' QName S? '>'
8831
 */
8832
8833
void
8834
0
xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8835
0
    xmlParseEndTag1(ctxt, 0);
8836
0
}
8837
#endif /* LIBXML_SAX1_ENABLED */
8838
8839
/************************************************************************
8840
 *                  *
8841
 *          SAX 2 specific operations       *
8842
 *                  *
8843
 ************************************************************************/
8844
8845
/*
8846
 * xmlGetNamespace:
8847
 * @ctxt:  an XML parser context
8848
 * @prefix:  the prefix to lookup
8849
 *
8850
 * Lookup the namespace name for the @prefix (which ca be NULL)
8851
 * The prefix must come from the @ctxt->dict dictionary
8852
 *
8853
 * Returns the namespace name or NULL if not bound
8854
 */
8855
static const xmlChar *
8856
5.98M
xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8857
5.98M
    int i;
8858
8859
5.98M
    if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
8860
6.39M
    for (i = ctxt->nsNr - 2;i >= 0;i-=2)
8861
959k
        if (ctxt->nsTab[i] == prefix) {
8862
464k
      if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8863
2.70k
          return(NULL);
8864
461k
      return(ctxt->nsTab[i + 1]);
8865
464k
  }
8866
5.43M
    return(NULL);
8867
5.89M
}
8868
8869
/**
8870
 * xmlParseQName:
8871
 * @ctxt:  an XML parser context
8872
 * @prefix:  pointer to store the prefix part
8873
 *
8874
 * parse an XML Namespace QName
8875
 *
8876
 * [6]  QName  ::= (Prefix ':')? LocalPart
8877
 * [7]  Prefix  ::= NCName
8878
 * [8]  LocalPart  ::= NCName
8879
 *
8880
 * Returns the Name parsed or NULL
8881
 */
8882
8883
static const xmlChar *
8884
12.2M
xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8885
12.2M
    const xmlChar *l, *p;
8886
8887
12.2M
    GROW;
8888
8889
12.2M
    l = xmlParseNCName(ctxt);
8890
12.2M
    if (l == NULL) {
8891
258k
        if (CUR == ':') {
8892
4.83k
      l = xmlParseName(ctxt);
8893
4.83k
      if (l != NULL) {
8894
4.83k
          xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8895
4.83k
             "Failed to parse QName '%s'\n", l, NULL, NULL);
8896
4.83k
    *prefix = NULL;
8897
4.83k
    return(l);
8898
4.83k
      }
8899
4.83k
  }
8900
253k
        return(NULL);
8901
258k
    }
8902
12.0M
    if (CUR == ':') {
8903
603k
        NEXT;
8904
603k
  p = l;
8905
603k
  l = xmlParseNCName(ctxt);
8906
603k
  if (l == NULL) {
8907
10.8k
      xmlChar *tmp;
8908
8909
10.8k
            if (ctxt->instate == XML_PARSER_EOF)
8910
0
                return(NULL);
8911
10.8k
            xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8912
10.8k
               "Failed to parse QName '%s:'\n", p, NULL, NULL);
8913
10.8k
      l = xmlParseNmtoken(ctxt);
8914
10.8k
      if (l == NULL) {
8915
6.45k
                if (ctxt->instate == XML_PARSER_EOF)
8916
0
                    return(NULL);
8917
6.45k
    tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8918
6.45k
            } else {
8919
4.40k
    tmp = xmlBuildQName(l, p, NULL, 0);
8920
4.40k
    xmlFree((char *)l);
8921
4.40k
      }
8922
10.8k
      p = xmlDictLookup(ctxt->dict, tmp, -1);
8923
10.8k
      if (tmp != NULL) xmlFree(tmp);
8924
10.8k
      *prefix = NULL;
8925
10.8k
      return(p);
8926
10.8k
  }
8927
592k
  if (CUR == ':') {
8928
9.49k
      xmlChar *tmp;
8929
8930
9.49k
            xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8931
9.49k
               "Failed to parse QName '%s:%s:'\n", p, l, NULL);
8932
9.49k
      NEXT;
8933
9.49k
      tmp = (xmlChar *) xmlParseName(ctxt);
8934
9.49k
      if (tmp != NULL) {
8935
7.89k
          tmp = xmlBuildQName(tmp, l, NULL, 0);
8936
7.89k
    l = xmlDictLookup(ctxt->dict, tmp, -1);
8937
7.89k
    if (tmp != NULL) xmlFree(tmp);
8938
7.89k
    *prefix = p;
8939
7.89k
    return(l);
8940
7.89k
      }
8941
1.60k
            if (ctxt->instate == XML_PARSER_EOF)
8942
0
                return(NULL);
8943
1.60k
      tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8944
1.60k
      l = xmlDictLookup(ctxt->dict, tmp, -1);
8945
1.60k
      if (tmp != NULL) xmlFree(tmp);
8946
1.60k
      *prefix = p;
8947
1.60k
      return(l);
8948
1.60k
  }
8949
582k
  *prefix = p;
8950
582k
    } else
8951
11.4M
        *prefix = NULL;
8952
12.0M
    return(l);
8953
12.0M
}
8954
8955
/**
8956
 * xmlParseQNameAndCompare:
8957
 * @ctxt:  an XML parser context
8958
 * @name:  the localname
8959
 * @prefix:  the prefix, if any.
8960
 *
8961
 * parse an XML name and compares for match
8962
 * (specialized for endtag parsing)
8963
 *
8964
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8965
 * and the name for mismatch
8966
 */
8967
8968
static const xmlChar *
8969
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8970
139k
                        xmlChar const *prefix) {
8971
139k
    const xmlChar *cmp;
8972
139k
    const xmlChar *in;
8973
139k
    const xmlChar *ret;
8974
139k
    const xmlChar *prefix2;
8975
8976
139k
    if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8977
8978
139k
    GROW;
8979
139k
    in = ctxt->input->cur;
8980
8981
139k
    cmp = prefix;
8982
483k
    while (*in != 0 && *in == *cmp) {
8983
344k
  ++in;
8984
344k
  ++cmp;
8985
344k
    }
8986
139k
    if ((*cmp == 0) && (*in == ':')) {
8987
126k
        in++;
8988
126k
  cmp = name;
8989
1.01M
  while (*in != 0 && *in == *cmp) {
8990
890k
      ++in;
8991
890k
      ++cmp;
8992
890k
  }
8993
126k
  if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8994
      /* success */
8995
114k
            ctxt->input->col += in - ctxt->input->cur;
8996
114k
      ctxt->input->cur = in;
8997
114k
      return((const xmlChar*) 1);
8998
114k
  }
8999
126k
    }
9000
    /*
9001
     * all strings coms from the dictionary, equality can be done directly
9002
     */
9003
25.0k
    ret = xmlParseQName (ctxt, &prefix2);
9004
25.0k
    if ((ret == name) && (prefix == prefix2))
9005
304
  return((const xmlChar*) 1);
9006
24.6k
    return ret;
9007
25.0k
}
9008
9009
/**
9010
 * xmlParseAttValueInternal:
9011
 * @ctxt:  an XML parser context
9012
 * @len:  attribute len result
9013
 * @alloc:  whether the attribute was reallocated as a new string
9014
 * @normalize:  if 1 then further non-CDATA normalization must be done
9015
 *
9016
 * parse a value for an attribute.
9017
 * NOTE: if no normalization is needed, the routine will return pointers
9018
 *       directly from the data buffer.
9019
 *
9020
 * 3.3.3 Attribute-Value Normalization:
9021
 * Before the value of an attribute is passed to the application or
9022
 * checked for validity, the XML processor must normalize it as follows:
9023
 * - a character reference is processed by appending the referenced
9024
 *   character to the attribute value
9025
 * - an entity reference is processed by recursively processing the
9026
 *   replacement text of the entity
9027
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
9028
 *   appending #x20 to the normalized value, except that only a single
9029
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
9030
 *   parsed entity or the literal entity value of an internal parsed entity
9031
 * - other characters are processed by appending them to the normalized value
9032
 * If the declared value is not CDATA, then the XML processor must further
9033
 * process the normalized attribute value by discarding any leading and
9034
 * trailing space (#x20) characters, and by replacing sequences of space
9035
 * (#x20) characters by a single space (#x20) character.
9036
 * All attributes for which no declaration has been read should be treated
9037
 * by a non-validating parser as if declared CDATA.
9038
 *
9039
 * Returns the AttValue parsed or NULL. The value has to be freed by the
9040
 *     caller if it was copied, this can be detected by val[*len] == 0.
9041
 */
9042
9043
/*
 * Grow the input and refresh the scanning pointers of the enclosing
 * function: if the input base pointer changed, @start and @in are
 * shifted by the same distance, and @end is reloaded. Returns NULL from
 * the enclosing function if the parser state is XML_PARSER_EOF after
 * growing. The expansion declares a local named oldbase and already
 * ends with ';', so it is used inside its own braces and without a
 * trailing semicolon.
 */
#define GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end) \
    const xmlChar *oldbase = (ctxt)->input->base;\
    GROW;\
    if ((ctxt)->instate == XML_PARSER_EOF)\
        return(NULL);\
    if ((ctxt)->input->base != oldbase) {\
        ptrdiff_t delta = (ctxt)->input->base - oldbase;\
        (start) += delta;\
        (in) += delta;\
    }\
    (end) = (ctxt)->input->end;
9054
9055
static xmlChar *
9056
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
9057
                         int normalize)
9058
9.25M
{
9059
9.25M
    xmlChar limit = 0;
9060
9.25M
    const xmlChar *in = NULL, *start, *end, *last;
9061
9.25M
    xmlChar *ret = NULL;
9062
9.25M
    int line, col;
9063
9.25M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9064
2.81M
                    XML_MAX_HUGE_LENGTH :
9065
9.25M
                    XML_MAX_TEXT_LENGTH;
9066
9067
9.25M
    GROW;
9068
9.25M
    in = (xmlChar *) CUR_PTR;
9069
9.25M
    line = ctxt->input->line;
9070
9.25M
    col = ctxt->input->col;
9071
9.25M
    if (*in != '"' && *in != '\'') {
9072
29.5k
        xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
9073
29.5k
        return (NULL);
9074
29.5k
    }
9075
9.22M
    ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
9076
9077
    /*
9078
     * try to handle in this routine the most common case where no
9079
     * allocation of a new string is required and where content is
9080
     * pure ASCII.
9081
     */
9082
9.22M
    limit = *in++;
9083
9.22M
    col++;
9084
9.22M
    end = ctxt->input->end;
9085
9.22M
    start = in;
9086
9.22M
    if (in >= end) {
9087
601
        GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9088
601
    }
9089
9.22M
    if (normalize) {
9090
        /*
9091
   * Skip any leading spaces
9092
   */
9093
501k
  while ((in < end) && (*in != limit) &&
9094
501k
         ((*in == 0x20) || (*in == 0x9) ||
9095
499k
          (*in == 0xA) || (*in == 0xD))) {
9096
225k
      if (*in == 0xA) {
9097
21.6k
          line++; col = 1;
9098
203k
      } else {
9099
203k
          col++;
9100
203k
      }
9101
225k
      in++;
9102
225k
      start = in;
9103
225k
      if (in >= end) {
9104
105
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9105
105
                if ((in - start) > maxLength) {
9106
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9107
0
                                   "AttValue length too long\n");
9108
0
                    return(NULL);
9109
0
                }
9110
105
      }
9111
225k
  }
9112
2.69M
  while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9113
2.69M
         (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9114
2.42M
      col++;
9115
2.42M
      if ((*in++ == 0x20) && (*in == 0x20)) break;
9116
2.41M
      if (in >= end) {
9117
221
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9118
221
                if ((in - start) > maxLength) {
9119
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9120
0
                                   "AttValue length too long\n");
9121
0
                    return(NULL);
9122
0
                }
9123
221
      }
9124
2.41M
  }
9125
276k
  last = in;
9126
  /*
9127
   * skip the trailing blanks
9128
   */
9129
280k
  while ((last[-1] == 0x20) && (last > start)) last--;
9130
511k
  while ((in < end) && (*in != limit) &&
9131
511k
         ((*in == 0x20) || (*in == 0x9) ||
9132
257k
          (*in == 0xA) || (*in == 0xD))) {
9133
235k
      if (*in == 0xA) {
9134
11.6k
          line++, col = 1;
9135
223k
      } else {
9136
223k
          col++;
9137
223k
      }
9138
235k
      in++;
9139
235k
      if (in >= end) {
9140
174
    const xmlChar *oldbase = ctxt->input->base;
9141
174
    GROW;
9142
174
                if (ctxt->instate == XML_PARSER_EOF)
9143
0
                    return(NULL);
9144
174
    if (oldbase != ctxt->input->base) {
9145
0
        ptrdiff_t delta = ctxt->input->base - oldbase;
9146
0
        start = start + delta;
9147
0
        in = in + delta;
9148
0
        last = last + delta;
9149
0
    }
9150
174
    end = ctxt->input->end;
9151
174
                if ((in - start) > maxLength) {
9152
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9153
0
                                   "AttValue length too long\n");
9154
0
                    return(NULL);
9155
0
                }
9156
174
      }
9157
235k
  }
9158
276k
        if ((in - start) > maxLength) {
9159
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9160
0
                           "AttValue length too long\n");
9161
0
            return(NULL);
9162
0
        }
9163
276k
  if (*in != limit) goto need_complex;
9164
8.94M
    } else {
9165
106M
  while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9166
106M
         (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9167
97.1M
      in++;
9168
97.1M
      col++;
9169
97.1M
      if (in >= end) {
9170
3.47k
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9171
3.47k
                if ((in - start) > maxLength) {
9172
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9173
0
                                   "AttValue length too long\n");
9174
0
                    return(NULL);
9175
0
                }
9176
3.47k
      }
9177
97.1M
  }
9178
8.94M
  last = in;
9179
8.94M
        if ((in - start) > maxLength) {
9180
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9181
0
                           "AttValue length too long\n");
9182
0
            return(NULL);
9183
0
        }
9184
8.94M
  if (*in != limit) goto need_complex;
9185
8.94M
    }
9186
8.84M
    in++;
9187
8.84M
    col++;
9188
8.84M
    if (len != NULL) {
9189
6.06M
        if (alloc) *alloc = 0;
9190
6.06M
        *len = last - start;
9191
6.06M
        ret = (xmlChar *) start;
9192
6.06M
    } else {
9193
2.78M
        if (alloc) *alloc = 1;
9194
2.78M
        ret = xmlStrndup(start, last - start);
9195
2.78M
    }
9196
8.84M
    CUR_PTR = in;
9197
8.84M
    ctxt->input->line = line;
9198
8.84M
    ctxt->input->col = col;
9199
8.84M
    return ret;
9200
372k
need_complex:
9201
372k
    if (alloc) *alloc = 1;
9202
372k
    return xmlParseAttValueComplex(ctxt, len, normalize);
9203
9.22M
}
9204
9205
/**
9206
 * xmlParseAttribute2:
9207
 * @ctxt:  an XML parser context
9208
 * @pref:  the element prefix
9209
 * @elem:  the element name
9210
 * @prefix:  a xmlChar ** used to store the value of the attribute prefix
9211
 * @value:  a xmlChar ** used to store the value of the attribute
9212
 * @len:  an int * to save the length of the attribute
9213
 * @alloc:  an int * to indicate if the attribute was allocated
9214
 *
9215
 * parse an attribute in the new SAX2 framework.
9216
 *
9217
 * Returns the attribute name, and the value in *value.
9218
 */
9219
9220
static const xmlChar *
9221
xmlParseAttribute2(xmlParserCtxtPtr ctxt,
9222
                   const xmlChar * pref, const xmlChar * elem,
9223
                   const xmlChar ** prefix, xmlChar ** value,
9224
                   int *len, int *alloc)
9225
6.42M
{
9226
6.42M
    const xmlChar *name;
9227
6.42M
    xmlChar *val, *internal_val = NULL;
9228
6.42M
    int normalize = 0;
9229
9230
6.42M
    *value = NULL;
9231
6.42M
    GROW;
9232
6.42M
    name = xmlParseQName(ctxt, prefix);
9233
6.42M
    if (name == NULL) {
9234
91.3k
        xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9235
91.3k
                       "error parsing attribute name\n");
9236
91.3k
        return (NULL);
9237
91.3k
    }
9238
9239
    /*
9240
     * get the type if needed
9241
     */
9242
6.33M
    if (ctxt->attsSpecial != NULL) {
9243
774k
        int type;
9244
9245
774k
        type = (int) (ptrdiff_t) xmlHashQLookup2(ctxt->attsSpecial,
9246
774k
                                                 pref, elem, *prefix, name);
9247
774k
        if (type != 0)
9248
276k
            normalize = 1;
9249
774k
    }
9250
9251
    /*
9252
     * read the value
9253
     */
9254
6.33M
    SKIP_BLANKS;
9255
6.33M
    if (RAW == '=') {
9256
6.28M
        NEXT;
9257
6.28M
        SKIP_BLANKS;
9258
6.28M
        val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9259
6.28M
        if (val == NULL)
9260
13.4k
            return (NULL);
9261
6.26M
  if (normalize) {
9262
      /*
9263
       * Sometimes a second normalisation pass for spaces is needed
9264
       * but that only happens if charrefs or entities references
9265
       * have been used in the attribute value, i.e. the attribute
9266
       * value have been extracted in an allocated string already.
9267
       */
9268
276k
      if (*alloc) {
9269
22.8k
          const xmlChar *val2;
9270
9271
22.8k
          val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
9272
22.8k
    if ((val2 != NULL) && (val2 != val)) {
9273
4.58k
        xmlFree(val);
9274
4.58k
        val = (xmlChar *) val2;
9275
4.58k
    }
9276
22.8k
      }
9277
276k
  }
9278
6.26M
        ctxt->instate = XML_PARSER_CONTENT;
9279
6.26M
    } else {
9280
52.7k
        xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
9281
52.7k
                          "Specification mandates value for attribute %s\n",
9282
52.7k
                          name);
9283
52.7k
        return (name);
9284
52.7k
    }
9285
9286
6.26M
    if (*prefix == ctxt->str_xml) {
9287
        /*
9288
         * Check that xml:lang conforms to the specification
9289
         * No more registered as an error, just generate a warning now
9290
         * since this was deprecated in XML second edition
9291
         */
9292
43.7k
        if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
9293
9.55k
            internal_val = xmlStrndup(val, *len);
9294
9.55k
            if (!xmlCheckLanguageID(internal_val)) {
9295
5.39k
                xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9296
5.39k
                              "Malformed value for xml:lang : %s\n",
9297
5.39k
                              internal_val, NULL);
9298
5.39k
            }
9299
9.55k
        }
9300
9301
        /*
9302
         * Check that xml:space conforms to the specification
9303
         */
9304
43.7k
        if (xmlStrEqual(name, BAD_CAST "space")) {
9305
3.53k
            internal_val = xmlStrndup(val, *len);
9306
3.53k
            if (xmlStrEqual(internal_val, BAD_CAST "default"))
9307
133
                *(ctxt->space) = 0;
9308
3.39k
            else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9309
320
                *(ctxt->space) = 1;
9310
3.07k
            else {
9311
3.07k
                xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9312
3.07k
                              "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9313
3.07k
                              internal_val, NULL);
9314
3.07k
            }
9315
3.53k
        }
9316
43.7k
        if (internal_val) {
9317
13.0k
            xmlFree(internal_val);
9318
13.0k
        }
9319
43.7k
    }
9320
9321
6.26M
    *value = val;
9322
6.26M
    return (name);
9323
6.33M
}
9324
/**
9325
 * xmlParseStartTag2:
9326
 * @ctxt:  an XML parser context
9327
 *
9328
 * Parse a start tag. Always consumes '<'.
9329
 *
9330
 * This routine is called when running SAX2 parsing
9331
 *
9332
 * [40] STag ::= '<' Name (S Attribute)* S? '>'
9333
 *
9334
 * [ WFC: Unique Att Spec ]
9335
 * No attribute name may appear more than once in the same start-tag or
9336
 * empty-element tag.
9337
 *
9338
 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9339
 *
9340
 * [ WFC: Unique Att Spec ]
9341
 * No attribute name may appear more than once in the same start-tag or
9342
 * empty-element tag.
9343
 *
9344
 * With namespace:
9345
 *
9346
 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9347
 *
9348
 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9349
 *
9350
 * Returns the element name parsed
9351
 */
9352
9353
static const xmlChar *
9354
xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
9355
5.83M
                  const xmlChar **URI, int *tlen) {
9356
5.83M
    const xmlChar *localname;
9357
5.83M
    const xmlChar *prefix;
9358
5.83M
    const xmlChar *attname;
9359
5.83M
    const xmlChar *aprefix;
9360
5.83M
    const xmlChar *nsname;
9361
5.83M
    xmlChar *attvalue;
9362
5.83M
    const xmlChar **atts = ctxt->atts;
9363
5.83M
    int maxatts = ctxt->maxatts;
9364
5.83M
    int nratts, nbatts, nbdef, inputid;
9365
5.83M
    int i, j, nbNs, attval;
9366
5.83M
    unsigned long cur;
9367
5.83M
    int nsNr = ctxt->nsNr;
9368
9369
5.83M
    if (RAW != '<') return(NULL);
9370
5.83M
    NEXT1;
9371
9372
    /*
9373
     * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
9374
     *       point since the attribute values may be stored as pointers to
9375
     *       the buffer and calling SHRINK would destroy them !
9376
     *       The Shrinking is only possible once the full set of attribute
9377
     *       callbacks have been done.
9378
     */
9379
5.83M
    SHRINK;
9380
5.83M
    cur = ctxt->input->cur - ctxt->input->base;
9381
5.83M
    inputid = ctxt->input->id;
9382
5.83M
    nbatts = 0;
9383
5.83M
    nratts = 0;
9384
5.83M
    nbdef = 0;
9385
5.83M
    nbNs = 0;
9386
5.83M
    attval = 0;
9387
    /* Forget any namespaces added during an earlier parse of this element. */
9388
5.83M
    ctxt->nsNr = nsNr;
9389
9390
5.83M
    localname = xmlParseQName(ctxt, &prefix);
9391
5.83M
    if (localname == NULL) {
9392
160k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9393
160k
           "StartTag: invalid element name\n");
9394
160k
        return(NULL);
9395
160k
    }
9396
5.67M
    *tlen = ctxt->input->cur - ctxt->input->base - cur;
9397
9398
    /*
9399
     * Now parse the attributes, it ends up with the ending
9400
     *
9401
     * (S Attribute)* S?
9402
     */
9403
5.67M
    SKIP_BLANKS;
9404
5.67M
    GROW;
9405
9406
8.24M
    while (((RAW != '>') &&
9407
8.24M
     ((RAW != '/') || (NXT(1) != '>')) &&
9408
8.24M
     (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
9409
6.42M
  int len = -1, alloc = 0;
9410
9411
6.42M
  attname = xmlParseAttribute2(ctxt, prefix, localname,
9412
6.42M
                               &aprefix, &attvalue, &len, &alloc);
9413
6.42M
        if (attname == NULL) {
9414
104k
      xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9415
104k
           "xmlParseStartTag: problem parsing attributes\n");
9416
104k
      break;
9417
104k
  }
9418
6.32M
        if (attvalue == NULL)
9419
52.7k
            goto next_attr;
9420
6.26M
  if (len < 0) len = xmlStrlen(attvalue);
9421
9422
6.26M
        if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9423
40.3k
            const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9424
40.3k
            xmlURIPtr uri;
9425
9426
40.3k
            if (URL == NULL) {
9427
0
                xmlErrMemory(ctxt, "dictionary allocation failure");
9428
0
                if ((attvalue != NULL) && (alloc != 0))
9429
0
                    xmlFree(attvalue);
9430
0
                localname = NULL;
9431
0
                goto done;
9432
0
            }
9433
40.3k
            if (*URL != 0) {
9434
39.4k
                uri = xmlParseURI((const char *) URL);
9435
39.4k
                if (uri == NULL) {
9436
13.3k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
9437
13.3k
                             "xmlns: '%s' is not a valid URI\n",
9438
13.3k
                                       URL, NULL, NULL);
9439
26.1k
                } else {
9440
26.1k
                    if (uri->scheme == NULL) {
9441
5.08k
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9442
5.08k
                                  "xmlns: URI %s is not absolute\n",
9443
5.08k
                                  URL, NULL, NULL);
9444
5.08k
                    }
9445
26.1k
                    xmlFreeURI(uri);
9446
26.1k
                }
9447
39.4k
                if (URL == ctxt->str_xml_ns) {
9448
0
                    if (attname != ctxt->str_xml) {
9449
0
                        xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9450
0
                     "xml namespace URI cannot be the default namespace\n",
9451
0
                                 NULL, NULL, NULL);
9452
0
                    }
9453
0
                    goto next_attr;
9454
0
                }
9455
39.4k
                if ((len == 29) &&
9456
39.4k
                    (xmlStrEqual(URL,
9457
1.28k
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9458
433
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9459
433
                         "reuse of the xmlns namespace name is forbidden\n",
9460
433
                             NULL, NULL, NULL);
9461
433
                    goto next_attr;
9462
433
                }
9463
39.4k
            }
9464
            /*
9465
             * check that it's not a defined namespace
9466
             */
9467
58.8k
            for (j = 1;j <= nbNs;j++)
9468
23.9k
                if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9469
4.97k
                    break;
9470
39.9k
            if (j <= nbNs)
9471
4.97k
                xmlErrAttributeDup(ctxt, NULL, attname);
9472
34.9k
            else
9473
34.9k
                if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
9474
9475
6.22M
        } else if (aprefix == ctxt->str_xmlns) {
9476
85.7k
            const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9477
85.7k
            xmlURIPtr uri;
9478
9479
85.7k
            if (attname == ctxt->str_xml) {
9480
1.20k
                if (URL != ctxt->str_xml_ns) {
9481
1.20k
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9482
1.20k
                             "xml namespace prefix mapped to wrong URI\n",
9483
1.20k
                             NULL, NULL, NULL);
9484
1.20k
                }
9485
                /*
9486
                 * Do not keep a namespace definition node
9487
                 */
9488
1.20k
                goto next_attr;
9489
1.20k
            }
9490
84.5k
            if (URL == ctxt->str_xml_ns) {
9491
0
                if (attname != ctxt->str_xml) {
9492
0
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9493
0
                             "xml namespace URI mapped to wrong prefix\n",
9494
0
                             NULL, NULL, NULL);
9495
0
                }
9496
0
                goto next_attr;
9497
0
            }
9498
84.5k
            if (attname == ctxt->str_xmlns) {
9499
478
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9500
478
                         "redefinition of the xmlns prefix is forbidden\n",
9501
478
                         NULL, NULL, NULL);
9502
478
                goto next_attr;
9503
478
            }
9504
84.0k
            if ((len == 29) &&
9505
84.0k
                (xmlStrEqual(URL,
9506
2.11k
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9507
348
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9508
348
                         "reuse of the xmlns namespace name is forbidden\n",
9509
348
                         NULL, NULL, NULL);
9510
348
                goto next_attr;
9511
348
            }
9512
83.6k
            if ((URL == NULL) || (URL[0] == 0)) {
9513
1.13k
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9514
1.13k
                         "xmlns:%s: Empty XML namespace is not allowed\n",
9515
1.13k
                              attname, NULL, NULL);
9516
1.13k
                goto next_attr;
9517
82.5k
            } else {
9518
82.5k
                uri = xmlParseURI((const char *) URL);
9519
82.5k
                if (uri == NULL) {
9520
19.1k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
9521
19.1k
                         "xmlns:%s: '%s' is not a valid URI\n",
9522
19.1k
                                       attname, URL, NULL);
9523
63.4k
                } else {
9524
63.4k
                    if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9525
1.44k
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9526
1.44k
                                  "xmlns:%s: URI %s is not absolute\n",
9527
1.44k
                                  attname, URL, NULL);
9528
1.44k
                    }
9529
63.4k
                    xmlFreeURI(uri);
9530
63.4k
                }
9531
82.5k
            }
9532
9533
            /*
9534
             * check that it's not a defined namespace
9535
             */
9536
142k
            for (j = 1;j <= nbNs;j++)
9537
66.5k
                if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9538
6.60k
                    break;
9539
82.5k
            if (j <= nbNs)
9540
6.60k
                xmlErrAttributeDup(ctxt, aprefix, attname);
9541
75.9k
            else
9542
75.9k
                if (nsPush(ctxt, attname, URL) > 0) nbNs++;
9543
9544
6.14M
        } else {
9545
            /*
9546
             * Add the pair to atts
9547
             */
9548
6.14M
            if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9549
91.2k
                if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9550
0
                    goto next_attr;
9551
0
                }
9552
91.2k
                maxatts = ctxt->maxatts;
9553
91.2k
                atts = ctxt->atts;
9554
91.2k
            }
9555
6.14M
            ctxt->attallocs[nratts++] = alloc;
9556
6.14M
            atts[nbatts++] = attname;
9557
6.14M
            atts[nbatts++] = aprefix;
9558
            /*
9559
             * The namespace URI field is used temporarily to point at the
9560
             * base of the current input buffer for non-alloced attributes.
9561
             * When the input buffer is reallocated, all the pointers become
9562
             * invalid, but they can be reconstructed later.
9563
             */
9564
6.14M
            if (alloc)
9565
172k
                atts[nbatts++] = NULL;
9566
5.96M
            else
9567
5.96M
                atts[nbatts++] = ctxt->input->base;
9568
6.14M
            atts[nbatts++] = attvalue;
9569
6.14M
            attvalue += len;
9570
6.14M
            atts[nbatts++] = attvalue;
9571
            /*
9572
             * tag if some deallocation is needed
9573
             */
9574
6.14M
            if (alloc != 0) attval = 1;
9575
6.14M
            attvalue = NULL; /* moved into atts */
9576
6.14M
        }
9577
9578
6.32M
next_attr:
9579
6.32M
        if ((attvalue != NULL) && (alloc != 0)) {
9580
31.9k
            xmlFree(attvalue);
9581
31.9k
            attvalue = NULL;
9582
31.9k
        }
9583
9584
6.32M
  GROW
9585
6.32M
        if (ctxt->instate == XML_PARSER_EOF)
9586
0
            break;
9587
6.32M
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9588
3.61M
      break;
9589
2.71M
  if (SKIP_BLANKS == 0) {
9590
142k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9591
142k
         "attributes construct error\n");
9592
142k
      break;
9593
142k
  }
9594
2.56M
        GROW;
9595
2.56M
    }
9596
9597
5.67M
    if (ctxt->input->id != inputid) {
9598
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9599
0
                    "Unexpected change of input\n");
9600
0
        localname = NULL;
9601
0
        goto done;
9602
0
    }
9603
9604
    /* Reconstruct attribute value pointers. */
9605
11.8M
    for (i = 0, j = 0; j < nratts; i += 5, j++) {
9606
6.14M
        if (atts[i+2] != NULL) {
9607
            /*
9608
             * Arithmetic on dangling pointers is technically undefined
9609
             * behavior, but well...
9610
             */
9611
5.96M
            const xmlChar *old = atts[i+2];
9612
5.96M
            atts[i+2]  = NULL;    /* Reset repurposed namespace URI */
9613
5.96M
            atts[i+3] = ctxt->input->base + (atts[i+3] - old);  /* value */
9614
5.96M
            atts[i+4] = ctxt->input->base + (atts[i+4] - old);  /* valuend */
9615
5.96M
        }
9616
6.14M
    }
9617
9618
    /*
9619
     * The attributes defaulting
9620
     */
9621
5.67M
    if (ctxt->attsDefault != NULL) {
9622
941k
        xmlDefAttrsPtr defaults;
9623
9624
941k
  defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9625
941k
  if (defaults != NULL) {
9626
267k
      for (i = 0;i < defaults->nbAttrs;i++) {
9627
183k
          attname = defaults->values[5 * i];
9628
183k
    aprefix = defaults->values[5 * i + 1];
9629
9630
                /*
9631
     * special work for namespaces defaulted defs
9632
     */
9633
183k
    if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9634
        /*
9635
         * check that it's not a defined namespace
9636
         */
9637
11.0k
        for (j = 1;j <= nbNs;j++)
9638
4.45k
            if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9639
2.32k
          break;
9640
8.89k
              if (j <= nbNs) continue;
9641
9642
6.56k
        nsname = xmlGetNamespace(ctxt, NULL);
9643
6.56k
        if (nsname != defaults->values[5 * i + 2]) {
9644
3.43k
      if (nsPush(ctxt, NULL,
9645
3.43k
                 defaults->values[5 * i + 2]) > 0)
9646
3.32k
          nbNs++;
9647
3.43k
        }
9648
174k
    } else if (aprefix == ctxt->str_xmlns) {
9649
        /*
9650
         * check that it's not a defined namespace
9651
         */
9652
19.6k
        for (j = 1;j <= nbNs;j++)
9653
5.00k
            if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9654
2.18k
          break;
9655
16.8k
              if (j <= nbNs) continue;
9656
9657
14.6k
        nsname = xmlGetNamespace(ctxt, attname);
9658
14.6k
        if (nsname != defaults->values[5 * i + 2]) {
9659
7.99k
      if (nsPush(ctxt, attname,
9660
7.99k
                 defaults->values[5 * i + 2]) > 0)
9661
7.34k
          nbNs++;
9662
7.99k
        }
9663
158k
    } else {
9664
        /*
9665
         * check that it's not a defined attribute
9666
         */
9667
424k
        for (j = 0;j < nbatts;j+=5) {
9668
268k
      if ((attname == atts[j]) && (aprefix == atts[j+1]))
9669
1.93k
          break;
9670
268k
        }
9671
158k
        if (j < nbatts) continue;
9672
9673
156k
        if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9674
4.83k
      if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9675
0
                            localname = NULL;
9676
0
                            goto done;
9677
0
      }
9678
4.83k
      maxatts = ctxt->maxatts;
9679
4.83k
      atts = ctxt->atts;
9680
4.83k
        }
9681
156k
        atts[nbatts++] = attname;
9682
156k
        atts[nbatts++] = aprefix;
9683
156k
        if (aprefix == NULL)
9684
110k
      atts[nbatts++] = NULL;
9685
45.5k
        else
9686
45.5k
            atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
9687
156k
        atts[nbatts++] = defaults->values[5 * i + 2];
9688
156k
        atts[nbatts++] = defaults->values[5 * i + 3];
9689
156k
        if ((ctxt->standalone == 1) &&
9690
156k
            (defaults->values[5 * i + 4] != NULL)) {
9691
3
      xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9692
3
    "standalone: attribute %s on %s defaulted from external subset\n",
9693
3
                                   attname, localname);
9694
3
        }
9695
156k
        nbdef++;
9696
156k
    }
9697
183k
      }
9698
84.0k
  }
9699
941k
    }
9700
9701
    /*
9702
     * The attributes checkings
9703
     */
9704
11.9M
    for (i = 0; i < nbatts;i += 5) {
9705
        /*
9706
  * The default namespace does not apply to attribute names.
9707
  */
9708
6.29M
  if (atts[i + 1] != NULL) {
9709
236k
      nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9710
236k
      if (nsname == NULL) {
9711
106k
    xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9712
106k
        "Namespace prefix %s for %s on %s is not defined\n",
9713
106k
        atts[i + 1], atts[i], localname);
9714
106k
      }
9715
236k
      atts[i + 2] = nsname;
9716
236k
  } else
9717
6.06M
      nsname = NULL;
9718
  /*
9719
   * [ WFC: Unique Att Spec ]
9720
   * No attribute name may appear more than once in the same
9721
   * start-tag or empty-element tag.
9722
   * As extended by the Namespace in XML REC.
9723
   */
9724
9.11M
        for (j = 0; j < i;j += 5) {
9725
2.83M
      if (atts[i] == atts[j]) {
9726
26.0k
          if (atts[i+1] == atts[j+1]) {
9727
10.4k
        xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9728
10.4k
        break;
9729
10.4k
    }
9730
15.5k
    if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9731
726
        xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9732
726
           "Namespaced Attribute %s in '%s' redefined\n",
9733
726
           atts[i], nsname, NULL);
9734
726
        break;
9735
726
    }
9736
15.5k
      }
9737
2.83M
  }
9738
6.29M
    }
9739
9740
5.67M
    nsname = xmlGetNamespace(ctxt, prefix);
9741
5.67M
    if ((prefix != NULL) && (nsname == NULL)) {
9742
144k
  xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9743
144k
           "Namespace prefix %s on %s is not defined\n",
9744
144k
     prefix, localname, NULL);
9745
144k
    }
9746
5.67M
    *pref = prefix;
9747
5.67M
    *URI = nsname;
9748
9749
    /*
9750
     * SAX: Start of Element !
9751
     */
9752
5.67M
    if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9753
5.67M
  (!ctxt->disableSAX)) {
9754
5.00M
  if (nbNs > 0)
9755
59.9k
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9756
59.9k
        nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9757
59.9k
        nbatts / 5, nbdef, atts);
9758
4.94M
  else
9759
4.94M
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9760
4.94M
                    nsname, 0, NULL, nbatts / 5, nbdef, atts);
9761
5.00M
    }
9762
9763
5.67M
done:
9764
    /*
9765
     * Free up attribute allocated strings if needed
9766
     */
9767
5.67M
    if (attval != 0) {
9768
378k
  for (i = 3,j = 0; j < nratts;i += 5,j++)
9769
218k
      if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9770
172k
          xmlFree((xmlChar *) atts[i]);
9771
159k
    }
9772
9773
5.67M
    return(localname);
9774
5.67M
}
9775
9776
/**
9777
 * xmlParseEndTag2:
9778
 * @ctxt:  an XML parser context
9779
 * @line:  line of the start tag
9780
 * @nsNr:  number of namespaces on the start tag
9781
 *
9782
 * Parse an end tag. Always consumes '</'.
9783
 *
9784
 * [42] ETag ::= '</' Name S? '>'
9785
 *
9786
 * With namespace
9787
 *
9788
 * [NS 9] ETag ::= '</' QName S? '>'
9789
 */
9790
9791
static void
9792
2.61M
xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlStartTag *tag) {
9793
2.61M
    const xmlChar *name;
9794
9795
2.61M
    GROW;
9796
2.61M
    if ((RAW != '<') || (NXT(1) != '/')) {
9797
0
  xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9798
0
  return;
9799
0
    }
9800
2.61M
    SKIP(2);
9801
9802
2.61M
    if (tag->prefix == NULL)
9803
2.47M
        name = xmlParseNameAndCompare(ctxt, ctxt->name);
9804
139k
    else
9805
139k
        name = xmlParseQNameAndCompare(ctxt, ctxt->name, tag->prefix);
9806
9807
    /*
9808
     * We should definitely be at the ending "S? '>'" part
9809
     */
9810
2.61M
    GROW;
9811
2.61M
    if (ctxt->instate == XML_PARSER_EOF)
9812
0
        return;
9813
2.61M
    SKIP_BLANKS;
9814
2.61M
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9815
33.5k
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9816
33.5k
    } else
9817
2.57M
  NEXT1;
9818
9819
    /*
9820
     * [ WFC: Element Type Match ]
9821
     * The Name in an element's end-tag must match the element type in the
9822
     * start-tag.
9823
     *
9824
     */
9825
2.61M
    if (name != (xmlChar*)1) {
9826
101k
        if (name == NULL) name = BAD_CAST "unparsable";
9827
101k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9828
101k
         "Opening and ending tag mismatch: %s line %d and %s\n",
9829
101k
                    ctxt->name, tag->line, name);
9830
101k
    }
9831
9832
    /*
9833
     * SAX: End of Tag
9834
     */
9835
2.61M
    if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9836
2.61M
  (!ctxt->disableSAX))
9837
2.27M
  ctxt->sax->endElementNs(ctxt->userData, ctxt->name, tag->prefix,
9838
2.27M
                                tag->URI);
9839
9840
2.61M
    spacePop(ctxt);
9841
2.61M
    if (tag->nsNr != 0)
9842
12.8k
  nsPop(ctxt, tag->nsNr);
9843
2.61M
}
9844
9845
/**
9846
 * xmlParseCDSect:
9847
 * @ctxt:  an XML parser context
9848
 *
9849
 * DEPRECATED: Internal function, don't use.
9850
 *
9851
 * Parse escaped pure raw content. Always consumes '<!['.
9852
 *
9853
 * [18] CDSect ::= CDStart CData CDEnd
9854
 *
9855
 * [19] CDStart ::= '<![CDATA['
9856
 *
9857
 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9858
 *
9859
 * [21] CDEnd ::= ']]>'
9860
 */
9861
void
9862
23.7k
xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9863
23.7k
    xmlChar *buf = NULL;
9864
23.7k
    int len = 0;
9865
23.7k
    int size = XML_PARSER_BUFFER_SIZE;
9866
23.7k
    int r, rl;
9867
23.7k
    int s, sl;
9868
23.7k
    int cur, l;
9869
23.7k
    int count = 0;
9870
23.7k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9871
8.12k
                    XML_MAX_HUGE_LENGTH :
9872
23.7k
                    XML_MAX_TEXT_LENGTH;
9873
9874
23.7k
    if ((CUR != '<') || (NXT(1) != '!') || (NXT(2) != '['))
9875
0
        return;
9876
23.7k
    SKIP(3);
9877
9878
23.7k
    if (!CMP6(CUR_PTR, 'C', 'D', 'A', 'T', 'A', '['))
9879
0
        return;
9880
23.7k
    SKIP(6);
9881
9882
23.7k
    ctxt->instate = XML_PARSER_CDATA_SECTION;
9883
23.7k
    r = CUR_CHAR(rl);
9884
23.7k
    if (!IS_CHAR(r)) {
9885
1.35k
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9886
1.35k
        goto out;
9887
1.35k
    }
9888
22.4k
    NEXTL(rl);
9889
22.4k
    s = CUR_CHAR(sl);
9890
22.4k
    if (!IS_CHAR(s)) {
9891
1.56k
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9892
1.56k
        goto out;
9893
1.56k
    }
9894
20.8k
    NEXTL(sl);
9895
20.8k
    cur = CUR_CHAR(l);
9896
20.8k
    buf = (xmlChar *) xmlMallocAtomic(size);
9897
20.8k
    if (buf == NULL) {
9898
0
  xmlErrMemory(ctxt, NULL);
9899
0
        goto out;
9900
0
    }
9901
12.2M
    while (IS_CHAR(cur) &&
9902
12.2M
           ((r != ']') || (s != ']') || (cur != '>'))) {
9903
12.2M
  if (len + 5 >= size) {
9904
20.0k
      xmlChar *tmp;
9905
9906
20.0k
      tmp = (xmlChar *) xmlRealloc(buf, size * 2);
9907
20.0k
      if (tmp == NULL) {
9908
0
    xmlErrMemory(ctxt, NULL);
9909
0
                goto out;
9910
0
      }
9911
20.0k
      buf = tmp;
9912
20.0k
      size *= 2;
9913
20.0k
  }
9914
12.2M
  COPY_BUF(rl,buf,len,r);
9915
12.2M
  r = s;
9916
12.2M
  rl = sl;
9917
12.2M
  s = cur;
9918
12.2M
  sl = l;
9919
12.2M
  count++;
9920
12.2M
  if (count > 50) {
9921
233k
      SHRINK;
9922
233k
      GROW;
9923
233k
            if (ctxt->instate == XML_PARSER_EOF) {
9924
0
                goto out;
9925
0
            }
9926
233k
      count = 0;
9927
233k
  }
9928
12.2M
  NEXTL(l);
9929
12.2M
  cur = CUR_CHAR(l);
9930
12.2M
        if (len > maxLength) {
9931
0
            xmlFatalErrMsg(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9932
0
                           "CData section too big found\n");
9933
0
            goto out;
9934
0
        }
9935
12.2M
    }
9936
20.8k
    buf[len] = 0;
9937
20.8k
    if (cur != '>') {
9938
3.75k
  xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9939
3.75k
                       "CData section not finished\n%.50s\n", buf);
9940
3.75k
        goto out;
9941
3.75k
    }
9942
17.0k
    NEXTL(l);
9943
9944
    /*
9945
     * OK the buffer is to be consumed as cdata.
9946
     */
9947
17.0k
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9948
12.5k
  if (ctxt->sax->cdataBlock != NULL)
9949
8.09k
      ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9950
4.44k
  else if (ctxt->sax->characters != NULL)
9951
4.44k
      ctxt->sax->characters(ctxt->userData, buf, len);
9952
12.5k
    }
9953
9954
23.7k
out:
9955
23.7k
    if (ctxt->instate != XML_PARSER_EOF)
9956
23.7k
        ctxt->instate = XML_PARSER_CONTENT;
9957
23.7k
    xmlFree(buf);
9958
23.7k
}
9959
9960
/**
9961
 * xmlParseContentInternal:
9962
 * @ctxt:  an XML parser context
9963
 *
9964
 * Parse a content sequence. Stops at EOF or '</'. Leaves checking of
9965
 * unexpected EOF to the caller.
9966
 */
9967
9968
static void
9969
217k
xmlParseContentInternal(xmlParserCtxtPtr ctxt) {
9970
217k
    int nameNr = ctxt->nameNr;
9971
9972
217k
    GROW;
9973
14.5M
    while ((RAW != 0) &&
9974
14.5M
     (ctxt->instate != XML_PARSER_EOF)) {
9975
14.4M
  const xmlChar *cur = ctxt->input->cur;
9976
9977
  /*
9978
   * First case : a Processing Instruction.
9979
   */
9980
14.4M
  if ((*cur == '<') && (cur[1] == '?')) {
9981
33.3k
      xmlParsePI(ctxt);
9982
33.3k
  }
9983
9984
  /*
9985
   * Second case : a CDSection
9986
   */
9987
  /* 2.6.0 test was *cur not RAW */
9988
14.3M
  else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9989
23.7k
      xmlParseCDSect(ctxt);
9990
23.7k
  }
9991
9992
  /*
9993
   * Third case :  a comment
9994
   */
9995
14.3M
  else if ((*cur == '<') && (NXT(1) == '!') &&
9996
14.3M
     (NXT(2) == '-') && (NXT(3) == '-')) {
9997
111k
      xmlParseComment(ctxt);
9998
111k
      ctxt->instate = XML_PARSER_CONTENT;
9999
111k
  }
10000
10001
  /*
10002
   * Fourth case :  a sub-element.
10003
   */
10004
14.2M
  else if (*cur == '<') {
10005
5.87M
            if (NXT(1) == '/') {
10006
1.93M
                if (ctxt->nameNr <= nameNr)
10007
31.8k
                    break;
10008
1.90M
          xmlParseElementEnd(ctxt);
10009
3.93M
            } else {
10010
3.93M
          xmlParseElementStart(ctxt);
10011
3.93M
            }
10012
5.87M
  }
10013
10014
  /*
10015
   * Fifth case : a reference. If if has not been resolved,
10016
   *    parsing returns it's Name, create the node
10017
   */
10018
10019
8.36M
  else if (*cur == '&') {
10020
2.01M
      xmlParseReference(ctxt);
10021
2.01M
  }
10022
10023
  /*
10024
   * Last case, text. Note that References are handled directly.
10025
   */
10026
6.34M
  else {
10027
6.34M
      xmlParseCharData(ctxt, 0);
10028
6.34M
  }
10029
10030
14.3M
  GROW;
10031
14.3M
  SHRINK;
10032
14.3M
    }
10033
217k
}
10034
10035
/**
10036
 * xmlParseContent:
10037
 * @ctxt:  an XML parser context
10038
 *
10039
 * Parse a content sequence. Stops at EOF or '</'.
10040
 *
10041
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10042
 */
10043
10044
void
10045
145k
xmlParseContent(xmlParserCtxtPtr ctxt) {
10046
145k
    int nameNr = ctxt->nameNr;
10047
10048
145k
    xmlParseContentInternal(ctxt);
10049
10050
145k
    if ((ctxt->instate != XML_PARSER_EOF) && (ctxt->nameNr > nameNr)) {
10051
7.72k
        const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
10052
7.72k
        int line = ctxt->pushTab[ctxt->nameNr - 1].line;
10053
7.72k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
10054
7.72k
                "Premature end of data in tag %s line %d\n",
10055
7.72k
    name, line, NULL);
10056
7.72k
    }
10057
145k
}
10058
10059
/**
10060
 * xmlParseElement:
10061
 * @ctxt:  an XML parser context
10062
 *
10063
 * DEPRECATED: Internal function, don't use.
10064
 *
10065
 * parse an XML element
10066
 *
10067
 * [39] element ::= EmptyElemTag | STag content ETag
10068
 *
10069
 * [ WFC: Element Type Match ]
10070
 * The Name in an element's end-tag must match the element type in the
10071
 * start-tag.
10072
 *
10073
 */
10074
10075
void
10076
99.1k
xmlParseElement(xmlParserCtxtPtr ctxt) {
10077
99.1k
    if (xmlParseElementStart(ctxt) != 0)
10078
27.9k
        return;
10079
10080
71.2k
    xmlParseContentInternal(ctxt);
10081
71.2k
    if (ctxt->instate == XML_PARSER_EOF)
10082
247
  return;
10083
10084
70.9k
    if (CUR == 0) {
10085
41.2k
        const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
10086
41.2k
        int line = ctxt->pushTab[ctxt->nameNr - 1].line;
10087
41.2k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
10088
41.2k
                "Premature end of data in tag %s line %d\n",
10089
41.2k
    name, line, NULL);
10090
41.2k
        return;
10091
41.2k
    }
10092
10093
29.6k
    xmlParseElementEnd(ctxt);
10094
29.6k
}
10095
10096
/**
10097
 * xmlParseElementStart:
10098
 * @ctxt:  an XML parser context
10099
 *
10100
 * Parse the start of an XML element. Returns -1 in case of error, 0 if an
10101
 * opening tag was parsed, 1 if an empty element was parsed.
10102
 *
10103
 * Always consumes '<'.
10104
 */
10105
static int
10106
4.03M
xmlParseElementStart(xmlParserCtxtPtr ctxt) {
10107
4.03M
    const xmlChar *name;
10108
4.03M
    const xmlChar *prefix = NULL;
10109
4.03M
    const xmlChar *URI = NULL;
10110
4.03M
    xmlParserNodeInfo node_info;
10111
4.03M
    int line, tlen = 0;
10112
4.03M
    xmlNodePtr ret;
10113
4.03M
    int nsNr = ctxt->nsNr;
10114
10115
4.03M
    if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
10116
4.03M
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
10117
0
  xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
10118
0
     "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
10119
0
        xmlParserMaxDepth);
10120
0
  xmlHaltParser(ctxt);
10121
0
  return(-1);
10122
0
    }
10123
10124
    /* Capture start position */
10125
4.03M
    if (ctxt->record_info) {
10126
0
        node_info.begin_pos = ctxt->input->consumed +
10127
0
                          (CUR_PTR - ctxt->input->base);
10128
0
  node_info.begin_line = ctxt->input->line;
10129
0
    }
10130
10131
4.03M
    if (ctxt->spaceNr == 0)
10132
0
  spacePush(ctxt, -1);
10133
4.03M
    else if (*ctxt->space == -2)
10134
724k
  spacePush(ctxt, -1);
10135
3.31M
    else
10136
3.31M
  spacePush(ctxt, *ctxt->space);
10137
10138
4.03M
    line = ctxt->input->line;
10139
4.03M
#ifdef LIBXML_SAX1_ENABLED
10140
4.03M
    if (ctxt->sax2)
10141
2.59M
#endif /* LIBXML_SAX1_ENABLED */
10142
2.59M
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
10143
1.43M
#ifdef LIBXML_SAX1_ENABLED
10144
1.43M
    else
10145
1.43M
  name = xmlParseStartTag(ctxt);
10146
4.03M
#endif /* LIBXML_SAX1_ENABLED */
10147
4.03M
    if (ctxt->instate == XML_PARSER_EOF)
10148
193
  return(-1);
10149
4.03M
    if (name == NULL) {
10150
258k
  spacePop(ctxt);
10151
258k
        return(-1);
10152
258k
    }
10153
3.77M
    nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
10154
3.77M
    ret = ctxt->node;
10155
10156
3.77M
#ifdef LIBXML_VALID_ENABLED
10157
    /*
10158
     * [ VC: Root Element Type ]
10159
     * The Name in the document type declaration must match the element
10160
     * type of the root element.
10161
     */
10162
3.77M
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10163
3.77M
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
10164
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
10165
3.77M
#endif /* LIBXML_VALID_ENABLED */
10166
10167
    /*
10168
     * Check for an Empty Element.
10169
     */
10170
3.77M
    if ((RAW == '/') && (NXT(1) == '>')) {
10171
1.48M
        SKIP(2);
10172
1.48M
  if (ctxt->sax2) {
10173
1.05M
      if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
10174
1.05M
    (!ctxt->disableSAX))
10175
805k
    ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
10176
1.05M
#ifdef LIBXML_SAX1_ENABLED
10177
1.05M
  } else {
10178
433k
      if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
10179
433k
    (!ctxt->disableSAX))
10180
320k
    ctxt->sax->endElement(ctxt->userData, name);
10181
433k
#endif /* LIBXML_SAX1_ENABLED */
10182
433k
  }
10183
1.48M
  namePop(ctxt);
10184
1.48M
  spacePop(ctxt);
10185
1.48M
  if (nsNr != ctxt->nsNr)
10186
6.07k
      nsPop(ctxt, ctxt->nsNr - nsNr);
10187
1.48M
  if ( ret != NULL && ctxt->record_info ) {
10188
0
     node_info.end_pos = ctxt->input->consumed +
10189
0
            (CUR_PTR - ctxt->input->base);
10190
0
     node_info.end_line = ctxt->input->line;
10191
0
     node_info.node = ret;
10192
0
     xmlParserAddNodeInfo(ctxt, &node_info);
10193
0
  }
10194
1.48M
  return(1);
10195
1.48M
    }
10196
2.29M
    if (RAW == '>') {
10197
2.09M
        NEXT1;
10198
2.09M
    } else {
10199
193k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
10200
193k
         "Couldn't find end of Start Tag %s line %d\n",
10201
193k
                    name, line, NULL);
10202
10203
  /*
10204
   * end of parsing of this node.
10205
   */
10206
193k
  nodePop(ctxt);
10207
193k
  namePop(ctxt);
10208
193k
  spacePop(ctxt);
10209
193k
  if (nsNr != ctxt->nsNr)
10210
12.0k
      nsPop(ctxt, ctxt->nsNr - nsNr);
10211
10212
  /*
10213
   * Capture end position and add node
10214
   */
10215
193k
  if ( ret != NULL && ctxt->record_info ) {
10216
0
     node_info.end_pos = ctxt->input->consumed +
10217
0
            (CUR_PTR - ctxt->input->base);
10218
0
     node_info.end_line = ctxt->input->line;
10219
0
     node_info.node = ret;
10220
0
     xmlParserAddNodeInfo(ctxt, &node_info);
10221
0
  }
10222
193k
  return(-1);
10223
193k
    }
10224
10225
2.09M
    return(0);
10226
2.29M
}
10227
10228
/**
10229
 * xmlParseElementEnd:
10230
 * @ctxt:  an XML parser context
10231
 *
10232
 * Parse the end of an XML element. Always consumes '</'.
10233
 */
10234
static void
10235
1.93M
xmlParseElementEnd(xmlParserCtxtPtr ctxt) {
10236
1.93M
    xmlParserNodeInfo node_info;
10237
1.93M
    xmlNodePtr ret = ctxt->node;
10238
10239
1.93M
    if (ctxt->nameNr <= 0) {
10240
0
        if ((RAW == '<') && (NXT(1) == '/'))
10241
0
            SKIP(2);
10242
0
        return;
10243
0
    }
10244
10245
    /*
10246
     * parse the end of tag: '</' should be here.
10247
     */
10248
1.93M
    if (ctxt->sax2) {
10249
1.18M
  xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
10250
1.18M
  namePop(ctxt);
10251
1.18M
    }
10252
750k
#ifdef LIBXML_SAX1_ENABLED
10253
750k
    else
10254
750k
  xmlParseEndTag1(ctxt, 0);
10255
1.93M
#endif /* LIBXML_SAX1_ENABLED */
10256
10257
    /*
10258
     * Capture end position and add node
10259
     */
10260
1.93M
    if ( ret != NULL && ctxt->record_info ) {
10261
0
       node_info.end_pos = ctxt->input->consumed +
10262
0
                          (CUR_PTR - ctxt->input->base);
10263
0
       node_info.end_line = ctxt->input->line;
10264
0
       node_info.node = ret;
10265
0
       xmlParserAddNodeInfo(ctxt, &node_info);
10266
0
    }
10267
1.93M
}
10268
10269
/**
10270
 * xmlParseVersionNum:
10271
 * @ctxt:  an XML parser context
10272
 *
10273
 * DEPRECATED: Internal function, don't use.
10274
 *
10275
 * parse the XML version value.
10276
 *
10277
 * [26] VersionNum ::= '1.' [0-9]+
10278
 *
10279
 * In practice allow [0-9].[0-9]+ at that level
10280
 *
10281
 * Returns the string giving the XML version number, or NULL
10282
 */
10283
xmlChar *
10284
218k
xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10285
218k
    xmlChar *buf = NULL;
10286
218k
    int len = 0;
10287
218k
    int size = 10;
10288
218k
    xmlChar cur;
10289
10290
218k
    buf = (xmlChar *) xmlMallocAtomic(size);
10291
218k
    if (buf == NULL) {
10292
0
  xmlErrMemory(ctxt, NULL);
10293
0
  return(NULL);
10294
0
    }
10295
218k
    cur = CUR;
10296
218k
    if (!((cur >= '0') && (cur <= '9'))) {
10297
2.41k
  xmlFree(buf);
10298
2.41k
  return(NULL);
10299
2.41k
    }
10300
216k
    buf[len++] = cur;
10301
216k
    NEXT;
10302
216k
    cur=CUR;
10303
216k
    if (cur != '.') {
10304
3.08k
  xmlFree(buf);
10305
3.08k
  return(NULL);
10306
3.08k
    }
10307
213k
    buf[len++] = cur;
10308
213k
    NEXT;
10309
213k
    cur=CUR;
10310
1.02M
    while ((cur >= '0') && (cur <= '9')) {
10311
807k
  if (len + 1 >= size) {
10312
1.99k
      xmlChar *tmp;
10313
10314
1.99k
      size *= 2;
10315
1.99k
      tmp = (xmlChar *) xmlRealloc(buf, size);
10316
1.99k
      if (tmp == NULL) {
10317
0
          xmlFree(buf);
10318
0
    xmlErrMemory(ctxt, NULL);
10319
0
    return(NULL);
10320
0
      }
10321
1.99k
      buf = tmp;
10322
1.99k
  }
10323
807k
  buf[len++] = cur;
10324
807k
  NEXT;
10325
807k
  cur=CUR;
10326
807k
    }
10327
213k
    buf[len] = 0;
10328
213k
    return(buf);
10329
213k
}
10330
10331
/**
10332
 * xmlParseVersionInfo:
10333
 * @ctxt:  an XML parser context
10334
 *
10335
 * DEPRECATED: Internal function, don't use.
10336
 *
10337
 * parse the XML version.
10338
 *
10339
 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
10340
 *
10341
 * [25] Eq ::= S? '=' S?
10342
 *
10343
 * Returns the version string, e.g. "1.0"
10344
 */
10345
10346
xmlChar *
10347
245k
xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10348
245k
    xmlChar *version = NULL;
10349
10350
245k
    if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
10351
224k
  SKIP(7);
10352
224k
  SKIP_BLANKS;
10353
224k
  if (RAW != '=') {
10354
2.80k
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10355
2.80k
      return(NULL);
10356
2.80k
        }
10357
221k
  NEXT;
10358
221k
  SKIP_BLANKS;
10359
221k
  if (RAW == '"') {
10360
194k
      NEXT;
10361
194k
      version = xmlParseVersionNum(ctxt);
10362
194k
      if (RAW != '"') {
10363
8.30k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10364
8.30k
      } else
10365
186k
          NEXT;
10366
194k
  } else if (RAW == '\''){
10367
24.0k
      NEXT;
10368
24.0k
      version = xmlParseVersionNum(ctxt);
10369
24.0k
      if (RAW != '\'') {
10370
2.75k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10371
2.75k
      } else
10372
21.2k
          NEXT;
10373
24.0k
  } else {
10374
2.93k
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10375
2.93k
  }
10376
221k
    }
10377
242k
    return(version);
10378
245k
}
10379
10380
/**
10381
 * xmlParseEncName:
10382
 * @ctxt:  an XML parser context
10383
 *
10384
 * DEPRECATED: Internal function, don't use.
10385
 *
10386
 * parse the XML encoding name
10387
 *
10388
 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10389
 *
10390
 * Returns the encoding name value or NULL
10391
 */
10392
xmlChar *
10393
95.3k
xmlParseEncName(xmlParserCtxtPtr ctxt) {
10394
95.3k
    xmlChar *buf = NULL;
10395
95.3k
    int len = 0;
10396
95.3k
    int size = 10;
10397
95.3k
    xmlChar cur;
10398
10399
95.3k
    cur = CUR;
10400
95.3k
    if (((cur >= 'a') && (cur <= 'z')) ||
10401
95.3k
        ((cur >= 'A') && (cur <= 'Z'))) {
10402
94.1k
  buf = (xmlChar *) xmlMallocAtomic(size);
10403
94.1k
  if (buf == NULL) {
10404
0
      xmlErrMemory(ctxt, NULL);
10405
0
      return(NULL);
10406
0
  }
10407
10408
94.1k
  buf[len++] = cur;
10409
94.1k
  NEXT;
10410
94.1k
  cur = CUR;
10411
1.59M
  while (((cur >= 'a') && (cur <= 'z')) ||
10412
1.59M
         ((cur >= 'A') && (cur <= 'Z')) ||
10413
1.59M
         ((cur >= '0') && (cur <= '9')) ||
10414
1.59M
         (cur == '.') || (cur == '_') ||
10415
1.59M
         (cur == '-')) {
10416
1.50M
      if (len + 1 >= size) {
10417
44.3k
          xmlChar *tmp;
10418
10419
44.3k
    size *= 2;
10420
44.3k
    tmp = (xmlChar *) xmlRealloc(buf, size);
10421
44.3k
    if (tmp == NULL) {
10422
0
        xmlErrMemory(ctxt, NULL);
10423
0
        xmlFree(buf);
10424
0
        return(NULL);
10425
0
    }
10426
44.3k
    buf = tmp;
10427
44.3k
      }
10428
1.50M
      buf[len++] = cur;
10429
1.50M
      NEXT;
10430
1.50M
      cur = CUR;
10431
1.50M
      if (cur == 0) {
10432
1.45k
          SHRINK;
10433
1.45k
    GROW;
10434
1.45k
    cur = CUR;
10435
1.45k
      }
10436
1.50M
        }
10437
94.1k
  buf[len] = 0;
10438
94.1k
    } else {
10439
1.22k
  xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10440
1.22k
    }
10441
95.3k
    return(buf);
10442
95.3k
}
10443
10444
/**
10445
 * xmlParseEncodingDecl:
10446
 * @ctxt:  an XML parser context
10447
 *
10448
 * DEPRECATED: Internal function, don't use.
10449
 *
10450
 * parse the XML encoding declaration
10451
 *
10452
 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' |  "'" EncName "'")
10453
 *
10454
 * this setups the conversion filters.
10455
 *
10456
 * Returns the encoding value or NULL
10457
 */
10458
10459
const xmlChar *
10460
167k
xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10461
167k
    xmlChar *encoding = NULL;
10462
10463
167k
    SKIP_BLANKS;
10464
167k
    if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
10465
98.8k
  SKIP(8);
10466
98.8k
  SKIP_BLANKS;
10467
98.8k
  if (RAW != '=') {
10468
1.70k
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10469
1.70k
      return(NULL);
10470
1.70k
        }
10471
97.1k
  NEXT;
10472
97.1k
  SKIP_BLANKS;
10473
97.1k
  if (RAW == '"') {
10474
82.4k
      NEXT;
10475
82.4k
      encoding = xmlParseEncName(ctxt);
10476
82.4k
      if (RAW != '"') {
10477
4.60k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10478
4.60k
    xmlFree((xmlChar *) encoding);
10479
4.60k
    return(NULL);
10480
4.60k
      } else
10481
77.8k
          NEXT;
10482
82.4k
  } else if (RAW == '\''){
10483
12.8k
      NEXT;
10484
12.8k
      encoding = xmlParseEncName(ctxt);
10485
12.8k
      if (RAW != '\'') {
10486
2.11k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10487
2.11k
    xmlFree((xmlChar *) encoding);
10488
2.11k
    return(NULL);
10489
2.11k
      } else
10490
10.7k
          NEXT;
10491
12.8k
  } else {
10492
1.79k
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10493
1.79k
  }
10494
10495
        /*
10496
         * Non standard parsing, allowing the user to ignore encoding
10497
         */
10498
90.4k
        if (ctxt->options & XML_PARSE_IGNORE_ENC) {
10499
40.9k
      xmlFree((xmlChar *) encoding);
10500
40.9k
            return(NULL);
10501
40.9k
  }
10502
10503
  /*
10504
   * UTF-16 encoding switch has already taken place at this stage,
10505
   * more over the little-endian/big-endian selection is already done
10506
   */
10507
49.4k
        if ((encoding != NULL) &&
10508
49.4k
      ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10509
48.6k
       (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
10510
      /*
10511
       * If no encoding was passed to the parser, that we are
10512
       * using UTF-16 and no decoder is present i.e. the
10513
       * document is apparently UTF-8 compatible, then raise an
10514
       * encoding mismatch fatal error
10515
       */
10516
1.82k
      if ((ctxt->encoding == NULL) &&
10517
1.82k
          (ctxt->input->buf != NULL) &&
10518
1.82k
          (ctxt->input->buf->encoder == NULL)) {
10519
1.76k
    xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10520
1.76k
      "Document labelled UTF-16 but has UTF-8 content\n");
10521
1.76k
      }
10522
1.82k
      if (ctxt->encoding != NULL)
10523
52
    xmlFree((xmlChar *) ctxt->encoding);
10524
1.82k
      ctxt->encoding = encoding;
10525
1.82k
  }
10526
  /*
10527
   * UTF-8 encoding is handled natively
10528
   */
10529
47.6k
        else if ((encoding != NULL) &&
10530
47.6k
      ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10531
46.7k
       (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
10532
18.3k
      if (ctxt->encoding != NULL)
10533
461
    xmlFree((xmlChar *) ctxt->encoding);
10534
18.3k
      ctxt->encoding = encoding;
10535
18.3k
  }
10536
29.2k
  else if (encoding != NULL) {
10537
28.4k
      xmlCharEncodingHandlerPtr handler;
10538
10539
28.4k
      if (ctxt->input->encoding != NULL)
10540
0
    xmlFree((xmlChar *) ctxt->input->encoding);
10541
28.4k
      ctxt->input->encoding = encoding;
10542
10543
28.4k
            handler = xmlFindCharEncodingHandler((const char *) encoding);
10544
28.4k
      if (handler != NULL) {
10545
27.7k
    if (xmlSwitchToEncoding(ctxt, handler) < 0) {
10546
        /* failed to convert */
10547
524
        ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
10548
524
        return(NULL);
10549
524
    }
10550
27.7k
      } else {
10551
682
    xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
10552
682
      "Unsupported encoding %s\n", encoding);
10553
682
    return(NULL);
10554
682
      }
10555
28.4k
  }
10556
49.4k
    }
10557
116k
    return(encoding);
10558
167k
}
10559
10560
/**
10561
 * xmlParseSDDecl:
10562
 * @ctxt:  an XML parser context
10563
 *
10564
 * DEPRECATED: Internal function, don't use.
10565
 *
10566
 * parse the XML standalone declaration
10567
 *
10568
 * [32] SDDecl ::= S 'standalone' Eq
10569
 *                 (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10570
 *
10571
 * [ VC: Standalone Document Declaration ]
10572
 * TODO The standalone document declaration must have the value "no"
10573
 * if any external markup declarations contain declarations of:
10574
 *  - attributes with default values, if elements to which these
10575
 *    attributes apply appear in the document without specifications
10576
 *    of values for these attributes, or
10577
 *  - entities (other than amp, lt, gt, apos, quot), if references
10578
 *    to those entities appear in the document, or
10579
 *  - attributes with values subject to normalization, where the
10580
 *    attribute appears in the document with a value which will change
10581
 *    as a result of normalization, or
10582
 *  - element types with element content, if white space occurs directly
10583
 *    within any instance of those types.
10584
 *
10585
 * Returns:
10586
 *   1 if standalone="yes"
10587
 *   0 if standalone="no"
10588
 *  -2 if standalone attribute is missing or invalid
10589
 *    (A standalone value of -2 means that the XML declaration was found,
10590
 *     but no value was specified for the standalone attribute).
10591
 */
10592
10593
int
10594
115k
xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10595
115k
    int standalone = -2;
10596
10597
115k
    SKIP_BLANKS;
10598
115k
    if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10599
20.6k
  SKIP(10);
10600
20.6k
        SKIP_BLANKS;
10601
20.6k
  if (RAW != '=') {
10602
249
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10603
249
      return(standalone);
10604
249
        }
10605
20.3k
  NEXT;
10606
20.3k
  SKIP_BLANKS;
10607
20.3k
        if (RAW == '\''){
10608
8.67k
      NEXT;
10609
8.67k
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10610
7.04k
          standalone = 0;
10611
7.04k
                SKIP(2);
10612
7.04k
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10613
1.62k
                 (NXT(2) == 's')) {
10614
1.34k
          standalone = 1;
10615
1.34k
    SKIP(3);
10616
1.34k
            } else {
10617
285
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10618
285
      }
10619
8.67k
      if (RAW != '\'') {
10620
453
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10621
453
      } else
10622
8.22k
          NEXT;
10623
11.6k
  } else if (RAW == '"'){
10624
11.4k
      NEXT;
10625
11.4k
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10626
3.59k
          standalone = 0;
10627
3.59k
    SKIP(2);
10628
7.82k
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10629
7.82k
                 (NXT(2) == 's')) {
10630
6.60k
          standalone = 1;
10631
6.60k
                SKIP(3);
10632
6.60k
            } else {
10633
1.22k
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10634
1.22k
      }
10635
11.4k
      if (RAW != '"') {
10636
1.40k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10637
1.40k
      } else
10638
10.0k
          NEXT;
10639
11.4k
  } else {
10640
270
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10641
270
        }
10642
20.3k
    }
10643
115k
    return(standalone);
10644
115k
}
10645
10646
/**
10647
 * xmlParseXMLDecl:
10648
 * @ctxt:  an XML parser context
10649
 *
10650
 * DEPRECATED: Internal function, don't use.
10651
 *
10652
 * parse an XML declaration header
10653
 *
10654
 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10655
 */
10656
10657
void
10658
209k
xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10659
209k
    xmlChar *version;
10660
10661
    /*
10662
     * This value for standalone indicates that the document has an
10663
     * XML declaration but it does not have a standalone attribute.
10664
     * It will be overwritten later if a standalone attribute is found.
10665
     */
10666
209k
    ctxt->input->standalone = -2;
10667
10668
    /*
10669
     * We know that '<?xml' is here.
10670
     */
10671
209k
    SKIP(5);
10672
10673
209k
    if (!IS_BLANK_CH(RAW)) {
10674
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10675
0
                 "Blank needed after '<?xml'\n");
10676
0
    }
10677
209k
    SKIP_BLANKS;
10678
10679
    /*
10680
     * We must have the VersionInfo here.
10681
     */
10682
209k
    version = xmlParseVersionInfo(ctxt);
10683
209k
    if (version == NULL) {
10684
19.7k
  xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10685
189k
    } else {
10686
189k
  if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10687
      /*
10688
       * Changed here for XML-1.0 5th edition
10689
       */
10690
3.06k
      if (ctxt->options & XML_PARSE_OLD10) {
10691
1.08k
    xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10692
1.08k
                "Unsupported version '%s'\n",
10693
1.08k
                version);
10694
1.98k
      } else {
10695
1.98k
          if ((version[0] == '1') && ((version[1] == '.'))) {
10696
1.76k
        xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10697
1.76k
                      "Unsupported version '%s'\n",
10698
1.76k
          version, NULL);
10699
1.76k
    } else {
10700
213
        xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10701
213
              "Unsupported version '%s'\n",
10702
213
              version);
10703
213
    }
10704
1.98k
      }
10705
3.06k
  }
10706
189k
  if (ctxt->version != NULL)
10707
0
      xmlFree((void *) ctxt->version);
10708
189k
  ctxt->version = version;
10709
189k
    }
10710
10711
    /*
10712
     * We may have the encoding declaration
10713
     */
10714
209k
    if (!IS_BLANK_CH(RAW)) {
10715
101k
        if ((RAW == '?') && (NXT(1) == '>')) {
10716
77.5k
      SKIP(2);
10717
77.5k
      return;
10718
77.5k
  }
10719
23.8k
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10720
23.8k
    }
10721
132k
    xmlParseEncodingDecl(ctxt);
10722
132k
    if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10723
132k
         (ctxt->instate == XML_PARSER_EOF)) {
10724
  /*
10725
   * The XML REC instructs us to stop parsing right here
10726
   */
10727
423
        return;
10728
423
    }
10729
10730
    /*
10731
     * We may have the standalone status.
10732
     */
10733
131k
    if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10734
17.4k
        if ((RAW == '?') && (NXT(1) == '>')) {
10735
15.9k
      SKIP(2);
10736
15.9k
      return;
10737
15.9k
  }
10738
1.46k
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10739
1.46k
    }
10740
10741
    /*
10742
     * We can grow the input buffer freely at that point
10743
     */
10744
115k
    GROW;
10745
10746
115k
    SKIP_BLANKS;
10747
115k
    ctxt->input->standalone = xmlParseSDDecl(ctxt);
10748
10749
115k
    SKIP_BLANKS;
10750
115k
    if ((RAW == '?') && (NXT(1) == '>')) {
10751
76.1k
        SKIP(2);
10752
76.1k
    } else if (RAW == '>') {
10753
        /* Deprecated old WD ... */
10754
585
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10755
585
  NEXT;
10756
38.8k
    } else {
10757
38.8k
        int c;
10758
10759
38.8k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10760
2.50M
        while ((c = CUR) != 0) {
10761
2.50M
            NEXT;
10762
2.50M
            if (c == '>')
10763
34.9k
                break;
10764
2.50M
        }
10765
38.8k
    }
10766
115k
}
10767
10768
/**
10769
 * xmlParseMisc:
10770
 * @ctxt:  an XML parser context
10771
 *
10772
 * DEPRECATED: Internal function, don't use.
10773
 *
10774
 * parse an XML Misc* optional field.
10775
 *
10776
 * [27] Misc ::= Comment | PI |  S
10777
 */
10778
10779
void
10780
295k
xmlParseMisc(xmlParserCtxtPtr ctxt) {
10781
331k
    while (ctxt->instate != XML_PARSER_EOF) {
10782
331k
        SKIP_BLANKS;
10783
331k
        GROW;
10784
331k
        if ((RAW == '<') && (NXT(1) == '?')) {
10785
23.4k
      xmlParsePI(ctxt);
10786
308k
        } else if (CMP4(CUR_PTR, '<', '!', '-', '-')) {
10787
12.8k
      xmlParseComment(ctxt);
10788
295k
        } else {
10789
295k
            break;
10790
295k
        }
10791
331k
    }
10792
295k
}
10793
10794
/**
10795
 * xmlParseDocument:
10796
 * @ctxt:  an XML parser context
10797
 *
10798
 * parse an XML document (and build a tree if using the standard SAX
10799
 * interface).
10800
 *
10801
 * [1] document ::= prolog element Misc*
10802
 *
10803
 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10804
 *
10805
 * Returns 0, -1 in case of error. the parser context is augmented
10806
 *                as a result of the parsing.
10807
 */
10808
10809
int
10810
133k
xmlParseDocument(xmlParserCtxtPtr ctxt) {
10811
133k
    xmlChar start[4];
10812
133k
    xmlCharEncoding enc;
10813
10814
133k
    xmlInitParser();
10815
10816
133k
    if ((ctxt == NULL) || (ctxt->input == NULL))
10817
0
        return(-1);
10818
10819
133k
    GROW;
10820
10821
    /*
10822
     * SAX: detecting the level.
10823
     */
10824
133k
    xmlDetectSAX2(ctxt);
10825
10826
    /*
10827
     * SAX: beginning of the document processing.
10828
     */
10829
133k
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10830
133k
        ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10831
133k
    if (ctxt->instate == XML_PARSER_EOF)
10832
0
  return(-1);
10833
10834
133k
    if ((ctxt->encoding == NULL) &&
10835
133k
        ((ctxt->input->end - ctxt->input->cur) >= 4)) {
10836
  /*
10837
   * Get the 4 first bytes and decode the charset
10838
   * if enc != XML_CHAR_ENCODING_NONE
10839
   * plug some encoding conversion routines.
10840
   */
10841
133k
  start[0] = RAW;
10842
133k
  start[1] = NXT(1);
10843
133k
  start[2] = NXT(2);
10844
133k
  start[3] = NXT(3);
10845
133k
  enc = xmlDetectCharEncoding(&start[0], 4);
10846
133k
  if (enc != XML_CHAR_ENCODING_NONE) {
10847
74.2k
      xmlSwitchEncoding(ctxt, enc);
10848
74.2k
  }
10849
133k
    }
10850
10851
10852
133k
    if (CUR == 0) {
10853
297
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10854
297
  return(-1);
10855
297
    }
10856
10857
    /*
10858
     * Check for the XMLDecl in the Prolog.
10859
     * do not GROW here to avoid the detected encoder to decode more
10860
     * than just the first line, unless the amount of data is really
10861
     * too small to hold "<?xml version="1.0" encoding="foo"
10862
     */
10863
133k
    if ((ctxt->input->end - ctxt->input->cur) < 35) {
10864
5.66k
       GROW;
10865
5.66k
    }
10866
133k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10867
10868
  /*
10869
   * Note that we will switch encoding on the fly.
10870
   */
10871
69.9k
  xmlParseXMLDecl(ctxt);
10872
69.9k
  if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10873
69.9k
      (ctxt->instate == XML_PARSER_EOF)) {
10874
      /*
10875
       * The XML REC instructs us to stop parsing right here
10876
       */
10877
141
      return(-1);
10878
141
  }
10879
69.7k
  ctxt->standalone = ctxt->input->standalone;
10880
69.7k
  SKIP_BLANKS;
10881
69.7k
    } else {
10882
63.4k
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10883
63.4k
    }
10884
133k
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10885
130k
        ctxt->sax->startDocument(ctxt->userData);
10886
133k
    if (ctxt->instate == XML_PARSER_EOF)
10887
0
  return(-1);
10888
133k
    if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10889
133k
        (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10890
0
  ctxt->myDoc->compression = ctxt->input->buf->compressed;
10891
0
    }
10892
10893
    /*
10894
     * The Misc part of the Prolog
10895
     */
10896
133k
    xmlParseMisc(ctxt);
10897
10898
    /*
10899
     * Then possibly doc type declaration(s) and more Misc
10900
     * (doctypedecl Misc*)?
10901
     */
10902
133k
    GROW;
10903
133k
    if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10904
10905
84.5k
  ctxt->inSubset = 1;
10906
84.5k
  xmlParseDocTypeDecl(ctxt);
10907
84.5k
  if (RAW == '[') {
10908
67.9k
      ctxt->instate = XML_PARSER_DTD;
10909
67.9k
      xmlParseInternalSubset(ctxt);
10910
67.9k
      if (ctxt->instate == XML_PARSER_EOF)
10911
17.3k
    return(-1);
10912
67.9k
  }
10913
10914
  /*
10915
   * Create and update the external subset.
10916
   */
10917
67.1k
  ctxt->inSubset = 2;
10918
67.1k
  if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10919
67.1k
      (!ctxt->disableSAX))
10920
63.6k
      ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10921
63.6k
                                ctxt->extSubSystem, ctxt->extSubURI);
10922
67.1k
  if (ctxt->instate == XML_PARSER_EOF)
10923
4.24k
      return(-1);
10924
62.9k
  ctxt->inSubset = 0;
10925
10926
62.9k
        xmlCleanSpecialAttr(ctxt);
10927
10928
62.9k
  ctxt->instate = XML_PARSER_PROLOG;
10929
62.9k
  xmlParseMisc(ctxt);
10930
62.9k
    }
10931
10932
    /*
10933
     * Time to start parsing the tree itself
10934
     */
10935
111k
    GROW;
10936
111k
    if (RAW != '<') {
10937
12.5k
  xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10938
12.5k
           "Start tag expected, '<' not found\n");
10939
99.1k
    } else {
10940
99.1k
  ctxt->instate = XML_PARSER_CONTENT;
10941
99.1k
  xmlParseElement(ctxt);
10942
99.1k
  ctxt->instate = XML_PARSER_EPILOG;
10943
10944
10945
  /*
10946
   * The Misc part at the end
10947
   */
10948
99.1k
  xmlParseMisc(ctxt);
10949
10950
99.1k
  if (RAW != 0) {
10951
28.4k
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10952
28.4k
  }
10953
99.1k
  ctxt->instate = XML_PARSER_EOF;
10954
99.1k
    }
10955
10956
    /*
10957
     * SAX: end of the document processing.
10958
     */
10959
111k
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10960
111k
        ctxt->sax->endDocument(ctxt->userData);
10961
10962
    /*
10963
     * Remove locally kept entity definitions if the tree was not built
10964
     */
10965
111k
    if ((ctxt->myDoc != NULL) &&
10966
111k
  (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10967
230
  xmlFreeDoc(ctxt->myDoc);
10968
230
  ctxt->myDoc = NULL;
10969
230
    }
10970
10971
111k
    if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10972
12.6k
        ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10973
12.6k
  if (ctxt->valid)
10974
8.92k
      ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10975
12.6k
  if (ctxt->nsWellFormed)
10976
11.7k
      ctxt->myDoc->properties |= XML_DOC_NSVALID;
10977
12.6k
  if (ctxt->options & XML_PARSE_OLD10)
10978
2.08k
      ctxt->myDoc->properties |= XML_DOC_OLD10;
10979
12.6k
    }
10980
111k
    if (! ctxt->wellFormed) {
10981
99.0k
  ctxt->valid = 0;
10982
99.0k
  return(-1);
10983
99.0k
    }
10984
12.6k
    return(0);
10985
111k
}
10986
10987
/**
10988
 * xmlParseExtParsedEnt:
10989
 * @ctxt:  an XML parser context
10990
 *
10991
 * parse a general parsed entity
10992
 * An external general parsed entity is well-formed if it matches the
10993
 * production labeled extParsedEnt.
10994
 *
10995
 * [78] extParsedEnt ::= TextDecl? content
10996
 *
10997
 * Returns 0, -1 in case of error. the parser context is augmented
10998
 *                as a result of the parsing.
10999
 */
11000
11001
int
11002
0
xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
11003
0
    xmlChar start[4];
11004
0
    xmlCharEncoding enc;
11005
11006
0
    if ((ctxt == NULL) || (ctxt->input == NULL))
11007
0
        return(-1);
11008
11009
0
    xmlDetectSAX2(ctxt);
11010
11011
0
    GROW;
11012
11013
    /*
11014
     * SAX: beginning of the document processing.
11015
     */
11016
0
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11017
0
        ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
11018
11019
    /*
11020
     * Get the 4 first bytes and decode the charset
11021
     * if enc != XML_CHAR_ENCODING_NONE
11022
     * plug some encoding conversion routines.
11023
     */
11024
0
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11025
0
  start[0] = RAW;
11026
0
  start[1] = NXT(1);
11027
0
  start[2] = NXT(2);
11028
0
  start[3] = NXT(3);
11029
0
  enc = xmlDetectCharEncoding(start, 4);
11030
0
  if (enc != XML_CHAR_ENCODING_NONE) {
11031
0
      xmlSwitchEncoding(ctxt, enc);
11032
0
  }
11033
0
    }
11034
11035
11036
0
    if (CUR == 0) {
11037
0
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11038
0
    }
11039
11040
    /*
11041
     * Check for the XMLDecl in the Prolog.
11042
     */
11043
0
    GROW;
11044
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
11045
11046
  /*
11047
   * Note that we will switch encoding on the fly.
11048
   */
11049
0
  xmlParseXMLDecl(ctxt);
11050
0
  if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11051
      /*
11052
       * The XML REC instructs us to stop parsing right here
11053
       */
11054
0
      return(-1);
11055
0
  }
11056
0
  SKIP_BLANKS;
11057
0
    } else {
11058
0
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11059
0
    }
11060
0
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
11061
0
        ctxt->sax->startDocument(ctxt->userData);
11062
0
    if (ctxt->instate == XML_PARSER_EOF)
11063
0
  return(-1);
11064
11065
    /*
11066
     * Doing validity checking on chunk doesn't make sense
11067
     */
11068
0
    ctxt->instate = XML_PARSER_CONTENT;
11069
0
    ctxt->validate = 0;
11070
0
    ctxt->loadsubset = 0;
11071
0
    ctxt->depth = 0;
11072
11073
0
    xmlParseContent(ctxt);
11074
0
    if (ctxt->instate == XML_PARSER_EOF)
11075
0
  return(-1);
11076
11077
0
    if ((RAW == '<') && (NXT(1) == '/')) {
11078
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11079
0
    } else if (RAW != 0) {
11080
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
11081
0
    }
11082
11083
    /*
11084
     * SAX: end of the document processing.
11085
     */
11086
0
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11087
0
        ctxt->sax->endDocument(ctxt->userData);
11088
11089
0
    if (! ctxt->wellFormed) return(-1);
11090
0
    return(0);
11091
0
}
11092
11093
#ifdef LIBXML_PUSH_ENABLED
11094
/************************************************************************
11095
 *                  *
11096
 *    Progressive parsing interfaces        *
11097
 *                  *
11098
 ************************************************************************/
11099
11100
/**
11101
 * xmlParseLookupChar:
11102
 * @ctxt:  an XML parser context
11103
 * @c:  character
11104
 *
11105
 * Check whether the input buffer contains a character.
11106
 */
11107
static int
11108
3.84M
xmlParseLookupChar(xmlParserCtxtPtr ctxt, int c) {
11109
3.84M
    const xmlChar *cur;
11110
11111
3.84M
    if (ctxt->checkIndex == 0) {
11112
3.72M
        cur = ctxt->input->cur + 1;
11113
3.72M
    } else {
11114
127k
        cur = ctxt->input->cur + ctxt->checkIndex;
11115
127k
    }
11116
11117
3.84M
    if (memchr(cur, c, ctxt->input->end - cur) == NULL) {
11118
137k
        ctxt->checkIndex = ctxt->input->end - ctxt->input->cur;
11119
137k
        return(0);
11120
3.71M
    } else {
11121
3.71M
        ctxt->checkIndex = 0;
11122
3.71M
        return(1);
11123
3.71M
    }
11124
3.84M
}
11125
11126
/**
11127
 * xmlParseLookupString:
11128
 * @ctxt:  an XML parser context
11129
 * @startDelta: delta to apply at the start
11130
 * @str:  string
11131
 * @strLen:  length of string
11132
 *
11133
 * Check whether the input buffer contains a string.
11134
 */
11135
static const xmlChar *
11136
xmlParseLookupString(xmlParserCtxtPtr ctxt, size_t startDelta,
11137
910k
                     const char *str, size_t strLen) {
11138
910k
    const xmlChar *cur, *term;
11139
11140
910k
    if (ctxt->checkIndex == 0) {
11141
510k
        cur = ctxt->input->cur + startDelta;
11142
510k
    } else {
11143
400k
        cur = ctxt->input->cur + ctxt->checkIndex;
11144
400k
    }
11145
11146
910k
    term = BAD_CAST strstr((const char *) cur, str);
11147
910k
    if (term == NULL) {
11148
511k
        const xmlChar *end = ctxt->input->end;
11149
11150
        /* Rescan (strLen - 1) characters. */
11151
511k
        if ((size_t) (end - cur) < strLen)
11152
9.67k
            end = cur;
11153
502k
        else
11154
502k
            end -= strLen - 1;
11155
511k
        ctxt->checkIndex = end - ctxt->input->cur;
11156
511k
    } else {
11157
399k
        ctxt->checkIndex = 0;
11158
399k
    }
11159
11160
910k
    return(term);
11161
910k
}
11162
11163
/**
11164
 * xmlParseLookupCharData:
11165
 * @ctxt:  an XML parser context
11166
 *
11167
 * Check whether the input buffer contains terminated char data.
11168
 */
11169
static int
11170
5.74M
xmlParseLookupCharData(xmlParserCtxtPtr ctxt) {
11171
5.74M
    const xmlChar *cur = ctxt->input->cur + ctxt->checkIndex;
11172
5.74M
    const xmlChar *end = ctxt->input->end;
11173
11174
103M
    while (cur < end) {
11175
102M
        if ((*cur == '<') || (*cur == '&')) {
11176
5.11M
            ctxt->checkIndex = 0;
11177
5.11M
            return(1);
11178
5.11M
        }
11179
97.3M
        cur++;
11180
97.3M
    }
11181
11182
630k
    ctxt->checkIndex = cur - ctxt->input->cur;
11183
630k
    return(0);
11184
5.74M
}
11185
11186
/**
11187
 * xmlParseLookupGt:
11188
 * @ctxt:  an XML parser context
11189
 *
11190
 * Check whether there's enough data in the input buffer to finish parsing
11191
 * a start tag. This has to take quotes into account.
11192
 */
11193
static int
11194
5.79M
xmlParseLookupGt(xmlParserCtxtPtr ctxt) {
11195
5.79M
    const xmlChar *cur;
11196
5.79M
    const xmlChar *end = ctxt->input->end;
11197
5.79M
    int state = ctxt->endCheckState;
11198
11199
5.79M
    if (ctxt->checkIndex == 0)
11200
4.65M
        cur = ctxt->input->cur + 1;
11201
1.14M
    else
11202
1.14M
        cur = ctxt->input->cur + ctxt->checkIndex;
11203
11204
249M
    while (cur < end) {
11205
248M
        if (state) {
11206
141M
            if (*cur == state)
11207
6.20M
                state = 0;
11208
141M
        } else if (*cur == '\'' || *cur == '"') {
11209
6.24M
            state = *cur;
11210
100M
        } else if (*cur == '>') {
11211
4.58M
            ctxt->checkIndex = 0;
11212
4.58M
            ctxt->endCheckState = 0;
11213
4.58M
            return(1);
11214
4.58M
        }
11215
243M
        cur++;
11216
243M
    }
11217
11218
1.21M
    ctxt->checkIndex = cur - ctxt->input->cur;
11219
1.21M
    ctxt->endCheckState = state;
11220
1.21M
    return(0);
11221
5.79M
}
11222
11223
/**
11224
 * xmlParseLookupInternalSubset:
11225
 * @ctxt:  an XML parser context
11226
 *
11227
 * Check whether there's enough data in the input buffer to finish parsing
11228
 * the internal subset.
11229
 */
11230
static int
11231
439k
xmlParseLookupInternalSubset(xmlParserCtxtPtr ctxt) {
11232
    /*
11233
     * Sorry, but progressive parsing of the internal subset is not
11234
     * supported. We first check that the full content of the internal
11235
     * subset is available and parsing is launched only at that point.
11236
     * Internal subset ends with "']' S? '>'" in an unescaped section and
11237
     * not in a ']]>' sequence which are conditional sections.
11238
     */
11239
439k
    const xmlChar *cur, *start;
11240
439k
    const xmlChar *end = ctxt->input->end;
11241
439k
    int state = ctxt->endCheckState;
11242
11243
439k
    if (ctxt->checkIndex == 0) {
11244
129k
        cur = ctxt->input->cur + 1;
11245
309k
    } else {
11246
309k
        cur = ctxt->input->cur + ctxt->checkIndex;
11247
309k
    }
11248
439k
    start = cur;
11249
11250
75.2M
    while (cur < end) {
11251
74.9M
        if (state == '-') {
11252
7.84M
            if ((*cur == '-') &&
11253
7.84M
                (cur[1] == '-') &&
11254
7.84M
                (cur[2] == '>')) {
11255
58.3k
                state = 0;
11256
58.3k
                cur += 3;
11257
58.3k
                start = cur;
11258
58.3k
                continue;
11259
58.3k
            }
11260
7.84M
        }
11261
67.1M
        else if (state == ']') {
11262
128k
            if (*cur == '>') {
11263
108k
                ctxt->checkIndex = 0;
11264
108k
                ctxt->endCheckState = 0;
11265
108k
                return(1);
11266
108k
            }
11267
19.8k
            if (IS_BLANK_CH(*cur)) {
11268
11.0k
                state = ' ';
11269
11.0k
            } else if (*cur != ']') {
11270
3.61k
                state = 0;
11271
3.61k
                start = cur;
11272
3.61k
                continue;
11273
3.61k
            }
11274
19.8k
        }
11275
66.9M
        else if (state == ' ') {
11276
287k
            if (*cur == '>') {
11277
312
                ctxt->checkIndex = 0;
11278
312
                ctxt->endCheckState = 0;
11279
312
                return(1);
11280
312
            }
11281
287k
            if (!IS_BLANK_CH(*cur)) {
11282
10.7k
                state = 0;
11283
10.7k
                start = cur;
11284
10.7k
                continue;
11285
10.7k
            }
11286
287k
        }
11287
66.6M
        else if (state != 0) {
11288
31.5M
            if (*cur == state) {
11289
741k
                state = 0;
11290
741k
                start = cur + 1;
11291
741k
            }
11292
31.5M
        }
11293
35.1M
        else if (*cur == '<') {
11294
971k
            if ((cur[1] == '!') &&
11295
971k
                (cur[2] == '-') &&
11296
971k
                (cur[3] == '-')) {
11297
59.6k
                state = '-';
11298
59.6k
                cur += 4;
11299
                /* Don't treat <!--> as comment */
11300
59.6k
                start = cur;
11301
59.6k
                continue;
11302
59.6k
            }
11303
971k
        }
11304
34.1M
        else if ((*cur == '"') || (*cur == '\'') || (*cur == ']')) {
11305
873k
            state = *cur;
11306
873k
        }
11307
11308
74.7M
        cur++;
11309
74.7M
    }
11310
11311
    /*
11312
     * Rescan the three last characters to detect "<!--" and "-->"
11313
     * split across chunks.
11314
     */
11315
330k
    if ((state == 0) || (state == '-')) {
11316
175k
        if (cur - start < 3)
11317
15.3k
            cur = start;
11318
160k
        else
11319
160k
            cur -= 3;
11320
175k
    }
11321
330k
    ctxt->checkIndex = cur - ctxt->input->cur;
11322
330k
    ctxt->endCheckState = state;
11323
330k
    return(0);
11324
439k
}
11325
11326
/**
11327
 * xmlCheckCdataPush:
11328
 * @cur: pointer to the block of characters
11329
 * @len: length of the block in bytes
11330
 * @complete: 1 if complete CDATA block is passed in, 0 if partial block
11331
 *
11332
 * Check that the block of characters is okay as SCdata content [20]
11333
 *
11334
 * Returns the number of bytes to pass if okay, a negative index where an
11335
 *         UTF-8 error occurred otherwise
11336
 */
11337
static int
11338
194k
xmlCheckCdataPush(const xmlChar *utf, int len, int complete) {
11339
194k
    int ix;
11340
194k
    unsigned char c;
11341
194k
    int codepoint;
11342
11343
194k
    if ((utf == NULL) || (len <= 0))
11344
3.93k
        return(0);
11345
11346
9.58M
    for (ix = 0; ix < len;) {      /* string is 0-terminated */
11347
9.54M
        c = utf[ix];
11348
9.54M
        if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
11349
8.72M
      if (c >= 0x20)
11350
7.99M
    ix++;
11351
728k
      else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11352
719k
          ix++;
11353
8.48k
      else
11354
8.48k
          return(-ix);
11355
8.72M
  } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
11356
309k
      if (ix + 2 > len) return(complete ? -ix : ix);
11357
305k
      if ((utf[ix+1] & 0xc0 ) != 0x80)
11358
48.7k
          return(-ix);
11359
256k
      codepoint = (utf[ix] & 0x1f) << 6;
11360
256k
      codepoint |= utf[ix+1] & 0x3f;
11361
256k
      if (!xmlIsCharQ(codepoint))
11362
4.79k
          return(-ix);
11363
252k
      ix += 2;
11364
509k
  } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
11365
266k
      if (ix + 3 > len) return(complete ? -ix : ix);
11366
262k
      if (((utf[ix+1] & 0xc0) != 0x80) ||
11367
262k
          ((utf[ix+2] & 0xc0) != 0x80))
11368
17.6k
        return(-ix);
11369
244k
      codepoint = (utf[ix] & 0xf) << 12;
11370
244k
      codepoint |= (utf[ix+1] & 0x3f) << 6;
11371
244k
      codepoint |= utf[ix+2] & 0x3f;
11372
244k
      if (!xmlIsCharQ(codepoint))
11373
5.30k
          return(-ix);
11374
239k
      ix += 3;
11375
243k
  } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
11376
224k
      if (ix + 4 > len) return(complete ? -ix : ix);
11377
220k
      if (((utf[ix+1] & 0xc0) != 0x80) ||
11378
220k
          ((utf[ix+2] & 0xc0) != 0x80) ||
11379
220k
    ((utf[ix+3] & 0xc0) != 0x80))
11380
26.9k
        return(-ix);
11381
193k
      codepoint = (utf[ix] & 0x7) << 18;
11382
193k
      codepoint |= (utf[ix+1] & 0x3f) << 12;
11383
193k
      codepoint |= (utf[ix+2] & 0x3f) << 6;
11384
193k
      codepoint |= utf[ix+3] & 0x3f;
11385
193k
      if (!xmlIsCharQ(codepoint))
11386
8.28k
          return(-ix);
11387
185k
      ix += 4;
11388
185k
  } else       /* unknown encoding */
11389
19.3k
      return(-ix);
11390
9.54M
      }
11391
39.9k
      return(ix);
11392
191k
}
11393
11394
/**
11395
 * xmlParseTryOrFinish:
11396
 * @ctxt:  an XML parser context
11397
 * @terminate:  last chunk indicator
11398
 *
11399
 * Try to progress on parsing
11400
 *
11401
 * Returns zero if no parsing was possible
11402
 */
11403
static int
11404
3.38M
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11405
3.38M
    int ret = 0;
11406
3.38M
    int avail, tlen;
11407
3.38M
    xmlChar cur, next;
11408
11409
3.38M
    if (ctxt->input == NULL)
11410
0
        return(0);
11411
11412
#ifdef DEBUG_PUSH
11413
    switch (ctxt->instate) {
11414
  case XML_PARSER_EOF:
11415
      xmlGenericError(xmlGenericErrorContext,
11416
        "PP: try EOF\n"); break;
11417
  case XML_PARSER_START:
11418
      xmlGenericError(xmlGenericErrorContext,
11419
        "PP: try START\n"); break;
11420
  case XML_PARSER_MISC:
11421
      xmlGenericError(xmlGenericErrorContext,
11422
        "PP: try MISC\n");break;
11423
  case XML_PARSER_COMMENT:
11424
      xmlGenericError(xmlGenericErrorContext,
11425
        "PP: try COMMENT\n");break;
11426
  case XML_PARSER_PROLOG:
11427
      xmlGenericError(xmlGenericErrorContext,
11428
        "PP: try PROLOG\n");break;
11429
  case XML_PARSER_START_TAG:
11430
      xmlGenericError(xmlGenericErrorContext,
11431
        "PP: try START_TAG\n");break;
11432
  case XML_PARSER_CONTENT:
11433
      xmlGenericError(xmlGenericErrorContext,
11434
        "PP: try CONTENT\n");break;
11435
  case XML_PARSER_CDATA_SECTION:
11436
      xmlGenericError(xmlGenericErrorContext,
11437
        "PP: try CDATA_SECTION\n");break;
11438
  case XML_PARSER_END_TAG:
11439
      xmlGenericError(xmlGenericErrorContext,
11440
        "PP: try END_TAG\n");break;
11441
  case XML_PARSER_ENTITY_DECL:
11442
      xmlGenericError(xmlGenericErrorContext,
11443
        "PP: try ENTITY_DECL\n");break;
11444
  case XML_PARSER_ENTITY_VALUE:
11445
      xmlGenericError(xmlGenericErrorContext,
11446
        "PP: try ENTITY_VALUE\n");break;
11447
  case XML_PARSER_ATTRIBUTE_VALUE:
11448
      xmlGenericError(xmlGenericErrorContext,
11449
        "PP: try ATTRIBUTE_VALUE\n");break;
11450
  case XML_PARSER_DTD:
11451
      xmlGenericError(xmlGenericErrorContext,
11452
        "PP: try DTD\n");break;
11453
  case XML_PARSER_EPILOG:
11454
      xmlGenericError(xmlGenericErrorContext,
11455
        "PP: try EPILOG\n");break;
11456
  case XML_PARSER_PI:
11457
      xmlGenericError(xmlGenericErrorContext,
11458
        "PP: try PI\n");break;
11459
        case XML_PARSER_IGNORE:
11460
            xmlGenericError(xmlGenericErrorContext,
11461
        "PP: try IGNORE\n");break;
11462
    }
11463
#endif
11464
11465
3.38M
    if ((ctxt->input != NULL) &&
11466
3.38M
        (ctxt->input->cur - ctxt->input->base > 4096)) {
11467
52.7k
        xmlParserInputShrink(ctxt->input);
11468
52.7k
    }
11469
11470
29.0M
    while (ctxt->instate != XML_PARSER_EOF) {
11471
29.0M
  if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11472
68.5k
      return(0);
11473
11474
29.0M
  if (ctxt->input == NULL) break;
11475
29.0M
  if (ctxt->input->buf == NULL)
11476
0
      avail = ctxt->input->length -
11477
0
              (ctxt->input->cur - ctxt->input->base);
11478
29.0M
  else {
11479
      /*
11480
       * If we are operating on converted input, try to flush
11481
       * remaining chars to avoid them stalling in the non-converted
11482
       * buffer. But do not do this in document start where
11483
       * encoding="..." may not have been read and we work on a
11484
       * guessed encoding.
11485
       */
11486
29.0M
      if ((ctxt->instate != XML_PARSER_START) &&
11487
29.0M
          (ctxt->input->buf->raw != NULL) &&
11488
29.0M
    (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
11489
108k
                size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11490
108k
                                                 ctxt->input);
11491
108k
    size_t current = ctxt->input->cur - ctxt->input->base;
11492
11493
108k
    xmlParserInputBufferPush(ctxt->input->buf, 0, "");
11494
108k
                xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11495
108k
                                      base, current);
11496
108k
      }
11497
29.0M
      avail = xmlBufUse(ctxt->input->buf->buffer) -
11498
29.0M
        (ctxt->input->cur - ctxt->input->base);
11499
29.0M
  }
11500
29.0M
        if (avail < 1)
11501
183k
      goto done;
11502
28.8M
        switch (ctxt->instate) {
11503
0
            case XML_PARSER_EOF:
11504
          /*
11505
     * Document parsing is done !
11506
     */
11507
0
          goto done;
11508
663k
            case XML_PARSER_START:
11509
663k
    if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11510
194k
        xmlChar start[4];
11511
194k
        xmlCharEncoding enc;
11512
11513
        /*
11514
         * Very first chars read from the document flow.
11515
         */
11516
194k
        if (avail < 4)
11517
2.15k
      goto done;
11518
11519
        /*
11520
         * Get the 4 first bytes and decode the charset
11521
         * if enc != XML_CHAR_ENCODING_NONE
11522
         * plug some encoding conversion routines,
11523
         * else xmlSwitchEncoding will set to (default)
11524
         * UTF8.
11525
         */
11526
192k
        start[0] = RAW;
11527
192k
        start[1] = NXT(1);
11528
192k
        start[2] = NXT(2);
11529
192k
        start[3] = NXT(3);
11530
192k
        enc = xmlDetectCharEncoding(start, 4);
11531
192k
        xmlSwitchEncoding(ctxt, enc);
11532
192k
        break;
11533
194k
    }
11534
11535
468k
    if (avail < 2)
11536
82
        goto done;
11537
468k
    cur = ctxt->input->cur[0];
11538
468k
    next = ctxt->input->cur[1];
11539
468k
    if (cur == 0) {
11540
430
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11541
430
      ctxt->sax->setDocumentLocator(ctxt->userData,
11542
430
                  &xmlDefaultSAXLocator);
11543
430
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11544
430
        xmlHaltParser(ctxt);
11545
#ifdef DEBUG_PUSH
11546
        xmlGenericError(xmlGenericErrorContext,
11547
          "PP: entering EOF\n");
11548
#endif
11549
430
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11550
430
      ctxt->sax->endDocument(ctxt->userData);
11551
430
        goto done;
11552
430
    }
11553
468k
          if ((cur == '<') && (next == '?')) {
11554
        /* PI or XML decl */
11555
354k
        if (avail < 5) goto done;
11556
354k
        if ((!terminate) &&
11557
354k
                        (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11558
202k
      goto done;
11559
152k
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11560
152k
      ctxt->sax->setDocumentLocator(ctxt->userData,
11561
152k
                  &xmlDefaultSAXLocator);
11562
152k
        if ((ctxt->input->cur[2] == 'x') &&
11563
152k
      (ctxt->input->cur[3] == 'm') &&
11564
152k
      (ctxt->input->cur[4] == 'l') &&
11565
152k
      (IS_BLANK_CH(ctxt->input->cur[5]))) {
11566
139k
      ret += 5;
11567
#ifdef DEBUG_PUSH
11568
      xmlGenericError(xmlGenericErrorContext,
11569
        "PP: Parsing XML Decl\n");
11570
#endif
11571
139k
      xmlParseXMLDecl(ctxt);
11572
139k
      if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11573
          /*
11574
           * The XML REC instructs us to stop parsing right
11575
           * here
11576
           */
11577
282
          xmlHaltParser(ctxt);
11578
282
          return(0);
11579
282
      }
11580
139k
      ctxt->standalone = ctxt->input->standalone;
11581
139k
      if ((ctxt->encoding == NULL) &&
11582
139k
          (ctxt->input->encoding != NULL))
11583
15.9k
          ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11584
139k
      if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11585
139k
          (!ctxt->disableSAX))
11586
133k
          ctxt->sax->startDocument(ctxt->userData);
11587
139k
      ctxt->instate = XML_PARSER_MISC;
11588
#ifdef DEBUG_PUSH
11589
      xmlGenericError(xmlGenericErrorContext,
11590
        "PP: entering MISC\n");
11591
#endif
11592
139k
        } else {
11593
12.7k
      ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11594
12.7k
      if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11595
12.7k
          (!ctxt->disableSAX))
11596
12.7k
          ctxt->sax->startDocument(ctxt->userData);
11597
12.7k
      ctxt->instate = XML_PARSER_MISC;
11598
#ifdef DEBUG_PUSH
11599
      xmlGenericError(xmlGenericErrorContext,
11600
        "PP: entering MISC\n");
11601
#endif
11602
12.7k
        }
11603
152k
    } else {
11604
113k
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11605
113k
      ctxt->sax->setDocumentLocator(ctxt->userData,
11606
113k
                  &xmlDefaultSAXLocator);
11607
113k
        ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11608
113k
        if (ctxt->version == NULL) {
11609
0
            xmlErrMemory(ctxt, NULL);
11610
0
      break;
11611
0
        }
11612
113k
        if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11613
113k
            (!ctxt->disableSAX))
11614
113k
      ctxt->sax->startDocument(ctxt->userData);
11615
113k
        ctxt->instate = XML_PARSER_MISC;
11616
#ifdef DEBUG_PUSH
11617
        xmlGenericError(xmlGenericErrorContext,
11618
          "PP: entering MISC\n");
11619
#endif
11620
113k
    }
11621
265k
    break;
11622
6.12M
            case XML_PARSER_START_TAG: {
11623
6.12M
          const xmlChar *name;
11624
6.12M
    const xmlChar *prefix = NULL;
11625
6.12M
    const xmlChar *URI = NULL;
11626
6.12M
                int line = ctxt->input->line;
11627
6.12M
    int nsNr = ctxt->nsNr;
11628
11629
6.12M
    if ((avail < 2) && (ctxt->inputNr == 1))
11630
0
        goto done;
11631
6.12M
    cur = ctxt->input->cur[0];
11632
6.12M
          if (cur != '<') {
11633
14.9k
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11634
14.9k
        xmlHaltParser(ctxt);
11635
14.9k
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11636
14.9k
      ctxt->sax->endDocument(ctxt->userData);
11637
14.9k
        goto done;
11638
14.9k
    }
11639
6.11M
    if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11640
1.11M
                    goto done;
11641
5.00M
    if (ctxt->spaceNr == 0)
11642
43.3k
        spacePush(ctxt, -1);
11643
4.95M
    else if (*ctxt->space == -2)
11644
581k
        spacePush(ctxt, -1);
11645
4.37M
    else
11646
4.37M
        spacePush(ctxt, *ctxt->space);
11647
5.00M
#ifdef LIBXML_SAX1_ENABLED
11648
5.00M
    if (ctxt->sax2)
11649
3.23M
#endif /* LIBXML_SAX1_ENABLED */
11650
3.23M
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
11651
1.76M
#ifdef LIBXML_SAX1_ENABLED
11652
1.76M
    else
11653
1.76M
        name = xmlParseStartTag(ctxt);
11654
5.00M
#endif /* LIBXML_SAX1_ENABLED */
11655
5.00M
    if (ctxt->instate == XML_PARSER_EOF)
11656
291
        goto done;
11657
5.00M
    if (name == NULL) {
11658
16.4k
        spacePop(ctxt);
11659
16.4k
        xmlHaltParser(ctxt);
11660
16.4k
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11661
16.4k
      ctxt->sax->endDocument(ctxt->userData);
11662
16.4k
        goto done;
11663
16.4k
    }
11664
4.98M
#ifdef LIBXML_VALID_ENABLED
11665
    /*
11666
     * [ VC: Root Element Type ]
11667
     * The Name in the document type declaration must match
11668
     * the element type of the root element.
11669
     */
11670
4.98M
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11671
4.98M
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
11672
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11673
4.98M
#endif /* LIBXML_VALID_ENABLED */
11674
11675
    /*
11676
     * Check for an Empty Element.
11677
     */
11678
4.98M
    if ((RAW == '/') && (NXT(1) == '>')) {
11679
2.11M
        SKIP(2);
11680
11681
2.11M
        if (ctxt->sax2) {
11682
1.52M
      if ((ctxt->sax != NULL) &&
11683
1.52M
          (ctxt->sax->endElementNs != NULL) &&
11684
1.52M
          (!ctxt->disableSAX))
11685
1.51M
          ctxt->sax->endElementNs(ctxt->userData, name,
11686
1.51M
                                  prefix, URI);
11687
1.52M
      if (ctxt->nsNr - nsNr > 0)
11688
7.04k
          nsPop(ctxt, ctxt->nsNr - nsNr);
11689
1.52M
#ifdef LIBXML_SAX1_ENABLED
11690
1.52M
        } else {
11691
598k
      if ((ctxt->sax != NULL) &&
11692
598k
          (ctxt->sax->endElement != NULL) &&
11693
598k
          (!ctxt->disableSAX))
11694
598k
          ctxt->sax->endElement(ctxt->userData, name);
11695
598k
#endif /* LIBXML_SAX1_ENABLED */
11696
598k
        }
11697
2.11M
        if (ctxt->instate == XML_PARSER_EOF)
11698
0
      goto done;
11699
2.11M
        spacePop(ctxt);
11700
2.11M
        if (ctxt->nameNr == 0) {
11701
8.07k
      ctxt->instate = XML_PARSER_EPILOG;
11702
2.11M
        } else {
11703
2.11M
      ctxt->instate = XML_PARSER_CONTENT;
11704
2.11M
        }
11705
2.11M
        break;
11706
2.11M
    }
11707
2.86M
    if (RAW == '>') {
11708
2.58M
        NEXT;
11709
2.58M
    } else {
11710
275k
        xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11711
275k
           "Couldn't find end of Start Tag %s\n",
11712
275k
           name);
11713
275k
        nodePop(ctxt);
11714
275k
        spacePop(ctxt);
11715
275k
    }
11716
2.86M
                nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
11717
11718
2.86M
    ctxt->instate = XML_PARSER_CONTENT;
11719
2.86M
                break;
11720
4.98M
      }
11721
18.2M
            case XML_PARSER_CONTENT: {
11722
18.2M
    if ((avail < 2) && (ctxt->inputNr == 1))
11723
78.8k
        goto done;
11724
18.1M
    cur = ctxt->input->cur[0];
11725
18.1M
    next = ctxt->input->cur[1];
11726
11727
18.1M
    if ((cur == '<') && (next == '/')) {
11728
2.34M
        ctxt->instate = XML_PARSER_END_TAG;
11729
2.34M
        break;
11730
15.8M
          } else if ((cur == '<') && (next == '?')) {
11731
42.2k
        if ((!terminate) &&
11732
42.2k
            (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11733
18.0k
      goto done;
11734
24.2k
        xmlParsePI(ctxt);
11735
24.2k
        ctxt->instate = XML_PARSER_CONTENT;
11736
15.7M
    } else if ((cur == '<') && (next != '!')) {
11737
4.82M
        ctxt->instate = XML_PARSER_START_TAG;
11738
4.82M
        break;
11739
10.9M
    } else if ((cur == '<') && (next == '!') &&
11740
10.9M
               (ctxt->input->cur[2] == '-') &&
11741
10.9M
         (ctxt->input->cur[3] == '-')) {
11742
210k
        if ((!terminate) &&
11743
210k
            (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11744
88.5k
      goto done;
11745
121k
        xmlParseComment(ctxt);
11746
121k
        ctxt->instate = XML_PARSER_CONTENT;
11747
10.7M
    } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11748
10.7M
        (ctxt->input->cur[2] == '[') &&
11749
10.7M
        (ctxt->input->cur[3] == 'C') &&
11750
10.7M
        (ctxt->input->cur[4] == 'D') &&
11751
10.7M
        (ctxt->input->cur[5] == 'A') &&
11752
10.7M
        (ctxt->input->cur[6] == 'T') &&
11753
10.7M
        (ctxt->input->cur[7] == 'A') &&
11754
10.7M
        (ctxt->input->cur[8] == '[')) {
11755
22.9k
        SKIP(9);
11756
22.9k
        ctxt->instate = XML_PARSER_CDATA_SECTION;
11757
22.9k
        break;
11758
10.6M
    } else if ((cur == '<') && (next == '!') &&
11759
10.6M
               (avail < 9)) {
11760
18.1k
        goto done;
11761
10.6M
    } else if (cur == '<') {
11762
223k
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11763
223k
                    "detected an error in element content\n");
11764
223k
                    SKIP(1);
11765
10.4M
    } else if (cur == '&') {
11766
1.87M
        if ((!terminate) && (!xmlParseLookupChar(ctxt, ';')))
11767
66.5k
      goto done;
11768
1.80M
        xmlParseReference(ctxt);
11769
8.58M
    } else {
11770
        /* TODO Avoid the extra copy, handle directly !!! */
11771
        /*
11772
         * Goal of the following test is:
11773
         *  - minimize calls to the SAX 'character' callback
11774
         *    when they are mergeable
11775
         *  - handle a problem for isBlank when we only parse
11776
         *    a sequence of blank chars and the next one is
11777
         *    not available to check against '<' presence.
11778
         *  - tries to homogenize the differences in SAX
11779
         *    callbacks between the push and pull versions
11780
         *    of the parser.
11781
         */
11782
8.58M
        if ((ctxt->inputNr == 1) &&
11783
8.58M
            (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11784
6.01M
      if ((!terminate) && (!xmlParseLookupCharData(ctxt)))
11785
630k
          goto done;
11786
6.01M
                    }
11787
7.95M
                    ctxt->checkIndex = 0;
11788
7.95M
        xmlParseCharData(ctxt, 0);
11789
7.95M
    }
11790
10.1M
    break;
11791
18.1M
      }
11792
10.1M
            case XML_PARSER_END_TAG:
11793
2.42M
    if (avail < 2)
11794
0
        goto done;
11795
2.42M
    if ((!terminate) && (!xmlParseLookupChar(ctxt, '>')))
11796
71.4k
        goto done;
11797
2.34M
    if (ctxt->sax2) {
11798
1.42M
              xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
11799
1.42M
        nameNsPop(ctxt);
11800
1.42M
    }
11801
921k
#ifdef LIBXML_SAX1_ENABLED
11802
921k
      else
11803
921k
        xmlParseEndTag1(ctxt, 0);
11804
2.34M
#endif /* LIBXML_SAX1_ENABLED */
11805
2.34M
    if (ctxt->instate == XML_PARSER_EOF) {
11806
        /* Nothing */
11807
2.34M
    } else if (ctxt->nameNr == 0) {
11808
38.2k
        ctxt->instate = XML_PARSER_EPILOG;
11809
2.31M
    } else {
11810
2.31M
        ctxt->instate = XML_PARSER_CONTENT;
11811
2.31M
    }
11812
2.34M
    break;
11813
251k
            case XML_PARSER_CDATA_SECTION: {
11814
          /*
11815
     * The Push mode need to have the SAX callback for
11816
     * cdataBlock merge back contiguous callbacks.
11817
     */
11818
251k
    const xmlChar *term;
11819
11820
251k
                if (terminate) {
11821
                    /*
11822
                     * Don't call xmlParseLookupString. If 'terminate'
11823
                     * is set, checkIndex is invalid.
11824
                     */
11825
6.45k
                    term = BAD_CAST strstr((const char *) ctxt->input->cur,
11826
6.45k
                                           "]]>");
11827
245k
                } else {
11828
245k
        term = xmlParseLookupString(ctxt, 0, "]]>", 3);
11829
245k
                }
11830
11831
251k
    if (term == NULL) {
11832
149k
        int tmp, size;
11833
11834
149k
                    if (terminate) {
11835
                        /* Unfinished CDATA section */
11836
2.61k
                        size = ctxt->input->end - ctxt->input->cur;
11837
146k
                    } else {
11838
146k
                        if (avail < XML_PARSER_BIG_BUFFER_SIZE + 2)
11839
56.5k
                            goto done;
11840
90.2k
                        ctxt->checkIndex = 0;
11841
                        /* XXX: Why don't we pass the full buffer? */
11842
90.2k
                        size = XML_PARSER_BIG_BUFFER_SIZE;
11843
90.2k
                    }
11844
92.8k
                    tmp = xmlCheckCdataPush(ctxt->input->cur, size, 0);
11845
92.8k
                    if (tmp <= 0) {
11846
65.3k
                        tmp = -tmp;
11847
65.3k
                        ctxt->input->cur += tmp;
11848
65.3k
                        goto encoding_error;
11849
65.3k
                    }
11850
27.4k
                    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11851
27.4k
                        if (ctxt->sax->cdataBlock != NULL)
11852
17.1k
                            ctxt->sax->cdataBlock(ctxt->userData,
11853
17.1k
                                                  ctxt->input->cur, tmp);
11854
10.3k
                        else if (ctxt->sax->characters != NULL)
11855
10.3k
                            ctxt->sax->characters(ctxt->userData,
11856
10.3k
                                                  ctxt->input->cur, tmp);
11857
27.4k
                    }
11858
27.4k
                    if (ctxt->instate == XML_PARSER_EOF)
11859
0
                        goto done;
11860
27.4k
                    SKIPL(tmp);
11861
102k
    } else {
11862
102k
                    int base = term - CUR_PTR;
11863
102k
        int tmp;
11864
11865
102k
        tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1);
11866
102k
        if ((tmp < 0) || (tmp != base)) {
11867
84.8k
      tmp = -tmp;
11868
84.8k
      ctxt->input->cur += tmp;
11869
84.8k
      goto encoding_error;
11870
84.8k
        }
11871
17.2k
        if ((ctxt->sax != NULL) && (base == 0) &&
11872
17.2k
            (ctxt->sax->cdataBlock != NULL) &&
11873
17.2k
            (!ctxt->disableSAX)) {
11874
      /*
11875
       * Special case to provide identical behaviour
11876
       * between pull and push parsers on empty CDATA
11877
       * sections
11878
       */
11879
2.44k
       if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11880
2.44k
           (!strncmp((const char *)&ctxt->input->cur[-9],
11881
2.44k
                     "<![CDATA[", 9)))
11882
2.44k
           ctxt->sax->cdataBlock(ctxt->userData,
11883
2.44k
                                 BAD_CAST "", 0);
11884
14.8k
        } else if ((ctxt->sax != NULL) && (base > 0) &&
11885
14.8k
      (!ctxt->disableSAX)) {
11886
13.3k
      if (ctxt->sax->cdataBlock != NULL)
11887
8.58k
          ctxt->sax->cdataBlock(ctxt->userData,
11888
8.58k
              ctxt->input->cur, base);
11889
4.77k
      else if (ctxt->sax->characters != NULL)
11890
4.77k
          ctxt->sax->characters(ctxt->userData,
11891
4.77k
              ctxt->input->cur, base);
11892
13.3k
        }
11893
17.2k
        if (ctxt->instate == XML_PARSER_EOF)
11894
0
      goto done;
11895
17.2k
        SKIPL(base + 3);
11896
17.2k
        ctxt->instate = XML_PARSER_CONTENT;
11897
#ifdef DEBUG_PUSH
11898
        xmlGenericError(xmlGenericErrorContext,
11899
          "PP: entering CONTENT\n");
11900
#endif
11901
17.2k
    }
11902
44.7k
    break;
11903
251k
      }
11904
457k
            case XML_PARSER_MISC:
11905
613k
            case XML_PARSER_PROLOG:
11906
665k
            case XML_PARSER_EPILOG:
11907
665k
    SKIP_BLANKS;
11908
665k
    if (ctxt->input->buf == NULL)
11909
0
        avail = ctxt->input->length -
11910
0
                (ctxt->input->cur - ctxt->input->base);
11911
665k
    else
11912
665k
        avail = xmlBufUse(ctxt->input->buf->buffer) -
11913
665k
                (ctxt->input->cur - ctxt->input->base);
11914
665k
    if (avail < 2)
11915
39.4k
        goto done;
11916
626k
    cur = ctxt->input->cur[0];
11917
626k
    next = ctxt->input->cur[1];
11918
626k
          if ((cur == '<') && (next == '?')) {
11919
54.8k
        if ((!terminate) &&
11920
54.8k
                        (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11921
16.2k
      goto done;
11922
#ifdef DEBUG_PUSH
11923
        xmlGenericError(xmlGenericErrorContext,
11924
          "PP: Parsing PI\n");
11925
#endif
11926
38.5k
        xmlParsePI(ctxt);
11927
38.5k
        if (ctxt->instate == XML_PARSER_EOF)
11928
0
      goto done;
11929
571k
    } else if ((cur == '<') && (next == '!') &&
11930
571k
        (ctxt->input->cur[2] == '-') &&
11931
571k
        (ctxt->input->cur[3] == '-')) {
11932
62.8k
        if ((!terminate) &&
11933
62.8k
                        (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11934
39.6k
      goto done;
11935
#ifdef DEBUG_PUSH
11936
        xmlGenericError(xmlGenericErrorContext,
11937
          "PP: Parsing Comment\n");
11938
#endif
11939
23.2k
        xmlParseComment(ctxt);
11940
23.2k
        if (ctxt->instate == XML_PARSER_EOF)
11941
0
      goto done;
11942
508k
    } else if ((ctxt->instate == XML_PARSER_MISC) &&
11943
508k
                    (cur == '<') && (next == '!') &&
11944
508k
        (ctxt->input->cur[2] == 'D') &&
11945
508k
        (ctxt->input->cur[3] == 'O') &&
11946
508k
        (ctxt->input->cur[4] == 'C') &&
11947
508k
        (ctxt->input->cur[5] == 'T') &&
11948
508k
        (ctxt->input->cur[6] == 'Y') &&
11949
508k
        (ctxt->input->cur[7] == 'P') &&
11950
508k
        (ctxt->input->cur[8] == 'E')) {
11951
266k
        if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11952
100k
                        goto done;
11953
#ifdef DEBUG_PUSH
11954
        xmlGenericError(xmlGenericErrorContext,
11955
          "PP: Parsing internal subset\n");
11956
#endif
11957
166k
        ctxt->inSubset = 1;
11958
166k
        xmlParseDocTypeDecl(ctxt);
11959
166k
        if (ctxt->instate == XML_PARSER_EOF)
11960
0
      goto done;
11961
166k
        if (RAW == '[') {
11962
133k
      ctxt->instate = XML_PARSER_DTD;
11963
#ifdef DEBUG_PUSH
11964
      xmlGenericError(xmlGenericErrorContext,
11965
        "PP: entering DTD\n");
11966
#endif
11967
133k
        } else {
11968
      /*
11969
       * Create and update the external subset.
11970
       */
11971
32.3k
      ctxt->inSubset = 2;
11972
32.3k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11973
32.3k
          (ctxt->sax->externalSubset != NULL))
11974
30.7k
          ctxt->sax->externalSubset(ctxt->userData,
11975
30.7k
            ctxt->intSubName, ctxt->extSubSystem,
11976
30.7k
            ctxt->extSubURI);
11977
32.3k
      ctxt->inSubset = 0;
11978
32.3k
      xmlCleanSpecialAttr(ctxt);
11979
32.3k
      ctxt->instate = XML_PARSER_PROLOG;
11980
#ifdef DEBUG_PUSH
11981
      xmlGenericError(xmlGenericErrorContext,
11982
        "PP: entering PROLOG\n");
11983
#endif
11984
32.3k
        }
11985
242k
    } else if ((cur == '<') && (next == '!') &&
11986
242k
               (avail <
11987
33.0k
                            (ctxt->instate == XML_PARSER_MISC ? 9 : 4))) {
11988
31.1k
        goto done;
11989
211k
    } else if (ctxt->instate == XML_PARSER_EPILOG) {
11990
9.18k
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11991
9.18k
        xmlHaltParser(ctxt);
11992
#ifdef DEBUG_PUSH
11993
        xmlGenericError(xmlGenericErrorContext,
11994
          "PP: entering EOF\n");
11995
#endif
11996
9.18k
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11997
9.18k
      ctxt->sax->endDocument(ctxt->userData);
11998
9.18k
        goto done;
11999
202k
                } else {
12000
202k
        ctxt->instate = XML_PARSER_START_TAG;
12001
#ifdef DEBUG_PUSH
12002
        xmlGenericError(xmlGenericErrorContext,
12003
          "PP: entering START_TAG\n");
12004
#endif
12005
202k
    }
12006
430k
    break;
12007
463k
            case XML_PARSER_DTD: {
12008
463k
                if ((!terminate) && (!xmlParseLookupInternalSubset(ctxt)))
12009
330k
                    goto done;
12010
132k
    xmlParseInternalSubset(ctxt);
12011
132k
    if (ctxt->instate == XML_PARSER_EOF)
12012
32.5k
        goto done;
12013
100k
    ctxt->inSubset = 2;
12014
100k
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
12015
100k
        (ctxt->sax->externalSubset != NULL))
12016
96.2k
        ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
12017
96.2k
          ctxt->extSubSystem, ctxt->extSubURI);
12018
100k
    ctxt->inSubset = 0;
12019
100k
    xmlCleanSpecialAttr(ctxt);
12020
100k
    if (ctxt->instate == XML_PARSER_EOF)
12021
2.22k
        goto done;
12022
97.8k
    ctxt->instate = XML_PARSER_PROLOG;
12023
#ifdef DEBUG_PUSH
12024
    xmlGenericError(xmlGenericErrorContext,
12025
      "PP: entering PROLOG\n");
12026
#endif
12027
97.8k
                break;
12028
100k
      }
12029
0
            case XML_PARSER_COMMENT:
12030
0
    xmlGenericError(xmlGenericErrorContext,
12031
0
      "PP: internal error, state == COMMENT\n");
12032
0
    ctxt->instate = XML_PARSER_CONTENT;
12033
#ifdef DEBUG_PUSH
12034
    xmlGenericError(xmlGenericErrorContext,
12035
      "PP: entering CONTENT\n");
12036
#endif
12037
0
    break;
12038
0
            case XML_PARSER_IGNORE:
12039
0
    xmlGenericError(xmlGenericErrorContext,
12040
0
      "PP: internal error, state == IGNORE");
12041
0
          ctxt->instate = XML_PARSER_DTD;
12042
#ifdef DEBUG_PUSH
12043
    xmlGenericError(xmlGenericErrorContext,
12044
      "PP: entering DTD\n");
12045
#endif
12046
0
          break;
12047
0
            case XML_PARSER_PI:
12048
0
    xmlGenericError(xmlGenericErrorContext,
12049
0
      "PP: internal error, state == PI\n");
12050
0
    ctxt->instate = XML_PARSER_CONTENT;
12051
#ifdef DEBUG_PUSH
12052
    xmlGenericError(xmlGenericErrorContext,
12053
      "PP: entering CONTENT\n");
12054
#endif
12055
0
    break;
12056
0
            case XML_PARSER_ENTITY_DECL:
12057
0
    xmlGenericError(xmlGenericErrorContext,
12058
0
      "PP: internal error, state == ENTITY_DECL\n");
12059
0
    ctxt->instate = XML_PARSER_DTD;
12060
#ifdef DEBUG_PUSH
12061
    xmlGenericError(xmlGenericErrorContext,
12062
      "PP: entering DTD\n");
12063
#endif
12064
0
    break;
12065
0
            case XML_PARSER_ENTITY_VALUE:
12066
0
    xmlGenericError(xmlGenericErrorContext,
12067
0
      "PP: internal error, state == ENTITY_VALUE\n");
12068
0
    ctxt->instate = XML_PARSER_CONTENT;
12069
#ifdef DEBUG_PUSH
12070
    xmlGenericError(xmlGenericErrorContext,
12071
      "PP: entering DTD\n");
12072
#endif
12073
0
    break;
12074
0
            case XML_PARSER_ATTRIBUTE_VALUE:
12075
0
    xmlGenericError(xmlGenericErrorContext,
12076
0
      "PP: internal error, state == ATTRIBUTE_VALUE\n");
12077
0
    ctxt->instate = XML_PARSER_START_TAG;
12078
#ifdef DEBUG_PUSH
12079
    xmlGenericError(xmlGenericErrorContext,
12080
      "PP: entering START_TAG\n");
12081
#endif
12082
0
    break;
12083
0
            case XML_PARSER_SYSTEM_LITERAL:
12084
0
    xmlGenericError(xmlGenericErrorContext,
12085
0
      "PP: internal error, state == SYSTEM_LITERAL\n");
12086
0
    ctxt->instate = XML_PARSER_START_TAG;
12087
#ifdef DEBUG_PUSH
12088
    xmlGenericError(xmlGenericErrorContext,
12089
      "PP: entering START_TAG\n");
12090
#endif
12091
0
    break;
12092
0
            case XML_PARSER_PUBLIC_LITERAL:
12093
0
    xmlGenericError(xmlGenericErrorContext,
12094
0
      "PP: internal error, state == PUBLIC_LITERAL\n");
12095
0
    ctxt->instate = XML_PARSER_START_TAG;
12096
#ifdef DEBUG_PUSH
12097
    xmlGenericError(xmlGenericErrorContext,
12098
      "PP: entering START_TAG\n");
12099
#endif
12100
0
    break;
12101
28.8M
  }
12102
28.8M
    }
12103
3.16M
done:
12104
#ifdef DEBUG_PUSH
12105
    xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
12106
#endif
12107
3.16M
    return(ret);
12108
150k
encoding_error:
12109
150k
    {
12110
150k
        char buffer[150];
12111
12112
150k
  snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12113
150k
      ctxt->input->cur[0], ctxt->input->cur[1],
12114
150k
      ctxt->input->cur[2], ctxt->input->cur[3]);
12115
150k
  __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12116
150k
         "Input is not proper UTF-8, indicate encoding !\n%s",
12117
150k
         BAD_CAST buffer, NULL);
12118
150k
    }
12119
150k
    return(0);
12120
3.38M
}
12121
12122
/**
12123
 * xmlParseChunk:
12124
 * @ctxt:  an XML parser context
12125
 * @chunk:  an char array
12126
 * @size:  the size in byte of the chunk
12127
 * @terminate:  last chunk indicator
12128
 *
12129
 * Parse a Chunk of memory
12130
 *
12131
 * Returns zero if no error, the xmlParserErrors otherwise.
12132
 */
12133
int
12134
xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
12135
4.21M
              int terminate) {
12136
4.21M
    int end_in_lf = 0;
12137
4.21M
    int remain = 0;
12138
12139
4.21M
    if (ctxt == NULL)
12140
0
        return(XML_ERR_INTERNAL_ERROR);
12141
4.21M
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12142
853k
        return(ctxt->errNo);
12143
3.36M
    if (ctxt->instate == XML_PARSER_EOF)
12144
121
        return(-1);
12145
3.36M
    if (ctxt->input == NULL)
12146
0
        return(-1);
12147
12148
3.36M
    ctxt->progressive = 1;
12149
3.36M
    if (ctxt->instate == XML_PARSER_START)
12150
454k
        xmlDetectSAX2(ctxt);
12151
3.36M
    if ((size > 0) && (chunk != NULL) && (!terminate) &&
12152
3.36M
        (chunk[size - 1] == '\r')) {
12153
24.3k
  end_in_lf = 1;
12154
24.3k
  size--;
12155
24.3k
    }
12156
12157
3.38M
xmldecl_done:
12158
12159
3.38M
    if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12160
3.38M
        (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF))  {
12161
3.18M
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12162
3.18M
  size_t cur = ctxt->input->cur - ctxt->input->base;
12163
3.18M
  int res;
12164
12165
        /*
12166
         * Specific handling if we autodetected an encoding, we should not
12167
         * push more than the first line ... which depend on the encoding
12168
         * And only push the rest once the final encoding was detected
12169
         */
12170
3.18M
        if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
12171
3.18M
            (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
12172
26.1k
            unsigned int len = 45;
12173
12174
26.1k
            if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12175
26.1k
                               BAD_CAST "UTF-16")) ||
12176
26.1k
                (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12177
4.98k
                               BAD_CAST "UTF16")))
12178
21.1k
                len = 90;
12179
4.98k
            else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12180
4.98k
                                    BAD_CAST "UCS-4")) ||
12181
4.98k
                     (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12182
4.86k
                                    BAD_CAST "UCS4")))
12183
113
                len = 180;
12184
12185
26.1k
            if (ctxt->input->buf->rawconsumed < len)
12186
1.77k
                len -= ctxt->input->buf->rawconsumed;
12187
12188
            /*
12189
             * Change size for reading the initial declaration only
12190
             * if size is greater than len. Otherwise, memmove in xmlBufferAdd
12191
             * will blindly copy extra bytes from memory.
12192
             */
12193
26.1k
            if ((unsigned int) size > len) {
12194
17.4k
                remain = size - len;
12195
17.4k
                size = len;
12196
17.4k
            } else {
12197
8.65k
                remain = 0;
12198
8.65k
            }
12199
26.1k
        }
12200
3.18M
  res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12201
3.18M
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12202
3.18M
  if (res < 0) {
12203
442
      ctxt->errNo = XML_PARSER_EOF;
12204
442
      xmlHaltParser(ctxt);
12205
442
      return (XML_PARSER_EOF);
12206
442
  }
12207
#ifdef DEBUG_PUSH
12208
  xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12209
#endif
12210
12211
3.18M
    } else if (ctxt->instate != XML_PARSER_EOF) {
12212
194k
  if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
12213
194k
      xmlParserInputBufferPtr in = ctxt->input->buf;
12214
194k
      if ((in->encoder != NULL) && (in->buffer != NULL) &&
12215
194k
        (in->raw != NULL)) {
12216
11.5k
    int nbchars;
12217
11.5k
    size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
12218
11.5k
    size_t current = ctxt->input->cur - ctxt->input->base;
12219
12220
11.5k
    nbchars = xmlCharEncInput(in, terminate);
12221
11.5k
    xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
12222
11.5k
    if (nbchars < 0) {
12223
        /* TODO 2.6.0 */
12224
241
        xmlGenericError(xmlGenericErrorContext,
12225
241
            "xmlParseChunk: encoder error\n");
12226
241
                    xmlHaltParser(ctxt);
12227
241
        return(XML_ERR_INVALID_ENCODING);
12228
241
    }
12229
11.5k
      }
12230
194k
  }
12231
194k
    }
12232
12233
3.38M
    if (remain != 0) {
12234
17.3k
        xmlParseTryOrFinish(ctxt, 0);
12235
3.36M
    } else {
12236
3.36M
        xmlParseTryOrFinish(ctxt, terminate);
12237
3.36M
    }
12238
3.38M
    if (ctxt->instate == XML_PARSER_EOF)
12239
76.7k
        return(ctxt->errNo);
12240
12241
3.30M
    if ((ctxt->input != NULL) &&
12242
3.30M
         (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
12243
3.30M
         ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
12244
3.30M
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
12245
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
12246
0
        xmlHaltParser(ctxt);
12247
0
    }
12248
3.30M
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12249
69.6k
        return(ctxt->errNo);
12250
12251
3.23M
    if (remain != 0) {
12252
16.9k
        chunk += size;
12253
16.9k
        size = remain;
12254
16.9k
        remain = 0;
12255
16.9k
        goto xmldecl_done;
12256
16.9k
    }
12257
3.21M
    if ((end_in_lf == 1) && (ctxt->input != NULL) &&
12258
3.21M
        (ctxt->input->buf != NULL)) {
12259
23.8k
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
12260
23.8k
           ctxt->input);
12261
23.8k
  size_t current = ctxt->input->cur - ctxt->input->base;
12262
12263
23.8k
  xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
12264
12265
23.8k
  xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
12266
23.8k
            base, current);
12267
23.8k
    }
12268
3.21M
    if (terminate) {
12269
  /*
12270
   * Check for termination
12271
   */
12272
84.8k
  int cur_avail = 0;
12273
12274
84.8k
  if (ctxt->input != NULL) {
12275
84.8k
      if (ctxt->input->buf == NULL)
12276
0
    cur_avail = ctxt->input->length -
12277
0
          (ctxt->input->cur - ctxt->input->base);
12278
84.8k
      else
12279
84.8k
    cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
12280
84.8k
                    (ctxt->input->cur - ctxt->input->base);
12281
84.8k
  }
12282
12283
84.8k
  if ((ctxt->instate != XML_PARSER_EOF) &&
12284
84.8k
      (ctxt->instate != XML_PARSER_EPILOG)) {
12285
54.1k
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12286
54.1k
  }
12287
84.8k
  if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) {
12288
527
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12289
527
  }
12290
84.8k
  if (ctxt->instate != XML_PARSER_EOF) {
12291
84.8k
      if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
12292
84.8k
    ctxt->sax->endDocument(ctxt->userData);
12293
84.8k
  }
12294
84.8k
  ctxt->instate = XML_PARSER_EOF;
12295
84.8k
    }
12296
3.21M
    if (ctxt->wellFormed == 0)
12297
1.08M
  return((xmlParserErrors) ctxt->errNo);
12298
2.12M
    else
12299
2.12M
        return(0);
12300
3.21M
}
12301
12302
/************************************************************************
12303
 *                  *
12304
 *    I/O front end functions to the parser     *
12305
 *                  *
12306
 ************************************************************************/
12307
12308
/**
12309
 * xmlCreatePushParserCtxt:
12310
 * @sax:  a SAX handler
12311
 * @user_data:  The user data returned on SAX callbacks
12312
 * @chunk:  a pointer to an array of chars
12313
 * @size:  number of chars in the array
12314
 * @filename:  an optional file name or URI
12315
 *
12316
 * Create a parser context for using the XML parser in push mode.
12317
 * If @buffer and @size are non-NULL, the data is used to detect
12318
 * the encoding.  The remaining characters will be parsed so they
12319
 * don't need to be fed in again through xmlParseChunk.
12320
 * To allow content encoding detection, @size should be >= 4
12321
 * The value of @filename is used for fetching external entities
12322
 * and error/warning reports.
12323
 *
12324
 * Returns the new parser context or NULL
12325
 */
12326
12327
xmlParserCtxtPtr
12328
xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12329
267k
                        const char *chunk, int size, const char *filename) {
12330
267k
    xmlParserCtxtPtr ctxt;
12331
267k
    xmlParserInputPtr inputStream;
12332
267k
    xmlParserInputBufferPtr buf;
12333
267k
    xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12334
12335
    /*
12336
     * plug some encoding conversion routines
12337
     */
12338
267k
    if ((chunk != NULL) && (size >= 4))
12339
133k
  enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12340
12341
267k
    buf = xmlAllocParserInputBuffer(enc);
12342
267k
    if (buf == NULL) return(NULL);
12343
12344
267k
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
12345
267k
    if (ctxt == NULL) {
12346
0
        xmlErrMemory(NULL, "creating parser: out of memory\n");
12347
0
  xmlFreeParserInputBuffer(buf);
12348
0
  return(NULL);
12349
0
    }
12350
267k
    ctxt->dictNames = 1;
12351
267k
    if (filename == NULL) {
12352
133k
  ctxt->directory = NULL;
12353
133k
    } else {
12354
133k
        ctxt->directory = xmlParserGetDirectory(filename);
12355
133k
    }
12356
12357
267k
    inputStream = xmlNewInputStream(ctxt);
12358
267k
    if (inputStream == NULL) {
12359
0
  xmlFreeParserCtxt(ctxt);
12360
0
  xmlFreeParserInputBuffer(buf);
12361
0
  return(NULL);
12362
0
    }
12363
12364
267k
    if (filename == NULL)
12365
133k
  inputStream->filename = NULL;
12366
133k
    else {
12367
133k
  inputStream->filename = (char *)
12368
133k
      xmlCanonicPath((const xmlChar *) filename);
12369
133k
  if (inputStream->filename == NULL) {
12370
0
            xmlFreeInputStream(inputStream);
12371
0
      xmlFreeParserCtxt(ctxt);
12372
0
      xmlFreeParserInputBuffer(buf);
12373
0
      return(NULL);
12374
0
  }
12375
133k
    }
12376
267k
    inputStream->buf = buf;
12377
267k
    xmlBufResetInput(inputStream->buf->buffer, inputStream);
12378
267k
    inputPush(ctxt, inputStream);
12379
12380
    /*
12381
     * If the caller didn't provide an initial 'chunk' for determining
12382
     * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12383
     * that it can be automatically determined later
12384
     */
12385
267k
    ctxt->charset = XML_CHAR_ENCODING_NONE;
12386
12387
267k
    if ((size != 0) && (chunk != NULL) &&
12388
267k
        (ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
12389
133k
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12390
133k
  size_t cur = ctxt->input->cur - ctxt->input->base;
12391
12392
133k
  xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12393
12394
133k
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12395
#ifdef DEBUG_PUSH
12396
  xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12397
#endif
12398
133k
    }
12399
12400
267k
    if (enc != XML_CHAR_ENCODING_NONE) {
12401
74.2k
        xmlSwitchEncoding(ctxt, enc);
12402
74.2k
    }
12403
12404
267k
    return(ctxt);
12405
267k
}
12406
#endif /* LIBXML_PUSH_ENABLED */
12407
12408
/**
12409
 * xmlHaltParser:
12410
 * @ctxt:  an XML parser context
12411
 *
12412
 * Blocks further parser processing don't override error
12413
 * for internal use
12414
 */
12415
static void
12416
244k
xmlHaltParser(xmlParserCtxtPtr ctxt) {
12417
244k
    if (ctxt == NULL)
12418
0
        return;
12419
244k
    ctxt->instate = XML_PARSER_EOF;
12420
244k
    ctxt->disableSAX = 1;
12421
261k
    while (ctxt->inputNr > 1)
12422
16.7k
        xmlFreeInputStream(inputPop(ctxt));
12423
244k
    if (ctxt->input != NULL) {
12424
        /*
12425
   * in case there was a specific allocation deallocate before
12426
   * overriding base
12427
   */
12428
244k
        if (ctxt->input->free != NULL) {
12429
0
      ctxt->input->free((xmlChar *) ctxt->input->base);
12430
0
      ctxt->input->free = NULL;
12431
0
  }
12432
244k
        if (ctxt->input->buf != NULL) {
12433
210k
            xmlFreeParserInputBuffer(ctxt->input->buf);
12434
210k
            ctxt->input->buf = NULL;
12435
210k
        }
12436
244k
  ctxt->input->cur = BAD_CAST"";
12437
244k
        ctxt->input->length = 0;
12438
244k
  ctxt->input->base = ctxt->input->cur;
12439
244k
        ctxt->input->end = ctxt->input->cur;
12440
244k
    }
12441
244k
}
12442
12443
/**
12444
 * xmlStopParser:
12445
 * @ctxt:  an XML parser context
12446
 *
12447
 * Blocks further parser processing
12448
 */
12449
void
12450
134k
xmlStopParser(xmlParserCtxtPtr ctxt) {
12451
134k
    if (ctxt == NULL)
12452
0
        return;
12453
134k
    xmlHaltParser(ctxt);
12454
134k
    ctxt->errNo = XML_ERR_USER_STOP;
12455
134k
}
12456
12457
/**
12458
 * xmlCreateIOParserCtxt:
12459
 * @sax:  a SAX handler
12460
 * @user_data:  The user data returned on SAX callbacks
12461
 * @ioread:  an I/O read function
12462
 * @ioclose:  an I/O close function
12463
 * @ioctx:  an I/O handler
12464
 * @enc:  the charset encoding if known
12465
 *
12466
 * Create a parser context for using the XML parser with an existing
12467
 * I/O stream
12468
 *
12469
 * Returns the new parser context or NULL
12470
 */
12471
xmlParserCtxtPtr
12472
xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12473
  xmlInputReadCallback   ioread, xmlInputCloseCallback  ioclose,
12474
0
  void *ioctx, xmlCharEncoding enc) {
12475
0
    xmlParserCtxtPtr ctxt;
12476
0
    xmlParserInputPtr inputStream;
12477
0
    xmlParserInputBufferPtr buf;
12478
12479
0
    if (ioread == NULL) return(NULL);
12480
12481
0
    buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
12482
0
    if (buf == NULL) {
12483
0
        if (ioclose != NULL)
12484
0
            ioclose(ioctx);
12485
0
        return (NULL);
12486
0
    }
12487
12488
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
12489
0
    if (ctxt == NULL) {
12490
0
  xmlFreeParserInputBuffer(buf);
12491
0
  return(NULL);
12492
0
    }
12493
12494
0
    inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12495
0
    if (inputStream == NULL) {
12496
0
  xmlFreeParserCtxt(ctxt);
12497
0
  return(NULL);
12498
0
    }
12499
0
    inputPush(ctxt, inputStream);
12500
12501
0
    return(ctxt);
12502
0
}
12503
12504
#ifdef LIBXML_VALID_ENABLED
12505
/************************************************************************
12506
 *                  *
12507
 *    Front ends when parsing a DTD       *
12508
 *                  *
12509
 ************************************************************************/
12510
12511
/**
12512
 * xmlIOParseDTD:
12513
 * @sax:  the SAX handler block or NULL
12514
 * @input:  an Input Buffer
12515
 * @enc:  the charset encoding if known
12516
 *
12517
 * Load and parse a DTD
12518
 *
12519
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12520
 * @input will be freed by the function in any case.
12521
 */
12522
12523
xmlDtdPtr
12524
xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12525
0
        xmlCharEncoding enc) {
12526
0
    xmlDtdPtr ret = NULL;
12527
0
    xmlParserCtxtPtr ctxt;
12528
0
    xmlParserInputPtr pinput = NULL;
12529
0
    xmlChar start[4];
12530
12531
0
    if (input == NULL)
12532
0
  return(NULL);
12533
12534
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12535
0
    if (ctxt == NULL) {
12536
0
        xmlFreeParserInputBuffer(input);
12537
0
  return(NULL);
12538
0
    }
12539
12540
    /* We are loading a DTD */
12541
0
    ctxt->options |= XML_PARSE_DTDLOAD;
12542
12543
0
    xmlDetectSAX2(ctxt);
12544
12545
    /*
12546
     * generate a parser input from the I/O handler
12547
     */
12548
12549
0
    pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12550
0
    if (pinput == NULL) {
12551
0
        xmlFreeParserInputBuffer(input);
12552
0
  xmlFreeParserCtxt(ctxt);
12553
0
  return(NULL);
12554
0
    }
12555
12556
    /*
12557
     * plug some encoding conversion routines here.
12558
     */
12559
0
    if (xmlPushInput(ctxt, pinput) < 0) {
12560
0
  xmlFreeParserCtxt(ctxt);
12561
0
  return(NULL);
12562
0
    }
12563
0
    if (enc != XML_CHAR_ENCODING_NONE) {
12564
0
        xmlSwitchEncoding(ctxt, enc);
12565
0
    }
12566
12567
0
    pinput->filename = NULL;
12568
0
    pinput->line = 1;
12569
0
    pinput->col = 1;
12570
0
    pinput->base = ctxt->input->cur;
12571
0
    pinput->cur = ctxt->input->cur;
12572
0
    pinput->free = NULL;
12573
12574
    /*
12575
     * let's parse that entity knowing it's an external subset.
12576
     */
12577
0
    ctxt->inSubset = 2;
12578
0
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12579
0
    if (ctxt->myDoc == NULL) {
12580
0
  xmlErrMemory(ctxt, "New Doc failed");
12581
0
  return(NULL);
12582
0
    }
12583
0
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
12584
0
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12585
0
                                 BAD_CAST "none", BAD_CAST "none");
12586
12587
0
    if ((enc == XML_CHAR_ENCODING_NONE) &&
12588
0
        ((ctxt->input->end - ctxt->input->cur) >= 4)) {
12589
  /*
12590
   * Get the 4 first bytes and decode the charset
12591
   * if enc != XML_CHAR_ENCODING_NONE
12592
   * plug some encoding conversion routines.
12593
   */
12594
0
  start[0] = RAW;
12595
0
  start[1] = NXT(1);
12596
0
  start[2] = NXT(2);
12597
0
  start[3] = NXT(3);
12598
0
  enc = xmlDetectCharEncoding(start, 4);
12599
0
  if (enc != XML_CHAR_ENCODING_NONE) {
12600
0
      xmlSwitchEncoding(ctxt, enc);
12601
0
  }
12602
0
    }
12603
12604
0
    xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12605
12606
0
    if (ctxt->myDoc != NULL) {
12607
0
  if (ctxt->wellFormed) {
12608
0
      ret = ctxt->myDoc->extSubset;
12609
0
      ctxt->myDoc->extSubset = NULL;
12610
0
      if (ret != NULL) {
12611
0
    xmlNodePtr tmp;
12612
12613
0
    ret->doc = NULL;
12614
0
    tmp = ret->children;
12615
0
    while (tmp != NULL) {
12616
0
        tmp->doc = NULL;
12617
0
        tmp = tmp->next;
12618
0
    }
12619
0
      }
12620
0
  } else {
12621
0
      ret = NULL;
12622
0
  }
12623
0
        xmlFreeDoc(ctxt->myDoc);
12624
0
        ctxt->myDoc = NULL;
12625
0
    }
12626
0
    xmlFreeParserCtxt(ctxt);
12627
12628
0
    return(ret);
12629
0
}
12630
12631
/**
12632
 * xmlSAXParseDTD:
12633
 * @sax:  the SAX handler block
12634
 * @ExternalID:  a NAME* containing the External ID of the DTD
12635
 * @SystemID:  a NAME* containing the URL to the DTD
12636
 *
12637
 * DEPRECATED: Don't use.
12638
 *
12639
 * Load and parse an external subset.
12640
 *
12641
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12642
 */
12643
12644
xmlDtdPtr
12645
xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12646
0
                          const xmlChar *SystemID) {
12647
0
    xmlDtdPtr ret = NULL;
12648
0
    xmlParserCtxtPtr ctxt;
12649
0
    xmlParserInputPtr input = NULL;
12650
0
    xmlCharEncoding enc;
12651
0
    xmlChar* systemIdCanonic;
12652
12653
0
    if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12654
12655
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12656
0
    if (ctxt == NULL) {
12657
0
  return(NULL);
12658
0
    }
12659
12660
    /* We are loading a DTD */
12661
0
    ctxt->options |= XML_PARSE_DTDLOAD;
12662
12663
    /*
12664
     * Canonicalise the system ID
12665
     */
12666
0
    systemIdCanonic = xmlCanonicPath(SystemID);
12667
0
    if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
12668
0
  xmlFreeParserCtxt(ctxt);
12669
0
  return(NULL);
12670
0
    }
12671
12672
    /*
12673
     * Ask the Entity resolver to load the damn thing
12674
     */
12675
12676
0
    if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
12677
0
  input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12678
0
                                   systemIdCanonic);
12679
0
    if (input == NULL) {
12680
0
  xmlFreeParserCtxt(ctxt);
12681
0
  if (systemIdCanonic != NULL)
12682
0
      xmlFree(systemIdCanonic);
12683
0
  return(NULL);
12684
0
    }
12685
12686
    /*
12687
     * plug some encoding conversion routines here.
12688
     */
12689
0
    if (xmlPushInput(ctxt, input) < 0) {
12690
0
  xmlFreeParserCtxt(ctxt);
12691
0
  if (systemIdCanonic != NULL)
12692
0
      xmlFree(systemIdCanonic);
12693
0
  return(NULL);
12694
0
    }
12695
0
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12696
0
  enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12697
0
  xmlSwitchEncoding(ctxt, enc);
12698
0
    }
12699
12700
0
    if (input->filename == NULL)
12701
0
  input->filename = (char *) systemIdCanonic;
12702
0
    else
12703
0
  xmlFree(systemIdCanonic);
12704
0
    input->line = 1;
12705
0
    input->col = 1;
12706
0
    input->base = ctxt->input->cur;
12707
0
    input->cur = ctxt->input->cur;
12708
0
    input->free = NULL;
12709
12710
    /*
12711
     * let's parse that entity knowing it's an external subset.
12712
     */
12713
0
    ctxt->inSubset = 2;
12714
0
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12715
0
    if (ctxt->myDoc == NULL) {
12716
0
  xmlErrMemory(ctxt, "New Doc failed");
12717
0
  xmlFreeParserCtxt(ctxt);
12718
0
  return(NULL);
12719
0
    }
12720
0
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
12721
0
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12722
0
                                 ExternalID, SystemID);
12723
0
    xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12724
12725
0
    if (ctxt->myDoc != NULL) {
12726
0
  if (ctxt->wellFormed) {
12727
0
      ret = ctxt->myDoc->extSubset;
12728
0
      ctxt->myDoc->extSubset = NULL;
12729
0
      if (ret != NULL) {
12730
0
    xmlNodePtr tmp;
12731
12732
0
    ret->doc = NULL;
12733
0
    tmp = ret->children;
12734
0
    while (tmp != NULL) {
12735
0
        tmp->doc = NULL;
12736
0
        tmp = tmp->next;
12737
0
    }
12738
0
      }
12739
0
  } else {
12740
0
      ret = NULL;
12741
0
  }
12742
0
        xmlFreeDoc(ctxt->myDoc);
12743
0
        ctxt->myDoc = NULL;
12744
0
    }
12745
0
    xmlFreeParserCtxt(ctxt);
12746
12747
0
    return(ret);
12748
0
}
12749
12750
12751
/**
12752
 * xmlParseDTD:
12753
 * @ExternalID:  a NAME* containing the External ID of the DTD
12754
 * @SystemID:  a NAME* containing the URL to the DTD
12755
 *
12756
 * Load and parse an external subset.
12757
 *
12758
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12759
 */
12760
12761
xmlDtdPtr
12762
0
xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12763
0
    return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12764
0
}
12765
#endif /* LIBXML_VALID_ENABLED */
12766
12767
/************************************************************************
12768
 *                  *
12769
 *    Front ends when parsing an Entity     *
12770
 *                  *
12771
 ************************************************************************/
12772
12773
/**
12774
 * xmlParseCtxtExternalEntity:
12775
 * @ctx:  the existing parsing context
12776
 * @URL:  the URL for the entity to load
12777
 * @ID:  the System ID for the entity to load
12778
 * @lst:  the return value for the set of parsed nodes
12779
 *
12780
 * Parse an external general entity within an existing parsing context
12781
 * An external general parsed entity is well-formed if it matches the
12782
 * production labeled extParsedEnt.
12783
 *
12784
 * [78] extParsedEnt ::= TextDecl? content
12785
 *
12786
 * Returns 0 if the entity is well formed, -1 in case of args problem and
12787
 *    the parser error code otherwise
12788
 */
12789
12790
int
12791
xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
12792
0
                 const xmlChar *ID, xmlNodePtr *lst) {
12793
0
    void *userData;
12794
12795
0
    if (ctx == NULL) return(-1);
12796
    /*
12797
     * If the user provided their own SAX callbacks, then reuse the
12798
     * userData callback field, otherwise the expected setup in a
12799
     * DOM builder is to have userData == ctxt
12800
     */
12801
0
    if (ctx->userData == ctx)
12802
0
        userData = NULL;
12803
0
    else
12804
0
        userData = ctx->userData;
12805
0
    return xmlParseExternalEntityPrivate(ctx->myDoc, ctx, ctx->sax,
12806
0
                                         userData, ctx->depth + 1,
12807
0
                                         URL, ID, lst);
12808
0
}
12809
12810
/**
12811
 * xmlParseExternalEntityPrivate:
12812
 * @doc:  the document the chunk pertains to
12813
 * @oldctxt:  the previous parser context if available
12814
 * @sax:  the SAX handler block (possibly NULL)
12815
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12816
 * @depth:  Used for loop detection, use 0
12817
 * @URL:  the URL for the entity to load
12818
 * @ID:  the System ID for the entity to load
12819
 * @list:  the return value for the set of parsed nodes
12820
 *
12821
 * Private version of xmlParseExternalEntity()
12822
 *
12823
 * Returns 0 if the entity is well formed, -1 in case of args problem and
12824
 *    the parser error code otherwise
12825
 */
12826
12827
static xmlParserErrors
12828
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
12829
                xmlSAXHandlerPtr sax,
12830
          void *user_data, int depth, const xmlChar *URL,
12831
553k
          const xmlChar *ID, xmlNodePtr *list) {
12832
553k
    xmlParserCtxtPtr ctxt;
12833
553k
    xmlDocPtr newDoc;
12834
553k
    xmlNodePtr newRoot;
12835
553k
    xmlParserErrors ret = XML_ERR_OK;
12836
553k
    xmlChar start[4];
12837
553k
    xmlCharEncoding enc;
12838
12839
553k
    if (((depth > 40) &&
12840
553k
  ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12841
553k
  (depth > 100)) {
12842
0
  xmlFatalErrMsg(oldctxt, XML_ERR_ENTITY_LOOP,
12843
0
                       "Maximum entity nesting depth exceeded");
12844
0
        return(XML_ERR_ENTITY_LOOP);
12845
0
    }
12846
12847
553k
    if (list != NULL)
12848
57.6k
        *list = NULL;
12849
553k
    if ((URL == NULL) && (ID == NULL))
12850
134
  return(XML_ERR_INTERNAL_ERROR);
12851
553k
    if (doc == NULL)
12852
0
  return(XML_ERR_INTERNAL_ERROR);
12853
12854
553k
    ctxt = xmlCreateEntityParserCtxtInternal(sax, user_data, URL, ID, NULL,
12855
553k
                                             oldctxt);
12856
553k
    if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
12857
79.2k
    if (oldctxt != NULL) {
12858
79.2k
        ctxt->nbErrors = oldctxt->nbErrors;
12859
79.2k
        ctxt->nbWarnings = oldctxt->nbWarnings;
12860
79.2k
    }
12861
79.2k
    xmlDetectSAX2(ctxt);
12862
12863
79.2k
    newDoc = xmlNewDoc(BAD_CAST "1.0");
12864
79.2k
    if (newDoc == NULL) {
12865
0
  xmlFreeParserCtxt(ctxt);
12866
0
  return(XML_ERR_INTERNAL_ERROR);
12867
0
    }
12868
79.2k
    newDoc->properties = XML_DOC_INTERNAL;
12869
79.2k
    if (doc) {
12870
79.2k
        newDoc->intSubset = doc->intSubset;
12871
79.2k
        newDoc->extSubset = doc->extSubset;
12872
79.2k
        if (doc->dict) {
12873
52.3k
            newDoc->dict = doc->dict;
12874
52.3k
            xmlDictReference(newDoc->dict);
12875
52.3k
        }
12876
79.2k
        if (doc->URL != NULL) {
12877
51.8k
            newDoc->URL = xmlStrdup(doc->URL);
12878
51.8k
        }
12879
79.2k
    }
12880
79.2k
    newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12881
79.2k
    if (newRoot == NULL) {
12882
0
  if (sax != NULL)
12883
0
  xmlFreeParserCtxt(ctxt);
12884
0
  newDoc->intSubset = NULL;
12885
0
  newDoc->extSubset = NULL;
12886
0
        xmlFreeDoc(newDoc);
12887
0
  return(XML_ERR_INTERNAL_ERROR);
12888
0
    }
12889
79.2k
    xmlAddChild((xmlNodePtr) newDoc, newRoot);
12890
79.2k
    nodePush(ctxt, newDoc->children);
12891
79.2k
    if (doc == NULL) {
12892
0
        ctxt->myDoc = newDoc;
12893
79.2k
    } else {
12894
79.2k
        ctxt->myDoc = doc;
12895
79.2k
        newRoot->doc = doc;
12896
79.2k
    }
12897
12898
    /*
12899
     * Get the 4 first bytes and decode the charset
12900
     * if enc != XML_CHAR_ENCODING_NONE
12901
     * plug some encoding conversion routines.
12902
     */
12903
79.2k
    GROW;
12904
79.2k
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12905
75.8k
  start[0] = RAW;
12906
75.8k
  start[1] = NXT(1);
12907
75.8k
  start[2] = NXT(2);
12908
75.8k
  start[3] = NXT(3);
12909
75.8k
  enc = xmlDetectCharEncoding(start, 4);
12910
75.8k
  if (enc != XML_CHAR_ENCODING_NONE) {
12911
5.02k
      xmlSwitchEncoding(ctxt, enc);
12912
5.02k
  }
12913
75.8k
    }
12914
12915
    /*
12916
     * Parse a possible text declaration first
12917
     */
12918
79.2k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
12919
3.46k
  xmlParseTextDecl(ctxt);
12920
        /*
12921
         * An XML-1.0 document can't reference an entity not XML-1.0
12922
         */
12923
3.46k
        if ((xmlStrEqual(oldctxt->version, BAD_CAST "1.0")) &&
12924
3.46k
            (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
12925
112
            xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
12926
112
                           "Version mismatch between document and entity\n");
12927
112
        }
12928
3.46k
    }
12929
12930
79.2k
    ctxt->instate = XML_PARSER_CONTENT;
12931
79.2k
    ctxt->depth = depth;
12932
79.2k
    if (oldctxt != NULL) {
12933
79.2k
  ctxt->_private = oldctxt->_private;
12934
79.2k
  ctxt->loadsubset = oldctxt->loadsubset;
12935
79.2k
  ctxt->validate = oldctxt->validate;
12936
79.2k
  ctxt->valid = oldctxt->valid;
12937
79.2k
  ctxt->replaceEntities = oldctxt->replaceEntities;
12938
79.2k
        if (oldctxt->validate) {
12939
42.1k
            ctxt->vctxt.error = oldctxt->vctxt.error;
12940
42.1k
            ctxt->vctxt.warning = oldctxt->vctxt.warning;
12941
42.1k
            ctxt->vctxt.userData = oldctxt->vctxt.userData;
12942
42.1k
            ctxt->vctxt.flags = oldctxt->vctxt.flags;
12943
42.1k
        }
12944
79.2k
  ctxt->external = oldctxt->external;
12945
79.2k
        if (ctxt->dict) xmlDictFree(ctxt->dict);
12946
79.2k
        ctxt->dict = oldctxt->dict;
12947
79.2k
        ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12948
79.2k
        ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12949
79.2k
        ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12950
79.2k
        ctxt->dictNames = oldctxt->dictNames;
12951
79.2k
        ctxt->attsDefault = oldctxt->attsDefault;
12952
79.2k
        ctxt->attsSpecial = oldctxt->attsSpecial;
12953
79.2k
        ctxt->linenumbers = oldctxt->linenumbers;
12954
79.2k
  ctxt->record_info = oldctxt->record_info;
12955
79.2k
  ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
12956
79.2k
  ctxt->node_seq.length = oldctxt->node_seq.length;
12957
79.2k
  ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
12958
79.2k
    } else {
12959
  /*
12960
   * Doing validity checking on chunk without context
12961
   * doesn't make sense
12962
   */
12963
0
  ctxt->_private = NULL;
12964
0
  ctxt->validate = 0;
12965
0
  ctxt->external = 2;
12966
0
  ctxt->loadsubset = 0;
12967
0
    }
12968
12969
79.2k
    xmlParseContent(ctxt);
12970
12971
79.2k
    if ((RAW == '<') && (NXT(1) == '/')) {
12972
1.99k
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12973
77.2k
    } else if (RAW != 0) {
12974
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12975
0
    }
12976
79.2k
    if (ctxt->node != newDoc->children) {
12977
11.1k
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12978
11.1k
    }
12979
12980
79.2k
    if (!ctxt->wellFormed) {
12981
23.0k
  ret = (xmlParserErrors)ctxt->errNo;
12982
23.0k
        if (oldctxt != NULL) {
12983
23.0k
            oldctxt->errNo = ctxt->errNo;
12984
23.0k
            oldctxt->wellFormed = 0;
12985
23.0k
            xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12986
23.0k
        }
12987
56.2k
    } else {
12988
56.2k
  if (list != NULL) {
12989
6.51k
      xmlNodePtr cur;
12990
12991
      /*
12992
       * Return the newly created nodeset after unlinking it from
12993
       * they pseudo parent.
12994
       */
12995
6.51k
      cur = newDoc->children->children;
12996
6.51k
      *list = cur;
12997
1.78M
      while (cur != NULL) {
12998
1.78M
    cur->parent = NULL;
12999
1.78M
    cur = cur->next;
13000
1.78M
      }
13001
6.51k
            newDoc->children->children = NULL;
13002
6.51k
  }
13003
56.2k
  ret = XML_ERR_OK;
13004
56.2k
    }
13005
13006
    /*
13007
     * Also record the size of the entity parsed
13008
     */
13009
79.2k
    if (ctxt->input != NULL && oldctxt != NULL) {
13010
79.2k
        unsigned long consumed = ctxt->input->consumed;
13011
13012
79.2k
        xmlSaturatedAddSizeT(&consumed, ctxt->input->cur - ctxt->input->base);
13013
13014
79.2k
        xmlSaturatedAdd(&oldctxt->sizeentities, consumed);
13015
79.2k
        xmlSaturatedAdd(&oldctxt->sizeentities, ctxt->sizeentities);
13016
13017
79.2k
        xmlSaturatedAdd(&oldctxt->sizeentcopy, consumed);
13018
79.2k
        xmlSaturatedAdd(&oldctxt->sizeentcopy, ctxt->sizeentcopy);
13019
79.2k
    }
13020
13021
79.2k
    if (oldctxt != NULL) {
13022
79.2k
        ctxt->dict = NULL;
13023
79.2k
        ctxt->attsDefault = NULL;
13024
79.2k
        ctxt->attsSpecial = NULL;
13025
79.2k
        oldctxt->nbErrors = ctxt->nbErrors;
13026
79.2k
        oldctxt->nbWarnings = ctxt->nbWarnings;
13027
79.2k
        oldctxt->validate = ctxt->validate;
13028
79.2k
        oldctxt->valid = ctxt->valid;
13029
79.2k
        oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
13030
79.2k
        oldctxt->node_seq.length = ctxt->node_seq.length;
13031
79.2k
        oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
13032
79.2k
    }
13033
79.2k
    ctxt->node_seq.maximum = 0;
13034
79.2k
    ctxt->node_seq.length = 0;
13035
79.2k
    ctxt->node_seq.buffer = NULL;
13036
79.2k
    xmlFreeParserCtxt(ctxt);
13037
79.2k
    newDoc->intSubset = NULL;
13038
79.2k
    newDoc->extSubset = NULL;
13039
79.2k
    xmlFreeDoc(newDoc);
13040
13041
79.2k
    return(ret);
13042
79.2k
}
13043
13044
#ifdef LIBXML_SAX1_ENABLED
13045
/**
13046
 * xmlParseExternalEntity:
13047
 * @doc:  the document the chunk pertains to
13048
 * @sax:  the SAX handler block (possibly NULL)
13049
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13050
 * @depth:  Used for loop detection, use 0
13051
 * @URL:  the URL for the entity to load
13052
 * @ID:  the System ID for the entity to load
13053
 * @lst:  the return value for the set of parsed nodes
13054
 *
13055
 * Parse an external general entity
13056
 * An external general parsed entity is well-formed if it matches the
13057
 * production labeled extParsedEnt.
13058
 *
13059
 * [78] extParsedEnt ::= TextDecl? content
13060
 *
13061
 * Returns 0 if the entity is well formed, -1 in case of args problem and
13062
 *    the parser error code otherwise
13063
 */
13064
13065
int
13066
xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
13067
0
    int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
13068
0
    return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
13069
0
                           ID, lst));
13070
0
}
13071
13072
/**
13073
 * xmlParseBalancedChunkMemory:
13074
 * @doc:  the document the chunk pertains to (must not be NULL)
13075
 * @sax:  the SAX handler block (possibly NULL)
13076
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13077
 * @depth:  Used for loop detection, use 0
13078
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13079
 * @lst:  the return value for the set of parsed nodes
13080
 *
13081
 * Parse a well-balanced chunk of an XML document
13082
 * called by the parser
13083
 * The allowed sequence for the Well Balanced Chunk is the one defined by
13084
 * the content production in the XML grammar:
13085
 *
13086
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13087
 *
13088
 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13089
 *    the parser error code otherwise
13090
 */
13091
13092
int
13093
xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13094
0
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
13095
0
    return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
13096
0
                                                depth, string, lst, 0 );
13097
0
}
13098
#endif /* LIBXML_SAX1_ENABLED */
13099
13100
/**
13101
 * xmlParseBalancedChunkMemoryInternal:
13102
 * @oldctxt:  the existing parsing context
13103
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13104
 * @user_data:  the user data field for the parser context
13105
 * @lst:  the return value for the set of parsed nodes
13106
 *
13107
 *
13108
 * Parse a well-balanced chunk of an XML document
13109
 * called by the parser
13110
 * The allowed sequence for the Well Balanced Chunk is the one defined by
13111
 * the content production in the XML grammar:
13112
 *
13113
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13114
 *
13115
 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13116
 * error code otherwise
13117
 *
13118
 * In case recover is set to 1, the nodelist will not be empty even if
13119
 * the parsed chunk is not well balanced.
13120
 */
13121
static xmlParserErrors
13122
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
13123
91.5k
  const xmlChar *string, void *user_data, xmlNodePtr *lst) {
13124
91.5k
    xmlParserCtxtPtr ctxt;
13125
91.5k
    xmlDocPtr newDoc = NULL;
13126
91.5k
    xmlNodePtr newRoot;
13127
91.5k
    xmlSAXHandlerPtr oldsax = NULL;
13128
91.5k
    xmlNodePtr content = NULL;
13129
91.5k
    xmlNodePtr last = NULL;
13130
91.5k
    int size;
13131
91.5k
    xmlParserErrors ret = XML_ERR_OK;
13132
91.5k
#ifdef SAX2
13133
91.5k
    int i;
13134
91.5k
#endif
13135
13136
91.5k
    if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13137
91.5k
        (oldctxt->depth >  100)) {
13138
87
  xmlFatalErrMsg(oldctxt, XML_ERR_ENTITY_LOOP,
13139
87
                       "Maximum entity nesting depth exceeded");
13140
87
  return(XML_ERR_ENTITY_LOOP);
13141
87
    }
13142
13143
13144
91.4k
    if (lst != NULL)
13145
52.9k
        *lst = NULL;
13146
91.4k
    if (string == NULL)
13147
83
        return(XML_ERR_INTERNAL_ERROR);
13148
13149
91.3k
    size = xmlStrlen(string);
13150
13151
91.3k
    ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13152
91.3k
    if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13153
66.6k
    ctxt->nbErrors = oldctxt->nbErrors;
13154
66.6k
    ctxt->nbWarnings = oldctxt->nbWarnings;
13155
66.6k
    if (user_data != NULL)
13156
0
  ctxt->userData = user_data;
13157
66.6k
    else
13158
66.6k
  ctxt->userData = ctxt;
13159
66.6k
    if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13160
66.6k
    ctxt->dict = oldctxt->dict;
13161
66.6k
    ctxt->input_id = oldctxt->input_id;
13162
66.6k
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13163
66.6k
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13164
66.6k
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13165
13166
66.6k
#ifdef SAX2
13167
    /* propagate namespaces down the entity */
13168
67.0k
    for (i = 0;i < oldctxt->nsNr;i += 2) {
13169
366
        nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
13170
366
    }
13171
66.6k
#endif
13172
13173
66.6k
    oldsax = ctxt->sax;
13174
66.6k
    ctxt->sax = oldctxt->sax;
13175
66.6k
    xmlDetectSAX2(ctxt);
13176
66.6k
    ctxt->replaceEntities = oldctxt->replaceEntities;
13177
66.6k
    ctxt->options = oldctxt->options;
13178
13179
66.6k
    ctxt->_private = oldctxt->_private;
13180
66.6k
    if (oldctxt->myDoc == NULL) {
13181
0
  newDoc = xmlNewDoc(BAD_CAST "1.0");
13182
0
  if (newDoc == NULL) {
13183
0
      ctxt->sax = oldsax;
13184
0
      ctxt->dict = NULL;
13185
0
      xmlFreeParserCtxt(ctxt);
13186
0
      return(XML_ERR_INTERNAL_ERROR);
13187
0
  }
13188
0
  newDoc->properties = XML_DOC_INTERNAL;
13189
0
  newDoc->dict = ctxt->dict;
13190
0
  xmlDictReference(newDoc->dict);
13191
0
  ctxt->myDoc = newDoc;
13192
66.6k
    } else {
13193
66.6k
  ctxt->myDoc = oldctxt->myDoc;
13194
66.6k
        content = ctxt->myDoc->children;
13195
66.6k
  last = ctxt->myDoc->last;
13196
66.6k
    }
13197
66.6k
    newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
13198
66.6k
    if (newRoot == NULL) {
13199
0
  ctxt->sax = oldsax;
13200
0
  ctxt->dict = NULL;
13201
0
  xmlFreeParserCtxt(ctxt);
13202
0
  if (newDoc != NULL) {
13203
0
      xmlFreeDoc(newDoc);
13204
0
  }
13205
0
  return(XML_ERR_INTERNAL_ERROR);
13206
0
    }
13207
66.6k
    ctxt->myDoc->children = NULL;
13208
66.6k
    ctxt->myDoc->last = NULL;
13209
66.6k
    xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
13210
66.6k
    nodePush(ctxt, ctxt->myDoc->children);
13211
66.6k
    ctxt->instate = XML_PARSER_CONTENT;
13212
66.6k
    ctxt->depth = oldctxt->depth;
13213
13214
66.6k
    ctxt->validate = 0;
13215
66.6k
    ctxt->loadsubset = oldctxt->loadsubset;
13216
66.6k
    if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
13217
  /*
13218
   * ID/IDREF registration will be done in xmlValidateElement below
13219
   */
13220
55.9k
  ctxt->loadsubset |= XML_SKIP_IDS;
13221
55.9k
    }
13222
66.6k
    ctxt->dictNames = oldctxt->dictNames;
13223
66.6k
    ctxt->attsDefault = oldctxt->attsDefault;
13224
66.6k
    ctxt->attsSpecial = oldctxt->attsSpecial;
13225
13226
66.6k
    xmlParseContent(ctxt);
13227
66.6k
    if ((RAW == '<') && (NXT(1) == '/')) {
13228
174
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13229
66.4k
    } else if (RAW != 0) {
13230
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13231
0
    }
13232
66.6k
    if (ctxt->node != ctxt->myDoc->children) {
13233
970
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13234
970
    }
13235
13236
66.6k
    if (!ctxt->wellFormed) {
13237
9.39k
  ret = (xmlParserErrors)ctxt->errNo;
13238
9.39k
        oldctxt->errNo = ctxt->errNo;
13239
9.39k
        oldctxt->wellFormed = 0;
13240
9.39k
        xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13241
57.2k
    } else {
13242
57.2k
        ret = XML_ERR_OK;
13243
57.2k
    }
13244
13245
66.6k
    if ((lst != NULL) && (ret == XML_ERR_OK)) {
13246
43.2k
  xmlNodePtr cur;
13247
13248
  /*
13249
   * Return the newly created nodeset after unlinking it from
13250
   * they pseudo parent.
13251
   */
13252
43.2k
  cur = ctxt->myDoc->children->children;
13253
43.2k
  *lst = cur;
13254
149k
  while (cur != NULL) {
13255
106k
#ifdef LIBXML_VALID_ENABLED
13256
106k
      if ((oldctxt->validate) && (oldctxt->wellFormed) &&
13257
106k
    (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
13258
106k
    (cur->type == XML_ELEMENT_NODE)) {
13259
16.9k
    oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
13260
16.9k
      oldctxt->myDoc, cur);
13261
16.9k
      }
13262
106k
#endif /* LIBXML_VALID_ENABLED */
13263
106k
      cur->parent = NULL;
13264
106k
      cur = cur->next;
13265
106k
  }
13266
43.2k
  ctxt->myDoc->children->children = NULL;
13267
43.2k
    }
13268
66.6k
    if (ctxt->myDoc != NULL) {
13269
66.6k
  xmlFreeNode(ctxt->myDoc->children);
13270
66.6k
        ctxt->myDoc->children = content;
13271
66.6k
        ctxt->myDoc->last = last;
13272
66.6k
    }
13273
13274
    /*
13275
     * Also record the size of the entity parsed
13276
     */
13277
66.6k
    if (ctxt->input != NULL && oldctxt != NULL) {
13278
66.6k
        unsigned long consumed = ctxt->input->consumed;
13279
13280
66.6k
        xmlSaturatedAddSizeT(&consumed, ctxt->input->cur - ctxt->input->base);
13281
13282
66.6k
        xmlSaturatedAdd(&oldctxt->sizeentcopy, consumed);
13283
66.6k
        xmlSaturatedAdd(&oldctxt->sizeentcopy, ctxt->sizeentcopy);
13284
66.6k
    }
13285
13286
66.6k
    oldctxt->nbErrors = ctxt->nbErrors;
13287
66.6k
    oldctxt->nbWarnings = ctxt->nbWarnings;
13288
66.6k
    ctxt->sax = oldsax;
13289
66.6k
    ctxt->dict = NULL;
13290
66.6k
    ctxt->attsDefault = NULL;
13291
66.6k
    ctxt->attsSpecial = NULL;
13292
66.6k
    xmlFreeParserCtxt(ctxt);
13293
66.6k
    if (newDoc != NULL) {
13294
0
  xmlFreeDoc(newDoc);
13295
0
    }
13296
13297
66.6k
    return(ret);
13298
66.6k
}
13299
13300
/**
13301
 * xmlParseInNodeContext:
13302
 * @node:  the context node
13303
 * @data:  the input string
13304
 * @datalen:  the input string length in bytes
13305
 * @options:  a combination of xmlParserOption
13306
 * @lst:  the return value for the set of parsed nodes
13307
 *
13308
 * Parse a well-balanced chunk of an XML document
13309
 * within the context (DTD, namespaces, etc ...) of the given node.
13310
 *
13311
 * The allowed sequence for the data is a Well Balanced Chunk defined by
13312
 * the content production in the XML grammar:
13313
 *
13314
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13315
 *
13316
 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13317
 * error code otherwise
13318
 */
13319
xmlParserErrors
13320
xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13321
0
                      int options, xmlNodePtr *lst) {
13322
0
#ifdef SAX2
13323
0
    xmlParserCtxtPtr ctxt;
13324
0
    xmlDocPtr doc = NULL;
13325
0
    xmlNodePtr fake, cur;
13326
0
    int nsnr = 0;
13327
13328
0
    xmlParserErrors ret = XML_ERR_OK;
13329
13330
    /*
13331
     * check all input parameters, grab the document
13332
     */
13333
0
    if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13334
0
        return(XML_ERR_INTERNAL_ERROR);
13335
0
    switch (node->type) {
13336
0
        case XML_ELEMENT_NODE:
13337
0
        case XML_ATTRIBUTE_NODE:
13338
0
        case XML_TEXT_NODE:
13339
0
        case XML_CDATA_SECTION_NODE:
13340
0
        case XML_ENTITY_REF_NODE:
13341
0
        case XML_PI_NODE:
13342
0
        case XML_COMMENT_NODE:
13343
0
        case XML_DOCUMENT_NODE:
13344
0
        case XML_HTML_DOCUMENT_NODE:
13345
0
      break;
13346
0
  default:
13347
0
      return(XML_ERR_INTERNAL_ERROR);
13348
13349
0
    }
13350
0
    while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13351
0
           (node->type != XML_DOCUMENT_NODE) &&
13352
0
     (node->type != XML_HTML_DOCUMENT_NODE))
13353
0
  node = node->parent;
13354
0
    if (node == NULL)
13355
0
  return(XML_ERR_INTERNAL_ERROR);
13356
0
    if (node->type == XML_ELEMENT_NODE)
13357
0
  doc = node->doc;
13358
0
    else
13359
0
        doc = (xmlDocPtr) node;
13360
0
    if (doc == NULL)
13361
0
  return(XML_ERR_INTERNAL_ERROR);
13362
13363
    /*
13364
     * allocate a context and set-up everything not related to the
13365
     * node position in the tree
13366
     */
13367
0
    if (doc->type == XML_DOCUMENT_NODE)
13368
0
  ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13369
0
#ifdef LIBXML_HTML_ENABLED
13370
0
    else if (doc->type == XML_HTML_DOCUMENT_NODE) {
13371
0
  ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
13372
        /*
13373
         * When parsing in context, it makes no sense to add implied
13374
         * elements like html/body/etc...
13375
         */
13376
0
        options |= HTML_PARSE_NOIMPLIED;
13377
0
    }
13378
0
#endif
13379
0
    else
13380
0
        return(XML_ERR_INTERNAL_ERROR);
13381
13382
0
    if (ctxt == NULL)
13383
0
        return(XML_ERR_NO_MEMORY);
13384
13385
    /*
13386
     * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13387
     * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13388
     * we must wait until the last moment to free the original one.
13389
     */
13390
0
    if (doc->dict != NULL) {
13391
0
        if (ctxt->dict != NULL)
13392
0
      xmlDictFree(ctxt->dict);
13393
0
  ctxt->dict = doc->dict;
13394
0
    } else
13395
0
        options |= XML_PARSE_NODICT;
13396
13397
0
    if (doc->encoding != NULL) {
13398
0
        xmlCharEncodingHandlerPtr hdlr;
13399
13400
0
        if (ctxt->encoding != NULL)
13401
0
      xmlFree((xmlChar *) ctxt->encoding);
13402
0
        ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13403
13404
0
        hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding);
13405
0
        if (hdlr != NULL) {
13406
0
            xmlSwitchToEncoding(ctxt, hdlr);
13407
0
  } else {
13408
0
            return(XML_ERR_UNSUPPORTED_ENCODING);
13409
0
        }
13410
0
    }
13411
13412
0
    xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13413
0
    xmlDetectSAX2(ctxt);
13414
0
    ctxt->myDoc = doc;
13415
    /* parsing in context, i.e. as within existing content */
13416
0
    ctxt->input_id = 2;
13417
0
    ctxt->instate = XML_PARSER_CONTENT;
13418
13419
0
    fake = xmlNewDocComment(node->doc, NULL);
13420
0
    if (fake == NULL) {
13421
0
        xmlFreeParserCtxt(ctxt);
13422
0
  return(XML_ERR_NO_MEMORY);
13423
0
    }
13424
0
    xmlAddChild(node, fake);
13425
13426
0
    if (node->type == XML_ELEMENT_NODE) {
13427
0
  nodePush(ctxt, node);
13428
  /*
13429
   * initialize the SAX2 namespaces stack
13430
   */
13431
0
  cur = node;
13432
0
  while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13433
0
      xmlNsPtr ns = cur->nsDef;
13434
0
      const xmlChar *iprefix, *ihref;
13435
13436
0
      while (ns != NULL) {
13437
0
    if (ctxt->dict) {
13438
0
        iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13439
0
        ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13440
0
    } else {
13441
0
        iprefix = ns->prefix;
13442
0
        ihref = ns->href;
13443
0
    }
13444
13445
0
          if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13446
0
        nsPush(ctxt, iprefix, ihref);
13447
0
        nsnr++;
13448
0
    }
13449
0
    ns = ns->next;
13450
0
      }
13451
0
      cur = cur->parent;
13452
0
  }
13453
0
    }
13454
13455
0
    if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13456
  /*
13457
   * ID/IDREF registration will be done in xmlValidateElement below
13458
   */
13459
0
  ctxt->loadsubset |= XML_SKIP_IDS;
13460
0
    }
13461
13462
0
#ifdef LIBXML_HTML_ENABLED
13463
0
    if (doc->type == XML_HTML_DOCUMENT_NODE)
13464
0
        __htmlParseContent(ctxt);
13465
0
    else
13466
0
#endif
13467
0
  xmlParseContent(ctxt);
13468
13469
0
    nsPop(ctxt, nsnr);
13470
0
    if ((RAW == '<') && (NXT(1) == '/')) {
13471
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13472
0
    } else if (RAW != 0) {
13473
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13474
0
    }
13475
0
    if ((ctxt->node != NULL) && (ctxt->node != node)) {
13476
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13477
0
  ctxt->wellFormed = 0;
13478
0
    }
13479
13480
0
    if (!ctxt->wellFormed) {
13481
0
        if (ctxt->errNo == 0)
13482
0
      ret = XML_ERR_INTERNAL_ERROR;
13483
0
  else
13484
0
      ret = (xmlParserErrors)ctxt->errNo;
13485
0
    } else {
13486
0
        ret = XML_ERR_OK;
13487
0
    }
13488
13489
    /*
13490
     * Return the newly created nodeset after unlinking it from
13491
     * the pseudo sibling.
13492
     */
13493
13494
0
    cur = fake->next;
13495
0
    fake->next = NULL;
13496
0
    node->last = fake;
13497
13498
0
    if (cur != NULL) {
13499
0
  cur->prev = NULL;
13500
0
    }
13501
13502
0
    *lst = cur;
13503
13504
0
    while (cur != NULL) {
13505
0
  cur->parent = NULL;
13506
0
  cur = cur->next;
13507
0
    }
13508
13509
0
    xmlUnlinkNode(fake);
13510
0
    xmlFreeNode(fake);
13511
13512
13513
0
    if (ret != XML_ERR_OK) {
13514
0
        xmlFreeNodeList(*lst);
13515
0
  *lst = NULL;
13516
0
    }
13517
13518
0
    if (doc->dict != NULL)
13519
0
        ctxt->dict = NULL;
13520
0
    xmlFreeParserCtxt(ctxt);
13521
13522
0
    return(ret);
13523
#else /* !SAX2 */
13524
    return(XML_ERR_INTERNAL_ERROR);
13525
#endif
13526
0
}
13527
13528
#ifdef LIBXML_SAX1_ENABLED
13529
/**
13530
 * xmlParseBalancedChunkMemoryRecover:
13531
 * @doc:  the document the chunk pertains to (must not be NULL)
13532
 * @sax:  the SAX handler block (possibly NULL)
13533
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13534
 * @depth:  Used for loop detection, use 0
13535
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13536
 * @lst:  the return value for the set of parsed nodes
13537
 * @recover: return nodes even if the data is broken (use 0)
13538
 *
13539
 *
13540
 * Parse a well-balanced chunk of an XML document
13541
 * called by the parser
13542
 * The allowed sequence for the Well Balanced Chunk is the one defined by
13543
 * the content production in the XML grammar:
13544
 *
13545
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13546
 *
13547
 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13548
 *    the parser error code otherwise
13549
 *
13550
 * In case recover is set to 1, the nodelist will not be empty even if
13551
 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13552
 * some extent.
13553
 */
13554
int
13555
xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13556
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
13557
0
     int recover) {
13558
0
    xmlParserCtxtPtr ctxt;
13559
0
    xmlDocPtr newDoc;
13560
0
    xmlSAXHandlerPtr oldsax = NULL;
13561
0
    xmlNodePtr content, newRoot;
13562
0
    int size;
13563
0
    int ret = 0;
13564
13565
0
    if (depth > 40) {
13566
0
  return(XML_ERR_ENTITY_LOOP);
13567
0
    }
13568
13569
13570
0
    if (lst != NULL)
13571
0
        *lst = NULL;
13572
0
    if (string == NULL)
13573
0
        return(-1);
13574
13575
0
    size = xmlStrlen(string);
13576
13577
0
    ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13578
0
    if (ctxt == NULL) return(-1);
13579
0
    ctxt->userData = ctxt;
13580
0
    if (sax != NULL) {
13581
0
  oldsax = ctxt->sax;
13582
0
        ctxt->sax = sax;
13583
0
  if (user_data != NULL)
13584
0
      ctxt->userData = user_data;
13585
0
    }
13586
0
    newDoc = xmlNewDoc(BAD_CAST "1.0");
13587
0
    if (newDoc == NULL) {
13588
0
  xmlFreeParserCtxt(ctxt);
13589
0
  return(-1);
13590
0
    }
13591
0
    newDoc->properties = XML_DOC_INTERNAL;
13592
0
    if ((doc != NULL) && (doc->dict != NULL)) {
13593
0
        xmlDictFree(ctxt->dict);
13594
0
  ctxt->dict = doc->dict;
13595
0
  xmlDictReference(ctxt->dict);
13596
0
  ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13597
0
  ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13598
0
  ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13599
0
  ctxt->dictNames = 1;
13600
0
    } else {
13601
0
  xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
13602
0
    }
13603
    /* doc == NULL is only supported for historic reasons */
13604
0
    if (doc != NULL) {
13605
0
  newDoc->intSubset = doc->intSubset;
13606
0
  newDoc->extSubset = doc->extSubset;
13607
0
    }
13608
0
    newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13609
0
    if (newRoot == NULL) {
13610
0
  if (sax != NULL)
13611
0
      ctxt->sax = oldsax;
13612
0
  xmlFreeParserCtxt(ctxt);
13613
0
  newDoc->intSubset = NULL;
13614
0
  newDoc->extSubset = NULL;
13615
0
        xmlFreeDoc(newDoc);
13616
0
  return(-1);
13617
0
    }
13618
0
    xmlAddChild((xmlNodePtr) newDoc, newRoot);
13619
0
    nodePush(ctxt, newRoot);
13620
    /* doc == NULL is only supported for historic reasons */
13621
0
    if (doc == NULL) {
13622
0
  ctxt->myDoc = newDoc;
13623
0
    } else {
13624
0
  ctxt->myDoc = newDoc;
13625
0
  newDoc->children->doc = doc;
13626
  /* Ensure that doc has XML spec namespace */
13627
0
  xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13628
0
  newDoc->oldNs = doc->oldNs;
13629
0
    }
13630
0
    ctxt->instate = XML_PARSER_CONTENT;
13631
0
    ctxt->input_id = 2;
13632
0
    ctxt->depth = depth;
13633
13634
    /*
13635
     * Doing validity checking on chunk doesn't make sense
13636
     */
13637
0
    ctxt->validate = 0;
13638
0
    ctxt->loadsubset = 0;
13639
0
    xmlDetectSAX2(ctxt);
13640
13641
0
    if ( doc != NULL ){
13642
0
        content = doc->children;
13643
0
        doc->children = NULL;
13644
0
        xmlParseContent(ctxt);
13645
0
        doc->children = content;
13646
0
    }
13647
0
    else {
13648
0
        xmlParseContent(ctxt);
13649
0
    }
13650
0
    if ((RAW == '<') && (NXT(1) == '/')) {
13651
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13652
0
    } else if (RAW != 0) {
13653
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13654
0
    }
13655
0
    if (ctxt->node != newDoc->children) {
13656
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13657
0
    }
13658
13659
0
    if (!ctxt->wellFormed) {
13660
0
        if (ctxt->errNo == 0)
13661
0
      ret = 1;
13662
0
  else
13663
0
      ret = ctxt->errNo;
13664
0
    } else {
13665
0
      ret = 0;
13666
0
    }
13667
13668
0
    if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13669
0
  xmlNodePtr cur;
13670
13671
  /*
13672
   * Return the newly created nodeset after unlinking it from
13673
   * they pseudo parent.
13674
   */
13675
0
  cur = newDoc->children->children;
13676
0
  *lst = cur;
13677
0
  while (cur != NULL) {
13678
0
      xmlSetTreeDoc(cur, doc);
13679
0
      cur->parent = NULL;
13680
0
      cur = cur->next;
13681
0
  }
13682
0
  newDoc->children->children = NULL;
13683
0
    }
13684
13685
0
    if (sax != NULL)
13686
0
  ctxt->sax = oldsax;
13687
0
    xmlFreeParserCtxt(ctxt);
13688
0
    newDoc->intSubset = NULL;
13689
0
    newDoc->extSubset = NULL;
13690
    /* This leaks the namespace list if doc == NULL */
13691
0
    newDoc->oldNs = NULL;
13692
0
    xmlFreeDoc(newDoc);
13693
13694
0
    return(ret);
13695
0
}
13696
13697
/**
13698
 * xmlSAXParseEntity:
13699
 * @sax:  the SAX handler block
13700
 * @filename:  the filename
13701
 *
13702
 * DEPRECATED: Don't use.
13703
 *
13704
 * parse an XML external entity out of context and build a tree.
13705
 * It use the given SAX function block to handle the parsing callback.
13706
 * If sax is NULL, fallback to the default DOM tree building routines.
13707
 *
13708
 * [78] extParsedEnt ::= TextDecl? content
13709
 *
13710
 * This correspond to a "Well Balanced" chunk
13711
 *
13712
 * Returns the resulting document tree
13713
 */
13714
13715
xmlDocPtr
13716
0
xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13717
0
    xmlDocPtr ret;
13718
0
    xmlParserCtxtPtr ctxt;
13719
13720
0
    ctxt = xmlCreateFileParserCtxt(filename);
13721
0
    if (ctxt == NULL) {
13722
0
  return(NULL);
13723
0
    }
13724
0
    if (sax != NULL) {
13725
0
  if (ctxt->sax != NULL)
13726
0
      xmlFree(ctxt->sax);
13727
0
        ctxt->sax = sax;
13728
0
        ctxt->userData = NULL;
13729
0
    }
13730
13731
0
    xmlParseExtParsedEnt(ctxt);
13732
13733
0
    if (ctxt->wellFormed)
13734
0
  ret = ctxt->myDoc;
13735
0
    else {
13736
0
        ret = NULL;
13737
0
        xmlFreeDoc(ctxt->myDoc);
13738
0
        ctxt->myDoc = NULL;
13739
0
    }
13740
0
    if (sax != NULL)
13741
0
        ctxt->sax = NULL;
13742
0
    xmlFreeParserCtxt(ctxt);
13743
13744
0
    return(ret);
13745
0
}
13746
13747
/**
13748
 * xmlParseEntity:
13749
 * @filename:  the filename
13750
 *
13751
 * parse an XML external entity out of context and build a tree.
13752
 *
13753
 * [78] extParsedEnt ::= TextDecl? content
13754
 *
13755
 * This correspond to a "Well Balanced" chunk
13756
 *
13757
 * Returns the resulting document tree
13758
 */
13759
13760
xmlDocPtr
13761
0
xmlParseEntity(const char *filename) {
13762
0
    return(xmlSAXParseEntity(NULL, filename));
13763
0
}
13764
#endif /* LIBXML_SAX1_ENABLED */
13765
13766
/**
13767
 * xmlCreateEntityParserCtxtInternal:
13768
 * @URL:  the entity URL
13769
 * @ID:  the entity PUBLIC ID
13770
 * @base:  a possible base for the target URI
13771
 * @pctx:  parser context used to set options on new context
13772
 *
13773
 * Create a parser context for an external entity
13774
 * Automatic support for ZLIB/Compress compressed document is provided
13775
 * by default if found at compile-time.
13776
 *
13777
 * Returns the new parser context or NULL
13778
 */
13779
static xmlParserCtxtPtr
13780
xmlCreateEntityParserCtxtInternal(xmlSAXHandlerPtr sax, void *userData,
13781
        const xmlChar *URL, const xmlChar *ID, const xmlChar *base,
13782
553k
        xmlParserCtxtPtr pctx) {
13783
553k
    xmlParserCtxtPtr ctxt;
13784
553k
    xmlParserInputPtr inputStream;
13785
553k
    char *directory = NULL;
13786
553k
    xmlChar *uri;
13787
13788
553k
    ctxt = xmlNewSAXParserCtxt(sax, userData);
13789
553k
    if (ctxt == NULL) {
13790
0
  return(NULL);
13791
0
    }
13792
13793
553k
    if (pctx != NULL) {
13794
553k
        ctxt->options = pctx->options;
13795
553k
        ctxt->_private = pctx->_private;
13796
553k
  ctxt->input_id = pctx->input_id;
13797
553k
    }
13798
13799
    /* Don't read from stdin. */
13800
553k
    if (xmlStrcmp(URL, BAD_CAST "-") == 0)
13801
0
        URL = BAD_CAST "./-";
13802
13803
553k
    uri = xmlBuildURI(URL, base);
13804
13805
553k
    if (uri == NULL) {
13806
13.6k
  inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
13807
13.6k
  if (inputStream == NULL) {
13808
11.9k
      xmlFreeParserCtxt(ctxt);
13809
11.9k
      return(NULL);
13810
11.9k
  }
13811
13812
1.68k
  inputPush(ctxt, inputStream);
13813
13814
1.68k
  if ((ctxt->directory == NULL) && (directory == NULL))
13815
1.68k
      directory = xmlParserGetDirectory((char *)URL);
13816
1.68k
  if ((ctxt->directory == NULL) && (directory != NULL))
13817
1.68k
      ctxt->directory = directory;
13818
539k
    } else {
13819
539k
  inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
13820
539k
  if (inputStream == NULL) {
13821
462k
      xmlFree(uri);
13822
462k
      xmlFreeParserCtxt(ctxt);
13823
462k
      return(NULL);
13824
462k
  }
13825
13826
77.5k
  inputPush(ctxt, inputStream);
13827
13828
77.5k
  if ((ctxt->directory == NULL) && (directory == NULL))
13829
77.5k
      directory = xmlParserGetDirectory((char *)uri);
13830
77.5k
  if ((ctxt->directory == NULL) && (directory != NULL))
13831
77.5k
      ctxt->directory = directory;
13832
77.5k
  xmlFree(uri);
13833
77.5k
    }
13834
79.2k
    return(ctxt);
13835
553k
}
13836
13837
/**
13838
 * xmlCreateEntityParserCtxt:
13839
 * @URL:  the entity URL
13840
 * @ID:  the entity PUBLIC ID
13841
 * @base:  a possible base for the target URI
13842
 *
13843
 * Create a parser context for an external entity
13844
 * Automatic support for ZLIB/Compress compressed document is provided
13845
 * by default if found at compile-time.
13846
 *
13847
 * Returns the new parser context or NULL
13848
 */
13849
xmlParserCtxtPtr
13850
xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
13851
0
                    const xmlChar *base) {
13852
0
    return xmlCreateEntityParserCtxtInternal(NULL, NULL, URL, ID, base, NULL);
13853
13854
0
}
13855
13856
/************************************************************************
13857
 *                  *
13858
 *    Front ends when parsing from a file     *
13859
 *                  *
13860
 ************************************************************************/
13861
13862
/**
13863
 * xmlCreateURLParserCtxt:
13864
 * @filename:  the filename or URL
13865
 * @options:  a combination of xmlParserOption
13866
 *
13867
 * Create a parser context for a file or URL content.
13868
 * Automatic support for ZLIB/Compress compressed document is provided
13869
 * by default if found at compile-time and for file accesses
13870
 *
13871
 * Returns the new parser context or NULL
13872
 */
13873
xmlParserCtxtPtr
13874
xmlCreateURLParserCtxt(const char *filename, int options)
13875
0
{
13876
0
    xmlParserCtxtPtr ctxt;
13877
0
    xmlParserInputPtr inputStream;
13878
0
    char *directory = NULL;
13879
13880
0
    ctxt = xmlNewParserCtxt();
13881
0
    if (ctxt == NULL) {
13882
0
  xmlErrMemory(NULL, "cannot allocate parser context");
13883
0
  return(NULL);
13884
0
    }
13885
13886
0
    if (options)
13887
0
  xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13888
0
    ctxt->linenumbers = 1;
13889
13890
0
    inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
13891
0
    if (inputStream == NULL) {
13892
0
  xmlFreeParserCtxt(ctxt);
13893
0
  return(NULL);
13894
0
    }
13895
13896
0
    inputPush(ctxt, inputStream);
13897
0
    if ((ctxt->directory == NULL) && (directory == NULL))
13898
0
        directory = xmlParserGetDirectory(filename);
13899
0
    if ((ctxt->directory == NULL) && (directory != NULL))
13900
0
        ctxt->directory = directory;
13901
13902
0
    return(ctxt);
13903
0
}
13904
13905
/**
13906
 * xmlCreateFileParserCtxt:
13907
 * @filename:  the filename
13908
 *
13909
 * Create a parser context for a file content.
13910
 * Automatic support for ZLIB/Compress compressed document is provided
13911
 * by default if found at compile-time.
13912
 *
13913
 * Returns the new parser context or NULL
13914
 */
13915
xmlParserCtxtPtr
13916
xmlCreateFileParserCtxt(const char *filename)
13917
0
{
13918
0
    return(xmlCreateURLParserCtxt(filename, 0));
13919
0
}
13920
13921
#ifdef LIBXML_SAX1_ENABLED
13922
/**
13923
 * xmlSAXParseFileWithData:
13924
 * @sax:  the SAX handler block
13925
 * @filename:  the filename
13926
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
13927
 *             documents
13928
 * @data:  the userdata
13929
 *
13930
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
13931
 *
13932
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13933
 * compressed document is provided by default if found at compile-time.
13934
 * It use the given SAX function block to handle the parsing callback.
13935
 * If sax is NULL, fallback to the default DOM tree building routines.
13936
 *
13937
 * User data (void *) is stored within the parser context in the
13938
 * context's _private member, so it is available nearly everywhere in libxml
13939
 *
13940
 * Returns the resulting document tree
13941
 */
13942
13943
xmlDocPtr
13944
xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
13945
0
                        int recovery, void *data) {
13946
0
    xmlDocPtr ret;
13947
0
    xmlParserCtxtPtr ctxt;
13948
13949
0
    xmlInitParser();
13950
13951
0
    ctxt = xmlCreateFileParserCtxt(filename);
13952
0
    if (ctxt == NULL) {
13953
0
  return(NULL);
13954
0
    }
13955
0
    if (sax != NULL) {
13956
0
  if (ctxt->sax != NULL)
13957
0
      xmlFree(ctxt->sax);
13958
0
        ctxt->sax = sax;
13959
0
    }
13960
0
    xmlDetectSAX2(ctxt);
13961
0
    if (data!=NULL) {
13962
0
  ctxt->_private = data;
13963
0
    }
13964
13965
0
    if (ctxt->directory == NULL)
13966
0
        ctxt->directory = xmlParserGetDirectory(filename);
13967
13968
0
    ctxt->recovery = recovery;
13969
13970
0
    xmlParseDocument(ctxt);
13971
13972
0
    if ((ctxt->wellFormed) || recovery) {
13973
0
        ret = ctxt->myDoc;
13974
0
  if ((ret != NULL) && (ctxt->input->buf != NULL)) {
13975
0
      if (ctxt->input->buf->compressed > 0)
13976
0
    ret->compression = 9;
13977
0
      else
13978
0
    ret->compression = ctxt->input->buf->compressed;
13979
0
  }
13980
0
    }
13981
0
    else {
13982
0
       ret = NULL;
13983
0
       xmlFreeDoc(ctxt->myDoc);
13984
0
       ctxt->myDoc = NULL;
13985
0
    }
13986
0
    if (sax != NULL)
13987
0
        ctxt->sax = NULL;
13988
0
    xmlFreeParserCtxt(ctxt);
13989
13990
0
    return(ret);
13991
0
}
13992
13993
/**
13994
 * xmlSAXParseFile:
13995
 * @sax:  the SAX handler block
13996
 * @filename:  the filename
13997
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
13998
 *             documents
13999
 *
14000
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
14001
 *
14002
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14003
 * compressed document is provided by default if found at compile-time.
14004
 * It use the given SAX function block to handle the parsing callback.
14005
 * If sax is NULL, fallback to the default DOM tree building routines.
14006
 *
14007
 * Returns the resulting document tree
14008
 */
14009
14010
xmlDocPtr
14011
xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
14012
0
                          int recovery) {
14013
0
    return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
14014
0
}
14015
14016
/**
14017
 * xmlRecoverDoc:
14018
 * @cur:  a pointer to an array of xmlChar
14019
 *
14020
 * DEPRECATED: Use xmlReadDoc with XML_PARSE_RECOVER.
14021
 *
14022
 * parse an XML in-memory document and build a tree.
14023
 * In the case the document is not Well Formed, a attempt to build a
14024
 * tree is tried anyway
14025
 *
14026
 * Returns the resulting document tree or NULL in case of failure
14027
 */
14028
14029
xmlDocPtr
14030
0
xmlRecoverDoc(const xmlChar *cur) {
14031
0
    return(xmlSAXParseDoc(NULL, cur, 1));
14032
0
}
14033
14034
/**
14035
 * xmlParseFile:
14036
 * @filename:  the filename
14037
 *
14038
 * DEPRECATED: Use xmlReadFile.
14039
 *
14040
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14041
 * compressed document is provided by default if found at compile-time.
14042
 *
14043
 * Returns the resulting document tree if the file was wellformed,
14044
 * NULL otherwise.
14045
 */
14046
14047
xmlDocPtr
14048
0
xmlParseFile(const char *filename) {
14049
0
    return(xmlSAXParseFile(NULL, filename, 0));
14050
0
}
14051
14052
/**
14053
 * xmlRecoverFile:
14054
 * @filename:  the filename
14055
 *
14056
 * DEPRECATED: Use xmlReadFile with XML_PARSE_RECOVER.
14057
 *
14058
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14059
 * compressed document is provided by default if found at compile-time.
14060
 * In the case the document is not Well Formed, it attempts to build
14061
 * a tree anyway
14062
 *
14063
 * Returns the resulting document tree or NULL in case of failure
14064
 */
14065
14066
xmlDocPtr
14067
0
xmlRecoverFile(const char *filename) {
14068
0
    return(xmlSAXParseFile(NULL, filename, 1));
14069
0
}
14070
14071
14072
/**
14073
 * xmlSetupParserForBuffer:
14074
 * @ctxt:  an XML parser context
14075
 * @buffer:  a xmlChar * buffer
14076
 * @filename:  a file name
14077
 *
14078
 * DEPRECATED: Don't use.
14079
 *
14080
 * Setup the parser context to parse a new buffer; Clears any prior
14081
 * contents from the parser context. The buffer parameter must not be
14082
 * NULL, but the filename parameter can be
14083
 */
14084
void
14085
xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
14086
                             const char* filename)
14087
0
{
14088
0
    xmlParserInputPtr input;
14089
14090
0
    if ((ctxt == NULL) || (buffer == NULL))
14091
0
        return;
14092
14093
0
    input = xmlNewInputStream(ctxt);
14094
0
    if (input == NULL) {
14095
0
        xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
14096
0
        xmlClearParserCtxt(ctxt);
14097
0
        return;
14098
0
    }
14099
14100
0
    xmlClearParserCtxt(ctxt);
14101
0
    if (filename != NULL)
14102
0
        input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
14103
0
    input->base = buffer;
14104
0
    input->cur = buffer;
14105
0
    input->end = &buffer[xmlStrlen(buffer)];
14106
0
    inputPush(ctxt, input);
14107
0
}
14108
14109
/**
14110
 * xmlSAXUserParseFile:
14111
 * @sax:  a SAX handler
14112
 * @user_data:  The user data returned on SAX callbacks
14113
 * @filename:  a file name
14114
 *
14115
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
14116
 *
14117
 * parse an XML file and call the given SAX handler routines.
14118
 * Automatic support for ZLIB/Compress compressed document is provided
14119
 *
14120
 * Returns 0 in case of success or a error number otherwise
14121
 */
14122
int
14123
xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
14124
0
                    const char *filename) {
14125
0
    int ret = 0;
14126
0
    xmlParserCtxtPtr ctxt;
14127
14128
0
    ctxt = xmlCreateFileParserCtxt(filename);
14129
0
    if (ctxt == NULL) return -1;
14130
0
    if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14131
0
  xmlFree(ctxt->sax);
14132
0
    ctxt->sax = sax;
14133
0
    xmlDetectSAX2(ctxt);
14134
14135
0
    if (user_data != NULL)
14136
0
  ctxt->userData = user_data;
14137
14138
0
    xmlParseDocument(ctxt);
14139
14140
0
    if (ctxt->wellFormed)
14141
0
  ret = 0;
14142
0
    else {
14143
0
        if (ctxt->errNo != 0)
14144
0
      ret = ctxt->errNo;
14145
0
  else
14146
0
      ret = -1;
14147
0
    }
14148
0
    if (sax != NULL)
14149
0
  ctxt->sax = NULL;
14150
0
    if (ctxt->myDoc != NULL) {
14151
0
        xmlFreeDoc(ctxt->myDoc);
14152
0
  ctxt->myDoc = NULL;
14153
0
    }
14154
0
    xmlFreeParserCtxt(ctxt);
14155
14156
0
    return ret;
14157
0
}
14158
#endif /* LIBXML_SAX1_ENABLED */
14159
14160
/************************************************************************
14161
 *                  *
14162
 *    Front ends when parsing from memory     *
14163
 *                  *
14164
 ************************************************************************/
14165
14166
/**
14167
 * xmlCreateMemoryParserCtxt:
14168
 * @buffer:  a pointer to a char array
14169
 * @size:  the size of the array
14170
 *
14171
 * Create a parser context for an XML in-memory document.
14172
 *
14173
 * Returns the new parser context or NULL
14174
 */
14175
xmlParserCtxtPtr
14176
225k
xmlCreateMemoryParserCtxt(const char *buffer, int size) {
14177
225k
    xmlParserCtxtPtr ctxt;
14178
225k
    xmlParserInputPtr input;
14179
225k
    xmlParserInputBufferPtr buf;
14180
14181
225k
    if (buffer == NULL)
14182
0
  return(NULL);
14183
225k
    if (size <= 0)
14184
24.7k
  return(NULL);
14185
14186
200k
    ctxt = xmlNewParserCtxt();
14187
200k
    if (ctxt == NULL)
14188
0
  return(NULL);
14189
14190
200k
    buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14191
200k
    if (buf == NULL) {
14192
0
  xmlFreeParserCtxt(ctxt);
14193
0
  return(NULL);
14194
0
    }
14195
14196
200k
    input = xmlNewInputStream(ctxt);
14197
200k
    if (input == NULL) {
14198
0
  xmlFreeParserInputBuffer(buf);
14199
0
  xmlFreeParserCtxt(ctxt);
14200
0
  return(NULL);
14201
0
    }
14202
14203
200k
    input->filename = NULL;
14204
200k
    input->buf = buf;
14205
200k
    xmlBufResetInput(input->buf->buffer, input);
14206
14207
200k
    inputPush(ctxt, input);
14208
200k
    return(ctxt);
14209
200k
}
14210
14211
#ifdef LIBXML_SAX1_ENABLED
14212
/**
14213
 * xmlSAXParseMemoryWithData:
14214
 * @sax:  the SAX handler block
14215
 * @buffer:  an pointer to a char array
14216
 * @size:  the size of the array
14217
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14218
 *             documents
14219
 * @data:  the userdata
14220
 *
14221
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14222
 *
14223
 * parse an XML in-memory block and use the given SAX function block
14224
 * to handle the parsing callback. If sax is NULL, fallback to the default
14225
 * DOM tree building routines.
14226
 *
14227
 * User data (void *) is stored within the parser context in the
14228
 * context's _private member, so it is available nearly everywhere in libxml
14229
 *
14230
 * Returns the resulting document tree
14231
 */
14232
14233
xmlDocPtr
14234
xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14235
0
            int size, int recovery, void *data) {
14236
0
    xmlDocPtr ret;
14237
0
    xmlParserCtxtPtr ctxt;
14238
14239
0
    xmlInitParser();
14240
14241
0
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14242
0
    if (ctxt == NULL) return(NULL);
14243
0
    if (sax != NULL) {
14244
0
  if (ctxt->sax != NULL)
14245
0
      xmlFree(ctxt->sax);
14246
0
        ctxt->sax = sax;
14247
0
    }
14248
0
    xmlDetectSAX2(ctxt);
14249
0
    if (data!=NULL) {
14250
0
  ctxt->_private=data;
14251
0
    }
14252
14253
0
    ctxt->recovery = recovery;
14254
14255
0
    xmlParseDocument(ctxt);
14256
14257
0
    if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14258
0
    else {
14259
0
       ret = NULL;
14260
0
       xmlFreeDoc(ctxt->myDoc);
14261
0
       ctxt->myDoc = NULL;
14262
0
    }
14263
0
    if (sax != NULL)
14264
0
  ctxt->sax = NULL;
14265
0
    xmlFreeParserCtxt(ctxt);
14266
14267
0
    return(ret);
14268
0
}
14269
14270
/**
14271
 * xmlSAXParseMemory:
14272
 * @sax:  the SAX handler block
14273
 * @buffer:  an pointer to a char array
14274
 * @size:  the size of the array
14275
 * @recovery:  work in recovery mode, i.e. tries to read not Well Formed
14276
 *             documents
14277
 *
14278
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14279
 *
14280
 * parse an XML in-memory block and use the given SAX function block
14281
 * to handle the parsing callback. If sax is NULL, fallback to the default
14282
 * DOM tree building routines.
14283
 *
14284
 * Returns the resulting document tree
14285
 */
14286
xmlDocPtr
14287
xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14288
0
            int size, int recovery) {
14289
0
    return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
14290
0
}
14291
14292
/**
14293
 * xmlParseMemory:
14294
 * @buffer:  an pointer to a char array
14295
 * @size:  the size of the array
14296
 *
14297
 * DEPRECATED: Use xmlReadMemory.
14298
 *
14299
 * parse an XML in-memory block and build a tree.
14300
 *
14301
 * Returns the resulting document tree
14302
 */
14303
14304
0
xmlDocPtr xmlParseMemory(const char *buffer, int size) {
14305
0
   return(xmlSAXParseMemory(NULL, buffer, size, 0));
14306
0
}
14307
14308
/**
14309
 * xmlRecoverMemory:
14310
 * @buffer:  an pointer to a char array
14311
 * @size:  the size of the array
14312
 *
14313
 * DEPRECATED: Use xmlReadMemory with XML_PARSE_RECOVER.
14314
 *
14315
 * parse an XML in-memory block and build a tree.
14316
 * In the case the document is not Well Formed, an attempt to
14317
 * build a tree is tried anyway
14318
 *
14319
 * Returns the resulting document tree or NULL in case of error
14320
 */
14321
14322
0
xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
14323
0
   return(xmlSAXParseMemory(NULL, buffer, size, 1));
14324
0
}
14325
14326
/**
14327
 * xmlSAXUserParseMemory:
14328
 * @sax:  a SAX handler
14329
 * @user_data:  The user data returned on SAX callbacks
14330
 * @buffer:  an in-memory XML document input
14331
 * @size:  the length of the XML document in bytes
14332
 *
14333
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14334
 *
14335
 * parse an XML in-memory buffer and call the given SAX handler routines.
14336
 *
14337
 * Returns 0 in case of success or a error number otherwise
14338
 */
14339
int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
14340
0
        const char *buffer, int size) {
14341
0
    int ret = 0;
14342
0
    xmlParserCtxtPtr ctxt;
14343
14344
0
    xmlInitParser();
14345
14346
0
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14347
0
    if (ctxt == NULL) return -1;
14348
0
    if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14349
0
        xmlFree(ctxt->sax);
14350
0
    ctxt->sax = sax;
14351
0
    xmlDetectSAX2(ctxt);
14352
14353
0
    if (user_data != NULL)
14354
0
  ctxt->userData = user_data;
14355
14356
0
    xmlParseDocument(ctxt);
14357
14358
0
    if (ctxt->wellFormed)
14359
0
  ret = 0;
14360
0
    else {
14361
0
        if (ctxt->errNo != 0)
14362
0
      ret = ctxt->errNo;
14363
0
  else
14364
0
      ret = -1;
14365
0
    }
14366
0
    if (sax != NULL)
14367
0
        ctxt->sax = NULL;
14368
0
    if (ctxt->myDoc != NULL) {
14369
0
        xmlFreeDoc(ctxt->myDoc);
14370
0
  ctxt->myDoc = NULL;
14371
0
    }
14372
0
    xmlFreeParserCtxt(ctxt);
14373
14374
0
    return ret;
14375
0
}
14376
#endif /* LIBXML_SAX1_ENABLED */
14377
14378
/**
14379
 * xmlCreateDocParserCtxt:
14380
 * @cur:  a pointer to an array of xmlChar
14381
 *
14382
 * Creates a parser context for an XML in-memory document.
14383
 *
14384
 * Returns the new parser context or NULL
14385
 */
14386
xmlParserCtxtPtr
14387
0
xmlCreateDocParserCtxt(const xmlChar *cur) {
14388
0
    int len;
14389
14390
0
    if (cur == NULL)
14391
0
  return(NULL);
14392
0
    len = xmlStrlen(cur);
14393
0
    return(xmlCreateMemoryParserCtxt((const char *)cur, len));
14394
0
}
14395
14396
#ifdef LIBXML_SAX1_ENABLED
14397
/**
14398
 * xmlSAXParseDoc:
14399
 * @sax:  the SAX handler block
14400
 * @cur:  a pointer to an array of xmlChar
14401
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14402
 *             documents
14403
 *
14404
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadDoc.
14405
 *
14406
 * parse an XML in-memory document and build a tree.
14407
 * It use the given SAX function block to handle the parsing callback.
14408
 * If sax is NULL, fallback to the default DOM tree building routines.
14409
 *
14410
 * Returns the resulting document tree
14411
 */
14412
14413
xmlDocPtr
14414
0
xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
14415
0
    xmlDocPtr ret;
14416
0
    xmlParserCtxtPtr ctxt;
14417
0
    xmlSAXHandlerPtr oldsax = NULL;
14418
14419
0
    if (cur == NULL) return(NULL);
14420
14421
14422
0
    ctxt = xmlCreateDocParserCtxt(cur);
14423
0
    if (ctxt == NULL) return(NULL);
14424
0
    if (sax != NULL) {
14425
0
        oldsax = ctxt->sax;
14426
0
        ctxt->sax = sax;
14427
0
        ctxt->userData = NULL;
14428
0
    }
14429
0
    xmlDetectSAX2(ctxt);
14430
14431
0
    xmlParseDocument(ctxt);
14432
0
    if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14433
0
    else {
14434
0
       ret = NULL;
14435
0
       xmlFreeDoc(ctxt->myDoc);
14436
0
       ctxt->myDoc = NULL;
14437
0
    }
14438
0
    if (sax != NULL)
14439
0
  ctxt->sax = oldsax;
14440
0
    xmlFreeParserCtxt(ctxt);
14441
14442
0
    return(ret);
14443
0
}
14444
14445
/**
14446
 * xmlParseDoc:
14447
 * @cur:  a pointer to an array of xmlChar
14448
 *
14449
 * DEPRECATED: Use xmlReadDoc.
14450
 *
14451
 * parse an XML in-memory document and build a tree.
14452
 *
14453
 * Returns the resulting document tree
14454
 */
14455
14456
xmlDocPtr
14457
0
xmlParseDoc(const xmlChar *cur) {
14458
0
    return(xmlSAXParseDoc(NULL, cur, 0));
14459
0
}
14460
#endif /* LIBXML_SAX1_ENABLED */
14461
14462
#ifdef LIBXML_LEGACY_ENABLED
14463
/************************************************************************
14464
 *                  *
14465
 *  Specific function to keep track of entities references    *
14466
 *  and used by the XSLT debugger         *
14467
 *                  *
14468
 ************************************************************************/
14469
14470
static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14471
14472
/**
14473
 * xmlAddEntityReference:
14474
 * @ent : A valid entity
14475
 * @firstNode : A valid first node for children of entity
14476
 * @lastNode : A valid last node of children entity
14477
 *
14478
 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14479
 */
14480
static void
14481
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14482
                      xmlNodePtr lastNode)
14483
{
14484
    if (xmlEntityRefFunc != NULL) {
14485
        (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14486
    }
14487
}
14488
14489
14490
/**
14491
 * xmlSetEntityReferenceFunc:
14492
 * @func: A valid function
14493
 *
14494
 * Set the function to call call back when a xml reference has been made
14495
 */
14496
void
14497
xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14498
{
14499
    xmlEntityRefFunc = func;
14500
}
14501
#endif /* LIBXML_LEGACY_ENABLED */
14502
14503
/************************************************************************
14504
 *                  *
14505
 *        Miscellaneous       *
14506
 *                  *
14507
 ************************************************************************/
14508
14509
/* Non-zero once xmlInitParser() has completed; cleared by xmlCleanupParser(). */
static int xmlParserInitialized = 0;

/**
 * xmlInitParser:
 *
 * Initialization function for the XML parser.
 * This is not reentrant. Call once before processing in case of
 * use in multithreaded programs.
 *
 * Calls after the first successful one return immediately.
 */

void
xmlInitParser(void) {
    /*
     * Note that the initialization code must not make memory allocations.
     */
    if (xmlParserInitialized != 0)
        return;

#ifdef LIBXML_THREAD_ENABLED
    __xmlGlobalInitMutexLock();
    /* Test the flag again now that the global init mutex is held. */
    if (xmlParserInitialized != 0) {
        __xmlGlobalInitMutexUnlock();
        return;
    }
#endif

#if defined(_WIN32) && (!defined(LIBXML_STATIC) || defined(LIBXML_STATIC_FOR_DLL))
    /* Only register the exit-time cleanup with the default deallocator. */
    if (xmlFree == free)
        atexit(xmlCleanupParser);
#endif

    xmlInitThreadsInternal();
    xmlInitGlobalsInternal();
    xmlInitMemoryInternal();
    __xmlInitializeDict();
    xmlInitEncodingInternal();
    xmlRegisterDefaultInputCallbacks();
#ifdef LIBXML_OUTPUT_ENABLED
    xmlRegisterDefaultOutputCallbacks();
#endif /* LIBXML_OUTPUT_ENABLED */
#if defined(LIBXML_XPATH_ENABLED) || defined(LIBXML_SCHEMAS_ENABLED)
    xmlInitXPathInternal();
#endif

    xmlParserInitialized = 1;

#ifdef LIBXML_THREAD_ENABLED
    __xmlGlobalInitMutexUnlock();
#endif
}
14554
14555
/**
14556
 * xmlCleanupParser:
14557
 *
14558
 * This function name is somewhat misleading. It does not clean up
14559
 * parser state, it cleans up memory allocated by the library itself.
14560
 * It is a cleanup function for the XML library. It tries to reclaim all
14561
 * related global memory allocated for the library processing.
14562
 * It doesn't deallocate any document related memory. One should
14563
 * call xmlCleanupParser() only when the process has finished using
14564
 * the library and all XML/HTML documents built with it.
14565
 * See also xmlInitParser() which has the opposite function of preparing
14566
 * the library for operations.
14567
 *
14568
 * WARNING: if your application is multithreaded or has plugin support
14569
 *          calling this may crash the application if another thread or
14570
 *          a plugin is still using libxml2. It's sometimes very hard to
14571
 *          guess if libxml2 is in use in the application, some libraries
14572
 *          or plugins may use it without notice. In case of doubt abstain
14573
 *          from calling this function or do it just before calling exit()
14574
 *          to avoid leak reports from valgrind !
14575
 */
14576
14577
void
14578
0
xmlCleanupParser(void) {
14579
0
    if (!xmlParserInitialized)
14580
0
  return;
14581
14582
0
    xmlCleanupCharEncodingHandlers();
14583
0
#ifdef LIBXML_CATALOG_ENABLED
14584
0
    xmlCatalogCleanup();
14585
0
#endif
14586
0
    xmlCleanupDictInternal();
14587
0
    xmlCleanupInputCallbacks();
14588
0
#ifdef LIBXML_OUTPUT_ENABLED
14589
0
    xmlCleanupOutputCallbacks();
14590
0
#endif
14591
0
#ifdef LIBXML_SCHEMAS_ENABLED
14592
0
    xmlSchemaCleanupTypes();
14593
0
    xmlRelaxNGCleanupTypes();
14594
0
#endif
14595
0
    xmlCleanupGlobalsInternal();
14596
0
    xmlCleanupThreadsInternal();
14597
0
    xmlCleanupMemoryInternal();
14598
0
    xmlParserInitialized = 0;
14599
0
}
14600
14601
#if defined(HAVE_ATTRIBUTE_DESTRUCTOR) && !defined(LIBXML_STATIC) && \
    !defined(_WIN32)
/*
 * xmlDestructor:
 *
 * Function tagged with ATTRIBUTE_DESTRUCTOR which runs the library
 * cleanup, but only while the default deallocator is installed.
 */
static void
ATTRIBUTE_DESTRUCTOR
xmlDestructor(void) {
    /*
     * Calling custom deallocation functions in a destructor can cause
     * problems, for example with Nokogiri.
     */
    if (xmlFree != free)
        return;

    xmlCleanupParser();
}
#endif
14614
14615
/************************************************************************
14616
 *                  *
14617
 *  New set (2.6.0) of simpler and more flexible APIs   *
14618
 *                  *
14619
 ************************************************************************/
14620
14621
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named "dict" must be visible where the macro
 * is expanded; a NULL @str is ignored, and a NULL "dict" means the string
 * is always freed.
 *
 * The expansion is wrapped in do { } while (0) so that it behaves as a
 * single statement, e.g. as the body of an if/else. Terminate every use
 * with a semicolon.
 */
#define DICT_FREE(str)						\
    do {							\
	if ((str) && ((!dict) ||				\
	    (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
	    xmlFree((char *)(str));				\
    } while (0)
14632
14633
/**
14634
 * xmlCtxtReset:
14635
 * @ctxt: an XML parser context
14636
 *
14637
 * Reset a parser context
14638
 */
14639
void
14640
xmlCtxtReset(xmlParserCtxtPtr ctxt)
14641
0
{
14642
0
    xmlParserInputPtr input;
14643
0
    xmlDictPtr dict;
14644
14645
0
    if (ctxt == NULL)
14646
0
        return;
14647
14648
0
    dict = ctxt->dict;
14649
14650
0
    while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14651
0
        xmlFreeInputStream(input);
14652
0
    }
14653
0
    ctxt->inputNr = 0;
14654
0
    ctxt->input = NULL;
14655
14656
0
    ctxt->spaceNr = 0;
14657
0
    if (ctxt->spaceTab != NULL) {
14658
0
  ctxt->spaceTab[0] = -1;
14659
0
  ctxt->space = &ctxt->spaceTab[0];
14660
0
    } else {
14661
0
        ctxt->space = NULL;
14662
0
    }
14663
14664
14665
0
    ctxt->nodeNr = 0;
14666
0
    ctxt->node = NULL;
14667
14668
0
    ctxt->nameNr = 0;
14669
0
    ctxt->name = NULL;
14670
14671
0
    ctxt->nsNr = 0;
14672
14673
0
    DICT_FREE(ctxt->version);
14674
0
    ctxt->version = NULL;
14675
0
    DICT_FREE(ctxt->encoding);
14676
0
    ctxt->encoding = NULL;
14677
0
    DICT_FREE(ctxt->directory);
14678
0
    ctxt->directory = NULL;
14679
0
    DICT_FREE(ctxt->extSubURI);
14680
0
    ctxt->extSubURI = NULL;
14681
0
    DICT_FREE(ctxt->extSubSystem);
14682
0
    ctxt->extSubSystem = NULL;
14683
0
    if (ctxt->myDoc != NULL)
14684
0
        xmlFreeDoc(ctxt->myDoc);
14685
0
    ctxt->myDoc = NULL;
14686
14687
0
    ctxt->standalone = -1;
14688
0
    ctxt->hasExternalSubset = 0;
14689
0
    ctxt->hasPErefs = 0;
14690
0
    ctxt->html = 0;
14691
0
    ctxt->external = 0;
14692
0
    ctxt->instate = XML_PARSER_START;
14693
0
    ctxt->token = 0;
14694
14695
0
    ctxt->wellFormed = 1;
14696
0
    ctxt->nsWellFormed = 1;
14697
0
    ctxt->disableSAX = 0;
14698
0
    ctxt->valid = 1;
14699
#if 0
14700
    ctxt->vctxt.userData = ctxt;
14701
    ctxt->vctxt.error = xmlParserValidityError;
14702
    ctxt->vctxt.warning = xmlParserValidityWarning;
14703
#endif
14704
0
    ctxt->record_info = 0;
14705
0
    ctxt->checkIndex = 0;
14706
0
    ctxt->endCheckState = 0;
14707
0
    ctxt->inSubset = 0;
14708
0
    ctxt->errNo = XML_ERR_OK;
14709
0
    ctxt->depth = 0;
14710
0
    ctxt->charset = XML_CHAR_ENCODING_UTF8;
14711
0
    ctxt->catalogs = NULL;
14712
0
    ctxt->sizeentities = 0;
14713
0
    ctxt->sizeentcopy = 0;
14714
0
    xmlInitNodeInfoSeq(&ctxt->node_seq);
14715
14716
0
    if (ctxt->attsDefault != NULL) {
14717
0
        xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
14718
0
        ctxt->attsDefault = NULL;
14719
0
    }
14720
0
    if (ctxt->attsSpecial != NULL) {
14721
0
        xmlHashFree(ctxt->attsSpecial, NULL);
14722
0
        ctxt->attsSpecial = NULL;
14723
0
    }
14724
14725
0
#ifdef LIBXML_CATALOG_ENABLED
14726
0
    if (ctxt->catalogs != NULL)
14727
0
  xmlCatalogFreeLocal(ctxt->catalogs);
14728
0
#endif
14729
0
    ctxt->nbErrors = 0;
14730
0
    ctxt->nbWarnings = 0;
14731
0
    if (ctxt->lastError.code != XML_ERR_OK)
14732
0
        xmlResetError(&ctxt->lastError);
14733
0
}
14734
14735
/**
14736
 * xmlCtxtResetPush:
14737
 * @ctxt: an XML parser context
14738
 * @chunk:  a pointer to an array of chars
14739
 * @size:  number of chars in the array
14740
 * @filename:  an optional file name or URI
14741
 * @encoding:  the document encoding, or NULL
14742
 *
14743
 * Reset a push parser context
14744
 *
14745
 * Returns 0 in case of success and 1 in case of error
14746
 */
14747
int
14748
xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14749
                 int size, const char *filename, const char *encoding)
14750
0
{
14751
0
    xmlParserInputPtr inputStream;
14752
0
    xmlParserInputBufferPtr buf;
14753
0
    xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14754
14755
0
    if (ctxt == NULL)
14756
0
        return(1);
14757
14758
0
    if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14759
0
        enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14760
14761
0
    buf = xmlAllocParserInputBuffer(enc);
14762
0
    if (buf == NULL)
14763
0
        return(1);
14764
14765
0
    if (ctxt == NULL) {
14766
0
        xmlFreeParserInputBuffer(buf);
14767
0
        return(1);
14768
0
    }
14769
14770
0
    xmlCtxtReset(ctxt);
14771
14772
0
    if (filename == NULL) {
14773
0
        ctxt->directory = NULL;
14774
0
    } else {
14775
0
        ctxt->directory = xmlParserGetDirectory(filename);
14776
0
    }
14777
14778
0
    inputStream = xmlNewInputStream(ctxt);
14779
0
    if (inputStream == NULL) {
14780
0
        xmlFreeParserInputBuffer(buf);
14781
0
        return(1);
14782
0
    }
14783
14784
0
    if (filename == NULL)
14785
0
        inputStream->filename = NULL;
14786
0
    else
14787
0
        inputStream->filename = (char *)
14788
0
            xmlCanonicPath((const xmlChar *) filename);
14789
0
    inputStream->buf = buf;
14790
0
    xmlBufResetInput(buf->buffer, inputStream);
14791
14792
0
    inputPush(ctxt, inputStream);
14793
14794
0
    if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14795
0
        (ctxt->input->buf != NULL)) {
14796
0
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
14797
0
        size_t cur = ctxt->input->cur - ctxt->input->base;
14798
14799
0
        xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14800
14801
0
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
14802
#ifdef DEBUG_PUSH
14803
        xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14804
#endif
14805
0
    }
14806
14807
0
    if (encoding != NULL) {
14808
0
        xmlCharEncodingHandlerPtr hdlr;
14809
14810
0
        if (ctxt->encoding != NULL)
14811
0
      xmlFree((xmlChar *) ctxt->encoding);
14812
0
        ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14813
14814
0
        hdlr = xmlFindCharEncodingHandler(encoding);
14815
0
        if (hdlr != NULL) {
14816
0
            xmlSwitchToEncoding(ctxt, hdlr);
14817
0
  } else {
14818
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14819
0
            "Unsupported encoding %s\n", BAD_CAST encoding);
14820
0
        }
14821
0
    } else if (enc != XML_CHAR_ENCODING_NONE) {
14822
0
        xmlSwitchEncoding(ctxt, enc);
14823
0
    }
14824
14825
0
    return(0);
14826
0
}
14827
14828
14829
/**
14830
 * xmlCtxtUseOptionsInternal:
14831
 * @ctxt: an XML parser context
14832
 * @options:  a combination of xmlParserOption
14833
 * @encoding:  the user provided encoding to use
14834
 *
14835
 * Applies the options to the parser context
14836
 *
14837
 * Returns 0 in case of success, the set of unknown or unimplemented options
14838
 *         in case of error.
14839
 */
14840
static int
14841
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
14842
401k
{
14843
401k
    if (ctxt == NULL)
14844
0
        return(-1);
14845
401k
    if (encoding != NULL) {
14846
0
        if (ctxt->encoding != NULL)
14847
0
      xmlFree((xmlChar *) ctxt->encoding);
14848
0
        ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14849
0
    }
14850
401k
    if (options & XML_PARSE_RECOVER) {
14851
217k
        ctxt->recovery = 1;
14852
217k
        options -= XML_PARSE_RECOVER;
14853
217k
  ctxt->options |= XML_PARSE_RECOVER;
14854
217k
    } else
14855
183k
        ctxt->recovery = 0;
14856
401k
    if (options & XML_PARSE_DTDLOAD) {
14857
250k
        ctxt->loadsubset = XML_DETECT_IDS;
14858
250k
        options -= XML_PARSE_DTDLOAD;
14859
250k
  ctxt->options |= XML_PARSE_DTDLOAD;
14860
250k
    } else
14861
151k
        ctxt->loadsubset = 0;
14862
401k
    if (options & XML_PARSE_DTDATTR) {
14863
157k
        ctxt->loadsubset |= XML_COMPLETE_ATTRS;
14864
157k
        options -= XML_PARSE_DTDATTR;
14865
157k
  ctxt->options |= XML_PARSE_DTDATTR;
14866
157k
    }
14867
401k
    if (options & XML_PARSE_NOENT) {
14868
225k
        ctxt->replaceEntities = 1;
14869
        /* ctxt->loadsubset |= XML_DETECT_IDS; */
14870
225k
        options -= XML_PARSE_NOENT;
14871
225k
  ctxt->options |= XML_PARSE_NOENT;
14872
225k
    } else
14873
175k
        ctxt->replaceEntities = 0;
14874
401k
    if (options & XML_PARSE_PEDANTIC) {
14875
52.6k
        ctxt->pedantic = 1;
14876
52.6k
        options -= XML_PARSE_PEDANTIC;
14877
52.6k
  ctxt->options |= XML_PARSE_PEDANTIC;
14878
52.6k
    } else
14879
348k
        ctxt->pedantic = 0;
14880
401k
    if (options & XML_PARSE_NOBLANKS) {
14881
155k
        ctxt->keepBlanks = 0;
14882
155k
        ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
14883
155k
        options -= XML_PARSE_NOBLANKS;
14884
155k
  ctxt->options |= XML_PARSE_NOBLANKS;
14885
155k
    } else
14886
246k
        ctxt->keepBlanks = 1;
14887
401k
    if (options & XML_PARSE_DTDVALID) {
14888
172k
        ctxt->validate = 1;
14889
172k
        if (options & XML_PARSE_NOWARNING)
14890
82.0k
            ctxt->vctxt.warning = NULL;
14891
172k
        if (options & XML_PARSE_NOERROR)
14892
135k
            ctxt->vctxt.error = NULL;
14893
172k
        options -= XML_PARSE_DTDVALID;
14894
172k
  ctxt->options |= XML_PARSE_DTDVALID;
14895
172k
    } else
14896
228k
        ctxt->validate = 0;
14897
401k
    if (options & XML_PARSE_NOWARNING) {
14898
166k
        ctxt->sax->warning = NULL;
14899
166k
        options -= XML_PARSE_NOWARNING;
14900
166k
    }
14901
401k
    if (options & XML_PARSE_NOERROR) {
14902
242k
        ctxt->sax->error = NULL;
14903
242k
        ctxt->sax->fatalError = NULL;
14904
242k
        options -= XML_PARSE_NOERROR;
14905
242k
    }
14906
401k
#ifdef LIBXML_SAX1_ENABLED
14907
401k
    if (options & XML_PARSE_SAX1) {
14908
152k
        ctxt->sax->startElement = xmlSAX2StartElement;
14909
152k
        ctxt->sax->endElement = xmlSAX2EndElement;
14910
152k
        ctxt->sax->startElementNs = NULL;
14911
152k
        ctxt->sax->endElementNs = NULL;
14912
152k
        ctxt->sax->initialized = 1;
14913
152k
        options -= XML_PARSE_SAX1;
14914
152k
  ctxt->options |= XML_PARSE_SAX1;
14915
152k
    }
14916
401k
#endif /* LIBXML_SAX1_ENABLED */
14917
401k
    if (options & XML_PARSE_NODICT) {
14918
139k
        ctxt->dictNames = 0;
14919
139k
        options -= XML_PARSE_NODICT;
14920
139k
  ctxt->options |= XML_PARSE_NODICT;
14921
261k
    } else {
14922
261k
        ctxt->dictNames = 1;
14923
261k
    }
14924
401k
    if (options & XML_PARSE_NOCDATA) {
14925
158k
        ctxt->sax->cdataBlock = NULL;
14926
158k
        options -= XML_PARSE_NOCDATA;
14927
158k
  ctxt->options |= XML_PARSE_NOCDATA;
14928
158k
    }
14929
401k
    if (options & XML_PARSE_NSCLEAN) {
14930
235k
  ctxt->options |= XML_PARSE_NSCLEAN;
14931
235k
        options -= XML_PARSE_NSCLEAN;
14932
235k
    }
14933
401k
    if (options & XML_PARSE_NONET) {
14934
144k
  ctxt->options |= XML_PARSE_NONET;
14935
144k
        options -= XML_PARSE_NONET;
14936
144k
    }
14937
401k
    if (options & XML_PARSE_COMPACT) {
14938
234k
  ctxt->options |= XML_PARSE_COMPACT;
14939
234k
        options -= XML_PARSE_COMPACT;
14940
234k
    }
14941
401k
    if (options & XML_PARSE_OLD10) {
14942
137k
  ctxt->options |= XML_PARSE_OLD10;
14943
137k
        options -= XML_PARSE_OLD10;
14944
137k
    }
14945
401k
    if (options & XML_PARSE_NOBASEFIX) {
14946
165k
  ctxt->options |= XML_PARSE_NOBASEFIX;
14947
165k
        options -= XML_PARSE_NOBASEFIX;
14948
165k
    }
14949
401k
    if (options & XML_PARSE_HUGE) {
14950
141k
  ctxt->options |= XML_PARSE_HUGE;
14951
141k
        options -= XML_PARSE_HUGE;
14952
141k
        if (ctxt->dict != NULL)
14953
141k
            xmlDictSetLimit(ctxt->dict, 0);
14954
141k
    }
14955
401k
    if (options & XML_PARSE_OLDSAX) {
14956
141k
  ctxt->options |= XML_PARSE_OLDSAX;
14957
141k
        options -= XML_PARSE_OLDSAX;
14958
141k
    }
14959
401k
    if (options & XML_PARSE_IGNORE_ENC) {
14960
228k
  ctxt->options |= XML_PARSE_IGNORE_ENC;
14961
228k
        options -= XML_PARSE_IGNORE_ENC;
14962
228k
    }
14963
401k
    if (options & XML_PARSE_BIG_LINES) {
14964
164k
  ctxt->options |= XML_PARSE_BIG_LINES;
14965
164k
        options -= XML_PARSE_BIG_LINES;
14966
164k
    }
14967
401k
    ctxt->linenumbers = 1;
14968
401k
    return (options);
14969
401k
}
14970
14971
/**
14972
 * xmlCtxtUseOptions:
14973
 * @ctxt: an XML parser context
14974
 * @options:  a combination of xmlParserOption
14975
 *
14976
 * Applies the options to the parser context
14977
 *
14978
 * Returns 0 in case of success, the set of unknown or unimplemented options
14979
 *         in case of error.
14980
 */
14981
int
14982
xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
14983
267k
{
14984
267k
   return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
14985
267k
}
14986
14987
/**
14988
 * xmlDoRead:
14989
 * @ctxt:  an XML parser context
14990
 * @URL:  the base URL to use for the document
14991
 * @encoding:  the document encoding, or NULL
14992
 * @options:  a combination of xmlParserOption
14993
 * @reuse:  keep the context for reuse
14994
 *
14995
 * Common front-end for the xmlRead functions
14996
 *
14997
 * Returns the resulting document tree or NULL
14998
 */
14999
static xmlDocPtr
15000
xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
15001
          int options, int reuse)
15002
133k
{
15003
133k
    xmlDocPtr ret;
15004
15005
133k
    xmlCtxtUseOptionsInternal(ctxt, options, encoding);
15006
133k
    if (encoding != NULL) {
15007
0
        xmlCharEncodingHandlerPtr hdlr;
15008
15009
0
  hdlr = xmlFindCharEncodingHandler(encoding);
15010
0
  if (hdlr != NULL)
15011
0
      xmlSwitchToEncoding(ctxt, hdlr);
15012
0
    }
15013
133k
    if ((URL != NULL) && (ctxt->input != NULL) &&
15014
133k
        (ctxt->input->filename == NULL))
15015
133k
        ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
15016
133k
    xmlParseDocument(ctxt);
15017
133k
    if ((ctxt->wellFormed) || ctxt->recovery)
15018
82.6k
        ret = ctxt->myDoc;
15019
51.0k
    else {
15020
51.0k
        ret = NULL;
15021
51.0k
  if (ctxt->myDoc != NULL) {
15022
47.8k
      xmlFreeDoc(ctxt->myDoc);
15023
47.8k
  }
15024
51.0k
    }
15025
133k
    ctxt->myDoc = NULL;
15026
133k
    if (!reuse) {
15027
133k
  xmlFreeParserCtxt(ctxt);
15028
133k
    }
15029
15030
133k
    return (ret);
15031
133k
}
15032
15033
/**
15034
 * xmlReadDoc:
15035
 * @cur:  a pointer to a zero terminated string
15036
 * @URL:  the base URL to use for the document
15037
 * @encoding:  the document encoding, or NULL
15038
 * @options:  a combination of xmlParserOption
15039
 *
15040
 * parse an XML in-memory document and build a tree.
15041
 *
15042
 * Returns the resulting document tree
15043
 */
15044
xmlDocPtr
15045
xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
15046
0
{
15047
0
    xmlParserCtxtPtr ctxt;
15048
15049
0
    if (cur == NULL)
15050
0
        return (NULL);
15051
0
    xmlInitParser();
15052
15053
0
    ctxt = xmlCreateDocParserCtxt(cur);
15054
0
    if (ctxt == NULL)
15055
0
        return (NULL);
15056
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15057
0
}
15058
15059
/**
15060
 * xmlReadFile:
15061
 * @filename:  a file or URL
15062
 * @encoding:  the document encoding, or NULL
15063
 * @options:  a combination of xmlParserOption
15064
 *
15065
 * parse an XML file from the filesystem or the network.
15066
 *
15067
 * Returns the resulting document tree
15068
 */
15069
xmlDocPtr
15070
xmlReadFile(const char *filename, const char *encoding, int options)
15071
0
{
15072
0
    xmlParserCtxtPtr ctxt;
15073
15074
0
    xmlInitParser();
15075
0
    ctxt = xmlCreateURLParserCtxt(filename, options);
15076
0
    if (ctxt == NULL)
15077
0
        return (NULL);
15078
0
    return (xmlDoRead(ctxt, NULL, encoding, options, 0));
15079
0
}
15080
15081
/**
15082
 * xmlReadMemory:
15083
 * @buffer:  a pointer to a char array
15084
 * @size:  the size of the array
15085
 * @URL:  the base URL to use for the document
15086
 * @encoding:  the document encoding, or NULL
15087
 * @options:  a combination of xmlParserOption
15088
 *
15089
 * parse an XML in-memory document and build a tree.
15090
 *
15091
 * Returns the resulting document tree
15092
 */
15093
xmlDocPtr
15094
xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
15095
133k
{
15096
133k
    xmlParserCtxtPtr ctxt;
15097
15098
133k
    xmlInitParser();
15099
133k
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
15100
133k
    if (ctxt == NULL)
15101
35
        return (NULL);
15102
133k
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15103
133k
}
15104
15105
/**
15106
 * xmlReadFd:
15107
 * @fd:  an open file descriptor
15108
 * @URL:  the base URL to use for the document
15109
 * @encoding:  the document encoding, or NULL
15110
 * @options:  a combination of xmlParserOption
15111
 *
15112
 * parse an XML from a file descriptor and build a tree.
15113
 * NOTE that the file descriptor will not be closed when the
15114
 *      reader is closed or reset.
15115
 *
15116
 * Returns the resulting document tree
15117
 */
15118
xmlDocPtr
15119
xmlReadFd(int fd, const char *URL, const char *encoding, int options)
15120
0
{
15121
0
    xmlParserCtxtPtr ctxt;
15122
0
    xmlParserInputBufferPtr input;
15123
0
    xmlParserInputPtr stream;
15124
15125
0
    if (fd < 0)
15126
0
        return (NULL);
15127
0
    xmlInitParser();
15128
15129
0
    input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15130
0
    if (input == NULL)
15131
0
        return (NULL);
15132
0
    input->closecallback = NULL;
15133
0
    ctxt = xmlNewParserCtxt();
15134
0
    if (ctxt == NULL) {
15135
0
        xmlFreeParserInputBuffer(input);
15136
0
        return (NULL);
15137
0
    }
15138
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15139
0
    if (stream == NULL) {
15140
0
        xmlFreeParserInputBuffer(input);
15141
0
  xmlFreeParserCtxt(ctxt);
15142
0
        return (NULL);
15143
0
    }
15144
0
    inputPush(ctxt, stream);
15145
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15146
0
}
15147
15148
/**
15149
 * xmlReadIO:
15150
 * @ioread:  an I/O read function
15151
 * @ioclose:  an I/O close function
15152
 * @ioctx:  an I/O handler
15153
 * @URL:  the base URL to use for the document
15154
 * @encoding:  the document encoding, or NULL
15155
 * @options:  a combination of xmlParserOption
15156
 *
15157
 * parse an XML document from I/O functions and source and build a tree.
15158
 *
15159
 * Returns the resulting document tree
15160
 */
15161
xmlDocPtr
15162
xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
15163
          void *ioctx, const char *URL, const char *encoding, int options)
15164
0
{
15165
0
    xmlParserCtxtPtr ctxt;
15166
0
    xmlParserInputBufferPtr input;
15167
0
    xmlParserInputPtr stream;
15168
15169
0
    if (ioread == NULL)
15170
0
        return (NULL);
15171
0
    xmlInitParser();
15172
15173
0
    input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15174
0
                                         XML_CHAR_ENCODING_NONE);
15175
0
    if (input == NULL) {
15176
0
        if (ioclose != NULL)
15177
0
            ioclose(ioctx);
15178
0
        return (NULL);
15179
0
    }
15180
0
    ctxt = xmlNewParserCtxt();
15181
0
    if (ctxt == NULL) {
15182
0
        xmlFreeParserInputBuffer(input);
15183
0
        return (NULL);
15184
0
    }
15185
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15186
0
    if (stream == NULL) {
15187
0
        xmlFreeParserInputBuffer(input);
15188
0
  xmlFreeParserCtxt(ctxt);
15189
0
        return (NULL);
15190
0
    }
15191
0
    inputPush(ctxt, stream);
15192
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15193
0
}
15194
15195
/**
15196
 * xmlCtxtReadDoc:
15197
 * @ctxt:  an XML parser context
15198
 * @cur:  a pointer to a zero terminated string
15199
 * @URL:  the base URL to use for the document
15200
 * @encoding:  the document encoding, or NULL
15201
 * @options:  a combination of xmlParserOption
15202
 *
15203
 * parse an XML in-memory document and build a tree.
15204
 * This reuses the existing @ctxt parser context
15205
 *
15206
 * Returns the resulting document tree
15207
 */
15208
xmlDocPtr
15209
xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
15210
               const char *URL, const char *encoding, int options)
15211
0
{
15212
0
    if (cur == NULL)
15213
0
        return (NULL);
15214
0
    return (xmlCtxtReadMemory(ctxt, (const char *) cur, xmlStrlen(cur), URL,
15215
0
                              encoding, options));
15216
0
}
15217
15218
/**
15219
 * xmlCtxtReadFile:
15220
 * @ctxt:  an XML parser context
15221
 * @filename:  a file or URL
15222
 * @encoding:  the document encoding, or NULL
15223
 * @options:  a combination of xmlParserOption
15224
 *
15225
 * parse an XML file from the filesystem or the network.
15226
 * This reuses the existing @ctxt parser context
15227
 *
15228
 * Returns the resulting document tree
15229
 */
15230
xmlDocPtr
15231
xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15232
                const char *encoding, int options)
15233
0
{
15234
0
    xmlParserInputPtr stream;
15235
15236
0
    if (filename == NULL)
15237
0
        return (NULL);
15238
0
    if (ctxt == NULL)
15239
0
        return (NULL);
15240
0
    xmlInitParser();
15241
15242
0
    xmlCtxtReset(ctxt);
15243
15244
0
    stream = xmlLoadExternalEntity(filename, NULL, ctxt);
15245
0
    if (stream == NULL) {
15246
0
        return (NULL);
15247
0
    }
15248
0
    inputPush(ctxt, stream);
15249
0
    return (xmlDoRead(ctxt, NULL, encoding, options, 1));
15250
0
}
15251
15252
/**
15253
 * xmlCtxtReadMemory:
15254
 * @ctxt:  an XML parser context
15255
 * @buffer:  a pointer to a char array
15256
 * @size:  the size of the array
15257
 * @URL:  the base URL to use for the document
15258
 * @encoding:  the document encoding, or NULL
15259
 * @options:  a combination of xmlParserOption
15260
 *
15261
 * parse an XML in-memory document and build a tree.
15262
 * This reuses the existing @ctxt parser context
15263
 *
15264
 * Returns the resulting document tree
15265
 */
15266
xmlDocPtr
15267
xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
15268
                  const char *URL, const char *encoding, int options)
15269
0
{
15270
0
    xmlParserInputBufferPtr input;
15271
0
    xmlParserInputPtr stream;
15272
15273
0
    if (ctxt == NULL)
15274
0
        return (NULL);
15275
0
    if (buffer == NULL)
15276
0
        return (NULL);
15277
0
    xmlInitParser();
15278
15279
0
    xmlCtxtReset(ctxt);
15280
15281
0
    input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
15282
0
    if (input == NULL) {
15283
0
  return(NULL);
15284
0
    }
15285
15286
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15287
0
    if (stream == NULL) {
15288
0
  xmlFreeParserInputBuffer(input);
15289
0
  return(NULL);
15290
0
    }
15291
15292
0
    inputPush(ctxt, stream);
15293
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15294
0
}
15295
15296
/**
15297
 * xmlCtxtReadFd:
15298
 * @ctxt:  an XML parser context
15299
 * @fd:  an open file descriptor
15300
 * @URL:  the base URL to use for the document
15301
 * @encoding:  the document encoding, or NULL
15302
 * @options:  a combination of xmlParserOption
15303
 *
15304
 * parse an XML from a file descriptor and build a tree.
15305
 * This reuses the existing @ctxt parser context
15306
 * NOTE that the file descriptor will not be closed when the
15307
 *      reader is closed or reset.
15308
 *
15309
 * Returns the resulting document tree
15310
 */
15311
xmlDocPtr
15312
xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15313
              const char *URL, const char *encoding, int options)
15314
0
{
15315
0
    xmlParserInputBufferPtr input;
15316
0
    xmlParserInputPtr stream;
15317
15318
0
    if (fd < 0)
15319
0
        return (NULL);
15320
0
    if (ctxt == NULL)
15321
0
        return (NULL);
15322
0
    xmlInitParser();
15323
15324
0
    xmlCtxtReset(ctxt);
15325
15326
15327
0
    input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15328
0
    if (input == NULL)
15329
0
        return (NULL);
15330
0
    input->closecallback = NULL;
15331
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15332
0
    if (stream == NULL) {
15333
0
        xmlFreeParserInputBuffer(input);
15334
0
        return (NULL);
15335
0
    }
15336
0
    inputPush(ctxt, stream);
15337
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15338
0
}
15339
15340
/**
15341
 * xmlCtxtReadIO:
15342
 * @ctxt:  an XML parser context
15343
 * @ioread:  an I/O read function
15344
 * @ioclose:  an I/O close function
15345
 * @ioctx:  an I/O handler
15346
 * @URL:  the base URL to use for the document
15347
 * @encoding:  the document encoding, or NULL
15348
 * @options:  a combination of xmlParserOption
15349
 *
15350
 * parse an XML document from I/O functions and source and build a tree.
15351
 * This reuses the existing @ctxt parser context
15352
 *
15353
 * Returns the resulting document tree
15354
 */
15355
xmlDocPtr
15356
xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15357
              xmlInputCloseCallback ioclose, void *ioctx,
15358
        const char *URL,
15359
              const char *encoding, int options)
15360
0
{
15361
0
    xmlParserInputBufferPtr input;
15362
0
    xmlParserInputPtr stream;
15363
15364
0
    if (ioread == NULL)
15365
0
        return (NULL);
15366
0
    if (ctxt == NULL)
15367
0
        return (NULL);
15368
0
    xmlInitParser();
15369
15370
0
    xmlCtxtReset(ctxt);
15371
15372
0
    input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15373
0
                                         XML_CHAR_ENCODING_NONE);
15374
0
    if (input == NULL) {
15375
0
        if (ioclose != NULL)
15376
0
            ioclose(ioctx);
15377
0
        return (NULL);
15378
0
    }
15379
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15380
0
    if (stream == NULL) {
15381
0
        xmlFreeParserInputBuffer(input);
15382
0
        return (NULL);
15383
0
    }
15384
0
    inputPush(ctxt, stream);
15385
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15386
0
}
15387