Coverage Report

Created: 2023-09-24 16:02

/src/libxml2/parser.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3
 *            implemented on top of the SAX interfaces
4
 *
5
 * References:
6
 *   The XML specification:
7
 *     http://www.w3.org/TR/REC-xml
8
 *   Original 1.0 version:
9
 *     http://www.w3.org/TR/1998/REC-xml-19980210
10
 *   XML second edition working draft
11
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
12
 *
13
 * Okay this is a big file, the parser core is around 7000 lines, then it
14
 * is followed by the progressive parser top routines, then the various
15
 * high level APIs to call the parser and a few miscellaneous functions.
16
 * A number of helper functions and deprecated ones have been moved to
17
 * parserInternals.c to reduce this file size.
18
 * As much as possible the functions are associated with their relative
19
 * production in the XML specification. A few productions defining the
20
 * different ranges of character are actually implemented either in
21
 * parserInternals.h or parserInternals.c
22
 * The DOM tree build is realized from the default SAX callbacks in
23
 * the module SAX.c.
24
 * The routines doing the validation checks are in valid.c and called either
25
 * from the SAX callbacks or as standalone functions using a preparsed
26
 * document.
27
 *
28
 * See Copyright for the status of this software.
29
 *
30
 * daniel@veillard.com
31
 */
32
33
/* To avoid EBCDIC trouble when parsing on zOS */
34
#if defined(__MVS__)
35
#pragma convert("ISO8859-1")
36
#endif
37
38
#define IN_LIBXML
39
#include "libxml.h"
40
41
#if defined(_WIN32)
42
#define XML_DIR_SEP '\\'
43
#else
44
#define XML_DIR_SEP '/'
45
#endif
46
47
#include <stdlib.h>
48
#include <limits.h>
49
#include <string.h>
50
#include <stdarg.h>
51
#include <stddef.h>
52
#include <ctype.h>
53
#include <stdlib.h>
54
#include <libxml/xmlmemory.h>
55
#include <libxml/threads.h>
56
#include <libxml/globals.h>
57
#include <libxml/tree.h>
58
#include <libxml/parser.h>
59
#include <libxml/parserInternals.h>
60
#include <libxml/HTMLparser.h>
61
#include <libxml/valid.h>
62
#include <libxml/entities.h>
63
#include <libxml/xmlerror.h>
64
#include <libxml/encoding.h>
65
#include <libxml/xmlIO.h>
66
#include <libxml/uri.h>
67
#ifdef LIBXML_CATALOG_ENABLED
68
#include <libxml/catalog.h>
69
#endif
70
#ifdef LIBXML_SCHEMAS_ENABLED
71
#include <libxml/xmlschemastypes.h>
72
#include <libxml/relaxng.h>
73
#endif
74
#if defined(LIBXML_XPATH_ENABLED) || defined(LIBXML_SCHEMAS_ENABLED)
75
#include <libxml/xpath.h>
76
#endif
77
78
#include "private/buf.h"
79
#include "private/dict.h"
80
#include "private/enc.h"
81
#include "private/entities.h"
82
#include "private/error.h"
83
#include "private/globals.h"
84
#include "private/html.h"
85
#include "private/io.h"
86
#include "private/memory.h"
87
#include "private/parser.h"
88
#include "private/threads.h"
89
#include "private/xpath.h"
90
91
/*
 * Record holding a prefix, a URI, a line number and an "nsNr" counter.
 * NOTE(review): presumably one record is kept per open start tag so that
 * the matching end tag can be checked -- confirm against the code that
 * uses this struct.
 */
struct _xmlStartTag {
92
    const xmlChar *prefix;  /* presumably the element's namespace prefix -- TODO confirm */
93
    const xmlChar *URI;     /* presumably the element's namespace URI -- TODO confirm */
94
    int line;               /* presumably the source line of the start tag -- TODO confirm */
95
    int nsNr;               /* presumably a count of namespace entries -- TODO confirm */
96
};
97
98
static xmlParserCtxtPtr
99
xmlCreateEntityParserCtxtInternal(xmlSAXHandlerPtr sax, void *userData,
100
        const xmlChar *URL, const xmlChar *ID, const xmlChar *base,
101
        xmlParserCtxtPtr pctx);
102
103
static void xmlHaltParser(xmlParserCtxtPtr ctxt);
104
105
static int
106
xmlParseElementStart(xmlParserCtxtPtr ctxt);
107
108
static void
109
xmlParseElementEnd(xmlParserCtxtPtr ctxt);
110
111
/************************************************************************
112
 *                  *
113
 *  Arbitrary limits set in the parser. See XML_PARSE_HUGE    *
114
 *                  *
115
 ************************************************************************/
116
117
4.82M
#define XML_MAX_HUGE_LENGTH 1000000000
118
119
#define XML_PARSER_BIG_ENTITY 1000
120
#define XML_PARSER_LOT_ENTITY 5000
121
122
/*
123
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
124
 *    replacement over the size in byte of the input indicates that you have
125
 *    an exponential behaviour. A value of 10 corresponds to at least 3 entity
126
 *    replacement per byte of input.
127
 */
128
748
#define XML_PARSER_NON_LINEAR 10
129
130
48.2M
#define XML_ENT_FIXED_COST 50
131
132
/**
133
 * xmlParserMaxDepth:
134
 *
135
 * arbitrary depth limit for the XML documents that we allow to
136
 * process. This is not a limitation of the parser but a safety
137
 * boundary feature. It can be disabled with the XML_PARSE_HUGE
138
 * parser option.
139
 */
140
unsigned int xmlParserMaxDepth = 256;
141
142
143
144
#define SAX2 1
145
200M
#define XML_PARSER_BIG_BUFFER_SIZE 300
146
16.0G
#define XML_PARSER_BUFFER_SIZE 100
147
860k
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
148
149
/**
150
 * XML_PARSER_CHUNK_SIZE
151
 *
152
 * When calling GROW that's the minimal amount of data
153
 * the parser expects to have received. It is not a hard
154
 * limit but an optimization when reading strings like Names
155
 * It is not strictly needed as long as the input's available characters
156
 * are followed by 0, which should be provided by the I/O level
157
 */
158
60.8M
#define XML_PARSER_CHUNK_SIZE 100
159
160
/*
161
 * List of XML prefixed PI allowed by W3C specs
162
 */
163
164
static const char* const xmlW3CPIs[] = {
165
    "xml-stylesheet",
166
    "xml-model",
167
    NULL
168
};
169
170
171
/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
172
static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
173
                                              const xmlChar **str);
174
175
static xmlParserErrors
176
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
177
                xmlSAXHandlerPtr sax,
178
          void *user_data, int depth, const xmlChar *URL,
179
          const xmlChar *ID, xmlNodePtr *list);
180
181
static int
182
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
183
                          const char *encoding);
184
#ifdef LIBXML_LEGACY_ENABLED
185
static void
186
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
187
                      xmlNodePtr lastNode);
188
#endif /* LIBXML_LEGACY_ENABLED */
189
190
static xmlParserErrors
191
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
192
          const xmlChar *string, void *user_data, xmlNodePtr *lst);
193
194
static int
195
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
196
197
/************************************************************************
198
 *                  *
199
 *    Some factorized error routines        *
200
 *                  *
201
 ************************************************************************/
202
203
/**
204
 * xmlErrAttributeDup:
205
 * @ctxt:  an XML parser context
206
 * @prefix:  the attribute prefix
207
 * @localname:  the attribute localname
208
 *
209
 * Handle a redefinition of attribute error
210
 */
211
static void
212
xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
213
                   const xmlChar * localname)
214
120k
{
215
120k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
216
120k
        (ctxt->instate == XML_PARSER_EOF))
217
0
  return;
218
120k
    if (ctxt != NULL)
219
120k
  ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
220
221
120k
    if (prefix == NULL)
222
71.8k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
223
71.8k
                        XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
224
71.8k
                        (const char *) localname, NULL, NULL, 0, 0,
225
71.8k
                        "Attribute %s redefined\n", localname);
226
49.0k
    else
227
49.0k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
228
49.0k
                        XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
229
49.0k
                        (const char *) prefix, (const char *) localname,
230
49.0k
                        NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
231
49.0k
                        localname);
232
120k
    if (ctxt != NULL) {
233
120k
  ctxt->wellFormed = 0;
234
120k
  if (ctxt->recovery == 0)
235
31.5k
      ctxt->disableSAX = 1;
236
120k
    }
237
120k
}
238
239
/**
240
 * xmlFatalErr:
241
 * @ctxt:  an XML parser context
242
 * @error:  the error number
243
 * @extra:  extra information string
244
 *
245
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
246
 */
247
static void
248
xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
249
6.86M
{
250
6.86M
    const char *errmsg;
251
252
6.86M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
253
6.86M
        (ctxt->instate == XML_PARSER_EOF))
254
34.6k
  return;
255
6.82M
    switch (error) {
256
146k
        case XML_ERR_INVALID_HEX_CHARREF:
257
146k
            errmsg = "CharRef: invalid hexadecimal value";
258
146k
            break;
259
181k
        case XML_ERR_INVALID_DEC_CHARREF:
260
181k
            errmsg = "CharRef: invalid decimal value";
261
181k
            break;
262
0
        case XML_ERR_INVALID_CHARREF:
263
0
            errmsg = "CharRef: invalid value";
264
0
            break;
265
2.90M
        case XML_ERR_INTERNAL_ERROR:
266
2.90M
            errmsg = "internal error";
267
2.90M
            break;
268
0
        case XML_ERR_PEREF_AT_EOF:
269
0
            errmsg = "PEReference at end of document";
270
0
            break;
271
0
        case XML_ERR_PEREF_IN_PROLOG:
272
0
            errmsg = "PEReference in prolog";
273
0
            break;
274
0
        case XML_ERR_PEREF_IN_EPILOG:
275
0
            errmsg = "PEReference in epilog";
276
0
            break;
277
0
        case XML_ERR_PEREF_NO_NAME:
278
0
            errmsg = "PEReference: no name";
279
0
            break;
280
15.0k
        case XML_ERR_PEREF_SEMICOL_MISSING:
281
15.0k
            errmsg = "PEReference: expecting ';'";
282
15.0k
            break;
283
2.15k
        case XML_ERR_ENTITY_LOOP:
284
2.15k
            errmsg = "Detected an entity reference loop";
285
2.15k
            break;
286
0
        case XML_ERR_ENTITY_NOT_STARTED:
287
0
            errmsg = "EntityValue: \" or ' expected";
288
0
            break;
289
3.42k
        case XML_ERR_ENTITY_PE_INTERNAL:
290
3.42k
            errmsg = "PEReferences forbidden in internal subset";
291
3.42k
            break;
292
5.68k
        case XML_ERR_ENTITY_NOT_FINISHED:
293
5.68k
            errmsg = "EntityValue: \" or ' expected";
294
5.68k
            break;
295
178k
        case XML_ERR_ATTRIBUTE_NOT_STARTED:
296
178k
            errmsg = "AttValue: \" or ' expected";
297
178k
            break;
298
722k
        case XML_ERR_LT_IN_ATTRIBUTE:
299
722k
            errmsg = "Unescaped '<' not allowed in attributes values";
300
722k
            break;
301
15.4k
        case XML_ERR_LITERAL_NOT_STARTED:
302
15.4k
            errmsg = "SystemLiteral \" or ' expected";
303
15.4k
            break;
304
21.5k
        case XML_ERR_LITERAL_NOT_FINISHED:
305
21.5k
            errmsg = "Unfinished System or Public ID \" or ' expected";
306
21.5k
            break;
307
279k
        case XML_ERR_MISPLACED_CDATA_END:
308
279k
            errmsg = "Sequence ']]>' not allowed in content";
309
279k
            break;
310
13.3k
        case XML_ERR_URI_REQUIRED:
311
13.3k
            errmsg = "SYSTEM or PUBLIC, the URI is missing";
312
13.3k
            break;
313
2.14k
        case XML_ERR_PUBID_REQUIRED:
314
2.14k
            errmsg = "PUBLIC, the Public Identifier is missing";
315
2.14k
            break;
316
154k
        case XML_ERR_HYPHEN_IN_COMMENT:
317
154k
            errmsg = "Comment must not contain '--' (double-hyphen)";
318
154k
            break;
319
92.0k
        case XML_ERR_PI_NOT_STARTED:
320
92.0k
            errmsg = "xmlParsePI : no target name";
321
92.0k
            break;
322
32.8k
        case XML_ERR_RESERVED_XML_NAME:
323
32.8k
            errmsg = "Invalid PI name";
324
32.8k
            break;
325
1.63k
        case XML_ERR_NOTATION_NOT_STARTED:
326
1.63k
            errmsg = "NOTATION: Name expected here";
327
1.63k
            break;
328
11.8k
        case XML_ERR_NOTATION_NOT_FINISHED:
329
11.8k
            errmsg = "'>' required to close NOTATION declaration";
330
11.8k
            break;
331
16.7k
        case XML_ERR_VALUE_REQUIRED:
332
16.7k
            errmsg = "Entity value required";
333
16.7k
            break;
334
5.37k
        case XML_ERR_URI_FRAGMENT:
335
5.37k
            errmsg = "Fragment not allowed";
336
5.37k
            break;
337
25.1k
        case XML_ERR_ATTLIST_NOT_STARTED:
338
25.1k
            errmsg = "'(' required to start ATTLIST enumeration";
339
25.1k
            break;
340
2.33k
        case XML_ERR_NMTOKEN_REQUIRED:
341
2.33k
            errmsg = "NmToken expected in ATTLIST enumeration";
342
2.33k
            break;
343
6.23k
        case XML_ERR_ATTLIST_NOT_FINISHED:
344
6.23k
            errmsg = "')' required to finish ATTLIST enumeration";
345
6.23k
            break;
346
8.83k
        case XML_ERR_MIXED_NOT_STARTED:
347
8.83k
            errmsg = "MixedContentDecl : '|' or ')*' expected";
348
8.83k
            break;
349
0
        case XML_ERR_PCDATA_REQUIRED:
350
0
            errmsg = "MixedContentDecl : '#PCDATA' expected";
351
0
            break;
352
13.8k
        case XML_ERR_ELEMCONTENT_NOT_STARTED:
353
13.8k
            errmsg = "ContentDecl : Name or '(' expected";
354
13.8k
            break;
355
40.2k
        case XML_ERR_ELEMCONTENT_NOT_FINISHED:
356
40.2k
            errmsg = "ContentDecl : ',' '|' or ')' expected";
357
40.2k
            break;
358
0
        case XML_ERR_PEREF_IN_INT_SUBSET:
359
0
            errmsg =
360
0
                "PEReference: forbidden within markup decl in internal subset";
361
0
            break;
362
650k
        case XML_ERR_GT_REQUIRED:
363
650k
            errmsg = "expected '>'";
364
650k
            break;
365
551
        case XML_ERR_CONDSEC_INVALID:
366
551
            errmsg = "XML conditional section '[' expected";
367
551
            break;
368
23.9k
        case XML_ERR_EXT_SUBSET_NOT_FINISHED:
369
23.9k
            errmsg = "Content error in the external subset";
370
23.9k
            break;
371
2.28k
        case XML_ERR_CONDSEC_INVALID_KEYWORD:
372
2.28k
            errmsg =
373
2.28k
                "conditional section INCLUDE or IGNORE keyword expected";
374
2.28k
            break;
375
2.91k
        case XML_ERR_CONDSEC_NOT_FINISHED:
376
2.91k
            errmsg = "XML conditional section not closed";
377
2.91k
            break;
378
556
        case XML_ERR_XMLDECL_NOT_STARTED:
379
556
            errmsg = "Text declaration '<?xml' required";
380
556
            break;
381
200k
        case XML_ERR_XMLDECL_NOT_FINISHED:
382
200k
            errmsg = "parsing XML declaration: '?>' expected";
383
200k
            break;
384
0
        case XML_ERR_EXT_ENTITY_STANDALONE:
385
0
            errmsg = "external parsed entities cannot be standalone";
386
0
            break;
387
439k
        case XML_ERR_ENTITYREF_SEMICOL_MISSING:
388
439k
            errmsg = "EntityRef: expecting ';'";
389
439k
            break;
390
61.8k
        case XML_ERR_DOCTYPE_NOT_FINISHED:
391
61.8k
            errmsg = "DOCTYPE improperly terminated";
392
61.8k
            break;
393
0
        case XML_ERR_LTSLASH_REQUIRED:
394
0
            errmsg = "EndTag: '</' not found";
395
0
            break;
396
14.8k
        case XML_ERR_EQUAL_REQUIRED:
397
14.8k
            errmsg = "expected '='";
398
14.8k
            break;
399
40.1k
        case XML_ERR_STRING_NOT_CLOSED:
400
40.1k
            errmsg = "String not closed expecting \" or '";
401
40.1k
            break;
402
9.20k
        case XML_ERR_STRING_NOT_STARTED:
403
9.20k
            errmsg = "String not started expecting ' or \"";
404
9.20k
            break;
405
1.20k
        case XML_ERR_ENCODING_NAME:
406
1.20k
            errmsg = "Invalid XML encoding name";
407
1.20k
            break;
408
1.93k
        case XML_ERR_STANDALONE_VALUE:
409
1.93k
            errmsg = "standalone accepts only 'yes' or 'no'";
410
1.93k
            break;
411
33.9k
        case XML_ERR_DOCUMENT_EMPTY:
412
33.9k
            errmsg = "Document is empty";
413
33.9k
            break;
414
295k
        case XML_ERR_DOCUMENT_END:
415
295k
            errmsg = "Extra content at the end of the document";
416
295k
            break;
417
7.24k
        case XML_ERR_NOT_WELL_BALANCED:
418
7.24k
            errmsg = "chunk is not well balanced";
419
7.24k
            break;
420
0
        case XML_ERR_EXTRA_CONTENT:
421
0
            errmsg = "extra content at the end of well balanced chunk";
422
0
            break;
423
124k
        case XML_ERR_VERSION_MISSING:
424
124k
            errmsg = "Malformed declaration expecting version";
425
124k
            break;
426
67
        case XML_ERR_NAME_TOO_LONG:
427
67
            errmsg = "Name too long";
428
67
            break;
429
#if 0
430
        case:
431
            errmsg = "";
432
            break;
433
#endif
434
13.1k
        default:
435
13.1k
            errmsg = "Unregistered error message";
436
6.82M
    }
437
6.82M
    if (ctxt != NULL)
438
6.82M
  ctxt->errNo = error;
439
6.82M
    if (info == NULL) {
440
3.92M
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
441
3.92M
                        XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s\n",
442
3.92M
                        errmsg);
443
3.92M
    } else {
444
2.90M
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
445
2.90M
                        XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s: %s\n",
446
2.90M
                        errmsg, info);
447
2.90M
    }
448
6.82M
    if (ctxt != NULL) {
449
6.82M
  ctxt->wellFormed = 0;
450
6.82M
  if (ctxt->recovery == 0)
451
917k
      ctxt->disableSAX = 1;
452
6.82M
    }
453
6.82M
}
454
455
/**
456
 * xmlFatalErrMsg:
457
 * @ctxt:  an XML parser context
458
 * @error:  the error number
459
 * @msg:  the error message
460
 *
461
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
462
 */
463
static void LIBXML_ATTR_FORMAT(3,0)
464
xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
465
               const char *msg)
466
10.8M
{
467
10.8M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
468
10.8M
        (ctxt->instate == XML_PARSER_EOF))
469
50
  return;
470
10.8M
    if (ctxt != NULL)
471
10.8M
  ctxt->errNo = error;
472
10.8M
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
473
10.8M
                    XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
474
10.8M
    if (ctxt != NULL) {
475
10.8M
  ctxt->wellFormed = 0;
476
10.8M
  if (ctxt->recovery == 0)
477
1.24M
      ctxt->disableSAX = 1;
478
10.8M
    }
479
10.8M
}
480
481
/**
482
 * xmlWarningMsg:
483
 * @ctxt:  an XML parser context
484
 * @error:  the error number
485
 * @msg:  the error message
486
 * @str1:  extra data
487
 * @str2:  extra data
488
 *
489
 * Handle a warning.
490
 */
491
static void LIBXML_ATTR_FORMAT(3,0)
492
xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
493
              const char *msg, const xmlChar *str1, const xmlChar *str2)
494
1.64M
{
495
1.64M
    xmlStructuredErrorFunc schannel = NULL;
496
497
1.64M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
498
1.64M
        (ctxt->instate == XML_PARSER_EOF))
499
0
  return;
500
1.64M
    if ((ctxt != NULL) && (ctxt->sax != NULL) &&
501
1.64M
        (ctxt->sax->initialized == XML_SAX2_MAGIC))
502
1.50M
        schannel = ctxt->sax->serror;
503
1.64M
    if (ctxt != NULL) {
504
1.64M
        __xmlRaiseError(schannel,
505
1.64M
                    (ctxt->sax) ? ctxt->sax->warning : NULL,
506
1.64M
                    ctxt->userData,
507
1.64M
                    ctxt, NULL, XML_FROM_PARSER, error,
508
1.64M
                    XML_ERR_WARNING, NULL, 0,
509
1.64M
        (const char *) str1, (const char *) str2, NULL, 0, 0,
510
1.64M
        msg, (const char *) str1, (const char *) str2);
511
1.64M
    } else {
512
0
        __xmlRaiseError(schannel, NULL, NULL,
513
0
                    ctxt, NULL, XML_FROM_PARSER, error,
514
0
                    XML_ERR_WARNING, NULL, 0,
515
0
        (const char *) str1, (const char *) str2, NULL, 0, 0,
516
0
        msg, (const char *) str1, (const char *) str2);
517
0
    }
518
1.64M
}
519
520
/**
521
 * xmlValidityError:
522
 * @ctxt:  an XML parser context
523
 * @error:  the error number
524
 * @msg:  the error message
525
 * @str1:  extra data
526
 *
527
 * Handle a validity error.
528
 */
529
static void LIBXML_ATTR_FORMAT(3,0)
530
xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
531
              const char *msg, const xmlChar *str1, const xmlChar *str2)
532
313k
{
533
313k
    xmlStructuredErrorFunc schannel = NULL;
534
535
313k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
536
313k
        (ctxt->instate == XML_PARSER_EOF))
537
0
  return;
538
313k
    if (ctxt != NULL) {
539
313k
  ctxt->errNo = error;
540
313k
  if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
541
303k
      schannel = ctxt->sax->serror;
542
313k
    }
543
313k
    if (ctxt != NULL) {
544
313k
        __xmlRaiseError(schannel,
545
313k
                    ctxt->vctxt.error, ctxt->vctxt.userData,
546
313k
                    ctxt, NULL, XML_FROM_DTD, error,
547
313k
                    XML_ERR_ERROR, NULL, 0, (const char *) str1,
548
313k
        (const char *) str2, NULL, 0, 0,
549
313k
        msg, (const char *) str1, (const char *) str2);
550
313k
  ctxt->valid = 0;
551
313k
    } else {
552
0
        __xmlRaiseError(schannel, NULL, NULL,
553
0
                    ctxt, NULL, XML_FROM_DTD, error,
554
0
                    XML_ERR_ERROR, NULL, 0, (const char *) str1,
555
0
        (const char *) str2, NULL, 0, 0,
556
0
        msg, (const char *) str1, (const char *) str2);
557
0
    }
558
313k
}
559
560
/**
561
 * xmlFatalErrMsgInt:
562
 * @ctxt:  an XML parser context
563
 * @error:  the error number
564
 * @msg:  the error message
565
 * @val:  an integer value
566
 *
567
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
568
 */
569
static void LIBXML_ATTR_FORMAT(3,0)
570
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
571
                  const char *msg, int val)
572
16.5M
{
573
16.5M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
574
16.5M
        (ctxt->instate == XML_PARSER_EOF))
575
0
  return;
576
16.5M
    if (ctxt != NULL)
577
16.5M
  ctxt->errNo = error;
578
16.5M
    __xmlRaiseError(NULL, NULL, NULL,
579
16.5M
                    ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
580
16.5M
                    NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
581
16.5M
    if (ctxt != NULL) {
582
16.5M
  ctxt->wellFormed = 0;
583
16.5M
  if (ctxt->recovery == 0)
584
639k
      ctxt->disableSAX = 1;
585
16.5M
    }
586
16.5M
}
587
588
/**
589
 * xmlFatalErrMsgStrIntStr:
590
 * @ctxt:  an XML parser context
591
 * @error:  the error number
592
 * @msg:  the error message
593
 * @str1:  an string info
594
 * @val:  an integer value
595
 * @str2:  an string info
596
 *
597
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
598
 */
599
static void LIBXML_ATTR_FORMAT(3,0)
600
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
601
                  const char *msg, const xmlChar *str1, int val,
602
      const xmlChar *str2)
603
2.86M
{
604
2.86M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
605
2.86M
        (ctxt->instate == XML_PARSER_EOF))
606
0
  return;
607
2.86M
    if (ctxt != NULL)
608
2.86M
  ctxt->errNo = error;
609
2.86M
    __xmlRaiseError(NULL, NULL, NULL,
610
2.86M
                    ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
611
2.86M
                    NULL, 0, (const char *) str1, (const char *) str2,
612
2.86M
        NULL, val, 0, msg, str1, val, str2);
613
2.86M
    if (ctxt != NULL) {
614
2.86M
  ctxt->wellFormed = 0;
615
2.86M
  if (ctxt->recovery == 0)
616
576k
      ctxt->disableSAX = 1;
617
2.86M
    }
618
2.86M
}
619
620
/**
621
 * xmlFatalErrMsgStr:
622
 * @ctxt:  an XML parser context
623
 * @error:  the error number
624
 * @msg:  the error message
625
 * @val:  a string value
626
 *
627
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
628
 */
629
static void LIBXML_ATTR_FORMAT(3,0)
630
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
631
                  const char *msg, const xmlChar * val)
632
8.91M
{
633
8.91M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
634
8.91M
        (ctxt->instate == XML_PARSER_EOF))
635
7
  return;
636
8.91M
    if (ctxt != NULL)
637
8.91M
  ctxt->errNo = error;
638
8.91M
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
639
8.91M
                    XML_FROM_PARSER, error, XML_ERR_FATAL,
640
8.91M
                    NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
641
8.91M
                    val);
642
8.91M
    if (ctxt != NULL) {
643
8.91M
  ctxt->wellFormed = 0;
644
8.91M
  if (ctxt->recovery == 0)
645
2.37M
      ctxt->disableSAX = 1;
646
8.91M
    }
647
8.91M
}
648
649
/**
650
 * xmlErrMsgStr:
651
 * @ctxt:  an XML parser context
652
 * @error:  the error number
653
 * @msg:  the error message
654
 * @val:  a string value
655
 *
656
 * Handle a non fatal parser error
657
 */
658
static void LIBXML_ATTR_FORMAT(3,0)
659
xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
660
                  const char *msg, const xmlChar * val)
661
398k
{
662
398k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
663
398k
        (ctxt->instate == XML_PARSER_EOF))
664
0
  return;
665
398k
    if (ctxt != NULL)
666
398k
  ctxt->errNo = error;
667
398k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
668
398k
                    XML_FROM_PARSER, error, XML_ERR_ERROR,
669
398k
                    NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
670
398k
                    val);
671
398k
}
672
673
/**
674
 * xmlNsErr:
675
 * @ctxt:  an XML parser context
676
 * @error:  the error number
677
 * @msg:  the message
678
 * @info1:  extra information string
679
 * @info2:  extra information string
680
 *
681
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
682
 */
683
static void LIBXML_ATTR_FORMAT(3,0)
684
xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
685
         const char *msg,
686
         const xmlChar * info1, const xmlChar * info2,
687
         const xmlChar * info3)
688
2.34M
{
689
2.34M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
690
2.34M
        (ctxt->instate == XML_PARSER_EOF))
691
102
  return;
692
2.34M
    if (ctxt != NULL)
693
2.34M
  ctxt->errNo = error;
694
2.34M
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
695
2.34M
                    XML_ERR_ERROR, NULL, 0, (const char *) info1,
696
2.34M
                    (const char *) info2, (const char *) info3, 0, 0, msg,
697
2.34M
                    info1, info2, info3);
698
2.34M
    if (ctxt != NULL)
699
2.34M
  ctxt->nsWellFormed = 0;
700
2.34M
}
701
702
/**
703
 * xmlNsWarn
704
 * @ctxt:  an XML parser context
705
 * @error:  the error number
706
 * @msg:  the message
707
 * @info1:  extra information string
708
 * @info2:  extra information string
709
 *
710
 * Handle a namespace warning error
711
 */
712
static void LIBXML_ATTR_FORMAT(3,0)
713
xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
714
         const char *msg,
715
         const xmlChar * info1, const xmlChar * info2,
716
         const xmlChar * info3)
717
120k
{
718
120k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
719
120k
        (ctxt->instate == XML_PARSER_EOF))
720
0
  return;
721
120k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
722
120k
                    XML_ERR_WARNING, NULL, 0, (const char *) info1,
723
120k
                    (const char *) info2, (const char *) info3, 0, 0, msg,
724
120k
                    info1, info2, info3);
725
120k
}
726
727
/*
 * xmlSaturatedAdd:
 * @dst:  accumulator, updated in place
 * @val:  amount to add
 *
 * Add @val to *@dst, clamping the result to ULONG_MAX instead of
 * wrapping around when the sum does not fit.
 */
static void
xmlSaturatedAdd(unsigned long *dst, unsigned long val) {
    /* Largest amount that can still be added without overflowing. */
    unsigned long room = ULONG_MAX - *dst;

    *dst = (val > room) ? ULONG_MAX : (*dst + val);
}
734
735
/*
 * xmlSaturatedAddSizeT:
 * @dst:  accumulator, updated in place
 * @val:  amount to add, given as a size_t
 *
 * Add @val to *@dst, clamping the result to ULONG_MAX instead of
 * wrapping around when the sum does not fit.
 *
 * @val is taken as size_t so that, on platforms where size_t is wider
 * than unsigned long, a large value saturates the accumulator instead
 * of being silently truncated at the call boundary.
 */
static void
xmlSaturatedAddSizeT(unsigned long *dst, size_t val) {
    /*
     * Compare before narrowing: once this test has passed, val is at
     * most ULONG_MAX - *dst and therefore fits in an unsigned long.
     */
    if (val > ULONG_MAX - *dst)
        *dst = ULONG_MAX;
    else
        *dst += (unsigned long) val;
}
742
743
/**
744
 * xmlParserEntityCheck:
745
 * @ctxt:  parser context
746
 * @extra:  sum of unexpanded entity sizes
747
 *
748
 * Check for non-linear entity expansion behaviour.
749
 *
750
 * In some cases like xmlStringDecodeEntities, this function is called
751
 * for each, possibly nested entity and its unexpanded content length.
752
 *
753
 * In other cases like xmlParseReference, it's only called for each
754
 * top-level entity with its unexpanded content length plus the sum of
755
 * the unexpanded content lengths (plus fixed cost) of all nested
756
 * entities.
757
 *
758
 * Summing the unexpanded lengths also adds the length of the reference.
759
 * This is by design. Taking the length of the entity name into account
760
 * discourages attacks that try to waste CPU time with abusively long
761
 * entity names. See test/recurse/lol6.xml for example. Each call also
762
 * adds some fixed cost XML_ENT_FIXED_COST to discourage attacks with
763
 * short entities.
764
 *
765
 * Returns 1 on error, 0 on success.
766
 */
767
static int
768
xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long extra)
769
48.2M
{
770
48.2M
    unsigned long consumed;
771
48.2M
    xmlParserInputPtr input = ctxt->input;
772
48.2M
    xmlEntityPtr entity = input->entity;
773
774
    /*
775
     * Compute total consumed bytes so far, including input streams of
776
     * external entities.
777
     */
778
48.2M
    consumed = input->parentConsumed;
779
48.2M
    if ((entity == NULL) ||
780
48.2M
        ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
781
34.3M
         ((entity->flags & XML_ENT_PARSED) == 0))) {
782
34.3M
        xmlSaturatedAdd(&consumed, input->consumed);
783
34.3M
        xmlSaturatedAddSizeT(&consumed, input->cur - input->base);
784
34.3M
    }
785
48.2M
    xmlSaturatedAdd(&consumed, ctxt->sizeentities);
786
787
    /*
788
     * Add extra cost and some fixed cost.
789
     */
790
48.2M
    xmlSaturatedAdd(&ctxt->sizeentcopy, extra);
791
48.2M
    xmlSaturatedAdd(&ctxt->sizeentcopy, XML_ENT_FIXED_COST);
792
793
    /*
794
     * It's important to always use saturation arithmetic when tracking
795
     * entity sizes to make the size checks reliable. If "sizeentcopy"
796
     * overflows, we have to abort.
797
     */
798
48.2M
    if ((ctxt->sizeentcopy > XML_MAX_TEXT_LENGTH) &&
799
48.2M
        ((ctxt->sizeentcopy >= ULONG_MAX) ||
800
748
         (ctxt->sizeentcopy / XML_PARSER_NON_LINEAR > consumed))) {
801
748
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_LOOP,
802
748
                       "Maximum entity amplification factor exceeded");
803
748
        xmlHaltParser(ctxt);
804
748
        return(1);
805
748
    }
806
807
48.2M
    return(0);
808
48.2M
}
809
810
/************************************************************************
811
 *                  *
812
 *    Library wide options          *
813
 *                  *
814
 ************************************************************************/
815
816
/**
817
  * xmlHasFeature:
818
  * @feature: the feature to be examined
819
  *
820
  * Examines if the library has been compiled with a given feature.
821
  *
822
  * Returns a non-zero value if the feature exists, otherwise zero.
823
  * Returns zero (0) if the feature does not exist or an unknown
824
  * feature is requested, non-zero otherwise.
825
  */
826
int
827
xmlHasFeature(xmlFeature feature)
828
0
{
829
0
    switch (feature) {
830
0
  case XML_WITH_THREAD:
831
0
#ifdef LIBXML_THREAD_ENABLED
832
0
      return(1);
833
#else
834
      return(0);
835
#endif
836
0
        case XML_WITH_TREE:
837
0
#ifdef LIBXML_TREE_ENABLED
838
0
            return(1);
839
#else
840
            return(0);
841
#endif
842
0
        case XML_WITH_OUTPUT:
843
0
#ifdef LIBXML_OUTPUT_ENABLED
844
0
            return(1);
845
#else
846
            return(0);
847
#endif
848
0
        case XML_WITH_PUSH:
849
0
#ifdef LIBXML_PUSH_ENABLED
850
0
            return(1);
851
#else
852
            return(0);
853
#endif
854
0
        case XML_WITH_READER:
855
0
#ifdef LIBXML_READER_ENABLED
856
0
            return(1);
857
#else
858
            return(0);
859
#endif
860
0
        case XML_WITH_PATTERN:
861
0
#ifdef LIBXML_PATTERN_ENABLED
862
0
            return(1);
863
#else
864
            return(0);
865
#endif
866
0
        case XML_WITH_WRITER:
867
0
#ifdef LIBXML_WRITER_ENABLED
868
0
            return(1);
869
#else
870
            return(0);
871
#endif
872
0
        case XML_WITH_SAX1:
873
0
#ifdef LIBXML_SAX1_ENABLED
874
0
            return(1);
875
#else
876
            return(0);
877
#endif
878
0
        case XML_WITH_FTP:
879
#ifdef LIBXML_FTP_ENABLED
880
            return(1);
881
#else
882
0
            return(0);
883
0
#endif
884
0
        case XML_WITH_HTTP:
885
#ifdef LIBXML_HTTP_ENABLED
886
            return(1);
887
#else
888
0
            return(0);
889
0
#endif
890
0
        case XML_WITH_VALID:
891
0
#ifdef LIBXML_VALID_ENABLED
892
0
            return(1);
893
#else
894
            return(0);
895
#endif
896
0
        case XML_WITH_HTML:
897
0
#ifdef LIBXML_HTML_ENABLED
898
0
            return(1);
899
#else
900
            return(0);
901
#endif
902
0
        case XML_WITH_LEGACY:
903
#ifdef LIBXML_LEGACY_ENABLED
904
            return(1);
905
#else
906
0
            return(0);
907
0
#endif
908
0
        case XML_WITH_C14N:
909
0
#ifdef LIBXML_C14N_ENABLED
910
0
            return(1);
911
#else
912
            return(0);
913
#endif
914
0
        case XML_WITH_CATALOG:
915
0
#ifdef LIBXML_CATALOG_ENABLED
916
0
            return(1);
917
#else
918
            return(0);
919
#endif
920
0
        case XML_WITH_XPATH:
921
0
#ifdef LIBXML_XPATH_ENABLED
922
0
            return(1);
923
#else
924
            return(0);
925
#endif
926
0
        case XML_WITH_XPTR:
927
0
#ifdef LIBXML_XPTR_ENABLED
928
0
            return(1);
929
#else
930
            return(0);
931
#endif
932
0
        case XML_WITH_XINCLUDE:
933
0
#ifdef LIBXML_XINCLUDE_ENABLED
934
0
            return(1);
935
#else
936
            return(0);
937
#endif
938
0
        case XML_WITH_ICONV:
939
0
#ifdef LIBXML_ICONV_ENABLED
940
0
            return(1);
941
#else
942
            return(0);
943
#endif
944
0
        case XML_WITH_ISO8859X:
945
0
#ifdef LIBXML_ISO8859X_ENABLED
946
0
            return(1);
947
#else
948
            return(0);
949
#endif
950
0
        case XML_WITH_UNICODE:
951
0
#ifdef LIBXML_UNICODE_ENABLED
952
0
            return(1);
953
#else
954
            return(0);
955
#endif
956
0
        case XML_WITH_REGEXP:
957
0
#ifdef LIBXML_REGEXP_ENABLED
958
0
            return(1);
959
#else
960
            return(0);
961
#endif
962
0
        case XML_WITH_AUTOMATA:
963
0
#ifdef LIBXML_AUTOMATA_ENABLED
964
0
            return(1);
965
#else
966
            return(0);
967
#endif
968
0
        case XML_WITH_EXPR:
969
#ifdef LIBXML_EXPR_ENABLED
970
            return(1);
971
#else
972
0
            return(0);
973
0
#endif
974
0
        case XML_WITH_SCHEMAS:
975
0
#ifdef LIBXML_SCHEMAS_ENABLED
976
0
            return(1);
977
#else
978
            return(0);
979
#endif
980
0
        case XML_WITH_SCHEMATRON:
981
0
#ifdef LIBXML_SCHEMATRON_ENABLED
982
0
            return(1);
983
#else
984
            return(0);
985
#endif
986
0
        case XML_WITH_MODULES:
987
0
#ifdef LIBXML_MODULES_ENABLED
988
0
            return(1);
989
#else
990
            return(0);
991
#endif
992
0
        case XML_WITH_DEBUG:
993
#ifdef LIBXML_DEBUG_ENABLED
994
            return(1);
995
#else
996
0
            return(0);
997
0
#endif
998
0
        case XML_WITH_DEBUG_MEM:
999
#ifdef DEBUG_MEMORY_LOCATION
1000
            return(1);
1001
#else
1002
0
            return(0);
1003
0
#endif
1004
0
        case XML_WITH_DEBUG_RUN:
1005
0
            return(0);
1006
0
        case XML_WITH_ZLIB:
1007
0
#ifdef LIBXML_ZLIB_ENABLED
1008
0
            return(1);
1009
#else
1010
            return(0);
1011
#endif
1012
0
        case XML_WITH_LZMA:
1013
0
#ifdef LIBXML_LZMA_ENABLED
1014
0
            return(1);
1015
#else
1016
            return(0);
1017
#endif
1018
0
        case XML_WITH_ICU:
1019
#ifdef LIBXML_ICU_ENABLED
1020
            return(1);
1021
#else
1022
0
            return(0);
1023
0
#endif
1024
0
        default:
1025
0
      break;
1026
0
     }
1027
0
     return(0);
1028
0
}
1029
1030
/************************************************************************
1031
 *                  *
1032
 *    SAX2 defaulted attributes handling      *
1033
 *                  *
1034
 ************************************************************************/
1035
1036
/**
1037
 * xmlDetectSAX2:
1038
 * @ctxt:  an XML parser context
1039
 *
1040
 * Do the SAX2 detection and specific initialization
1041
 */
1042
static void
1043
2.06M
xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1044
2.06M
    xmlSAXHandlerPtr sax;
1045
1046
    /* Avoid unused variable warning if features are disabled. */
1047
2.06M
    (void) sax;
1048
1049
2.06M
    if (ctxt == NULL) return;
1050
2.06M
    sax = ctxt->sax;
1051
2.06M
#ifdef LIBXML_SAX1_ENABLED
1052
2.06M
    if ((sax) &&  (sax->initialized == XML_SAX2_MAGIC) &&
1053
2.06M
        ((sax->startElementNs != NULL) ||
1054
1.32M
         (sax->endElementNs != NULL) ||
1055
1.32M
         ((sax->startElement == NULL) && (sax->endElement == NULL))))
1056
1.32M
        ctxt->sax2 = 1;
1057
#else
1058
    ctxt->sax2 = 1;
1059
#endif /* LIBXML_SAX1_ENABLED */
1060
1061
2.06M
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1062
2.06M
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1063
2.06M
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
1064
2.06M
    if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1065
2.06M
    (ctxt->str_xml_ns == NULL)) {
1066
0
        xmlErrMemory(ctxt, NULL);
1067
0
    }
1068
2.06M
}
1069
1070
typedef struct _xmlDefAttrs xmlDefAttrs;
1071
typedef xmlDefAttrs *xmlDefAttrsPtr;
1072
struct _xmlDefAttrs {
1073
    int nbAttrs;  /* number of defaulted attributes on that element */
1074
    int maxAttrs;       /* the size of the array */
1075
#if __STDC_VERSION__ >= 199901L
1076
    /* Using a C99 flexible array member avoids UBSan errors. */
1077
    const xmlChar *values[]; /* array of localname/prefix/values/external */
1078
#else
1079
    const xmlChar *values[5];
1080
#endif
1081
};
1082
1083
/**
1084
 * xmlAttrNormalizeSpace:
1085
 * @src: the source string
1086
 * @dst: the target string
1087
 *
1088
 * Normalize the space in non CDATA attribute values:
1089
 * If the attribute type is not CDATA, then the XML processor MUST further
1090
 * process the normalized attribute value by discarding any leading and
1091
 * trailing space (#x20) characters, and by replacing sequences of space
1092
 * (#x20) characters by a single space (#x20) character.
1093
 * Note that the size of dst need to be at least src, and if one doesn't need
1094
 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1095
 * passing src as dst is just fine.
1096
 *
1097
 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1098
 *         is needed.
1099
 */
1100
static xmlChar *
1101
xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1102
205k
{
1103
205k
    if ((src == NULL) || (dst == NULL))
1104
0
        return(NULL);
1105
1106
241k
    while (*src == 0x20) src++;
1107
9.77M
    while (*src != 0) {
1108
9.57M
  if (*src == 0x20) {
1109
773k
      while (*src == 0x20) src++;
1110
260k
      if (*src != 0)
1111
216k
    *dst++ = 0x20;
1112
9.31M
  } else {
1113
9.31M
      *dst++ = *src++;
1114
9.31M
  }
1115
9.57M
    }
1116
205k
    *dst = 0;
1117
205k
    if (dst == src)
1118
143k
       return(NULL);
1119
61.9k
    return(dst);
1120
205k
}
1121
1122
/**
1123
 * xmlAttrNormalizeSpace2:
1124
 * @src: the source string
1125
 *
1126
 * Normalize the space in non CDATA attribute values, a slightly more complex
1127
 * front end to avoid allocation problems when running on attribute values
1128
 * coming from the input.
1129
 *
1130
 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1131
 *         is needed.
1132
 */
1133
static const xmlChar *
1134
xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
1135
98.9k
{
1136
98.9k
    int i;
1137
98.9k
    int remove_head = 0;
1138
98.9k
    int need_realloc = 0;
1139
98.9k
    const xmlChar *cur;
1140
1141
98.9k
    if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1142
0
        return(NULL);
1143
98.9k
    i = *len;
1144
98.9k
    if (i <= 0)
1145
4.83k
        return(NULL);
1146
1147
94.1k
    cur = src;
1148
104k
    while (*cur == 0x20) {
1149
10.2k
        cur++;
1150
10.2k
  remove_head++;
1151
10.2k
    }
1152
2.27M
    while (*cur != 0) {
1153
2.19M
  if (*cur == 0x20) {
1154
123k
      cur++;
1155
123k
      if ((*cur == 0x20) || (*cur == 0)) {
1156
10.4k
          need_realloc = 1;
1157
10.4k
    break;
1158
10.4k
      }
1159
123k
  } else
1160
2.06M
      cur++;
1161
2.19M
    }
1162
94.1k
    if (need_realloc) {
1163
10.4k
        xmlChar *ret;
1164
1165
10.4k
  ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1166
10.4k
  if (ret == NULL) {
1167
0
      xmlErrMemory(ctxt, NULL);
1168
0
      return(NULL);
1169
0
  }
1170
10.4k
  xmlAttrNormalizeSpace(ret, ret);
1171
10.4k
  *len = strlen((const char *)ret);
1172
10.4k
        return(ret);
1173
83.6k
    } else if (remove_head) {
1174
3.86k
        *len -= remove_head;
1175
3.86k
        memmove(src, src + remove_head, 1 + *len);
1176
3.86k
  return(src);
1177
3.86k
    }
1178
79.8k
    return(NULL);
1179
94.1k
}
1180
1181
/**
1182
 * xmlAddDefAttrs:
1183
 * @ctxt:  an XML parser context
1184
 * @fullname:  the element fullname
1185
 * @fullattr:  the attribute fullname
1186
 * @value:  the attribute value
1187
 *
1188
 * Add a defaulted attribute for an element
1189
 */
1190
static void
1191
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1192
               const xmlChar *fullname,
1193
               const xmlChar *fullattr,
1194
166k
               const xmlChar *value) {
1195
166k
    xmlDefAttrsPtr defaults;
1196
166k
    int len;
1197
166k
    const xmlChar *name;
1198
166k
    const xmlChar *prefix;
1199
1200
    /*
1201
     * Allows to detect attribute redefinitions
1202
     */
1203
166k
    if (ctxt->attsSpecial != NULL) {
1204
125k
        if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1205
44.4k
      return;
1206
125k
    }
1207
1208
121k
    if (ctxt->attsDefault == NULL) {
1209
45.6k
        ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1210
45.6k
  if (ctxt->attsDefault == NULL)
1211
0
      goto mem_error;
1212
45.6k
    }
1213
1214
    /*
1215
     * split the element name into prefix:localname , the string found
1216
     * are within the DTD and then not associated to namespace names.
1217
     */
1218
121k
    name = xmlSplitQName3(fullname, &len);
1219
121k
    if (name == NULL) {
1220
107k
        name = xmlDictLookup(ctxt->dict, fullname, -1);
1221
107k
  prefix = NULL;
1222
107k
    } else {
1223
13.7k
        name = xmlDictLookup(ctxt->dict, name, -1);
1224
13.7k
  prefix = xmlDictLookup(ctxt->dict, fullname, len);
1225
13.7k
    }
1226
1227
    /*
1228
     * make sure there is some storage
1229
     */
1230
121k
    defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1231
121k
    if (defaults == NULL) {
1232
75.0k
        defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
1233
75.0k
                     (4 * 5) * sizeof(const xmlChar *));
1234
75.0k
  if (defaults == NULL)
1235
0
      goto mem_error;
1236
75.0k
  defaults->nbAttrs = 0;
1237
75.0k
  defaults->maxAttrs = 4;
1238
75.0k
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1239
75.0k
                          defaults, NULL) < 0) {
1240
0
      xmlFree(defaults);
1241
0
      goto mem_error;
1242
0
  }
1243
75.0k
    } else if (defaults->nbAttrs >= defaults->maxAttrs) {
1244
3.11k
        xmlDefAttrsPtr temp;
1245
1246
3.11k
        temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
1247
3.11k
           (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1248
3.11k
  if (temp == NULL)
1249
0
      goto mem_error;
1250
3.11k
  defaults = temp;
1251
3.11k
  defaults->maxAttrs *= 2;
1252
3.11k
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1253
3.11k
                          defaults, NULL) < 0) {
1254
0
      xmlFree(defaults);
1255
0
      goto mem_error;
1256
0
  }
1257
3.11k
    }
1258
1259
    /*
1260
     * Split the element name into prefix:localname , the string found
1261
     * are within the DTD and hen not associated to namespace names.
1262
     */
1263
121k
    name = xmlSplitQName3(fullattr, &len);
1264
121k
    if (name == NULL) {
1265
99.9k
        name = xmlDictLookup(ctxt->dict, fullattr, -1);
1266
99.9k
  prefix = NULL;
1267
99.9k
    } else {
1268
21.7k
        name = xmlDictLookup(ctxt->dict, name, -1);
1269
21.7k
  prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1270
21.7k
    }
1271
1272
121k
    defaults->values[5 * defaults->nbAttrs] = name;
1273
121k
    defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1274
    /* intern the string and precompute the end */
1275
121k
    len = xmlStrlen(value);
1276
121k
    value = xmlDictLookup(ctxt->dict, value, len);
1277
121k
    if (value == NULL)
1278
0
        goto mem_error;
1279
121k
    defaults->values[5 * defaults->nbAttrs + 2] = value;
1280
121k
    defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1281
121k
    if (ctxt->external)
1282
22.6k
        defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1283
99.0k
    else
1284
99.0k
        defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1285
121k
    defaults->nbAttrs++;
1286
1287
121k
    return;
1288
1289
0
mem_error:
1290
0
    xmlErrMemory(ctxt, NULL);
1291
0
    return;
1292
121k
}
1293
1294
/**
1295
 * xmlAddSpecialAttr:
1296
 * @ctxt:  an XML parser context
1297
 * @fullname:  the element fullname
1298
 * @fullattr:  the attribute fullname
1299
 * @type:  the attribute type
1300
 *
1301
 * Register this attribute type
1302
 */
1303
static void
1304
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1305
      const xmlChar *fullname,
1306
      const xmlChar *fullattr,
1307
      int type)
1308
964k
{
1309
964k
    if (ctxt->attsSpecial == NULL) {
1310
88.9k
        ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1311
88.9k
  if (ctxt->attsSpecial == NULL)
1312
0
      goto mem_error;
1313
88.9k
    }
1314
1315
964k
    if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1316
94.3k
        return;
1317
1318
869k
    xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1319
869k
                     (void *) (ptrdiff_t) type);
1320
869k
    return;
1321
1322
0
mem_error:
1323
0
    xmlErrMemory(ctxt, NULL);
1324
0
    return;
1325
964k
}
1326
1327
/**
1328
 * xmlCleanSpecialAttrCallback:
1329
 *
1330
 * Removes CDATA attributes from the special attribute table
1331
 */
1332
static void
1333
xmlCleanSpecialAttrCallback(void *payload, void *data,
1334
                            const xmlChar *fullname, const xmlChar *fullattr,
1335
567k
                            const xmlChar *unused ATTRIBUTE_UNUSED) {
1336
567k
    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1337
1338
567k
    if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) {
1339
237k
        xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1340
237k
    }
1341
567k
}
1342
1343
/**
1344
 * xmlCleanSpecialAttr:
1345
 * @ctxt:  an XML parser context
1346
 *
1347
 * Trim the list of attributes defined to remove all those of type
1348
 * CDATA as they are not special. This call should be done when finishing
1349
 * to parse the DTD and before starting to parse the document root.
1350
 */
1351
static void
1352
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1353
325k
{
1354
325k
    if (ctxt->attsSpecial == NULL)
1355
268k
        return;
1356
1357
56.5k
    xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1358
1359
56.5k
    if (xmlHashSize(ctxt->attsSpecial) == 0) {
1360
13.6k
        xmlHashFree(ctxt->attsSpecial, NULL);
1361
13.6k
        ctxt->attsSpecial = NULL;
1362
13.6k
    }
1363
56.5k
    return;
1364
325k
}
1365
1366
/**
1367
 * xmlCheckLanguageID:
1368
 * @lang:  pointer to the string value
1369
 *
1370
 * Checks that the value conforms to the LanguageID production:
1371
 *
1372
 * NOTE: this is somewhat deprecated, those productions were removed from
1373
 *       the XML Second edition.
1374
 *
1375
 * [33] LanguageID ::= Langcode ('-' Subcode)*
1376
 * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
1377
 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1378
 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1379
 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1380
 * [38] Subcode ::= ([a-z] | [A-Z])+
1381
 *
1382
 * The current REC reference the successors of RFC 1766, currently 5646
1383
 *
1384
 * http://www.rfc-editor.org/rfc/rfc5646.txt
1385
 * langtag       = language
1386
 *                 ["-" script]
1387
 *                 ["-" region]
1388
 *                 *("-" variant)
1389
 *                 *("-" extension)
1390
 *                 ["-" privateuse]
1391
 * language      = 2*3ALPHA            ; shortest ISO 639 code
1392
 *                 ["-" extlang]       ; sometimes followed by
1393
 *                                     ; extended language subtags
1394
 *               / 4ALPHA              ; or reserved for future use
1395
 *               / 5*8ALPHA            ; or registered language subtag
1396
 *
1397
 * extlang       = 3ALPHA              ; selected ISO 639 codes
1398
 *                 *2("-" 3ALPHA)      ; permanently reserved
1399
 *
1400
 * script        = 4ALPHA              ; ISO 15924 code
1401
 *
1402
 * region        = 2ALPHA              ; ISO 3166-1 code
1403
 *               / 3DIGIT              ; UN M.49 code
1404
 *
1405
 * variant       = 5*8alphanum         ; registered variants
1406
 *               / (DIGIT 3alphanum)
1407
 *
1408
 * extension     = singleton 1*("-" (2*8alphanum))
1409
 *
1410
 *                                     ; Single alphanumerics
1411
 *                                     ; "x" reserved for private use
1412
 * singleton     = DIGIT               ; 0 - 9
1413
 *               / %x41-57             ; A - W
1414
 *               / %x59-5A             ; Y - Z
1415
 *               / %x61-77             ; a - w
1416
 *               / %x79-7A             ; y - z
1417
 *
1418
 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1419
 * The parser below doesn't try to cope with extension or privateuse
1420
 * that could be added but that's not interoperable anyway
1421
 *
1422
 * Returns 1 if correct 0 otherwise
1423
 **/
1424
int
1425
xmlCheckLanguageID(const xmlChar * lang)
1426
163k
{
1427
163k
    const xmlChar *cur = lang, *nxt;
1428
1429
163k
    if (cur == NULL)
1430
3.47k
        return (0);
1431
159k
    if (((cur[0] == 'i') && (cur[1] == '-')) ||
1432
159k
        ((cur[0] == 'I') && (cur[1] == '-')) ||
1433
159k
        ((cur[0] == 'x') && (cur[1] == '-')) ||
1434
159k
        ((cur[0] == 'X') && (cur[1] == '-'))) {
1435
        /*
1436
         * Still allow IANA code and user code which were coming
1437
         * from the previous version of the XML-1.0 specification
1438
         * it's deprecated but we should not fail
1439
         */
1440
8.88k
        cur += 2;
1441
77.0k
        while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1442
77.0k
               ((cur[0] >= 'a') && (cur[0] <= 'z')))
1443
68.1k
            cur++;
1444
8.88k
        return(cur[0] == 0);
1445
8.88k
    }
1446
150k
    nxt = cur;
1447
646k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1448
646k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1449
495k
           nxt++;
1450
150k
    if (nxt - cur >= 4) {
1451
        /*
1452
         * Reserved
1453
         */
1454
14.5k
        if ((nxt - cur > 8) || (nxt[0] != 0))
1455
12.0k
            return(0);
1456
2.55k
        return(1);
1457
14.5k
    }
1458
136k
    if (nxt - cur < 2)
1459
9.86k
        return(0);
1460
    /* we got an ISO 639 code */
1461
126k
    if (nxt[0] == 0)
1462
6.74k
        return(1);
1463
119k
    if (nxt[0] != '-')
1464
7.24k
        return(0);
1465
1466
112k
    nxt++;
1467
112k
    cur = nxt;
1468
    /* now we can have extlang or script or region or variant */
1469
112k
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1470
11.8k
        goto region_m49;
1471
1472
474k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1473
474k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1474
373k
           nxt++;
1475
100k
    if (nxt - cur == 4)
1476
31.3k
        goto script;
1477
69.2k
    if (nxt - cur == 2)
1478
14.1k
        goto region;
1479
55.1k
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1480
10.4k
        goto variant;
1481
44.6k
    if (nxt - cur != 3)
1482
14.6k
        return(0);
1483
    /* we parsed an extlang */
1484
29.9k
    if (nxt[0] == 0)
1485
2.73k
        return(1);
1486
27.2k
    if (nxt[0] != '-')
1487
2.70k
        return(0);
1488
1489
24.5k
    nxt++;
1490
24.5k
    cur = nxt;
1491
    /* now we can have script or region or variant */
1492
24.5k
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1493
1.51k
        goto region_m49;
1494
1495
118k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1496
118k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1497
95.8k
           nxt++;
1498
23.0k
    if (nxt - cur == 2)
1499
4.90k
        goto region;
1500
18.1k
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1501
3.31k
        goto variant;
1502
14.8k
    if (nxt - cur != 4)
1503
11.8k
        return(0);
1504
    /* we parsed a script */
1505
34.2k
script:
1506
34.2k
    if (nxt[0] == 0)
1507
2.26k
        return(1);
1508
32.0k
    if (nxt[0] != '-')
1509
3.60k
        return(0);
1510
1511
28.4k
    nxt++;
1512
28.4k
    cur = nxt;
1513
    /* now we can have region or variant */
1514
28.4k
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1515
4.42k
        goto region_m49;
1516
1517
126k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1518
126k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1519
102k
           nxt++;
1520
1521
24.0k
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1522
5.60k
        goto variant;
1523
18.3k
    if (nxt - cur != 2)
1524
12.0k
        return(0);
1525
    /* we parsed a region */
1526
28.4k
region:
1527
28.4k
    if (nxt[0] == 0)
1528
3.71k
        return(1);
1529
24.7k
    if (nxt[0] != '-')
1530
10.8k
        return(0);
1531
1532
13.8k
    nxt++;
1533
13.8k
    cur = nxt;
1534
    /* now we can just have a variant */
1535
93.3k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1536
93.3k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1537
79.5k
           nxt++;
1538
1539
13.8k
    if ((nxt - cur < 5) || (nxt - cur > 8))
1540
9.13k
        return(0);
1541
1542
    /* we parsed a variant */
1543
24.1k
variant:
1544
24.1k
    if (nxt[0] == 0)
1545
4.36k
        return(1);
1546
19.7k
    if (nxt[0] != '-')
1547
10.1k
        return(0);
1548
    /* extensions and private use subtags not checked */
1549
9.55k
    return (1);
1550
1551
17.7k
region_m49:
1552
17.7k
    if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1553
17.7k
        ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1554
3.09k
        nxt += 3;
1555
3.09k
        goto region;
1556
3.09k
    }
1557
14.6k
    return(0);
1558
17.7k
}
1559
1560
/************************************************************************
1561
 *                  *
1562
 *    Parser stacks related functions and macros    *
1563
 *                  *
1564
 ************************************************************************/
1565
1566
static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1567
                                            const xmlChar ** str);
1568
1569
#ifdef SAX2
1570
/**
1571
 * nsPush:
1572
 * @ctxt:  an XML parser context
1573
 * @prefix:  the namespace prefix or NULL
1574
 * @URL:  the namespace name
1575
 *
1576
 * Pushes a new parser namespace on top of the ns stack
1577
 *
1578
 * Returns -1 in case of error, -2 if the namespace should be discarded
1579
 *     and the index in the stack otherwise.
1580
 */
1581
static int
1582
nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1583
767k
{
1584
767k
    if (ctxt->options & XML_PARSE_NSCLEAN) {
1585
340k
        int i;
1586
842k
  for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
1587
706k
      if (ctxt->nsTab[i] == prefix) {
1588
    /* in scope */
1589
204k
          if (ctxt->nsTab[i + 1] == URL)
1590
86.8k
        return(-2);
1591
    /* out of scope keep it */
1592
117k
    break;
1593
204k
      }
1594
706k
  }
1595
340k
    }
1596
680k
    if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1597
110k
  ctxt->nsMax = 10;
1598
110k
  ctxt->nsNr = 0;
1599
110k
  ctxt->nsTab = (const xmlChar **)
1600
110k
                xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1601
110k
  if (ctxt->nsTab == NULL) {
1602
0
      xmlErrMemory(ctxt, NULL);
1603
0
      ctxt->nsMax = 0;
1604
0
            return (-1);
1605
0
  }
1606
569k
    } else if (ctxt->nsNr >= ctxt->nsMax) {
1607
21.0k
        const xmlChar ** tmp;
1608
21.0k
        ctxt->nsMax *= 2;
1609
21.0k
        tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1610
21.0k
            ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1611
21.0k
        if (tmp == NULL) {
1612
0
            xmlErrMemory(ctxt, NULL);
1613
0
      ctxt->nsMax /= 2;
1614
0
            return (-1);
1615
0
        }
1616
21.0k
  ctxt->nsTab = tmp;
1617
21.0k
    }
1618
680k
    ctxt->nsTab[ctxt->nsNr++] = prefix;
1619
680k
    ctxt->nsTab[ctxt->nsNr++] = URL;
1620
680k
    return (ctxt->nsNr);
1621
680k
}
1622
/**
1623
 * nsPop:
1624
 * @ctxt: an XML parser context
1625
 * @nr:  the number to pop
1626
 *
1627
 * Pops the top @nr parser prefix/namespace from the ns stack
1628
 *
1629
 * Returns the number of namespaces removed
1630
 */
1631
static int
1632
nsPop(xmlParserCtxtPtr ctxt, int nr)
1633
270k
{
1634
270k
    int i;
1635
1636
270k
    if (ctxt->nsTab == NULL) return(0);
1637
270k
    if (ctxt->nsNr < nr) {
1638
0
        xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1639
0
        nr = ctxt->nsNr;
1640
0
    }
1641
270k
    if (ctxt->nsNr <= 0)
1642
0
        return (0);
1643
1644
901k
    for (i = 0;i < nr;i++) {
1645
630k
         ctxt->nsNr--;
1646
630k
   ctxt->nsTab[ctxt->nsNr] = NULL;
1647
630k
    }
1648
270k
    return(nr);
1649
270k
}
1650
#endif
1651
1652
static int
1653
176k
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1654
176k
    const xmlChar **atts;
1655
176k
    int *attallocs;
1656
176k
    int maxatts;
1657
1658
176k
    if (nr + 5 > ctxt->maxatts) {
1659
176k
  maxatts = ctxt->maxatts == 0 ? 55 : (nr + 5) * 2;
1660
176k
  atts = (const xmlChar **) xmlMalloc(
1661
176k
             maxatts * sizeof(const xmlChar *));
1662
176k
  if (atts == NULL) goto mem_error;
1663
176k
  attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1664
176k
                               (maxatts / 5) * sizeof(int));
1665
176k
  if (attallocs == NULL) {
1666
0
            xmlFree(atts);
1667
0
            goto mem_error;
1668
0
        }
1669
176k
        if (ctxt->maxatts > 0)
1670
1.11k
            memcpy(atts, ctxt->atts, ctxt->maxatts * sizeof(const xmlChar *));
1671
176k
        xmlFree(ctxt->atts);
1672
176k
  ctxt->atts = atts;
1673
176k
  ctxt->attallocs = attallocs;
1674
176k
  ctxt->maxatts = maxatts;
1675
176k
    }
1676
176k
    return(ctxt->maxatts);
1677
0
mem_error:
1678
0
    xmlErrMemory(ctxt, NULL);
1679
0
    return(-1);
1680
176k
}
1681
1682
/**
1683
 * inputPush:
1684
 * @ctxt:  an XML parser context
1685
 * @value:  the parser input
1686
 *
1687
 * Pushes a new parser input on top of the input stack
1688
 *
1689
 * Returns -1 in case of error, the index in the stack otherwise
1690
 */
1691
int
1692
inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1693
15.0M
{
1694
15.0M
    if ((ctxt == NULL) || (value == NULL))
1695
0
        return(-1);
1696
15.0M
    if (ctxt->inputNr >= ctxt->inputMax) {
1697
403
        size_t newSize = ctxt->inputMax * 2;
1698
403
        xmlParserInputPtr *tmp;
1699
1700
403
        tmp = (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1701
403
                                               newSize * sizeof(*tmp));
1702
403
        if (tmp == NULL) {
1703
0
            xmlErrMemory(ctxt, NULL);
1704
0
            return (-1);
1705
0
        }
1706
403
        ctxt->inputTab = tmp;
1707
403
        ctxt->inputMax = newSize;
1708
403
    }
1709
15.0M
    ctxt->inputTab[ctxt->inputNr] = value;
1710
15.0M
    ctxt->input = value;
1711
15.0M
    return (ctxt->inputNr++);
1712
15.0M
}
1713
/**
1714
 * inputPop:
1715
 * @ctxt: an XML parser context
1716
 *
1717
 * Pops the top parser input from the input stack
1718
 *
1719
 * Returns the input just removed
1720
 */
1721
xmlParserInputPtr
1722
inputPop(xmlParserCtxtPtr ctxt)
1723
17.7M
{
1724
17.7M
    xmlParserInputPtr ret;
1725
1726
17.7M
    if (ctxt == NULL)
1727
0
        return(NULL);
1728
17.7M
    if (ctxt->inputNr <= 0)
1729
2.69M
        return (NULL);
1730
15.0M
    ctxt->inputNr--;
1731
15.0M
    if (ctxt->inputNr > 0)
1732
13.8M
        ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1733
1.19M
    else
1734
1.19M
        ctxt->input = NULL;
1735
15.0M
    ret = ctxt->inputTab[ctxt->inputNr];
1736
15.0M
    ctxt->inputTab[ctxt->inputNr] = NULL;
1737
15.0M
    return (ret);
1738
17.7M
}
1739
/**
1740
 * nodePush:
1741
 * @ctxt:  an XML parser context
1742
 * @value:  the element node
1743
 *
1744
 * Pushes a new element node on top of the node stack
1745
 *
1746
 * Returns -1 in case of error, the index in the stack otherwise
1747
 */
1748
int
1749
nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1750
13.2M
{
1751
13.2M
    if (ctxt == NULL) return(0);
1752
13.2M
    if (ctxt->nodeNr >= ctxt->nodeMax) {
1753
95.3k
        xmlNodePtr *tmp;
1754
1755
95.3k
  tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1756
95.3k
                                      ctxt->nodeMax * 2 *
1757
95.3k
                                      sizeof(ctxt->nodeTab[0]));
1758
95.3k
        if (tmp == NULL) {
1759
0
            xmlErrMemory(ctxt, NULL);
1760
0
            return (-1);
1761
0
        }
1762
95.3k
        ctxt->nodeTab = tmp;
1763
95.3k
  ctxt->nodeMax *= 2;
1764
95.3k
    }
1765
13.2M
    if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1766
13.2M
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
1767
326
  xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1768
326
     "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1769
326
        xmlParserMaxDepth);
1770
326
  xmlHaltParser(ctxt);
1771
326
  return(-1);
1772
326
    }
1773
13.2M
    ctxt->nodeTab[ctxt->nodeNr] = value;
1774
13.2M
    ctxt->node = value;
1775
13.2M
    return (ctxt->nodeNr++);
1776
13.2M
}
1777
1778
/**
1779
 * nodePop:
1780
 * @ctxt: an XML parser context
1781
 *
1782
 * Pops the top element node from the node stack
1783
 *
1784
 * Returns the node just removed
1785
 */
1786
xmlNodePtr
1787
nodePop(xmlParserCtxtPtr ctxt)
1788
10.9M
{
1789
10.9M
    xmlNodePtr ret;
1790
1791
10.9M
    if (ctxt == NULL) return(NULL);
1792
10.9M
    if (ctxt->nodeNr <= 0)
1793
540k
        return (NULL);
1794
10.3M
    ctxt->nodeNr--;
1795
10.3M
    if (ctxt->nodeNr > 0)
1796
8.66M
        ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1797
1.72M
    else
1798
1.72M
        ctxt->node = NULL;
1799
10.3M
    ret = ctxt->nodeTab[ctxt->nodeNr];
1800
10.3M
    ctxt->nodeTab[ctxt->nodeNr] = NULL;
1801
10.3M
    return (ret);
1802
10.9M
}
1803
1804
/**
1805
 * nameNsPush:
1806
 * @ctxt:  an XML parser context
1807
 * @value:  the element name
1808
 * @prefix:  the element prefix
1809
 * @URI:  the element namespace name
1810
 * @line:  the current line number for error messages
1811
 * @nsNr:  the number of namespaces pushed on the namespace table
1812
 *
1813
 * Pushes a new element name/prefix/URL on top of the name stack
1814
 *
1815
 * Returns -1 in case of error, the index in the stack otherwise
1816
 */
1817
static int
1818
nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1819
           const xmlChar *prefix, const xmlChar *URI, int line, int nsNr)
1820
12.6M
{
1821
12.6M
    xmlStartTag *tag;
1822
1823
12.6M
    if (ctxt->nameNr >= ctxt->nameMax) {
1824
194k
        const xmlChar * *tmp;
1825
194k
        xmlStartTag *tmp2;
1826
194k
        ctxt->nameMax *= 2;
1827
194k
        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1828
194k
                                    ctxt->nameMax *
1829
194k
                                    sizeof(ctxt->nameTab[0]));
1830
194k
        if (tmp == NULL) {
1831
0
      ctxt->nameMax /= 2;
1832
0
      goto mem_error;
1833
0
        }
1834
194k
  ctxt->nameTab = tmp;
1835
194k
        tmp2 = (xmlStartTag *) xmlRealloc((void * *)ctxt->pushTab,
1836
194k
                                    ctxt->nameMax *
1837
194k
                                    sizeof(ctxt->pushTab[0]));
1838
194k
        if (tmp2 == NULL) {
1839
0
      ctxt->nameMax /= 2;
1840
0
      goto mem_error;
1841
0
        }
1842
194k
  ctxt->pushTab = tmp2;
1843
12.4M
    } else if (ctxt->pushTab == NULL) {
1844
692k
        ctxt->pushTab = (xmlStartTag *) xmlMalloc(ctxt->nameMax *
1845
692k
                                            sizeof(ctxt->pushTab[0]));
1846
692k
        if (ctxt->pushTab == NULL)
1847
0
            goto mem_error;
1848
692k
    }
1849
12.6M
    ctxt->nameTab[ctxt->nameNr] = value;
1850
12.6M
    ctxt->name = value;
1851
12.6M
    tag = &ctxt->pushTab[ctxt->nameNr];
1852
12.6M
    tag->prefix = prefix;
1853
12.6M
    tag->URI = URI;
1854
12.6M
    tag->line = line;
1855
12.6M
    tag->nsNr = nsNr;
1856
12.6M
    return (ctxt->nameNr++);
1857
0
mem_error:
1858
0
    xmlErrMemory(ctxt, NULL);
1859
0
    return (-1);
1860
12.6M
}
1861
#ifdef LIBXML_PUSH_ENABLED
1862
/**
1863
 * nameNsPop:
1864
 * @ctxt: an XML parser context
1865
 *
1866
 * Pops the top element/prefix/URI name from the name stack
1867
 *
1868
 * Returns the name just removed
1869
 */
1870
static const xmlChar *
1871
nameNsPop(xmlParserCtxtPtr ctxt)
1872
1.42M
{
1873
1.42M
    const xmlChar *ret;
1874
1875
1.42M
    if (ctxt->nameNr <= 0)
1876
0
        return (NULL);
1877
1.42M
    ctxt->nameNr--;
1878
1.42M
    if (ctxt->nameNr > 0)
1879
1.39M
        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1880
26.9k
    else
1881
26.9k
        ctxt->name = NULL;
1882
1.42M
    ret = ctxt->nameTab[ctxt->nameNr];
1883
1.42M
    ctxt->nameTab[ctxt->nameNr] = NULL;
1884
1.42M
    return (ret);
1885
1.42M
}
1886
#endif /* LIBXML_PUSH_ENABLED */
1887
1888
/**
1889
 * namePush:
1890
 * @ctxt:  an XML parser context
1891
 * @value:  the element name
1892
 *
1893
 * Pushes a new element name on top of the name stack
1894
 *
1895
 * Returns -1 in case of error, the index in the stack otherwise
1896
 */
1897
int
1898
namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1899
0
{
1900
0
    if (ctxt == NULL) return (-1);
1901
1902
0
    if (ctxt->nameNr >= ctxt->nameMax) {
1903
0
        const xmlChar * *tmp;
1904
0
        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1905
0
                                    ctxt->nameMax * 2 *
1906
0
                                    sizeof(ctxt->nameTab[0]));
1907
0
        if (tmp == NULL) {
1908
0
      goto mem_error;
1909
0
        }
1910
0
  ctxt->nameTab = tmp;
1911
0
        ctxt->nameMax *= 2;
1912
0
    }
1913
0
    ctxt->nameTab[ctxt->nameNr] = value;
1914
0
    ctxt->name = value;
1915
0
    return (ctxt->nameNr++);
1916
0
mem_error:
1917
0
    xmlErrMemory(ctxt, NULL);
1918
0
    return (-1);
1919
0
}
1920
/**
1921
 * namePop:
1922
 * @ctxt: an XML parser context
1923
 *
1924
 * Pops the top element name from the name stack
1925
 *
1926
 * Returns the name just removed
1927
 */
1928
const xmlChar *
1929
namePop(xmlParserCtxtPtr ctxt)
1930
4.84M
{
1931
4.84M
    const xmlChar *ret;
1932
1933
4.84M
    if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1934
0
        return (NULL);
1935
4.84M
    ctxt->nameNr--;
1936
4.84M
    if (ctxt->nameNr > 0)
1937
4.64M
        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1938
199k
    else
1939
199k
        ctxt->name = NULL;
1940
4.84M
    ret = ctxt->nameTab[ctxt->nameNr];
1941
4.84M
    ctxt->nameTab[ctxt->nameNr] = NULL;
1942
4.84M
    return (ret);
1943
4.84M
}
1944
1945
15.7M
/*
 * spacePush:
 * Pushes @val on the space stack, doubling the table first when it is
 * full, and points ctxt->space at the new top entry.
 *
 * Returns -1 on allocation failure, the index in the stack otherwise.
 */
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
    int slot;

    if (ctxt->spaceNr >= ctxt->spaceMax) {
        int *grown;

        ctxt->spaceMax *= 2;
        grown = (int *) xmlRealloc(ctxt->spaceTab,
                                   ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
        if (grown == NULL) {
            xmlErrMemory(ctxt, NULL);
            /* undo the capacity change: the table was not resized */
            ctxt->spaceMax /= 2;
            return(-1);
        }
        ctxt->spaceTab = grown;
    }

    slot = ctxt->spaceNr;
    ctxt->spaceTab[slot] = val;
    ctxt->space = &ctxt->spaceTab[slot];
    ctxt->spaceNr = slot + 1;
    return(slot);
}
1963
1964
12.7M
/*
 * spacePop:
 * Pops the top value from the space stack, overwrites its slot with -1
 * and points ctxt->space at the entry below it (entry 0 when the stack
 * becomes empty).
 *
 * Returns the popped value, or 0 if the stack is empty.
 */
static int spacePop(xmlParserCtxtPtr ctxt) {
    int top;
    int popped;

    if (ctxt->spaceNr <= 0)
        return(0);

    top = --ctxt->spaceNr;
    ctxt->space = &ctxt->spaceTab[(top > 0) ? (top - 1) : 0];

    popped = ctxt->spaceTab[top];
    ctxt->spaceTab[top] = -1;
    return(popped);
}
1976
1977
/*
1978
 * Macros for accessing the content. Those should be used only by the parser,
1979
 * and not exported.
1980
 *
1981
 * Dirty macros, i.e. one often need to make assumption on the context to
1982
 * use them
1983
 *
1984
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
1985
 *           To be used with extreme caution since operations consuming
1986
 *           characters may move the input buffer to a different location !
1987
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
1988
 *           This should be used internally by the parser
1989
 *           only to compare to ASCII values otherwise it would break when
1990
 *           running with UTF-8 encoding.
1991
 *   RAW     same as CUR but in the input buffer, bypass any token
1992
 *           extraction that may have been done
1993
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR it should be used only
1994
 *           to compare on ASCII based substring.
1995
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
1996
 *           strings without newlines within the parser.
1997
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
1998
 *           defined char within the parser.
1999
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
2000
 *
2001
 *   NEXT    Skip to the next character, this does the proper decoding
2002
 *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
2003
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
2004
 *   CUR_CHAR(l) returns the current unicode character (int), set l
2005
 *           to the number of xmlChars used for the encoding [0-5].
2006
 *   CUR_SCHAR  same but operate on a string instead of the context
2007
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
2008
 *            the index
2009
 *   GROW, SHRINK  handling of input buffers
2010
 */
2011
2012
328M
#define RAW (*ctxt->input->cur)
2013
211M
#define CUR (*ctxt->input->cur)
2014
246M
#define NXT(val) ctxt->input->cur[(val)]
2015
20.8M
#define CUR_PTR ctxt->input->cur
2016
4.96M
#define BASE_PTR ctxt->input->base
2017
2018
#define CMP4( s, c1, c2, c3, c4 ) \
2019
74.0M
  ( ((unsigned char *) s)[ 0 ] == c1 && ((unsigned char *) s)[ 1 ] == c2 && \
2020
37.3M
    ((unsigned char *) s)[ 2 ] == c3 && ((unsigned char *) s)[ 3 ] == c4 )
2021
#define CMP5( s, c1, c2, c3, c4, c5 ) \
2022
69.4M
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) s)[ 4 ] == c5 )
2023
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
2024
61.6M
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) s)[ 5 ] == c6 )
2025
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
2026
54.1M
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) s)[ 6 ] == c7 )
2027
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
2028
47.7M
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) s)[ 7 ] == c8 )
2029
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
2030
22.7M
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
2031
22.7M
    ((unsigned char *) s)[ 8 ] == c9 )
2032
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
2033
295k
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
2034
295k
    ((unsigned char *) s)[ 9 ] == c10 )
2035
2036
75.5M
#define SKIP(val) do {             \
2037
75.5M
    ctxt->input->cur += (val),ctxt->input->col+=(val);      \
2038
75.5M
    if (*ctxt->input->cur == 0)           \
2039
75.5M
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);     \
2040
75.5M
  } while (0)
2041
2042
183k
#define SKIPL(val) do {             \
2043
183k
    int skipl;                \
2044
18.8M
    for(skipl=0; skipl<val; skipl++) {         \
2045
18.7M
  if (*(ctxt->input->cur) == '\n') {       \
2046
295k
  ctxt->input->line++; ctxt->input->col = 1;      \
2047
18.4M
  } else ctxt->input->col++;         \
2048
18.7M
  ctxt->input->cur++;           \
2049
18.7M
    }                  \
2050
183k
    if (*ctxt->input->cur == 0)           \
2051
183k
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);     \
2052
183k
  } while (0)
2053
2054
154M
#define SHRINK if ((ctxt->progressive == 0) &&       \
2055
154M
       (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
2056
154M
       (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
2057
154M
  xmlSHRINK (ctxt);
2058
2059
3.22M
/*
 * xmlSHRINK:
 * Shrinks the current input when it is backed by a buffer that has an
 * encoder or a read callback, then grows it again if the current
 * position sits on a zero byte.
 */
static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
    xmlParserInputPtr in = ctxt->input;

    /* Don't shrink memory buffers. */
    if (in->buf != NULL) {
        if ((in->buf->encoder) || (in->buf->readcallback))
            xmlParserInputShrink(in);
    }

    if (*in->cur == 0)
        xmlParserInputGrow(in, INPUT_CHUNK);
}
2067
2068
442M
#define GROW if ((ctxt->progressive == 0) &&       \
2069
442M
     (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
2070
442M
  xmlGROW (ctxt);
2071
2072
43.3M
/*
 * xmlGROW:
 * Reads more data into the current input. Unless XML_PARSE_HUGE is set,
 * an input fed by a read callback whose pending or already-scanned span
 * exceeds XML_MAX_LOOKUP_LIMIT raises an error and halts the parser
 * instead. The parser is also halted if the grow leaves the cursor
 * outside [base, end].
 */
static void xmlGROW (xmlParserCtxtPtr ctxt) {
    ptrdiff_t ahead = ctxt->input->end - ctxt->input->cur;
    ptrdiff_t behind = ctxt->input->cur - ctxt->input->base;
    int overLimit = ((ahead > XML_MAX_LOOKUP_LIMIT) ||
                     (behind > XML_MAX_LOOKUP_LIMIT));

    if (overLimit &&
        ((ctxt->input->buf) &&
         (ctxt->input->buf->readcallback != NULL)) &&
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
        xmlHaltParser(ctxt);
        return;
    }

    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);

    if ((ctxt->input->cur > ctxt->input->end) ||
        (ctxt->input->cur < ctxt->input->base)) {
        /*
         * NOTE(review): the parser is halted BEFORE the error is raised
         * here, the reverse of the branch above. Presumably this keeps
         * error reporting away from an out-of-range cursor; confirm
         * before reordering.
         */
        xmlHaltParser(ctxt);
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "cur index out of bound");
        return;
    }

    if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0))
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
}
2095
2096
93.1M
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)
2097
2098
148M
#define NEXT xmlNextChar(ctxt)
2099
2100
21.3M
#define NEXT1 {               \
2101
21.3M
  ctxt->input->col++;           \
2102
21.3M
  ctxt->input->cur++;           \
2103
21.3M
  if (*ctxt->input->cur == 0)         \
2104
21.3M
      xmlParserInputGrow(ctxt->input, INPUT_CHUNK);   \
2105
21.3M
    }
2106
2107
400M
#define NEXTL(l) do {             \
2108
400M
    if (*(ctxt->input->cur) == '\n') {         \
2109
6.10M
  ctxt->input->line++; ctxt->input->col = 1;      \
2110
394M
    } else ctxt->input->col++;           \
2111
400M
    ctxt->input->cur += l;        \
2112
400M
  } while (0)
2113
2114
436M
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
2115
3.83G
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)
2116
2117
#define COPY_BUF(l,b,i,v)           \
2118
4.11G
    if (l == 1) b[i++] = v;           \
2119
4.11G
    else i += xmlCopyCharMultiByte(&b[i],v)
2120
2121
/**
2122
 * xmlSkipBlankChars:
2123
 * @ctxt:  the XML parser context
2124
 *
2125
 * skip all blanks character found at that point in the input streams.
2126
 * It pops up finished entities in the process if allowable at that point.
2127
 *
2128
 * Returns the number of space chars skipped
2129
 */
2130
2131
int
2132
93.1M
xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2133
93.1M
    int res = 0;
2134
2135
    /*
2136
     * It's Okay to use CUR/NEXT here since all the blanks are on
2137
     * the ASCII range.
2138
     */
2139
93.1M
    if (((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) ||
2140
93.1M
        (ctxt->instate == XML_PARSER_START)) {
2141
52.8M
  const xmlChar *cur;
2142
  /*
2143
   * if we are in the document content, go really fast
2144
   */
2145
52.8M
  cur = ctxt->input->cur;
2146
52.8M
  while (IS_BLANK_CH(*cur)) {
2147
21.8M
      if (*cur == '\n') {
2148
1.98M
    ctxt->input->line++; ctxt->input->col = 1;
2149
19.8M
      } else {
2150
19.8M
    ctxt->input->col++;
2151
19.8M
      }
2152
21.8M
      cur++;
2153
21.8M
      if (res < INT_MAX)
2154
21.8M
    res++;
2155
21.8M
      if (*cur == 0) {
2156
73.9k
    ctxt->input->cur = cur;
2157
73.9k
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2158
73.9k
    cur = ctxt->input->cur;
2159
73.9k
      }
2160
21.8M
  }
2161
52.8M
  ctxt->input->cur = cur;
2162
52.8M
    } else {
2163
40.2M
        int expandPE = ((ctxt->external != 0) || (ctxt->inputNr != 1));
2164
2165
106M
  while (ctxt->instate != XML_PARSER_EOF) {
2166
106M
            if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2167
37.2M
    NEXT;
2168
69.6M
      } else if (CUR == '%') {
2169
                /*
2170
                 * Need to handle support of entities branching here
2171
                 */
2172
15.8M
          if ((expandPE == 0) || (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2173
338k
                    break;
2174
15.5M
          xmlParsePEReference(ctxt);
2175
53.7M
            } else if (CUR == 0) {
2176
13.8M
                unsigned long consumed;
2177
13.8M
                xmlEntityPtr ent;
2178
2179
13.8M
                if (ctxt->inputNr <= 1)
2180
64.3k
                    break;
2181
2182
13.8M
                consumed = ctxt->input->consumed;
2183
13.8M
                xmlSaturatedAddSizeT(&consumed,
2184
13.8M
                                     ctxt->input->cur - ctxt->input->base);
2185
2186
                /*
2187
                 * Add to sizeentities when parsing an external entity
2188
                 * for the first time.
2189
                 */
2190
13.8M
                ent = ctxt->input->entity;
2191
13.8M
                if ((ent->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
2192
13.8M
                    ((ent->flags & XML_ENT_PARSED) == 0)) {
2193
2.33k
                    ent->flags |= XML_ENT_PARSED;
2194
2195
2.33k
                    xmlSaturatedAdd(&ctxt->sizeentities, consumed);
2196
2.33k
                }
2197
2198
13.8M
                xmlParserEntityCheck(ctxt, consumed);
2199
2200
13.8M
                xmlPopInput(ctxt);
2201
39.8M
            } else {
2202
39.8M
                break;
2203
39.8M
            }
2204
2205
            /*
2206
             * Also increase the counter when entering or exiting a PERef.
2207
             * The spec says: "When a parameter-entity reference is recognized
2208
             * in the DTD and included, its replacement text MUST be enlarged
2209
             * by the attachment of one leading and one following space (#x20)
2210
             * character."
2211
             */
2212
66.6M
      if (res < INT_MAX)
2213
66.6M
    res++;
2214
66.6M
        }
2215
40.2M
    }
2216
93.1M
    return(res);
2217
93.1M
}
2218
2219
/************************************************************************
2220
 *                  *
2221
 *    Commodity functions to handle entities      *
2222
 *                  *
2223
 ************************************************************************/
2224
2225
/**
2226
 * xmlPopInput:
2227
 * @ctxt:  an XML parser context
2228
 *
2229
 * xmlPopInput: the current input pointed by ctxt->input came to an end
2230
 *          pop it and return the next char.
2231
 *
2232
 * Returns the current xmlChar in the parser context
2233
 */
2234
xmlChar
2235
13.8M
xmlPopInput(xmlParserCtxtPtr ctxt) {
2236
13.8M
    xmlParserInputPtr input;
2237
2238
13.8M
    if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2239
13.8M
    if (xmlParserDebugEntities)
2240
0
  xmlGenericError(xmlGenericErrorContext,
2241
0
    "Popping input %d\n", ctxt->inputNr);
2242
13.8M
    if ((ctxt->inputNr > 1) && (ctxt->inSubset == 0) &&
2243
13.8M
        (ctxt->instate != XML_PARSER_EOF))
2244
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2245
0
                    "Unfinished entity outside the DTD");
2246
13.8M
    input = inputPop(ctxt);
2247
13.8M
    if (input->entity != NULL)
2248
13.8M
        input->entity->flags &= ~XML_ENT_EXPANDING;
2249
13.8M
    xmlFreeInputStream(input);
2250
13.8M
    if (*ctxt->input->cur == 0)
2251
6.71M
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2252
13.8M
    return(CUR);
2253
13.8M
}
2254
2255
/**
2256
 * xmlPushInput:
2257
 * @ctxt:  an XML parser context
2258
 * @input:  an XML parser input fragment (entity, XML fragment ...).
2259
 *
2260
 * xmlPushInput: switch to a new input stream which is stacked on top
2261
 *               of the previous one(s).
2262
 * Returns -1 in case of error or the index in the input stack
2263
 */
2264
int
2265
13.9M
xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2266
13.9M
    int ret;
2267
13.9M
    if (input == NULL) return(-1);
2268
2269
13.8M
    if (xmlParserDebugEntities) {
2270
0
  if ((ctxt->input != NULL) && (ctxt->input->filename))
2271
0
      xmlGenericError(xmlGenericErrorContext,
2272
0
        "%s(%d): ", ctxt->input->filename,
2273
0
        ctxt->input->line);
2274
0
  xmlGenericError(xmlGenericErrorContext,
2275
0
    "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2276
0
    }
2277
13.8M
    if (((ctxt->inputNr > 40) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2278
13.8M
        (ctxt->inputNr > 100)) {
2279
0
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2280
0
        while (ctxt->inputNr > 1)
2281
0
            xmlFreeInputStream(inputPop(ctxt));
2282
0
  return(-1);
2283
0
    }
2284
13.8M
    ret = inputPush(ctxt, input);
2285
13.8M
    if (ctxt->instate == XML_PARSER_EOF)
2286
0
        return(-1);
2287
13.8M
    GROW;
2288
13.8M
    return(ret);
2289
13.8M
}
2290
2291
/**
2292
 * xmlParseCharRef:
2293
 * @ctxt:  an XML parser context
2294
 *
2295
 * DEPRECATED: Internal function, don't use.
2296
 *
2297
 * Parse a numeric character reference. Always consumes '&'.
2298
 *
2299
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2300
 *                  '&#x' [0-9a-fA-F]+ ';'
2301
 *
2302
 * [ WFC: Legal Character ]
2303
 * Characters referred to using character references must match the
2304
 * production for Char.
2305
 *
2306
 * Returns the value parsed (as an int), 0 in case of error
2307
 */
2308
int
2309
1.29M
xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2310
1.29M
    int val = 0;
2311
1.29M
    int count = 0;
2312
2313
    /*
2314
     * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2315
     */
2316
1.29M
    if ((RAW == '&') && (NXT(1) == '#') &&
2317
1.29M
        (NXT(2) == 'x')) {
2318
454k
  SKIP(3);
2319
454k
  GROW;
2320
1.69M
  while (RAW != ';') { /* loop blocked by count */
2321
1.38M
      if (count++ > 20) {
2322
27.1k
    count = 0;
2323
27.1k
    GROW;
2324
27.1k
                if (ctxt->instate == XML_PARSER_EOF)
2325
0
                    return(0);
2326
27.1k
      }
2327
1.38M
      if ((RAW >= '0') && (RAW <= '9'))
2328
709k
          val = val * 16 + (CUR - '0');
2329
674k
      else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2330
267k
          val = val * 16 + (CUR - 'a') + 10;
2331
406k
      else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2332
267k
          val = val * 16 + (CUR - 'A') + 10;
2333
139k
      else {
2334
139k
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2335
139k
    val = 0;
2336
139k
    break;
2337
139k
      }
2338
1.24M
      if (val > 0x110000)
2339
384k
          val = 0x110000;
2340
2341
1.24M
      NEXT;
2342
1.24M
      count++;
2343
1.24M
  }
2344
454k
  if (RAW == ';') {
2345
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2346
314k
      ctxt->input->col++;
2347
314k
      ctxt->input->cur++;
2348
314k
  }
2349
842k
    } else if  ((RAW == '&') && (NXT(1) == '#')) {
2350
842k
  SKIP(2);
2351
842k
  GROW;
2352
2.91M
  while (RAW != ';') { /* loop blocked by count */
2353
2.24M
      if (count++ > 20) {
2354
39.0k
    count = 0;
2355
39.0k
    GROW;
2356
39.0k
                if (ctxt->instate == XML_PARSER_EOF)
2357
0
                    return(0);
2358
39.0k
      }
2359
2.24M
      if ((RAW >= '0') && (RAW <= '9'))
2360
2.07M
          val = val * 10 + (CUR - '0');
2361
169k
      else {
2362
169k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2363
169k
    val = 0;
2364
169k
    break;
2365
169k
      }
2366
2.07M
      if (val > 0x110000)
2367
366k
          val = 0x110000;
2368
2369
2.07M
      NEXT;
2370
2.07M
      count++;
2371
2.07M
  }
2372
842k
  if (RAW == ';') {
2373
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2374
673k
      ctxt->input->col++;
2375
673k
      ctxt->input->cur++;
2376
673k
  }
2377
842k
    } else {
2378
0
        if (RAW == '&')
2379
0
            SKIP(1);
2380
0
        xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2381
0
    }
2382
2383
    /*
2384
     * [ WFC: Legal Character ]
2385
     * Characters referred to using character references must match the
2386
     * production for Char.
2387
     */
2388
1.29M
    if (val >= 0x110000) {
2389
3.70k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2390
3.70k
                "xmlParseCharRef: character reference out of bounds\n",
2391
3.70k
          val);
2392
1.29M
    } else if (IS_CHAR(val)) {
2393
908k
        return(val);
2394
908k
    } else {
2395
384k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2396
384k
                          "xmlParseCharRef: invalid xmlChar value %d\n",
2397
384k
                    val);
2398
384k
    }
2399
387k
    return(0);
2400
1.29M
}
2401
2402
/**
2403
 * xmlParseStringCharRef:
2404
 * @ctxt:  an XML parser context
2405
 * @str:  a pointer to an index in the string
2406
 *
2407
 * parse Reference declarations, variant parsing from a string rather
2408
 * than an an input flow.
2409
 *
2410
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2411
 *                  '&#x' [0-9a-fA-F]+ ';'
2412
 *
2413
 * [ WFC: Legal Character ]
2414
 * Characters referred to using character references must match the
2415
 * production for Char.
2416
 *
2417
 * Returns the value parsed (as an int), 0 in case of error, str will be
2418
 *         updated to the current value of the index
2419
 */
2420
static int
2421
210k
xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2422
210k
    const xmlChar *ptr;
2423
210k
    xmlChar cur;
2424
210k
    int val = 0;
2425
2426
210k
    if ((str == NULL) || (*str == NULL)) return(0);
2427
210k
    ptr = *str;
2428
210k
    cur = *ptr;
2429
210k
    if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2430
40.5k
  ptr += 3;
2431
40.5k
  cur = *ptr;
2432
152k
  while (cur != ';') { /* Non input consuming loop */
2433
118k
      if ((cur >= '0') && (cur <= '9'))
2434
69.3k
          val = val * 16 + (cur - '0');
2435
49.4k
      else if ((cur >= 'a') && (cur <= 'f'))
2436
13.3k
          val = val * 16 + (cur - 'a') + 10;
2437
36.0k
      else if ((cur >= 'A') && (cur <= 'F'))
2438
29.3k
          val = val * 16 + (cur - 'A') + 10;
2439
6.70k
      else {
2440
6.70k
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2441
6.70k
    val = 0;
2442
6.70k
    break;
2443
6.70k
      }
2444
112k
      if (val > 0x110000)
2445
37.2k
          val = 0x110000;
2446
2447
112k
      ptr++;
2448
112k
      cur = *ptr;
2449
112k
  }
2450
40.5k
  if (cur == ';')
2451
33.8k
      ptr++;
2452
169k
    } else if  ((cur == '&') && (ptr[1] == '#')){
2453
169k
  ptr += 2;
2454
169k
  cur = *ptr;
2455
610k
  while (cur != ';') { /* Non input consuming loops */
2456
452k
      if ((cur >= '0') && (cur <= '9'))
2457
440k
          val = val * 10 + (cur - '0');
2458
11.7k
      else {
2459
11.7k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2460
11.7k
    val = 0;
2461
11.7k
    break;
2462
11.7k
      }
2463
440k
      if (val > 0x110000)
2464
33.4k
          val = 0x110000;
2465
2466
440k
      ptr++;
2467
440k
      cur = *ptr;
2468
440k
  }
2469
169k
  if (cur == ';')
2470
158k
      ptr++;
2471
169k
    } else {
2472
0
  xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2473
0
  return(0);
2474
0
    }
2475
210k
    *str = ptr;
2476
2477
    /*
2478
     * [ WFC: Legal Character ]
2479
     * Characters referred to using character references must match the
2480
     * production for Char.
2481
     */
2482
210k
    if (val >= 0x110000) {
2483
752
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2484
752
                "xmlParseStringCharRef: character reference out of bounds\n",
2485
752
                val);
2486
209k
    } else if (IS_CHAR(val)) {
2487
184k
        return(val);
2488
184k
    } else {
2489
25.0k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2490
25.0k
        "xmlParseStringCharRef: invalid xmlChar value %d\n",
2491
25.0k
        val);
2492
25.0k
    }
2493
25.8k
    return(0);
2494
210k
}
2495
2496
/**
2497
 * xmlParserHandlePEReference:
2498
 * @ctxt:  the parser context
2499
 *
2500
 * [69] PEReference ::= '%' Name ';'
2501
 *
2502
 * [ WFC: No Recursion ]
2503
 * A parsed entity must not contain a recursive
2504
 * reference to itself, either directly or indirectly.
2505
 *
2506
 * [ WFC: Entity Declared ]
2507
 * In a document without any DTD, a document with only an internal DTD
2508
 * subset which contains no parameter entity references, or a document
2509
 * with "standalone='yes'", ...  ... The declaration of a parameter
2510
 * entity must precede any reference to it...
2511
 *
2512
 * [ VC: Entity Declared ]
2513
 * In a document with an external subset or external parameter entities
2514
 * with "standalone='no'", ...  ... The declaration of a parameter entity
2515
 * must precede any reference to it...
2516
 *
2517
 * [ WFC: In DTD ]
2518
 * Parameter-entity references may only appear in the DTD.
2519
 * NOTE: misleading but this is handled.
2520
 *
2521
 * A PEReference may have been detected in the current input stream
2522
 * the handling is done accordingly to
2523
 *      http://www.w3.org/TR/REC-xml#entproc
2524
 * i.e.
2525
 *   - Included in literal in entity values
2526
 *   - Included as Parameter Entity reference within DTDs
2527
 */
2528
void
2529
0
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2530
0
    switch(ctxt->instate) {
2531
0
  case XML_PARSER_CDATA_SECTION:
2532
0
      return;
2533
0
        case XML_PARSER_COMMENT:
2534
0
      return;
2535
0
  case XML_PARSER_START_TAG:
2536
0
      return;
2537
0
  case XML_PARSER_END_TAG:
2538
0
      return;
2539
0
        case XML_PARSER_EOF:
2540
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2541
0
      return;
2542
0
        case XML_PARSER_PROLOG:
2543
0
  case XML_PARSER_START:
2544
0
  case XML_PARSER_MISC:
2545
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2546
0
      return;
2547
0
  case XML_PARSER_ENTITY_DECL:
2548
0
        case XML_PARSER_CONTENT:
2549
0
        case XML_PARSER_ATTRIBUTE_VALUE:
2550
0
        case XML_PARSER_PI:
2551
0
  case XML_PARSER_SYSTEM_LITERAL:
2552
0
  case XML_PARSER_PUBLIC_LITERAL:
2553
      /* we just ignore it there */
2554
0
      return;
2555
0
        case XML_PARSER_EPILOG:
2556
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2557
0
      return;
2558
0
  case XML_PARSER_ENTITY_VALUE:
2559
      /*
2560
       * NOTE: in the case of entity values, we don't do the
2561
       *       substitution here since we need the literal
2562
       *       entity value to be able to save the internal
2563
       *       subset of the document.
2564
       *       This will be handled by xmlStringDecodeEntities
2565
       */
2566
0
      return;
2567
0
        case XML_PARSER_DTD:
2568
      /*
2569
       * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2570
       * In the internal DTD subset, parameter-entity references
2571
       * can occur only where markup declarations can occur, not
2572
       * within markup declarations.
2573
       * In that case this is handled in xmlParseMarkupDecl
2574
       */
2575
0
      if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2576
0
    return;
2577
0
      if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2578
0
    return;
2579
0
            break;
2580
0
        case XML_PARSER_IGNORE:
2581
0
            return;
2582
0
    }
2583
2584
0
    xmlParsePEReference(ctxt);
2585
0
}
2586
2587
/*
2588
 * Macro used to grow the current buffer.
2589
 * buffer##_size is expected to be a size_t
2590
 * mem_error: is expected to handle memory allocation failures
2591
 */
2592
2.75M
#define growBuffer(buffer, n) {           \
2593
2.75M
    xmlChar *tmp;             \
2594
2.75M
    size_t new_size = buffer##_size * 2 + n;                            \
2595
2.75M
    if (new_size < buffer##_size) goto mem_error;                       \
2596
2.75M
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);                     \
2597
2.75M
    if (tmp == NULL) goto mem_error;         \
2598
2.75M
    buffer = tmp;             \
2599
2.75M
    buffer##_size = new_size;                                           \
2600
2.75M
}
2601
2602
/**
2603
 * xmlStringDecodeEntitiesInt:
2604
 * @ctxt:  the parser context
2605
 * @str:  the input string
2606
 * @len: the string length
2607
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2608
 * @end:  an end marker xmlChar, 0 if none
2609
 * @end2:  an end marker xmlChar, 0 if none
2610
 * @end3:  an end marker xmlChar, 0 if none
2611
 * @check:  whether to perform entity checks
2612
 */
2613
static xmlChar *
2614
xmlStringDecodeEntitiesInt(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2615
               int what, xmlChar end, xmlChar  end2, xmlChar end3,
2616
34.0M
                           int check) {
2617
34.0M
    xmlChar *buffer = NULL;
2618
34.0M
    size_t buffer_size = 0;
2619
34.0M
    size_t nbchars = 0;
2620
2621
34.0M
    xmlChar *current = NULL;
2622
34.0M
    xmlChar *rep = NULL;
2623
34.0M
    const xmlChar *last;
2624
34.0M
    xmlEntityPtr ent;
2625
34.0M
    int c,l;
2626
2627
34.0M
    if (str == NULL)
2628
13.6k
        return(NULL);
2629
34.0M
    last = str + len;
2630
2631
34.0M
    if (((ctxt->depth > 40) &&
2632
34.0M
         ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2633
34.0M
  (ctxt->depth > 100)) {
2634
0
  xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_LOOP,
2635
0
                       "Maximum entity nesting depth exceeded");
2636
0
  return(NULL);
2637
0
    }
2638
2639
    /*
2640
     * allocate a translation buffer.
2641
     */
2642
34.0M
    buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2643
34.0M
    buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
2644
34.0M
    if (buffer == NULL) goto mem_error;
2645
2646
    /*
2647
     * OK loop until we reach one of the ending char or a size limit.
2648
     * we are operating on already parsed values.
2649
     */
2650
34.0M
    if (str < last)
2651
34.0M
  c = CUR_SCHAR(str, l);
2652
46.0k
    else
2653
46.0k
        c = 0;
2654
3.09G
    while ((c != 0) && (c != end) && /* non input consuming loop */
2655
3.09G
           (c != end2) && (c != end3) &&
2656
3.09G
           (ctxt->instate != XML_PARSER_EOF)) {
2657
2658
3.06G
  if (c == 0) break;
2659
3.06G
        if ((c == '&') && (str[1] == '#')) {
2660
210k
      int val = xmlParseStringCharRef(ctxt, &str);
2661
210k
      if (val == 0)
2662
25.8k
                goto int_error;
2663
184k
      COPY_BUF(0,buffer,nbchars,val);
2664
184k
      if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2665
544
          growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2666
544
      }
2667
3.06G
  } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2668
35.0M
      if (xmlParserDebugEntities)
2669
0
    xmlGenericError(xmlGenericErrorContext,
2670
0
      "String decoding Entity Reference: %.30s\n",
2671
0
      str);
2672
35.0M
      ent = xmlParseStringEntityRef(ctxt, &str);
2673
35.0M
      if ((ent != NULL) &&
2674
35.0M
    (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2675
63.3k
    if (ent->content != NULL) {
2676
63.3k
        COPY_BUF(0,buffer,nbchars,ent->content[0]);
2677
63.3k
        if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2678
1.82k
      growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2679
1.82k
        }
2680
63.3k
    } else {
2681
0
        xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2682
0
          "predefined entity has no content\n");
2683
0
                    goto int_error;
2684
0
    }
2685
35.0M
      } else if ((ent != NULL) && (ent->content != NULL)) {
2686
31.4M
          if ((check) && (xmlParserEntityCheck(ctxt, ent->length)))
2687
320
                    goto int_error;
2688
2689
31.4M
                if (ent->flags & XML_ENT_EXPANDING) {
2690
596
              xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2691
596
                    xmlHaltParser(ctxt);
2692
596
                    ent->content[0] = 0;
2693
596
                    goto int_error;
2694
596
                }
2695
2696
31.4M
                ent->flags |= XML_ENT_EXPANDING;
2697
31.4M
    ctxt->depth++;
2698
31.4M
    rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
2699
31.4M
                        ent->length, what, 0, 0, 0, check);
2700
31.4M
    ctxt->depth--;
2701
31.4M
                ent->flags &= ~XML_ENT_EXPANDING;
2702
2703
31.4M
    if (rep == NULL) {
2704
6.80k
                    ent->content[0] = 0;
2705
6.80k
                    goto int_error;
2706
6.80k
                }
2707
2708
31.4M
                current = rep;
2709
11.7G
                while (*current != 0) { /* non input consuming loop */
2710
11.7G
                    buffer[nbchars++] = *current++;
2711
11.7G
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2712
4.21M
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2713
4.21M
                    }
2714
11.7G
                }
2715
31.4M
                xmlFree(rep);
2716
31.4M
                rep = NULL;
2717
31.4M
      } else if (ent != NULL) {
2718
835k
    int i = xmlStrlen(ent->name);
2719
835k
    const xmlChar *cur = ent->name;
2720
2721
835k
    buffer[nbchars++] = '&';
2722
835k
    if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
2723
1.26k
        growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
2724
1.26k
    }
2725
5.74M
    for (;i > 0;i--)
2726
4.91M
        buffer[nbchars++] = *cur++;
2727
835k
    buffer[nbchars++] = ';';
2728
835k
      }
2729
3.02G
  } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2730
151k
      if (xmlParserDebugEntities)
2731
0
    xmlGenericError(xmlGenericErrorContext,
2732
0
      "String decoding PE Reference: %.30s\n", str);
2733
151k
      ent = xmlParseStringPEReference(ctxt, &str);
2734
151k
      if (ent != NULL) {
2735
116k
                if (ent->content == NULL) {
2736
        /*
2737
         * Note: external parsed entities will not be loaded,
2738
         * it is not required for a non-validating parser to
2739
         * complete external PEReferences coming from the
2740
         * internal subset
2741
         */
2742
1.36k
        if (((ctxt->options & XML_PARSE_NOENT) != 0) ||
2743
1.36k
      ((ctxt->options & XML_PARSE_DTDVALID) != 0) ||
2744
1.36k
      (ctxt->validate != 0)) {
2745
1.29k
      xmlLoadEntityContent(ctxt, ent);
2746
1.29k
        } else {
2747
67
      xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
2748
67
      "not validating will not read content for PE entity %s\n",
2749
67
                          ent->name, NULL);
2750
67
        }
2751
1.36k
    }
2752
2753
116k
          if ((check) && (xmlParserEntityCheck(ctxt, ent->length)))
2754
140
                    goto int_error;
2755
2756
116k
                if (ent->flags & XML_ENT_EXPANDING) {
2757
704
              xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2758
704
                    xmlHaltParser(ctxt);
2759
704
                    if (ent->content != NULL)
2760
280
                        ent->content[0] = 0;
2761
704
                    goto int_error;
2762
704
                }
2763
2764
115k
                ent->flags |= XML_ENT_EXPANDING;
2765
115k
    ctxt->depth++;
2766
115k
    rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
2767
115k
                        ent->length, what, 0, 0, 0, check);
2768
115k
    ctxt->depth--;
2769
115k
                ent->flags &= ~XML_ENT_EXPANDING;
2770
2771
115k
    if (rep == NULL) {
2772
686
                    if (ent->content != NULL)
2773
302
                        ent->content[0] = 0;
2774
686
                    goto int_error;
2775
686
                }
2776
114k
                current = rep;
2777
1.30G
                while (*current != 0) { /* non input consuming loop */
2778
1.30G
                    buffer[nbchars++] = *current++;
2779
1.30G
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2780
32.6k
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2781
32.6k
                    }
2782
1.30G
                }
2783
114k
                xmlFree(rep);
2784
114k
                rep = NULL;
2785
114k
      }
2786
3.02G
  } else {
2787
3.02G
      COPY_BUF(l,buffer,nbchars,c);
2788
3.02G
      str += l;
2789
3.02G
      if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2790
843k
          growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2791
843k
      }
2792
3.02G
  }
2793
3.06G
  if (str < last)
2794
3.02G
      c = CUR_SCHAR(str, l);
2795
33.9M
  else
2796
33.9M
      c = 0;
2797
3.06G
    }
2798
34.0M
    buffer[nbchars] = 0;
2799
34.0M
    return(buffer);
2800
2801
0
mem_error:
2802
0
    xmlErrMemory(ctxt, NULL);
2803
35.0k
int_error:
2804
35.0k
    if (rep != NULL)
2805
0
        xmlFree(rep);
2806
35.0k
    if (buffer != NULL)
2807
35.0k
        xmlFree(buffer);
2808
35.0k
    return(NULL);
2809
0
}
2810
2811
/**
2812
 * xmlStringLenDecodeEntities:
2813
 * @ctxt:  the parser context
2814
 * @str:  the input string
2815
 * @len: the string length
2816
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2817
 * @end:  an end marker xmlChar, 0 if none
2818
 * @end2:  an end marker xmlChar, 0 if none
2819
 * @end3:  an end marker xmlChar, 0 if none
2820
 *
2821
 * DEPRECATED: Internal function, don't use.
2822
 *
2823
 * Takes a entity string content and process to do the adequate substitutions.
2824
 *
2825
 * [67] Reference ::= EntityRef | CharRef
2826
 *
2827
 * [69] PEReference ::= '%' Name ';'
2828
 *
2829
 * Returns A newly allocated string with the substitution done. The caller
2830
 *      must deallocate it !
2831
 */
2832
xmlChar *
2833
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2834
                           int what, xmlChar end, xmlChar  end2,
2835
8.44k
                           xmlChar end3) {
2836
8.44k
    if ((ctxt == NULL) || (str == NULL) || (len < 0))
2837
0
        return(NULL);
2838
8.44k
    return(xmlStringDecodeEntitiesInt(ctxt, str, len, what,
2839
8.44k
                                      end, end2, end3, 0));
2840
8.44k
}
2841
2842
/**
2843
 * xmlStringDecodeEntities:
2844
 * @ctxt:  the parser context
2845
 * @str:  the input string
2846
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2847
 * @end:  an end marker xmlChar, 0 if none
2848
 * @end2:  an end marker xmlChar, 0 if none
2849
 * @end3:  an end marker xmlChar, 0 if none
2850
 *
2851
 * DEPRECATED: Internal function, don't use.
2852
 *
2853
 * Takes a entity string content and process to do the adequate substitutions.
2854
 *
2855
 * [67] Reference ::= EntityRef | CharRef
2856
 *
2857
 * [69] PEReference ::= '%' Name ';'
2858
 *
2859
 * Returns A newly allocated string with the substitution done. The caller
2860
 *      must deallocate it !
2861
 */
2862
xmlChar *
2863
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2864
248k
            xmlChar end, xmlChar  end2, xmlChar end3) {
2865
248k
    if ((ctxt == NULL) || (str == NULL)) return(NULL);
2866
248k
    return(xmlStringDecodeEntitiesInt(ctxt, str, xmlStrlen(str), what,
2867
248k
                                      end, end2, end3, 0));
2868
248k
}
2869
2870
/************************************************************************
2871
 *                  *
2872
 *    Commodity functions, cleanup needed ?     *
2873
 *                  *
2874
 ************************************************************************/
2875
2876
/**
2877
 * areBlanks:
2878
 * @ctxt:  an XML parser context
2879
 * @str:  a xmlChar *
2880
 * @len:  the size of @str
2881
 * @blank_chars: we know the chars are blanks
2882
 *
2883
 * Is this a sequence of blank chars that one can ignore ?
2884
 *
2885
 * Returns 1 if ignorable 0 otherwise.
2886
 */
2887
2888
static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2889
9.41M
                     int blank_chars) {
2890
9.41M
    int i, ret;
2891
9.41M
    xmlNodePtr lastChild;
2892
2893
    /*
2894
     * Don't spend time trying to differentiate them, the same callback is
2895
     * used !
2896
     */
2897
9.41M
    if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2898
2.26M
  return(0);
2899
2900
    /*
2901
     * Check for xml:space value.
2902
     */
2903
7.15M
    if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2904
7.15M
        (*(ctxt->space) == -2))
2905
3.38M
  return(0);
2906
2907
    /*
2908
     * Check that the string is made of blanks
2909
     */
2910
3.76M
    if (blank_chars == 0) {
2911
5.40M
  for (i = 0;i < len;i++)
2912
4.91M
      if (!(IS_BLANK_CH(str[i]))) return(0);
2913
1.56M
    }
2914
2915
    /*
2916
     * Look if the element is mixed content in the DTD if available
2917
     */
2918
2.69M
    if (ctxt->node == NULL) return(0);
2919
2.52M
    if (ctxt->myDoc != NULL) {
2920
2.52M
  ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2921
2.52M
        if (ret == 0) return(1);
2922
2.42M
        if (ret == 1) return(0);
2923
2.42M
    }
2924
2925
    /*
2926
     * Otherwise, heuristic :-\
2927
     */
2928
2.40M
    if ((RAW != '<') && (RAW != 0xD)) return(0);
2929
2.31M
    if ((ctxt->node->children == NULL) &&
2930
2.31M
  (RAW == '<') && (NXT(1) == '/')) return(0);
2931
2932
2.28M
    lastChild = xmlGetLastChild(ctxt->node);
2933
2.28M
    if (lastChild == NULL) {
2934
1.00M
        if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2935
1.00M
            (ctxt->node->content != NULL)) return(0);
2936
1.28M
    } else if (xmlNodeIsText(lastChild))
2937
146k
        return(0);
2938
1.13M
    else if ((ctxt->node->children != NULL) &&
2939
1.13M
             (xmlNodeIsText(ctxt->node->children)))
2940
35.2k
        return(0);
2941
2.10M
    return(1);
2942
2.28M
}
2943
2944
/************************************************************************
2945
 *                  *
2946
 *    Extra stuff for namespace support     *
2947
 *  Relates to http://www.w3.org/TR/WD-xml-names      *
2948
 *                  *
2949
 ************************************************************************/
2950
2951
/**
2952
 * xmlSplitQName:
2953
 * @ctxt:  an XML parser context
2954
 * @name:  an XML parser context
2955
 * @prefix:  a xmlChar **
2956
 *
2957
 * parse an UTF8 encoded XML qualified name string
2958
 *
2959
 * [NS 5] QName ::= (Prefix ':')? LocalPart
2960
 *
2961
 * [NS 6] Prefix ::= NCName
2962
 *
2963
 * [NS 7] LocalPart ::= NCName
2964
 *
2965
 * Returns the local part, and prefix is updated
2966
 *   to get the Prefix if any.
2967
 */
2968
2969
xmlChar *
2970
8.74M
xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2971
8.74M
    xmlChar buf[XML_MAX_NAMELEN + 5];
2972
8.74M
    xmlChar *buffer = NULL;
2973
8.74M
    int len = 0;
2974
8.74M
    int max = XML_MAX_NAMELEN;
2975
8.74M
    xmlChar *ret = NULL;
2976
8.74M
    const xmlChar *cur = name;
2977
8.74M
    int c;
2978
2979
8.74M
    if (prefix == NULL) return(NULL);
2980
8.74M
    *prefix = NULL;
2981
2982
8.74M
    if (cur == NULL) return(NULL);
2983
2984
#ifndef XML_XML_NAMESPACE
2985
    /* xml: prefix is not really a namespace */
2986
    if ((cur[0] == 'x') && (cur[1] == 'm') &&
2987
        (cur[2] == 'l') && (cur[3] == ':'))
2988
  return(xmlStrdup(name));
2989
#endif
2990
2991
    /* nasty but well=formed */
2992
8.74M
    if (cur[0] == ':')
2993
39.5k
  return(xmlStrdup(name));
2994
2995
8.70M
    c = *cur++;
2996
46.5M
    while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2997
37.8M
  buf[len++] = c;
2998
37.8M
  c = *cur++;
2999
37.8M
    }
3000
8.70M
    if (len >= max) {
3001
  /*
3002
   * Okay someone managed to make a huge name, so he's ready to pay
3003
   * for the processing speed.
3004
   */
3005
23.4k
  max = len * 2;
3006
3007
23.4k
  buffer = (xmlChar *) xmlMallocAtomic(max);
3008
23.4k
  if (buffer == NULL) {
3009
0
      xmlErrMemory(ctxt, NULL);
3010
0
      return(NULL);
3011
0
  }
3012
23.4k
  memcpy(buffer, buf, len);
3013
3.42M
  while ((c != 0) && (c != ':')) { /* tested bigname.xml */
3014
3.40M
      if (len + 10 > max) {
3015
5.92k
          xmlChar *tmp;
3016
3017
5.92k
    max *= 2;
3018
5.92k
    tmp = (xmlChar *) xmlRealloc(buffer, max);
3019
5.92k
    if (tmp == NULL) {
3020
0
        xmlFree(buffer);
3021
0
        xmlErrMemory(ctxt, NULL);
3022
0
        return(NULL);
3023
0
    }
3024
5.92k
    buffer = tmp;
3025
5.92k
      }
3026
3.40M
      buffer[len++] = c;
3027
3.40M
      c = *cur++;
3028
3.40M
  }
3029
23.4k
  buffer[len] = 0;
3030
23.4k
    }
3031
3032
8.70M
    if ((c == ':') && (*cur == 0)) {
3033
65.1k
        if (buffer != NULL)
3034
855
      xmlFree(buffer);
3035
65.1k
  *prefix = NULL;
3036
65.1k
  return(xmlStrdup(name));
3037
65.1k
    }
3038
3039
8.64M
    if (buffer == NULL)
3040
8.62M
  ret = xmlStrndup(buf, len);
3041
22.5k
    else {
3042
22.5k
  ret = buffer;
3043
22.5k
  buffer = NULL;
3044
22.5k
  max = XML_MAX_NAMELEN;
3045
22.5k
    }
3046
3047
3048
8.64M
    if (c == ':') {
3049
2.33M
  c = *cur;
3050
2.33M
        *prefix = ret;
3051
2.33M
  if (c == 0) {
3052
0
      return(xmlStrndup(BAD_CAST "", 0));
3053
0
  }
3054
2.33M
  len = 0;
3055
3056
  /*
3057
   * Check that the first character is proper to start
3058
   * a new name
3059
   */
3060
2.33M
  if (!(((c >= 0x61) && (c <= 0x7A)) ||
3061
2.33M
        ((c >= 0x41) && (c <= 0x5A)) ||
3062
2.33M
        (c == '_') || (c == ':'))) {
3063
82.6k
      int l;
3064
82.6k
      int first = CUR_SCHAR(cur, l);
3065
3066
82.6k
      if (!IS_LETTER(first) && (first != '_')) {
3067
28.6k
    xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3068
28.6k
          "Name %s is not XML Namespace compliant\n",
3069
28.6k
          name);
3070
28.6k
      }
3071
82.6k
  }
3072
2.33M
  cur++;
3073
3074
16.3M
  while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3075
13.9M
      buf[len++] = c;
3076
13.9M
      c = *cur++;
3077
13.9M
  }
3078
2.33M
  if (len >= max) {
3079
      /*
3080
       * Okay someone managed to make a huge name, so he's ready to pay
3081
       * for the processing speed.
3082
       */
3083
17.2k
      max = len * 2;
3084
3085
17.2k
      buffer = (xmlChar *) xmlMallocAtomic(max);
3086
17.2k
      if (buffer == NULL) {
3087
0
          xmlErrMemory(ctxt, NULL);
3088
0
    return(NULL);
3089
0
      }
3090
17.2k
      memcpy(buffer, buf, len);
3091
2.22M
      while (c != 0) { /* tested bigname2.xml */
3092
2.21M
    if (len + 10 > max) {
3093
5.19k
        xmlChar *tmp;
3094
3095
5.19k
        max *= 2;
3096
5.19k
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3097
5.19k
        if (tmp == NULL) {
3098
0
      xmlErrMemory(ctxt, NULL);
3099
0
      xmlFree(buffer);
3100
0
      return(NULL);
3101
0
        }
3102
5.19k
        buffer = tmp;
3103
5.19k
    }
3104
2.21M
    buffer[len++] = c;
3105
2.21M
    c = *cur++;
3106
2.21M
      }
3107
17.2k
      buffer[len] = 0;
3108
17.2k
  }
3109
3110
2.33M
  if (buffer == NULL)
3111
2.32M
      ret = xmlStrndup(buf, len);
3112
17.2k
  else {
3113
17.2k
      ret = buffer;
3114
17.2k
  }
3115
2.33M
    }
3116
3117
8.64M
    return(ret);
3118
8.64M
}
3119
3120
/************************************************************************
3121
 *                  *
3122
 *      The parser itself       *
3123
 *  Relates to http://www.w3.org/TR/REC-xml       *
3124
 *                  *
3125
 ************************************************************************/
3126
3127
/************************************************************************
3128
 *                  *
3129
 *  Routines to parse Name, NCName and NmToken      *
3130
 *                  *
3131
 ************************************************************************/
3132
#ifdef DEBUG
3133
static unsigned long nbParseName = 0;
3134
static unsigned long nbParseNmToken = 0;
3135
static unsigned long nbParseNCName = 0;
3136
static unsigned long nbParseNCNameComplex = 0;
3137
static unsigned long nbParseNameComplex = 0;
3138
static unsigned long nbParseStringName = 0;
3139
#endif
3140
3141
/*
3142
 * The two following functions are related to the change of accepted
3143
 * characters for Name and NmToken in the Revision 5 of XML-1.0
3144
 * They correspond to the modified production [4] and the new production [4a]
3145
 * changes in that revision. Also note that the macros used for the
3146
 * productions Letter, Digit, CombiningChar and Extender are not needed
3147
 * anymore.
3148
 * We still keep compatibility to pre-revision5 parsing semantic if the
3149
 * new XML_PARSE_OLD10 option is given to the parser.
3150
 */
3151
static int
3152
39.2M
xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3153
39.2M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3154
        /*
3155
   * Use the new checks of production [4] [4a] amd [5] of the
3156
   * Update 5 of XML-1.0
3157
   */
3158
34.7M
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3159
34.7M
      (((c >= 'a') && (c <= 'z')) ||
3160
34.7M
       ((c >= 'A') && (c <= 'Z')) ||
3161
34.7M
       (c == '_') || (c == ':') ||
3162
34.7M
       ((c >= 0xC0) && (c <= 0xD6)) ||
3163
34.7M
       ((c >= 0xD8) && (c <= 0xF6)) ||
3164
34.7M
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3165
34.7M
       ((c >= 0x370) && (c <= 0x37D)) ||
3166
34.7M
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3167
34.7M
       ((c >= 0x200C) && (c <= 0x200D)) ||
3168
34.7M
       ((c >= 0x2070) && (c <= 0x218F)) ||
3169
34.7M
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3170
34.7M
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3171
34.7M
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3172
34.7M
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3173
34.7M
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3174
33.3M
      return(1);
3175
34.7M
    } else {
3176
4.40M
        if (IS_LETTER(c) || (c == '_') || (c == ':'))
3177
3.85M
      return(1);
3178
4.40M
    }
3179
2.02M
    return(0);
3180
39.2M
}
3181
3182
static int
3183
768M
xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3184
768M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3185
        /*
3186
   * Use the new checks of production [4] [4a] amd [5] of the
3187
   * Update 5 of XML-1.0
3188
   */
3189
743M
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3190
743M
      (((c >= 'a') && (c <= 'z')) ||
3191
743M
       ((c >= 'A') && (c <= 'Z')) ||
3192
743M
       ((c >= '0') && (c <= '9')) || /* !start */
3193
743M
       (c == '_') || (c == ':') ||
3194
743M
       (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3195
743M
       ((c >= 0xC0) && (c <= 0xD6)) ||
3196
743M
       ((c >= 0xD8) && (c <= 0xF6)) ||
3197
743M
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3198
743M
       ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3199
743M
       ((c >= 0x370) && (c <= 0x37D)) ||
3200
743M
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3201
743M
       ((c >= 0x200C) && (c <= 0x200D)) ||
3202
743M
       ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3203
743M
       ((c >= 0x2070) && (c <= 0x218F)) ||
3204
743M
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3205
743M
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3206
743M
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3207
743M
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3208
743M
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3209
710M
       return(1);
3210
743M
    } else {
3211
24.4M
        if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3212
24.4M
            (c == '.') || (c == '-') ||
3213
24.4M
      (c == '_') || (c == ':') ||
3214
24.4M
      (IS_COMBINING(c)) ||
3215
24.4M
      (IS_EXTENDER(c)))
3216
20.5M
      return(1);
3217
24.4M
    }
3218
37.2M
    return(0);
3219
768M
}
3220
3221
static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
3222
                                          int *len, int *alloc, int normalize);
3223
3224
static const xmlChar *
3225
5.00M
xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3226
5.00M
    int len = 0, l;
3227
5.00M
    int c;
3228
5.00M
    int count = 0;
3229
5.00M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3230
2.05M
                    XML_MAX_TEXT_LENGTH :
3231
5.00M
                    XML_MAX_NAME_LENGTH;
3232
3233
#ifdef DEBUG
3234
    nbParseNameComplex++;
3235
#endif
3236
3237
    /*
3238
     * Handler for more complex cases
3239
     */
3240
5.00M
    GROW;
3241
5.00M
    if (ctxt->instate == XML_PARSER_EOF)
3242
17
        return(NULL);
3243
5.00M
    c = CUR_CHAR(l);
3244
5.00M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3245
        /*
3246
   * Use the new checks of production [4] [4a] amd [5] of the
3247
   * Update 5 of XML-1.0
3248
   */
3249
2.85M
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3250
2.85M
      (!(((c >= 'a') && (c <= 'z')) ||
3251
2.69M
         ((c >= 'A') && (c <= 'Z')) ||
3252
2.69M
         (c == '_') || (c == ':') ||
3253
2.69M
         ((c >= 0xC0) && (c <= 0xD6)) ||
3254
2.69M
         ((c >= 0xD8) && (c <= 0xF6)) ||
3255
2.69M
         ((c >= 0xF8) && (c <= 0x2FF)) ||
3256
2.69M
         ((c >= 0x370) && (c <= 0x37D)) ||
3257
2.69M
         ((c >= 0x37F) && (c <= 0x1FFF)) ||
3258
2.69M
         ((c >= 0x200C) && (c <= 0x200D)) ||
3259
2.69M
         ((c >= 0x2070) && (c <= 0x218F)) ||
3260
2.69M
         ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3261
2.69M
         ((c >= 0x3001) && (c <= 0xD7FF)) ||
3262
2.69M
         ((c >= 0xF900) && (c <= 0xFDCF)) ||
3263
2.69M
         ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3264
2.69M
         ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3265
1.87M
      return(NULL);
3266
1.87M
  }
3267
985k
  len += l;
3268
985k
  NEXTL(l);
3269
985k
  c = CUR_CHAR(l);
3270
20.1M
  while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3271
20.1M
         (((c >= 'a') && (c <= 'z')) ||
3272
19.9M
          ((c >= 'A') && (c <= 'Z')) ||
3273
19.9M
          ((c >= '0') && (c <= '9')) || /* !start */
3274
19.9M
          (c == '_') || (c == ':') ||
3275
19.9M
          (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3276
19.9M
          ((c >= 0xC0) && (c <= 0xD6)) ||
3277
19.9M
          ((c >= 0xD8) && (c <= 0xF6)) ||
3278
19.9M
          ((c >= 0xF8) && (c <= 0x2FF)) ||
3279
19.9M
          ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3280
19.9M
          ((c >= 0x370) && (c <= 0x37D)) ||
3281
19.9M
          ((c >= 0x37F) && (c <= 0x1FFF)) ||
3282
19.9M
          ((c >= 0x200C) && (c <= 0x200D)) ||
3283
19.9M
          ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3284
19.9M
          ((c >= 0x2070) && (c <= 0x218F)) ||
3285
19.9M
          ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3286
19.9M
          ((c >= 0x3001) && (c <= 0xD7FF)) ||
3287
19.9M
          ((c >= 0xF900) && (c <= 0xFDCF)) ||
3288
19.9M
          ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3289
19.9M
          ((c >= 0x10000) && (c <= 0xEFFFF))
3290
19.9M
    )) {
3291
19.2M
      if (count++ > XML_PARSER_CHUNK_SIZE) {
3292
74.7k
    count = 0;
3293
74.7k
    GROW;
3294
74.7k
                if (ctxt->instate == XML_PARSER_EOF)
3295
0
                    return(NULL);
3296
74.7k
      }
3297
19.2M
            if (len <= INT_MAX - l)
3298
19.2M
          len += l;
3299
19.2M
      NEXTL(l);
3300
19.2M
      c = CUR_CHAR(l);
3301
19.2M
  }
3302
2.14M
    } else {
3303
2.14M
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3304
2.14M
      (!IS_LETTER(c) && (c != '_') &&
3305
2.00M
       (c != ':'))) {
3306
1.45M
      return(NULL);
3307
1.45M
  }
3308
686k
  len += l;
3309
686k
  NEXTL(l);
3310
686k
  c = CUR_CHAR(l);
3311
3312
14.3M
  while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3313
14.3M
         ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3314
14.0M
    (c == '.') || (c == '-') ||
3315
14.0M
    (c == '_') || (c == ':') ||
3316
14.0M
    (IS_COMBINING(c)) ||
3317
14.0M
    (IS_EXTENDER(c)))) {
3318
13.6M
      if (count++ > XML_PARSER_CHUNK_SIZE) {
3319
55.0k
    count = 0;
3320
55.0k
    GROW;
3321
55.0k
                if (ctxt->instate == XML_PARSER_EOF)
3322
0
                    return(NULL);
3323
55.0k
      }
3324
13.6M
            if (len <= INT_MAX - l)
3325
13.6M
          len += l;
3326
13.6M
      NEXTL(l);
3327
13.6M
      c = CUR_CHAR(l);
3328
13.6M
  }
3329
686k
    }
3330
1.67M
    if (len > maxLength) {
3331
8
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3332
8
        return(NULL);
3333
8
    }
3334
1.67M
    if (ctxt->input->cur - ctxt->input->base < len) {
3335
        /*
3336
         * There were a couple of bugs where PERefs lead to to a change
3337
         * of the buffer. Check the buffer size to avoid passing an invalid
3338
         * pointer to xmlDictLookup.
3339
         */
3340
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3341
0
                    "unexpected change of input buffer");
3342
0
        return (NULL);
3343
0
    }
3344
1.67M
    if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3345
11.7k
        return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3346
1.66M
    return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3347
1.67M
}
3348
3349
/**
3350
 * xmlParseName:
3351
 * @ctxt:  an XML parser context
3352
 *
3353
 * DEPRECATED: Internal function, don't use.
3354
 *
3355
 * parse an XML name.
3356
 *
3357
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3358
 *                  CombiningChar | Extender
3359
 *
3360
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3361
 *
3362
 * [6] Names ::= Name (#x20 Name)*
3363
 *
3364
 * Returns the Name parsed or NULL
3365
 */
3366
3367
const xmlChar *
3368
41.5M
xmlParseName(xmlParserCtxtPtr ctxt) {
3369
41.5M
    const xmlChar *in;
3370
41.5M
    const xmlChar *ret;
3371
41.5M
    size_t count = 0;
3372
41.5M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3373
10.3M
                       XML_MAX_TEXT_LENGTH :
3374
41.5M
                       XML_MAX_NAME_LENGTH;
3375
3376
41.5M
    GROW;
3377
3378
#ifdef DEBUG
3379
    nbParseName++;
3380
#endif
3381
3382
    /*
3383
     * Accelerator for simple ASCII names
3384
     */
3385
41.5M
    in = ctxt->input->cur;
3386
41.5M
    if (((*in >= 0x61) && (*in <= 0x7A)) ||
3387
41.5M
  ((*in >= 0x41) && (*in <= 0x5A)) ||
3388
41.5M
  (*in == '_') || (*in == ':')) {
3389
37.6M
  in++;
3390
175M
  while (((*in >= 0x61) && (*in <= 0x7A)) ||
3391
175M
         ((*in >= 0x41) && (*in <= 0x5A)) ||
3392
175M
         ((*in >= 0x30) && (*in <= 0x39)) ||
3393
175M
         (*in == '_') || (*in == '-') ||
3394
175M
         (*in == ':') || (*in == '.'))
3395
137M
      in++;
3396
37.6M
  if ((*in > 0) && (*in < 0x80)) {
3397
36.5M
      count = in - ctxt->input->cur;
3398
36.5M
            if (count > maxLength) {
3399
7
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3400
7
                return(NULL);
3401
7
            }
3402
36.5M
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3403
36.5M
      ctxt->input->cur = in;
3404
36.5M
      ctxt->input->col += count;
3405
36.5M
      if (ret == NULL)
3406
0
          xmlErrMemory(ctxt, NULL);
3407
36.5M
      return(ret);
3408
36.5M
  }
3409
37.6M
    }
3410
    /* accelerator for special cases */
3411
5.00M
    return(xmlParseNameComplex(ctxt));
3412
41.5M
}
3413
3414
static const xmlChar *
3415
3.70M
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3416
3.70M
    int len = 0, l;
3417
3.70M
    int c;
3418
3.70M
    int count = 0;
3419
3.70M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3420
1.06M
                    XML_MAX_TEXT_LENGTH :
3421
3.70M
                    XML_MAX_NAME_LENGTH;
3422
3.70M
    size_t startPosition = 0;
3423
3424
#ifdef DEBUG
3425
    nbParseNCNameComplex++;
3426
#endif
3427
3428
    /*
3429
     * Handler for more complex cases
3430
     */
3431
3.70M
    GROW;
3432
3.70M
    startPosition = CUR_PTR - BASE_PTR;
3433
3.70M
    c = CUR_CHAR(l);
3434
3.70M
    if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3435
3.70M
  (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3436
2.43M
  return(NULL);
3437
2.43M
    }
3438
3439
20.4M
    while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3440
20.4M
     (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3441
19.1M
  if (count++ > XML_PARSER_CHUNK_SIZE) {
3442
58.1k
      count = 0;
3443
58.1k
      GROW;
3444
58.1k
            if (ctxt->instate == XML_PARSER_EOF)
3445
0
                return(NULL);
3446
58.1k
  }
3447
19.1M
        if (len <= INT_MAX - l)
3448
19.1M
      len += l;
3449
19.1M
  NEXTL(l);
3450
19.1M
  c = CUR_CHAR(l);
3451
19.1M
  if (c == 0) {
3452
85.1k
      count = 0;
3453
      /*
3454
       * when shrinking to extend the buffer we really need to preserve
3455
       * the part of the name we already parsed. Hence rolling back
3456
       * by current length.
3457
       */
3458
85.1k
      ctxt->input->cur -= l;
3459
85.1k
      GROW;
3460
85.1k
            if (ctxt->instate == XML_PARSER_EOF)
3461
0
                return(NULL);
3462
85.1k
      ctxt->input->cur += l;
3463
85.1k
      c = CUR_CHAR(l);
3464
85.1k
  }
3465
19.1M
    }
3466
1.26M
    if (len > maxLength) {
3467
3
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3468
3
        return(NULL);
3469
3
    }
3470
1.26M
    return(xmlDictLookup(ctxt->dict, (BASE_PTR + startPosition), len));
3471
1.26M
}
3472
3473
/**
3474
 * xmlParseNCName:
3475
 * @ctxt:  an XML parser context
3476
 * @len:  length of the string parsed
3477
 *
3478
 * parse an XML name.
3479
 *
3480
 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3481
 *                      CombiningChar | Extender
3482
 *
3483
 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3484
 *
3485
 * Returns the Name parsed or NULL
3486
 */
3487
3488
static const xmlChar *
3489
23.0M
xmlParseNCName(xmlParserCtxtPtr ctxt) {
3490
23.0M
    const xmlChar *in, *e;
3491
23.0M
    const xmlChar *ret;
3492
23.0M
    size_t count = 0;
3493
23.0M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3494
5.59M
                       XML_MAX_TEXT_LENGTH :
3495
23.0M
                       XML_MAX_NAME_LENGTH;
3496
3497
#ifdef DEBUG
3498
    nbParseNCName++;
3499
#endif
3500
3501
    /*
3502
     * Accelerator for simple ASCII names
3503
     */
3504
23.0M
    in = ctxt->input->cur;
3505
23.0M
    e = ctxt->input->end;
3506
23.0M
    if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3507
23.0M
   ((*in >= 0x41) && (*in <= 0x5A)) ||
3508
23.0M
   (*in == '_')) && (in < e)) {
3509
20.0M
  in++;
3510
82.6M
  while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3511
82.6M
          ((*in >= 0x41) && (*in <= 0x5A)) ||
3512
82.6M
          ((*in >= 0x30) && (*in <= 0x39)) ||
3513
82.6M
          (*in == '_') || (*in == '-') ||
3514
82.6M
          (*in == '.')) && (in < e))
3515
62.6M
      in++;
3516
20.0M
  if (in >= e)
3517
5.15k
      goto complex;
3518
20.0M
  if ((*in > 0) && (*in < 0x80)) {
3519
19.3M
      count = in - ctxt->input->cur;
3520
19.3M
            if (count > maxLength) {
3521
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3522
0
                return(NULL);
3523
0
            }
3524
19.3M
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3525
19.3M
      ctxt->input->cur = in;
3526
19.3M
      ctxt->input->col += count;
3527
19.3M
      if (ret == NULL) {
3528
0
          xmlErrMemory(ctxt, NULL);
3529
0
      }
3530
19.3M
      return(ret);
3531
19.3M
  }
3532
20.0M
    }
3533
3.70M
complex:
3534
3.70M
    return(xmlParseNCNameComplex(ctxt));
3535
23.0M
}
3536
3537
/**
3538
 * xmlParseNameAndCompare:
3539
 * @ctxt:  an XML parser context
3540
 *
3541
 * parse an XML name and compares for match
3542
 * (specialized for endtag parsing)
3543
 *
3544
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3545
 * and the name for mismatch
3546
 */
3547
3548
static const xmlChar *
3549
2.73M
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3550
2.73M
    register const xmlChar *cmp = other;
3551
2.73M
    register const xmlChar *in;
3552
2.73M
    const xmlChar *ret;
3553
3554
2.73M
    GROW;
3555
2.73M
    if (ctxt->instate == XML_PARSER_EOF)
3556
0
        return(NULL);
3557
3558
2.73M
    in = ctxt->input->cur;
3559
12.7M
    while (*in != 0 && *in == *cmp) {
3560
10.0M
  ++in;
3561
10.0M
  ++cmp;
3562
10.0M
    }
3563
2.73M
    if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3564
  /* success */
3565
1.90M
  ctxt->input->col += in - ctxt->input->cur;
3566
1.90M
  ctxt->input->cur = in;
3567
1.90M
  return (const xmlChar*) 1;
3568
1.90M
    }
3569
    /* failure (or end of input buffer), check with full function */
3570
830k
    ret = xmlParseName (ctxt);
3571
    /* strings coming from the dictionary direct compare possible */
3572
830k
    if (ret == other) {
3573
47.1k
  return (const xmlChar*) 1;
3574
47.1k
    }
3575
783k
    return ret;
3576
830k
}
3577
3578
/**
3579
 * xmlParseStringName:
3580
 * @ctxt:  an XML parser context
3581
 * @str:  a pointer to the string pointer (IN/OUT)
3582
 *
3583
 * parse an XML name.
3584
 *
3585
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3586
 *                  CombiningChar | Extender
3587
 *
3588
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3589
 *
3590
 * [6] Names ::= Name (#x20 Name)*
3591
 *
3592
 * Returns the Name parsed or NULL. The @str pointer
3593
 * is updated to the current location in the string.
3594
 */
3595
3596
static xmlChar *
3597
35.8M
xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3598
35.8M
    xmlChar buf[XML_MAX_NAMELEN + 5];
3599
35.8M
    const xmlChar *cur = *str;
3600
35.8M
    int len = 0, l;
3601
35.8M
    int c;
3602
35.8M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3603
3.24M
                    XML_MAX_TEXT_LENGTH :
3604
35.8M
                    XML_MAX_NAME_LENGTH;
3605
3606
#ifdef DEBUG
3607
    nbParseStringName++;
3608
#endif
3609
3610
35.8M
    c = CUR_SCHAR(cur, l);
3611
35.8M
    if (!xmlIsNameStartChar(ctxt, c)) {
3612
22.5k
  return(NULL);
3613
22.5k
    }
3614
3615
35.7M
    COPY_BUF(l,buf,len,c);
3616
35.7M
    cur += l;
3617
35.7M
    c = CUR_SCHAR(cur, l);
3618
338M
    while (xmlIsNameChar(ctxt, c)) {
3619
304M
  COPY_BUF(l,buf,len,c);
3620
304M
  cur += l;
3621
304M
  c = CUR_SCHAR(cur, l);
3622
304M
  if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3623
      /*
3624
       * Okay someone managed to make a huge name, so he's ready to pay
3625
       * for the processing speed.
3626
       */
3627
1.77M
      xmlChar *buffer;
3628
1.77M
      int max = len * 2;
3629
3630
1.77M
      buffer = (xmlChar *) xmlMallocAtomic(max);
3631
1.77M
      if (buffer == NULL) {
3632
0
          xmlErrMemory(ctxt, NULL);
3633
0
    return(NULL);
3634
0
      }
3635
1.77M
      memcpy(buffer, buf, len);
3636
402M
      while (xmlIsNameChar(ctxt, c)) {
3637
400M
    if (len + 10 > max) {
3638
1.77M
        xmlChar *tmp;
3639
3640
1.77M
        max *= 2;
3641
1.77M
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3642
1.77M
        if (tmp == NULL) {
3643
0
      xmlErrMemory(ctxt, NULL);
3644
0
      xmlFree(buffer);
3645
0
      return(NULL);
3646
0
        }
3647
1.77M
        buffer = tmp;
3648
1.77M
    }
3649
400M
    COPY_BUF(l,buffer,len,c);
3650
400M
    cur += l;
3651
400M
    c = CUR_SCHAR(cur, l);
3652
400M
                if (len > maxLength) {
3653
0
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3654
0
                    xmlFree(buffer);
3655
0
                    return(NULL);
3656
0
                }
3657
400M
      }
3658
1.77M
      buffer[len] = 0;
3659
1.77M
      *str = cur;
3660
1.77M
      return(buffer);
3661
1.77M
  }
3662
304M
    }
3663
34.0M
    if (len > maxLength) {
3664
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3665
0
        return(NULL);
3666
0
    }
3667
34.0M
    *str = cur;
3668
34.0M
    return(xmlStrndup(buf, len));
3669
34.0M
}
3670
3671
/**
3672
 * xmlParseNmtoken:
3673
 * @ctxt:  an XML parser context
3674
 *
3675
 * DEPRECATED: Internal function, don't use.
3676
 *
3677
 * parse an XML Nmtoken.
3678
 *
3679
 * [7] Nmtoken ::= (NameChar)+
3680
 *
3681
 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3682
 *
3683
 * Returns the Nmtoken parsed or NULL
3684
 */
3685
3686
xmlChar *
3687
657k
xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3688
657k
    xmlChar buf[XML_MAX_NAMELEN + 5];
3689
657k
    int len = 0, l;
3690
657k
    int c;
3691
657k
    int count = 0;
3692
657k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3693
182k
                    XML_MAX_TEXT_LENGTH :
3694
657k
                    XML_MAX_NAME_LENGTH;
3695
3696
#ifdef DEBUG
3697
    nbParseNmToken++;
3698
#endif
3699
3700
657k
    GROW;
3701
657k
    if (ctxt->instate == XML_PARSER_EOF)
3702
0
        return(NULL);
3703
657k
    c = CUR_CHAR(l);
3704
3705
4.24M
    while (xmlIsNameChar(ctxt, c)) {
3706
3.59M
  if (count++ > XML_PARSER_CHUNK_SIZE) {
3707
0
      count = 0;
3708
0
      GROW;
3709
0
  }
3710
3.59M
  COPY_BUF(l,buf,len,c);
3711
3.59M
  NEXTL(l);
3712
3.59M
  c = CUR_CHAR(l);
3713
3.59M
  if (c == 0) {
3714
4.62k
      count = 0;
3715
4.62k
      GROW;
3716
4.62k
      if (ctxt->instate == XML_PARSER_EOF)
3717
0
    return(NULL);
3718
4.62k
            c = CUR_CHAR(l);
3719
4.62k
  }
3720
3.59M
  if (len >= XML_MAX_NAMELEN) {
3721
      /*
3722
       * Okay someone managed to make a huge token, so he's ready to pay
3723
       * for the processing speed.
3724
       */
3725
9.83k
      xmlChar *buffer;
3726
9.83k
      int max = len * 2;
3727
3728
9.83k
      buffer = (xmlChar *) xmlMallocAtomic(max);
3729
9.83k
      if (buffer == NULL) {
3730
0
          xmlErrMemory(ctxt, NULL);
3731
0
    return(NULL);
3732
0
      }
3733
9.83k
      memcpy(buffer, buf, len);
3734
2.91M
      while (xmlIsNameChar(ctxt, c)) {
3735
2.90M
    if (count++ > XML_PARSER_CHUNK_SIZE) {
3736
33.9k
        count = 0;
3737
33.9k
        GROW;
3738
33.9k
                    if (ctxt->instate == XML_PARSER_EOF) {
3739
0
                        xmlFree(buffer);
3740
0
                        return(NULL);
3741
0
                    }
3742
33.9k
    }
3743
2.90M
    if (len + 10 > max) {
3744
5.98k
        xmlChar *tmp;
3745
3746
5.98k
        max *= 2;
3747
5.98k
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3748
5.98k
        if (tmp == NULL) {
3749
0
      xmlErrMemory(ctxt, NULL);
3750
0
      xmlFree(buffer);
3751
0
      return(NULL);
3752
0
        }
3753
5.98k
        buffer = tmp;
3754
5.98k
    }
3755
2.90M
    COPY_BUF(l,buffer,len,c);
3756
2.90M
    NEXTL(l);
3757
2.90M
    c = CUR_CHAR(l);
3758
2.90M
                if (len > maxLength) {
3759
0
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3760
0
                    xmlFree(buffer);
3761
0
                    return(NULL);
3762
0
                }
3763
2.90M
      }
3764
9.83k
      buffer[len] = 0;
3765
9.83k
      return(buffer);
3766
9.83k
  }
3767
3.59M
    }
3768
647k
    if (len == 0)
3769
148k
        return(NULL);
3770
498k
    if (len > maxLength) {
3771
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3772
0
        return(NULL);
3773
0
    }
3774
498k
    return(xmlStrndup(buf, len));
3775
498k
}
3776
3777
/**
3778
 * xmlParseEntityValue:
3779
 * @ctxt:  an XML parser context
3780
 * @orig:  if non-NULL store a copy of the original entity value
3781
 *
3782
 * DEPRECATED: Internal function, don't use.
3783
 *
3784
 * parse a value for ENTITY declarations
3785
 *
3786
 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3787
 *                 "'" ([^%&'] | PEReference | Reference)* "'"
3788
 *
3789
 * Returns the EntityValue parsed with reference substituted or NULL
3790
 */
3791
3792
xmlChar *
3793
690k
xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3794
690k
    xmlChar *buf = NULL;
3795
690k
    int len = 0;
3796
690k
    int size = XML_PARSER_BUFFER_SIZE;
3797
690k
    int c, l;
3798
690k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3799
179k
                    XML_MAX_HUGE_LENGTH :
3800
690k
                    XML_MAX_TEXT_LENGTH;
3801
690k
    xmlChar stop;
3802
690k
    xmlChar *ret = NULL;
3803
690k
    const xmlChar *cur = NULL;
3804
690k
    xmlParserInputPtr input;
3805
3806
690k
    if (RAW == '"') stop = '"';
3807
125k
    else if (RAW == '\'') stop = '\'';
3808
0
    else {
3809
0
  xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
3810
0
  return(NULL);
3811
0
    }
3812
690k
    buf = (xmlChar *) xmlMallocAtomic(size);
3813
690k
    if (buf == NULL) {
3814
0
  xmlErrMemory(ctxt, NULL);
3815
0
  return(NULL);
3816
0
    }
3817
3818
    /*
3819
     * The content of the entity definition is copied in a buffer.
3820
     */
3821
3822
690k
    ctxt->instate = XML_PARSER_ENTITY_VALUE;
3823
690k
    input = ctxt->input;
3824
690k
    GROW;
3825
690k
    if (ctxt->instate == XML_PARSER_EOF)
3826
0
        goto error;
3827
690k
    NEXT;
3828
690k
    c = CUR_CHAR(l);
3829
    /*
3830
     * NOTE: 4.4.5 Included in Literal
3831
     * When a parameter entity reference appears in a literal entity
3832
     * value, ... a single or double quote character in the replacement
3833
     * text is always treated as a normal data character and will not
3834
     * terminate the literal.
3835
     * In practice it means we stop the loop only when back at parsing
3836
     * the initial entity and the quote is found
3837
     */
3838
41.1M
    while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3839
41.1M
      (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
3840
40.4M
  if (len + 5 >= size) {
3841
98.6k
      xmlChar *tmp;
3842
3843
98.6k
      size *= 2;
3844
98.6k
      tmp = (xmlChar *) xmlRealloc(buf, size);
3845
98.6k
      if (tmp == NULL) {
3846
0
    xmlErrMemory(ctxt, NULL);
3847
0
                goto error;
3848
0
      }
3849
98.6k
      buf = tmp;
3850
98.6k
  }
3851
40.4M
  COPY_BUF(l,buf,len,c);
3852
40.4M
  NEXTL(l);
3853
3854
40.4M
  GROW;
3855
40.4M
  c = CUR_CHAR(l);
3856
40.4M
  if (c == 0) {
3857
3.69k
      GROW;
3858
3.69k
      c = CUR_CHAR(l);
3859
3.69k
  }
3860
3861
40.4M
        if (len > maxLength) {
3862
0
            xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
3863
0
                           "entity value too long\n");
3864
0
            goto error;
3865
0
        }
3866
40.4M
    }
3867
690k
    buf[len] = 0;
3868
690k
    if (ctxt->instate == XML_PARSER_EOF)
3869
0
        goto error;
3870
690k
    if (c != stop) {
3871
5.68k
        xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3872
5.68k
        goto error;
3873
5.68k
    }
3874
685k
    NEXT;
3875
3876
    /*
3877
     * Raise problem w.r.t. '&' and '%' being used in non-entities
3878
     * reference constructs. Note Charref will be handled in
3879
     * xmlStringDecodeEntities()
3880
     */
3881
685k
    cur = buf;
3882
26.7M
    while (*cur != 0) { /* non input consuming */
3883
26.0M
  if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3884
581k
      xmlChar *name;
3885
581k
      xmlChar tmp = *cur;
3886
581k
            int nameOk = 0;
3887
3888
581k
      cur++;
3889
581k
      name = xmlParseStringName(ctxt, &cur);
3890
581k
            if (name != NULL) {
3891
566k
                nameOk = 1;
3892
566k
                xmlFree(name);
3893
566k
            }
3894
581k
            if ((nameOk == 0) || (*cur != ';')) {
3895
34.1k
    xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3896
34.1k
      "EntityValue: '%c' forbidden except for entities references\n",
3897
34.1k
                            tmp);
3898
34.1k
                goto error;
3899
34.1k
      }
3900
547k
      if ((tmp == '%') && (ctxt->inSubset == 1) &&
3901
547k
    (ctxt->inputNr == 1)) {
3902
3.42k
    xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3903
3.42k
                goto error;
3904
3.42k
      }
3905
544k
      if (*cur == 0)
3906
0
          break;
3907
544k
  }
3908
26.0M
  cur++;
3909
26.0M
    }
3910
3911
    /*
3912
     * Then PEReference entities are substituted.
3913
     *
3914
     * NOTE: 4.4.7 Bypassed
3915
     * When a general entity reference appears in the EntityValue in
3916
     * an entity declaration, it is bypassed and left as is.
3917
     * so XML_SUBSTITUTE_REF is not set here.
3918
     */
3919
647k
    ++ctxt->depth;
3920
647k
    ret = xmlStringDecodeEntitiesInt(ctxt, buf, len, XML_SUBSTITUTE_PEREF,
3921
647k
                                     0, 0, 0, /* check */ 1);
3922
647k
    --ctxt->depth;
3923
3924
647k
    if (orig != NULL) {
3925
647k
        *orig = buf;
3926
647k
        buf = NULL;
3927
647k
    }
3928
3929
690k
error:
3930
690k
    if (buf != NULL)
3931
43.2k
        xmlFree(buf);
3932
690k
    return(ret);
3933
647k
}
3934
3935
/**
3936
 * xmlParseAttValueComplex:
3937
 * @ctxt:  an XML parser context
3938
 * @len:   the resulting attribute len
3939
 * @normalize:  whether to apply the inner normalization
3940
 *
3941
 * parse a value for an attribute, this is the fallback function
3942
 * of xmlParseAttValue() when the attribute parsing requires handling
3943
 * of non-ASCII characters, or normalization compaction.
3944
 *
3945
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3946
 */
3947
static xmlChar *
3948
1.98M
xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
3949
1.98M
    xmlChar limit = 0;
3950
1.98M
    xmlChar *buf = NULL;
3951
1.98M
    xmlChar *rep = NULL;
3952
1.98M
    size_t len = 0;
3953
1.98M
    size_t buf_size = 0;
3954
1.98M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3955
697k
                       XML_MAX_HUGE_LENGTH :
3956
1.98M
                       XML_MAX_TEXT_LENGTH;
3957
1.98M
    int c, l, in_space = 0;
3958
1.98M
    xmlChar *current = NULL;
3959
1.98M
    xmlEntityPtr ent;
3960
3961
1.98M
    if (NXT(0) == '"') {
3962
1.36M
  ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3963
1.36M
  limit = '"';
3964
1.36M
        NEXT;
3965
1.36M
    } else if (NXT(0) == '\'') {
3966
626k
  limit = '\'';
3967
626k
  ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3968
626k
        NEXT;
3969
626k
    } else {
3970
0
  xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3971
0
  return(NULL);
3972
0
    }
3973
3974
    /*
3975
     * allocate a translation buffer.
3976
     */
3977
1.98M
    buf_size = XML_PARSER_BUFFER_SIZE;
3978
1.98M
    buf = (xmlChar *) xmlMallocAtomic(buf_size);
3979
1.98M
    if (buf == NULL) goto mem_error;
3980
3981
    /*
3982
     * OK loop until we reach one of the ending char or a size limit.
3983
     */
3984
1.98M
    c = CUR_CHAR(l);
3985
69.4M
    while (((NXT(0) != limit) && /* checked */
3986
69.4M
            (IS_CHAR(c)) && (c != '<')) &&
3987
69.4M
            (ctxt->instate != XML_PARSER_EOF)) {
3988
67.4M
  if (c == '&') {
3989
3.18M
      in_space = 0;
3990
3.18M
      if (NXT(1) == '#') {
3991
401k
    int val = xmlParseCharRef(ctxt);
3992
3993
401k
    if (val == '&') {
3994
69.0k
        if (ctxt->replaceEntities) {
3995
25.7k
      if (len + 10 > buf_size) {
3996
452
          growBuffer(buf, 10);
3997
452
      }
3998
25.7k
      buf[len++] = '&';
3999
43.3k
        } else {
4000
      /*
4001
       * The reparsing will be done in xmlStringGetNodeList()
4002
       * called by the attribute() function in SAX.c
4003
       */
4004
43.3k
      if (len + 10 > buf_size) {
4005
366
          growBuffer(buf, 10);
4006
366
      }
4007
43.3k
      buf[len++] = '&';
4008
43.3k
      buf[len++] = '#';
4009
43.3k
      buf[len++] = '3';
4010
43.3k
      buf[len++] = '8';
4011
43.3k
      buf[len++] = ';';
4012
43.3k
        }
4013
332k
    } else if (val != 0) {
4014
247k
        if (len + 10 > buf_size) {
4015
1.02k
      growBuffer(buf, 10);
4016
1.02k
        }
4017
247k
        len += xmlCopyChar(0, &buf[len], val);
4018
247k
    }
4019
2.78M
      } else {
4020
2.78M
    ent = xmlParseEntityRef(ctxt);
4021
2.78M
    if ((ent != NULL) &&
4022
2.78M
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
4023
258k
        if (len + 10 > buf_size) {
4024
566
      growBuffer(buf, 10);
4025
566
        }
4026
258k
        if ((ctxt->replaceEntities == 0) &&
4027
258k
            (ent->content[0] == '&')) {
4028
66.3k
      buf[len++] = '&';
4029
66.3k
      buf[len++] = '#';
4030
66.3k
      buf[len++] = '3';
4031
66.3k
      buf[len++] = '8';
4032
66.3k
      buf[len++] = ';';
4033
191k
        } else {
4034
191k
      buf[len++] = ent->content[0];
4035
191k
        }
4036
2.52M
    } else if ((ent != NULL) &&
4037
2.52M
               (ctxt->replaceEntities != 0)) {
4038
1.52M
        if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
4039
1.52M
                        if (xmlParserEntityCheck(ctxt, ent->length))
4040
0
                            goto error;
4041
4042
1.52M
      ++ctxt->depth;
4043
1.52M
      rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
4044
1.52M
                                ent->length, XML_SUBSTITUTE_REF, 0, 0, 0,
4045
1.52M
                                /* check */ 1);
4046
1.52M
      --ctxt->depth;
4047
1.52M
      if (rep != NULL) {
4048
1.51M
          current = rep;
4049
308M
          while (*current != 0) { /* non input consuming */
4050
307M
                                if ((*current == 0xD) || (*current == 0xA) ||
4051
307M
                                    (*current == 0x9)) {
4052
172k
                                    buf[len++] = 0x20;
4053
172k
                                    current++;
4054
172k
                                } else
4055
306M
                                    buf[len++] = *current++;
4056
307M
        if (len + 10 > buf_size) {
4057
51.8k
            growBuffer(buf, 10);
4058
51.8k
        }
4059
307M
          }
4060
1.51M
          xmlFree(rep);
4061
1.51M
          rep = NULL;
4062
1.51M
      }
4063
1.52M
        } else {
4064
0
      if (len + 10 > buf_size) {
4065
0
          growBuffer(buf, 10);
4066
0
      }
4067
0
      if (ent->content != NULL)
4068
0
          buf[len++] = ent->content[0];
4069
0
        }
4070
1.52M
    } else if (ent != NULL) {
4071
550k
        int i = xmlStrlen(ent->name);
4072
550k
        const xmlChar *cur = ent->name;
4073
4074
        /*
4075
                     * We also check for recursion and amplification
4076
                     * when entities are not substituted. They're
4077
                     * often expanded later.
4078
         */
4079
550k
        if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
4080
550k
      (ent->content != NULL)) {
4081
501k
                        if ((ent->flags & XML_ENT_CHECKED) == 0) {
4082
15.8k
                            unsigned long oldCopy = ctxt->sizeentcopy;
4083
4084
15.8k
                            ctxt->sizeentcopy = ent->length;
4085
4086
15.8k
                            ++ctxt->depth;
4087
15.8k
                            rep = xmlStringDecodeEntitiesInt(ctxt,
4088
15.8k
                                    ent->content, ent->length,
4089
15.8k
                                    XML_SUBSTITUTE_REF, 0, 0, 0,
4090
15.8k
                                    /* check */ 1);
4091
15.8k
                            --ctxt->depth;
4092
4093
                            /*
4094
                             * If we're parsing DTD content, the entity
4095
                             * might reference other entities which
4096
                             * weren't defined yet, so the check isn't
4097
                             * reliable.
4098
                             */
4099
15.8k
                            if (ctxt->inSubset == 0) {
4100
10.5k
                                ent->flags |= XML_ENT_CHECKED;
4101
10.5k
                                ent->expandedSize = ctxt->sizeentcopy;
4102
10.5k
                            }
4103
4104
15.8k
                            if (rep != NULL) {
4105
15.5k
                                xmlFree(rep);
4106
15.5k
                                rep = NULL;
4107
15.5k
                            } else {
4108
334
                                ent->content[0] = 0;
4109
334
                            }
4110
4111
15.8k
                            if (xmlParserEntityCheck(ctxt, oldCopy))
4112
21
                                goto error;
4113
485k
                        } else {
4114
485k
                            if (xmlParserEntityCheck(ctxt, ent->expandedSize))
4115
5
                                goto error;
4116
485k
                        }
4117
501k
        }
4118
4119
        /*
4120
         * Just output the reference
4121
         */
4122
550k
        buf[len++] = '&';
4123
556k
        while (len + i + 10 > buf_size) {
4124
10.0k
      growBuffer(buf, i + 10);
4125
10.0k
        }
4126
1.38M
        for (;i > 0;i--)
4127
838k
      buf[len++] = *cur++;
4128
550k
        buf[len++] = ';';
4129
550k
    }
4130
2.78M
      }
4131
64.2M
  } else {
4132
64.2M
      if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
4133
4.92M
          if ((len != 0) || (!normalize)) {
4134
4.83M
        if ((!normalize) || (!in_space)) {
4135
4.66M
      COPY_BUF(l,buf,len,0x20);
4136
4.68M
      while (len + 10 > buf_size) {
4137
35.5k
          growBuffer(buf, 10);
4138
35.5k
      }
4139
4.66M
        }
4140
4.83M
        in_space = 1;
4141
4.83M
    }
4142
59.3M
      } else {
4143
59.3M
          in_space = 0;
4144
59.3M
    COPY_BUF(l,buf,len,c);
4145
59.3M
    if (len + 10 > buf_size) {
4146
309k
        growBuffer(buf, 10);
4147
309k
    }
4148
59.3M
      }
4149
64.2M
      NEXTL(l);
4150
64.2M
  }
4151
67.4M
  GROW;
4152
67.4M
  c = CUR_CHAR(l);
4153
67.4M
        if (len > maxLength) {
4154
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4155
0
                           "AttValue length too long\n");
4156
0
            goto mem_error;
4157
0
        }
4158
67.4M
    }
4159
1.98M
    if (ctxt->instate == XML_PARSER_EOF)
4160
895
        goto error;
4161
4162
1.98M
    if ((in_space) && (normalize)) {
4163
101k
        while ((len > 0) && (buf[len - 1] == 0x20)) len--;
4164
48.9k
    }
4165
1.98M
    buf[len] = 0;
4166
1.98M
    if (RAW == '<') {
4167
722k
  xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4168
1.26M
    } else if (RAW != limit) {
4169
300k
  if ((c != 0) && (!IS_CHAR(c))) {
4170
161k
      xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4171
161k
         "invalid character in attribute value\n");
4172
161k
  } else {
4173
138k
      xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4174
138k
         "AttValue: ' expected\n");
4175
138k
        }
4176
300k
    } else
4177
964k
  NEXT;
4178
4179
1.98M
    if (attlen != NULL) *attlen = len;
4180
1.98M
    return(buf);
4181
4182
0
mem_error:
4183
0
    xmlErrMemory(ctxt, NULL);
4184
921
error:
4185
921
    if (buf != NULL)
4186
921
        xmlFree(buf);
4187
921
    if (rep != NULL)
4188
0
        xmlFree(rep);
4189
921
    return(NULL);
4190
0
}
4191
4192
/**
4193
 * xmlParseAttValue:
4194
 * @ctxt:  an XML parser context
4195
 *
4196
 * DEPRECATED: Internal function, don't use.
4197
 *
4198
 * parse a value for an attribute
4199
 * Note: the parser won't do substitution of entities here, this
4200
 * will be handled later in xmlStringGetNodeList
4201
 *
4202
 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4203
 *                   "'" ([^<&'] | Reference)* "'"
4204
 *
4205
 * 3.3.3 Attribute-Value Normalization:
4206
 * Before the value of an attribute is passed to the application or
4207
 * checked for validity, the XML processor must normalize it as follows:
4208
 * - a character reference is processed by appending the referenced
4209
 *   character to the attribute value
4210
 * - an entity reference is processed by recursively processing the
4211
 *   replacement text of the entity
4212
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4213
 *   appending #x20 to the normalized value, except that only a single
4214
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
4215
 *   parsed entity or the literal entity value of an internal parsed entity
4216
 * - other characters are processed by appending them to the normalized value
4217
 * If the declared value is not CDATA, then the XML processor must further
4218
 * process the normalized attribute value by discarding any leading and
4219
 * trailing space (#x20) characters, and by replacing sequences of space
4220
 * (#x20) characters by a single space (#x20) character.
4221
 * All attributes for which no declaration has been read should be treated
4222
 * by a non-validating parser as if declared CDATA.
4223
 *
4224
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4225
 */
4226
4227
4228
xmlChar *
4229
3.28M
xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4230
3.28M
    if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4231
3.28M
    return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
4232
3.28M
}
4233
4234
/**
4235
 * xmlParseSystemLiteral:
4236
 * @ctxt:  an XML parser context
4237
 *
4238
 * DEPRECATED: Internal function, don't use.
4239
 *
4240
 * parse an XML Literal
4241
 *
4242
 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4243
 *
4244
 * Returns the SystemLiteral parsed or NULL
4245
 */
4246
4247
xmlChar *
4248
357k
xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4249
357k
    xmlChar *buf = NULL;
4250
357k
    int len = 0;
4251
357k
    int size = XML_PARSER_BUFFER_SIZE;
4252
357k
    int cur, l;
4253
357k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4254
113k
                    XML_MAX_TEXT_LENGTH :
4255
357k
                    XML_MAX_NAME_LENGTH;
4256
357k
    xmlChar stop;
4257
357k
    int state = ctxt->instate;
4258
357k
    int count = 0;
4259
4260
357k
    SHRINK;
4261
357k
    if (RAW == '"') {
4262
262k
        NEXT;
4263
262k
  stop = '"';
4264
262k
    } else if (RAW == '\'') {
4265
81.6k
        NEXT;
4266
81.6k
  stop = '\'';
4267
81.6k
    } else {
4268
13.3k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4269
13.3k
  return(NULL);
4270
13.3k
    }
4271
4272
344k
    buf = (xmlChar *) xmlMallocAtomic(size);
4273
344k
    if (buf == NULL) {
4274
0
        xmlErrMemory(ctxt, NULL);
4275
0
  return(NULL);
4276
0
    }
4277
344k
    ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4278
344k
    cur = CUR_CHAR(l);
4279
15.8M
    while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4280
15.5M
  if (len + 5 >= size) {
4281
23.7k
      xmlChar *tmp;
4282
4283
23.7k
      size *= 2;
4284
23.7k
      tmp = (xmlChar *) xmlRealloc(buf, size);
4285
23.7k
      if (tmp == NULL) {
4286
0
          xmlFree(buf);
4287
0
    xmlErrMemory(ctxt, NULL);
4288
0
    ctxt->instate = (xmlParserInputState) state;
4289
0
    return(NULL);
4290
0
      }
4291
23.7k
      buf = tmp;
4292
23.7k
  }
4293
15.5M
  count++;
4294
15.5M
  if (count > 50) {
4295
208k
      SHRINK;
4296
208k
      GROW;
4297
208k
      count = 0;
4298
208k
            if (ctxt->instate == XML_PARSER_EOF) {
4299
0
          xmlFree(buf);
4300
0
    return(NULL);
4301
0
            }
4302
208k
  }
4303
15.5M
  COPY_BUF(l,buf,len,cur);
4304
15.5M
  NEXTL(l);
4305
15.5M
  cur = CUR_CHAR(l);
4306
15.5M
  if (cur == 0) {
4307
5.71k
      GROW;
4308
5.71k
      SHRINK;
4309
5.71k
      cur = CUR_CHAR(l);
4310
5.71k
  }
4311
15.5M
        if (len > maxLength) {
4312
49
            xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4313
49
            xmlFree(buf);
4314
49
            ctxt->instate = (xmlParserInputState) state;
4315
49
            return(NULL);
4316
49
        }
4317
15.5M
    }
4318
344k
    buf[len] = 0;
4319
344k
    ctxt->instate = (xmlParserInputState) state;
4320
344k
    if (!IS_CHAR(cur)) {
4321
9.25k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4322
334k
    } else {
4323
334k
  NEXT;
4324
334k
    }
4325
344k
    return(buf);
4326
344k
}
4327
4328
/**
4329
 * xmlParsePubidLiteral:
4330
 * @ctxt:  an XML parser context
4331
 *
4332
 * DEPRECATED: Internal function, don't use.
4333
 *
4334
 * parse an XML public literal
4335
 *
4336
 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4337
 *
4338
 * Returns the PubidLiteral parsed or NULL.
4339
 */
4340
4341
xmlChar *
4342
104k
xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4343
104k
    xmlChar *buf = NULL;
4344
104k
    int len = 0;
4345
104k
    int size = XML_PARSER_BUFFER_SIZE;
4346
104k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4347
36.8k
                    XML_MAX_TEXT_LENGTH :
4348
104k
                    XML_MAX_NAME_LENGTH;
4349
104k
    xmlChar cur;
4350
104k
    xmlChar stop;
4351
104k
    int count = 0;
4352
104k
    xmlParserInputState oldstate = ctxt->instate;
4353
4354
104k
    SHRINK;
4355
104k
    if (RAW == '"') {
4356
56.7k
        NEXT;
4357
56.7k
  stop = '"';
4358
56.7k
    } else if (RAW == '\'') {
4359
45.7k
        NEXT;
4360
45.7k
  stop = '\'';
4361
45.7k
    } else {
4362
2.14k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4363
2.14k
  return(NULL);
4364
2.14k
    }
4365
102k
    buf = (xmlChar *) xmlMallocAtomic(size);
4366
102k
    if (buf == NULL) {
4367
0
  xmlErrMemory(ctxt, NULL);
4368
0
  return(NULL);
4369
0
    }
4370
102k
    ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
4371
102k
    cur = CUR;
4372
3.28M
    while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
4373
3.17M
  if (len + 1 >= size) {
4374
4.02k
      xmlChar *tmp;
4375
4376
4.02k
      size *= 2;
4377
4.02k
      tmp = (xmlChar *) xmlRealloc(buf, size);
4378
4.02k
      if (tmp == NULL) {
4379
0
    xmlErrMemory(ctxt, NULL);
4380
0
    xmlFree(buf);
4381
0
    return(NULL);
4382
0
      }
4383
4.02k
      buf = tmp;
4384
4.02k
  }
4385
3.17M
  buf[len++] = cur;
4386
3.17M
  count++;
4387
3.17M
  if (count > 50) {
4388
33.1k
      SHRINK;
4389
33.1k
      GROW;
4390
33.1k
      count = 0;
4391
33.1k
            if (ctxt->instate == XML_PARSER_EOF) {
4392
0
    xmlFree(buf);
4393
0
    return(NULL);
4394
0
            }
4395
33.1k
  }
4396
3.17M
  NEXT;
4397
3.17M
  cur = CUR;
4398
3.17M
  if (cur == 0) {
4399
907
      GROW;
4400
907
      SHRINK;
4401
907
      cur = CUR;
4402
907
  }
4403
3.17M
        if (len > maxLength) {
4404
0
            xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4405
0
            xmlFree(buf);
4406
0
            return(NULL);
4407
0
        }
4408
3.17M
    }
4409
102k
    buf[len] = 0;
4410
102k
    if (cur != stop) {
4411
12.3k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4412
90.1k
    } else {
4413
90.1k
  NEXT;
4414
90.1k
    }
4415
102k
    ctxt->instate = oldstate;
4416
102k
    return(buf);
4417
102k
}
4418
4419
static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt);
4420
4421
/*
 * Lookup table for the inner scanning loop of xmlParseCharData().
 *
 * Entry i is nonzero (and equal to i) when byte i can be stepped over by
 * that loop: TAB (0x09) and 0x20..0x7F, with the exception of '&', '<'
 * and ']'. LF and CR are zero because the scanner treats line ends
 * separately. Bytes 0x80..0xFF (non-ASCII) are zero as well; they are not
 * spelled out below and rely on implicit zero initialization of the
 * remaining array elements.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 - 0x0F: only TAB passes */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x10 - 0x1F */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20 - 0x2F: '&' (0x26) stops the scan */
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27,
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30 - 0x3F: '<' (0x3C) stops the scan */
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F,
    /* 0x40 - 0x4F */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50 - 0x5F: ']' (0x5D) stops the scan */
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F,
    /* 0x60 - 0x6F */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 - 0x7F */
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F
    /* 0x80 - 0xFF: non-ascii, implicitly 0x00 */
};
4458
4459
/**
4460
 * xmlParseCharData:
4461
 * @ctxt:  an XML parser context
4462
 * @cdata:  unused
4463
 *
4464
 * DEPRECATED: Internal function, don't use.
4465
 *
4466
 * Parse character data. Always makes progress if the first char isn't
4467
 * '<' or '&'.
4468
 *
4469
 * if we are within a CDATA section ']]>' marks an end of section.
4470
 *
4471
 * The right angle bracket (>) may be represented using the string "&gt;",
4472
 * and must, for compatibility, be escaped using "&gt;" or a character
4473
 * reference when it appears in the string "]]>" in content, when that
4474
 * string is not marking the end of a CDATA section.
4475
 *
4476
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4477
 */
4478
4479
void
4480
36.7M
xmlParseCharData(xmlParserCtxtPtr ctxt, ATTRIBUTE_UNUSED int cdata) {
4481
36.7M
    const xmlChar *in;
4482
36.7M
    int nbchar = 0;
4483
36.7M
    int line = ctxt->input->line;
4484
36.7M
    int col = ctxt->input->col;
4485
36.7M
    int ccol;
4486
4487
36.7M
    SHRINK;
4488
36.7M
    GROW;
4489
    /*
4490
     * Accelerated common case where input don't need to be
4491
     * modified before passing it to the handler.
4492
     */
4493
36.7M
    in = ctxt->input->cur;
4494
41.2M
    do {
4495
46.7M
get_more_space:
4496
70.7M
        while (*in == 0x20) { in++; ctxt->input->col++; }
4497
46.7M
        if (*in == 0xA) {
4498
5.89M
            do {
4499
5.89M
                ctxt->input->line++; ctxt->input->col = 1;
4500
5.89M
                in++;
4501
5.89M
            } while (*in == 0xA);
4502
5.55M
            goto get_more_space;
4503
5.55M
        }
4504
41.2M
        if (*in == '<') {
4505
6.66M
            nbchar = in - ctxt->input->cur;
4506
6.66M
            if (nbchar > 0) {
4507
6.66M
                const xmlChar *tmp = ctxt->input->cur;
4508
6.66M
                ctxt->input->cur = in;
4509
4510
6.66M
                if ((ctxt->sax != NULL) &&
4511
6.66M
                    (ctxt->sax->ignorableWhitespace !=
4512
6.66M
                     ctxt->sax->characters)) {
4513
3.08M
                    if (areBlanks(ctxt, tmp, nbchar, 1)) {
4514
1.82M
                        if (ctxt->sax->ignorableWhitespace != NULL)
4515
1.82M
                            ctxt->sax->ignorableWhitespace(ctxt->userData,
4516
1.82M
                                                   tmp, nbchar);
4517
1.82M
                    } else {
4518
1.26M
                        if (ctxt->sax->characters != NULL)
4519
1.26M
                            ctxt->sax->characters(ctxt->userData,
4520
1.26M
                                                  tmp, nbchar);
4521
1.26M
                        if (*ctxt->space == -1)
4522
372k
                            *ctxt->space = -2;
4523
1.26M
                    }
4524
3.57M
                } else if ((ctxt->sax != NULL) &&
4525
3.57M
                           (ctxt->sax->characters != NULL)) {
4526
3.57M
                    ctxt->sax->characters(ctxt->userData,
4527
3.57M
                                          tmp, nbchar);
4528
3.57M
                }
4529
6.66M
            }
4530
6.66M
            return;
4531
6.66M
        }
4532
4533
41.3M
get_more:
4534
41.3M
        ccol = ctxt->input->col;
4535
321M
        while (test_char_data[*in]) {
4536
280M
            in++;
4537
280M
            ccol++;
4538
280M
        }
4539
41.3M
        ctxt->input->col = ccol;
4540
41.3M
        if (*in == 0xA) {
4541
6.10M
            do {
4542
6.10M
                ctxt->input->line++; ctxt->input->col = 1;
4543
6.10M
                in++;
4544
6.10M
            } while (*in == 0xA);
4545
5.29M
            goto get_more;
4546
5.29M
        }
4547
36.0M
        if (*in == ']') {
4548
1.70M
            if ((in[1] == ']') && (in[2] == '>')) {
4549
207k
                xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4550
207k
                ctxt->input->cur = in + 1;
4551
207k
                return;
4552
207k
            }
4553
1.49M
            in++;
4554
1.49M
            ctxt->input->col++;
4555
1.49M
            goto get_more;
4556
1.70M
        }
4557
34.3M
        nbchar = in - ctxt->input->cur;
4558
34.3M
        if (nbchar > 0) {
4559
15.5M
            if ((ctxt->sax != NULL) &&
4560
15.5M
                (ctxt->sax->ignorableWhitespace !=
4561
15.5M
                 ctxt->sax->characters) &&
4562
15.5M
                (IS_BLANK_CH(*ctxt->input->cur))) {
4563
1.71M
                const xmlChar *tmp = ctxt->input->cur;
4564
1.71M
                ctxt->input->cur = in;
4565
4566
1.71M
                if (areBlanks(ctxt, tmp, nbchar, 0)) {
4567
357k
                    if (ctxt->sax->ignorableWhitespace != NULL)
4568
357k
                        ctxt->sax->ignorableWhitespace(ctxt->userData,
4569
357k
                                                       tmp, nbchar);
4570
1.35M
                } else {
4571
1.35M
                    if (ctxt->sax->characters != NULL)
4572
1.35M
                        ctxt->sax->characters(ctxt->userData,
4573
1.35M
                                              tmp, nbchar);
4574
1.35M
                    if (*ctxt->space == -1)
4575
628k
                        *ctxt->space = -2;
4576
1.35M
                }
4577
1.71M
                line = ctxt->input->line;
4578
1.71M
                col = ctxt->input->col;
4579
13.8M
            } else if (ctxt->sax != NULL) {
4580
13.8M
                if (ctxt->sax->characters != NULL)
4581
13.8M
                    ctxt->sax->characters(ctxt->userData,
4582
13.8M
                                          ctxt->input->cur, nbchar);
4583
13.8M
                line = ctxt->input->line;
4584
13.8M
                col = ctxt->input->col;
4585
13.8M
            }
4586
15.5M
        }
4587
34.3M
        ctxt->input->cur = in;
4588
34.3M
        if (*in == 0xD) {
4589
4.77M
            in++;
4590
4.77M
            if (*in == 0xA) {
4591
4.53M
                ctxt->input->cur = in;
4592
4.53M
                in++;
4593
4.53M
                ctxt->input->line++; ctxt->input->col = 1;
4594
4.53M
                continue; /* while */
4595
4.53M
            }
4596
242k
            in--;
4597
242k
        }
4598
29.8M
        if (*in == '<') {
4599
8.62M
            return;
4600
8.62M
        }
4601
21.2M
        if (*in == '&') {
4602
1.43M
            return;
4603
1.43M
        }
4604
19.7M
        SHRINK;
4605
19.7M
        GROW;
4606
19.7M
        if (ctxt->instate == XML_PARSER_EOF)
4607
0
            return;
4608
19.7M
        in = ctxt->input->cur;
4609
24.3M
    } while (((*in >= 0x20) && (*in <= 0x7F)) ||
4610
24.3M
             (*in == 0x09) || (*in == 0x0a));
4611
19.8M
    ctxt->input->line = line;
4612
19.8M
    ctxt->input->col = col;
4613
19.8M
    xmlParseCharDataComplex(ctxt);
4614
19.8M
}
4615
4616
/**
4617
 * xmlParseCharDataComplex:
4618
 * @ctxt:  an XML parser context
4619
 * @cdata:  int indicating whether we are within a CDATA section
4620
 *
4621
 * Always makes progress if the first char isn't '<' or '&'.
4622
 *
4623
 * parse a CharData section.this is the fallback function
4624
 * of xmlParseCharData() when the parsing requires handling
4625
 * of non-ASCII characters.
4626
 */
4627
static void
4628
19.8M
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt) {
4629
19.8M
    xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4630
19.8M
    int nbchar = 0;
4631
19.8M
    int cur, l;
4632
19.8M
    int count = 0;
4633
4634
19.8M
    SHRINK;
4635
19.8M
    GROW;
4636
19.8M
    cur = CUR_CHAR(l);
4637
158M
    while ((cur != '<') && /* checked */
4638
158M
           (cur != '&') &&
4639
158M
     (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
4640
138M
  if ((cur == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4641
72.7k
      xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4642
72.7k
  }
4643
138M
  COPY_BUF(l,buf,nbchar,cur);
4644
  /* move current position before possible calling of ctxt->sax->characters */
4645
138M
  NEXTL(l);
4646
138M
  cur = CUR_CHAR(l);
4647
138M
  if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4648
94.3k
      buf[nbchar] = 0;
4649
4650
      /*
4651
       * OK the segment is to be consumed as chars.
4652
       */
4653
94.3k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4654
86.2k
    if (areBlanks(ctxt, buf, nbchar, 0)) {
4655
69
        if (ctxt->sax->ignorableWhitespace != NULL)
4656
69
      ctxt->sax->ignorableWhitespace(ctxt->userData,
4657
69
                                     buf, nbchar);
4658
86.2k
    } else {
4659
86.2k
        if (ctxt->sax->characters != NULL)
4660
86.2k
      ctxt->sax->characters(ctxt->userData, buf, nbchar);
4661
86.2k
        if ((ctxt->sax->characters !=
4662
86.2k
             ctxt->sax->ignorableWhitespace) &&
4663
86.2k
      (*ctxt->space == -1))
4664
5.39k
      *ctxt->space = -2;
4665
86.2k
    }
4666
86.2k
      }
4667
94.3k
      nbchar = 0;
4668
            /* something really bad happened in the SAX callback */
4669
94.3k
            if (ctxt->instate != XML_PARSER_CONTENT)
4670
0
                return;
4671
94.3k
  }
4672
138M
  count++;
4673
138M
  if (count > 50) {
4674
1.27M
      SHRINK;
4675
1.27M
      GROW;
4676
1.27M
      count = 0;
4677
1.27M
            if (ctxt->instate == XML_PARSER_EOF)
4678
0
    return;
4679
1.27M
  }
4680
138M
    }
4681
19.8M
    if (nbchar != 0) {
4682
4.90M
        buf[nbchar] = 0;
4683
  /*
4684
   * OK the segment is to be consumed as chars.
4685
   */
4686
4.90M
  if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4687
4.51M
      if (areBlanks(ctxt, buf, nbchar, 0)) {
4688
19.7k
    if (ctxt->sax->ignorableWhitespace != NULL)
4689
19.7k
        ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4690
4.50M
      } else {
4691
4.50M
    if (ctxt->sax->characters != NULL)
4692
4.50M
        ctxt->sax->characters(ctxt->userData, buf, nbchar);
4693
4.50M
    if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4694
4.50M
        (*ctxt->space == -1))
4695
555k
        *ctxt->space = -2;
4696
4.50M
      }
4697
4.51M
  }
4698
4.90M
    }
4699
19.8M
    if ((ctxt->input->cur < ctxt->input->end) && (!IS_CHAR(cur))) {
4700
  /* Generate the error and skip the offending character */
4701
15.9M
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4702
15.9M
                          "PCDATA invalid Char value %d\n",
4703
15.9M
                    cur ? cur : CUR);
4704
15.9M
  NEXT;
4705
15.9M
    }
4706
19.8M
}
4707
4708
/**
4709
 * xmlParseExternalID:
4710
 * @ctxt:  an XML parser context
4711
 * @publicID:  a xmlChar** receiving PubidLiteral
4712
 * @strict: indicate whether we should restrict parsing to only
4713
 *          production [75], see NOTE below
4714
 *
4715
 * DEPRECATED: Internal function, don't use.
4716
 *
4717
 * Parse an External ID or a Public ID
4718
 *
4719
 * NOTE: Productions [75] and [83] interact badly since [75] can generate
4720
 *       'PUBLIC' S PubidLiteral S SystemLiteral
4721
 *
4722
 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4723
 *                   | 'PUBLIC' S PubidLiteral S SystemLiteral
4724
 *
4725
 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4726
 *
4727
 * Returns the function returns SystemLiteral and in the second
4728
 *                case publicID receives PubidLiteral, is strict is off
4729
 *                it is possible to return NULL and have publicID set.
4730
 */
4731
4732
xmlChar *
4733
719k
xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4734
719k
    xmlChar *URI = NULL;
4735
4736
719k
    SHRINK;
4737
4738
719k
    *publicID = NULL;
4739
719k
    if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4740
267k
        SKIP(6);
4741
267k
  if (SKIP_BLANKS == 0) {
4742
939
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4743
939
                     "Space required after 'SYSTEM'\n");
4744
939
  }
4745
267k
  URI = xmlParseSystemLiteral(ctxt);
4746
267k
  if (URI == NULL) {
4747
1.55k
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4748
1.55k
        }
4749
452k
    } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4750
104k
        SKIP(6);
4751
104k
  if (SKIP_BLANKS == 0) {
4752
2.43k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4753
2.43k
        "Space required after 'PUBLIC'\n");
4754
2.43k
  }
4755
104k
  *publicID = xmlParsePubidLiteral(ctxt);
4756
104k
  if (*publicID == NULL) {
4757
2.14k
      xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4758
2.14k
  }
4759
104k
  if (strict) {
4760
      /*
4761
       * We don't handle [83] so "S SystemLiteral" is required.
4762
       */
4763
89.1k
      if (SKIP_BLANKS == 0) {
4764
12.5k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4765
12.5k
      "Space required after the Public Identifier\n");
4766
12.5k
      }
4767
89.1k
  } else {
4768
      /*
4769
       * We handle [83] so we return immediately, if
4770
       * "S SystemLiteral" is not detected. We skip blanks if no
4771
             * system literal was found, but this is harmless since we must
4772
             * be at the end of a NotationDecl.
4773
       */
4774
15.3k
      if (SKIP_BLANKS == 0) return(NULL);
4775
1.84k
      if ((CUR != '\'') && (CUR != '"')) return(NULL);
4776
1.84k
  }
4777
89.7k
  URI = xmlParseSystemLiteral(ctxt);
4778
89.7k
  if (URI == NULL) {
4779
11.8k
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4780
11.8k
        }
4781
89.7k
    }
4782
705k
    return(URI);
4783
719k
}
4784
4785
/**
4786
 * xmlParseCommentComplex:
4787
 * @ctxt:  an XML parser context
4788
 * @buf:  the already parsed part of the buffer
4789
 * @len:  number of bytes in the buffer
4790
 * @size:  allocated size of the buffer
4791
 *
4792
 * Skip an XML (SGML) comment <!-- .... -->
4793
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4794
 *  must not occur within comments. "
4795
 * This is the slow routine in case the accelerator for ascii didn't work
4796
 *
4797
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4798
 */
4799
static void
4800
xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4801
289k
                       size_t len, size_t size) {
4802
289k
    int q, ql;
4803
289k
    int r, rl;
4804
289k
    int cur, l;
4805
289k
    size_t count = 0;
4806
289k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4807
96.4k
                       XML_MAX_HUGE_LENGTH :
4808
289k
                       XML_MAX_TEXT_LENGTH;
4809
289k
    int inputid;
4810
4811
289k
    inputid = ctxt->input->id;
4812
4813
289k
    if (buf == NULL) {
4814
38.9k
        len = 0;
4815
38.9k
  size = XML_PARSER_BUFFER_SIZE;
4816
38.9k
  buf = (xmlChar *) xmlMallocAtomic(size);
4817
38.9k
  if (buf == NULL) {
4818
0
      xmlErrMemory(ctxt, NULL);
4819
0
      return;
4820
0
  }
4821
38.9k
    }
4822
289k
    GROW; /* Assure there's enough input data */
4823
289k
    q = CUR_CHAR(ql);
4824
289k
    if (q == 0)
4825
26.9k
        goto not_terminated;
4826
262k
    if (!IS_CHAR(q)) {
4827
34.6k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4828
34.6k
                          "xmlParseComment: invalid xmlChar value %d\n",
4829
34.6k
                    q);
4830
34.6k
  xmlFree (buf);
4831
34.6k
  return;
4832
34.6k
    }
4833
228k
    NEXTL(ql);
4834
228k
    r = CUR_CHAR(rl);
4835
228k
    if (r == 0)
4836
4.49k
        goto not_terminated;
4837
223k
    if (!IS_CHAR(r)) {
4838
17.2k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4839
17.2k
                          "xmlParseComment: invalid xmlChar value %d\n",
4840
17.2k
                    r);
4841
17.2k
  xmlFree (buf);
4842
17.2k
  return;
4843
17.2k
    }
4844
206k
    NEXTL(rl);
4845
206k
    cur = CUR_CHAR(l);
4846
206k
    if (cur == 0)
4847
2.45k
        goto not_terminated;
4848
24.9M
    while (IS_CHAR(cur) && /* checked */
4849
24.9M
           ((cur != '>') ||
4850
24.8M
      (r != '-') || (q != '-'))) {
4851
24.7M
  if ((r == '-') && (q == '-')) {
4852
154k
      xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4853
154k
  }
4854
24.7M
  if (len + 5 >= size) {
4855
97.7k
      xmlChar *new_buf;
4856
97.7k
            size_t new_size;
4857
4858
97.7k
      new_size = size * 2;
4859
97.7k
      new_buf = (xmlChar *) xmlRealloc(buf, new_size);
4860
97.7k
      if (new_buf == NULL) {
4861
0
    xmlFree (buf);
4862
0
    xmlErrMemory(ctxt, NULL);
4863
0
    return;
4864
0
      }
4865
97.7k
      buf = new_buf;
4866
97.7k
            size = new_size;
4867
97.7k
  }
4868
24.7M
  COPY_BUF(ql,buf,len,q);
4869
24.7M
  q = r;
4870
24.7M
  ql = rl;
4871
24.7M
  r = cur;
4872
24.7M
  rl = l;
4873
4874
24.7M
  count++;
4875
24.7M
  if (count > 50) {
4876
400k
      SHRINK;
4877
400k
      GROW;
4878
400k
      count = 0;
4879
400k
            if (ctxt->instate == XML_PARSER_EOF) {
4880
0
    xmlFree(buf);
4881
0
    return;
4882
0
            }
4883
400k
  }
4884
24.7M
  NEXTL(l);
4885
24.7M
  cur = CUR_CHAR(l);
4886
24.7M
  if (cur == 0) {
4887
23.1k
      SHRINK;
4888
23.1k
      GROW;
4889
23.1k
      cur = CUR_CHAR(l);
4890
23.1k
  }
4891
4892
24.7M
        if (len > maxLength) {
4893
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4894
0
                         "Comment too big found", NULL);
4895
0
            xmlFree (buf);
4896
0
            return;
4897
0
        }
4898
24.7M
    }
4899
204k
    buf[len] = 0;
4900
204k
    if (cur == 0) {
4901
23.1k
  xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4902
23.1k
                       "Comment not terminated \n<!--%.50s\n", buf);
4903
180k
    } else if (!IS_CHAR(cur)) {
4904
40.8k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4905
40.8k
                          "xmlParseComment: invalid xmlChar value %d\n",
4906
40.8k
                    cur);
4907
140k
    } else {
4908
140k
  if (inputid != ctxt->input->id) {
4909
0
      xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4910
0
               "Comment doesn't start and stop in the same"
4911
0
                           " entity\n");
4912
0
  }
4913
140k
        NEXT;
4914
140k
  if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4915
140k
      (!ctxt->disableSAX))
4916
117k
      ctxt->sax->comment(ctxt->userData, buf);
4917
140k
    }
4918
204k
    xmlFree(buf);
4919
204k
    return;
4920
33.8k
not_terminated:
4921
33.8k
    xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4922
33.8k
       "Comment not terminated\n", NULL);
4923
33.8k
    xmlFree(buf);
4924
33.8k
    return;
4925
204k
}
4926
4927
/**
4928
 * xmlParseComment:
4929
 * @ctxt:  an XML parser context
4930
 *
4931
 * DEPRECATED: Internal function, don't use.
4932
 *
4933
 * Parse an XML (SGML) comment. Always consumes '<!'.
4934
 *
4935
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4936
 *  must not occur within comments. "
4937
 *
4938
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4939
 */
4940
void
4941
18.0M
xmlParseComment(xmlParserCtxtPtr ctxt) {
4942
18.0M
    xmlChar *buf = NULL;
4943
18.0M
    size_t size = XML_PARSER_BUFFER_SIZE;
4944
18.0M
    size_t len = 0;
4945
18.0M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4946
883k
                       XML_MAX_HUGE_LENGTH :
4947
18.0M
                       XML_MAX_TEXT_LENGTH;
4948
18.0M
    xmlParserInputState state;
4949
18.0M
    const xmlChar *in;
4950
18.0M
    size_t nbchar = 0;
4951
18.0M
    int ccol;
4952
18.0M
    int inputid;
4953
4954
    /*
4955
     * Check that there is a comment right here.
4956
     */
4957
18.0M
    if ((RAW != '<') || (NXT(1) != '!'))
4958
0
        return;
4959
18.0M
    SKIP(2);
4960
18.0M
    if ((RAW != '-') || (NXT(1) != '-'))
4961
341
        return;
4962
18.0M
    state = ctxt->instate;
4963
18.0M
    ctxt->instate = XML_PARSER_COMMENT;
4964
18.0M
    inputid = ctxt->input->id;
4965
18.0M
    SKIP(2);
4966
18.0M
    SHRINK;
4967
18.0M
    GROW;
4968
4969
    /*
4970
     * Accelerated common case where input don't need to be
4971
     * modified before passing it to the handler.
4972
     */
4973
18.0M
    in = ctxt->input->cur;
4974
18.0M
    do {
4975
18.0M
  if (*in == 0xA) {
4976
100k
      do {
4977
100k
    ctxt->input->line++; ctxt->input->col = 1;
4978
100k
    in++;
4979
100k
      } while (*in == 0xA);
4980
71.3k
  }
4981
20.6M
get_more:
4982
20.6M
        ccol = ctxt->input->col;
4983
120M
  while (((*in > '-') && (*in <= 0x7F)) ||
4984
120M
         ((*in >= 0x20) && (*in < '-')) ||
4985
120M
         (*in == 0x09)) {
4986
99.5M
        in++;
4987
99.5M
        ccol++;
4988
99.5M
  }
4989
20.6M
  ctxt->input->col = ccol;
4990
20.6M
  if (*in == 0xA) {
4991
945k
      do {
4992
945k
    ctxt->input->line++; ctxt->input->col = 1;
4993
945k
    in++;
4994
945k
      } while (*in == 0xA);
4995
845k
      goto get_more;
4996
845k
  }
4997
19.8M
  nbchar = in - ctxt->input->cur;
4998
  /*
4999
   * save current set of data
5000
   */
5001
19.8M
  if (nbchar > 0) {
5002
2.81M
      if ((ctxt->sax != NULL) &&
5003
2.81M
    (ctxt->sax->comment != NULL)) {
5004
2.81M
    if (buf == NULL) {
5005
1.10M
        if ((*in == '-') && (in[1] == '-'))
5006
686k
            size = nbchar + 1;
5007
421k
        else
5008
421k
            size = XML_PARSER_BUFFER_SIZE + nbchar;
5009
1.10M
        buf = (xmlChar *) xmlMallocAtomic(size);
5010
1.10M
        if (buf == NULL) {
5011
0
            xmlErrMemory(ctxt, NULL);
5012
0
      ctxt->instate = state;
5013
0
      return;
5014
0
        }
5015
1.10M
        len = 0;
5016
1.70M
    } else if (len + nbchar + 1 >= size) {
5017
230k
        xmlChar *new_buf;
5018
230k
        size  += len + nbchar + XML_PARSER_BUFFER_SIZE;
5019
230k
        new_buf = (xmlChar *) xmlRealloc(buf, size);
5020
230k
        if (new_buf == NULL) {
5021
0
            xmlFree (buf);
5022
0
      xmlErrMemory(ctxt, NULL);
5023
0
      ctxt->instate = state;
5024
0
      return;
5025
0
        }
5026
230k
        buf = new_buf;
5027
230k
    }
5028
2.81M
    memcpy(&buf[len], ctxt->input->cur, nbchar);
5029
2.81M
    len += nbchar;
5030
2.81M
    buf[len] = 0;
5031
2.81M
      }
5032
2.81M
  }
5033
19.8M
        if (len > maxLength) {
5034
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5035
0
                         "Comment too big found", NULL);
5036
0
            xmlFree (buf);
5037
0
            return;
5038
0
        }
5039
19.8M
  ctxt->input->cur = in;
5040
19.8M
  if (*in == 0xA) {
5041
0
      in++;
5042
0
      ctxt->input->line++; ctxt->input->col = 1;
5043
0
  }
5044
19.8M
  if (*in == 0xD) {
5045
958k
      in++;
5046
958k
      if (*in == 0xA) {
5047
941k
    ctxt->input->cur = in;
5048
941k
    in++;
5049
941k
    ctxt->input->line++; ctxt->input->col = 1;
5050
941k
    goto get_more;
5051
941k
      }
5052
17.0k
      in--;
5053
17.0k
  }
5054
18.8M
  SHRINK;
5055
18.8M
  GROW;
5056
18.8M
        if (ctxt->instate == XML_PARSER_EOF) {
5057
0
            xmlFree(buf);
5058
0
            return;
5059
0
        }
5060
18.8M
  in = ctxt->input->cur;
5061
18.8M
  if (*in == '-') {
5062
18.5M
      if (in[1] == '-') {
5063
17.9M
          if (in[2] == '>') {
5064
17.7M
        if (ctxt->input->id != inputid) {
5065
0
      xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5066
0
                     "comment doesn't start and stop in the"
5067
0
                                       " same entity\n");
5068
0
        }
5069
17.7M
        SKIP(3);
5070
17.7M
        if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5071
17.7M
            (!ctxt->disableSAX)) {
5072
13.5M
      if (buf != NULL)
5073
789k
          ctxt->sax->comment(ctxt->userData, buf);
5074
12.7M
      else
5075
12.7M
          ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5076
13.5M
        }
5077
17.7M
        if (buf != NULL)
5078
856k
            xmlFree(buf);
5079
17.7M
        if (ctxt->instate != XML_PARSER_EOF)
5080
17.7M
      ctxt->instate = state;
5081
17.7M
        return;
5082
17.7M
    }
5083
151k
    if (buf != NULL) {
5084
141k
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5085
141k
                          "Double hyphen within comment: "
5086
141k
                                      "<!--%.50s\n",
5087
141k
              buf);
5088
141k
    } else
5089
10.6k
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5090
10.6k
                          "Double hyphen within comment\n", NULL);
5091
151k
                if (ctxt->instate == XML_PARSER_EOF) {
5092
0
                    xmlFree(buf);
5093
0
                    return;
5094
0
                }
5095
151k
    in++;
5096
151k
    ctxt->input->col++;
5097
151k
      }
5098
788k
      in++;
5099
788k
      ctxt->input->col++;
5100
788k
      goto get_more;
5101
18.5M
  }
5102
18.8M
    } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
5103
289k
    xmlParseCommentComplex(ctxt, buf, len, size);
5104
289k
    ctxt->instate = state;
5105
289k
    return;
5106
18.0M
}
5107
5108
5109
/**
5110
 * xmlParsePITarget:
5111
 * @ctxt:  an XML parser context
5112
 *
5113
 * DEPRECATED: Internal function, don't use.
5114
 *
5115
 * parse the name of a PI
5116
 *
5117
 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5118
 *
5119
 * Returns the PITarget name or NULL
5120
 */
5121
5122
const xmlChar *
5123
767k
xmlParsePITarget(xmlParserCtxtPtr ctxt) {
5124
767k
    const xmlChar *name;
5125
5126
767k
    name = xmlParseName(ctxt);
5127
767k
    if ((name != NULL) &&
5128
767k
        ((name[0] == 'x') || (name[0] == 'X')) &&
5129
767k
        ((name[1] == 'm') || (name[1] == 'M')) &&
5130
767k
        ((name[2] == 'l') || (name[2] == 'L'))) {
5131
257k
  int i;
5132
257k
  if ((name[0] == 'x') && (name[1] == 'm') &&
5133
257k
      (name[2] == 'l') && (name[3] == 0)) {
5134
185k
      xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5135
185k
     "XML declaration allowed only at the start of the document\n");
5136
185k
      return(name);
5137
185k
  } else if (name[3] == 0) {
5138
32.8k
      xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5139
32.8k
      return(name);
5140
32.8k
  }
5141
113k
  for (i = 0;;i++) {
5142
113k
      if (xmlW3CPIs[i] == NULL) break;
5143
76.4k
      if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5144
1.57k
          return(name);
5145
76.4k
  }
5146
36.9k
  xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5147
36.9k
          "xmlParsePITarget: invalid name prefix 'xml'\n",
5148
36.9k
          NULL, NULL);
5149
36.9k
    }
5150
547k
    if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5151
27.0k
  xmlNsErr(ctxt, XML_NS_ERR_COLON,
5152
27.0k
     "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5153
27.0k
    }
5154
547k
    return(name);
5155
767k
}
5156
5157
#ifdef LIBXML_CATALOG_ENABLED
5158
/**
5159
 * xmlParseCatalogPI:
5160
 * @ctxt:  an XML parser context
5161
 * @catalog:  the PI value string
5162
 *
5163
 * parse an XML Catalog Processing Instruction.
5164
 *
5165
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
5166
 *
5167
 * Occurs only if allowed by the user and if happening in the Misc
5168
 * part of the document before any doctype information
5169
 * This will add the given catalog to the parsing context in order
5170
 * to be used if there is a resolution need further down in the document
5171
 */
5172
5173
static void
5174
335
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
5175
335
    xmlChar *URL = NULL;
5176
335
    const xmlChar *tmp, *base;
5177
335
    xmlChar marker;
5178
5179
335
    tmp = catalog;
5180
335
    while (IS_BLANK_CH(*tmp)) tmp++;
5181
335
    if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
5182
286
  goto error;
5183
49
    tmp += 7;
5184
270
    while (IS_BLANK_CH(*tmp)) tmp++;
5185
49
    if (*tmp != '=') {
5186
31
  return;
5187
31
    }
5188
18
    tmp++;
5189
36
    while (IS_BLANK_CH(*tmp)) tmp++;
5190
18
    marker = *tmp;
5191
18
    if ((marker != '\'') && (marker != '"'))
5192
12
  goto error;
5193
6
    tmp++;
5194
6
    base = tmp;
5195
132
    while ((*tmp != 0) && (*tmp != marker)) tmp++;
5196
6
    if (*tmp == 0)
5197
3
  goto error;
5198
3
    URL = xmlStrndup(base, tmp - base);
5199
3
    tmp++;
5200
3
    while (IS_BLANK_CH(*tmp)) tmp++;
5201
3
    if (*tmp != 0)
5202
0
  goto error;
5203
5204
3
    if (URL != NULL) {
5205
3
  ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
5206
3
  xmlFree(URL);
5207
3
    }
5208
3
    return;
5209
5210
301
error:
5211
301
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
5212
301
            "Catalog PI syntax error: %s\n",
5213
301
      catalog, NULL);
5214
301
    if (URL != NULL)
5215
0
  xmlFree(URL);
5216
301
}
5217
#endif
5218
5219
/**
5220
 * xmlParsePI:
5221
 * @ctxt:  an XML parser context
5222
 *
5223
 * DEPRECATED: Internal function, don't use.
5224
 *
5225
 * parse an XML Processing Instruction.
5226
 *
5227
 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5228
 *
5229
 * The processing is transferred to SAX once parsed.
5230
 */
5231
5232
void
5233
767k
xmlParsePI(xmlParserCtxtPtr ctxt) {
5234
767k
    xmlChar *buf = NULL;
5235
767k
    size_t len = 0;
5236
767k
    size_t size = XML_PARSER_BUFFER_SIZE;
5237
767k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5238
261k
                       XML_MAX_HUGE_LENGTH :
5239
767k
                       XML_MAX_TEXT_LENGTH;
5240
767k
    int cur, l;
5241
767k
    const xmlChar *target;
5242
767k
    xmlParserInputState state;
5243
767k
    int count = 0;
5244
5245
767k
    if ((RAW == '<') && (NXT(1) == '?')) {
5246
767k
  int inputid = ctxt->input->id;
5247
767k
  state = ctxt->instate;
5248
767k
        ctxt->instate = XML_PARSER_PI;
5249
  /*
5250
   * this is a Processing Instruction.
5251
   */
5252
767k
  SKIP(2);
5253
767k
  SHRINK;
5254
5255
  /*
5256
   * Parse the target name and check for special support like
5257
   * namespace.
5258
   */
5259
767k
        target = xmlParsePITarget(ctxt);
5260
767k
  if (target != NULL) {
5261
675k
      if ((RAW == '?') && (NXT(1) == '>')) {
5262
128k
    if (inputid != ctxt->input->id) {
5263
0
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5264
0
                             "PI declaration doesn't start and stop in"
5265
0
                                   " the same entity\n");
5266
0
    }
5267
128k
    SKIP(2);
5268
5269
    /*
5270
     * SAX: PI detected.
5271
     */
5272
128k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5273
128k
        (ctxt->sax->processingInstruction != NULL))
5274
116k
        ctxt->sax->processingInstruction(ctxt->userData,
5275
116k
                                         target, NULL);
5276
128k
    if (ctxt->instate != XML_PARSER_EOF)
5277
128k
        ctxt->instate = state;
5278
128k
    return;
5279
128k
      }
5280
546k
      buf = (xmlChar *) xmlMallocAtomic(size);
5281
546k
      if (buf == NULL) {
5282
0
    xmlErrMemory(ctxt, NULL);
5283
0
    ctxt->instate = state;
5284
0
    return;
5285
0
      }
5286
546k
      if (SKIP_BLANKS == 0) {
5287
220k
    xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5288
220k
        "ParsePI: PI %s space expected\n", target);
5289
220k
      }
5290
546k
      cur = CUR_CHAR(l);
5291
34.3M
      while (IS_CHAR(cur) && /* checked */
5292
34.3M
       ((cur != '?') || (NXT(1) != '>'))) {
5293
33.8M
    if (len + 5 >= size) {
5294
79.6k
        xmlChar *tmp;
5295
79.6k
                    size_t new_size = size * 2;
5296
79.6k
        tmp = (xmlChar *) xmlRealloc(buf, new_size);
5297
79.6k
        if (tmp == NULL) {
5298
0
      xmlErrMemory(ctxt, NULL);
5299
0
      xmlFree(buf);
5300
0
      ctxt->instate = state;
5301
0
      return;
5302
0
        }
5303
79.6k
        buf = tmp;
5304
79.6k
                    size = new_size;
5305
79.6k
    }
5306
33.8M
    count++;
5307
33.8M
    if (count > 50) {
5308
474k
        SHRINK;
5309
474k
        GROW;
5310
474k
                    if (ctxt->instate == XML_PARSER_EOF) {
5311
0
                        xmlFree(buf);
5312
0
                        return;
5313
0
                    }
5314
474k
        count = 0;
5315
474k
    }
5316
33.8M
    COPY_BUF(l,buf,len,cur);
5317
33.8M
    NEXTL(l);
5318
33.8M
    cur = CUR_CHAR(l);
5319
33.8M
    if (cur == 0) {
5320
34.1k
        SHRINK;
5321
34.1k
        GROW;
5322
34.1k
        cur = CUR_CHAR(l);
5323
34.1k
    }
5324
33.8M
                if (len > maxLength) {
5325
0
                    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5326
0
                                      "PI %s too big found", target);
5327
0
                    xmlFree(buf);
5328
0
                    ctxt->instate = state;
5329
0
                    return;
5330
0
                }
5331
33.8M
      }
5332
546k
      buf[len] = 0;
5333
546k
      if (cur != '?') {
5334
114k
    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5335
114k
          "ParsePI: PI %s never end ...\n", target);
5336
431k
      } else {
5337
431k
    if (inputid != ctxt->input->id) {
5338
31
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5339
31
                             "PI declaration doesn't start and stop in"
5340
31
                                   " the same entity\n");
5341
31
    }
5342
431k
    SKIP(2);
5343
5344
431k
#ifdef LIBXML_CATALOG_ENABLED
5345
431k
    if (((state == XML_PARSER_MISC) ||
5346
431k
               (state == XML_PARSER_START)) &&
5347
431k
        (xmlStrEqual(target, XML_CATALOG_PI))) {
5348
335
        xmlCatalogAllow allow = xmlCatalogGetDefaults();
5349
335
        if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5350
335
      (allow == XML_CATA_ALLOW_ALL))
5351
335
      xmlParseCatalogPI(ctxt, buf);
5352
335
    }
5353
431k
#endif
5354
5355
5356
    /*
5357
     * SAX: PI detected.
5358
     */
5359
431k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5360
431k
        (ctxt->sax->processingInstruction != NULL))
5361
362k
        ctxt->sax->processingInstruction(ctxt->userData,
5362
362k
                                         target, buf);
5363
431k
      }
5364
546k
      xmlFree(buf);
5365
546k
  } else {
5366
92.0k
      xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5367
92.0k
  }
5368
638k
  if (ctxt->instate != XML_PARSER_EOF)
5369
638k
      ctxt->instate = state;
5370
638k
    }
5371
767k
}
5372
5373
/**
5374
 * xmlParseNotationDecl:
5375
 * @ctxt:  an XML parser context
5376
 *
5377
 * DEPRECATED: Internal function, don't use.
5378
 *
5379
 * Parse a notation declaration. Always consumes '<!'.
5380
 *
5381
 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID |  PublicID) S? '>'
5382
 *
5383
 * Hence there is actually 3 choices:
5384
 *     'PUBLIC' S PubidLiteral
5385
 *     'PUBLIC' S PubidLiteral S SystemLiteral
5386
 * and 'SYSTEM' S SystemLiteral
5387
 *
5388
 * See the NOTE on xmlParseExternalID().
5389
 */
5390
5391
void
5392
31.1k
xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5393
31.1k
    const xmlChar *name;
5394
31.1k
    xmlChar *Pubid;
5395
31.1k
    xmlChar *Systemid;
5396
5397
31.1k
    if ((CUR != '<') || (NXT(1) != '!'))
5398
0
        return;
5399
31.1k
    SKIP(2);
5400
5401
31.1k
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5402
29.9k
  int inputid = ctxt->input->id;
5403
29.9k
  SHRINK;
5404
29.9k
  SKIP(8);
5405
29.9k
  if (SKIP_BLANKS == 0) {
5406
587
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5407
587
         "Space required after '<!NOTATION'\n");
5408
587
      return;
5409
587
  }
5410
5411
29.3k
        name = xmlParseName(ctxt);
5412
29.3k
  if (name == NULL) {
5413
590
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5414
590
      return;
5415
590
  }
5416
28.7k
  if (xmlStrchr(name, ':') != NULL) {
5417
584
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5418
584
         "colons are forbidden from notation names '%s'\n",
5419
584
         name, NULL, NULL);
5420
584
  }
5421
28.7k
  if (SKIP_BLANKS == 0) {
5422
1.80k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5423
1.80k
         "Space required after the NOTATION name'\n");
5424
1.80k
      return;
5425
1.80k
  }
5426
5427
  /*
5428
   * Parse the IDs.
5429
   */
5430
26.9k
  Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5431
26.9k
  SKIP_BLANKS;
5432
5433
26.9k
  if (RAW == '>') {
5434
18.1k
      if (inputid != ctxt->input->id) {
5435
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5436
0
                         "Notation declaration doesn't start and stop"
5437
0
                               " in the same entity\n");
5438
0
      }
5439
18.1k
      NEXT;
5440
18.1k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5441
18.1k
    (ctxt->sax->notationDecl != NULL))
5442
14.2k
    ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5443
18.1k
  } else {
5444
8.77k
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5445
8.77k
  }
5446
26.9k
  if (Systemid != NULL) xmlFree(Systemid);
5447
26.9k
  if (Pubid != NULL) xmlFree(Pubid);
5448
26.9k
    }
5449
31.1k
}
5450
5451
/**
5452
 * xmlParseEntityDecl:
5453
 * @ctxt:  an XML parser context
5454
 *
5455
 * DEPRECATED: Internal function, don't use.
5456
 *
5457
 * Parse an entity declaration. Always consumes '<!'.
5458
 *
5459
 * [70] EntityDecl ::= GEDecl | PEDecl
5460
 *
5461
 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5462
 *
5463
 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5464
 *
5465
 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5466
 *
5467
 * [74] PEDef ::= EntityValue | ExternalID
5468
 *
5469
 * [76] NDataDecl ::= S 'NDATA' S Name
5470
 *
5471
 * [ VC: Notation Declared ]
5472
 * The Name must match the declared name of a notation.
5473
 */
5474
5475
void
5476
892k
xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5477
892k
    const xmlChar *name = NULL;
5478
892k
    xmlChar *value = NULL;
5479
892k
    xmlChar *URI = NULL, *literal = NULL;
5480
892k
    const xmlChar *ndata = NULL;
5481
892k
    int isParameter = 0;
5482
892k
    xmlChar *orig = NULL;
5483
5484
892k
    if ((CUR != '<') || (NXT(1) != '!'))
5485
0
        return;
5486
892k
    SKIP(2);
5487
5488
    /* GROW; done in the caller */
5489
892k
    if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5490
888k
  int inputid = ctxt->input->id;
5491
888k
  SHRINK;
5492
888k
  SKIP(6);
5493
888k
  if (SKIP_BLANKS == 0) {
5494
11.2k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5495
11.2k
         "Space required after '<!ENTITY'\n");
5496
11.2k
  }
5497
5498
888k
  if (RAW == '%') {
5499
214k
      NEXT;
5500
214k
      if (SKIP_BLANKS == 0) {
5501
2.89k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5502
2.89k
             "Space required after '%%'\n");
5503
2.89k
      }
5504
214k
      isParameter = 1;
5505
214k
  }
5506
5507
888k
        name = xmlParseName(ctxt);
5508
888k
  if (name == NULL) {
5509
23.3k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5510
23.3k
                     "xmlParseEntityDecl: no name\n");
5511
23.3k
            return;
5512
23.3k
  }
5513
865k
  if (xmlStrchr(name, ':') != NULL) {
5514
6.07k
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5515
6.07k
         "colons are forbidden from entities names '%s'\n",
5516
6.07k
         name, NULL, NULL);
5517
6.07k
  }
5518
865k
  if (SKIP_BLANKS == 0) {
5519
17.4k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5520
17.4k
         "Space required after the entity name\n");
5521
17.4k
  }
5522
5523
865k
  ctxt->instate = XML_PARSER_ENTITY_DECL;
5524
  /*
5525
   * handle the various case of definitions...
5526
   */
5527
865k
  if (isParameter) {
5528
212k
      if ((RAW == '"') || (RAW == '\'')) {
5529
179k
          value = xmlParseEntityValue(ctxt, &orig);
5530
179k
    if (value) {
5531
161k
        if ((ctxt->sax != NULL) &&
5532
161k
      (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5533
144k
      ctxt->sax->entityDecl(ctxt->userData, name,
5534
144k
                        XML_INTERNAL_PARAMETER_ENTITY,
5535
144k
            NULL, NULL, value);
5536
161k
    }
5537
179k
      } else {
5538
33.5k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5539
33.5k
    if ((URI == NULL) && (literal == NULL)) {
5540
2.19k
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5541
2.19k
    }
5542
33.5k
    if (URI) {
5543
30.3k
        xmlURIPtr uri;
5544
5545
30.3k
        uri = xmlParseURI((const char *) URI);
5546
30.3k
        if (uri == NULL) {
5547
1.60k
            xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5548
1.60k
             "Invalid URI: %s\n", URI);
5549
      /*
5550
       * This really ought to be a well formedness error
5551
       * but the XML Core WG decided otherwise c.f. issue
5552
       * E26 of the XML erratas.
5553
       */
5554
28.7k
        } else {
5555
28.7k
      if (uri->fragment != NULL) {
5556
          /*
5557
           * Okay this is foolish to block those but not
5558
           * invalid URIs.
5559
           */
5560
157
          xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5561
28.5k
      } else {
5562
28.5k
          if ((ctxt->sax != NULL) &&
5563
28.5k
        (!ctxt->disableSAX) &&
5564
28.5k
        (ctxt->sax->entityDecl != NULL))
5565
26.6k
        ctxt->sax->entityDecl(ctxt->userData, name,
5566
26.6k
              XML_EXTERNAL_PARAMETER_ENTITY,
5567
26.6k
              literal, URI, NULL);
5568
28.5k
      }
5569
28.7k
      xmlFreeURI(uri);
5570
28.7k
        }
5571
30.3k
    }
5572
33.5k
      }
5573
652k
  } else {
5574
652k
      if ((RAW == '"') || (RAW == '\'')) {
5575
511k
          value = xmlParseEntityValue(ctxt, &orig);
5576
511k
    if ((ctxt->sax != NULL) &&
5577
511k
        (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5578
441k
        ctxt->sax->entityDecl(ctxt->userData, name,
5579
441k
        XML_INTERNAL_GENERAL_ENTITY,
5580
441k
        NULL, NULL, value);
5581
    /*
5582
     * For expat compatibility in SAX mode.
5583
     */
5584
511k
    if ((ctxt->myDoc == NULL) ||
5585
511k
        (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5586
11.5k
        if (ctxt->myDoc == NULL) {
5587
2.04k
      ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5588
2.04k
      if (ctxt->myDoc == NULL) {
5589
0
          xmlErrMemory(ctxt, "New Doc failed");
5590
0
          return;
5591
0
      }
5592
2.04k
      ctxt->myDoc->properties = XML_DOC_INTERNAL;
5593
2.04k
        }
5594
11.5k
        if (ctxt->myDoc->intSubset == NULL)
5595
2.04k
      ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5596
2.04k
              BAD_CAST "fake", NULL, NULL);
5597
5598
11.5k
        xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5599
11.5k
                    NULL, NULL, value);
5600
11.5k
    }
5601
511k
      } else {
5602
141k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5603
141k
    if ((URI == NULL) && (literal == NULL)) {
5604
14.6k
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5605
14.6k
    }
5606
141k
    if (URI) {
5607
123k
        xmlURIPtr uri;
5608
5609
123k
        uri = xmlParseURI((const char *)URI);
5610
123k
        if (uri == NULL) {
5611
8.54k
            xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5612
8.54k
             "Invalid URI: %s\n", URI);
5613
      /*
5614
       * This really ought to be a well formedness error
5615
       * but the XML Core WG decided otherwise c.f. issue
5616
       * E26 of the XML erratas.
5617
       */
5618
114k
        } else {
5619
114k
      if (uri->fragment != NULL) {
5620
          /*
5621
           * Okay this is foolish to block those but not
5622
           * invalid URIs.
5623
           */
5624
5.21k
          xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5625
5.21k
      }
5626
114k
      xmlFreeURI(uri);
5627
114k
        }
5628
123k
    }
5629
141k
    if ((RAW != '>') && (SKIP_BLANKS == 0)) {
5630
15.5k
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5631
15.5k
           "Space required before 'NDATA'\n");
5632
15.5k
    }
5633
141k
    if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5634
24.8k
        SKIP(5);
5635
24.8k
        if (SKIP_BLANKS == 0) {
5636
707
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5637
707
               "Space required after 'NDATA'\n");
5638
707
        }
5639
24.8k
        ndata = xmlParseName(ctxt);
5640
24.8k
        if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5641
24.8k
            (ctxt->sax->unparsedEntityDecl != NULL))
5642
22.5k
      ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5643
22.5k
            literal, URI, ndata);
5644
116k
    } else {
5645
116k
        if ((ctxt->sax != NULL) &&
5646
116k
            (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5647
100k
      ctxt->sax->entityDecl(ctxt->userData, name,
5648
100k
            XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5649
100k
            literal, URI, NULL);
5650
        /*
5651
         * For expat compatibility in SAX mode.
5652
         * assuming the entity replacement was asked for
5653
         */
5654
116k
        if ((ctxt->replaceEntities != 0) &&
5655
116k
      ((ctxt->myDoc == NULL) ||
5656
77.0k
      (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5657
2.88k
      if (ctxt->myDoc == NULL) {
5658
1.43k
          ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5659
1.43k
          if (ctxt->myDoc == NULL) {
5660
0
              xmlErrMemory(ctxt, "New Doc failed");
5661
0
        return;
5662
0
          }
5663
1.43k
          ctxt->myDoc->properties = XML_DOC_INTERNAL;
5664
1.43k
      }
5665
5666
2.88k
      if (ctxt->myDoc->intSubset == NULL)
5667
1.43k
          ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5668
1.43k
            BAD_CAST "fake", NULL, NULL);
5669
2.88k
      xmlSAX2EntityDecl(ctxt, name,
5670
2.88k
                  XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5671
2.88k
                  literal, URI, NULL);
5672
2.88k
        }
5673
116k
    }
5674
141k
      }
5675
652k
  }
5676
865k
  if (ctxt->instate == XML_PARSER_EOF)
5677
844
      goto done;
5678
864k
  SKIP_BLANKS;
5679
864k
  if (RAW != '>') {
5680
37.7k
      xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5681
37.7k
              "xmlParseEntityDecl: entity %s not terminated\n", name);
5682
37.7k
      xmlHaltParser(ctxt);
5683
826k
  } else {
5684
826k
      if (inputid != ctxt->input->id) {
5685
87
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5686
87
                         "Entity declaration doesn't start and stop in"
5687
87
                               " the same entity\n");
5688
87
      }
5689
826k
      NEXT;
5690
826k
  }
5691
864k
  if (orig != NULL) {
5692
      /*
5693
       * Ugly mechanism to save the raw entity value.
5694
       */
5695
646k
      xmlEntityPtr cur = NULL;
5696
5697
646k
      if (isParameter) {
5698
169k
          if ((ctxt->sax != NULL) &&
5699
169k
        (ctxt->sax->getParameterEntity != NULL))
5700
169k
        cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5701
476k
      } else {
5702
476k
          if ((ctxt->sax != NULL) &&
5703
476k
        (ctxt->sax->getEntity != NULL))
5704
476k
        cur = ctxt->sax->getEntity(ctxt->userData, name);
5705
476k
    if ((cur == NULL) && (ctxt->userData==ctxt)) {
5706
38.9k
        cur = xmlSAX2GetEntity(ctxt, name);
5707
38.9k
    }
5708
476k
      }
5709
646k
            if ((cur != NULL) && (cur->orig == NULL)) {
5710
472k
    cur->orig = orig;
5711
472k
                orig = NULL;
5712
472k
      }
5713
646k
  }
5714
5715
865k
done:
5716
865k
  if (value != NULL) xmlFree(value);
5717
865k
  if (URI != NULL) xmlFree(URI);
5718
865k
  if (literal != NULL) xmlFree(literal);
5719
865k
        if (orig != NULL) xmlFree(orig);
5720
865k
    }
5721
892k
}
5722
5723
/**
5724
 * xmlParseDefaultDecl:
5725
 * @ctxt:  an XML parser context
5726
 * @value:  Receive a possible fixed default value for the attribute
5727
 *
5728
 * DEPRECATED: Internal function, don't use.
5729
 *
5730
 * Parse an attribute default declaration
5731
 *
5732
 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5733
 *
5734
 * [ VC: Required Attribute ]
5735
 * if the default declaration is the keyword #REQUIRED, then the
5736
 * attribute must be specified for all elements of the type in the
5737
 * attribute-list declaration.
5738
 *
5739
 * [ VC: Attribute Default Legal ]
5740
 * The declared default value must meet the lexical constraints of
5741
 * the declared attribute type c.f. xmlValidateAttributeDecl()
5742
 *
5743
 * [ VC: Fixed Attribute Default ]
5744
 * if an attribute has a default value declared with the #FIXED
5745
 * keyword, instances of that attribute must match the default value.
5746
 *
5747
 * [ WFC: No < in Attribute Values ]
5748
 * handled in xmlParseAttValue()
5749
 *
5750
 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5751
 *          or XML_ATTRIBUTE_FIXED.
5752
 */
5753
5754
int
5755
1.42M
xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5756
1.42M
    int val;
5757
1.42M
    xmlChar *ret;
5758
5759
1.42M
    *value = NULL;
5760
1.42M
    if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5761
131k
  SKIP(9);
5762
131k
  return(XML_ATTRIBUTE_REQUIRED);
5763
131k
    }
5764
1.29M
    if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5765
991k
  SKIP(8);
5766
991k
  return(XML_ATTRIBUTE_IMPLIED);
5767
991k
    }
5768
303k
    val = XML_ATTRIBUTE_NONE;
5769
303k
    if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5770
77.3k
  SKIP(6);
5771
77.3k
  val = XML_ATTRIBUTE_FIXED;
5772
77.3k
  if (SKIP_BLANKS == 0) {
5773
2.22k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5774
2.22k
         "Space required after '#FIXED'\n");
5775
2.22k
  }
5776
77.3k
    }
5777
303k
    ret = xmlParseAttValue(ctxt);
5778
303k
    ctxt->instate = XML_PARSER_DTD;
5779
303k
    if (ret == NULL) {
5780
8.26k
  xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5781
8.26k
           "Attribute default value declaration error\n");
5782
8.26k
    } else
5783
295k
        *value = ret;
5784
303k
    return(val);
5785
1.29M
}
5786
5787
/**
5788
 * xmlParseNotationType:
5789
 * @ctxt:  an XML parser context
5790
 *
5791
 * DEPRECATED: Internal function, don't use.
5792
 *
5793
 * parse an Notation attribute type.
5794
 *
5795
 * Note: the leading 'NOTATION' S part has already being parsed...
5796
 *
5797
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5798
 *
5799
 * [ VC: Notation Attributes ]
5800
 * Values of this type must match one of the notation names included
5801
 * in the declaration; all notation names in the declaration must be declared.
5802
 *
5803
 * Returns: the notation attribute tree built while parsing
5804
 */
5805
5806
xmlEnumerationPtr
5807
11.6k
xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5808
11.6k
    const xmlChar *name;
5809
11.6k
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5810
5811
11.6k
    if (RAW != '(') {
5812
1.04k
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5813
1.04k
  return(NULL);
5814
1.04k
    }
5815
10.6k
    SHRINK;
5816
14.9k
    do {
5817
14.9k
        NEXT;
5818
14.9k
  SKIP_BLANKS;
5819
14.9k
        name = xmlParseName(ctxt);
5820
14.9k
  if (name == NULL) {
5821
568
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5822
568
         "Name expected in NOTATION declaration\n");
5823
568
            xmlFreeEnumeration(ret);
5824
568
      return(NULL);
5825
568
  }
5826
14.3k
  tmp = ret;
5827
23.1k
  while (tmp != NULL) {
5828
9.76k
      if (xmlStrEqual(name, tmp->name)) {
5829
975
    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5830
975
    "standalone: attribute notation value token %s duplicated\n",
5831
975
         name, NULL);
5832
975
    if (!xmlDictOwns(ctxt->dict, name))
5833
0
        xmlFree((xmlChar *) name);
5834
975
    break;
5835
975
      }
5836
8.79k
      tmp = tmp->next;
5837
8.79k
  }
5838
14.3k
  if (tmp == NULL) {
5839
13.4k
      cur = xmlCreateEnumeration(name);
5840
13.4k
      if (cur == NULL) {
5841
0
                xmlFreeEnumeration(ret);
5842
0
                return(NULL);
5843
0
            }
5844
13.4k
      if (last == NULL) ret = last = cur;
5845
3.24k
      else {
5846
3.24k
    last->next = cur;
5847
3.24k
    last = cur;
5848
3.24k
      }
5849
13.4k
  }
5850
14.3k
  SKIP_BLANKS;
5851
14.3k
    } while (RAW == '|');
5852
10.0k
    if (RAW != ')') {
5853
3.07k
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5854
3.07k
        xmlFreeEnumeration(ret);
5855
3.07k
  return(NULL);
5856
3.07k
    }
5857
6.98k
    NEXT;
5858
6.98k
    return(ret);
5859
10.0k
}
5860
5861
/**
5862
 * xmlParseEnumerationType:
5863
 * @ctxt:  an XML parser context
5864
 *
5865
 * DEPRECATED: Internal function, don't use.
5866
 *
5867
 * parse an Enumeration attribute type.
5868
 *
5869
 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5870
 *
5871
 * [ VC: Enumeration ]
5872
 * Values of this type must match one of the Nmtoken tokens in
5873
 * the declaration
5874
 *
5875
 * Returns: the enumeration attribute tree built while parsing
5876
 */
5877
5878
xmlEnumerationPtr
5879
183k
xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5880
183k
    xmlChar *name;
5881
183k
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5882
5883
183k
    if (RAW != '(') {
5884
25.1k
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5885
25.1k
  return(NULL);
5886
25.1k
    }
5887
158k
    SHRINK;
5888
408k
    do {
5889
408k
        NEXT;
5890
408k
  SKIP_BLANKS;
5891
408k
        name = xmlParseNmtoken(ctxt);
5892
408k
  if (name == NULL) {
5893
2.33k
      xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5894
2.33k
      return(ret);
5895
2.33k
  }
5896
406k
  tmp = ret;
5897
1.01M
  while (tmp != NULL) {
5898
618k
      if (xmlStrEqual(name, tmp->name)) {
5899
11.9k
    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5900
11.9k
    "standalone: attribute enumeration value token %s duplicated\n",
5901
11.9k
         name, NULL);
5902
11.9k
    if (!xmlDictOwns(ctxt->dict, name))
5903
11.9k
        xmlFree(name);
5904
11.9k
    break;
5905
11.9k
      }
5906
606k
      tmp = tmp->next;
5907
606k
  }
5908
406k
  if (tmp == NULL) {
5909
394k
      cur = xmlCreateEnumeration(name);
5910
394k
      if (!xmlDictOwns(ctxt->dict, name))
5911
394k
    xmlFree(name);
5912
394k
      if (cur == NULL) {
5913
0
                xmlFreeEnumeration(ret);
5914
0
                return(NULL);
5915
0
            }
5916
394k
      if (last == NULL) ret = last = cur;
5917
237k
      else {
5918
237k
    last->next = cur;
5919
237k
    last = cur;
5920
237k
      }
5921
394k
  }
5922
406k
  SKIP_BLANKS;
5923
406k
    } while (RAW == '|');
5924
155k
    if (RAW != ')') {
5925
6.23k
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5926
6.23k
  return(ret);
5927
6.23k
    }
5928
149k
    NEXT;
5929
149k
    return(ret);
5930
155k
}
5931
5932
/**
5933
 * xmlParseEnumeratedType:
5934
 * @ctxt:  an XML parser context
5935
 * @tree:  the enumeration tree built while parsing
5936
 *
5937
 * DEPRECATED: Internal function, don't use.
5938
 *
5939
 * parse an Enumerated attribute type.
5940
 *
5941
 * [57] EnumeratedType ::= NotationType | Enumeration
5942
 *
5943
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5944
 *
5945
 *
5946
 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5947
 */
5948
5949
int
5950
195k
xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5951
195k
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5952
12.0k
  SKIP(8);
5953
12.0k
  if (SKIP_BLANKS == 0) {
5954
370
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5955
370
         "Space required after 'NOTATION'\n");
5956
370
      return(0);
5957
370
  }
5958
11.6k
  *tree = xmlParseNotationType(ctxt);
5959
11.6k
  if (*tree == NULL) return(0);
5960
6.98k
  return(XML_ATTRIBUTE_NOTATION);
5961
11.6k
    }
5962
183k
    *tree = xmlParseEnumerationType(ctxt);
5963
183k
    if (*tree == NULL) return(0);
5964
156k
    return(XML_ATTRIBUTE_ENUMERATION);
5965
183k
}
5966
5967
/**
5968
 * xmlParseAttributeType:
5969
 * @ctxt:  an XML parser context
5970
 * @tree:  the enumeration tree built while parsing
5971
 *
5972
 * DEPRECATED: Internal function, don't use.
5973
 *
5974
 * parse the Attribute list def for an element
5975
 *
5976
 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5977
 *
5978
 * [55] StringType ::= 'CDATA'
5979
 *
5980
 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5981
 *                        'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5982
 *
5983
 * Validity constraints for attribute values syntax are checked in
5984
 * xmlValidateAttributeValue()
5985
 *
5986
 * [ VC: ID ]
5987
 * Values of type ID must match the Name production. A name must not
5988
 * appear more than once in an XML document as a value of this type;
5989
 * i.e., ID values must uniquely identify the elements which bear them.
5990
 *
5991
 * [ VC: One ID per Element Type ]
5992
 * No element type may have more than one ID attribute specified.
5993
 *
5994
 * [ VC: ID Attribute Default ]
5995
 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5996
 *
5997
 * [ VC: IDREF ]
5998
 * Values of type IDREF must match the Name production, and values
5999
 * of type IDREFS must match Names; each IDREF Name must match the value
6000
 * of an ID attribute on some element in the XML document; i.e. IDREF
6001
 * values must match the value of some ID attribute.
6002
 *
6003
 * [ VC: Entity Name ]
6004
 * Values of type ENTITY must match the Name production, values
6005
 * of type ENTITIES must match Names; each Entity Name must match the
6006
 * name of an unparsed entity declared in the DTD.
6007
 *
6008
 * [ VC: Name Token ]
6009
 * Values of type NMTOKEN must match the Nmtoken production; values
6010
 * of type NMTOKENS must match Nmtokens.
6011
 *
6012
 * Returns the attribute type
6013
 */
6014
int
6015
1.47M
xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
6016
1.47M
    SHRINK;
6017
1.47M
    if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
6018
570k
  SKIP(5);
6019
570k
  return(XML_ATTRIBUTE_CDATA);
6020
902k
     } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
6021
25.9k
  SKIP(6);
6022
25.9k
  return(XML_ATTRIBUTE_IDREFS);
6023
876k
     } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
6024
47.2k
  SKIP(5);
6025
47.2k
  return(XML_ATTRIBUTE_IDREF);
6026
828k
     } else if ((RAW == 'I') && (NXT(1) == 'D')) {
6027
302k
        SKIP(2);
6028
302k
  return(XML_ATTRIBUTE_ID);
6029
526k
     } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
6030
9.59k
  SKIP(6);
6031
9.59k
  return(XML_ATTRIBUTE_ENTITY);
6032
516k
     } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
6033
27.0k
  SKIP(8);
6034
27.0k
  return(XML_ATTRIBUTE_ENTITIES);
6035
489k
     } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
6036
80.8k
  SKIP(8);
6037
80.8k
  return(XML_ATTRIBUTE_NMTOKENS);
6038
408k
     } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
6039
213k
  SKIP(7);
6040
213k
  return(XML_ATTRIBUTE_NMTOKEN);
6041
213k
     }
6042
195k
     return(xmlParseEnumeratedType(ctxt, tree));
6043
1.47M
}
6044
6045
/**
6046
 * xmlParseAttributeListDecl:
6047
 * @ctxt:  an XML parser context
6048
 *
6049
 * DEPRECATED: Internal function, don't use.
6050
 *
6051
 * Parse an attribute list declaration for an element. Always consumes '<!'.
6052
 *
6053
 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
6054
 *
6055
 * [53] AttDef ::= S Name S AttType S DefaultDecl
6056
 *
6057
 */
6058
void
6059
745k
xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
6060
745k
    const xmlChar *elemName;
6061
745k
    const xmlChar *attrName;
6062
745k
    xmlEnumerationPtr tree;
6063
6064
745k
    if ((CUR != '<') || (NXT(1) != '!'))
6065
0
        return;
6066
745k
    SKIP(2);
6067
6068
745k
    if (CMP7(CUR_PTR, 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
6069
740k
  int inputid = ctxt->input->id;
6070
6071
740k
  SKIP(7);
6072
740k
  if (SKIP_BLANKS == 0) {
6073
19.9k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6074
19.9k
                     "Space required after '<!ATTLIST'\n");
6075
19.9k
  }
6076
740k
        elemName = xmlParseName(ctxt);
6077
740k
  if (elemName == NULL) {
6078
7.15k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6079
7.15k
         "ATTLIST: no name for Element\n");
6080
7.15k
      return;
6081
7.15k
  }
6082
733k
  SKIP_BLANKS;
6083
733k
  GROW;
6084
2.09M
  while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
6085
1.53M
      int type;
6086
1.53M
      int def;
6087
1.53M
      xmlChar *defaultValue = NULL;
6088
6089
1.53M
      GROW;
6090
1.53M
            tree = NULL;
6091
1.53M
      attrName = xmlParseName(ctxt);
6092
1.53M
      if (attrName == NULL) {
6093
28.5k
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6094
28.5k
             "ATTLIST: no name for Attribute\n");
6095
28.5k
    break;
6096
28.5k
      }
6097
1.50M
      GROW;
6098
1.50M
      if (SKIP_BLANKS == 0) {
6099
32.0k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6100
32.0k
            "Space required after the attribute name\n");
6101
32.0k
    break;
6102
32.0k
      }
6103
6104
1.47M
      type = xmlParseAttributeType(ctxt, &tree);
6105
1.47M
      if (type <= 0) {
6106
32.1k
          break;
6107
32.1k
      }
6108
6109
1.44M
      GROW;
6110
1.44M
      if (SKIP_BLANKS == 0) {
6111
13.7k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6112
13.7k
             "Space required after the attribute type\n");
6113
13.7k
          if (tree != NULL)
6114
9.92k
        xmlFreeEnumeration(tree);
6115
13.7k
    break;
6116
13.7k
      }
6117
6118
1.42M
      def = xmlParseDefaultDecl(ctxt, &defaultValue);
6119
1.42M
      if (def <= 0) {
6120
0
                if (defaultValue != NULL)
6121
0
        xmlFree(defaultValue);
6122
0
          if (tree != NULL)
6123
0
        xmlFreeEnumeration(tree);
6124
0
          break;
6125
0
      }
6126
1.42M
      if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6127
195k
          xmlAttrNormalizeSpace(defaultValue, defaultValue);
6128
6129
1.42M
      GROW;
6130
1.42M
            if (RAW != '>') {
6131
1.14M
    if (SKIP_BLANKS == 0) {
6132
60.9k
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6133
60.9k
      "Space required after the attribute default value\n");
6134
60.9k
        if (defaultValue != NULL)
6135
52.3k
      xmlFree(defaultValue);
6136
60.9k
        if (tree != NULL)
6137
12.5k
      xmlFreeEnumeration(tree);
6138
60.9k
        break;
6139
60.9k
    }
6140
1.14M
      }
6141
1.36M
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6142
1.36M
    (ctxt->sax->attributeDecl != NULL))
6143
1.25M
    ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6144
1.25M
                          type, def, defaultValue, tree);
6145
111k
      else if (tree != NULL)
6146
13.8k
    xmlFreeEnumeration(tree);
6147
6148
1.36M
      if ((ctxt->sax2) && (defaultValue != NULL) &&
6149
1.36M
          (def != XML_ATTRIBUTE_IMPLIED) &&
6150
1.36M
    (def != XML_ATTRIBUTE_REQUIRED)) {
6151
166k
    xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6152
166k
      }
6153
1.36M
      if (ctxt->sax2) {
6154
964k
    xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6155
964k
      }
6156
1.36M
      if (defaultValue != NULL)
6157
242k
          xmlFree(defaultValue);
6158
1.36M
      GROW;
6159
1.36M
  }
6160
733k
  if (RAW == '>') {
6161
580k
      if (inputid != ctxt->input->id) {
6162
41
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6163
41
                               "Attribute list declaration doesn't start and"
6164
41
                               " stop in the same entity\n");
6165
41
      }
6166
580k
      NEXT;
6167
580k
  }
6168
733k
    }
6169
745k
}
6170
6171
/**
6172
 * xmlParseElementMixedContentDecl:
6173
 * @ctxt:  an XML parser context
6174
 * @inputchk:  the input used for the current entity, needed for boundary checks
6175
 *
6176
 * DEPRECATED: Internal function, don't use.
6177
 *
6178
 * parse the declaration for a Mixed Element content
6179
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6180
 *
6181
 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6182
 *                '(' S? '#PCDATA' S? ')'
6183
 *
6184
 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6185
 *
6186
 * [ VC: No Duplicate Types ]
6187
 * The same name must not appear more than once in a single
6188
 * mixed-content declaration.
6189
 *
6190
 * returns: the list of the xmlElementContentPtr describing the element choices
6191
 */
6192
xmlElementContentPtr
6193
323k
xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6194
323k
    xmlElementContentPtr ret = NULL, cur = NULL, n;
6195
323k
    const xmlChar *elem = NULL;
6196
6197
323k
    GROW;
6198
323k
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6199
323k
  SKIP(7);
6200
323k
  SKIP_BLANKS;
6201
323k
  SHRINK;
6202
323k
  if (RAW == ')') {
6203
234k
      if (ctxt->input->id != inputchk) {
6204
4
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6205
4
                               "Element content declaration doesn't start and"
6206
4
                               " stop in the same entity\n");
6207
4
      }
6208
234k
      NEXT;
6209
234k
      ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6210
234k
      if (ret == NULL)
6211
0
          return(NULL);
6212
234k
      if (RAW == '*') {
6213
568
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6214
568
    NEXT;
6215
568
      }
6216
234k
      return(ret);
6217
234k
  }
6218
89.1k
  if ((RAW == '(') || (RAW == '|')) {
6219
85.5k
      ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6220
85.5k
      if (ret == NULL) return(NULL);
6221
85.5k
  }
6222
756k
  while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
6223
668k
      NEXT;
6224
668k
      if (elem == NULL) {
6225
85.3k
          ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6226
85.3k
    if (ret == NULL) {
6227
0
        xmlFreeDocElementContent(ctxt->myDoc, cur);
6228
0
                    return(NULL);
6229
0
                }
6230
85.3k
    ret->c1 = cur;
6231
85.3k
    if (cur != NULL)
6232
85.3k
        cur->parent = ret;
6233
85.3k
    cur = ret;
6234
582k
      } else {
6235
582k
          n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6236
582k
    if (n == NULL) {
6237
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6238
0
                    return(NULL);
6239
0
                }
6240
582k
    n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6241
582k
    if (n->c1 != NULL)
6242
582k
        n->c1->parent = n;
6243
582k
          cur->c2 = n;
6244
582k
    if (n != NULL)
6245
582k
        n->parent = cur;
6246
582k
    cur = n;
6247
582k
      }
6248
668k
      SKIP_BLANKS;
6249
668k
      elem = xmlParseName(ctxt);
6250
668k
      if (elem == NULL) {
6251
592
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6252
592
      "xmlParseElementMixedContentDecl : Name expected\n");
6253
592
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6254
592
    return(NULL);
6255
592
      }
6256
667k
      SKIP_BLANKS;
6257
667k
      GROW;
6258
667k
  }
6259
88.5k
  if ((RAW == ')') && (NXT(1) == '*')) {
6260
79.7k
      if (elem != NULL) {
6261
79.7k
    cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6262
79.7k
                                   XML_ELEMENT_CONTENT_ELEMENT);
6263
79.7k
    if (cur->c2 != NULL)
6264
79.7k
        cur->c2->parent = cur;
6265
79.7k
            }
6266
79.7k
            if (ret != NULL)
6267
79.7k
                ret->ocur = XML_ELEMENT_CONTENT_MULT;
6268
79.7k
      if (ctxt->input->id != inputchk) {
6269
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6270
0
                               "Element content declaration doesn't start and"
6271
0
                               " stop in the same entity\n");
6272
0
      }
6273
79.7k
      SKIP(2);
6274
79.7k
  } else {
6275
8.83k
      xmlFreeDocElementContent(ctxt->myDoc, ret);
6276
8.83k
      xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6277
8.83k
      return(NULL);
6278
8.83k
  }
6279
6280
88.5k
    } else {
6281
0
  xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6282
0
    }
6283
79.7k
    return(ret);
6284
323k
}
6285
6286
/**
6287
 * xmlParseElementChildrenContentDeclPriv:
6288
 * @ctxt:  an XML parser context
6289
 * @inputchk:  the input used for the current entity, needed for boundary checks
6290
 * @depth: the level of recursion
6291
 *
6292
 * parse the declaration for a Mixed Element content
6293
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6294
 *
6295
 *
6296
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6297
 *
6298
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6299
 *
6300
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6301
 *
6302
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6303
 *
6304
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6305
 * TODO Parameter-entity replacement text must be properly nested
6306
 *  with parenthesized groups. That is to say, if either of the
6307
 *  opening or closing parentheses in a choice, seq, or Mixed
6308
 *  construct is contained in the replacement text for a parameter
6309
 *  entity, both must be contained in the same replacement text. For
6310
 *  interoperability, if a parameter-entity reference appears in a
6311
 *  choice, seq, or Mixed construct, its replacement text should not
6312
 *  be empty, and neither the first nor last non-blank character of
6313
 *  the replacement text should be a connector (| or ,).
6314
 *
6315
 * Returns the tree of xmlElementContentPtr describing the element
6316
 *          hierarchy.
6317
 */
6318
static xmlElementContentPtr
6319
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6320
1.83M
                                       int depth) {
6321
1.83M
    xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6322
1.83M
    const xmlChar *elem;
6323
1.83M
    xmlChar type = 0;
6324
6325
1.83M
    if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6326
1.83M
        (depth >  2048)) {
6327
245
        xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6328
245
"xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6329
245
                          depth);
6330
245
  return(NULL);
6331
245
    }
6332
1.83M
    SKIP_BLANKS;
6333
1.83M
    GROW;
6334
1.83M
    if (RAW == '(') {
6335
1.47M
  int inputid = ctxt->input->id;
6336
6337
        /* Recurse on first child */
6338
1.47M
  NEXT;
6339
1.47M
  SKIP_BLANKS;
6340
1.47M
        cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6341
1.47M
                                                           depth + 1);
6342
1.47M
        if (cur == NULL)
6343
1.43M
            return(NULL);
6344
47.5k
  SKIP_BLANKS;
6345
47.5k
  GROW;
6346
354k
    } else {
6347
354k
  elem = xmlParseName(ctxt);
6348
354k
  if (elem == NULL) {
6349
11.0k
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6350
11.0k
      return(NULL);
6351
11.0k
  }
6352
343k
        cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6353
343k
  if (cur == NULL) {
6354
0
      xmlErrMemory(ctxt, NULL);
6355
0
      return(NULL);
6356
0
  }
6357
343k
  GROW;
6358
343k
  if (RAW == '?') {
6359
25.1k
      cur->ocur = XML_ELEMENT_CONTENT_OPT;
6360
25.1k
      NEXT;
6361
317k
  } else if (RAW == '*') {
6362
36.1k
      cur->ocur = XML_ELEMENT_CONTENT_MULT;
6363
36.1k
      NEXT;
6364
281k
  } else if (RAW == '+') {
6365
31.9k
      cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6366
31.9k
      NEXT;
6367
249k
  } else {
6368
249k
      cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6369
249k
  }
6370
343k
  GROW;
6371
343k
    }
6372
390k
    SKIP_BLANKS;
6373
390k
    SHRINK;
6374
1.09M
    while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
6375
        /*
6376
   * Each loop we parse one separator and one element.
6377
   */
6378
764k
        if (RAW == ',') {
6379
328k
      if (type == 0) type = CUR;
6380
6381
      /*
6382
       * Detect "Name | Name , Name" error
6383
       */
6384
197k
      else if (type != CUR) {
6385
263
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6386
263
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6387
263
                      type);
6388
263
    if ((last != NULL) && (last != ret))
6389
263
        xmlFreeDocElementContent(ctxt->myDoc, last);
6390
263
    if (ret != NULL)
6391
263
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6392
263
    return(NULL);
6393
263
      }
6394
327k
      NEXT;
6395
6396
327k
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6397
327k
      if (op == NULL) {
6398
0
    if ((last != NULL) && (last != ret))
6399
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6400
0
          xmlFreeDocElementContent(ctxt->myDoc, ret);
6401
0
    return(NULL);
6402
0
      }
6403
327k
      if (last == NULL) {
6404
130k
    op->c1 = ret;
6405
130k
    if (ret != NULL)
6406
130k
        ret->parent = op;
6407
130k
    ret = cur = op;
6408
197k
      } else {
6409
197k
          cur->c2 = op;
6410
197k
    if (op != NULL)
6411
197k
        op->parent = cur;
6412
197k
    op->c1 = last;
6413
197k
    if (last != NULL)
6414
197k
        last->parent = op;
6415
197k
    cur =op;
6416
197k
    last = NULL;
6417
197k
      }
6418
435k
  } else if (RAW == '|') {
6419
395k
      if (type == 0) type = CUR;
6420
6421
      /*
6422
       * Detect "Name , Name | Name" error
6423
       */
6424
296k
      else if (type != CUR) {
6425
162
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6426
162
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6427
162
          type);
6428
162
    if ((last != NULL) && (last != ret))
6429
162
        xmlFreeDocElementContent(ctxt->myDoc, last);
6430
162
    if (ret != NULL)
6431
162
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6432
162
    return(NULL);
6433
162
      }
6434
395k
      NEXT;
6435
6436
395k
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6437
395k
      if (op == NULL) {
6438
0
    if ((last != NULL) && (last != ret))
6439
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6440
0
    if (ret != NULL)
6441
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6442
0
    return(NULL);
6443
0
      }
6444
395k
      if (last == NULL) {
6445
98.7k
    op->c1 = ret;
6446
98.7k
    if (ret != NULL)
6447
98.7k
        ret->parent = op;
6448
98.7k
    ret = cur = op;
6449
296k
      } else {
6450
296k
          cur->c2 = op;
6451
296k
    if (op != NULL)
6452
296k
        op->parent = cur;
6453
296k
    op->c1 = last;
6454
296k
    if (last != NULL)
6455
296k
        last->parent = op;
6456
296k
    cur =op;
6457
296k
    last = NULL;
6458
296k
      }
6459
395k
  } else {
6460
40.2k
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6461
40.2k
      if ((last != NULL) && (last != ret))
6462
16.3k
          xmlFreeDocElementContent(ctxt->myDoc, last);
6463
40.2k
      if (ret != NULL)
6464
40.2k
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6465
40.2k
      return(NULL);
6466
40.2k
  }
6467
723k
  GROW;
6468
723k
  SKIP_BLANKS;
6469
723k
  GROW;
6470
723k
  if (RAW == '(') {
6471
45.8k
      int inputid = ctxt->input->id;
6472
      /* Recurse on second child */
6473
45.8k
      NEXT;
6474
45.8k
      SKIP_BLANKS;
6475
45.8k
      last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6476
45.8k
                                                          depth + 1);
6477
45.8k
            if (last == NULL) {
6478
12.1k
    if (ret != NULL)
6479
12.1k
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6480
12.1k
    return(NULL);
6481
12.1k
            }
6482
33.6k
      SKIP_BLANKS;
6483
677k
  } else {
6484
677k
      elem = xmlParseName(ctxt);
6485
677k
      if (elem == NULL) {
6486
2.87k
    xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6487
2.87k
    if (ret != NULL)
6488
2.87k
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6489
2.87k
    return(NULL);
6490
2.87k
      }
6491
674k
      last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6492
674k
      if (last == NULL) {
6493
0
    if (ret != NULL)
6494
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6495
0
    return(NULL);
6496
0
      }
6497
674k
      if (RAW == '?') {
6498
98.7k
    last->ocur = XML_ELEMENT_CONTENT_OPT;
6499
98.7k
    NEXT;
6500
575k
      } else if (RAW == '*') {
6501
62.6k
    last->ocur = XML_ELEMENT_CONTENT_MULT;
6502
62.6k
    NEXT;
6503
513k
      } else if (RAW == '+') {
6504
15.8k
    last->ocur = XML_ELEMENT_CONTENT_PLUS;
6505
15.8k
    NEXT;
6506
497k
      } else {
6507
497k
    last->ocur = XML_ELEMENT_CONTENT_ONCE;
6508
497k
      }
6509
674k
  }
6510
708k
  SKIP_BLANKS;
6511
708k
  GROW;
6512
708k
    }
6513
334k
    if ((cur != NULL) && (last != NULL)) {
6514
197k
        cur->c2 = last;
6515
197k
  if (last != NULL)
6516
197k
      last->parent = cur;
6517
197k
    }
6518
334k
    if (ctxt->input->id != inputchk) {
6519
15
  xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6520
15
                       "Element content declaration doesn't start and stop in"
6521
15
                       " the same entity\n");
6522
15
    }
6523
334k
    NEXT;
6524
334k
    if (RAW == '?') {
6525
12.7k
  if (ret != NULL) {
6526
12.7k
      if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6527
12.7k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6528
616
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6529
12.1k
      else
6530
12.1k
          ret->ocur = XML_ELEMENT_CONTENT_OPT;
6531
12.7k
  }
6532
12.7k
  NEXT;
6533
322k
    } else if (RAW == '*') {
6534
92.4k
  if (ret != NULL) {
6535
92.4k
      ret->ocur = XML_ELEMENT_CONTENT_MULT;
6536
92.4k
      cur = ret;
6537
      /*
6538
       * Some normalization:
6539
       * (a | b* | c?)* == (a | b | c)*
6540
       */
6541
338k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6542
246k
    if ((cur->c1 != NULL) &&
6543
246k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6544
246k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6545
13.3k
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6546
246k
    if ((cur->c2 != NULL) &&
6547
246k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6548
246k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6549
2.66k
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6550
246k
    cur = cur->c2;
6551
246k
      }
6552
92.4k
  }
6553
92.4k
  NEXT;
6554
229k
    } else if (RAW == '+') {
6555
32.6k
  if (ret != NULL) {
6556
32.6k
      int found = 0;
6557
6558
32.6k
      if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6559
32.6k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6560
756
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6561
31.8k
      else
6562
31.8k
          ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6563
      /*
6564
       * Some normalization:
6565
       * (a | b*)+ == (a | b)*
6566
       * (a | b?)+ == (a | b)*
6567
       */
6568
50.1k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6569
17.5k
    if ((cur->c1 != NULL) &&
6570
17.5k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6571
17.5k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6572
667
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6573
667
        found = 1;
6574
667
    }
6575
17.5k
    if ((cur->c2 != NULL) &&
6576
17.5k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6577
17.5k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6578
597
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6579
597
        found = 1;
6580
597
    }
6581
17.5k
    cur = cur->c2;
6582
17.5k
      }
6583
32.6k
      if (found)
6584
1.13k
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6585
32.6k
  }
6586
32.6k
  NEXT;
6587
32.6k
    }
6588
334k
    return(ret);
6589
390k
}
6590
6591
/**
6592
 * xmlParseElementChildrenContentDecl:
6593
 * @ctxt:  an XML parser context
6594
 * @inputchk:  the input used for the current entity, needed for boundary checks
6595
 *
6596
 * DEPRECATED: Internal function, don't use.
6597
 *
6598
 * parse the declaration for a Mixed Element content
6599
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6600
 *
6601
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6602
 *
6603
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6604
 *
6605
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6606
 *
6607
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6608
 *
6609
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6610
 * TODO Parameter-entity replacement text must be properly nested
6611
 *  with parenthesized groups. That is to say, if either of the
6612
 *  opening or closing parentheses in a choice, seq, or Mixed
6613
 *  construct is contained in the replacement text for a parameter
6614
 *  entity, both must be contained in the same replacement text. For
6615
 *  interoperability, if a parameter-entity reference appears in a
6616
 *  choice, seq, or Mixed construct, its replacement text should not
6617
 *  be empty, and neither the first nor last non-blank character of
6618
 *  the replacement text should be a connector (| or ,).
6619
 *
6620
 * Returns the tree of xmlElementContentPtr describing the element
6621
 *          hierarchy.
6622
 */
6623
xmlElementContentPtr
6624
0
xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6625
    /* stub left for API/ABI compat */
6626
0
    return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6627
0
}
6628
6629
/**
6630
 * xmlParseElementContentDecl:
6631
 * @ctxt:  an XML parser context
6632
 * @name:  the name of the element being defined.
6633
 * @result:  the Element Content pointer will be stored here if any
6634
 *
6635
 * DEPRECATED: Internal function, don't use.
6636
 *
6637
 * parse the declaration for an Element content either Mixed or Children,
6638
 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6639
 *
6640
 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6641
 *
6642
 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6643
 */
6644
6645
int
6646
xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6647
631k
                           xmlElementContentPtr *result) {
6648
6649
631k
    xmlElementContentPtr tree = NULL;
6650
631k
    int inputid = ctxt->input->id;
6651
631k
    int res;
6652
6653
631k
    *result = NULL;
6654
6655
631k
    if (RAW != '(') {
6656
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6657
0
    "xmlParseElementContentDecl : %s '(' expected\n", name);
6658
0
  return(-1);
6659
0
    }
6660
631k
    NEXT;
6661
631k
    GROW;
6662
631k
    if (ctxt->instate == XML_PARSER_EOF)
6663
0
        return(-1);
6664
631k
    SKIP_BLANKS;
6665
631k
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6666
323k
        tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6667
323k
  res = XML_ELEMENT_TYPE_MIXED;
6668
323k
    } else {
6669
308k
        tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6670
308k
  res = XML_ELEMENT_TYPE_ELEMENT;
6671
308k
    }
6672
631k
    SKIP_BLANKS;
6673
631k
    *result = tree;
6674
631k
    return(res);
6675
631k
}
6676
6677
/**
6678
 * xmlParseElementDecl:
6679
 * @ctxt:  an XML parser context
6680
 *
6681
 * DEPRECATED: Internal function, don't use.
6682
 *
6683
 * Parse an element declaration. Always consumes '<!'.
6684
 *
6685
 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6686
 *
6687
 * [ VC: Unique Element Type Declaration ]
6688
 * No element type may be declared more than once
6689
 *
6690
 * Returns the type of the element, or -1 in case of error
6691
 */
6692
int
6693
859k
xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6694
859k
    const xmlChar *name;
6695
859k
    int ret = -1;
6696
859k
    xmlElementContentPtr content  = NULL;
6697
6698
859k
    if ((CUR != '<') || (NXT(1) != '!'))
6699
0
        return(ret);
6700
859k
    SKIP(2);
6701
6702
    /* GROW; done in the caller */
6703
859k
    if (CMP7(CUR_PTR, 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6704
854k
  int inputid = ctxt->input->id;
6705
6706
854k
  SKIP(7);
6707
854k
  if (SKIP_BLANKS == 0) {
6708
5.67k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6709
5.67k
               "Space required after 'ELEMENT'\n");
6710
5.67k
      return(-1);
6711
5.67k
  }
6712
848k
        name = xmlParseName(ctxt);
6713
848k
  if (name == NULL) {
6714
6.63k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6715
6.63k
         "xmlParseElementDecl: no name for Element\n");
6716
6.63k
      return(-1);
6717
6.63k
  }
6718
841k
  if (SKIP_BLANKS == 0) {
6719
25.9k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6720
25.9k
         "Space required after the element name\n");
6721
25.9k
  }
6722
841k
  if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6723
177k
      SKIP(5);
6724
      /*
6725
       * Element must always be empty.
6726
       */
6727
177k
      ret = XML_ELEMENT_TYPE_EMPTY;
6728
664k
  } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6729
664k
             (NXT(2) == 'Y')) {
6730
7.65k
      SKIP(3);
6731
      /*
6732
       * Element is a generic container.
6733
       */
6734
7.65k
      ret = XML_ELEMENT_TYPE_ANY;
6735
656k
  } else if (RAW == '(') {
6736
631k
      ret = xmlParseElementContentDecl(ctxt, name, &content);
6737
631k
  } else {
6738
      /*
6739
       * [ WFC: PEs in Internal Subset ] error handling.
6740
       */
6741
24.4k
      if ((RAW == '%') && (ctxt->external == 0) &&
6742
24.4k
          (ctxt->inputNr == 1)) {
6743
1.01k
    xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
6744
1.01k
    "PEReference: forbidden within markup decl in internal subset\n");
6745
23.4k
      } else {
6746
23.4k
    xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6747
23.4k
          "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6748
23.4k
            }
6749
24.4k
      return(-1);
6750
24.4k
  }
6751
6752
817k
  SKIP_BLANKS;
6753
6754
817k
  if (RAW != '>') {
6755
58.8k
      xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6756
58.8k
      if (content != NULL) {
6757
5.95k
    xmlFreeDocElementContent(ctxt->myDoc, content);
6758
5.95k
      }
6759
758k
  } else {
6760
758k
      if (inputid != ctxt->input->id) {
6761
26
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6762
26
                               "Element declaration doesn't start and stop in"
6763
26
                               " the same entity\n");
6764
26
      }
6765
6766
758k
      NEXT;
6767
758k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6768
758k
    (ctxt->sax->elementDecl != NULL)) {
6769
678k
    if (content != NULL)
6770
500k
        content->parent = NULL;
6771
678k
          ctxt->sax->elementDecl(ctxt->userData, name, ret,
6772
678k
                           content);
6773
678k
    if ((content != NULL) && (content->parent == NULL)) {
6774
        /*
6775
         * this is a trick: if xmlAddElementDecl is called,
6776
         * instead of copying the full tree it is plugged directly
6777
         * if called from the parser. Avoid duplicating the
6778
         * interfaces or change the API/ABI
6779
         */
6780
57.4k
        xmlFreeDocElementContent(ctxt->myDoc, content);
6781
57.4k
    }
6782
678k
      } else if (content != NULL) {
6783
60.8k
    xmlFreeDocElementContent(ctxt->myDoc, content);
6784
60.8k
      }
6785
758k
  }
6786
817k
    }
6787
822k
    return(ret);
6788
859k
}
6789
6790
/**
6791
 * xmlParseConditionalSections
6792
 * @ctxt:  an XML parser context
6793
 *
6794
 * Parse a conditional section. Always consumes '<!['.
6795
 *
6796
 * [61] conditionalSect ::= includeSect | ignoreSect
6797
 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6798
 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6799
 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6800
 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6801
 */
6802
6803
static void
6804
12.4k
xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6805
12.4k
    int *inputIds = NULL;
6806
12.4k
    size_t inputIdsSize = 0;
6807
12.4k
    size_t depth = 0;
6808
6809
80.7k
    while (ctxt->instate != XML_PARSER_EOF) {
6810
80.3k
        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6811
44.0k
            int id = ctxt->input->id;
6812
6813
44.0k
            SKIP(3);
6814
44.0k
            SKIP_BLANKS;
6815
6816
44.0k
            if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6817
36.2k
                SKIP(7);
6818
36.2k
                SKIP_BLANKS;
6819
36.2k
                if (RAW != '[') {
6820
350
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6821
350
                    xmlHaltParser(ctxt);
6822
350
                    goto error;
6823
350
                }
6824
35.8k
                if (ctxt->input->id != id) {
6825
16
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6826
16
                                   "All markup of the conditional section is"
6827
16
                                   " not in the same entity\n");
6828
16
                }
6829
35.8k
                NEXT;
6830
6831
35.8k
                if (inputIdsSize <= depth) {
6832
9.82k
                    int *tmp;
6833
6834
9.82k
                    inputIdsSize = (inputIdsSize == 0 ? 4 : inputIdsSize * 2);
6835
9.82k
                    tmp = (int *) xmlRealloc(inputIds,
6836
9.82k
                            inputIdsSize * sizeof(int));
6837
9.82k
                    if (tmp == NULL) {
6838
0
                        xmlErrMemory(ctxt, NULL);
6839
0
                        goto error;
6840
0
                    }
6841
9.82k
                    inputIds = tmp;
6842
9.82k
                }
6843
35.8k
                inputIds[depth] = id;
6844
35.8k
                depth++;
6845
35.8k
            } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6846
5.49k
                size_t ignoreDepth = 0;
6847
6848
5.49k
                SKIP(6);
6849
5.49k
                SKIP_BLANKS;
6850
5.49k
                if (RAW != '[') {
6851
201
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6852
201
                    xmlHaltParser(ctxt);
6853
201
                    goto error;
6854
201
                }
6855
5.29k
                if (ctxt->input->id != id) {
6856
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6857
0
                                   "All markup of the conditional section is"
6858
0
                                   " not in the same entity\n");
6859
0
                }
6860
5.29k
                NEXT;
6861
6862
3.12M
                while (RAW != 0) {
6863
3.12M
                    if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6864
18.1k
                        SKIP(3);
6865
18.1k
                        ignoreDepth++;
6866
                        /* Check for integer overflow */
6867
18.1k
                        if (ignoreDepth == 0) {
6868
0
                            xmlErrMemory(ctxt, NULL);
6869
0
                            goto error;
6870
0
                        }
6871
3.10M
                    } else if ((RAW == ']') && (NXT(1) == ']') &&
6872
3.10M
                               (NXT(2) == '>')) {
6873
13.2k
                        if (ignoreDepth == 0)
6874
2.37k
                            break;
6875
10.8k
                        SKIP(3);
6876
10.8k
                        ignoreDepth--;
6877
3.09M
                    } else {
6878
3.09M
                        NEXT;
6879
3.09M
                    }
6880
3.12M
                }
6881
6882
5.29k
    if (RAW == 0) {
6883
2.91k
        xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6884
2.91k
                    goto error;
6885
2.91k
    }
6886
2.37k
                if (ctxt->input->id != id) {
6887
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6888
0
                                   "All markup of the conditional section is"
6889
0
                                   " not in the same entity\n");
6890
0
                }
6891
2.37k
                SKIP(3);
6892
2.37k
            } else {
6893
2.28k
                xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6894
2.28k
                xmlHaltParser(ctxt);
6895
2.28k
                goto error;
6896
2.28k
            }
6897
44.0k
        } else if ((depth > 0) &&
6898
36.3k
                   (RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6899
10.4k
            depth--;
6900
10.4k
            if (ctxt->input->id != inputIds[depth]) {
6901
301
                xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6902
301
                               "All markup of the conditional section is not"
6903
301
                               " in the same entity\n");
6904
301
            }
6905
10.4k
            SKIP(3);
6906
25.8k
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
6907
22.2k
            xmlParseMarkupDecl(ctxt);
6908
22.2k
        } else {
6909
3.62k
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6910
3.62k
            xmlHaltParser(ctxt);
6911
3.62k
            goto error;
6912
3.62k
        }
6913
6914
70.9k
        if (depth == 0)
6915
2.66k
            break;
6916
6917
68.2k
        SKIP_BLANKS;
6918
68.2k
        GROW;
6919
68.2k
    }
6920
6921
12.4k
error:
6922
12.4k
    xmlFree(inputIds);
6923
12.4k
}
6924
6925
/**
6926
 * xmlParseMarkupDecl:
6927
 * @ctxt:  an XML parser context
6928
 *
6929
 * DEPRECATED: Internal function, don't use.
6930
 *
6931
 * Parse markup declarations. Always consumes '<!' or '<?'.
6932
 *
6933
 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6934
 *                     NotationDecl | PI | Comment
6935
 *
6936
 * [ VC: Proper Declaration/PE Nesting ]
6937
 * Parameter-entity replacement text must be properly nested with
6938
 * markup declarations. That is to say, if either the first character
6939
 * or the last character of a markup declaration (markupdecl above) is
6940
 * contained in the replacement text for a parameter-entity reference,
6941
 * both must be contained in the same replacement text.
6942
 *
6943
 * [ WFC: PEs in Internal Subset ]
6944
 * In the internal DTD subset, parameter-entity references can occur
6945
 * only where markup declarations can occur, not within markup declarations.
6946
 * (This does not apply to references that occur in external parameter
6947
 * entities or to the external subset.)
6948
 */
6949
void
6950
20.1M
xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6951
20.1M
    GROW;
6952
20.1M
    if (CUR == '<') {
6953
20.1M
        if (NXT(1) == '!') {
6954
20.0M
      switch (NXT(2)) {
6955
1.75M
          case 'E':
6956
1.75M
        if (NXT(3) == 'L')
6957
859k
      xmlParseElementDecl(ctxt);
6958
894k
        else if (NXT(3) == 'N')
6959
892k
      xmlParseEntityDecl(ctxt);
6960
2.08k
                    else
6961
2.08k
                        SKIP(2);
6962
1.75M
        break;
6963
745k
          case 'A':
6964
745k
        xmlParseAttributeListDecl(ctxt);
6965
745k
        break;
6966
31.1k
          case 'N':
6967
31.1k
        xmlParseNotationDecl(ctxt);
6968
31.1k
        break;
6969
17.4M
          case '-':
6970
17.4M
        xmlParseComment(ctxt);
6971
17.4M
        break;
6972
31.2k
    default:
6973
        /* there is an error but it will be detected later */
6974
31.2k
                    SKIP(2);
6975
31.2k
        break;
6976
20.0M
      }
6977
20.0M
  } else if (NXT(1) == '?') {
6978
62.0k
      xmlParsePI(ctxt);
6979
62.0k
  }
6980
20.1M
    }
6981
6982
    /*
6983
     * detect requirement to exit there and act accordingly
6984
     * and avoid having instate overridden later on
6985
     */
6986
20.1M
    if (ctxt->instate == XML_PARSER_EOF)
6987
38.6k
        return;
6988
6989
20.0M
    ctxt->instate = XML_PARSER_DTD;
6990
20.0M
}
6991
6992
/**
6993
 * xmlParseTextDecl:
6994
 * @ctxt:  an XML parser context
6995
 *
6996
 * DEPRECATED: Internal function, don't use.
6997
 *
6998
 * parse an XML declaration header for external entities
6999
 *
7000
 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
7001
 */
7002
7003
void
7004
19.9k
xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
7005
19.9k
    xmlChar *version;
7006
19.9k
    const xmlChar *encoding;
7007
19.9k
    int oldstate;
7008
7009
    /*
7010
     * We know that '<?xml' is here.
7011
     */
7012
19.9k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
7013
19.4k
  SKIP(5);
7014
19.4k
    } else {
7015
556
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
7016
556
  return;
7017
556
    }
7018
7019
    /* Avoid expansion of parameter entities when skipping blanks. */
7020
19.4k
    oldstate = ctxt->instate;
7021
19.4k
    ctxt->instate = XML_PARSER_START;
7022
7023
19.4k
    if (SKIP_BLANKS == 0) {
7024
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7025
0
           "Space needed after '<?xml'\n");
7026
0
    }
7027
7028
    /*
7029
     * We may have the VersionInfo here.
7030
     */
7031
19.4k
    version = xmlParseVersionInfo(ctxt);
7032
19.4k
    if (version == NULL)
7033
4.82k
  version = xmlCharStrdup(XML_DEFAULT_VERSION);
7034
14.5k
    else {
7035
14.5k
  if (SKIP_BLANKS == 0) {
7036
1.39k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7037
1.39k
               "Space needed here\n");
7038
1.39k
  }
7039
14.5k
    }
7040
19.4k
    ctxt->input->version = version;
7041
7042
    /*
7043
     * We must have the encoding declaration
7044
     */
7045
19.4k
    encoding = xmlParseEncodingDecl(ctxt);
7046
19.4k
    if (ctxt->instate == XML_PARSER_EOF)
7047
0
        return;
7048
19.4k
    if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7049
  /*
7050
   * The XML REC instructs us to stop parsing right here
7051
   */
7052
335
        ctxt->instate = oldstate;
7053
335
        return;
7054
335
    }
7055
19.0k
    if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
7056
7.55k
  xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
7057
7.55k
           "Missing encoding in text declaration\n");
7058
7.55k
    }
7059
7060
19.0k
    SKIP_BLANKS;
7061
19.0k
    if ((RAW == '?') && (NXT(1) == '>')) {
7062
8.98k
        SKIP(2);
7063
10.0k
    } else if (RAW == '>') {
7064
        /* Deprecated old WD ... */
7065
181
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7066
181
  NEXT;
7067
9.90k
    } else {
7068
9.90k
        int c;
7069
7070
9.90k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7071
413k
        while ((c = CUR) != 0) {
7072
411k
            NEXT;
7073
411k
            if (c == '>')
7074
7.61k
                break;
7075
411k
        }
7076
9.90k
    }
7077
7078
19.0k
    ctxt->instate = oldstate;
7079
19.0k
}
7080
7081
/**
7082
 * xmlParseExternalSubset:
7083
 * @ctxt:  an XML parser context
7084
 * @ExternalID: the external identifier
7085
 * @SystemID: the system identifier (or URL)
7086
 *
7087
 * parse Markup declarations from an external subset
7088
 *
7089
 * [30] extSubset ::= textDecl? extSubsetDecl
7090
 *
7091
 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
7092
 */
7093
void
7094
xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
7095
60.2k
                       const xmlChar *SystemID) {
7096
60.2k
    xmlDetectSAX2(ctxt);
7097
60.2k
    GROW;
7098
7099
60.2k
    if ((ctxt->encoding == NULL) &&
7100
60.2k
        (ctxt->input->end - ctxt->input->cur >= 4)) {
7101
59.8k
        xmlChar start[4];
7102
59.8k
  xmlCharEncoding enc;
7103
7104
59.8k
  start[0] = RAW;
7105
59.8k
  start[1] = NXT(1);
7106
59.8k
  start[2] = NXT(2);
7107
59.8k
  start[3] = NXT(3);
7108
59.8k
  enc = xmlDetectCharEncoding(start, 4);
7109
59.8k
  if (enc != XML_CHAR_ENCODING_NONE)
7110
20.3k
      xmlSwitchEncoding(ctxt, enc);
7111
59.8k
    }
7112
7113
60.2k
    if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
7114
18.3k
  xmlParseTextDecl(ctxt);
7115
18.3k
  if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7116
      /*
7117
       * The XML REC instructs us to stop parsing right here
7118
       */
7119
304
      xmlHaltParser(ctxt);
7120
304
      return;
7121
304
  }
7122
18.3k
    }
7123
59.9k
    if (ctxt->myDoc == NULL) {
7124
0
        ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
7125
0
  if (ctxt->myDoc == NULL) {
7126
0
      xmlErrMemory(ctxt, "New Doc failed");
7127
0
      return;
7128
0
  }
7129
0
  ctxt->myDoc->properties = XML_DOC_INTERNAL;
7130
0
    }
7131
59.9k
    if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
7132
0
        xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
7133
7134
59.9k
    ctxt->instate = XML_PARSER_DTD;
7135
59.9k
    ctxt->external = 1;
7136
59.9k
    SKIP_BLANKS;
7137
4.43M
    while ((ctxt->instate != XML_PARSER_EOF) && (RAW != 0)) {
7138
4.39M
  GROW;
7139
4.39M
        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7140
12.4k
            xmlParseConditionalSections(ctxt);
7141
4.38M
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
7142
4.36M
            xmlParseMarkupDecl(ctxt);
7143
4.36M
        } else {
7144
20.3k
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7145
20.3k
            xmlHaltParser(ctxt);
7146
20.3k
            return;
7147
20.3k
        }
7148
4.37M
        SKIP_BLANKS;
7149
4.37M
    }
7150
7151
39.6k
    if (RAW != 0) {
7152
0
  xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7153
0
    }
7154
7155
39.6k
}
7156
7157
/**
7158
 * xmlParseReference:
7159
 * @ctxt:  an XML parser context
7160
 *
7161
 * DEPRECATED: Internal function, don't use.
7162
 *
7163
 * parse and handle entity references in content, depending on the SAX
7164
 * interface, this may end-up in a call to character() if this is a
7165
 * CharRef, a predefined entity, if there is no reference() callback.
7166
 * or if the parser was asked to switch to that mode.
7167
 *
7168
 * Always consumes '&'.
7169
 *
7170
 * [67] Reference ::= EntityRef | CharRef
7171
 */
7172
void
7173
4.66M
xmlParseReference(xmlParserCtxtPtr ctxt) {
7174
4.66M
    xmlEntityPtr ent;
7175
4.66M
    xmlChar *val;
7176
4.66M
    int was_checked;
7177
4.66M
    xmlNodePtr list = NULL;
7178
4.66M
    xmlParserErrors ret = XML_ERR_OK;
7179
7180
7181
4.66M
    if (RAW != '&')
7182
0
        return;
7183
7184
    /*
7185
     * Simple case of a CharRef
7186
     */
7187
4.66M
    if (NXT(1) == '#') {
7188
894k
  int i = 0;
7189
894k
  xmlChar out[16];
7190
894k
  int hex = NXT(2);
7191
894k
  int value = xmlParseCharRef(ctxt);
7192
7193
894k
  if (value == 0)
7194
302k
      return;
7195
592k
  if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
7196
      /*
7197
       * So we are using non-UTF-8 buffers
7198
       * Check that the char fit on 8bits, if not
7199
       * generate a CharRef.
7200
       */
7201
467k
      if (value <= 0xFF) {
7202
430k
    out[0] = value;
7203
430k
    out[1] = 0;
7204
430k
    if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7205
430k
        (!ctxt->disableSAX))
7206
378k
        ctxt->sax->characters(ctxt->userData, out, 1);
7207
430k
      } else {
7208
36.8k
    if ((hex == 'x') || (hex == 'X'))
7209
12.4k
        snprintf((char *)out, sizeof(out), "#x%X", value);
7210
24.3k
    else
7211
24.3k
        snprintf((char *)out, sizeof(out), "#%d", value);
7212
36.8k
    if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7213
36.8k
        (!ctxt->disableSAX))
7214
31.6k
        ctxt->sax->reference(ctxt->userData, out);
7215
36.8k
      }
7216
467k
  } else {
7217
      /*
7218
       * Just encode the value in UTF-8
7219
       */
7220
124k
      COPY_BUF(0 ,out, i, value);
7221
124k
      out[i] = 0;
7222
124k
      if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7223
124k
    (!ctxt->disableSAX))
7224
113k
    ctxt->sax->characters(ctxt->userData, out, i);
7225
124k
  }
7226
592k
  return;
7227
894k
    }
7228
7229
    /*
7230
     * We are seeing an entity reference
7231
     */
7232
3.77M
    ent = xmlParseEntityRef(ctxt);
7233
3.77M
    if (ent == NULL) return;
7234
2.17M
    if (!ctxt->wellFormed)
7235
1.08M
  return;
7236
1.09M
    was_checked = ent->flags & XML_ENT_PARSED;
7237
7238
    /* special case of predefined entities */
7239
1.09M
    if ((ent->name == NULL) ||
7240
1.09M
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7241
57.3k
  val = ent->content;
7242
57.3k
  if (val == NULL) return;
7243
  /*
7244
   * inline the entity.
7245
   */
7246
57.3k
  if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7247
57.3k
      (!ctxt->disableSAX))
7248
57.3k
      ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7249
57.3k
  return;
7250
57.3k
    }
7251
7252
    /*
7253
     * The first reference to the entity trigger a parsing phase
7254
     * where the ent->children is filled with the result from
7255
     * the parsing.
7256
     * Note: external parsed entities will not be loaded, it is not
7257
     * required for a non-validating parser, unless the parsing option
7258
     * of validating, or substituting entities were given. Doing so is
7259
     * far more secure as the parser will only process data coming from
7260
     * the document entity by default.
7261
     */
7262
1.03M
    if (((ent->flags & XML_ENT_PARSED) == 0) &&
7263
1.03M
        ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
7264
90.8k
         (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
7265
87.1k
  unsigned long oldsizeentcopy = ctxt->sizeentcopy;
7266
7267
  /*
7268
   * This is a bit hackish but this seems the best
7269
   * way to make sure both SAX and DOM entity support
7270
   * behaves okay.
7271
   */
7272
87.1k
  void *user_data;
7273
87.1k
  if (ctxt->userData == ctxt)
7274
87.1k
      user_data = NULL;
7275
0
  else
7276
0
      user_data = ctxt->userData;
7277
7278
        /* Avoid overflow as much as possible */
7279
87.1k
        ctxt->sizeentcopy = 0;
7280
7281
87.1k
        if (ent->flags & XML_ENT_EXPANDING) {
7282
620
            xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7283
620
            xmlHaltParser(ctxt);
7284
620
            return;
7285
620
        }
7286
7287
86.5k
        ent->flags |= XML_ENT_EXPANDING;
7288
7289
  /*
7290
   * Check that this entity is well formed
7291
   * 4.3.2: An internal general parsed entity is well-formed
7292
   * if its replacement text matches the production labeled
7293
   * content.
7294
   */
7295
86.5k
  if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7296
54.0k
      ctxt->depth++;
7297
54.0k
      ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
7298
54.0k
                                                user_data, &list);
7299
54.0k
      ctxt->depth--;
7300
7301
54.0k
  } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7302
32.4k
      ctxt->depth++;
7303
32.4k
      ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
7304
32.4k
                                     user_data, ctxt->depth, ent->URI,
7305
32.4k
             ent->ExternalID, &list);
7306
32.4k
      ctxt->depth--;
7307
32.4k
  } else {
7308
0
      ret = XML_ERR_ENTITY_PE_INTERNAL;
7309
0
      xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7310
0
       "invalid entity type found\n", NULL);
7311
0
  }
7312
7313
86.5k
        ent->flags &= ~XML_ENT_EXPANDING;
7314
86.5k
        ent->flags |= XML_ENT_PARSED | XML_ENT_CHECKED;
7315
86.5k
        ent->expandedSize = ctxt->sizeentcopy;
7316
86.5k
  if (ret == XML_ERR_ENTITY_LOOP) {
7317
4.59k
            xmlHaltParser(ctxt);
7318
4.59k
      xmlFreeNodeList(list);
7319
4.59k
      return;
7320
4.59k
  }
7321
81.9k
  if (xmlParserEntityCheck(ctxt, oldsizeentcopy)) {
7322
0
      xmlFreeNodeList(list);
7323
0
      return;
7324
0
  }
7325
7326
81.9k
  if ((ret == XML_ERR_OK) && (list != NULL)) {
7327
44.9k
            ent->children = list;
7328
            /*
7329
             * Prune it directly in the generated document
7330
             * except for single text nodes.
7331
             */
7332
44.9k
            if ((ctxt->replaceEntities == 0) ||
7333
44.9k
                (ctxt->parseMode == XML_PARSE_READER) ||
7334
44.9k
                ((list->type == XML_TEXT_NODE) &&
7335
34.2k
                 (list->next == NULL))) {
7336
34.2k
                ent->owner = 1;
7337
127k
                while (list != NULL) {
7338
93.0k
                    list->parent = (xmlNodePtr) ent;
7339
93.0k
                    if (list->doc != ent->doc)
7340
0
                        xmlSetTreeDoc(list, ent->doc);
7341
93.0k
                    if (list->next == NULL)
7342
34.2k
                        ent->last = list;
7343
93.0k
                    list = list->next;
7344
93.0k
                }
7345
34.2k
                list = NULL;
7346
34.2k
            } else {
7347
10.6k
                ent->owner = 0;
7348
222k
                while (list != NULL) {
7349
212k
                    list->parent = (xmlNodePtr) ctxt->node;
7350
212k
                    list->doc = ctxt->myDoc;
7351
212k
                    if (list->next == NULL)
7352
10.6k
                        ent->last = list;
7353
212k
                    list = list->next;
7354
212k
                }
7355
10.6k
                list = ent->children;
7356
#ifdef LIBXML_LEGACY_ENABLED
7357
                if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7358
                    xmlAddEntityReference(ent, list, NULL);
7359
#endif /* LIBXML_LEGACY_ENABLED */
7360
10.6k
            }
7361
44.9k
  } else if ((ret != XML_ERR_OK) &&
7362
36.9k
       (ret != XML_WAR_UNDECLARED_ENTITY)) {
7363
19.9k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7364
19.9k
         "Entity '%s' failed to parse\n", ent->name);
7365
19.9k
            if (ent->content != NULL)
7366
7.53k
                ent->content[0] = 0;
7367
19.9k
  } else if (list != NULL) {
7368
0
      xmlFreeNodeList(list);
7369
0
      list = NULL;
7370
0
  }
7371
7372
        /* Prevent entity from being parsed and expanded twice (Bug 760367). */
7373
81.9k
        was_checked = 0;
7374
81.9k
    }
7375
7376
    /*
7377
     * Now that the entity content has been gathered
7378
     * provide it to the application, this can take different forms based
7379
     * on the parsing modes.
7380
     */
7381
1.03M
    if (ent->children == NULL) {
7382
  /*
7383
   * Probably running in SAX mode and the callbacks don't
7384
   * build the entity content. So unless we already went
7385
   * though parsing for first checking go though the entity
7386
   * content to generate callbacks associated to the entity
7387
   */
7388
197k
  if (was_checked != 0) {
7389
156k
      void *user_data;
7390
      /*
7391
       * This is a bit hackish but this seems the best
7392
       * way to make sure both SAX and DOM entity support
7393
       * behaves okay.
7394
       */
7395
156k
      if (ctxt->userData == ctxt)
7396
156k
    user_data = NULL;
7397
0
      else
7398
0
    user_data = ctxt->userData;
7399
7400
156k
      if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7401
6.55k
    ctxt->depth++;
7402
6.55k
    ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7403
6.55k
           ent->content, user_data, NULL);
7404
6.55k
    ctxt->depth--;
7405
150k
      } else if (ent->etype ==
7406
150k
           XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7407
150k
          unsigned long oldsizeentities = ctxt->sizeentities;
7408
7409
150k
    ctxt->depth++;
7410
150k
    ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7411
150k
         ctxt->sax, user_data, ctxt->depth,
7412
150k
         ent->URI, ent->ExternalID, NULL);
7413
150k
    ctxt->depth--;
7414
7415
                /* Undo the change to sizeentities */
7416
150k
                ctxt->sizeentities = oldsizeentities;
7417
150k
      } else {
7418
0
    ret = XML_ERR_ENTITY_PE_INTERNAL;
7419
0
    xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7420
0
           "invalid entity type found\n", NULL);
7421
0
      }
7422
156k
      if (ret == XML_ERR_ENTITY_LOOP) {
7423
0
    xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7424
0
    return;
7425
0
      }
7426
156k
            if (xmlParserEntityCheck(ctxt, 0))
7427
0
                return;
7428
156k
  }
7429
197k
  if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7430
197k
      (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7431
      /*
7432
       * Entity reference callback comes second, it's somewhat
7433
       * superfluous but a compatibility to historical behaviour
7434
       */
7435
35.8k
      ctxt->sax->reference(ctxt->userData, ent->name);
7436
35.8k
  }
7437
197k
  return;
7438
197k
    }
7439
7440
    /*
7441
     * We also check for amplification if entities aren't substituted.
7442
     * They might be expanded later.
7443
     */
7444
832k
    if ((was_checked != 0) &&
7445
832k
        (xmlParserEntityCheck(ctxt, ent->expandedSize)))
7446
180
        return;
7447
7448
    /*
7449
     * If we didn't get any children for the entity being built
7450
     */
7451
832k
    if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7452
832k
  (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7453
  /*
7454
   * Create a node.
7455
   */
7456
100k
  ctxt->sax->reference(ctxt->userData, ent->name);
7457
100k
  return;
7458
100k
    }
7459
7460
731k
    if (ctxt->replaceEntities)  {
7461
  /*
7462
   * There is a problem on the handling of _private for entities
7463
   * (bug 155816): Should we copy the content of the field from
7464
   * the entity (possibly overwriting some value set by the user
7465
   * when a copy is created), should we leave it alone, or should
7466
   * we try to take care of different situations?  The problem
7467
   * is exacerbated by the usage of this field by the xmlReader.
7468
   * To fix this bug, we look at _private on the created node
7469
   * and, if it's NULL, we copy in whatever was in the entity.
7470
   * If it's not NULL we leave it alone.  This is somewhat of a
7471
   * hack - maybe we should have further tests to determine
7472
   * what to do.
7473
   */
7474
731k
  if (ctxt->node != NULL) {
7475
      /*
7476
       * Seems we are generating the DOM content, do
7477
       * a simple tree copy for all references except the first
7478
       * In the first occurrence list contains the replacement.
7479
       */
7480
731k
      if (((list == NULL) && (ent->owner == 0)) ||
7481
731k
    (ctxt->parseMode == XML_PARSE_READER)) {
7482
229k
    xmlNodePtr nw = NULL, cur, firstChild = NULL;
7483
7484
    /*
7485
     * when operating on a reader, the entities definitions
7486
     * are always owning the entities subtree.
7487
    if (ctxt->parseMode == XML_PARSE_READER)
7488
        ent->owner = 1;
7489
     */
7490
7491
229k
    cur = ent->children;
7492
329k
    while (cur != NULL) {
7493
329k
        nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7494
329k
        if (nw != NULL) {
7495
329k
      if (nw->_private == NULL)
7496
329k
          nw->_private = cur->_private;
7497
329k
      if (firstChild == NULL){
7498
229k
          firstChild = nw;
7499
229k
      }
7500
329k
      nw = xmlAddChild(ctxt->node, nw);
7501
329k
        }
7502
329k
        if (cur == ent->last) {
7503
      /*
7504
       * needed to detect some strange empty
7505
       * node cases in the reader tests
7506
       */
7507
229k
      if ((ctxt->parseMode == XML_PARSE_READER) &&
7508
229k
          (nw != NULL) &&
7509
229k
          (nw->type == XML_ELEMENT_NODE) &&
7510
229k
          (nw->children == NULL))
7511
4.23k
          nw->extra = 1;
7512
7513
229k
      break;
7514
229k
        }
7515
100k
        cur = cur->next;
7516
100k
    }
7517
#ifdef LIBXML_LEGACY_ENABLED
7518
    if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7519
      xmlAddEntityReference(ent, firstChild, nw);
7520
#endif /* LIBXML_LEGACY_ENABLED */
7521
502k
      } else if ((list == NULL) || (ctxt->inputNr > 0)) {
7522
502k
    xmlNodePtr nw = NULL, cur, next, last,
7523
502k
         firstChild = NULL;
7524
7525
    /*
7526
     * Copy the entity child list and make it the new
7527
     * entity child list. The goal is to make sure any
7528
     * ID or REF referenced will be the one from the
7529
     * document content and not the entity copy.
7530
     */
7531
502k
    cur = ent->children;
7532
502k
    ent->children = NULL;
7533
502k
    last = ent->last;
7534
502k
    ent->last = NULL;
7535
1.39M
    while (cur != NULL) {
7536
1.39M
        next = cur->next;
7537
1.39M
        cur->next = NULL;
7538
1.39M
        cur->parent = NULL;
7539
1.39M
        nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7540
1.39M
        if (nw != NULL) {
7541
1.39M
      if (nw->_private == NULL)
7542
1.39M
          nw->_private = cur->_private;
7543
1.39M
      if (firstChild == NULL){
7544
502k
          firstChild = cur;
7545
502k
      }
7546
1.39M
      xmlAddChild((xmlNodePtr) ent, nw);
7547
1.39M
        }
7548
1.39M
        xmlAddChild(ctxt->node, cur);
7549
1.39M
        if (cur == last)
7550
502k
      break;
7551
895k
        cur = next;
7552
895k
    }
7553
502k
    if (ent->owner == 0)
7554
10.6k
        ent->owner = 1;
7555
#ifdef LIBXML_LEGACY_ENABLED
7556
    if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7557
      xmlAddEntityReference(ent, firstChild, nw);
7558
#endif /* LIBXML_LEGACY_ENABLED */
7559
502k
      } else {
7560
0
    const xmlChar *nbktext;
7561
7562
    /*
7563
     * the name change is to avoid coalescing of the
7564
     * node with a possible previous text one which
7565
     * would make ent->children a dangling pointer
7566
     */
7567
0
    nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7568
0
          -1);
7569
0
    if (ent->children->type == XML_TEXT_NODE)
7570
0
        ent->children->name = nbktext;
7571
0
    if ((ent->last != ent->children) &&
7572
0
        (ent->last->type == XML_TEXT_NODE))
7573
0
        ent->last->name = nbktext;
7574
0
    xmlAddChildList(ctxt->node, ent->children);
7575
0
      }
7576
7577
      /*
7578
       * This is to avoid a nasty side effect, see
7579
       * characters() in SAX.c
7580
       */
7581
731k
      ctxt->nodemem = 0;
7582
731k
      ctxt->nodelen = 0;
7583
731k
      return;
7584
731k
  }
7585
731k
    }
7586
731k
}
7587
7588
/**
7589
 * xmlParseEntityRef:
7590
 * @ctxt:  an XML parser context
7591
 *
7592
 * DEPRECATED: Internal function, don't use.
7593
 *
7594
 * Parse an entity reference. Always consumes '&'.
7595
 *
7596
 * [68] EntityRef ::= '&' Name ';'
7597
 *
7598
 * [ WFC: Entity Declared ]
7599
 * In a document without any DTD, a document with only an internal DTD
7600
 * subset which contains no parameter entity references, or a document
7601
 * with "standalone='yes'", the Name given in the entity reference
7602
 * must match that in an entity declaration, except that well-formed
7603
 * documents need not declare any of the following entities: amp, lt,
7604
 * gt, apos, quot.  The declaration of a parameter entity must precede
7605
 * any reference to it.  Similarly, the declaration of a general entity
7606
 * must precede any reference to it which appears in a default value in an
7607
 * attribute-list declaration. Note that if entities are declared in the
7608
 * external subset or in external parameter entities, a non-validating
7609
 * processor is not obligated to read and process their declarations;
7610
 * for such documents, the rule that an entity must be declared is a
7611
 * well-formedness constraint only if standalone='yes'.
7612
 *
7613
 * [ WFC: Parsed Entity ]
7614
 * An entity reference must not contain the name of an unparsed entity
7615
 *
7616
 * Returns the xmlEntityPtr if found, or NULL otherwise.
7617
 */
7618
xmlEntityPtr
7619
6.55M
xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7620
6.55M
    const xmlChar *name;
7621
6.55M
    xmlEntityPtr ent = NULL;
7622
7623
6.55M
    GROW;
7624
6.55M
    if (ctxt->instate == XML_PARSER_EOF)
7625
0
        return(NULL);
7626
7627
6.55M
    if (RAW != '&')
7628
0
        return(NULL);
7629
6.55M
    NEXT;
7630
6.55M
    name = xmlParseName(ctxt);
7631
6.55M
    if (name == NULL) {
7632
695k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7633
695k
           "xmlParseEntityRef: no name\n");
7634
695k
        return(NULL);
7635
695k
    }
7636
5.86M
    if (RAW != ';') {
7637
408k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7638
408k
  return(NULL);
7639
408k
    }
7640
5.45M
    NEXT;
7641
7642
    /*
7643
     * Predefined entities override any extra definition
7644
     */
7645
5.45M
    if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7646
4.11M
        ent = xmlGetPredefinedEntity(name);
7647
4.11M
        if (ent != NULL)
7648
401k
            return(ent);
7649
4.11M
    }
7650
7651
    /*
7652
     * Ask first SAX for entity resolution, otherwise try the
7653
     * entities which may have been stored in the parser context.
7654
     */
7655
5.05M
    if (ctxt->sax != NULL) {
7656
5.05M
  if (ctxt->sax->getEntity != NULL)
7657
5.05M
      ent = ctxt->sax->getEntity(ctxt->userData, name);
7658
5.05M
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7659
5.05M
      (ctxt->options & XML_PARSE_OLDSAX))
7660
69.3k
      ent = xmlGetPredefinedEntity(name);
7661
5.05M
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7662
5.05M
      (ctxt->userData==ctxt)) {
7663
140k
      ent = xmlSAX2GetEntity(ctxt, name);
7664
140k
  }
7665
5.05M
    }
7666
5.05M
    if (ctxt->instate == XML_PARSER_EOF)
7667
0
  return(NULL);
7668
    /*
7669
     * [ WFC: Entity Declared ]
7670
     * In a document without any DTD, a document with only an
7671
     * internal DTD subset which contains no parameter entity
7672
     * references, or a document with "standalone='yes'", the
7673
     * Name given in the entity reference must match that in an
7674
     * entity declaration, except that well-formed documents
7675
     * need not declare any of the following entities: amp, lt,
7676
     * gt, apos, quot.
7677
     * The declaration of a parameter entity must precede any
7678
     * reference to it.
7679
     * Similarly, the declaration of a general entity must
7680
     * precede any reference to it which appears in a default
7681
     * value in an attribute-list declaration. Note that if
7682
     * entities are declared in the external subset or in
7683
     * external parameter entities, a non-validating processor
7684
     * is not obligated to read and process their declarations;
7685
     * for such documents, the rule that an entity must be
7686
     * declared is a well-formedness constraint only if
7687
     * standalone='yes'.
7688
     */
7689
5.05M
    if (ent == NULL) {
7690
937k
  if ((ctxt->standalone == 1) ||
7691
937k
      ((ctxt->hasExternalSubset == 0) &&
7692
922k
       (ctxt->hasPErefs == 0))) {
7693
582k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7694
582k
         "Entity '%s' not defined\n", name);
7695
582k
  } else {
7696
355k
      xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7697
355k
         "Entity '%s' not defined\n", name);
7698
355k
      if ((ctxt->inSubset == 0) &&
7699
355k
    (ctxt->sax != NULL) &&
7700
355k
    (ctxt->sax->reference != NULL)) {
7701
343k
    ctxt->sax->reference(ctxt->userData, name);
7702
343k
      }
7703
355k
  }
7704
937k
  ctxt->valid = 0;
7705
937k
    }
7706
7707
    /*
7708
     * [ WFC: Parsed Entity ]
7709
     * An entity reference must not contain the name of an
7710
     * unparsed entity
7711
     */
7712
4.11M
    else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7713
3.60k
  xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7714
3.60k
     "Entity reference to unparsed entity %s\n", name);
7715
3.60k
    }
7716
7717
    /*
7718
     * [ WFC: No External Entity References ]
7719
     * Attribute values cannot contain direct or indirect
7720
     * entity references to external entities.
7721
     */
7722
4.11M
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7723
4.11M
       (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7724
25.7k
  xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7725
25.7k
       "Attribute references external entity '%s'\n", name);
7726
25.7k
    }
7727
    /*
7728
     * [ WFC: No < in Attribute Values ]
7729
     * The replacement text of any entity referred to directly or
7730
     * indirectly in an attribute value (other than "&lt;") must
7731
     * not contain a <.
7732
     */
7733
4.08M
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7734
4.08M
       (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7735
2.05M
  if ((ent->flags & XML_ENT_CHECKED_LT) == 0) {
7736
26.1k
            if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7737
3.14k
                ent->flags |= XML_ENT_CONTAINS_LT;
7738
26.1k
            ent->flags |= XML_ENT_CHECKED_LT;
7739
26.1k
        }
7740
2.05M
        if (ent->flags & XML_ENT_CONTAINS_LT)
7741
21.0k
            xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7742
21.0k
                    "'<' in entity '%s' is not allowed in attributes "
7743
21.0k
                    "values\n", name);
7744
2.05M
    }
7745
7746
    /*
7747
     * Internal check, no parameter entities here ...
7748
     */
7749
2.03M
    else {
7750
2.03M
  switch (ent->etype) {
7751
0
      case XML_INTERNAL_PARAMETER_ENTITY:
7752
0
      case XML_EXTERNAL_PARAMETER_ENTITY:
7753
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7754
0
       "Attempt to reference the parameter entity '%s'\n",
7755
0
            name);
7756
0
      break;
7757
2.03M
      default:
7758
2.03M
      break;
7759
2.03M
  }
7760
2.03M
    }
7761
7762
    /*
7763
     * [ WFC: No Recursion ]
7764
     * A parsed entity must not contain a recursive reference
7765
     * to itself, either directly or indirectly.
7766
     * Done somewhere else
7767
     */
7768
5.05M
    return(ent);
7769
5.05M
}
7770
7771
/**
7772
 * xmlParseStringEntityRef:
7773
 * @ctxt:  an XML parser context
7774
 * @str:  a pointer to an index in the string
7775
 *
7776
 * Parse an entity reference; this version parses it from
7777
 * a string value.
7778
 *
7779
 * [68] EntityRef ::= '&' Name ';'
7780
 *
7781
 * [ WFC: Entity Declared ]
7782
 * In a document without any DTD, a document with only an internal DTD
7783
 * subset which contains no parameter entity references, or a document
7784
 * with "standalone='yes'", the Name given in the entity reference
7785
 * must match that in an entity declaration, except that well-formed
7786
 * documents need not declare any of the following entities: amp, lt,
7787
 * gt, apos, quot.  The declaration of a parameter entity must precede
7788
 * any reference to it.  Similarly, the declaration of a general entity
7789
 * must precede any reference to it which appears in a default value in an
7790
 * attribute-list declaration. Note that if entities are declared in the
7791
 * external subset or in external parameter entities, a non-validating
7792
 * processor is not obligated to read and process their declarations;
7793
 * for such documents, the rule that an entity must be declared is a
7794
 * well-formedness constraint only if standalone='yes'.
7795
 *
7796
 * [ WFC: Parsed Entity ]
7797
 * An entity reference must not contain the name of an unparsed entity
7798
 *
7799
 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7800
 * is updated to the current location in the string.
7801
 */
7802
static xmlEntityPtr
7803
35.0M
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7804
35.0M
    xmlChar *name;
7805
35.0M
    const xmlChar *ptr;
7806
35.0M
    xmlChar cur;
7807
35.0M
    xmlEntityPtr ent = NULL;
7808
7809
35.0M
    if ((str == NULL) || (*str == NULL))
7810
0
        return(NULL);
7811
35.0M
    ptr = *str;
7812
35.0M
    cur = *ptr;
7813
35.0M
    if (cur != '&')
7814
0
  return(NULL);
7815
7816
35.0M
    ptr++;
7817
35.0M
    name = xmlParseStringName(ctxt, &ptr);
7818
35.0M
    if (name == NULL) {
7819
3.06k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7820
3.06k
           "xmlParseStringEntityRef: no name\n");
7821
3.06k
  *str = ptr;
7822
3.06k
  return(NULL);
7823
3.06k
    }
7824
35.0M
    if (*ptr != ';') {
7825
29.8k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7826
29.8k
        xmlFree(name);
7827
29.8k
  *str = ptr;
7828
29.8k
  return(NULL);
7829
29.8k
    }
7830
35.0M
    ptr++;
7831
7832
7833
    /*
7834
     * Predefined entities override any extra definition
7835
     */
7836
35.0M
    if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7837
31.7M
        ent = xmlGetPredefinedEntity(name);
7838
31.7M
        if (ent != NULL) {
7839
30.6k
            xmlFree(name);
7840
30.6k
            *str = ptr;
7841
30.6k
            return(ent);
7842
30.6k
        }
7843
31.7M
    }
7844
7845
    /*
7846
     * Ask first SAX for entity resolution, otherwise try the
7847
     * entities which may have been stored in the parser context.
7848
     */
7849
35.0M
    if (ctxt->sax != NULL) {
7850
35.0M
  if (ctxt->sax->getEntity != NULL)
7851
35.0M
      ent = ctxt->sax->getEntity(ctxt->userData, name);
7852
35.0M
  if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7853
84.0k
      ent = xmlGetPredefinedEntity(name);
7854
35.0M
  if ((ent == NULL) && (ctxt->userData==ctxt)) {
7855
2.64M
      ent = xmlSAX2GetEntity(ctxt, name);
7856
2.64M
  }
7857
35.0M
    }
7858
35.0M
    if (ctxt->instate == XML_PARSER_EOF) {
7859
0
  xmlFree(name);
7860
0
  return(NULL);
7861
0
    }
7862
7863
    /*
7864
     * [ WFC: Entity Declared ]
7865
     * In a document without any DTD, a document with only an
7866
     * internal DTD subset which contains no parameter entity
7867
     * references, or a document with "standalone='yes'", the
7868
     * Name given in the entity reference must match that in an
7869
     * entity declaration, except that well-formed documents
7870
     * need not declare any of the following entities: amp, lt,
7871
     * gt, apos, quot.
7872
     * The declaration of a parameter entity must precede any
7873
     * reference to it.
7874
     * Similarly, the declaration of a general entity must
7875
     * precede any reference to it which appears in a default
7876
     * value in an attribute-list declaration. Note that if
7877
     * entities are declared in the external subset or in
7878
     * external parameter entities, a non-validating processor
7879
     * is not obligated to read and process their declarations;
7880
     * for such documents, the rule that an entity must be
7881
     * declared is a well-formedness constraint only if
7882
     * standalone='yes'.
7883
     */
7884
35.0M
    if (ent == NULL) {
7885
2.64M
  if ((ctxt->standalone == 1) ||
7886
2.64M
      ((ctxt->hasExternalSubset == 0) &&
7887
2.63M
       (ctxt->hasPErefs == 0))) {
7888
2.60M
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7889
2.60M
         "Entity '%s' not defined\n", name);
7890
2.60M
  } else {
7891
32.7k
      xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7892
32.7k
        "Entity '%s' not defined\n",
7893
32.7k
        name);
7894
32.7k
  }
7895
  /* TODO ? check regressions ctxt->valid = 0; */
7896
2.64M
    }
7897
7898
    /*
7899
     * [ WFC: Parsed Entity ]
7900
     * An entity reference must not contain the name of an
7901
     * unparsed entity
7902
     */
7903
32.3M
    else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7904
1.51k
  xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7905
1.51k
     "Entity reference to unparsed entity %s\n", name);
7906
1.51k
    }
7907
7908
    /*
7909
     * [ WFC: No External Entity References ]
7910
     * Attribute values cannot contain direct or indirect
7911
     * entity references to external entities.
7912
     */
7913
32.3M
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7914
32.3M
       (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7915
1.85k
  xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7916
1.85k
   "Attribute references external entity '%s'\n", name);
7917
1.85k
    }
7918
    /*
7919
     * [ WFC: No < in Attribute Values ]
7920
     * The replacement text of any entity referred to directly or
7921
     * indirectly in an attribute value (other than "&lt;") must
7922
     * not contain a <.
7923
     */
7924
32.3M
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7925
32.3M
       (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7926
32.0M
  if ((ent->flags & XML_ENT_CHECKED_LT) == 0) {
7927
19.4k
            if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7928
1.07k
                ent->flags |= XML_ENT_CONTAINS_LT;
7929
19.4k
            ent->flags |= XML_ENT_CHECKED_LT;
7930
19.4k
        }
7931
32.0M
        if (ent->flags & XML_ENT_CONTAINS_LT)
7932
81.1k
            xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7933
81.1k
                    "'<' in entity '%s' is not allowed in attributes "
7934
81.1k
                    "values\n", name);
7935
32.0M
    }
7936
7937
    /*
7938
     * Internal check, no parameter entities here ...
7939
     */
7940
325k
    else {
7941
325k
  switch (ent->etype) {
7942
0
      case XML_INTERNAL_PARAMETER_ENTITY:
7943
0
      case XML_EXTERNAL_PARAMETER_ENTITY:
7944
0
    xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7945
0
       "Attempt to reference the parameter entity '%s'\n",
7946
0
          name);
7947
0
      break;
7948
325k
      default:
7949
325k
      break;
7950
325k
  }
7951
325k
    }
7952
7953
    /*
7954
     * [ WFC: No Recursion ]
7955
     * A parsed entity must not contain a recursive reference
7956
     * to itself, either directly or indirectly.
7957
     * Done somewhere else
7958
     */
7959
7960
35.0M
    xmlFree(name);
7961
35.0M
    *str = ptr;
7962
35.0M
    return(ent);
7963
35.0M
}
7964
7965
/**
7966
 * xmlParsePEReference:
7967
 * @ctxt:  an XML parser context
7968
 *
7969
 * DEPRECATED: Internal function, don't use.
7970
 *
7971
 * Parse a parameter entity reference. Always consumes '%'.
7972
 *
7973
 * The entity content is handled directly by pushing its content as
7974
 * a new input stream.
7975
 *
7976
 * [69] PEReference ::= '%' Name ';'
7977
 *
7978
 * [ WFC: No Recursion ]
7979
 * A parsed entity must not contain a recursive
7980
 * reference to itself, either directly or indirectly.
7981
 *
7982
 * [ WFC: Entity Declared ]
7983
 * In a document without any DTD, a document with only an internal DTD
7984
 * subset which contains no parameter entity references, or a document
7985
 * with "standalone='yes'", ...  ... The declaration of a parameter
7986
 * entity must precede any reference to it...
7987
 *
7988
 * [ VC: Entity Declared ]
7989
 * In a document with an external subset or external parameter entities
7990
 * with "standalone='no'", ...  ... The declaration of a parameter entity
7991
 * must precede any reference to it...
7992
 *
7993
 * [ WFC: In DTD ]
7994
 * Parameter-entity references may only appear in the DTD.
7995
 * NOTE: misleading but this is handled.
7996
 */
7997
void
7998
xmlParsePEReference(xmlParserCtxtPtr ctxt)
7999
15.6M
{
8000
15.6M
    const xmlChar *name;
8001
15.6M
    xmlEntityPtr entity = NULL;
8002
15.6M
    xmlParserInputPtr input;
8003
8004
15.6M
    if (RAW != '%')
8005
0
        return;
8006
15.6M
    NEXT;
8007
15.6M
    name = xmlParseName(ctxt);
8008
15.6M
    if (name == NULL) {
8009
88.6k
  xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
8010
88.6k
  return;
8011
88.6k
    }
8012
15.5M
    if (xmlParserDebugEntities)
8013
0
  xmlGenericError(xmlGenericErrorContext,
8014
0
    "PEReference: %s\n", name);
8015
15.5M
    if (RAW != ';') {
8016
15.0k
  xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
8017
15.0k
        return;
8018
15.0k
    }
8019
8020
15.5M
    NEXT;
8021
8022
    /*
8023
     * Request the entity from SAX
8024
     */
8025
15.5M
    if ((ctxt->sax != NULL) &&
8026
15.5M
  (ctxt->sax->getParameterEntity != NULL))
8027
15.5M
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8028
15.5M
    if (ctxt->instate == XML_PARSER_EOF)
8029
0
  return;
8030
15.5M
    if (entity == NULL) {
8031
  /*
8032
   * [ WFC: Entity Declared ]
8033
   * In a document without any DTD, a document with only an
8034
   * internal DTD subset which contains no parameter entity
8035
   * references, or a document with "standalone='yes'", ...
8036
   * ... The declaration of a parameter entity must precede
8037
   * any reference to it...
8038
   */
8039
1.72M
  if ((ctxt->standalone == 1) ||
8040
1.72M
      ((ctxt->hasExternalSubset == 0) &&
8041
1.72M
       (ctxt->hasPErefs == 0))) {
8042
3.49k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8043
3.49k
            "PEReference: %%%s; not found\n",
8044
3.49k
            name);
8045
1.72M
  } else {
8046
      /*
8047
       * [ VC: Entity Declared ]
8048
       * In a document with an external subset or external
8049
       * parameter entities with "standalone='no'", ...
8050
       * ... The declaration of a parameter entity must
8051
       * precede any reference to it...
8052
       */
8053
1.72M
            if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
8054
300k
                xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
8055
300k
                                 "PEReference: %%%s; not found\n",
8056
300k
                                 name, NULL);
8057
300k
            } else
8058
1.42M
                xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8059
1.42M
                              "PEReference: %%%s; not found\n",
8060
1.42M
                              name, NULL);
8061
1.72M
            ctxt->valid = 0;
8062
1.72M
  }
8063
13.8M
    } else {
8064
  /*
8065
   * Internal checking in case the entity quest barfed
8066
   */
8067
13.8M
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8068
13.8M
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8069
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8070
0
      "Internal: %%%s; is not a parameter entity\n",
8071
0
        name, NULL);
8072
13.8M
  } else {
8073
13.8M
            xmlChar start[4];
8074
13.8M
            xmlCharEncoding enc;
8075
13.8M
            unsigned long parentConsumed;
8076
13.8M
            xmlEntityPtr oldEnt;
8077
8078
13.8M
      if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
8079
13.8M
          ((ctxt->options & XML_PARSE_NOENT) == 0) &&
8080
13.8M
    ((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
8081
13.8M
    ((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
8082
13.8M
    ((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
8083
13.8M
    (ctxt->replaceEntities == 0) &&
8084
13.8M
    (ctxt->validate == 0))
8085
564
    return;
8086
8087
13.8M
            if (entity->flags & XML_ENT_EXPANDING) {
8088
233
                xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
8089
233
                xmlHaltParser(ctxt);
8090
233
                return;
8091
233
            }
8092
8093
            /* Must be computed from old input before pushing new input. */
8094
13.8M
            parentConsumed = ctxt->input->parentConsumed;
8095
13.8M
            oldEnt = ctxt->input->entity;
8096
13.8M
            if ((oldEnt == NULL) ||
8097
13.8M
                ((oldEnt->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
8098
13.6M
                 ((oldEnt->flags & XML_ENT_PARSED) == 0))) {
8099
403k
                xmlSaturatedAdd(&parentConsumed, ctxt->input->consumed);
8100
403k
                xmlSaturatedAddSizeT(&parentConsumed,
8101
403k
                                     ctxt->input->cur - ctxt->input->base);
8102
403k
            }
8103
8104
13.8M
      input = xmlNewEntityInputStream(ctxt, entity);
8105
13.8M
      if (xmlPushInput(ctxt, input) < 0) {
8106
11.2k
                xmlFreeInputStream(input);
8107
11.2k
    return;
8108
11.2k
            }
8109
8110
13.8M
            entity->flags |= XML_ENT_EXPANDING;
8111
8112
13.8M
            input->parentConsumed = parentConsumed;
8113
8114
13.8M
      if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
8115
                /*
8116
                 * Get the 4 first bytes and decode the charset
8117
                 * if enc != XML_CHAR_ENCODING_NONE
8118
                 * plug some encoding conversion routines.
8119
                 * Note that, since we may have some non-UTF8
8120
                 * encoding (like UTF16, bug 135229), the 'length'
8121
                 * is not known, but we can calculate based upon
8122
                 * the amount of data in the buffer.
8123
                 */
8124
6.29k
                GROW
8125
6.29k
                if (ctxt->instate == XML_PARSER_EOF)
8126
0
                    return;
8127
6.29k
                if ((ctxt->input->end - ctxt->input->cur)>=4) {
8128
6.25k
                    start[0] = RAW;
8129
6.25k
                    start[1] = NXT(1);
8130
6.25k
                    start[2] = NXT(2);
8131
6.25k
                    start[3] = NXT(3);
8132
6.25k
                    enc = xmlDetectCharEncoding(start, 4);
8133
6.25k
                    if (enc != XML_CHAR_ENCODING_NONE) {
8134
654
                        xmlSwitchEncoding(ctxt, enc);
8135
654
                    }
8136
6.25k
                }
8137
8138
6.29k
                if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
8139
6.29k
                    (IS_BLANK_CH(NXT(5)))) {
8140
469
                    xmlParseTextDecl(ctxt);
8141
469
                }
8142
6.29k
            }
8143
13.8M
  }
8144
13.8M
    }
8145
15.5M
    ctxt->hasPErefs = 1;
8146
15.5M
}
8147
8148
/**
8149
 * xmlLoadEntityContent:
8150
 * @ctxt:  an XML parser context
8151
 * @entity: an unloaded system entity
8152
 *
8153
 * Load the original content of the given system entity from the
8154
 * ExternalID/SystemID given. This is to be used for Included in Literal
8155
 * http://www.w3.org/TR/REC-xml/#inliteral processing of entity references
8156
 *
8157
 * Returns 0 in case of success and -1 in case of failure
8158
 */
8159
static int
8160
1.29k
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
8161
1.29k
    xmlParserInputPtr input;
8162
1.29k
    xmlBufferPtr buf;
8163
1.29k
    int l, c;
8164
1.29k
    int count = 0;
8165
8166
1.29k
    if ((ctxt == NULL) || (entity == NULL) ||
8167
1.29k
        ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
8168
1.29k
   (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
8169
1.29k
  (entity->content != NULL)) {
8170
0
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8171
0
              "xmlLoadEntityContent parameter error");
8172
0
        return(-1);
8173
0
    }
8174
8175
1.29k
    if (xmlParserDebugEntities)
8176
0
  xmlGenericError(xmlGenericErrorContext,
8177
0
    "Reading %s entity content input\n", entity->name);
8178
8179
1.29k
    buf = xmlBufferCreate();
8180
1.29k
    if (buf == NULL) {
8181
0
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8182
0
              "xmlLoadEntityContent parameter error");
8183
0
        return(-1);
8184
0
    }
8185
1.29k
    xmlBufferSetAllocationScheme(buf, XML_BUFFER_ALLOC_DOUBLEIT);
8186
8187
1.29k
    input = xmlNewEntityInputStream(ctxt, entity);
8188
1.29k
    if (input == NULL) {
8189
139
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8190
139
              "xmlLoadEntityContent input error");
8191
139
  xmlBufferFree(buf);
8192
139
        return(-1);
8193
139
    }
8194
8195
    /*
8196
     * Push the entity as the current input, read char by char
8197
     * saving to the buffer until the end of the entity or an error
8198
     */
8199
1.15k
    if (xmlPushInput(ctxt, input) < 0) {
8200
0
        xmlBufferFree(buf);
8201
0
  xmlFreeInputStream(input);
8202
0
  return(-1);
8203
0
    }
8204
8205
1.15k
    GROW;
8206
1.15k
    c = CUR_CHAR(l);
8207
2.31M
    while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
8208
2.31M
           (IS_CHAR(c))) {
8209
2.31M
        xmlBufferAdd(buf, ctxt->input->cur, l);
8210
2.31M
  if (count++ > XML_PARSER_CHUNK_SIZE) {
8211
22.1k
      count = 0;
8212
22.1k
      GROW;
8213
22.1k
            if (ctxt->instate == XML_PARSER_EOF) {
8214
0
                xmlBufferFree(buf);
8215
0
                return(-1);
8216
0
            }
8217
22.1k
  }
8218
2.31M
  NEXTL(l);
8219
2.31M
  c = CUR_CHAR(l);
8220
2.31M
  if (c == 0) {
8221
852
      count = 0;
8222
852
      GROW;
8223
852
            if (ctxt->instate == XML_PARSER_EOF) {
8224
0
                xmlBufferFree(buf);
8225
0
                return(-1);
8226
0
            }
8227
852
      c = CUR_CHAR(l);
8228
852
  }
8229
2.31M
    }
8230
8231
1.15k
    if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
8232
556
        xmlSaturatedAdd(&ctxt->sizeentities, ctxt->input->consumed);
8233
556
        xmlPopInput(ctxt);
8234
602
    } else if (!IS_CHAR(c)) {
8235
602
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8236
602
                          "xmlLoadEntityContent: invalid char value %d\n",
8237
602
                    c);
8238
602
  xmlBufferFree(buf);
8239
602
  return(-1);
8240
602
    }
8241
556
    entity->content = buf->content;
8242
556
    entity->length = buf->use;
8243
556
    buf->content = NULL;
8244
556
    xmlBufferFree(buf);
8245
8246
556
    return(0);
8247
1.15k
}
8248
8249
/**
8250
 * xmlParseStringPEReference:
8251
 * @ctxt:  an XML parser context
8252
 * @str:  a pointer to an index in the string
8253
 *
8254
 * parse PEReference declarations
8255
 *
8256
 * [69] PEReference ::= '%' Name ';'
8257
 *
8258
 * [ WFC: No Recursion ]
8259
 * A parsed entity must not contain a recursive
8260
 * reference to itself, either directly or indirectly.
8261
 *
8262
 * [ WFC: Entity Declared ]
8263
 * In a document without any DTD, a document with only an internal DTD
8264
 * subset which contains no parameter entity references, or a document
8265
 * with "standalone='yes'", ...  ... The declaration of a parameter
8266
 * entity must precede any reference to it...
8267
 *
8268
 * [ VC: Entity Declared ]
8269
 * In a document with an external subset or external parameter entities
8270
 * with "standalone='no'", ...  ... The declaration of a parameter entity
8271
 * must precede any reference to it...
8272
 *
8273
 * [ WFC: In DTD ]
8274
 * Parameter-entity references may only appear in the DTD.
8275
 * NOTE: misleading but this is handled.
8276
 *
8277
 * Returns the xmlEntityPtr for the referenced entity, or NULL otherwise.
8278
 *         str is updated to the current value of the index
8279
 */
8280
static xmlEntityPtr
8281
151k
xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8282
151k
    const xmlChar *ptr;
8283
151k
    xmlChar cur;
8284
151k
    xmlChar *name;
8285
151k
    xmlEntityPtr entity = NULL;
8286
8287
151k
    if ((str == NULL) || (*str == NULL)) return(NULL);
8288
151k
    ptr = *str;
8289
151k
    cur = *ptr;
8290
151k
    if (cur != '%')
8291
0
        return(NULL);
8292
151k
    ptr++;
8293
151k
    name = xmlParseStringName(ctxt, &ptr);
8294
151k
    if (name == NULL) {
8295
4.73k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8296
4.73k
           "xmlParseStringPEReference: no name\n");
8297
4.73k
  *str = ptr;
8298
4.73k
  return(NULL);
8299
4.73k
    }
8300
146k
    cur = *ptr;
8301
146k
    if (cur != ';') {
8302
659
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8303
659
  xmlFree(name);
8304
659
  *str = ptr;
8305
659
  return(NULL);
8306
659
    }
8307
145k
    ptr++;
8308
8309
    /*
8310
     * Request the entity from SAX
8311
     */
8312
145k
    if ((ctxt->sax != NULL) &&
8313
145k
  (ctxt->sax->getParameterEntity != NULL))
8314
145k
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8315
145k
    if (ctxt->instate == XML_PARSER_EOF) {
8316
0
  xmlFree(name);
8317
0
  *str = ptr;
8318
0
  return(NULL);
8319
0
    }
8320
145k
    if (entity == NULL) {
8321
  /*
8322
   * [ WFC: Entity Declared ]
8323
   * In a document without any DTD, a document with only an
8324
   * internal DTD subset which contains no parameter entity
8325
   * references, or a document with "standalone='yes'", ...
8326
   * ... The declaration of a parameter entity must precede
8327
   * any reference to it...
8328
   */
8329
29.5k
  if ((ctxt->standalone == 1) ||
8330
29.5k
      ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8331
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8332
0
     "PEReference: %%%s; not found\n", name);
8333
29.5k
  } else {
8334
      /*
8335
       * [ VC: Entity Declared ]
8336
       * In a document with an external subset or external
8337
       * parameter entities with "standalone='no'", ...
8338
       * ... The declaration of a parameter entity must
8339
       * precede any reference to it...
8340
       */
8341
29.5k
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8342
29.5k
        "PEReference: %%%s; not found\n",
8343
29.5k
        name, NULL);
8344
29.5k
      ctxt->valid = 0;
8345
29.5k
  }
8346
116k
    } else {
8347
  /*
8348
   * Internal checking in case the entity quest barfed
8349
   */
8350
116k
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8351
116k
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8352
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8353
0
        "%%%s; is not a parameter entity\n",
8354
0
        name, NULL);
8355
0
  }
8356
116k
    }
8357
145k
    ctxt->hasPErefs = 1;
8358
145k
    xmlFree(name);
8359
145k
    *str = ptr;
8360
145k
    return(entity);
8361
145k
}
8362
8363
/**
8364
 * xmlParseDocTypeDecl:
8365
 * @ctxt:  an XML parser context
8366
 *
8367
 * DEPRECATED: Internal function, don't use.
8368
 *
8369
 * parse a DOCTYPE declaration
8370
 *
8371
 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8372
 *                      ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8373
 *
8374
 * [ VC: Root Element Type ]
8375
 * The Name in the document type declaration must match the element
8376
 * type of the root element.
8377
 */
8378
8379
void
8380
518k
xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
8381
518k
    const xmlChar *name = NULL;
8382
518k
    xmlChar *ExternalID = NULL;
8383
518k
    xmlChar *URI = NULL;
8384
8385
    /*
8386
     * We know that '<!DOCTYPE' has been detected.
8387
     */
8388
518k
    SKIP(9);
8389
8390
518k
    SKIP_BLANKS;
8391
8392
    /*
8393
     * Parse the DOCTYPE name.
8394
     */
8395
518k
    name = xmlParseName(ctxt);
8396
518k
    if (name == NULL) {
8397
3.63k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8398
3.63k
           "xmlParseDocTypeDecl : no DOCTYPE name !\n");
8399
3.63k
    }
8400
518k
    ctxt->intSubName = name;
8401
8402
518k
    SKIP_BLANKS;
8403
8404
    /*
8405
     * Check for SystemID and ExternalID
8406
     */
8407
518k
    URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8408
8409
518k
    if ((URI != NULL) || (ExternalID != NULL)) {
8410
187k
        ctxt->hasExternalSubset = 1;
8411
187k
    }
8412
518k
    ctxt->extSubURI = URI;
8413
518k
    ctxt->extSubSystem = ExternalID;
8414
8415
518k
    SKIP_BLANKS;
8416
8417
    /*
8418
     * Create and update the internal subset.
8419
     */
8420
518k
    if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8421
518k
  (!ctxt->disableSAX))
8422
493k
  ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8423
518k
    if (ctxt->instate == XML_PARSER_EOF)
8424
0
  return;
8425
8426
    /*
8427
     * Is there any internal subset declarations ?
8428
     * they are handled separately in xmlParseInternalSubset()
8429
     */
8430
518k
    if (RAW == '[')
8431
331k
  return;
8432
8433
    /*
8434
     * We should be at the end of the DOCTYPE declaration.
8435
     */
8436
186k
    if (RAW != '>') {
8437
59.0k
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8438
59.0k
    }
8439
186k
    NEXT;
8440
186k
}
8441
8442
/**
8443
 * xmlParseInternalSubset:
8444
 * @ctxt:  an XML parser context
8445
 *
8446
 * parse the internal subset declaration
8447
 *
8448
 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8449
 */
8450
8451
static void
8452
344k
xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8453
    /*
8454
     * Is there any DTD definition ?
8455
     */
8456
344k
    if (RAW == '[') {
8457
344k
        int baseInputNr = ctxt->inputNr;
8458
344k
        ctxt->instate = XML_PARSER_DTD;
8459
344k
        NEXT;
8460
  /*
8461
   * Parse the succession of Markup declarations and
8462
   * PEReferences.
8463
   * Subsequence (markupdecl | PEReference | S)*
8464
   */
8465
344k
  SKIP_BLANKS;
8466
16.1M
  while (((RAW != ']') || (ctxt->inputNr > baseInputNr)) &&
8467
16.1M
               (ctxt->instate != XML_PARSER_EOF)) {
8468
8469
            /*
8470
             * Conditional sections are allowed from external entities included
8471
             * by PE References in the internal subset.
8472
             */
8473
15.9M
            if ((ctxt->inputNr > 1) && (ctxt->input->filename != NULL) &&
8474
15.9M
                (RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
8475
0
                xmlParseConditionalSections(ctxt);
8476
15.9M
            } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
8477
15.7M
          xmlParseMarkupDecl(ctxt);
8478
15.7M
            } else if (RAW == '%') {
8479
116k
          xmlParsePEReference(ctxt);
8480
121k
            } else {
8481
121k
    xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8482
121k
                        "xmlParseInternalSubset: error detected in"
8483
121k
                        " Markup declaration\n");
8484
121k
                xmlHaltParser(ctxt);
8485
121k
                return;
8486
121k
            }
8487
15.8M
      SKIP_BLANKS;
8488
15.8M
  }
8489
223k
  if (RAW == ']') {
8490
189k
      NEXT;
8491
189k
      SKIP_BLANKS;
8492
189k
  }
8493
223k
    }
8494
8495
    /*
8496
     * We should be at the end of the DOCTYPE declaration.
8497
     */
8498
223k
    if (RAW != '>') {
8499
36.1k
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8500
36.1k
  return;
8501
36.1k
    }
8502
187k
    NEXT;
8503
187k
}
8504
8505
#ifdef LIBXML_SAX1_ENABLED
8506
/**
8507
 * xmlParseAttribute:
8508
 * @ctxt:  an XML parser context
8509
 * @value:  a xmlChar ** used to store the value of the attribute
8510
 *
8511
 * DEPRECATED: Internal function, don't use.
8512
 *
8513
 * parse an attribute
8514
 *
8515
 * [41] Attribute ::= Name Eq AttValue
8516
 *
8517
 * [ WFC: No External Entity References ]
8518
 * Attribute values cannot contain direct or indirect entity references
8519
 * to external entities.
8520
 *
8521
 * [ WFC: No < in Attribute Values ]
8522
 * The replacement text of any entity referred to directly or indirectly in
8523
 * an attribute value (other than "&lt;") must not contain a <.
8524
 *
8525
 * [ VC: Attribute Value Type ]
8526
 * The attribute must have been declared; the value must be of the type
8527
 * declared for it.
8528
 *
8529
 * [25] Eq ::= S? '=' S?
8530
 *
8531
 * With namespace:
8532
 *
8533
 * [NS 11] Attribute ::= QName Eq AttValue
8534
 *
8535
 * Also the case QName == xmlns:??? is handled independently as a namespace
8536
 * definition.
8537
 *
8538
 * Returns the attribute name, and the value in *value.
8539
 */
8540
8541
const xmlChar *
8542
5.55M
xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8543
5.55M
    const xmlChar *name;
8544
5.55M
    xmlChar *val;
8545
8546
5.55M
    *value = NULL;
8547
5.55M
    GROW;
8548
5.55M
    name = xmlParseName(ctxt);
8549
5.55M
    if (name == NULL) {
8550
1.77M
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8551
1.77M
                 "error parsing attribute name\n");
8552
1.77M
        return(NULL);
8553
1.77M
    }
8554
8555
    /*
8556
     * read the value
8557
     */
8558
3.78M
    SKIP_BLANKS;
8559
3.78M
    if (RAW == '=') {
8560
2.97M
        NEXT;
8561
2.97M
  SKIP_BLANKS;
8562
2.97M
  val = xmlParseAttValue(ctxt);
8563
2.97M
  ctxt->instate = XML_PARSER_CONTENT;
8564
2.97M
    } else {
8565
801k
  xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8566
801k
         "Specification mandates value for attribute %s\n", name);
8567
801k
  return(name);
8568
801k
    }
8569
8570
    /*
8571
     * Check that xml:lang conforms to the specification
8572
     * No more registered as an error, just generate a warning now
8573
     * since this was deprecated in XML second edition
8574
     */
8575
2.97M
    if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8576
79.7k
  if (!xmlCheckLanguageID(val)) {
8577
64.4k
      xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8578
64.4k
              "Malformed value for xml:lang : %s\n",
8579
64.4k
        val, NULL);
8580
64.4k
  }
8581
79.7k
    }
8582
8583
    /*
8584
     * Check that xml:space conforms to the specification
8585
     */
8586
2.97M
    if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8587
12.2k
  if (xmlStrEqual(val, BAD_CAST "default"))
8588
735
      *(ctxt->space) = 0;
8589
11.5k
  else if (xmlStrEqual(val, BAD_CAST "preserve"))
8590
325
      *(ctxt->space) = 1;
8591
11.2k
  else {
8592
11.2k
    xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8593
11.2k
"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8594
11.2k
                                 val, NULL);
8595
11.2k
  }
8596
12.2k
    }
8597
8598
2.97M
    *value = val;
8599
2.97M
    return(name);
8600
3.78M
}
8601
8602
/**
8603
 * xmlParseStartTag:
8604
 * @ctxt:  an XML parser context
8605
 *
8606
 * DEPRECATED: Internal function, don't use.
8607
 *
8608
 * Parse a start tag. Always consumes '<'.
8609
 *
8610
 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8611
 *
8612
 * [ WFC: Unique Att Spec ]
8613
 * No attribute name may appear more than once in the same start-tag or
8614
 * empty-element tag.
8615
 *
8616
 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8617
 *
8618
 * [ WFC: Unique Att Spec ]
8619
 * No attribute name may appear more than once in the same start-tag or
8620
 * empty-element tag.
8621
 *
8622
 * With namespace:
8623
 *
8624
 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8625
 *
8626
 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8627
 *
8628
 * Returns the element name parsed
8629
 */
8630
8631
const xmlChar *
8632
5.67M
xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8633
5.67M
    const xmlChar *name;
8634
5.67M
    const xmlChar *attname;
8635
5.67M
    xmlChar *attvalue;
8636
5.67M
    const xmlChar **atts = ctxt->atts;
8637
5.67M
    int nbatts = 0;
8638
5.67M
    int maxatts = ctxt->maxatts;
8639
5.67M
    int i;
8640
8641
5.67M
    if (RAW != '<') return(NULL);
8642
5.67M
    NEXT1;
8643
8644
5.67M
    name = xmlParseName(ctxt);
8645
5.67M
    if (name == NULL) {
8646
494k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8647
494k
       "xmlParseStartTag: invalid element name\n");
8648
494k
        return(NULL);
8649
494k
    }
8650
8651
    /*
8652
     * Now parse the attributes, it ends up with the ending
8653
     *
8654
     * (S Attribute)* S?
8655
     */
8656
5.18M
    SKIP_BLANKS;
8657
5.18M
    GROW;
8658
8659
7.64M
    while (((RAW != '>') &&
8660
7.64M
     ((RAW != '/') || (NXT(1) != '>')) &&
8661
7.64M
     (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
8662
5.55M
  attname = xmlParseAttribute(ctxt, &attvalue);
8663
5.55M
        if (attname == NULL) {
8664
1.77M
      xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8665
1.77M
         "xmlParseStartTag: problem parsing attributes\n");
8666
1.77M
      break;
8667
1.77M
  }
8668
3.78M
        if (attvalue != NULL) {
8669
      /*
8670
       * [ WFC: Unique Att Spec ]
8671
       * No attribute name may appear more than once in the same
8672
       * start-tag or empty-element tag.
8673
       */
8674
4.04M
      for (i = 0; i < nbatts;i += 2) {
8675
1.17M
          if (xmlStrEqual(atts[i], attname)) {
8676
34.8k
        xmlErrAttributeDup(ctxt, NULL, attname);
8677
34.8k
        xmlFree(attvalue);
8678
34.8k
        goto failed;
8679
34.8k
    }
8680
1.17M
      }
8681
      /*
8682
       * Add the pair to atts
8683
       */
8684
2.86M
      if (atts == NULL) {
8685
137k
          maxatts = 22; /* allow for 10 attrs by default */
8686
137k
          atts = (const xmlChar **)
8687
137k
           xmlMalloc(maxatts * sizeof(xmlChar *));
8688
137k
    if (atts == NULL) {
8689
0
        xmlErrMemory(ctxt, NULL);
8690
0
        if (attvalue != NULL)
8691
0
      xmlFree(attvalue);
8692
0
        goto failed;
8693
0
    }
8694
137k
    ctxt->atts = atts;
8695
137k
    ctxt->maxatts = maxatts;
8696
2.72M
      } else if (nbatts + 4 > maxatts) {
8697
95
          const xmlChar **n;
8698
8699
95
          maxatts *= 2;
8700
95
          n = (const xmlChar **) xmlRealloc((void *) atts,
8701
95
               maxatts * sizeof(const xmlChar *));
8702
95
    if (n == NULL) {
8703
0
        xmlErrMemory(ctxt, NULL);
8704
0
        if (attvalue != NULL)
8705
0
      xmlFree(attvalue);
8706
0
        goto failed;
8707
0
    }
8708
95
    atts = n;
8709
95
    ctxt->atts = atts;
8710
95
    ctxt->maxatts = maxatts;
8711
95
      }
8712
2.86M
      atts[nbatts++] = attname;
8713
2.86M
      atts[nbatts++] = attvalue;
8714
2.86M
      atts[nbatts] = NULL;
8715
2.86M
      atts[nbatts + 1] = NULL;
8716
2.86M
  } else {
8717
879k
      if (attvalue != NULL)
8718
0
    xmlFree(attvalue);
8719
879k
  }
8720
8721
3.78M
failed:
8722
8723
3.78M
  GROW
8724
3.78M
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8725
1.31M
      break;
8726
2.46M
  if (SKIP_BLANKS == 0) {
8727
1.47M
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8728
1.47M
         "attributes construct error\n");
8729
1.47M
  }
8730
2.46M
  SHRINK;
8731
2.46M
        GROW;
8732
2.46M
    }
8733
8734
    /*
8735
     * SAX: Start of Element !
8736
     */
8737
5.18M
    if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8738
5.18M
  (!ctxt->disableSAX)) {
8739
4.80M
  if (nbatts > 0)
8740
1.83M
      ctxt->sax->startElement(ctxt->userData, name, atts);
8741
2.97M
  else
8742
2.97M
      ctxt->sax->startElement(ctxt->userData, name, NULL);
8743
4.80M
    }
8744
8745
5.18M
    if (atts != NULL) {
8746
        /* Free only the content strings */
8747
6.97M
        for (i = 1;i < nbatts;i+=2)
8748
2.86M
      if (atts[i] != NULL)
8749
2.86M
         xmlFree((xmlChar *) atts[i]);
8750
4.11M
    }
8751
5.18M
    return(name);
8752
5.18M
}
8753
8754
/**
8755
 * xmlParseEndTag1:
8756
 * @ctxt:  an XML parser context
8757
 * @line:  line of the start tag
8758
 * @nsNr:  number of namespaces on the start tag
8759
 *
8760
 * Parse an end tag. Always consumes '</'.
8761
 *
8762
 * [42] ETag ::= '</' Name S? '>'
8763
 *
8764
 * With namespace
8765
 *
8766
 * [NS 9] ETag ::= '</' QName S? '>'
8767
 */
8768
8769
static void
8770
1.14M
xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8771
1.14M
    const xmlChar *name;
8772
8773
1.14M
    GROW;
8774
1.14M
    if ((RAW != '<') || (NXT(1) != '/')) {
8775
0
  xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8776
0
           "xmlParseEndTag: '</' not found\n");
8777
0
  return;
8778
0
    }
8779
1.14M
    SKIP(2);
8780
8781
1.14M
    name = xmlParseNameAndCompare(ctxt,ctxt->name);
8782
8783
    /*
8784
     * We should definitely be at the ending "S? '>'" part
8785
     */
8786
1.14M
    GROW;
8787
1.14M
    SKIP_BLANKS;
8788
1.14M
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8789
234k
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8790
234k
    } else
8791
908k
  NEXT1;
8792
8793
    /*
8794
     * [ WFC: Element Type Match ]
8795
     * The Name in an element's end-tag must match the element type in the
8796
     * start-tag.
8797
     *
8798
     */
8799
1.14M
    if (name != (xmlChar*)1) {
8800
442k
        if (name == NULL) name = BAD_CAST "unparsable";
8801
442k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8802
442k
         "Opening and ending tag mismatch: %s line %d and %s\n",
8803
442k
                    ctxt->name, line, name);
8804
442k
    }
8805
8806
    /*
8807
     * SAX: End of Tag
8808
     */
8809
1.14M
    if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8810
1.14M
  (!ctxt->disableSAX))
8811
1.03M
        ctxt->sax->endElement(ctxt->userData, ctxt->name);
8812
8813
1.14M
    namePop(ctxt);
8814
1.14M
    spacePop(ctxt);
8815
1.14M
    return;
8816
1.14M
}
8817
8818
/**
8819
 * xmlParseEndTag:
8820
 * @ctxt:  an XML parser context
8821
 *
8822
 * DEPRECATED: Internal function, don't use.
8823
 *
8824
 * parse an end of tag
8825
 *
8826
 * [42] ETag ::= '</' Name S? '>'
8827
 *
8828
 * With namespace
8829
 *
8830
 * [NS 9] ETag ::= '</' QName S? '>'
8831
 */
8832
8833
void
8834
0
xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8835
0
    xmlParseEndTag1(ctxt, 0);
8836
0
}
8837
#endif /* LIBXML_SAX1_ENABLED */
8838
8839
/************************************************************************
8840
 *                  *
8841
 *          SAX 2 specific operations       *
8842
 *                  *
8843
 ************************************************************************/
8844
8845
/*
8846
 * xmlGetNamespace:
8847
 * @ctxt:  an XML parser context
8848
 * @prefix:  the prefix to lookup
8849
 *
8850
 * Lookup the namespace name for the @prefix (which ca be NULL)
8851
 * The prefix must come from the @ctxt->dict dictionary
8852
 *
8853
 * Returns the namespace name or NULL if not bound
8854
 */
8855
static const xmlChar *
8856
9.92M
xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8857
9.92M
    int i;
8858
8859
9.92M
    if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
8860
21.5M
    for (i = ctxt->nsNr - 2;i >= 0;i-=2)
8861
13.9M
        if (ctxt->nsTab[i] == prefix) {
8862
1.98M
      if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8863
24.3k
          return(NULL);
8864
1.95M
      return(ctxt->nsTab[i + 1]);
8865
1.98M
  }
8866
7.60M
    return(NULL);
8867
9.58M
}
8868
8869
/**
8870
 * xmlParseQName:
8871
 * @ctxt:  an XML parser context
8872
 * @prefix:  pointer to store the prefix part
8873
 *
8874
 * parse an XML Namespace QName
8875
 *
8876
 * [6]  QName  ::= (Prefix ':')? LocalPart
8877
 * [7]  Prefix  ::= NCName
8878
 * [8]  LocalPart  ::= NCName
8879
 *
8880
 * Returns the Name parsed or NULL
8881
 */
8882
8883
static const xmlChar *
8884
18.6M
xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8885
18.6M
    const xmlChar *l, *p;
8886
8887
18.6M
    GROW;
8888
8889
18.6M
    l = xmlParseNCName(ctxt);
8890
18.6M
    if (l == NULL) {
8891
2.18M
        if (CUR == ':') {
8892
73.3k
      l = xmlParseName(ctxt);
8893
73.3k
      if (l != NULL) {
8894
73.3k
          xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8895
73.3k
             "Failed to parse QName '%s'\n", l, NULL, NULL);
8896
73.3k
    *prefix = NULL;
8897
73.3k
    return(l);
8898
73.3k
      }
8899
73.3k
  }
8900
2.11M
        return(NULL);
8901
2.18M
    }
8902
16.4M
    if (CUR == ':') {
8903
4.40M
        NEXT;
8904
4.40M
  p = l;
8905
4.40M
  l = xmlParseNCName(ctxt);
8906
4.40M
  if (l == NULL) {
8907
248k
      xmlChar *tmp;
8908
8909
248k
            if (ctxt->instate == XML_PARSER_EOF)
8910
0
                return(NULL);
8911
248k
            xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8912
248k
               "Failed to parse QName '%s:'\n", p, NULL, NULL);
8913
248k
      l = xmlParseNmtoken(ctxt);
8914
248k
      if (l == NULL) {
8915
146k
                if (ctxt->instate == XML_PARSER_EOF)
8916
0
                    return(NULL);
8917
146k
    tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8918
146k
            } else {
8919
102k
    tmp = xmlBuildQName(l, p, NULL, 0);
8920
102k
    xmlFree((char *)l);
8921
102k
      }
8922
248k
      p = xmlDictLookup(ctxt->dict, tmp, -1);
8923
248k
      if (tmp != NULL) xmlFree(tmp);
8924
248k
      *prefix = NULL;
8925
248k
      return(p);
8926
248k
  }
8927
4.15M
  if (CUR == ':') {
8928
121k
      xmlChar *tmp;
8929
8930
121k
            xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8931
121k
               "Failed to parse QName '%s:%s:'\n", p, l, NULL);
8932
121k
      NEXT;
8933
121k
      tmp = (xmlChar *) xmlParseName(ctxt);
8934
121k
      if (tmp != NULL) {
8935
96.4k
          tmp = xmlBuildQName(tmp, l, NULL, 0);
8936
96.4k
    l = xmlDictLookup(ctxt->dict, tmp, -1);
8937
96.4k
    if (tmp != NULL) xmlFree(tmp);
8938
96.4k
    *prefix = p;
8939
96.4k
    return(l);
8940
96.4k
      }
8941
25.2k
            if (ctxt->instate == XML_PARSER_EOF)
8942
0
                return(NULL);
8943
25.2k
      tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8944
25.2k
      l = xmlDictLookup(ctxt->dict, tmp, -1);
8945
25.2k
      if (tmp != NULL) xmlFree(tmp);
8946
25.2k
      *prefix = p;
8947
25.2k
      return(l);
8948
25.2k
  }
8949
4.03M
  *prefix = p;
8950
4.03M
    } else
8951
12.0M
        *prefix = NULL;
8952
16.0M
    return(l);
8953
16.4M
}
8954
8955
/**
8956
 * xmlParseQNameAndCompare:
8957
 * @ctxt:  an XML parser context
8958
 * @name:  the localname
8959
 * @prefix:  the prefix, if any.
8960
 *
8961
 * parse an XML name and compares for match
8962
 * (specialized for endtag parsing)
8963
 *
8964
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8965
 * and the name for mismatch
8966
 */
8967
8968
static const xmlChar *
8969
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8970
794k
                        xmlChar const *prefix) {
8971
794k
    const xmlChar *cmp;
8972
794k
    const xmlChar *in;
8973
794k
    const xmlChar *ret;
8974
794k
    const xmlChar *prefix2;
8975
8976
794k
    if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8977
8978
794k
    GROW;
8979
794k
    in = ctxt->input->cur;
8980
8981
794k
    cmp = prefix;
8982
1.83M
    while (*in != 0 && *in == *cmp) {
8983
1.04M
  ++in;
8984
1.04M
  ++cmp;
8985
1.04M
    }
8986
794k
    if ((*cmp == 0) && (*in == ':')) {
8987
639k
        in++;
8988
639k
  cmp = name;
8989
3.40M
  while (*in != 0 && *in == *cmp) {
8990
2.76M
      ++in;
8991
2.76M
      ++cmp;
8992
2.76M
  }
8993
639k
  if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8994
      /* success */
8995
402k
            ctxt->input->col += in - ctxt->input->cur;
8996
402k
      ctxt->input->cur = in;
8997
402k
      return((const xmlChar*) 1);
8998
402k
  }
8999
639k
    }
9000
    /*
9001
     * all strings coms from the dictionary, equality can be done directly
9002
     */
9003
391k
    ret = xmlParseQName (ctxt, &prefix2);
9004
391k
    if ((ret == name) && (prefix == prefix2))
9005
9.83k
  return((const xmlChar*) 1);
9006
381k
    return ret;
9007
391k
}
9008
9009
/**
9010
 * xmlParseAttValueInternal:
9011
 * @ctxt:  an XML parser context
9012
 * @len:  attribute len result
9013
 * @alloc:  whether the attribute was reallocated as a new string
9014
 * @normalize:  if 1 then further non-CDATA normalization must be done
9015
 *
9016
 * parse a value for an attribute.
9017
 * NOTE: if no normalization is needed, the routine will return pointers
9018
 *       directly from the data buffer.
9019
 *
9020
 * 3.3.3 Attribute-Value Normalization:
9021
 * Before the value of an attribute is passed to the application or
9022
 * checked for validity, the XML processor must normalize it as follows:
9023
 * - a character reference is processed by appending the referenced
9024
 *   character to the attribute value
9025
 * - an entity reference is processed by recursively processing the
9026
 *   replacement text of the entity
9027
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
9028
 *   appending #x20 to the normalized value, except that only a single
9029
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
9030
 *   parsed entity or the literal entity value of an internal parsed entity
9031
 * - other characters are processed by appending them to the normalized value
9032
 * If the declared value is not CDATA, then the XML processor must further
9033
 * process the normalized attribute value by discarding any leading and
9034
 * trailing space (#x20) characters, and by replacing sequences of space
9035
 * (#x20) characters by a single space (#x20) character.
9036
 * All attributes for which no declaration has been read should be treated
9037
 * by a non-validating parser as if declared CDATA.
9038
 *
9039
 * Returns the AttValue parsed or NULL. The value has to be freed by the
9040
 *     caller if it was copied, this can be detected by val[*len] == 0.
9041
 */
9042
9043
#define GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end) \
9044
4.50k
    const xmlChar *oldbase = ctxt->input->base;\
9045
4.50k
    GROW;\
9046
4.50k
    if (ctxt->instate == XML_PARSER_EOF)\
9047
4.50k
        return(NULL);\
9048
4.50k
    if (oldbase != ctxt->input->base) {\
9049
0
        ptrdiff_t delta = ctxt->input->base - oldbase;\
9050
0
        start = start + delta;\
9051
0
        in = in + delta;\
9052
0
    }\
9053
4.50k
    end = ctxt->input->end;
9054
9055
static xmlChar *
9056
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
9057
                         int normalize)
9058
9.53M
{
9059
9.53M
    xmlChar limit = 0;
9060
9.53M
    const xmlChar *in = NULL, *start, *end, *last;
9061
9.53M
    xmlChar *ret = NULL;
9062
9.53M
    int line, col;
9063
9.53M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9064
2.65M
                    XML_MAX_HUGE_LENGTH :
9065
9.53M
                    XML_MAX_TEXT_LENGTH;
9066
9067
9.53M
    GROW;
9068
9.53M
    in = (xmlChar *) CUR_PTR;
9069
9.53M
    line = ctxt->input->line;
9070
9.53M
    col = ctxt->input->col;
9071
9.53M
    if (*in != '"' && *in != '\'') {
9072
178k
        xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
9073
178k
        return (NULL);
9074
178k
    }
9075
9.35M
    ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
9076
9077
    /*
9078
     * try to handle in this routine the most common case where no
9079
     * allocation of a new string is required and where content is
9080
     * pure ASCII.
9081
     */
9082
9.35M
    limit = *in++;
9083
9.35M
    col++;
9084
9.35M
    end = ctxt->input->end;
9085
9.35M
    start = in;
9086
9.35M
    if (in >= end) {
9087
447
        GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9088
447
    }
9089
9.35M
    if (normalize) {
9090
        /*
9091
   * Skip any leading spaces
9092
   */
9093
445k
  while ((in < end) && (*in != limit) &&
9094
445k
         ((*in == 0x20) || (*in == 0x9) ||
9095
438k
          (*in == 0xA) || (*in == 0xD))) {
9096
153k
      if (*in == 0xA) {
9097
56.3k
          line++; col = 1;
9098
97.2k
      } else {
9099
97.2k
          col++;
9100
97.2k
      }
9101
153k
      in++;
9102
153k
      start = in;
9103
153k
      if (in >= end) {
9104
124
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9105
124
                if ((in - start) > maxLength) {
9106
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9107
0
                                   "AttValue length too long\n");
9108
0
                    return(NULL);
9109
0
                }
9110
124
      }
9111
153k
  }
9112
2.62M
  while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9113
2.62M
         (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9114
2.34M
      col++;
9115
2.34M
      if ((*in++ == 0x20) && (*in == 0x20)) break;
9116
2.33M
      if (in >= end) {
9117
192
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9118
192
                if ((in - start) > maxLength) {
9119
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9120
0
                                   "AttValue length too long\n");
9121
0
                    return(NULL);
9122
0
                }
9123
192
      }
9124
2.33M
  }
9125
292k
  last = in;
9126
  /*
9127
   * skip the trailing blanks
9128
   */
9129
309k
  while ((last[-1] == 0x20) && (last > start)) last--;
9130
430k
  while ((in < end) && (*in != limit) &&
9131
430k
         ((*in == 0x20) || (*in == 0x9) ||
9132
236k
          (*in == 0xA) || (*in == 0xD))) {
9133
138k
      if (*in == 0xA) {
9134
40.2k
          line++, col = 1;
9135
97.8k
      } else {
9136
97.8k
          col++;
9137
97.8k
      }
9138
138k
      in++;
9139
138k
      if (in >= end) {
9140
193
    const xmlChar *oldbase = ctxt->input->base;
9141
193
    GROW;
9142
193
                if (ctxt->instate == XML_PARSER_EOF)
9143
0
                    return(NULL);
9144
193
    if (oldbase != ctxt->input->base) {
9145
0
        ptrdiff_t delta = ctxt->input->base - oldbase;
9146
0
        start = start + delta;
9147
0
        in = in + delta;
9148
0
        last = last + delta;
9149
0
    }
9150
193
    end = ctxt->input->end;
9151
193
                if ((in - start) > maxLength) {
9152
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9153
0
                                   "AttValue length too long\n");
9154
0
                    return(NULL);
9155
0
                }
9156
193
      }
9157
138k
  }
9158
292k
        if ((in - start) > maxLength) {
9159
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9160
0
                           "AttValue length too long\n");
9161
0
            return(NULL);
9162
0
        }
9163
292k
  if (*in != limit) goto need_complex;
9164
9.06M
    } else {
9165
113M
  while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9166
113M
         (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9167
104M
      in++;
9168
104M
      col++;
9169
104M
      if (in >= end) {
9170
3.74k
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9171
3.74k
                if ((in - start) > maxLength) {
9172
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9173
0
                                   "AttValue length too long\n");
9174
0
                    return(NULL);
9175
0
                }
9176
3.74k
      }
9177
104M
  }
9178
9.06M
  last = in;
9179
9.06M
        if ((in - start) > maxLength) {
9180
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9181
0
                           "AttValue length too long\n");
9182
0
            return(NULL);
9183
0
        }
9184
9.06M
  if (*in != limit) goto need_complex;
9185
9.06M
    }
9186
7.37M
    in++;
9187
7.37M
    col++;
9188
7.37M
    if (len != NULL) {
9189
5.06M
        if (alloc) *alloc = 0;
9190
5.06M
        *len = last - start;
9191
5.06M
        ret = (xmlChar *) start;
9192
5.06M
    } else {
9193
2.31M
        if (alloc) *alloc = 1;
9194
2.31M
        ret = xmlStrndup(start, last - start);
9195
2.31M
    }
9196
7.37M
    CUR_PTR = in;
9197
7.37M
    ctxt->input->line = line;
9198
7.37M
    ctxt->input->col = col;
9199
7.37M
    return ret;
9200
1.98M
need_complex:
9201
1.98M
    if (alloc) *alloc = 1;
9202
1.98M
    return xmlParseAttValueComplex(ctxt, len, normalize);
9203
9.35M
}
9204
9205
/**
9206
 * xmlParseAttribute2:
9207
 * @ctxt:  an XML parser context
9208
 * @pref:  the element prefix
9209
 * @elem:  the element name
9210
 * @prefix:  a xmlChar ** used to store the value of the attribute prefix
9211
 * @value:  a xmlChar ** used to store the value of the attribute
9212
 * @len:  an int * to save the length of the attribute
9213
 * @alloc:  an int * to indicate if the attribute was allocated
9214
 *
9215
 * parse an attribute in the new SAX2 framework.
9216
 *
9217
 * Returns the attribute name, and the value in *value, .
9218
 */
9219
9220
static const xmlChar *
9221
xmlParseAttribute2(xmlParserCtxtPtr ctxt,
9222
                   const xmlChar * pref, const xmlChar * elem,
9223
                   const xmlChar ** prefix, xmlChar ** value,
9224
                   int *len, int *alloc)
9225
8.21M
{
9226
8.21M
    const xmlChar *name;
9227
8.21M
    xmlChar *val, *internal_val = NULL;
9228
8.21M
    int normalize = 0;
9229
9230
8.21M
    *value = NULL;
9231
8.21M
    GROW;
9232
8.21M
    name = xmlParseQName(ctxt, prefix);
9233
8.21M
    if (name == NULL) {
9234
1.32M
        xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9235
1.32M
                       "error parsing attribute name\n");
9236
1.32M
        return (NULL);
9237
1.32M
    }
9238
9239
    /*
9240
     * get the type if needed
9241
     */
9242
6.88M
    if (ctxt->attsSpecial != NULL) {
9243
746k
        int type;
9244
9245
746k
        type = (int) (ptrdiff_t) xmlHashQLookup2(ctxt->attsSpecial,
9246
746k
                                                 pref, elem, *prefix, name);
9247
746k
        if (type != 0)
9248
297k
            normalize = 1;
9249
746k
    }
9250
9251
    /*
9252
     * read the value
9253
     */
9254
6.88M
    SKIP_BLANKS;
9255
6.88M
    if (RAW == '=') {
9256
6.25M
        NEXT;
9257
6.25M
        SKIP_BLANKS;
9258
6.25M
        val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9259
6.25M
        if (val == NULL)
9260
93.0k
            return (NULL);
9261
6.16M
  if (normalize) {
9262
      /*
9263
       * Sometimes a second normalisation pass for spaces is needed
9264
       * but that only happens if charrefs or entities references
9265
       * have been used in the attribute value, i.e. the attribute
9266
       * value have been extracted in an allocated string already.
9267
       */
9268
292k
      if (*alloc) {
9269
98.9k
          const xmlChar *val2;
9270
9271
98.9k
          val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
9272
98.9k
    if ((val2 != NULL) && (val2 != val)) {
9273
10.4k
        xmlFree(val);
9274
10.4k
        val = (xmlChar *) val2;
9275
10.4k
    }
9276
98.9k
      }
9277
292k
  }
9278
6.16M
        ctxt->instate = XML_PARSER_CONTENT;
9279
6.16M
    } else {
9280
629k
        xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
9281
629k
                          "Specification mandates value for attribute %s\n",
9282
629k
                          name);
9283
629k
        return (name);
9284
629k
    }
9285
9286
6.16M
    if (*prefix == ctxt->str_xml) {
9287
        /*
9288
         * Check that xml:lang conforms to the specification
9289
         * No more registered as an error, just generate a warning now
9290
         * since this was deprecated in XML second edition
9291
         */
9292
304k
        if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
9293
83.4k
            internal_val = xmlStrndup(val, *len);
9294
83.4k
            if (!xmlCheckLanguageID(internal_val)) {
9295
65.7k
                xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9296
65.7k
                              "Malformed value for xml:lang : %s\n",
9297
65.7k
                              internal_val, NULL);
9298
65.7k
            }
9299
83.4k
        }
9300
9301
        /*
9302
         * Check that xml:space conforms to the specification
9303
         */
9304
304k
        if (xmlStrEqual(name, BAD_CAST "space")) {
9305
9.54k
            internal_val = xmlStrndup(val, *len);
9306
9.54k
            if (xmlStrEqual(internal_val, BAD_CAST "default"))
9307
424
                *(ctxt->space) = 0;
9308
9.12k
            else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9309
351
                *(ctxt->space) = 1;
9310
8.76k
            else {
9311
8.76k
                xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9312
8.76k
                              "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9313
8.76k
                              internal_val, NULL);
9314
8.76k
            }
9315
9.54k
        }
9316
304k
        if (internal_val) {
9317
93.0k
            xmlFree(internal_val);
9318
93.0k
        }
9319
304k
    }
9320
9321
6.16M
    *value = val;
9322
6.16M
    return (name);
9323
6.88M
}
9324
/**
9325
 * xmlParseStartTag2:
9326
 * @ctxt:  an XML parser context
9327
 *
9328
 * Parse a start tag. Always consumes '<'.
9329
 *
9330
 * This routine is called when running SAX2 parsing
9331
 *
9332
 * [40] STag ::= '<' Name (S Attribute)* S? '>'
9333
 *
9334
 * [ WFC: Unique Att Spec ]
9335
 * No attribute name may appear more than once in the same start-tag or
9336
 * empty-element tag.
9337
 *
9338
 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9339
 *
9340
 * [ WFC: Unique Att Spec ]
9341
 * No attribute name may appear more than once in the same start-tag or
9342
 * empty-element tag.
9343
 *
9344
 * With namespace:
9345
 *
9346
 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9347
 *
9348
 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9349
 *
9350
 * Returns the element name parsed
9351
 */
9352
9353
static const xmlChar *
9354
xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
9355
10.0M
                  const xmlChar **URI, int *tlen) {
9356
10.0M
    const xmlChar *localname;
9357
10.0M
    const xmlChar *prefix;
9358
10.0M
    const xmlChar *attname;
9359
10.0M
    const xmlChar *aprefix;
9360
10.0M
    const xmlChar *nsname;
9361
10.0M
    xmlChar *attvalue;
9362
10.0M
    const xmlChar **atts = ctxt->atts;
9363
10.0M
    int maxatts = ctxt->maxatts;
9364
10.0M
    int nratts, nbatts, nbdef, inputid;
9365
10.0M
    int i, j, nbNs, attval;
9366
10.0M
    unsigned long cur;
9367
10.0M
    int nsNr = ctxt->nsNr;
9368
9369
10.0M
    if (RAW != '<') return(NULL);
9370
10.0M
    NEXT1;
9371
9372
    /*
9373
     * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
9374
     *       point since the attribute values may be stored as pointers to
9375
     *       the buffer and calling SHRINK would destroy them !
9376
     *       The Shrinking is only possible once the full set of attribute
9377
     *       callbacks have been done.
9378
     */
9379
10.0M
    SHRINK;
9380
10.0M
    cur = ctxt->input->cur - ctxt->input->base;
9381
10.0M
    inputid = ctxt->input->id;
9382
10.0M
    nbatts = 0;
9383
10.0M
    nratts = 0;
9384
10.0M
    nbdef = 0;
9385
10.0M
    nbNs = 0;
9386
10.0M
    attval = 0;
9387
    /* Forget any namespaces added during an earlier parse of this element. */
9388
10.0M
    ctxt->nsNr = nsNr;
9389
9390
10.0M
    localname = xmlParseQName(ctxt, &prefix);
9391
10.0M
    if (localname == NULL) {
9392
767k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9393
767k
           "StartTag: invalid element name\n");
9394
767k
        return(NULL);
9395
767k
    }
9396
9.26M
    *tlen = ctxt->input->cur - ctxt->input->base - cur;
9397
9398
    /*
9399
     * Now parse the attributes, it ends up with the ending
9400
     *
9401
     * (S Attribute)* S?
9402
     */
9403
9.26M
    SKIP_BLANKS;
9404
9.26M
    GROW;
9405
9406
11.6M
    while (((RAW != '>') &&
9407
11.6M
     ((RAW != '/') || (NXT(1) != '>')) &&
9408
11.6M
     (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
9409
8.21M
  int len = -1, alloc = 0;
9410
9411
8.21M
  attname = xmlParseAttribute2(ctxt, prefix, localname,
9412
8.21M
                               &aprefix, &attvalue, &len, &alloc);
9413
8.21M
        if (attname == NULL) {
9414
1.41M
      xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9415
1.41M
           "xmlParseStartTag: problem parsing attributes\n");
9416
1.41M
      break;
9417
1.41M
  }
9418
6.79M
        if (attvalue == NULL)
9419
629k
            goto next_attr;
9420
6.16M
  if (len < 0) len = xmlStrlen(attvalue);
9421
9422
6.16M
        if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9423
224k
            const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9424
224k
            xmlURIPtr uri;
9425
9426
224k
            if (URL == NULL) {
9427
0
                xmlErrMemory(ctxt, "dictionary allocation failure");
9428
0
                if ((attvalue != NULL) && (alloc != 0))
9429
0
                    xmlFree(attvalue);
9430
0
                localname = NULL;
9431
0
                goto done;
9432
0
            }
9433
224k
            if (*URL != 0) {
9434
217k
                uri = xmlParseURI((const char *) URL);
9435
217k
                if (uri == NULL) {
9436
87.8k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
9437
87.8k
                             "xmlns: '%s' is not a valid URI\n",
9438
87.8k
                                       URL, NULL, NULL);
9439
129k
                } else {
9440
129k
                    if (uri->scheme == NULL) {
9441
65.4k
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9442
65.4k
                                  "xmlns: URI %s is not absolute\n",
9443
65.4k
                                  URL, NULL, NULL);
9444
65.4k
                    }
9445
129k
                    xmlFreeURI(uri);
9446
129k
                }
9447
217k
                if (URL == ctxt->str_xml_ns) {
9448
30
                    if (attname != ctxt->str_xml) {
9449
30
                        xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9450
30
                     "xml namespace URI cannot be the default namespace\n",
9451
30
                                 NULL, NULL, NULL);
9452
30
                    }
9453
30
                    goto next_attr;
9454
30
                }
9455
217k
                if ((len == 29) &&
9456
217k
                    (xmlStrEqual(URL,
9457
5.69k
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9458
1.10k
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9459
1.10k
                         "reuse of the xmlns namespace name is forbidden\n",
9460
1.10k
                             NULL, NULL, NULL);
9461
1.10k
                    goto next_attr;
9462
1.10k
                }
9463
217k
            }
9464
            /*
9465
             * check that it's not a defined namespace
9466
             */
9467
242k
            for (j = 1;j <= nbNs;j++)
9468
32.7k
                if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9469
13.3k
                    break;
9470
223k
            if (j <= nbNs)
9471
13.3k
                xmlErrAttributeDup(ctxt, NULL, attname);
9472
209k
            else
9473
209k
                if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
9474
9475
5.93M
        } else if (aprefix == ctxt->str_xmlns) {
9476
598k
            const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9477
598k
            xmlURIPtr uri;
9478
9479
598k
            if (attname == ctxt->str_xml) {
9480
6.98k
                if (URL != ctxt->str_xml_ns) {
9481
6.77k
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9482
6.77k
                             "xml namespace prefix mapped to wrong URI\n",
9483
6.77k
                             NULL, NULL, NULL);
9484
6.77k
                }
9485
                /*
9486
                 * Do not keep a namespace definition node
9487
                 */
9488
6.98k
                goto next_attr;
9489
6.98k
            }
9490
591k
            if (URL == ctxt->str_xml_ns) {
9491
326
                if (attname != ctxt->str_xml) {
9492
326
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9493
326
                             "xml namespace URI mapped to wrong prefix\n",
9494
326
                             NULL, NULL, NULL);
9495
326
                }
9496
326
                goto next_attr;
9497
326
            }
9498
591k
            if (attname == ctxt->str_xmlns) {
9499
1.75k
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9500
1.75k
                         "redefinition of the xmlns prefix is forbidden\n",
9501
1.75k
                         NULL, NULL, NULL);
9502
1.75k
                goto next_attr;
9503
1.75k
            }
9504
589k
            if ((len == 29) &&
9505
589k
                (xmlStrEqual(URL,
9506
14.9k
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9507
2.90k
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9508
2.90k
                         "reuse of the xmlns namespace name is forbidden\n",
9509
2.90k
                         NULL, NULL, NULL);
9510
2.90k
                goto next_attr;
9511
2.90k
            }
9512
586k
            if ((URL == NULL) || (URL[0] == 0)) {
9513
16.1k
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9514
16.1k
                         "xmlns:%s: Empty XML namespace is not allowed\n",
9515
16.1k
                              attname, NULL, NULL);
9516
16.1k
                goto next_attr;
9517
570k
            } else {
9518
570k
                uri = xmlParseURI((const char *) URL);
9519
570k
                if (uri == NULL) {
9520
170k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
9521
170k
                         "xmlns:%s: '%s' is not a valid URI\n",
9522
170k
                                       attname, URL, NULL);
9523
400k
                } else {
9524
400k
                    if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9525
54.9k
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9526
54.9k
                                  "xmlns:%s: URI %s is not absolute\n",
9527
54.9k
                                  attname, URL, NULL);
9528
54.9k
                    }
9529
400k
                    xmlFreeURI(uri);
9530
400k
                }
9531
570k
            }
9532
9533
            /*
9534
             * check that it's not a defined namespace
9535
             */
9536
696k
            for (j = 1;j <= nbNs;j++)
9537
145k
                if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9538
19.4k
                    break;
9539
570k
            if (j <= nbNs)
9540
19.4k
                xmlErrAttributeDup(ctxt, aprefix, attname);
9541
551k
            else
9542
551k
                if (nsPush(ctxt, attname, URL) > 0) nbNs++;
9543
9544
5.33M
        } else {
9545
            /*
9546
             * Add the pair to atts
9547
             */
9548
5.33M
            if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9549
168k
                if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9550
0
                    goto next_attr;
9551
0
                }
9552
168k
                maxatts = ctxt->maxatts;
9553
168k
                atts = ctxt->atts;
9554
168k
            }
9555
5.33M
            ctxt->attallocs[nratts++] = alloc;
9556
5.33M
            atts[nbatts++] = attname;
9557
5.33M
            atts[nbatts++] = aprefix;
9558
            /*
9559
             * The namespace URI field is used temporarily to point at the
9560
             * base of the current input buffer for non-alloced attributes.
9561
             * When the input buffer is reallocated, all the pointers become
9562
             * invalid, but they can be reconstructed later.
9563
             */
9564
5.33M
            if (alloc)
9565
835k
                atts[nbatts++] = NULL;
9566
4.50M
            else
9567
4.50M
                atts[nbatts++] = ctxt->input->base;
9568
5.33M
            atts[nbatts++] = attvalue;
9569
5.33M
            attvalue += len;
9570
5.33M
            atts[nbatts++] = attvalue;
9571
            /*
9572
             * tag if some deallocation is needed
9573
             */
9574
5.33M
            if (alloc != 0) attval = 1;
9575
5.33M
            attvalue = NULL; /* moved into atts */
9576
5.33M
        }
9577
9578
6.79M
next_attr:
9579
6.79M
        if ((attvalue != NULL) && (alloc != 0)) {
9580
266k
            xmlFree(attvalue);
9581
266k
            attvalue = NULL;
9582
266k
        }
9583
9584
6.79M
  GROW
9585
6.79M
        if (ctxt->instate == XML_PARSER_EOF)
9586
0
            break;
9587
6.79M
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9588
2.96M
      break;
9589
3.82M
  if (SKIP_BLANKS == 0) {
9590
1.42M
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9591
1.42M
         "attributes construct error\n");
9592
1.42M
      break;
9593
1.42M
  }
9594
2.39M
        GROW;
9595
2.39M
    }
9596
9597
9.26M
    if (ctxt->input->id != inputid) {
9598
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9599
0
                    "Unexpected change of input\n");
9600
0
        localname = NULL;
9601
0
        goto done;
9602
0
    }
9603
9604
    /* Reconstruct attribute value pointers. */
9605
14.5M
    for (i = 0, j = 0; j < nratts; i += 5, j++) {
9606
5.33M
        if (atts[i+2] != NULL) {
9607
            /*
9608
             * Arithmetic on dangling pointers is technically undefined
9609
             * behavior, but well...
9610
             */
9611
4.50M
            const xmlChar *old = atts[i+2];
9612
4.50M
            atts[i+2]  = NULL;    /* Reset repurposed namespace URI */
9613
4.50M
            atts[i+3] = ctxt->input->base + (atts[i+3] - old);  /* value */
9614
4.50M
            atts[i+4] = ctxt->input->base + (atts[i+4] - old);  /* valuend */
9615
4.50M
        }
9616
5.33M
    }
9617
9618
    /*
9619
     * The attributes defaulting
9620
     */
9621
9.26M
    if (ctxt->attsDefault != NULL) {
9622
677k
        xmlDefAttrsPtr defaults;
9623
9624
677k
  defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9625
677k
  if (defaults != NULL) {
9626
278k
      for (i = 0;i < defaults->nbAttrs;i++) {
9627
174k
          attname = defaults->values[5 * i];
9628
174k
    aprefix = defaults->values[5 * i + 1];
9629
9630
                /*
9631
     * special work for namespaces defaulted defs
9632
     */
9633
174k
    if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9634
        /*
9635
         * check that it's not a defined namespace
9636
         */
9637
3.70k
        for (j = 1;j <= nbNs;j++)
9638
1.69k
            if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9639
765
          break;
9640
2.78k
              if (j <= nbNs) continue;
9641
9642
2.01k
        nsname = xmlGetNamespace(ctxt, NULL);
9643
2.01k
        if (nsname != defaults->values[5 * i + 2]) {
9644
941
      if (nsPush(ctxt, NULL,
9645
941
                 defaults->values[5 * i + 2]) > 0)
9646
931
          nbNs++;
9647
941
        }
9648
171k
    } else if (aprefix == ctxt->str_xmlns) {
9649
        /*
9650
         * check that it's not a defined namespace
9651
         */
9652
16.7k
        for (j = 1;j <= nbNs;j++)
9653
4.68k
            if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9654
2.35k
          break;
9655
14.4k
              if (j <= nbNs) continue;
9656
9657
12.0k
        nsname = xmlGetNamespace(ctxt, attname);
9658
12.0k
        if (nsname != defaults->values[5 * i + 2]) {
9659
4.85k
      if (nsPush(ctxt, attname,
9660
4.85k
                 defaults->values[5 * i + 2]) > 0)
9661
4.85k
          nbNs++;
9662
4.85k
        }
9663
156k
    } else {
9664
        /*
9665
         * check that it's not a defined attribute
9666
         */
9667
341k
        for (j = 0;j < nbatts;j+=5) {
9668
191k
      if ((attname == atts[j]) && (aprefix == atts[j+1]))
9669
6.53k
          break;
9670
191k
        }
9671
156k
        if (j < nbatts) continue;
9672
9673
150k
        if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9674
7.79k
      if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9675
0
                            localname = NULL;
9676
0
                            goto done;
9677
0
      }
9678
7.79k
      maxatts = ctxt->maxatts;
9679
7.79k
      atts = ctxt->atts;
9680
7.79k
        }
9681
150k
        atts[nbatts++] = attname;
9682
150k
        atts[nbatts++] = aprefix;
9683
150k
        if (aprefix == NULL)
9684
117k
      atts[nbatts++] = NULL;
9685
32.7k
        else
9686
32.7k
            atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
9687
150k
        atts[nbatts++] = defaults->values[5 * i + 2];
9688
150k
        atts[nbatts++] = defaults->values[5 * i + 3];
9689
150k
        if ((ctxt->standalone == 1) &&
9690
150k
            (defaults->values[5 * i + 4] != NULL)) {
9691
0
      xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9692
0
    "standalone: attribute %s on %s defaulted from external subset\n",
9693
0
                                   attname, localname);
9694
0
        }
9695
150k
        nbdef++;
9696
150k
    }
9697
174k
      }
9698
104k
  }
9699
677k
    }
9700
9701
    /*
9702
     * The attributes checkings
9703
     */
9704
14.7M
    for (i = 0; i < nbatts;i += 5) {
9705
        /*
9706
  * The default namespace does not apply to attribute names.
9707
  */
9708
5.48M
  if (atts[i + 1] != NULL) {
9709
614k
      nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9710
614k
      if (nsname == NULL) {
9711
200k
    xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9712
200k
        "Namespace prefix %s for %s on %s is not defined\n",
9713
200k
        atts[i + 1], atts[i], localname);
9714
200k
      }
9715
614k
      atts[i + 2] = nsname;
9716
614k
  } else
9717
4.87M
      nsname = NULL;
9718
  /*
9719
   * [ WFC: Unique Att Spec ]
9720
   * No attribute name may appear more than once in the same
9721
   * start-tag or empty-element tag.
9722
   * As extended by the Namespace in XML REC.
9723
   */
9724
7.93M
        for (j = 0; j < i;j += 5) {
9725
2.49M
      if (atts[i] == atts[j]) {
9726
77.6k
          if (atts[i+1] == atts[j+1]) {
9727
53.2k
        xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9728
53.2k
        break;
9729
53.2k
    }
9730
24.3k
    if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9731
82
        xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9732
82
           "Namespaced Attribute %s in '%s' redefined\n",
9733
82
           atts[i], nsname, NULL);
9734
82
        break;
9735
82
    }
9736
24.3k
      }
9737
2.49M
  }
9738
5.48M
    }
9739
9740
9.26M
    nsname = xmlGetNamespace(ctxt, prefix);
9741
9.26M
    if ((prefix != NULL) && (nsname == NULL)) {
9742
1.37M
  xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9743
1.37M
           "Namespace prefix %s on %s is not defined\n",
9744
1.37M
     prefix, localname, NULL);
9745
1.37M
    }
9746
9.26M
    *pref = prefix;
9747
9.26M
    *URI = nsname;
9748
9749
    /*
9750
     * SAX: Start of Element !
9751
     */
9752
9.26M
    if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9753
9.26M
  (!ctxt->disableSAX)) {
9754
8.36M
  if (nbNs > 0)
9755
500k
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9756
500k
        nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9757
500k
        nbatts / 5, nbdef, atts);
9758
7.86M
  else
9759
7.86M
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9760
7.86M
                    nsname, 0, NULL, nbatts / 5, nbdef, atts);
9761
8.36M
    }
9762
9763
9.26M
done:
9764
    /*
9765
     * Free up attribute allocated strings if needed
9766
     */
9767
9.26M
    if (attval != 0) {
9768
1.84M
  for (i = 3,j = 0; j < nratts;i += 5,j++)
9769
1.06M
      if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9770
835k
          xmlFree((xmlChar *) atts[i]);
9771
783k
    }
9772
9773
9.26M
    return(localname);
9774
9.26M
}
9775
9776
/**
9777
 * xmlParseEndTag2:
9778
 * @ctxt:  an XML parser context
9779
 * @line:  line of the start tag
9780
 * @nsNr:  number of namespaces on the start tag
9781
 *
9782
 * Parse an end tag. Always consumes '</'.
9783
 *
9784
 * [42] ETag ::= '</' Name S? '>'
9785
 *
9786
 * With namespace
9787
 *
9788
 * [NS 9] ETag ::= '</' QName S? '>'
9789
 */
9790
9791
static void
9792
2.38M
xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlStartTag *tag) {
9793
2.38M
    const xmlChar *name;
9794
9795
2.38M
    GROW;
9796
2.38M
    if ((RAW != '<') || (NXT(1) != '/')) {
9797
0
  xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9798
0
  return;
9799
0
    }
9800
2.38M
    SKIP(2);
9801
9802
2.38M
    if (tag->prefix == NULL)
9803
1.58M
        name = xmlParseNameAndCompare(ctxt, ctxt->name);
9804
794k
    else
9805
794k
        name = xmlParseQNameAndCompare(ctxt, ctxt->name, tag->prefix);
9806
9807
    /*
9808
     * We should definitely be at the ending "S? '>'" part
9809
     */
9810
2.38M
    GROW;
9811
2.38M
    if (ctxt->instate == XML_PARSER_EOF)
9812
0
        return;
9813
2.38M
    SKIP_BLANKS;
9814
2.38M
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9815
357k
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9816
357k
    } else
9817
2.02M
  NEXT1;
9818
9819
    /*
9820
     * [ WFC: Element Type Match ]
9821
     * The Name in an element's end-tag must match the element type in the
9822
     * start-tag.
9823
     *
9824
     */
9825
2.38M
    if (name != (xmlChar*)1) {
9826
722k
        if (name == NULL) name = BAD_CAST "unparsable";
9827
722k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9828
722k
         "Opening and ending tag mismatch: %s line %d and %s\n",
9829
722k
                    ctxt->name, tag->line, name);
9830
722k
    }
9831
9832
    /*
9833
     * SAX: End of Tag
9834
     */
9835
2.38M
    if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9836
2.38M
  (!ctxt->disableSAX))
9837
2.09M
  ctxt->sax->endElementNs(ctxt->userData, ctxt->name, tag->prefix,
9838
2.09M
                                tag->URI);
9839
9840
2.38M
    spacePop(ctxt);
9841
2.38M
    if (tag->nsNr != 0)
9842
81.0k
  nsPop(ctxt, tag->nsNr);
9843
2.38M
}
9844
9845
/**
9846
 * xmlParseCDSect:
9847
 * @ctxt:  an XML parser context
9848
 *
9849
 * DEPRECATED: Internal function, don't use.
9850
 *
9851
 * Parse escaped pure raw content. Always consumes '<!['.
9852
 *
9853
 * [18] CDSect ::= CDStart CData CDEnd
9854
 *
9855
 * [19] CDStart ::= '<![CDATA['
9856
 *
9857
 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9858
 *
9859
 * [21] CDEnd ::= ']]>'
9860
 */
9861
void
9862
166k
xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9863
166k
    xmlChar *buf = NULL;
9864
166k
    int len = 0;
9865
166k
    int size = XML_PARSER_BUFFER_SIZE;
9866
166k
    int r, rl;
9867
166k
    int s, sl;
9868
166k
    int cur, l;
9869
166k
    int count = 0;
9870
166k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9871
57.7k
                    XML_MAX_HUGE_LENGTH :
9872
166k
                    XML_MAX_TEXT_LENGTH;
9873
9874
166k
    if ((CUR != '<') || (NXT(1) != '!') || (NXT(2) != '['))
9875
0
        return;
9876
166k
    SKIP(3);
9877
9878
166k
    if (!CMP6(CUR_PTR, 'C', 'D', 'A', 'T', 'A', '['))
9879
0
        return;
9880
166k
    SKIP(6);
9881
9882
166k
    ctxt->instate = XML_PARSER_CDATA_SECTION;
9883
166k
    r = CUR_CHAR(rl);
9884
166k
    if (!IS_CHAR(r)) {
9885
6.41k
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9886
6.41k
        goto out;
9887
6.41k
    }
9888
160k
    NEXTL(rl);
9889
160k
    s = CUR_CHAR(sl);
9890
160k
    if (!IS_CHAR(s)) {
9891
6.76k
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9892
6.76k
        goto out;
9893
6.76k
    }
9894
153k
    NEXTL(sl);
9895
153k
    cur = CUR_CHAR(l);
9896
153k
    buf = (xmlChar *) xmlMallocAtomic(size);
9897
153k
    if (buf == NULL) {
9898
0
  xmlErrMemory(ctxt, NULL);
9899
0
        goto out;
9900
0
    }
9901
19.8M
    while (IS_CHAR(cur) &&
9902
19.8M
           ((r != ']') || (s != ']') || (cur != '>'))) {
9903
19.6M
  if (len + 5 >= size) {
9904
68.2k
      xmlChar *tmp;
9905
9906
68.2k
      tmp = (xmlChar *) xmlRealloc(buf, size * 2);
9907
68.2k
      if (tmp == NULL) {
9908
0
    xmlErrMemory(ctxt, NULL);
9909
0
                goto out;
9910
0
      }
9911
68.2k
      buf = tmp;
9912
68.2k
      size *= 2;
9913
68.2k
  }
9914
19.6M
  COPY_BUF(rl,buf,len,r);
9915
19.6M
  r = s;
9916
19.6M
  rl = sl;
9917
19.6M
  s = cur;
9918
19.6M
  sl = l;
9919
19.6M
  count++;
9920
19.6M
  if (count > 50) {
9921
353k
      SHRINK;
9922
353k
      GROW;
9923
353k
            if (ctxt->instate == XML_PARSER_EOF) {
9924
0
                goto out;
9925
0
            }
9926
353k
      count = 0;
9927
353k
  }
9928
19.6M
  NEXTL(l);
9929
19.6M
  cur = CUR_CHAR(l);
9930
19.6M
        if (len > maxLength) {
9931
0
            xmlFatalErrMsg(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9932
0
                           "CData section too big found\n");
9933
0
            goto out;
9934
0
        }
9935
19.6M
    }
9936
153k
    buf[len] = 0;
9937
153k
    if (cur != '>') {
9938
25.7k
  xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9939
25.7k
                       "CData section not finished\n%.50s\n", buf);
9940
25.7k
        goto out;
9941
25.7k
    }
9942
127k
    NEXTL(l);
9943
9944
    /*
9945
     * OK the buffer is to be consumed as cdata.
9946
     */
9947
127k
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9948
100k
  if (ctxt->sax->cdataBlock != NULL)
9949
66.0k
      ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9950
34.0k
  else if (ctxt->sax->characters != NULL)
9951
34.0k
      ctxt->sax->characters(ctxt->userData, buf, len);
9952
100k
    }
9953
9954
166k
out:
9955
166k
    if (ctxt->instate != XML_PARSER_EOF)
9956
166k
        ctxt->instate = XML_PARSER_CONTENT;
9957
166k
    xmlFree(buf);
9958
166k
}
9959
9960
/**
9961
 * xmlParseContentInternal:
9962
 * @ctxt:  an XML parser context
9963
 *
9964
 * Parse a content sequence. Stops at EOF or '</'. Leaves checking of
9965
 * unexpected EOF to the caller.
9966
 */
9967
9968
static void
9969
240k
xmlParseContentInternal(xmlParserCtxtPtr ctxt) {
9970
240k
    int nameNr = ctxt->nameNr;
9971
9972
240k
    GROW;
9973
20.7M
    while ((RAW != 0) &&
9974
20.7M
     (ctxt->instate != XML_PARSER_EOF)) {
9975
20.5M
  const xmlChar *cur = ctxt->input->cur;
9976
9977
  /*
9978
   * First case : a Processing Instruction.
9979
   */
9980
20.5M
  if ((*cur == '<') && (cur[1] == '?')) {
9981
185k
      xmlParsePI(ctxt);
9982
185k
  }
9983
9984
  /*
9985
   * Second case : a CDSection
9986
   */
9987
  /* 2.6.0 test was *cur not RAW */
9988
20.3M
  else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9989
166k
      xmlParseCDSect(ctxt);
9990
166k
  }
9991
9992
  /*
9993
   * Third case :  a comment
9994
   */
9995
20.2M
  else if ((*cur == '<') && (NXT(1) == '!') &&
9996
20.2M
     (NXT(2) == '-') && (NXT(3) == '-')) {
9997
202k
      xmlParseComment(ctxt);
9998
202k
      ctxt->instate = XML_PARSER_CONTENT;
9999
202k
  }
10000
10001
  /*
10002
   * Fourth case :  a sub-element.
10003
   */
10004
19.9M
  else if (*cur == '<') {
10005
7.81M
            if (NXT(1) == '/') {
10006
1.40M
                if (ctxt->nameNr <= nameNr)
10007
35.9k
                    break;
10008
1.37M
          xmlParseElementEnd(ctxt);
10009
6.40M
            } else {
10010
6.40M
          xmlParseElementStart(ctxt);
10011
6.40M
            }
10012
7.81M
  }
10013
10014
  /*
10015
   * Fifth case : a reference. If if has not been resolved,
10016
   *    parsing returns it's Name, create the node
10017
   */
10018
10019
12.1M
  else if (*cur == '&') {
10020
1.99M
      xmlParseReference(ctxt);
10021
1.99M
  }
10022
10023
  /*
10024
   * Last case, text. Note that References are handled directly.
10025
   */
10026
10.1M
  else {
10027
10.1M
      xmlParseCharData(ctxt, 0);
10028
10.1M
  }
10029
10030
20.5M
  GROW;
10031
20.5M
  SHRINK;
10032
20.5M
    }
10033
240k
}
10034
10035
/**
10036
 * xmlParseContent:
10037
 * @ctxt:  an XML parser context
10038
 *
10039
 * Parse a content sequence. Stops at EOF or '</'.
10040
 *
10041
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10042
 */
10043
10044
void
10045
91.6k
xmlParseContent(xmlParserCtxtPtr ctxt) {
10046
91.6k
    int nameNr = ctxt->nameNr;
10047
10048
91.6k
    xmlParseContentInternal(ctxt);
10049
10050
91.6k
    if ((ctxt->instate != XML_PARSER_EOF) && (ctxt->nameNr > nameNr)) {
10051
3.15k
        const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
10052
3.15k
        int line = ctxt->pushTab[ctxt->nameNr - 1].line;
10053
3.15k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
10054
3.15k
                "Premature end of data in tag %s line %d\n",
10055
3.15k
    name, line, NULL);
10056
3.15k
    }
10057
91.6k
}
10058
10059
/**
10060
 * xmlParseElement:
10061
 * @ctxt:  an XML parser context
10062
 *
10063
 * DEPRECATED: Internal function, don't use.
10064
 *
10065
 * parse an XML element
10066
 *
10067
 * [39] element ::= EmptyElemTag | STag content ETag
10068
 *
10069
 * [ WFC: Element Type Match ]
10070
 * The Name in an element's end-tag must match the element type in the
10071
 * start-tag.
10072
 *
10073
 */
10074
10075
void
10076
249k
xmlParseElement(xmlParserCtxtPtr ctxt) {
10077
249k
    if (xmlParseElementStart(ctxt) != 0)
10078
100k
        return;
10079
10080
148k
    xmlParseContentInternal(ctxt);
10081
148k
    if (ctxt->instate == XML_PARSER_EOF)
10082
519
  return;
10083
10084
148k
    if (CUR == 0) {
10085
113k
        const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
10086
113k
        int line = ctxt->pushTab[ctxt->nameNr - 1].line;
10087
113k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
10088
113k
                "Premature end of data in tag %s line %d\n",
10089
113k
    name, line, NULL);
10090
113k
        return;
10091
113k
    }
10092
10093
34.7k
    xmlParseElementEnd(ctxt);
10094
34.7k
}
10095
10096
/**
10097
 * xmlParseElementStart:
10098
 * @ctxt:  an XML parser context
10099
 *
10100
 * Parse the start of an XML element. Returns -1 in case of error, 0 if an
10101
 * opening tag was parsed, 1 if an empty element was parsed.
10102
 *
10103
 * Always consumes '<'.
10104
 */
10105
static int
10106
6.65M
xmlParseElementStart(xmlParserCtxtPtr ctxt) {
10107
6.65M
    const xmlChar *name;
10108
6.65M
    const xmlChar *prefix = NULL;
10109
6.65M
    const xmlChar *URI = NULL;
10110
6.65M
    xmlParserNodeInfo node_info;
10111
6.65M
    int line, tlen = 0;
10112
6.65M
    xmlNodePtr ret;
10113
6.65M
    int nsNr = ctxt->nsNr;
10114
10115
6.65M
    if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
10116
6.65M
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
10117
201
  xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
10118
201
     "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
10119
201
        xmlParserMaxDepth);
10120
201
  xmlHaltParser(ctxt);
10121
201
  return(-1);
10122
201
    }
10123
10124
    /* Capture start position */
10125
6.65M
    if (ctxt->record_info) {
10126
0
        node_info.begin_pos = ctxt->input->consumed +
10127
0
                          (CUR_PTR - ctxt->input->base);
10128
0
  node_info.begin_line = ctxt->input->line;
10129
0
    }
10130
10131
6.65M
    if (ctxt->spaceNr == 0)
10132
0
  spacePush(ctxt, -1);
10133
6.65M
    else if (*ctxt->space == -2)
10134
1.66M
  spacePush(ctxt, -1);
10135
4.98M
    else
10136
4.98M
  spacePush(ctxt, *ctxt->space);
10137
10138
6.65M
    line = ctxt->input->line;
10139
6.65M
#ifdef LIBXML_SAX1_ENABLED
10140
6.65M
    if (ctxt->sax2)
10141
4.25M
#endif /* LIBXML_SAX1_ENABLED */
10142
4.25M
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
10143
2.39M
#ifdef LIBXML_SAX1_ENABLED
10144
2.39M
    else
10145
2.39M
  name = xmlParseStartTag(ctxt);
10146
6.65M
#endif /* LIBXML_SAX1_ENABLED */
10147
6.65M
    if (ctxt->instate == XML_PARSER_EOF)
10148
297
  return(-1);
10149
6.65M
    if (name == NULL) {
10150
1.21M
  spacePop(ctxt);
10151
1.21M
        return(-1);
10152
1.21M
    }
10153
5.43M
    nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
10154
5.43M
    ret = ctxt->node;
10155
10156
5.43M
#ifdef LIBXML_VALID_ENABLED
10157
    /*
10158
     * [ VC: Root Element Type ]
10159
     * The Name in the document type declaration must match the element
10160
     * type of the root element.
10161
     */
10162
5.43M
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10163
5.43M
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
10164
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
10165
5.43M
#endif /* LIBXML_VALID_ENABLED */
10166
10167
    /*
10168
     * Check for an Empty Element.
10169
     */
10170
5.43M
    if ((RAW == '/') && (NXT(1) == '>')) {
10171
1.15M
        SKIP(2);
10172
1.15M
  if (ctxt->sax2) {
10173
795k
      if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
10174
795k
    (!ctxt->disableSAX))
10175
677k
    ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
10176
795k
#ifdef LIBXML_SAX1_ENABLED
10177
795k
  } else {
10178
363k
      if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
10179
363k
    (!ctxt->disableSAX))
10180
281k
    ctxt->sax->endElement(ctxt->userData, name);
10181
363k
#endif /* LIBXML_SAX1_ENABLED */
10182
363k
  }
10183
1.15M
  namePop(ctxt);
10184
1.15M
  spacePop(ctxt);
10185
1.15M
  if (nsNr != ctxt->nsNr)
10186
29.4k
      nsPop(ctxt, ctxt->nsNr - nsNr);
10187
1.15M
  if ( ret != NULL && ctxt->record_info ) {
10188
0
     node_info.end_pos = ctxt->input->consumed +
10189
0
            (CUR_PTR - ctxt->input->base);
10190
0
     node_info.end_line = ctxt->input->line;
10191
0
     node_info.node = ret;
10192
0
     xmlParserAddNodeInfo(ctxt, &node_info);
10193
0
  }
10194
1.15M
  return(1);
10195
1.15M
    }
10196
4.27M
    if (RAW == '>') {
10197
2.69M
        NEXT1;
10198
2.69M
    } else {
10199
1.58M
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
10200
1.58M
         "Couldn't find end of Start Tag %s line %d\n",
10201
1.58M
                    name, line, NULL);
10202
10203
  /*
10204
   * end of parsing of this node.
10205
   */
10206
1.58M
  nodePop(ctxt);
10207
1.58M
  namePop(ctxt);
10208
1.58M
  spacePop(ctxt);
10209
1.58M
  if (nsNr != ctxt->nsNr)
10210
114k
      nsPop(ctxt, ctxt->nsNr - nsNr);
10211
10212
  /*
10213
   * Capture end position and add node
10214
   */
10215
1.58M
  if ( ret != NULL && ctxt->record_info ) {
10216
0
     node_info.end_pos = ctxt->input->consumed +
10217
0
            (CUR_PTR - ctxt->input->base);
10218
0
     node_info.end_line = ctxt->input->line;
10219
0
     node_info.node = ret;
10220
0
     xmlParserAddNodeInfo(ctxt, &node_info);
10221
0
  }
10222
1.58M
  return(-1);
10223
1.58M
    }
10224
10225
2.69M
    return(0);
10226
4.27M
}
10227
10228
/**
10229
 * xmlParseElementEnd:
10230
 * @ctxt:  an XML parser context
10231
 *
10232
 * Parse the end of an XML element. Always consumes '</'.
10233
 */
10234
static void
10235
1.40M
xmlParseElementEnd(xmlParserCtxtPtr ctxt) {
10236
1.40M
    xmlParserNodeInfo node_info;
10237
1.40M
    xmlNodePtr ret = ctxt->node;
10238
10239
1.40M
    if (ctxt->nameNr <= 0) {
10240
0
        if ((RAW == '<') && (NXT(1) == '/'))
10241
0
            SKIP(2);
10242
0
        return;
10243
0
    }
10244
10245
    /*
10246
     * parse the end of tag: '</' should be here.
10247
     */
10248
1.40M
    if (ctxt->sax2) {
10249
964k
  xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
10250
964k
  namePop(ctxt);
10251
964k
    }
10252
444k
#ifdef LIBXML_SAX1_ENABLED
10253
444k
    else
10254
444k
  xmlParseEndTag1(ctxt, 0);
10255
1.40M
#endif /* LIBXML_SAX1_ENABLED */
10256
10257
    /*
10258
     * Capture end position and add node
10259
     */
10260
1.40M
    if ( ret != NULL && ctxt->record_info ) {
10261
0
       node_info.end_pos = ctxt->input->consumed +
10262
0
                          (CUR_PTR - ctxt->input->base);
10263
0
       node_info.end_line = ctxt->input->line;
10264
0
       node_info.node = ret;
10265
0
       xmlParserAddNodeInfo(ctxt, &node_info);
10266
0
    }
10267
1.40M
}
10268
10269
/**
10270
 * xmlParseVersionNum:
10271
 * @ctxt:  an XML parser context
10272
 *
10273
 * DEPRECATED: Internal function, don't use.
10274
 *
10275
 * parse the XML version value.
10276
 *
10277
 * [26] VersionNum ::= '1.' [0-9]+
10278
 *
10279
 * In practice allow [0-9].[0-9]+ at that level
10280
 *
10281
 * Returns the string giving the XML version number, or NULL
10282
 */
10283
xmlChar *
10284
390k
xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10285
390k
    xmlChar *buf = NULL;
10286
390k
    int len = 0;
10287
390k
    int size = 10;
10288
390k
    xmlChar cur;
10289
10290
390k
    buf = (xmlChar *) xmlMallocAtomic(size);
10291
390k
    if (buf == NULL) {
10292
0
  xmlErrMemory(ctxt, NULL);
10293
0
  return(NULL);
10294
0
    }
10295
390k
    cur = CUR;
10296
390k
    if (!((cur >= '0') && (cur <= '9'))) {
10297
8.13k
  xmlFree(buf);
10298
8.13k
  return(NULL);
10299
8.13k
    }
10300
382k
    buf[len++] = cur;
10301
382k
    NEXT;
10302
382k
    cur=CUR;
10303
382k
    if (cur != '.') {
10304
9.45k
  xmlFree(buf);
10305
9.45k
  return(NULL);
10306
9.45k
    }
10307
372k
    buf[len++] = cur;
10308
372k
    NEXT;
10309
372k
    cur=CUR;
10310
879k
    while ((cur >= '0') && (cur <= '9')) {
10311
506k
  if (len + 1 >= size) {
10312
1.76k
      xmlChar *tmp;
10313
10314
1.76k
      size *= 2;
10315
1.76k
      tmp = (xmlChar *) xmlRealloc(buf, size);
10316
1.76k
      if (tmp == NULL) {
10317
0
          xmlFree(buf);
10318
0
    xmlErrMemory(ctxt, NULL);
10319
0
    return(NULL);
10320
0
      }
10321
1.76k
      buf = tmp;
10322
1.76k
  }
10323
506k
  buf[len++] = cur;
10324
506k
  NEXT;
10325
506k
  cur=CUR;
10326
506k
    }
10327
372k
    buf[len] = 0;
10328
372k
    return(buf);
10329
372k
}
10330
10331
/**
10332
 * xmlParseVersionInfo:
10333
 * @ctxt:  an XML parser context
10334
 *
10335
 * DEPRECATED: Internal function, don't use.
10336
 *
10337
 * parse the XML version.
10338
 *
10339
 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
10340
 *
10341
 * [25] Eq ::= S? '=' S?
10342
 *
10343
 * Returns the version string, e.g. "1.0"
10344
 */
10345
10346
xmlChar *
10347
502k
xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10348
502k
    xmlChar *version = NULL;
10349
10350
502k
    if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
10351
410k
  SKIP(7);
10352
410k
  SKIP_BLANKS;
10353
410k
  if (RAW != '=') {
10354
12.6k
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10355
12.6k
      return(NULL);
10356
12.6k
        }
10357
397k
  NEXT;
10358
397k
  SKIP_BLANKS;
10359
397k
  if (RAW == '"') {
10360
346k
      NEXT;
10361
346k
      version = xmlParseVersionNum(ctxt);
10362
346k
      if (RAW != '"') {
10363
28.0k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10364
28.0k
      } else
10365
318k
          NEXT;
10366
346k
  } else if (RAW == '\''){
10367
44.1k
      NEXT;
10368
44.1k
      version = xmlParseVersionNum(ctxt);
10369
44.1k
      if (RAW != '\'') {
10370
2.30k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10371
2.30k
      } else
10372
41.8k
          NEXT;
10373
44.1k
  } else {
10374
7.05k
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10375
7.05k
  }
10376
397k
    }
10377
489k
    return(version);
10378
502k
}
10379
10380
/**
10381
 * xmlParseEncName:
10382
 * @ctxt:  an XML parser context
10383
 *
10384
 * DEPRECATED: Internal function, don't use.
10385
 *
10386
 * parse the XML encoding name
10387
 *
10388
 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10389
 *
10390
 * Returns the encoding name value or NULL
10391
 */
10392
xmlChar *
10393
120k
xmlParseEncName(xmlParserCtxtPtr ctxt) {
10394
120k
    xmlChar *buf = NULL;
10395
120k
    int len = 0;
10396
120k
    int size = 10;
10397
120k
    xmlChar cur;
10398
10399
120k
    cur = CUR;
10400
120k
    if (((cur >= 'a') && (cur <= 'z')) ||
10401
120k
        ((cur >= 'A') && (cur <= 'Z'))) {
10402
118k
  buf = (xmlChar *) xmlMallocAtomic(size);
10403
118k
  if (buf == NULL) {
10404
0
      xmlErrMemory(ctxt, NULL);
10405
0
      return(NULL);
10406
0
  }
10407
10408
118k
  buf[len++] = cur;
10409
118k
  NEXT;
10410
118k
  cur = CUR;
10411
1.33M
  while (((cur >= 'a') && (cur <= 'z')) ||
10412
1.33M
         ((cur >= 'A') && (cur <= 'Z')) ||
10413
1.33M
         ((cur >= '0') && (cur <= '9')) ||
10414
1.33M
         (cur == '.') || (cur == '_') ||
10415
1.33M
         (cur == '-')) {
10416
1.21M
      if (len + 1 >= size) {
10417
55.6k
          xmlChar *tmp;
10418
10419
55.6k
    size *= 2;
10420
55.6k
    tmp = (xmlChar *) xmlRealloc(buf, size);
10421
55.6k
    if (tmp == NULL) {
10422
0
        xmlErrMemory(ctxt, NULL);
10423
0
        xmlFree(buf);
10424
0
        return(NULL);
10425
0
    }
10426
55.6k
    buf = tmp;
10427
55.6k
      }
10428
1.21M
      buf[len++] = cur;
10429
1.21M
      NEXT;
10430
1.21M
      cur = CUR;
10431
1.21M
      if (cur == 0) {
10432
745
          SHRINK;
10433
745
    GROW;
10434
745
    cur = CUR;
10435
745
      }
10436
1.21M
        }
10437
118k
  buf[len] = 0;
10438
118k
    } else {
10439
1.20k
  xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10440
1.20k
    }
10441
120k
    return(buf);
10442
120k
}
10443
10444
/**
10445
 * xmlParseEncodingDecl:
10446
 * @ctxt:  an XML parser context
10447
 *
10448
 * DEPRECATED: Internal function, don't use.
10449
 *
10450
 * parse the XML encoding declaration
10451
 *
10452
 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' |  "'" EncName "'")
10453
 *
10454
 * this setups the conversion filters.
10455
 *
10456
 * Returns the encoding value or NULL
10457
 */
10458
10459
const xmlChar *
10460
342k
xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10461
342k
    xmlChar *encoding = NULL;
10462
10463
342k
    SKIP_BLANKS;
10464
342k
    if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
10465
122k
  SKIP(8);
10466
122k
  SKIP_BLANKS;
10467
122k
  if (RAW != '=') {
10468
1.69k
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10469
1.69k
      return(NULL);
10470
1.69k
        }
10471
120k
  NEXT;
10472
120k
  SKIP_BLANKS;
10473
120k
  if (RAW == '"') {
10474
99.7k
      NEXT;
10475
99.7k
      encoding = xmlParseEncName(ctxt);
10476
99.7k
      if (RAW != '"') {
10477
5.10k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10478
5.10k
    xmlFree((xmlChar *) encoding);
10479
5.10k
    return(NULL);
10480
5.10k
      } else
10481
94.6k
          NEXT;
10482
99.7k
  } else if (RAW == '\''){
10483
20.3k
      NEXT;
10484
20.3k
      encoding = xmlParseEncName(ctxt);
10485
20.3k
      if (RAW != '\'') {
10486
1.79k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10487
1.79k
    xmlFree((xmlChar *) encoding);
10488
1.79k
    return(NULL);
10489
1.79k
      } else
10490
18.6k
          NEXT;
10491
20.3k
  } else {
10492
805
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10493
805
  }
10494
10495
        /*
10496
         * Non standard parsing, allowing the user to ignore encoding
10497
         */
10498
114k
        if (ctxt->options & XML_PARSE_IGNORE_ENC) {
10499
36.2k
      xmlFree((xmlChar *) encoding);
10500
36.2k
            return(NULL);
10501
36.2k
  }
10502
10503
  /*
10504
   * UTF-16 encoding switch has already taken place at this stage,
10505
   * more over the little-endian/big-endian selection is already done
10506
   */
10507
77.7k
        if ((encoding != NULL) &&
10508
77.7k
      ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10509
77.2k
       (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
10510
      /*
10511
       * If no encoding was passed to the parser, that we are
10512
       * using UTF-16 and no decoder is present i.e. the
10513
       * document is apparently UTF-8 compatible, then raise an
10514
       * encoding mismatch fatal error
10515
       */
10516
3.93k
      if ((ctxt->encoding == NULL) &&
10517
3.93k
          (ctxt->input->buf != NULL) &&
10518
3.93k
          (ctxt->input->buf->encoder == NULL)) {
10519
3.93k
    xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10520
3.93k
      "Document labelled UTF-16 but has UTF-8 content\n");
10521
3.93k
      }
10522
3.93k
      if (ctxt->encoding != NULL)
10523
0
    xmlFree((xmlChar *) ctxt->encoding);
10524
3.93k
      ctxt->encoding = encoding;
10525
3.93k
  }
10526
  /*
10527
   * UTF-8 encoding is handled natively
10528
   */
10529
73.8k
        else if ((encoding != NULL) &&
10530
73.8k
      ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10531
73.2k
       (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
10532
27.0k
      if (ctxt->encoding != NULL)
10533
3
    xmlFree((xmlChar *) ctxt->encoding);
10534
27.0k
      ctxt->encoding = encoding;
10535
27.0k
  }
10536
46.8k
  else if (encoding != NULL) {
10537
46.2k
      xmlCharEncodingHandlerPtr handler;
10538
10539
46.2k
      if (ctxt->input->encoding != NULL)
10540
0
    xmlFree((xmlChar *) ctxt->input->encoding);
10541
46.2k
      ctxt->input->encoding = encoding;
10542
10543
46.2k
            handler = xmlFindCharEncodingHandler((const char *) encoding);
10544
46.2k
      if (handler != NULL) {
10545
45.1k
    if (xmlSwitchToEncoding(ctxt, handler) < 0) {
10546
        /* failed to convert */
10547
241
        ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
10548
241
        return(NULL);
10549
241
    }
10550
45.1k
      } else {
10551
1.13k
    xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
10552
1.13k
      "Unsupported encoding %s\n", encoding);
10553
1.13k
    return(NULL);
10554
1.13k
      }
10555
46.2k
  }
10556
77.7k
    }
10557
295k
    return(encoding);
10558
342k
}
10559
10560
/**
10561
 * xmlParseSDDecl:
10562
 * @ctxt:  an XML parser context
10563
 *
10564
 * DEPRECATED: Internal function, don't use.
10565
 *
10566
 * parse the XML standalone declaration
10567
 *
10568
 * [32] SDDecl ::= S 'standalone' Eq
10569
 *                 (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10570
 *
10571
 * [ VC: Standalone Document Declaration ]
10572
 * TODO The standalone document declaration must have the value "no"
10573
 * if any external markup declarations contain declarations of:
10574
 *  - attributes with default values, if elements to which these
10575
 *    attributes apply appear in the document without specifications
10576
 *    of values for these attributes, or
10577
 *  - entities (other than amp, lt, gt, apos, quot), if references
10578
 *    to those entities appear in the document, or
10579
 *  - attributes with values subject to normalization, where the
10580
 *    attribute appears in the document with a value which will change
10581
 *    as a result of normalization, or
10582
 *  - element types with element content, if white space occurs directly
10583
 *    within any instance of those types.
10584
 *
10585
 * Returns:
10586
 *   1 if standalone="yes"
10587
 *   0 if standalone="no"
10588
 *  -2 if standalone attribute is missing or invalid
10589
 *    (A standalone value of -2 means that the XML declaration was found,
10590
 *     but no value was specified for the standalone attribute).
10591
 */
10592
10593
int
10594
295k
xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10595
295k
    int standalone = -2;
10596
10597
295k
    SKIP_BLANKS;
10598
295k
    if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10599
53.8k
  SKIP(10);
10600
53.8k
        SKIP_BLANKS;
10601
53.8k
  if (RAW != '=') {
10602
543
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10603
543
      return(standalone);
10604
543
        }
10605
53.2k
  NEXT;
10606
53.2k
  SKIP_BLANKS;
10607
53.2k
        if (RAW == '\''){
10608
9.56k
      NEXT;
10609
9.56k
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10610
7.70k
          standalone = 0;
10611
7.70k
                SKIP(2);
10612
7.70k
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10613
1.86k
                 (NXT(2) == 's')) {
10614
1.47k
          standalone = 1;
10615
1.47k
    SKIP(3);
10616
1.47k
            } else {
10617
387
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10618
387
      }
10619
9.56k
      if (RAW != '\'') {
10620
594
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10621
594
      } else
10622
8.97k
          NEXT;
10623
43.7k
  } else if (RAW == '"'){
10624
42.3k
      NEXT;
10625
42.3k
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10626
22.5k
          standalone = 0;
10627
22.5k
    SKIP(2);
10628
22.5k
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10629
19.8k
                 (NXT(2) == 's')) {
10630
18.2k
          standalone = 1;
10631
18.2k
                SKIP(3);
10632
18.2k
            } else {
10633
1.54k
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10634
1.54k
      }
10635
42.3k
      if (RAW != '"') {
10636
2.28k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10637
2.28k
      } else
10638
40.0k
          NEXT;
10639
42.3k
  } else {
10640
1.34k
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10641
1.34k
        }
10642
53.2k
    }
10643
295k
    return(standalone);
10644
295k
}
10645
10646
/**
10647
 * xmlParseXMLDecl:
10648
 * @ctxt:  an XML parser context
10649
 *
10650
 * DEPRECATED: Internal function, don't use.
10651
 *
10652
 * parse an XML declaration header
10653
 *
10654
 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10655
 */
10656
10657
void
10658
482k
xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10659
482k
    xmlChar *version;
10660
10661
    /*
10662
     * This value for standalone indicates that the document has an
10663
     * XML declaration but it does not have a standalone attribute.
10664
     * It will be overwritten later if a standalone attribute is found.
10665
     */
10666
482k
    ctxt->input->standalone = -2;
10667
10668
    /*
10669
     * We know that '<?xml' is here.
10670
     */
10671
482k
    SKIP(5);
10672
10673
482k
    if (!IS_BLANK_CH(RAW)) {
10674
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10675
0
                 "Blank needed after '<?xml'\n");
10676
0
    }
10677
482k
    SKIP_BLANKS;
10678
10679
    /*
10680
     * We must have the VersionInfo here.
10681
     */
10682
482k
    version = xmlParseVersionInfo(ctxt);
10683
482k
    if (version == NULL) {
10684
124k
  xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10685
358k
    } else {
10686
358k
  if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10687
      /*
10688
       * Changed here for XML-1.0 5th edition
10689
       */
10690
43.6k
      if (ctxt->options & XML_PARSE_OLD10) {
10691
14.4k
    xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10692
14.4k
                "Unsupported version '%s'\n",
10693
14.4k
                version);
10694
29.2k
      } else {
10695
29.2k
          if ((version[0] == '1') && ((version[1] == '.'))) {
10696
4.70k
        xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10697
4.70k
                      "Unsupported version '%s'\n",
10698
4.70k
          version, NULL);
10699
24.5k
    } else {
10700
24.5k
        xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10701
24.5k
              "Unsupported version '%s'\n",
10702
24.5k
              version);
10703
24.5k
    }
10704
29.2k
      }
10705
43.6k
  }
10706
358k
  if (ctxt->version != NULL)
10707
0
      xmlFree((void *) ctxt->version);
10708
358k
  ctxt->version = version;
10709
358k
    }
10710
10711
    /*
10712
     * We may have the encoding declaration
10713
     */
10714
482k
    if (!IS_BLANK_CH(RAW)) {
10715
305k
        if ((RAW == '?') && (NXT(1) == '>')) {
10716
160k
      SKIP(2);
10717
160k
      return;
10718
160k
  }
10719
145k
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10720
145k
    }
10721
322k
    xmlParseEncodingDecl(ctxt);
10722
322k
    if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10723
322k
         (ctxt->instate == XML_PARSER_EOF)) {
10724
  /*
10725
   * The XML REC instructs us to stop parsing right here
10726
   */
10727
1.03k
        return;
10728
1.03k
    }
10729
10730
    /*
10731
     * We may have the standalone status.
10732
     */
10733
321k
    if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10734
32.6k
        if ((RAW == '?') && (NXT(1) == '>')) {
10735
25.6k
      SKIP(2);
10736
25.6k
      return;
10737
25.6k
  }
10738
6.93k
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10739
6.93k
    }
10740
10741
    /*
10742
     * We can grow the input buffer freely at that point
10743
     */
10744
295k
    GROW;
10745
10746
295k
    SKIP_BLANKS;
10747
295k
    ctxt->input->standalone = xmlParseSDDecl(ctxt);
10748
10749
295k
    SKIP_BLANKS;
10750
295k
    if ((RAW == '?') && (NXT(1) == '>')) {
10751
105k
        SKIP(2);
10752
190k
    } else if (RAW == '>') {
10753
        /* Deprecated old WD ... */
10754
984
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10755
984
  NEXT;
10756
189k
    } else {
10757
189k
        int c;
10758
10759
189k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10760
6.89M
        while ((c = CUR) != 0) {
10761
6.88M
            NEXT;
10762
6.88M
            if (c == '>')
10763
180k
                break;
10764
6.88M
        }
10765
189k
    }
10766
295k
}
10767
10768
/**
10769
 * xmlParseMisc:
10770
 * @ctxt:  an XML parser context
10771
 *
10772
 * DEPRECATED: Internal function, don't use.
10773
 *
10774
 * parse an XML Misc* optional field.
10775
 *
10776
 * [27] Misc ::= Comment | PI |  S
10777
 */
10778
10779
void
10780
720k
xmlParseMisc(xmlParserCtxtPtr ctxt) {
10781
828k
    while (ctxt->instate != XML_PARSER_EOF) {
10782
828k
        SKIP_BLANKS;
10783
828k
        GROW;
10784
828k
        if ((RAW == '<') && (NXT(1) == '?')) {
10785
79.7k
      xmlParsePI(ctxt);
10786
748k
        } else if (CMP4(CUR_PTR, '<', '!', '-', '-')) {
10787
27.7k
      xmlParseComment(ctxt);
10788
720k
        } else {
10789
720k
            break;
10790
720k
        }
10791
828k
    }
10792
720k
}
10793
10794
/**
10795
 * xmlParseDocument:
10796
 * @ctxt:  an XML parser context
10797
 *
10798
 * parse an XML document (and build a tree if using the standard SAX
10799
 * interface).
10800
 *
10801
 * [1] document ::= prolog element Misc*
10802
 *
10803
 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10804
 *
10805
 * Returns 0, -1 in case of error. the parser context is augmented
10806
 *                as a result of the parsing.
10807
 */
10808
10809
int
10810
367k
xmlParseDocument(xmlParserCtxtPtr ctxt) {
10811
367k
    xmlChar start[4];
10812
367k
    xmlCharEncoding enc;
10813
10814
367k
    xmlInitParser();
10815
10816
367k
    if ((ctxt == NULL) || (ctxt->input == NULL))
10817
0
        return(-1);
10818
10819
367k
    GROW;
10820
10821
    /*
10822
     * SAX: detecting the level.
10823
     */
10824
367k
    xmlDetectSAX2(ctxt);
10825
10826
    /*
10827
     * SAX: beginning of the document processing.
10828
     */
10829
367k
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10830
367k
        ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10831
367k
    if (ctxt->instate == XML_PARSER_EOF)
10832
0
  return(-1);
10833
10834
367k
    if ((ctxt->encoding == NULL) &&
10835
367k
        ((ctxt->input->end - ctxt->input->cur) >= 4)) {
10836
  /*
10837
   * Get the 4 first bytes and decode the charset
10838
   * if enc != XML_CHAR_ENCODING_NONE
10839
   * plug some encoding conversion routines.
10840
   */
10841
360k
  start[0] = RAW;
10842
360k
  start[1] = NXT(1);
10843
360k
  start[2] = NXT(2);
10844
360k
  start[3] = NXT(3);
10845
360k
  enc = xmlDetectCharEncoding(&start[0], 4);
10846
360k
  if (enc != XML_CHAR_ENCODING_NONE) {
10847
190k
      xmlSwitchEncoding(ctxt, enc);
10848
190k
  }
10849
360k
    }
10850
10851
10852
367k
    if (CUR == 0) {
10853
1.73k
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10854
1.73k
  return(-1);
10855
1.73k
    }
10856
10857
    /*
10858
     * Check for the XMLDecl in the Prolog.
10859
     * do not GROW here to avoid the detected encoder to decode more
10860
     * than just the first line, unless the amount of data is really
10861
     * too small to hold "<?xml version="1.0" encoding="foo"
10862
     */
10863
365k
    if ((ctxt->input->end - ctxt->input->cur) < 35) {
10864
23.3k
       GROW;
10865
23.3k
    }
10866
365k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10867
10868
  /*
10869
   * Note that we will switch encoding on the fly.
10870
   */
10871
161k
  xmlParseXMLDecl(ctxt);
10872
161k
  if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10873
161k
      (ctxt->instate == XML_PARSER_EOF)) {
10874
      /*
10875
       * The XML REC instructs us to stop parsing right here
10876
       */
10877
349
      return(-1);
10878
349
  }
10879
160k
  ctxt->standalone = ctxt->input->standalone;
10880
160k
  SKIP_BLANKS;
10881
204k
    } else {
10882
204k
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10883
204k
    }
10884
365k
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10885
342k
        ctxt->sax->startDocument(ctxt->userData);
10886
365k
    if (ctxt->instate == XML_PARSER_EOF)
10887
0
  return(-1);
10888
365k
    if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10889
365k
        (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10890
0
  ctxt->myDoc->compression = ctxt->input->buf->compressed;
10891
0
    }
10892
10893
    /*
10894
     * The Misc part of the Prolog
10895
     */
10896
365k
    xmlParseMisc(ctxt);
10897
10898
    /*
10899
     * Then possibly doc type declaration(s) and more Misc
10900
     * (doctypedecl Misc*)?
10901
     */
10902
365k
    GROW;
10903
365k
    if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10904
10905
181k
  ctxt->inSubset = 1;
10906
181k
  xmlParseDocTypeDecl(ctxt);
10907
181k
  if (RAW == '[') {
10908
130k
      ctxt->instate = XML_PARSER_DTD;
10909
130k
      xmlParseInternalSubset(ctxt);
10910
130k
      if (ctxt->instate == XML_PARSER_EOF)
10911
64.9k
    return(-1);
10912
130k
  }
10913
10914
  /*
10915
   * Create and update the external subset.
10916
   */
10917
116k
  ctxt->inSubset = 2;
10918
116k
  if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10919
116k
      (!ctxt->disableSAX))
10920
106k
      ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10921
106k
                                ctxt->extSubSystem, ctxt->extSubURI);
10922
116k
  if (ctxt->instate == XML_PARSER_EOF)
10923
10.8k
      return(-1);
10924
105k
  ctxt->inSubset = 0;
10925
10926
105k
        xmlCleanSpecialAttr(ctxt);
10927
10928
105k
  ctxt->instate = XML_PARSER_PROLOG;
10929
105k
  xmlParseMisc(ctxt);
10930
105k
    }
10931
10932
    /*
10933
     * Time to start parsing the tree itself
10934
     */
10935
289k
    GROW;
10936
289k
    if (RAW != '<') {
10937
40.4k
  xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10938
40.4k
           "Start tag expected, '<' not found\n");
10939
249k
    } else {
10940
249k
  ctxt->instate = XML_PARSER_CONTENT;
10941
249k
  xmlParseElement(ctxt);
10942
249k
  ctxt->instate = XML_PARSER_EPILOG;
10943
10944
10945
  /*
10946
   * The Misc part at the end
10947
   */
10948
249k
  xmlParseMisc(ctxt);
10949
10950
249k
  if (RAW != 0) {
10951
96.3k
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10952
96.3k
  }
10953
249k
  ctxt->instate = XML_PARSER_EOF;
10954
249k
    }
10955
10956
    /*
10957
     * SAX: end of the document processing.
10958
     */
10959
289k
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10960
289k
        ctxt->sax->endDocument(ctxt->userData);
10961
10962
    /*
10963
     * Remove locally kept entity definitions if the tree was not built
10964
     */
10965
289k
    if ((ctxt->myDoc != NULL) &&
10966
289k
  (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10967
1.00k
  xmlFreeDoc(ctxt->myDoc);
10968
1.00k
  ctxt->myDoc = NULL;
10969
1.00k
    }
10970
10971
289k
    if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10972
10.8k
        ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10973
10.8k
  if (ctxt->valid)
10974
8.02k
      ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10975
10.8k
  if (ctxt->nsWellFormed)
10976
10.2k
      ctxt->myDoc->properties |= XML_DOC_NSVALID;
10977
10.8k
  if (ctxt->options & XML_PARSE_OLD10)
10978
1.45k
      ctxt->myDoc->properties |= XML_DOC_OLD10;
10979
10.8k
    }
10980
289k
    if (! ctxt->wellFormed) {
10981
278k
  ctxt->valid = 0;
10982
278k
  return(-1);
10983
278k
    }
10984
10.8k
    return(0);
10985
289k
}
10986
10987
/**
10988
 * xmlParseExtParsedEnt:
10989
 * @ctxt:  an XML parser context
10990
 *
10991
 * parse a general parsed entity
10992
 * An external general parsed entity is well-formed if it matches the
10993
 * production labeled extParsedEnt.
10994
 *
10995
 * [78] extParsedEnt ::= TextDecl? content
10996
 *
10997
 * Returns 0, -1 in case of error. the parser context is augmented
10998
 *                as a result of the parsing.
10999
 */
11000
11001
int
11002
0
xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
11003
0
    xmlChar start[4];
11004
0
    xmlCharEncoding enc;
11005
11006
0
    if ((ctxt == NULL) || (ctxt->input == NULL))
11007
0
        return(-1);
11008
11009
0
    xmlDetectSAX2(ctxt);
11010
11011
0
    GROW;
11012
11013
    /*
11014
     * SAX: beginning of the document processing.
11015
     */
11016
0
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11017
0
        ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
11018
11019
    /*
11020
     * Get the 4 first bytes and decode the charset
11021
     * if enc != XML_CHAR_ENCODING_NONE
11022
     * plug some encoding conversion routines.
11023
     */
11024
0
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11025
0
  start[0] = RAW;
11026
0
  start[1] = NXT(1);
11027
0
  start[2] = NXT(2);
11028
0
  start[3] = NXT(3);
11029
0
  enc = xmlDetectCharEncoding(start, 4);
11030
0
  if (enc != XML_CHAR_ENCODING_NONE) {
11031
0
      xmlSwitchEncoding(ctxt, enc);
11032
0
  }
11033
0
    }
11034
11035
11036
0
    if (CUR == 0) {
11037
0
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11038
0
    }
11039
11040
    /*
11041
     * Check for the XMLDecl in the Prolog.
11042
     */
11043
0
    GROW;
11044
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
11045
11046
  /*
11047
   * Note that we will switch encoding on the fly.
11048
   */
11049
0
  xmlParseXMLDecl(ctxt);
11050
0
  if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11051
      /*
11052
       * The XML REC instructs us to stop parsing right here
11053
       */
11054
0
      return(-1);
11055
0
  }
11056
0
  SKIP_BLANKS;
11057
0
    } else {
11058
0
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11059
0
    }
11060
0
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
11061
0
        ctxt->sax->startDocument(ctxt->userData);
11062
0
    if (ctxt->instate == XML_PARSER_EOF)
11063
0
  return(-1);
11064
11065
    /*
11066
     * Doing validity checking on chunk doesn't make sense
11067
     */
11068
0
    ctxt->instate = XML_PARSER_CONTENT;
11069
0
    ctxt->validate = 0;
11070
0
    ctxt->loadsubset = 0;
11071
0
    ctxt->depth = 0;
11072
11073
0
    xmlParseContent(ctxt);
11074
0
    if (ctxt->instate == XML_PARSER_EOF)
11075
0
  return(-1);
11076
11077
0
    if ((RAW == '<') && (NXT(1) == '/')) {
11078
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11079
0
    } else if (RAW != 0) {
11080
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
11081
0
    }
11082
11083
    /*
11084
     * SAX: end of the document processing.
11085
     */
11086
0
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11087
0
        ctxt->sax->endDocument(ctxt->userData);
11088
11089
0
    if (! ctxt->wellFormed) return(-1);
11090
0
    return(0);
11091
0
}
11092
11093
#ifdef LIBXML_PUSH_ENABLED
11094
/************************************************************************
11095
 *                  *
11096
 *    Progressive parsing interfaces        *
11097
 *                  *
11098
 ************************************************************************/
11099
11100
/**
11101
 * xmlParseLookupChar:
11102
 * @ctxt:  an XML parser context
11103
 * @c:  character
11104
 *
11105
 * Check whether the input buffer contains a character.
11106
 */
11107
static int
11108
3.85M
xmlParseLookupChar(xmlParserCtxtPtr ctxt, int c) {
11109
3.85M
    const xmlChar *cur;
11110
11111
3.85M
    if (ctxt->checkIndex == 0) {
11112
3.49M
        cur = ctxt->input->cur + 1;
11113
3.49M
    } else {
11114
350k
        cur = ctxt->input->cur + ctxt->checkIndex;
11115
350k
    }
11116
11117
3.85M
    if (memchr(cur, c, ctxt->input->end - cur) == NULL) {
11118
377k
        ctxt->checkIndex = ctxt->input->end - ctxt->input->cur;
11119
377k
        return(0);
11120
3.47M
    } else {
11121
3.47M
        ctxt->checkIndex = 0;
11122
3.47M
        return(1);
11123
3.47M
    }
11124
3.85M
}
11125
11126
/**
11127
 * xmlParseLookupString:
11128
 * @ctxt:  an XML parser context
11129
 * @startDelta: delta to apply at the start
11130
 * @str:  string
11131
 * @strLen:  length of string
11132
 *
11133
 * Check whether the input buffer contains a string.
11134
 */
11135
static const xmlChar *
11136
xmlParseLookupString(xmlParserCtxtPtr ctxt, size_t startDelta,
11137
2.64M
                     const char *str, size_t strLen) {
11138
2.64M
    const xmlChar *cur, *term;
11139
11140
2.64M
    if (ctxt->checkIndex == 0) {
11141
1.25M
        cur = ctxt->input->cur + startDelta;
11142
1.38M
    } else {
11143
1.38M
        cur = ctxt->input->cur + ctxt->checkIndex;
11144
1.38M
    }
11145
11146
2.64M
    term = BAD_CAST strstr((const char *) cur, str);
11147
2.64M
    if (term == NULL) {
11148
1.61M
        const xmlChar *end = ctxt->input->end;
11149
11150
        /* Rescan (strLen - 1) characters. */
11151
1.61M
        if ((size_t) (end - cur) < strLen)
11152
56.8k
            end = cur;
11153
1.55M
        else
11154
1.55M
            end -= strLen - 1;
11155
1.61M
        ctxt->checkIndex = end - ctxt->input->cur;
11156
1.61M
    } else {
11157
1.02M
        ctxt->checkIndex = 0;
11158
1.02M
    }
11159
11160
2.64M
    return(term);
11161
2.64M
}
11162
11163
/**
11164
 * xmlParseLookupCharData:
11165
 * @ctxt:  an XML parser context
11166
 *
11167
 * Check whether the input buffer contains terminated char data.
11168
 */
11169
static int
11170
9.76M
xmlParseLookupCharData(xmlParserCtxtPtr ctxt) {
11171
9.76M
    const xmlChar *cur = ctxt->input->cur + ctxt->checkIndex;
11172
9.76M
    const xmlChar *end = ctxt->input->end;
11173
11174
262M
    while (cur < end) {
11175
261M
        if ((*cur == '<') || (*cur == '&')) {
11176
9.15M
            ctxt->checkIndex = 0;
11177
9.15M
            return(1);
11178
9.15M
        }
11179
252M
        cur++;
11180
252M
    }
11181
11182
607k
    ctxt->checkIndex = cur - ctxt->input->cur;
11183
607k
    return(0);
11184
9.76M
}
11185
11186
/**
11187
 * xmlParseLookupGt:
11188
 * @ctxt:  an XML parser context
11189
 *
11190
 * Check whether there's enough data in the input buffer to finish parsing
11191
 * a start tag. This has to take quotes into account.
11192
 */
11193
static int
11194
8.93M
xmlParseLookupGt(xmlParserCtxtPtr ctxt) {
11195
8.93M
    const xmlChar *cur;
11196
8.93M
    const xmlChar *end = ctxt->input->end;
11197
8.93M
    int state = ctxt->endCheckState;
11198
11199
8.93M
    if (ctxt->checkIndex == 0)
11200
6.64M
        cur = ctxt->input->cur + 1;
11201
2.28M
    else
11202
2.28M
        cur = ctxt->input->cur + ctxt->checkIndex;
11203
11204
639M
    while (cur < end) {
11205
637M
        if (state) {
11206
337M
            if (*cur == state)
11207
8.61M
                state = 0;
11208
337M
        } else if (*cur == '\'' || *cur == '"') {
11209
8.73M
            state = *cur;
11210
291M
        } else if (*cur == '>') {
11211
6.45M
            ctxt->checkIndex = 0;
11212
6.45M
            ctxt->endCheckState = 0;
11213
6.45M
            return(1);
11214
6.45M
        }
11215
630M
        cur++;
11216
630M
    }
11217
11218
2.48M
    ctxt->checkIndex = cur - ctxt->input->cur;
11219
2.48M
    ctxt->endCheckState = state;
11220
2.48M
    return(0);
11221
8.93M
}
11222
11223
/**
11224
 * xmlParseLookupInternalSubset:
11225
 * @ctxt:  an XML parser context
11226
 *
11227
 * Check whether there's enough data in the input buffer to finish parsing
11228
 * the internal subset.
11229
 */
11230
static int
11231
663k
xmlParseLookupInternalSubset(xmlParserCtxtPtr ctxt) {
11232
    /*
11233
     * Sorry, but progressive parsing of the internal subset is not
11234
     * supported. We first check that the full content of the internal
11235
     * subset is available and parsing is launched only at that point.
11236
     * Internal subset ends with "']' S? '>'" in an unescaped section and
11237
     * not in a ']]>' sequence which are conditional sections.
11238
     */
11239
663k
    const xmlChar *cur, *start;
11240
663k
    const xmlChar *end = ctxt->input->end;
11241
663k
    int state = ctxt->endCheckState;
11242
11243
663k
    if (ctxt->checkIndex == 0) {
11244
214k
        cur = ctxt->input->cur + 1;
11245
449k
    } else {
11246
449k
        cur = ctxt->input->cur + ctxt->checkIndex;
11247
449k
    }
11248
663k
    start = cur;
11249
11250
110M
    while (cur < end) {
11251
110M
        if (state == '-') {
11252
17.8M
            if ((*cur == '-') &&
11253
17.8M
                (cur[1] == '-') &&
11254
17.8M
                (cur[2] == '>')) {
11255
131k
                state = 0;
11256
131k
                cur += 3;
11257
131k
                start = cur;
11258
131k
                continue;
11259
131k
            }
11260
17.8M
        }
11261
92.2M
        else if (state == ']') {
11262
385k
            if (*cur == '>') {
11263
142k
                ctxt->checkIndex = 0;
11264
142k
                ctxt->endCheckState = 0;
11265
142k
                return(1);
11266
142k
            }
11267
242k
            if (IS_BLANK_CH(*cur)) {
11268
18.1k
                state = ' ';
11269
224k
            } else if (*cur != ']') {
11270
27.5k
                state = 0;
11271
27.5k
                start = cur;
11272
27.5k
                continue;
11273
27.5k
            }
11274
242k
        }
11275
91.8M
        else if (state == ' ') {
11276
117k
            if (*cur == '>') {
11277
1.38k
                ctxt->checkIndex = 0;
11278
1.38k
                ctxt->endCheckState = 0;
11279
1.38k
                return(1);
11280
1.38k
            }
11281
115k
            if (!IS_BLANK_CH(*cur)) {
11282
16.7k
                state = 0;
11283
16.7k
                start = cur;
11284
16.7k
                continue;
11285
16.7k
            }
11286
115k
        }
11287
91.7M
        else if (state != 0) {
11288
39.3M
            if (*cur == state) {
11289
918k
                state = 0;
11290
918k
                start = cur + 1;
11291
918k
            }
11292
39.3M
        }
11293
52.4M
        else if (*cur == '<') {
11294
1.50M
            if ((cur[1] == '!') &&
11295
1.50M
                (cur[2] == '-') &&
11296
1.50M
                (cur[3] == '-')) {
11297
134k
                state = '-';
11298
134k
                cur += 4;
11299
                /* Don't treat <!--> as comment */
11300
134k
                start = cur;
11301
134k
                continue;
11302
134k
            }
11303
1.50M
        }
11304
50.9M
        else if ((*cur == '"') || (*cur == '\'') || (*cur == ']')) {
11305
1.13M
            state = *cur;
11306
1.13M
        }
11307
11308
109M
        cur++;
11309
109M
    }
11310
11311
    /*
11312
     * Rescan the three last characters to detect "<!--" and "-->"
11313
     * split across chunks.
11314
     */
11315
520k
    if ((state == 0) || (state == '-')) {
11316
320k
        if (cur - start < 3)
11317
21.3k
            cur = start;
11318
299k
        else
11319
299k
            cur -= 3;
11320
320k
    }
11321
520k
    ctxt->checkIndex = cur - ctxt->input->cur;
11322
520k
    ctxt->endCheckState = state;
11323
520k
    return(0);
11324
663k
}
11325
11326
/**
11327
 * xmlCheckCdataPush:
11328
 * @cur: pointer to the block of characters
11329
 * @len: length of the block in bytes
11330
 * @complete: 1 if complete CDATA block is passed in, 0 if partial block
11331
 *
11332
 * Check that the block of characters is okay as SCdata content [20]
11333
 *
11334
 * Returns the number of bytes to pass if okay, a negative index where an
11335
 *         UTF-8 error occurred otherwise
11336
 */
11337
static int
11338
387k
xmlCheckCdataPush(const xmlChar *utf, int len, int complete) {
11339
387k
    int ix;
11340
387k
    unsigned char c;
11341
387k
    int codepoint;
11342
11343
387k
    if ((utf == NULL) || (len <= 0))
11344
28.2k
        return(0);
11345
11346
16.1M
    for (ix = 0; ix < len;) {      /* string is 0-terminated */
11347
16.0M
        c = utf[ix];
11348
16.0M
        if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
11349
13.2M
      if (c >= 0x20)
11350
12.4M
    ix++;
11351
832k
      else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11352
798k
          ix++;
11353
33.9k
      else
11354
33.9k
          return(-ix);
11355
13.2M
  } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
11356
1.67M
      if (ix + 2 > len) return(complete ? -ix : ix);
11357
1.66M
      if ((utf[ix+1] & 0xc0 ) != 0x80)
11358
26.7k
          return(-ix);
11359
1.63M
      codepoint = (utf[ix] & 0x1f) << 6;
11360
1.63M
      codepoint |= utf[ix+1] & 0x3f;
11361
1.63M
      if (!xmlIsCharQ(codepoint))
11362
9.48k
          return(-ix);
11363
1.62M
      ix += 2;
11364
1.62M
  } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
11365
599k
      if (ix + 3 > len) return(complete ? -ix : ix);
11366
592k
      if (((utf[ix+1] & 0xc0) != 0x80) ||
11367
592k
          ((utf[ix+2] & 0xc0) != 0x80))
11368
25.3k
        return(-ix);
11369
567k
      codepoint = (utf[ix] & 0xf) << 12;
11370
567k
      codepoint |= (utf[ix+1] & 0x3f) << 6;
11371
567k
      codepoint |= utf[ix+2] & 0x3f;
11372
567k
      if (!xmlIsCharQ(codepoint))
11373
13.8k
          return(-ix);
11374
553k
      ix += 3;
11375
553k
  } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
11376
400k
      if (ix + 4 > len) return(complete ? -ix : ix);
11377
394k
      if (((utf[ix+1] & 0xc0) != 0x80) ||
11378
394k
          ((utf[ix+2] & 0xc0) != 0x80) ||
11379
394k
    ((utf[ix+3] & 0xc0) != 0x80))
11380
24.3k
        return(-ix);
11381
369k
      codepoint = (utf[ix] & 0x7) << 18;
11382
369k
      codepoint |= (utf[ix+1] & 0x3f) << 12;
11383
369k
      codepoint |= (utf[ix+2] & 0x3f) << 6;
11384
369k
      codepoint |= utf[ix+3] & 0x3f;
11385
369k
      if (!xmlIsCharQ(codepoint))
11386
11.6k
          return(-ix);
11387
358k
      ix += 4;
11388
358k
  } else       /* unknown encoding */
11389
42.7k
      return(-ix);
11390
16.0M
      }
11391
150k
      return(ix);
11392
359k
}
11393
11394
/**
11395
 * xmlParseTryOrFinish:
11396
 * @ctxt:  an XML parser context
11397
 * @terminate:  last chunk indicator
11398
 *
11399
 * Try to progress on parsing
11400
 *
11401
 * Returns zero if no parsing was possible
11402
 */
11403
static int
11404
6.53M
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11405
6.53M
    int ret = 0;
11406
6.53M
    int avail, tlen;
11407
6.53M
    xmlChar cur, next;
11408
11409
6.53M
    if (ctxt->input == NULL)
11410
0
        return(0);
11411
11412
#ifdef DEBUG_PUSH
11413
    switch (ctxt->instate) {
11414
  case XML_PARSER_EOF:
11415
      xmlGenericError(xmlGenericErrorContext,
11416
        "PP: try EOF\n"); break;
11417
  case XML_PARSER_START:
11418
      xmlGenericError(xmlGenericErrorContext,
11419
        "PP: try START\n"); break;
11420
  case XML_PARSER_MISC:
11421
      xmlGenericError(xmlGenericErrorContext,
11422
        "PP: try MISC\n");break;
11423
  case XML_PARSER_COMMENT:
11424
      xmlGenericError(xmlGenericErrorContext,
11425
        "PP: try COMMENT\n");break;
11426
  case XML_PARSER_PROLOG:
11427
      xmlGenericError(xmlGenericErrorContext,
11428
        "PP: try PROLOG\n");break;
11429
  case XML_PARSER_START_TAG:
11430
      xmlGenericError(xmlGenericErrorContext,
11431
        "PP: try START_TAG\n");break;
11432
  case XML_PARSER_CONTENT:
11433
      xmlGenericError(xmlGenericErrorContext,
11434
        "PP: try CONTENT\n");break;
11435
  case XML_PARSER_CDATA_SECTION:
11436
      xmlGenericError(xmlGenericErrorContext,
11437
        "PP: try CDATA_SECTION\n");break;
11438
  case XML_PARSER_END_TAG:
11439
      xmlGenericError(xmlGenericErrorContext,
11440
        "PP: try END_TAG\n");break;
11441
  case XML_PARSER_ENTITY_DECL:
11442
      xmlGenericError(xmlGenericErrorContext,
11443
        "PP: try ENTITY_DECL\n");break;
11444
  case XML_PARSER_ENTITY_VALUE:
11445
      xmlGenericError(xmlGenericErrorContext,
11446
        "PP: try ENTITY_VALUE\n");break;
11447
  case XML_PARSER_ATTRIBUTE_VALUE:
11448
      xmlGenericError(xmlGenericErrorContext,
11449
        "PP: try ATTRIBUTE_VALUE\n");break;
11450
  case XML_PARSER_DTD:
11451
      xmlGenericError(xmlGenericErrorContext,
11452
        "PP: try DTD\n");break;
11453
  case XML_PARSER_EPILOG:
11454
      xmlGenericError(xmlGenericErrorContext,
11455
        "PP: try EPILOG\n");break;
11456
  case XML_PARSER_PI:
11457
      xmlGenericError(xmlGenericErrorContext,
11458
        "PP: try PI\n");break;
11459
        case XML_PARSER_IGNORE:
11460
            xmlGenericError(xmlGenericErrorContext,
11461
        "PP: try IGNORE\n");break;
11462
    }
11463
#endif
11464
11465
6.53M
    if ((ctxt->input != NULL) &&
11466
6.53M
        (ctxt->input->cur - ctxt->input->base > 4096)) {
11467
60.3k
        xmlParserInputShrink(ctxt->input);
11468
60.3k
    }
11469
11470
62.3M
    while (ctxt->instate != XML_PARSER_EOF) {
11471
62.3M
  if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11472
205k
      return(0);
11473
11474
62.1M
  if (ctxt->input == NULL) break;
11475
62.1M
  if (ctxt->input->buf == NULL)
11476
0
      avail = ctxt->input->length -
11477
0
              (ctxt->input->cur - ctxt->input->base);
11478
62.1M
  else {
11479
      /*
11480
       * If we are operating on converted input, try to flush
11481
       * remaining chars to avoid them stalling in the non-converted
11482
       * buffer. But do not do this in document start where
11483
       * encoding="..." may not have been read and we work on a
11484
       * guessed encoding.
11485
       */
11486
62.1M
      if ((ctxt->instate != XML_PARSER_START) &&
11487
62.1M
          (ctxt->input->buf->raw != NULL) &&
11488
62.1M
    (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
11489
1.30M
                size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11490
1.30M
                                                 ctxt->input);
11491
1.30M
    size_t current = ctxt->input->cur - ctxt->input->base;
11492
11493
1.30M
    xmlParserInputBufferPush(ctxt->input->buf, 0, "");
11494
1.30M
                xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11495
1.30M
                                      base, current);
11496
1.30M
      }
11497
62.1M
      avail = xmlBufUse(ctxt->input->buf->buffer) -
11498
62.1M
        (ctxt->input->cur - ctxt->input->base);
11499
62.1M
  }
11500
62.1M
        if (avail < 1)
11501
264k
      goto done;
11502
61.9M
        switch (ctxt->instate) {
11503
0
            case XML_PARSER_EOF:
11504
          /*
11505
     * Document parsing is done !
11506
     */
11507
0
          goto done;
11508
2.25M
            case XML_PARSER_START:
11509
2.25M
    if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11510
566k
        xmlChar start[4];
11511
566k
        xmlCharEncoding enc;
11512
11513
        /*
11514
         * Very first chars read from the document flow.
11515
         */
11516
566k
        if (avail < 4)
11517
35.9k
      goto done;
11518
11519
        /*
11520
         * Get the 4 first bytes and decode the charset
11521
         * if enc != XML_CHAR_ENCODING_NONE
11522
         * plug some encoding conversion routines,
11523
         * else xmlSwitchEncoding will set to (default)
11524
         * UTF8.
11525
         */
11526
530k
        start[0] = RAW;
11527
530k
        start[1] = NXT(1);
11528
530k
        start[2] = NXT(2);
11529
530k
        start[3] = NXT(3);
11530
530k
        enc = xmlDetectCharEncoding(start, 4);
11531
530k
        xmlSwitchEncoding(ctxt, enc);
11532
530k
        break;
11533
566k
    }
11534
11535
1.68M
    if (avail < 2)
11536
297
        goto done;
11537
1.68M
    cur = ctxt->input->cur[0];
11538
1.68M
    next = ctxt->input->cur[1];
11539
1.68M
    if (cur == 0) {
11540
2.60k
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11541
2.60k
      ctxt->sax->setDocumentLocator(ctxt->userData,
11542
2.60k
                  &xmlDefaultSAXLocator);
11543
2.60k
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11544
2.60k
        xmlHaltParser(ctxt);
11545
#ifdef DEBUG_PUSH
11546
        xmlGenericError(xmlGenericErrorContext,
11547
          "PP: entering EOF\n");
11548
#endif
11549
2.60k
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11550
2.60k
      ctxt->sax->endDocument(ctxt->userData);
11551
2.60k
        goto done;
11552
2.60k
    }
11553
1.68M
          if ((cur == '<') && (next == '?')) {
11554
        /* PI or XML decl */
11555
1.36M
        if (avail < 5) goto done;
11556
1.36M
        if ((!terminate) &&
11557
1.36M
                        (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11558
969k
      goto done;
11559
390k
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11560
390k
      ctxt->sax->setDocumentLocator(ctxt->userData,
11561
390k
                  &xmlDefaultSAXLocator);
11562
390k
        if ((ctxt->input->cur[2] == 'x') &&
11563
390k
      (ctxt->input->cur[3] == 'm') &&
11564
390k
      (ctxt->input->cur[4] == 'l') &&
11565
390k
      (IS_BLANK_CH(ctxt->input->cur[5]))) {
11566
321k
      ret += 5;
11567
#ifdef DEBUG_PUSH
11568
      xmlGenericError(xmlGenericErrorContext,
11569
        "PP: Parsing XML Decl\n");
11570
#endif
11571
321k
      xmlParseXMLDecl(ctxt);
11572
321k
      if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11573
          /*
11574
           * The XML REC instructs us to stop parsing right
11575
           * here
11576
           */
11577
687
          xmlHaltParser(ctxt);
11578
687
          return(0);
11579
687
      }
11580
321k
      ctxt->standalone = ctxt->input->standalone;
11581
321k
      if ((ctxt->encoding == NULL) &&
11582
321k
          (ctxt->input->encoding != NULL))
11583
29.3k
          ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11584
321k
      if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11585
321k
          (!ctxt->disableSAX))
11586
274k
          ctxt->sax->startDocument(ctxt->userData);
11587
321k
      ctxt->instate = XML_PARSER_MISC;
11588
#ifdef DEBUG_PUSH
11589
      xmlGenericError(xmlGenericErrorContext,
11590
        "PP: entering MISC\n");
11591
#endif
11592
321k
        } else {
11593
68.9k
      ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11594
68.9k
      if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11595
68.9k
          (!ctxt->disableSAX))
11596
68.9k
          ctxt->sax->startDocument(ctxt->userData);
11597
68.9k
      ctxt->instate = XML_PARSER_MISC;
11598
#ifdef DEBUG_PUSH
11599
      xmlGenericError(xmlGenericErrorContext,
11600
        "PP: entering MISC\n");
11601
#endif
11602
68.9k
        }
11603
390k
    } else {
11604
324k
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11605
324k
      ctxt->sax->setDocumentLocator(ctxt->userData,
11606
324k
                  &xmlDefaultSAXLocator);
11607
324k
        ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11608
324k
        if (ctxt->version == NULL) {
11609
0
            xmlErrMemory(ctxt, NULL);
11610
0
      break;
11611
0
        }
11612
324k
        if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11613
324k
            (!ctxt->disableSAX))
11614
324k
      ctxt->sax->startDocument(ctxt->userData);
11615
324k
        ctxt->instate = XML_PARSER_MISC;
11616
#ifdef DEBUG_PUSH
11617
        xmlGenericError(xmlGenericErrorContext,
11618
          "PP: entering MISC\n");
11619
#endif
11620
324k
    }
11621
714k
    break;
11622
11.3M
            case XML_PARSER_START_TAG: {
11623
11.3M
          const xmlChar *name;
11624
11.3M
    const xmlChar *prefix = NULL;
11625
11.3M
    const xmlChar *URI = NULL;
11626
11.3M
                int line = ctxt->input->line;
11627
11.3M
    int nsNr = ctxt->nsNr;
11628
11629
11.3M
    if ((avail < 2) && (ctxt->inputNr == 1))
11630
0
        goto done;
11631
11.3M
    cur = ctxt->input->cur[0];
11632
11.3M
          if (cur != '<') {
11633
29.7k
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11634
29.7k
        xmlHaltParser(ctxt);
11635
29.7k
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11636
29.7k
      ctxt->sax->endDocument(ctxt->userData);
11637
29.7k
        goto done;
11638
29.7k
    }
11639
11.3M
    if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11640
2.26M
                    goto done;
11641
9.04M
    if (ctxt->spaceNr == 0)
11642
447k
        spacePush(ctxt, -1);
11643
8.60M
    else if (*ctxt->space == -2)
11644
2.04M
        spacePush(ctxt, -1);
11645
6.55M
    else
11646
6.55M
        spacePush(ctxt, *ctxt->space);
11647
9.04M
#ifdef LIBXML_SAX1_ENABLED
11648
9.04M
    if (ctxt->sax2)
11649
5.76M
#endif /* LIBXML_SAX1_ENABLED */
11650
5.76M
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
11651
3.28M
#ifdef LIBXML_SAX1_ENABLED
11652
3.28M
    else
11653
3.28M
        name = xmlParseStartTag(ctxt);
11654
9.04M
#endif /* LIBXML_SAX1_ENABLED */
11655
9.04M
    if (ctxt->instate == XML_PARSER_EOF)
11656
785
        goto done;
11657
9.04M
    if (name == NULL) {
11658
44.6k
        spacePop(ctxt);
11659
44.6k
        xmlHaltParser(ctxt);
11660
44.6k
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11661
44.6k
      ctxt->sax->endDocument(ctxt->userData);
11662
44.6k
        goto done;
11663
44.6k
    }
11664
9.00M
#ifdef LIBXML_VALID_ENABLED
11665
    /*
11666
     * [ VC: Root Element Type ]
11667
     * The Name in the document type declaration must match
11668
     * the element type of the root element.
11669
     */
11670
9.00M
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11671
9.00M
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
11672
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11673
9.00M
#endif /* LIBXML_VALID_ENABLED */
11674
11675
    /*
11676
     * Check for an Empty Element.
11677
     */
11678
9.00M
    if ((RAW == '/') && (NXT(1) == '>')) {
11679
1.79M
        SKIP(2);
11680
11681
1.79M
        if (ctxt->sax2) {
11682
1.31M
      if ((ctxt->sax != NULL) &&
11683
1.31M
          (ctxt->sax->endElementNs != NULL) &&
11684
1.31M
          (!ctxt->disableSAX))
11685
1.31M
          ctxt->sax->endElementNs(ctxt->userData, name,
11686
1.31M
                                  prefix, URI);
11687
1.31M
      if (ctxt->nsNr - nsNr > 0)
11688
45.8k
          nsPop(ctxt, ctxt->nsNr - nsNr);
11689
1.31M
#ifdef LIBXML_SAX1_ENABLED
11690
1.31M
        } else {
11691
480k
      if ((ctxt->sax != NULL) &&
11692
480k
          (ctxt->sax->endElement != NULL) &&
11693
480k
          (!ctxt->disableSAX))
11694
479k
          ctxt->sax->endElement(ctxt->userData, name);
11695
480k
#endif /* LIBXML_SAX1_ENABLED */
11696
480k
        }
11697
1.79M
        if (ctxt->instate == XML_PARSER_EOF)
11698
0
      goto done;
11699
1.79M
        spacePop(ctxt);
11700
1.79M
        if (ctxt->nameNr == 0) {
11701
5.23k
      ctxt->instate = XML_PARSER_EPILOG;
11702
1.79M
        } else {
11703
1.79M
      ctxt->instate = XML_PARSER_CONTENT;
11704
1.79M
        }
11705
1.79M
        break;
11706
1.79M
    }
11707
7.20M
    if (RAW == '>') {
11708
3.74M
        NEXT;
11709
3.74M
    } else {
11710
3.46M
        xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11711
3.46M
           "Couldn't find end of Start Tag %s\n",
11712
3.46M
           name);
11713
3.46M
        nodePop(ctxt);
11714
3.46M
        spacePop(ctxt);
11715
3.46M
    }
11716
7.20M
                nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
11717
11718
7.20M
    ctxt->instate = XML_PARSER_CONTENT;
11719
7.20M
                break;
11720
9.00M
      }
11721
43.5M
            case XML_PARSER_CONTENT: {
11722
43.5M
    if ((avail < 2) && (ctxt->inputNr == 1))
11723
81.3k
        goto done;
11724
43.4M
    cur = ctxt->input->cur[0];
11725
43.4M
    next = ctxt->input->cur[1];
11726
11727
43.4M
    if ((cur == '<') && (next == '/')) {
11728
2.12M
        ctxt->instate = XML_PARSER_END_TAG;
11729
2.12M
        break;
11730
41.3M
          } else if ((cur == '<') && (next == '?')) {
11731
440k
        if ((!terminate) &&
11732
440k
            (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11733
114k
      goto done;
11734
325k
        xmlParsePI(ctxt);
11735
325k
        ctxt->instate = XML_PARSER_CONTENT;
11736
40.8M
    } else if ((cur == '<') && (next != '!')) {
11737
8.65M
        ctxt->instate = XML_PARSER_START_TAG;
11738
8.65M
        break;
11739
32.2M
    } else if ((cur == '<') && (next == '!') &&
11740
32.2M
               (ctxt->input->cur[2] == '-') &&
11741
32.2M
         (ctxt->input->cur[3] == '-')) {
11742
514k
        if ((!terminate) &&
11743
514k
            (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11744
189k
      goto done;
11745
325k
        xmlParseComment(ctxt);
11746
325k
        ctxt->instate = XML_PARSER_CONTENT;
11747
31.7M
    } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11748
31.7M
        (ctxt->input->cur[2] == '[') &&
11749
31.7M
        (ctxt->input->cur[3] == 'C') &&
11750
31.7M
        (ctxt->input->cur[4] == 'D') &&
11751
31.7M
        (ctxt->input->cur[5] == 'A') &&
11752
31.7M
        (ctxt->input->cur[6] == 'T') &&
11753
31.7M
        (ctxt->input->cur[7] == 'A') &&
11754
31.7M
        (ctxt->input->cur[8] == '[')) {
11755
156k
        SKIP(9);
11756
156k
        ctxt->instate = XML_PARSER_CDATA_SECTION;
11757
156k
        break;
11758
31.5M
    } else if ((cur == '<') && (next == '!') &&
11759
31.5M
               (avail < 9)) {
11760
26.5k
        goto done;
11761
31.5M
    } else if (cur == '<') {
11762
1.36M
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11763
1.36M
                    "detected an error in element content\n");
11764
1.36M
                    SKIP(1);
11765
30.1M
    } else if (cur == '&') {
11766
2.97M
        if ((!terminate) && (!xmlParseLookupChar(ctxt, ';')))
11767
298k
      goto done;
11768
2.67M
        xmlParseReference(ctxt);
11769
27.1M
    } else {
11770
        /* TODO Avoid the extra copy, handle directly !!! */
11771
        /*
11772
         * Goal of the following test is:
11773
         *  - minimize calls to the SAX 'character' callback
11774
         *    when they are mergeable
11775
         *  - handle a problem for isBlank when we only parse
11776
         *    a sequence of blank chars and the next one is
11777
         *    not available to check against '<' presence.
11778
         *  - tries to homogenize the differences in SAX
11779
         *    callbacks between the push and pull versions
11780
         *    of the parser.
11781
         */
11782
27.1M
        if ((ctxt->inputNr == 1) &&
11783
27.1M
            (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11784
11.7M
      if ((!terminate) && (!xmlParseLookupCharData(ctxt)))
11785
607k
          goto done;
11786
11.7M
                    }
11787
26.5M
                    ctxt->checkIndex = 0;
11788
26.5M
        xmlParseCharData(ctxt, 0);
11789
26.5M
    }
11790
31.2M
    break;
11791
43.4M
      }
11792
31.2M
            case XML_PARSER_END_TAG:
11793
2.19M
    if (avail < 2)
11794
0
        goto done;
11795
2.19M
    if ((!terminate) && (!xmlParseLookupChar(ctxt, '>')))
11796
78.2k
        goto done;
11797
2.11M
    if (ctxt->sax2) {
11798
1.42M
              xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
11799
1.42M
        nameNsPop(ctxt);
11800
1.42M
    }
11801
698k
#ifdef LIBXML_SAX1_ENABLED
11802
698k
      else
11803
698k
        xmlParseEndTag1(ctxt, 0);
11804
2.11M
#endif /* LIBXML_SAX1_ENABLED */
11805
2.11M
    if (ctxt->instate == XML_PARSER_EOF) {
11806
        /* Nothing */
11807
2.11M
    } else if (ctxt->nameNr == 0) {
11808
37.4k
        ctxt->instate = XML_PARSER_EPILOG;
11809
2.08M
    } else {
11810
2.08M
        ctxt->instate = XML_PARSER_CONTENT;
11811
2.08M
    }
11812
2.11M
    break;
11813
498k
            case XML_PARSER_CDATA_SECTION: {
11814
          /*
11815
     * The Push mode need to have the SAX callback for
11816
     * cdataBlock merge back contiguous callbacks.
11817
     */
11818
498k
    const xmlChar *term;
11819
11820
498k
                if (terminate) {
11821
                    /*
11822
                     * Don't call xmlParseLookupString. If 'terminate'
11823
                     * is set, checkIndex is invalid.
11824
                     */
11825
50.0k
                    term = BAD_CAST strstr((const char *) ctxt->input->cur,
11826
50.0k
                                           "]]>");
11827
448k
                } else {
11828
448k
        term = xmlParseLookupString(ctxt, 0, "]]>", 3);
11829
448k
                }
11830
11831
498k
    if (term == NULL) {
11832
257k
        int tmp, size;
11833
11834
257k
                    if (terminate) {
11835
                        /* Unfinished CDATA section */
11836
8.73k
                        size = ctxt->input->end - ctxt->input->cur;
11837
249k
                    } else {
11838
249k
                        if (avail < XML_PARSER_BIG_BUFFER_SIZE + 2)
11839
111k
                            goto done;
11840
138k
                        ctxt->checkIndex = 0;
11841
                        /* XXX: Why don't we pass the full buffer? */
11842
138k
                        size = XML_PARSER_BIG_BUFFER_SIZE;
11843
138k
                    }
11844
146k
                    tmp = xmlCheckCdataPush(ctxt->input->cur, size, 0);
11845
146k
                    if (tmp <= 0) {
11846
99.1k
                        tmp = -tmp;
11847
99.1k
                        ctxt->input->cur += tmp;
11848
99.1k
                        goto encoding_error;
11849
99.1k
                    }
11850
47.7k
                    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11851
47.7k
                        if (ctxt->sax->cdataBlock != NULL)
11852
28.3k
                            ctxt->sax->cdataBlock(ctxt->userData,
11853
28.3k
                                                  ctxt->input->cur, tmp);
11854
19.3k
                        else if (ctxt->sax->characters != NULL)
11855
19.3k
                            ctxt->sax->characters(ctxt->userData,
11856
19.3k
                                                  ctxt->input->cur, tmp);
11857
47.7k
                    }
11858
47.7k
                    if (ctxt->instate == XML_PARSER_EOF)
11859
0
                        goto done;
11860
47.7k
                    SKIPL(tmp);
11861
240k
    } else {
11862
240k
                    int base = term - CUR_PTR;
11863
240k
        int tmp;
11864
11865
240k
        tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1);
11866
240k
        if ((tmp < 0) || (tmp != base)) {
11867
104k
      tmp = -tmp;
11868
104k
      ctxt->input->cur += tmp;
11869
104k
      goto encoding_error;
11870
104k
        }
11871
136k
        if ((ctxt->sax != NULL) && (base == 0) &&
11872
136k
            (ctxt->sax->cdataBlock != NULL) &&
11873
136k
            (!ctxt->disableSAX)) {
11874
      /*
11875
       * Special case to provide identical behaviour
11876
       * between pull and push parsers on empty CDATA
11877
       * sections
11878
       */
11879
18.1k
       if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11880
18.1k
           (!strncmp((const char *)&ctxt->input->cur[-9],
11881
18.1k
                     "<![CDATA[", 9)))
11882
18.1k
           ctxt->sax->cdataBlock(ctxt->userData,
11883
18.1k
                                 BAD_CAST "", 0);
11884
118k
        } else if ((ctxt->sax != NULL) && (base > 0) &&
11885
118k
      (!ctxt->disableSAX)) {
11886
107k
      if (ctxt->sax->cdataBlock != NULL)
11887
72.0k
          ctxt->sax->cdataBlock(ctxt->userData,
11888
72.0k
              ctxt->input->cur, base);
11889
35.9k
      else if (ctxt->sax->characters != NULL)
11890
35.9k
          ctxt->sax->characters(ctxt->userData,
11891
35.9k
              ctxt->input->cur, base);
11892
107k
        }
11893
136k
        if (ctxt->instate == XML_PARSER_EOF)
11894
0
      goto done;
11895
136k
        SKIPL(base + 3);
11896
136k
        ctxt->instate = XML_PARSER_CONTENT;
11897
#ifdef DEBUG_PUSH
11898
        xmlGenericError(xmlGenericErrorContext,
11899
          "PP: entering CONTENT\n");
11900
#endif
11901
136k
    }
11902
183k
    break;
11903
498k
      }
11904
1.10M
            case XML_PARSER_MISC:
11905
1.31M
            case XML_PARSER_PROLOG:
11906
1.35M
            case XML_PARSER_EPILOG:
11907
1.35M
    SKIP_BLANKS;
11908
1.35M
    if (ctxt->input->buf == NULL)
11909
0
        avail = ctxt->input->length -
11910
0
                (ctxt->input->cur - ctxt->input->base);
11911
1.35M
    else
11912
1.35M
        avail = xmlBufUse(ctxt->input->buf->buffer) -
11913
1.35M
                (ctxt->input->cur - ctxt->input->base);
11914
1.35M
    if (avail < 2)
11915
35.6k
        goto done;
11916
1.32M
    cur = ctxt->input->cur[0];
11917
1.32M
    next = ctxt->input->cur[1];
11918
1.32M
          if ((cur == '<') && (next == '?')) {
11919
150k
        if ((!terminate) &&
11920
150k
                        (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11921
35.7k
      goto done;
11922
#ifdef DEBUG_PUSH
11923
        xmlGenericError(xmlGenericErrorContext,
11924
          "PP: Parsing PI\n");
11925
#endif
11926
114k
        xmlParsePI(ctxt);
11927
114k
        if (ctxt->instate == XML_PARSER_EOF)
11928
0
      goto done;
11929
1.17M
    } else if ((cur == '<') && (next == '!') &&
11930
1.17M
        (ctxt->input->cur[2] == '-') &&
11931
1.17M
        (ctxt->input->cur[3] == '-')) {
11932
90.6k
        if ((!terminate) &&
11933
90.6k
                        (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11934
52.4k
      goto done;
11935
#ifdef DEBUG_PUSH
11936
        xmlGenericError(xmlGenericErrorContext,
11937
          "PP: Parsing Comment\n");
11938
#endif
11939
38.1k
        xmlParseComment(ctxt);
11940
38.1k
        if (ctxt->instate == XML_PARSER_EOF)
11941
0
      goto done;
11942
1.08M
    } else if ((ctxt->instate == XML_PARSER_MISC) &&
11943
1.08M
                    (cur == '<') && (next == '!') &&
11944
1.08M
        (ctxt->input->cur[2] == 'D') &&
11945
1.08M
        (ctxt->input->cur[3] == 'O') &&
11946
1.08M
        (ctxt->input->cur[4] == 'C') &&
11947
1.08M
        (ctxt->input->cur[5] == 'T') &&
11948
1.08M
        (ctxt->input->cur[6] == 'Y') &&
11949
1.08M
        (ctxt->input->cur[7] == 'P') &&
11950
1.08M
        (ctxt->input->cur[8] == 'E')) {
11951
549k
        if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11952
212k
                        goto done;
11953
#ifdef DEBUG_PUSH
11954
        xmlGenericError(xmlGenericErrorContext,
11955
          "PP: Parsing internal subset\n");
11956
#endif
11957
336k
        ctxt->inSubset = 1;
11958
336k
        xmlParseDocTypeDecl(ctxt);
11959
336k
        if (ctxt->instate == XML_PARSER_EOF)
11960
0
      goto done;
11961
336k
        if (RAW == '[') {
11962
242k
      ctxt->instate = XML_PARSER_DTD;
11963
#ifdef DEBUG_PUSH
11964
      xmlGenericError(xmlGenericErrorContext,
11965
        "PP: entering DTD\n");
11966
#endif
11967
242k
        } else {
11968
      /*
11969
       * Create and update the external subset.
11970
       */
11971
94.6k
      ctxt->inSubset = 2;
11972
94.6k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11973
94.6k
          (ctxt->sax->externalSubset != NULL))
11974
88.8k
          ctxt->sax->externalSubset(ctxt->userData,
11975
88.8k
            ctxt->intSubName, ctxt->extSubSystem,
11976
88.8k
            ctxt->extSubURI);
11977
94.6k
      ctxt->inSubset = 0;
11978
94.6k
      xmlCleanSpecialAttr(ctxt);
11979
94.6k
      ctxt->instate = XML_PARSER_PROLOG;
11980
#ifdef DEBUG_PUSH
11981
      xmlGenericError(xmlGenericErrorContext,
11982
        "PP: entering PROLOG\n");
11983
#endif
11984
94.6k
        }
11985
532k
    } else if ((cur == '<') && (next == '!') &&
11986
532k
               (avail <
11987
43.7k
                            (ctxt->instate == XML_PARSER_MISC ? 9 : 4))) {
11988
38.9k
        goto done;
11989
493k
    } else if (ctxt->instate == XML_PARSER_EPILOG) {
11990
11.0k
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11991
11.0k
        xmlHaltParser(ctxt);
11992
#ifdef DEBUG_PUSH
11993
        xmlGenericError(xmlGenericErrorContext,
11994
          "PP: entering EOF\n");
11995
#endif
11996
11.0k
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11997
11.0k
      ctxt->sax->endDocument(ctxt->userData);
11998
11.0k
        goto done;
11999
482k
                } else {
12000
482k
        ctxt->instate = XML_PARSER_START_TAG;
12001
#ifdef DEBUG_PUSH
12002
        xmlGenericError(xmlGenericErrorContext,
12003
          "PP: entering START_TAG\n");
12004
#endif
12005
482k
    }
12006
971k
    break;
12007
971k
            case XML_PARSER_DTD: {
12008
734k
                if ((!terminate) && (!xmlParseLookupInternalSubset(ctxt)))
12009
520k
                    goto done;
12010
214k
    xmlParseInternalSubset(ctxt);
12011
214k
    if (ctxt->instate == XML_PARSER_EOF)
12012
90.0k
        goto done;
12013
124k
    ctxt->inSubset = 2;
12014
124k
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
12015
124k
        (ctxt->sax->externalSubset != NULL))
12016
120k
        ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
12017
120k
          ctxt->extSubSystem, ctxt->extSubURI);
12018
124k
    ctxt->inSubset = 0;
12019
124k
    xmlCleanSpecialAttr(ctxt);
12020
124k
    if (ctxt->instate == XML_PARSER_EOF)
12021
4.90k
        goto done;
12022
119k
    ctxt->instate = XML_PARSER_PROLOG;
12023
#ifdef DEBUG_PUSH
12024
    xmlGenericError(xmlGenericErrorContext,
12025
      "PP: entering PROLOG\n");
12026
#endif
12027
119k
                break;
12028
124k
      }
12029
0
            case XML_PARSER_COMMENT:
12030
0
    xmlGenericError(xmlGenericErrorContext,
12031
0
      "PP: internal error, state == COMMENT\n");
12032
0
    ctxt->instate = XML_PARSER_CONTENT;
12033
#ifdef DEBUG_PUSH
12034
    xmlGenericError(xmlGenericErrorContext,
12035
      "PP: entering CONTENT\n");
12036
#endif
12037
0
    break;
12038
0
            case XML_PARSER_IGNORE:
12039
0
    xmlGenericError(xmlGenericErrorContext,
12040
0
      "PP: internal error, state == IGNORE");
12041
0
          ctxt->instate = XML_PARSER_DTD;
12042
#ifdef DEBUG_PUSH
12043
    xmlGenericError(xmlGenericErrorContext,
12044
      "PP: entering DTD\n");
12045
#endif
12046
0
          break;
12047
0
            case XML_PARSER_PI:
12048
0
    xmlGenericError(xmlGenericErrorContext,
12049
0
      "PP: internal error, state == PI\n");
12050
0
    ctxt->instate = XML_PARSER_CONTENT;
12051
#ifdef DEBUG_PUSH
12052
    xmlGenericError(xmlGenericErrorContext,
12053
      "PP: entering CONTENT\n");
12054
#endif
12055
0
    break;
12056
0
            case XML_PARSER_ENTITY_DECL:
12057
0
    xmlGenericError(xmlGenericErrorContext,
12058
0
      "PP: internal error, state == ENTITY_DECL\n");
12059
0
    ctxt->instate = XML_PARSER_DTD;
12060
#ifdef DEBUG_PUSH
12061
    xmlGenericError(xmlGenericErrorContext,
12062
      "PP: entering DTD\n");
12063
#endif
12064
0
    break;
12065
0
            case XML_PARSER_ENTITY_VALUE:
12066
0
    xmlGenericError(xmlGenericErrorContext,
12067
0
      "PP: internal error, state == ENTITY_VALUE\n");
12068
0
    ctxt->instate = XML_PARSER_CONTENT;
12069
#ifdef DEBUG_PUSH
12070
    xmlGenericError(xmlGenericErrorContext,
12071
      "PP: entering DTD\n");
12072
#endif
12073
0
    break;
12074
0
            case XML_PARSER_ATTRIBUTE_VALUE:
12075
0
    xmlGenericError(xmlGenericErrorContext,
12076
0
      "PP: internal error, state == ATTRIBUTE_VALUE\n");
12077
0
    ctxt->instate = XML_PARSER_START_TAG;
12078
#ifdef DEBUG_PUSH
12079
    xmlGenericError(xmlGenericErrorContext,
12080
      "PP: entering START_TAG\n");
12081
#endif
12082
0
    break;
12083
0
            case XML_PARSER_SYSTEM_LITERAL:
12084
0
    xmlGenericError(xmlGenericErrorContext,
12085
0
      "PP: internal error, state == SYSTEM_LITERAL\n");
12086
0
    ctxt->instate = XML_PARSER_START_TAG;
12087
#ifdef DEBUG_PUSH
12088
    xmlGenericError(xmlGenericErrorContext,
12089
      "PP: entering START_TAG\n");
12090
#endif
12091
0
    break;
12092
0
            case XML_PARSER_PUBLIC_LITERAL:
12093
0
    xmlGenericError(xmlGenericErrorContext,
12094
0
      "PP: internal error, state == PUBLIC_LITERAL\n");
12095
0
    ctxt->instate = XML_PARSER_START_TAG;
12096
#ifdef DEBUG_PUSH
12097
    xmlGenericError(xmlGenericErrorContext,
12098
      "PP: entering START_TAG\n");
12099
#endif
12100
0
    break;
12101
61.9M
  }
12102
61.9M
    }
12103
6.12M
done:
12104
#ifdef DEBUG_PUSH
12105
    xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
12106
#endif
12107
6.12M
    return(ret);
12108
203k
encoding_error:
12109
203k
    {
12110
203k
        char buffer[150];
12111
12112
203k
  snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12113
203k
      ctxt->input->cur[0], ctxt->input->cur[1],
12114
203k
      ctxt->input->cur[2], ctxt->input->cur[3]);
12115
203k
  __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12116
203k
         "Input is not proper UTF-8, indicate encoding !\n%s",
12117
203k
         BAD_CAST buffer, NULL);
12118
203k
    }
12119
203k
    return(0);
12120
6.53M
}
12121
12122
/**
12123
 * xmlParseChunk:
12124
 * @ctxt:  an XML parser context
12125
 * @chunk:  an char array
12126
 * @size:  the size in byte of the chunk
12127
 * @terminate:  last chunk indicator
12128
 *
12129
 * Parse a Chunk of memory
12130
 *
12131
 * Returns zero if no error, the xmlParserErrors otherwise.
12132
 */
12133
int
12134
xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
12135
7.63M
              int terminate) {
12136
7.63M
    int end_in_lf = 0;
12137
7.63M
    int remain = 0;
12138
12139
7.63M
    if (ctxt == NULL)
12140
0
        return(XML_ERR_INTERNAL_ERROR);
12141
7.63M
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12142
1.27M
        return(ctxt->errNo);
12143
6.35M
    if (ctxt->instate == XML_PARSER_EOF)
12144
1.77k
        return(-1);
12145
6.35M
    if (ctxt->input == NULL)
12146
0
        return(-1);
12147
12148
6.35M
    ctxt->progressive = 1;
12149
6.35M
    if (ctxt->instate == XML_PARSER_START)
12150
1.54M
        xmlDetectSAX2(ctxt);
12151
6.35M
    if ((size > 0) && (chunk != NULL) && (!terminate) &&
12152
6.35M
        (chunk[size - 1] == '\r')) {
12153
45.4k
  end_in_lf = 1;
12154
45.4k
  size--;
12155
45.4k
    }
12156
12157
6.54M
xmldecl_done:
12158
12159
6.54M
    if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12160
6.54M
        (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF))  {
12161
6.04M
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12162
6.04M
  size_t cur = ctxt->input->cur - ctxt->input->base;
12163
6.04M
  int res;
12164
12165
        /*
12166
         * Specific handling if we autodetected an encoding, we should not
12167
         * push more than the first line ... which depend on the encoding
12168
         * And only push the rest once the final encoding was detected
12169
         */
12170
6.04M
        if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
12171
6.04M
            (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
12172
295k
            unsigned int len = 45;
12173
12174
295k
            if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12175
295k
                               BAD_CAST "UTF-16")) ||
12176
295k
                (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12177
2.65k
                               BAD_CAST "UTF16")))
12178
292k
                len = 90;
12179
2.65k
            else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12180
2.65k
                                    BAD_CAST "UCS-4")) ||
12181
2.65k
                     (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12182
871
                                    BAD_CAST "UCS4")))
12183
1.78k
                len = 180;
12184
12185
295k
            if (ctxt->input->buf->rawconsumed < len)
12186
15.2k
                len -= ctxt->input->buf->rawconsumed;
12187
12188
            /*
12189
             * Change size for reading the initial declaration only
12190
             * if size is greater than len. Otherwise, memmove in xmlBufferAdd
12191
             * will blindly copy extra bytes from memory.
12192
             */
12193
295k
            if ((unsigned int) size > len) {
12194
187k
                remain = size - len;
12195
187k
                size = len;
12196
187k
            } else {
12197
108k
                remain = 0;
12198
108k
            }
12199
295k
        }
12200
6.04M
  res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12201
6.04M
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12202
6.04M
  if (res < 0) {
12203
1.83k
      ctxt->errNo = XML_PARSER_EOF;
12204
1.83k
      xmlHaltParser(ctxt);
12205
1.83k
      return (XML_PARSER_EOF);
12206
1.83k
  }
12207
#ifdef DEBUG_PUSH
12208
  xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12209
#endif
12210
12211
6.04M
    } else if (ctxt->instate != XML_PARSER_EOF) {
12212
498k
  if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
12213
498k
      xmlParserInputBufferPtr in = ctxt->input->buf;
12214
498k
      if ((in->encoder != NULL) && (in->buffer != NULL) &&
12215
498k
        (in->raw != NULL)) {
12216
49.6k
    int nbchars;
12217
49.6k
    size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
12218
49.6k
    size_t current = ctxt->input->cur - ctxt->input->base;
12219
12220
49.6k
    nbchars = xmlCharEncInput(in, terminate);
12221
49.6k
    xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
12222
49.6k
    if (nbchars < 0) {
12223
        /* TODO 2.6.0 */
12224
3.26k
        xmlGenericError(xmlGenericErrorContext,
12225
3.26k
            "xmlParseChunk: encoder error\n");
12226
3.26k
                    xmlHaltParser(ctxt);
12227
3.26k
        return(XML_ERR_INVALID_ENCODING);
12228
3.26k
    }
12229
49.6k
      }
12230
498k
  }
12231
498k
    }
12232
12233
6.53M
    if (remain != 0) {
12234
187k
        xmlParseTryOrFinish(ctxt, 0);
12235
6.34M
    } else {
12236
6.34M
        xmlParseTryOrFinish(ctxt, terminate);
12237
6.34M
    }
12238
6.53M
    if (ctxt->instate == XML_PARSER_EOF)
12239
185k
        return(ctxt->errNo);
12240
12241
6.35M
    if ((ctxt->input != NULL) &&
12242
6.35M
         (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
12243
6.35M
         ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
12244
6.35M
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
12245
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
12246
0
        xmlHaltParser(ctxt);
12247
0
    }
12248
6.35M
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12249
207k
        return(ctxt->errNo);
12250
12251
6.14M
    if (remain != 0) {
12252
185k
        chunk += size;
12253
185k
        size = remain;
12254
185k
        remain = 0;
12255
185k
        goto xmldecl_done;
12256
185k
    }
12257
5.95M
    if ((end_in_lf == 1) && (ctxt->input != NULL) &&
12258
5.95M
        (ctxt->input->buf != NULL)) {
12259
44.1k
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
12260
44.1k
           ctxt->input);
12261
44.1k
  size_t current = ctxt->input->cur - ctxt->input->base;
12262
12263
44.1k
  xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
12264
12265
44.1k
  xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
12266
44.1k
            base, current);
12267
44.1k
    }
12268
5.95M
    if (terminate) {
12269
  /*
12270
   * Check for termination
12271
   */
12272
213k
  int cur_avail = 0;
12273
12274
213k
  if (ctxt->input != NULL) {
12275
213k
      if (ctxt->input->buf == NULL)
12276
0
    cur_avail = ctxt->input->length -
12277
0
          (ctxt->input->cur - ctxt->input->base);
12278
213k
      else
12279
213k
    cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
12280
213k
                    (ctxt->input->cur - ctxt->input->base);
12281
213k
  }
12282
12283
213k
  if ((ctxt->instate != XML_PARSER_EOF) &&
12284
213k
      (ctxt->instate != XML_PARSER_EPILOG)) {
12285
187k
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12286
187k
  }
12287
213k
  if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) {
12288
507
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12289
507
  }
12290
213k
  if (ctxt->instate != XML_PARSER_EOF) {
12291
213k
      if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
12292
213k
    ctxt->sax->endDocument(ctxt->userData);
12293
213k
  }
12294
213k
  ctxt->instate = XML_PARSER_EOF;
12295
213k
    }
12296
5.95M
    if (ctxt->wellFormed == 0)
12297
2.94M
  return((xmlParserErrors) ctxt->errNo);
12298
3.01M
    else
12299
3.01M
        return(0);
12300
5.95M
}
12301
12302
/************************************************************************
12303
 *                  *
12304
 *    I/O front end functions to the parser     *
12305
 *                  *
12306
 ************************************************************************/
12307
12308
/**
12309
 * xmlCreatePushParserCtxt:
12310
 * @sax:  a SAX handler
12311
 * @user_data:  The user data returned on SAX callbacks
12312
 * @chunk:  a pointer to an array of chars
12313
 * @size:  number of chars in the array
12314
 * @filename:  an optional file name or URI
12315
 *
12316
 * Create a parser context for using the XML parser in push mode.
12317
 * If @buffer and @size are non-NULL, the data is used to detect
12318
 * the encoding.  The remaining characters will be parsed so they
12319
 * don't need to be fed in again through xmlParseChunk.
12320
 * To allow content encoding detection, @size should be >= 4
12321
 * The value of @filename is used for fetching external entities
12322
 * and error/warning reports.
12323
 *
12324
 * Returns the new parser context or NULL
12325
 */
12326
12327
xmlParserCtxtPtr
12328
xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12329
736k
                        const char *chunk, int size, const char *filename) {
12330
736k
    xmlParserCtxtPtr ctxt;
12331
736k
    xmlParserInputPtr inputStream;
12332
736k
    xmlParserInputBufferPtr buf;
12333
736k
    xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12334
12335
    /*
12336
     * plug some encoding conversion routines
12337
     */
12338
736k
    if ((chunk != NULL) && (size >= 4))
12339
360k
  enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12340
12341
736k
    buf = xmlAllocParserInputBuffer(enc);
12342
736k
    if (buf == NULL) return(NULL);
12343
12344
736k
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
12345
736k
    if (ctxt == NULL) {
12346
0
        xmlErrMemory(NULL, "creating parser: out of memory\n");
12347
0
  xmlFreeParserInputBuffer(buf);
12348
0
  return(NULL);
12349
0
    }
12350
736k
    ctxt->dictNames = 1;
12351
736k
    if (filename == NULL) {
12352
368k
  ctxt->directory = NULL;
12353
368k
    } else {
12354
368k
        ctxt->directory = xmlParserGetDirectory(filename);
12355
368k
    }
12356
12357
736k
    inputStream = xmlNewInputStream(ctxt);
12358
736k
    if (inputStream == NULL) {
12359
0
  xmlFreeParserCtxt(ctxt);
12360
0
  xmlFreeParserInputBuffer(buf);
12361
0
  return(NULL);
12362
0
    }
12363
12364
736k
    if (filename == NULL)
12365
368k
  inputStream->filename = NULL;
12366
368k
    else {
12367
368k
  inputStream->filename = (char *)
12368
368k
      xmlCanonicPath((const xmlChar *) filename);
12369
368k
  if (inputStream->filename == NULL) {
12370
0
            xmlFreeInputStream(inputStream);
12371
0
      xmlFreeParserCtxt(ctxt);
12372
0
      xmlFreeParserInputBuffer(buf);
12373
0
      return(NULL);
12374
0
  }
12375
368k
    }
12376
736k
    inputStream->buf = buf;
12377
736k
    xmlBufResetInput(inputStream->buf->buffer, inputStream);
12378
736k
    inputPush(ctxt, inputStream);
12379
12380
    /*
12381
     * If the caller didn't provide an initial 'chunk' for determining
12382
     * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12383
     * that it can be automatically determined later
12384
     */
12385
736k
    ctxt->charset = XML_CHAR_ENCODING_NONE;
12386
12387
736k
    if ((size != 0) && (chunk != NULL) &&
12388
736k
        (ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
12389
360k
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12390
360k
  size_t cur = ctxt->input->cur - ctxt->input->base;
12391
12392
360k
  xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12393
12394
360k
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12395
#ifdef DEBUG_PUSH
12396
  xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12397
#endif
12398
360k
    }
12399
12400
736k
    if (enc != XML_CHAR_ENCODING_NONE) {
12401
190k
        xmlSwitchEncoding(ctxt, enc);
12402
190k
    }
12403
12404
736k
    return(ctxt);
12405
736k
}
12406
#endif /* LIBXML_PUSH_ENABLED */
12407
12408
/**
12409
 * xmlHaltParser:
12410
 * @ctxt:  an XML parser context
12411
 *
12412
 * Blocks further parser processing don't override error
12413
 * for internal use
12414
 */
12415
static void
12416
657k
xmlHaltParser(xmlParserCtxtPtr ctxt) {
12417
657k
    if (ctxt == NULL)
12418
0
        return;
12419
657k
    ctxt->instate = XML_PARSER_EOF;
12420
657k
    ctxt->disableSAX = 1;
12421
666k
    while (ctxt->inputNr > 1)
12422
9.38k
        xmlFreeInputStream(inputPop(ctxt));
12423
657k
    if (ctxt->input != NULL) {
12424
        /*
12425
   * in case there was a specific allocation deallocate before
12426
   * overriding base
12427
   */
12428
657k
        if (ctxt->input->free != NULL) {
12429
0
      ctxt->input->free((xmlChar *) ctxt->input->base);
12430
0
      ctxt->input->free = NULL;
12431
0
  }
12432
657k
        if (ctxt->input->buf != NULL) {
12433
581k
            xmlFreeParserInputBuffer(ctxt->input->buf);
12434
581k
            ctxt->input->buf = NULL;
12435
581k
        }
12436
657k
  ctxt->input->cur = BAD_CAST"";
12437
657k
        ctxt->input->length = 0;
12438
657k
  ctxt->input->base = ctxt->input->cur;
12439
657k
        ctxt->input->end = ctxt->input->cur;
12440
657k
    }
12441
657k
}
12442
12443
/**
12444
 * xmlStopParser:
12445
 * @ctxt:  an XML parser context
12446
 *
12447
 * Blocks further parser processing
12448
 */
12449
void
12450
368k
xmlStopParser(xmlParserCtxtPtr ctxt) {
12451
368k
    if (ctxt == NULL)
12452
0
        return;
12453
368k
    xmlHaltParser(ctxt);
12454
368k
    ctxt->errNo = XML_ERR_USER_STOP;
12455
368k
}
12456
12457
/**
12458
 * xmlCreateIOParserCtxt:
12459
 * @sax:  a SAX handler
12460
 * @user_data:  The user data returned on SAX callbacks
12461
 * @ioread:  an I/O read function
12462
 * @ioclose:  an I/O close function
12463
 * @ioctx:  an I/O handler
12464
 * @enc:  the charset encoding if known
12465
 *
12466
 * Create a parser context for using the XML parser with an existing
12467
 * I/O stream
12468
 *
12469
 * Returns the new parser context or NULL
12470
 */
12471
xmlParserCtxtPtr
12472
xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12473
  xmlInputReadCallback   ioread, xmlInputCloseCallback  ioclose,
12474
0
  void *ioctx, xmlCharEncoding enc) {
12475
0
    xmlParserCtxtPtr ctxt;
12476
0
    xmlParserInputPtr inputStream;
12477
0
    xmlParserInputBufferPtr buf;
12478
12479
0
    if (ioread == NULL) return(NULL);
12480
12481
0
    buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
12482
0
    if (buf == NULL) {
12483
0
        if (ioclose != NULL)
12484
0
            ioclose(ioctx);
12485
0
        return (NULL);
12486
0
    }
12487
12488
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
12489
0
    if (ctxt == NULL) {
12490
0
  xmlFreeParserInputBuffer(buf);
12491
0
  return(NULL);
12492
0
    }
12493
12494
0
    inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12495
0
    if (inputStream == NULL) {
12496
0
  xmlFreeParserCtxt(ctxt);
12497
0
  return(NULL);
12498
0
    }
12499
0
    inputPush(ctxt, inputStream);
12500
12501
0
    return(ctxt);
12502
0
}
12503
12504
#ifdef LIBXML_VALID_ENABLED
12505
/************************************************************************
12506
 *                  *
12507
 *    Front ends when parsing a DTD       *
12508
 *                  *
12509
 ************************************************************************/
12510
12511
/**
12512
 * xmlIOParseDTD:
12513
 * @sax:  the SAX handler block or NULL
12514
 * @input:  an Input Buffer
12515
 * @enc:  the charset encoding if known
12516
 *
12517
 * Load and parse a DTD
12518
 *
12519
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12520
 * @input will be freed by the function in any case.
12521
 */
12522
12523
xmlDtdPtr
12524
xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12525
0
        xmlCharEncoding enc) {
12526
0
    xmlDtdPtr ret = NULL;
12527
0
    xmlParserCtxtPtr ctxt;
12528
0
    xmlParserInputPtr pinput = NULL;
12529
0
    xmlChar start[4];
12530
12531
0
    if (input == NULL)
12532
0
  return(NULL);
12533
12534
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12535
0
    if (ctxt == NULL) {
12536
0
        xmlFreeParserInputBuffer(input);
12537
0
  return(NULL);
12538
0
    }
12539
12540
    /* We are loading a DTD */
12541
0
    ctxt->options |= XML_PARSE_DTDLOAD;
12542
12543
0
    xmlDetectSAX2(ctxt);
12544
12545
    /*
12546
     * generate a parser input from the I/O handler
12547
     */
12548
12549
0
    pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12550
0
    if (pinput == NULL) {
12551
0
        xmlFreeParserInputBuffer(input);
12552
0
  xmlFreeParserCtxt(ctxt);
12553
0
  return(NULL);
12554
0
    }
12555
12556
    /*
12557
     * plug some encoding conversion routines here.
12558
     */
12559
0
    if (xmlPushInput(ctxt, pinput) < 0) {
12560
0
  xmlFreeParserCtxt(ctxt);
12561
0
  return(NULL);
12562
0
    }
12563
0
    if (enc != XML_CHAR_ENCODING_NONE) {
12564
0
        xmlSwitchEncoding(ctxt, enc);
12565
0
    }
12566
12567
0
    pinput->filename = NULL;
12568
0
    pinput->line = 1;
12569
0
    pinput->col = 1;
12570
0
    pinput->base = ctxt->input->cur;
12571
0
    pinput->cur = ctxt->input->cur;
12572
0
    pinput->free = NULL;
12573
12574
    /*
12575
     * let's parse that entity knowing it's an external subset.
12576
     */
12577
0
    ctxt->inSubset = 2;
12578
0
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12579
0
    if (ctxt->myDoc == NULL) {
12580
0
  xmlErrMemory(ctxt, "New Doc failed");
12581
0
  return(NULL);
12582
0
    }
12583
0
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
12584
0
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12585
0
                                 BAD_CAST "none", BAD_CAST "none");
12586
12587
0
    if ((enc == XML_CHAR_ENCODING_NONE) &&
12588
0
        ((ctxt->input->end - ctxt->input->cur) >= 4)) {
12589
  /*
12590
   * Get the 4 first bytes and decode the charset
12591
   * if enc != XML_CHAR_ENCODING_NONE
12592
   * plug some encoding conversion routines.
12593
   */
12594
0
  start[0] = RAW;
12595
0
  start[1] = NXT(1);
12596
0
  start[2] = NXT(2);
12597
0
  start[3] = NXT(3);
12598
0
  enc = xmlDetectCharEncoding(start, 4);
12599
0
  if (enc != XML_CHAR_ENCODING_NONE) {
12600
0
      xmlSwitchEncoding(ctxt, enc);
12601
0
  }
12602
0
    }
12603
12604
0
    xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12605
12606
0
    if (ctxt->myDoc != NULL) {
12607
0
  if (ctxt->wellFormed) {
12608
0
      ret = ctxt->myDoc->extSubset;
12609
0
      ctxt->myDoc->extSubset = NULL;
12610
0
      if (ret != NULL) {
12611
0
    xmlNodePtr tmp;
12612
12613
0
    ret->doc = NULL;
12614
0
    tmp = ret->children;
12615
0
    while (tmp != NULL) {
12616
0
        tmp->doc = NULL;
12617
0
        tmp = tmp->next;
12618
0
    }
12619
0
      }
12620
0
  } else {
12621
0
      ret = NULL;
12622
0
  }
12623
0
        xmlFreeDoc(ctxt->myDoc);
12624
0
        ctxt->myDoc = NULL;
12625
0
    }
12626
0
    xmlFreeParserCtxt(ctxt);
12627
12628
0
    return(ret);
12629
0
}
12630
12631
/**
12632
 * xmlSAXParseDTD:
12633
 * @sax:  the SAX handler block
12634
 * @ExternalID:  a NAME* containing the External ID of the DTD
12635
 * @SystemID:  a NAME* containing the URL to the DTD
12636
 *
12637
 * DEPRECATED: Don't use.
12638
 *
12639
 * Load and parse an external subset.
12640
 *
12641
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12642
 */
12643
12644
xmlDtdPtr
12645
xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12646
0
                          const xmlChar *SystemID) {
12647
0
    xmlDtdPtr ret = NULL;
12648
0
    xmlParserCtxtPtr ctxt;
12649
0
    xmlParserInputPtr input = NULL;
12650
0
    xmlCharEncoding enc;
12651
0
    xmlChar* systemIdCanonic;
12652
12653
0
    if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12654
12655
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12656
0
    if (ctxt == NULL) {
12657
0
  return(NULL);
12658
0
    }
12659
12660
    /* We are loading a DTD */
12661
0
    ctxt->options |= XML_PARSE_DTDLOAD;
12662
12663
    /*
12664
     * Canonicalise the system ID
12665
     */
12666
0
    systemIdCanonic = xmlCanonicPath(SystemID);
12667
0
    if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
12668
0
  xmlFreeParserCtxt(ctxt);
12669
0
  return(NULL);
12670
0
    }
12671
12672
    /*
12673
     * Ask the Entity resolver to load the damn thing
12674
     */
12675
12676
0
    if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
12677
0
  input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12678
0
                                   systemIdCanonic);
12679
0
    if (input == NULL) {
12680
0
  xmlFreeParserCtxt(ctxt);
12681
0
  if (systemIdCanonic != NULL)
12682
0
      xmlFree(systemIdCanonic);
12683
0
  return(NULL);
12684
0
    }
12685
12686
    /*
12687
     * plug some encoding conversion routines here.
12688
     */
12689
0
    if (xmlPushInput(ctxt, input) < 0) {
12690
0
  xmlFreeParserCtxt(ctxt);
12691
0
  if (systemIdCanonic != NULL)
12692
0
      xmlFree(systemIdCanonic);
12693
0
  return(NULL);
12694
0
    }
12695
0
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12696
0
  enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12697
0
  xmlSwitchEncoding(ctxt, enc);
12698
0
    }
12699
12700
0
    if (input->filename == NULL)
12701
0
  input->filename = (char *) systemIdCanonic;
12702
0
    else
12703
0
  xmlFree(systemIdCanonic);
12704
0
    input->line = 1;
12705
0
    input->col = 1;
12706
0
    input->base = ctxt->input->cur;
12707
0
    input->cur = ctxt->input->cur;
12708
0
    input->free = NULL;
12709
12710
    /*
12711
     * let's parse that entity knowing it's an external subset.
12712
     */
12713
0
    ctxt->inSubset = 2;
12714
0
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12715
0
    if (ctxt->myDoc == NULL) {
12716
0
  xmlErrMemory(ctxt, "New Doc failed");
12717
0
  xmlFreeParserCtxt(ctxt);
12718
0
  return(NULL);
12719
0
    }
12720
0
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
12721
0
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12722
0
                                 ExternalID, SystemID);
12723
0
    xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12724
12725
0
    if (ctxt->myDoc != NULL) {
12726
0
  if (ctxt->wellFormed) {
12727
0
      ret = ctxt->myDoc->extSubset;
12728
0
      ctxt->myDoc->extSubset = NULL;
12729
0
      if (ret != NULL) {
12730
0
    xmlNodePtr tmp;
12731
12732
0
    ret->doc = NULL;
12733
0
    tmp = ret->children;
12734
0
    while (tmp != NULL) {
12735
0
        tmp->doc = NULL;
12736
0
        tmp = tmp->next;
12737
0
    }
12738
0
      }
12739
0
  } else {
12740
0
      ret = NULL;
12741
0
  }
12742
0
        xmlFreeDoc(ctxt->myDoc);
12743
0
        ctxt->myDoc = NULL;
12744
0
    }
12745
0
    xmlFreeParserCtxt(ctxt);
12746
12747
0
    return(ret);
12748
0
}
12749
12750
12751
/**
12752
 * xmlParseDTD:
12753
 * @ExternalID:  a NAME* containing the External ID of the DTD
12754
 * @SystemID:  a NAME* containing the URL to the DTD
12755
 *
12756
 * Load and parse an external subset.
12757
 *
12758
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12759
 */
12760
12761
xmlDtdPtr
12762
0
xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12763
0
    return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12764
0
}
12765
#endif /* LIBXML_VALID_ENABLED */
12766
12767
/************************************************************************
12768
 *                  *
12769
 *    Front ends when parsing an Entity     *
12770
 *                  *
12771
 ************************************************************************/
12772
12773
/**
12774
 * xmlParseCtxtExternalEntity:
12775
 * @ctx:  the existing parsing context
12776
 * @URL:  the URL for the entity to load
12777
 * @ID:  the System ID for the entity to load
12778
 * @lst:  the return value for the set of parsed nodes
12779
 *
12780
 * Parse an external general entity within an existing parsing context
12781
 * An external general parsed entity is well-formed if it matches the
12782
 * production labeled extParsedEnt.
12783
 *
12784
 * [78] extParsedEnt ::= TextDecl? content
12785
 *
12786
 * Returns 0 if the entity is well formed, -1 in case of args problem and
12787
 *    the parser error code otherwise
12788
 */
12789
12790
int
12791
xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
12792
0
                 const xmlChar *ID, xmlNodePtr *lst) {
12793
0
    void *userData;
12794
12795
0
    if (ctx == NULL) return(-1);
12796
    /*
12797
     * If the user provided their own SAX callbacks, then reuse the
12798
     * userData callback field, otherwise the expected setup in a
12799
     * DOM builder is to have userData == ctxt
12800
     */
12801
0
    if (ctx->userData == ctx)
12802
0
        userData = NULL;
12803
0
    else
12804
0
        userData = ctx->userData;
12805
0
    return xmlParseExternalEntityPrivate(ctx->myDoc, ctx, ctx->sax,
12806
0
                                         userData, ctx->depth + 1,
12807
0
                                         URL, ID, lst);
12808
0
}
12809
12810
/**
12811
 * xmlParseExternalEntityPrivate:
12812
 * @doc:  the document the chunk pertains to
12813
 * @oldctxt:  the previous parser context if available
12814
 * @sax:  the SAX handler block (possibly NULL)
12815
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12816
 * @depth:  Used for loop detection, use 0
12817
 * @URL:  the URL for the entity to load
12818
 * @ID:  the System ID for the entity to load
12819
 * @list:  the return value for the set of parsed nodes
12820
 *
12821
 * Private version of xmlParseExternalEntity()
12822
 *
12823
 * Returns 0 if the entity is well formed, -1 in case of args problem and
12824
 *    the parser error code otherwise
12825
 */
12826
12827
static xmlParserErrors
12828
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
12829
                xmlSAXHandlerPtr sax,
12830
          void *user_data, int depth, const xmlChar *URL,
12831
182k
          const xmlChar *ID, xmlNodePtr *list) {
12832
182k
    xmlParserCtxtPtr ctxt;
12833
182k
    xmlDocPtr newDoc;
12834
182k
    xmlNodePtr newRoot;
12835
182k
    xmlParserErrors ret = XML_ERR_OK;
12836
182k
    xmlChar start[4];
12837
182k
    xmlCharEncoding enc;
12838
12839
182k
    if (((depth > 40) &&
12840
182k
  ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12841
182k
  (depth > 100)) {
12842
0
  xmlFatalErrMsg(oldctxt, XML_ERR_ENTITY_LOOP,
12843
0
                       "Maximum entity nesting depth exceeded");
12844
0
        return(XML_ERR_ENTITY_LOOP);
12845
0
    }
12846
12847
182k
    if (list != NULL)
12848
32.4k
        *list = NULL;
12849
182k
    if ((URL == NULL) && (ID == NULL))
12850
259
  return(XML_ERR_INTERNAL_ERROR);
12851
182k
    if (doc == NULL)
12852
0
  return(XML_ERR_INTERNAL_ERROR);
12853
12854
182k
    ctxt = xmlCreateEntityParserCtxtInternal(sax, user_data, URL, ID, NULL,
12855
182k
                                             oldctxt);
12856
182k
    if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
12857
32.8k
    if (oldctxt != NULL) {
12858
32.8k
        ctxt->nbErrors = oldctxt->nbErrors;
12859
32.8k
        ctxt->nbWarnings = oldctxt->nbWarnings;
12860
32.8k
    }
12861
32.8k
    xmlDetectSAX2(ctxt);
12862
12863
32.8k
    newDoc = xmlNewDoc(BAD_CAST "1.0");
12864
32.8k
    if (newDoc == NULL) {
12865
0
  xmlFreeParserCtxt(ctxt);
12866
0
  return(XML_ERR_INTERNAL_ERROR);
12867
0
    }
12868
32.8k
    newDoc->properties = XML_DOC_INTERNAL;
12869
32.8k
    if (doc) {
12870
32.8k
        newDoc->intSubset = doc->intSubset;
12871
32.8k
        newDoc->extSubset = doc->extSubset;
12872
32.8k
        if (doc->dict) {
12873
18.1k
            newDoc->dict = doc->dict;
12874
18.1k
            xmlDictReference(newDoc->dict);
12875
18.1k
        }
12876
32.8k
        if (doc->URL != NULL) {
12877
20.4k
            newDoc->URL = xmlStrdup(doc->URL);
12878
20.4k
        }
12879
32.8k
    }
12880
32.8k
    newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12881
32.8k
    if (newRoot == NULL) {
12882
0
  if (sax != NULL)
12883
0
  xmlFreeParserCtxt(ctxt);
12884
0
  newDoc->intSubset = NULL;
12885
0
  newDoc->extSubset = NULL;
12886
0
        xmlFreeDoc(newDoc);
12887
0
  return(XML_ERR_INTERNAL_ERROR);
12888
0
    }
12889
32.8k
    xmlAddChild((xmlNodePtr) newDoc, newRoot);
12890
32.8k
    nodePush(ctxt, newDoc->children);
12891
32.8k
    if (doc == NULL) {
12892
0
        ctxt->myDoc = newDoc;
12893
32.8k
    } else {
12894
32.8k
        ctxt->myDoc = doc;
12895
32.8k
        newRoot->doc = doc;
12896
32.8k
    }
12897
12898
    /*
12899
     * Get the 4 first bytes and decode the charset
12900
     * if enc != XML_CHAR_ENCODING_NONE
12901
     * plug some encoding conversion routines.
12902
     */
12903
32.8k
    GROW;
12904
32.8k
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12905
30.9k
  start[0] = RAW;
12906
30.9k
  start[1] = NXT(1);
12907
30.9k
  start[2] = NXT(2);
12908
30.9k
  start[3] = NXT(3);
12909
30.9k
  enc = xmlDetectCharEncoding(start, 4);
12910
30.9k
  if (enc != XML_CHAR_ENCODING_NONE) {
12911
1.87k
      xmlSwitchEncoding(ctxt, enc);
12912
1.87k
  }
12913
30.9k
    }
12914
12915
    /*
12916
     * Parse a possible text declaration first
12917
     */
12918
32.8k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
12919
1.18k
  xmlParseTextDecl(ctxt);
12920
        /*
12921
         * An XML-1.0 document can't reference an entity not XML-1.0
12922
         */
12923
1.18k
        if ((xmlStrEqual(oldctxt->version, BAD_CAST "1.0")) &&
12924
1.18k
            (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
12925
149
            xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
12926
149
                           "Version mismatch between document and entity\n");
12927
149
        }
12928
1.18k
    }
12929
12930
32.8k
    ctxt->instate = XML_PARSER_CONTENT;
12931
32.8k
    ctxt->depth = depth;
12932
32.8k
    if (oldctxt != NULL) {
12933
32.8k
  ctxt->_private = oldctxt->_private;
12934
32.8k
  ctxt->loadsubset = oldctxt->loadsubset;
12935
32.8k
  ctxt->validate = oldctxt->validate;
12936
32.8k
  ctxt->valid = oldctxt->valid;
12937
32.8k
  ctxt->replaceEntities = oldctxt->replaceEntities;
12938
32.8k
        if (oldctxt->validate) {
12939
13.6k
            ctxt->vctxt.error = oldctxt->vctxt.error;
12940
13.6k
            ctxt->vctxt.warning = oldctxt->vctxt.warning;
12941
13.6k
            ctxt->vctxt.userData = oldctxt->vctxt.userData;
12942
13.6k
            ctxt->vctxt.flags = oldctxt->vctxt.flags;
12943
13.6k
        }
12944
32.8k
  ctxt->external = oldctxt->external;
12945
32.8k
        if (ctxt->dict) xmlDictFree(ctxt->dict);
12946
32.8k
        ctxt->dict = oldctxt->dict;
12947
32.8k
        ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12948
32.8k
        ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12949
32.8k
        ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12950
32.8k
        ctxt->dictNames = oldctxt->dictNames;
12951
32.8k
        ctxt->attsDefault = oldctxt->attsDefault;
12952
32.8k
        ctxt->attsSpecial = oldctxt->attsSpecial;
12953
32.8k
        ctxt->linenumbers = oldctxt->linenumbers;
12954
32.8k
  ctxt->record_info = oldctxt->record_info;
12955
32.8k
  ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
12956
32.8k
  ctxt->node_seq.length = oldctxt->node_seq.length;
12957
32.8k
  ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
12958
32.8k
    } else {
12959
  /*
12960
   * Doing validity checking on chunk without context
12961
   * doesn't make sense
12962
   */
12963
0
  ctxt->_private = NULL;
12964
0
  ctxt->validate = 0;
12965
0
  ctxt->external = 2;
12966
0
  ctxt->loadsubset = 0;
12967
0
    }
12968
12969
32.8k
    xmlParseContent(ctxt);
12970
12971
32.8k
    if ((RAW == '<') && (NXT(1) == '/')) {
12972
760
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12973
32.0k
    } else if (RAW != 0) {
12974
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12975
0
    }
12976
32.8k
    if (ctxt->node != newDoc->children) {
12977
4.33k
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12978
4.33k
    }
12979
12980
32.8k
    if (!ctxt->wellFormed) {
12981
12.8k
  ret = (xmlParserErrors)ctxt->errNo;
12982
12.8k
        if (oldctxt != NULL) {
12983
12.8k
            oldctxt->errNo = ctxt->errNo;
12984
12.8k
            oldctxt->wellFormed = 0;
12985
12.8k
            xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12986
12.8k
        }
12987
19.9k
    } else {
12988
19.9k
  if (list != NULL) {
12989
4.36k
      xmlNodePtr cur;
12990
12991
      /*
12992
       * Return the newly created nodeset after unlinking it from
12993
       * they pseudo parent.
12994
       */
12995
4.36k
      cur = newDoc->children->children;
12996
4.36k
      *list = cur;
12997
170k
      while (cur != NULL) {
12998
166k
    cur->parent = NULL;
12999
166k
    cur = cur->next;
13000
166k
      }
13001
4.36k
            newDoc->children->children = NULL;
13002
4.36k
  }
13003
19.9k
  ret = XML_ERR_OK;
13004
19.9k
    }
13005
13006
    /*
13007
     * Also record the size of the entity parsed
13008
     */
13009
32.8k
    if (ctxt->input != NULL && oldctxt != NULL) {
13010
32.8k
        unsigned long consumed = ctxt->input->consumed;
13011
13012
32.8k
        xmlSaturatedAddSizeT(&consumed, ctxt->input->cur - ctxt->input->base);
13013
13014
32.8k
        xmlSaturatedAdd(&oldctxt->sizeentities, consumed);
13015
32.8k
        xmlSaturatedAdd(&oldctxt->sizeentities, ctxt->sizeentities);
13016
13017
32.8k
        xmlSaturatedAdd(&oldctxt->sizeentcopy, consumed);
13018
32.8k
        xmlSaturatedAdd(&oldctxt->sizeentcopy, ctxt->sizeentcopy);
13019
32.8k
    }
13020
13021
32.8k
    if (oldctxt != NULL) {
13022
32.8k
        ctxt->dict = NULL;
13023
32.8k
        ctxt->attsDefault = NULL;
13024
32.8k
        ctxt->attsSpecial = NULL;
13025
32.8k
        oldctxt->nbErrors = ctxt->nbErrors;
13026
32.8k
        oldctxt->nbWarnings = ctxt->nbWarnings;
13027
32.8k
        oldctxt->validate = ctxt->validate;
13028
32.8k
        oldctxt->valid = ctxt->valid;
13029
32.8k
        oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
13030
32.8k
        oldctxt->node_seq.length = ctxt->node_seq.length;
13031
32.8k
        oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
13032
32.8k
    }
13033
32.8k
    ctxt->node_seq.maximum = 0;
13034
32.8k
    ctxt->node_seq.length = 0;
13035
32.8k
    ctxt->node_seq.buffer = NULL;
13036
32.8k
    xmlFreeParserCtxt(ctxt);
13037
32.8k
    newDoc->intSubset = NULL;
13038
32.8k
    newDoc->extSubset = NULL;
13039
32.8k
    xmlFreeDoc(newDoc);
13040
13041
32.8k
    return(ret);
13042
32.8k
}
13043
13044
#ifdef LIBXML_SAX1_ENABLED
13045
/**
13046
 * xmlParseExternalEntity:
13047
 * @doc:  the document the chunk pertains to
13048
 * @sax:  the SAX handler block (possibly NULL)
13049
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13050
 * @depth:  Used for loop detection, use 0
13051
 * @URL:  the URL for the entity to load
13052
 * @ID:  the System ID for the entity to load
13053
 * @lst:  the return value for the set of parsed nodes
13054
 *
13055
 * Parse an external general entity
13056
 * An external general parsed entity is well-formed if it matches the
13057
 * production labeled extParsedEnt.
13058
 *
13059
 * [78] extParsedEnt ::= TextDecl? content
13060
 *
13061
 * Returns 0 if the entity is well formed, -1 in case of args problem and
13062
 *    the parser error code otherwise
13063
 */
13064
13065
int
13066
xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
13067
0
    int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
13068
0
    return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
13069
0
                           ID, lst));
13070
0
}
13071
13072
/**
13073
 * xmlParseBalancedChunkMemory:
13074
 * @doc:  the document the chunk pertains to (must not be NULL)
13075
 * @sax:  the SAX handler block (possibly NULL)
13076
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13077
 * @depth:  Used for loop detection, use 0
13078
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13079
 * @lst:  the return value for the set of parsed nodes
13080
 *
13081
 * Parse a well-balanced chunk of an XML document
13082
 * called by the parser
13083
 * The allowed sequence for the Well Balanced Chunk is the one defined by
13084
 * the content production in the XML grammar:
13085
 *
13086
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13087
 *
13088
 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13089
 *    the parser error code otherwise
13090
 */
13091
13092
int
13093
xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13094
0
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
13095
0
    return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
13096
0
                                                depth, string, lst, 0 );
13097
0
}
13098
#endif /* LIBXML_SAX1_ENABLED */
13099
13100
/**
13101
 * xmlParseBalancedChunkMemoryInternal:
13102
 * @oldctxt:  the existing parsing context
13103
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13104
 * @user_data:  the user data field for the parser context
13105
 * @lst:  the return value for the set of parsed nodes
13106
 *
13107
 *
13108
 * Parse a well-balanced chunk of an XML document
13109
 * called by the parser
13110
 * The allowed sequence for the Well Balanced Chunk is the one defined by
13111
 * the content production in the XML grammar:
13112
 *
13113
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13114
 *
13115
 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13116
 * error code otherwise
13117
 *
13118
 * In case recover is set to 1, the nodelist will not be empty even if
13119
 * the parsed chunk is not well balanced.
13120
 */
13121
static xmlParserErrors
13122
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
13123
60.6k
  const xmlChar *string, void *user_data, xmlNodePtr *lst) {
13124
60.6k
    xmlParserCtxtPtr ctxt;
13125
60.6k
    xmlDocPtr newDoc = NULL;
13126
60.6k
    xmlNodePtr newRoot;
13127
60.6k
    xmlSAXHandlerPtr oldsax = NULL;
13128
60.6k
    xmlNodePtr content = NULL;
13129
60.6k
    xmlNodePtr last = NULL;
13130
60.6k
    int size;
13131
60.6k
    xmlParserErrors ret = XML_ERR_OK;
13132
60.6k
#ifdef SAX2
13133
60.6k
    int i;
13134
60.6k
#endif
13135
13136
60.6k
    if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13137
60.6k
        (oldctxt->depth >  100)) {
13138
72
  xmlFatalErrMsg(oldctxt, XML_ERR_ENTITY_LOOP,
13139
72
                       "Maximum entity nesting depth exceeded");
13140
72
  return(XML_ERR_ENTITY_LOOP);
13141
72
    }
13142
13143
13144
60.5k
    if (lst != NULL)
13145
54.0k
        *lst = NULL;
13146
60.5k
    if (string == NULL)
13147
7
        return(XML_ERR_INTERNAL_ERROR);
13148
13149
60.5k
    size = xmlStrlen(string);
13150
13151
60.5k
    ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13152
60.5k
    if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13153
58.8k
    ctxt->nbErrors = oldctxt->nbErrors;
13154
58.8k
    ctxt->nbWarnings = oldctxt->nbWarnings;
13155
58.8k
    if (user_data != NULL)
13156
0
  ctxt->userData = user_data;
13157
58.8k
    else
13158
58.8k
  ctxt->userData = ctxt;
13159
58.8k
    if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13160
58.8k
    ctxt->dict = oldctxt->dict;
13161
58.8k
    ctxt->input_id = oldctxt->input_id;
13162
58.8k
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13163
58.8k
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13164
58.8k
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13165
13166
58.8k
#ifdef SAX2
13167
    /* propagate namespaces down the entity */
13168
59.3k
    for (i = 0;i < oldctxt->nsNr;i += 2) {
13169
568
        nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
13170
568
    }
13171
58.8k
#endif
13172
13173
58.8k
    oldsax = ctxt->sax;
13174
58.8k
    ctxt->sax = oldctxt->sax;
13175
58.8k
    xmlDetectSAX2(ctxt);
13176
58.8k
    ctxt->replaceEntities = oldctxt->replaceEntities;
13177
58.8k
    ctxt->options = oldctxt->options;
13178
13179
58.8k
    ctxt->_private = oldctxt->_private;
13180
58.8k
    if (oldctxt->myDoc == NULL) {
13181
0
  newDoc = xmlNewDoc(BAD_CAST "1.0");
13182
0
  if (newDoc == NULL) {
13183
0
      ctxt->sax = oldsax;
13184
0
      ctxt->dict = NULL;
13185
0
      xmlFreeParserCtxt(ctxt);
13186
0
      return(XML_ERR_INTERNAL_ERROR);
13187
0
  }
13188
0
  newDoc->properties = XML_DOC_INTERNAL;
13189
0
  newDoc->dict = ctxt->dict;
13190
0
  xmlDictReference(newDoc->dict);
13191
0
  ctxt->myDoc = newDoc;
13192
58.8k
    } else {
13193
58.8k
  ctxt->myDoc = oldctxt->myDoc;
13194
58.8k
        content = ctxt->myDoc->children;
13195
58.8k
  last = ctxt->myDoc->last;
13196
58.8k
    }
13197
58.8k
    newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
13198
58.8k
    if (newRoot == NULL) {
13199
0
  ctxt->sax = oldsax;
13200
0
  ctxt->dict = NULL;
13201
0
  xmlFreeParserCtxt(ctxt);
13202
0
  if (newDoc != NULL) {
13203
0
      xmlFreeDoc(newDoc);
13204
0
  }
13205
0
  return(XML_ERR_INTERNAL_ERROR);
13206
0
    }
13207
58.8k
    ctxt->myDoc->children = NULL;
13208
58.8k
    ctxt->myDoc->last = NULL;
13209
58.8k
    xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
13210
58.8k
    nodePush(ctxt, ctxt->myDoc->children);
13211
58.8k
    ctxt->instate = XML_PARSER_CONTENT;
13212
58.8k
    ctxt->depth = oldctxt->depth;
13213
13214
58.8k
    ctxt->validate = 0;
13215
58.8k
    ctxt->loadsubset = oldctxt->loadsubset;
13216
58.8k
    if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
13217
  /*
13218
   * ID/IDREF registration will be done in xmlValidateElement below
13219
   */
13220
52.0k
  ctxt->loadsubset |= XML_SKIP_IDS;
13221
52.0k
    }
13222
58.8k
    ctxt->dictNames = oldctxt->dictNames;
13223
58.8k
    ctxt->attsDefault = oldctxt->attsDefault;
13224
58.8k
    ctxt->attsSpecial = oldctxt->attsSpecial;
13225
13226
58.8k
    xmlParseContent(ctxt);
13227
58.8k
    if ((RAW == '<') && (NXT(1) == '/')) {
13228
424
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13229
58.3k
    } else if (RAW != 0) {
13230
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13231
0
    }
13232
58.8k
    if (ctxt->node != ctxt->myDoc->children) {
13233
2.01k
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13234
2.01k
    }
13235
13236
58.8k
    if (!ctxt->wellFormed) {
13237
11.3k
  ret = (xmlParserErrors)ctxt->errNo;
13238
11.3k
        oldctxt->errNo = ctxt->errNo;
13239
11.3k
        oldctxt->wellFormed = 0;
13240
11.3k
        xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13241
47.4k
    } else {
13242
47.4k
        ret = XML_ERR_OK;
13243
47.4k
    }
13244
13245
58.8k
    if ((lst != NULL) && (ret == XML_ERR_OK)) {
13246
42.2k
  xmlNodePtr cur;
13247
13248
  /*
13249
   * Return the newly created nodeset after unlinking it from
13250
   * they pseudo parent.
13251
   */
13252
42.2k
  cur = ctxt->myDoc->children->children;
13253
42.2k
  *lst = cur;
13254
181k
  while (cur != NULL) {
13255
139k
#ifdef LIBXML_VALID_ENABLED
13256
139k
      if ((oldctxt->validate) && (oldctxt->wellFormed) &&
13257
139k
    (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
13258
139k
    (cur->type == XML_ELEMENT_NODE)) {
13259
20.6k
    oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
13260
20.6k
      oldctxt->myDoc, cur);
13261
20.6k
      }
13262
139k
#endif /* LIBXML_VALID_ENABLED */
13263
139k
      cur->parent = NULL;
13264
139k
      cur = cur->next;
13265
139k
  }
13266
42.2k
  ctxt->myDoc->children->children = NULL;
13267
42.2k
    }
13268
58.8k
    if (ctxt->myDoc != NULL) {
13269
58.8k
  xmlFreeNode(ctxt->myDoc->children);
13270
58.8k
        ctxt->myDoc->children = content;
13271
58.8k
        ctxt->myDoc->last = last;
13272
58.8k
    }
13273
13274
    /*
13275
     * Also record the size of the entity parsed
13276
     */
13277
58.8k
    if (ctxt->input != NULL && oldctxt != NULL) {
13278
58.8k
        unsigned long consumed = ctxt->input->consumed;
13279
13280
58.8k
        xmlSaturatedAddSizeT(&consumed, ctxt->input->cur - ctxt->input->base);
13281
13282
58.8k
        xmlSaturatedAdd(&oldctxt->sizeentcopy, consumed);
13283
58.8k
        xmlSaturatedAdd(&oldctxt->sizeentcopy, ctxt->sizeentcopy);
13284
58.8k
    }
13285
13286
58.8k
    oldctxt->nbErrors = ctxt->nbErrors;
13287
58.8k
    oldctxt->nbWarnings = ctxt->nbWarnings;
13288
58.8k
    ctxt->sax = oldsax;
13289
58.8k
    ctxt->dict = NULL;
13290
58.8k
    ctxt->attsDefault = NULL;
13291
58.8k
    ctxt->attsSpecial = NULL;
13292
58.8k
    xmlFreeParserCtxt(ctxt);
13293
58.8k
    if (newDoc != NULL) {
13294
0
  xmlFreeDoc(newDoc);
13295
0
    }
13296
13297
58.8k
    return(ret);
13298
58.8k
}
13299
13300
/**
13301
 * xmlParseInNodeContext:
13302
 * @node:  the context node
13303
 * @data:  the input string
13304
 * @datalen:  the input string length in bytes
13305
 * @options:  a combination of xmlParserOption
13306
 * @lst:  the return value for the set of parsed nodes
13307
 *
13308
 * Parse a well-balanced chunk of an XML document
13309
 * within the context (DTD, namespaces, etc ...) of the given node.
13310
 *
13311
 * The allowed sequence for the data is a Well Balanced Chunk defined by
13312
 * the content production in the XML grammar:
13313
 *
13314
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13315
 *
13316
 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13317
 * error code otherwise
13318
 */
13319
xmlParserErrors
13320
xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13321
0
                      int options, xmlNodePtr *lst) {
13322
0
#ifdef SAX2
13323
0
    xmlParserCtxtPtr ctxt;
13324
0
    xmlDocPtr doc = NULL;
13325
0
    xmlNodePtr fake, cur;
13326
0
    int nsnr = 0;
13327
13328
0
    xmlParserErrors ret = XML_ERR_OK;
13329
13330
    /*
13331
     * check all input parameters, grab the document
13332
     */
13333
0
    if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13334
0
        return(XML_ERR_INTERNAL_ERROR);
13335
0
    switch (node->type) {
13336
0
        case XML_ELEMENT_NODE:
13337
0
        case XML_ATTRIBUTE_NODE:
13338
0
        case XML_TEXT_NODE:
13339
0
        case XML_CDATA_SECTION_NODE:
13340
0
        case XML_ENTITY_REF_NODE:
13341
0
        case XML_PI_NODE:
13342
0
        case XML_COMMENT_NODE:
13343
0
        case XML_DOCUMENT_NODE:
13344
0
        case XML_HTML_DOCUMENT_NODE:
13345
0
      break;
13346
0
  default:
13347
0
      return(XML_ERR_INTERNAL_ERROR);
13348
13349
0
    }
13350
0
    while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13351
0
           (node->type != XML_DOCUMENT_NODE) &&
13352
0
     (node->type != XML_HTML_DOCUMENT_NODE))
13353
0
  node = node->parent;
13354
0
    if (node == NULL)
13355
0
  return(XML_ERR_INTERNAL_ERROR);
13356
0
    if (node->type == XML_ELEMENT_NODE)
13357
0
  doc = node->doc;
13358
0
    else
13359
0
        doc = (xmlDocPtr) node;
13360
0
    if (doc == NULL)
13361
0
  return(XML_ERR_INTERNAL_ERROR);
13362
13363
    /*
13364
     * allocate a context and set-up everything not related to the
13365
     * node position in the tree
13366
     */
13367
0
    if (doc->type == XML_DOCUMENT_NODE)
13368
0
  ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13369
0
#ifdef LIBXML_HTML_ENABLED
13370
0
    else if (doc->type == XML_HTML_DOCUMENT_NODE) {
13371
0
  ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
13372
        /*
13373
         * When parsing in context, it makes no sense to add implied
13374
         * elements like html/body/etc...
13375
         */
13376
0
        options |= HTML_PARSE_NOIMPLIED;
13377
0
    }
13378
0
#endif
13379
0
    else
13380
0
        return(XML_ERR_INTERNAL_ERROR);
13381
13382
0
    if (ctxt == NULL)
13383
0
        return(XML_ERR_NO_MEMORY);
13384
13385
    /*
13386
     * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13387
     * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13388
     * we must wait until the last moment to free the original one.
13389
     */
13390
0
    if (doc->dict != NULL) {
13391
0
        if (ctxt->dict != NULL)
13392
0
      xmlDictFree(ctxt->dict);
13393
0
  ctxt->dict = doc->dict;
13394
0
    } else
13395
0
        options |= XML_PARSE_NODICT;
13396
13397
0
    if (doc->encoding != NULL) {
13398
0
        xmlCharEncodingHandlerPtr hdlr;
13399
13400
0
        if (ctxt->encoding != NULL)
13401
0
      xmlFree((xmlChar *) ctxt->encoding);
13402
0
        ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13403
13404
0
        hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding);
13405
0
        if (hdlr != NULL) {
13406
0
            xmlSwitchToEncoding(ctxt, hdlr);
13407
0
  } else {
13408
0
            return(XML_ERR_UNSUPPORTED_ENCODING);
13409
0
        }
13410
0
    }
13411
13412
0
    xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13413
0
    xmlDetectSAX2(ctxt);
13414
0
    ctxt->myDoc = doc;
13415
    /* parsing in context, i.e. as within existing content */
13416
0
    ctxt->input_id = 2;
13417
0
    ctxt->instate = XML_PARSER_CONTENT;
13418
13419
0
    fake = xmlNewDocComment(node->doc, NULL);
13420
0
    if (fake == NULL) {
13421
0
        xmlFreeParserCtxt(ctxt);
13422
0
  return(XML_ERR_NO_MEMORY);
13423
0
    }
13424
0
    xmlAddChild(node, fake);
13425
13426
0
    if (node->type == XML_ELEMENT_NODE) {
13427
0
  nodePush(ctxt, node);
13428
  /*
13429
   * initialize the SAX2 namespaces stack
13430
   */
13431
0
  cur = node;
13432
0
  while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13433
0
      xmlNsPtr ns = cur->nsDef;
13434
0
      const xmlChar *iprefix, *ihref;
13435
13436
0
      while (ns != NULL) {
13437
0
    if (ctxt->dict) {
13438
0
        iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13439
0
        ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13440
0
    } else {
13441
0
        iprefix = ns->prefix;
13442
0
        ihref = ns->href;
13443
0
    }
13444
13445
0
          if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13446
0
        nsPush(ctxt, iprefix, ihref);
13447
0
        nsnr++;
13448
0
    }
13449
0
    ns = ns->next;
13450
0
      }
13451
0
      cur = cur->parent;
13452
0
  }
13453
0
    }
13454
13455
0
    if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13456
  /*
13457
   * ID/IDREF registration will be done in xmlValidateElement below
13458
   */
13459
0
  ctxt->loadsubset |= XML_SKIP_IDS;
13460
0
    }
13461
13462
0
#ifdef LIBXML_HTML_ENABLED
13463
0
    if (doc->type == XML_HTML_DOCUMENT_NODE)
13464
0
        __htmlParseContent(ctxt);
13465
0
    else
13466
0
#endif
13467
0
  xmlParseContent(ctxt);
13468
13469
0
    nsPop(ctxt, nsnr);
13470
0
    if ((RAW == '<') && (NXT(1) == '/')) {
13471
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13472
0
    } else if (RAW != 0) {
13473
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13474
0
    }
13475
0
    if ((ctxt->node != NULL) && (ctxt->node != node)) {
13476
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13477
0
  ctxt->wellFormed = 0;
13478
0
    }
13479
13480
0
    if (!ctxt->wellFormed) {
13481
0
        if (ctxt->errNo == 0)
13482
0
      ret = XML_ERR_INTERNAL_ERROR;
13483
0
  else
13484
0
      ret = (xmlParserErrors)ctxt->errNo;
13485
0
    } else {
13486
0
        ret = XML_ERR_OK;
13487
0
    }
13488
13489
    /*
13490
     * Return the newly created nodeset after unlinking it from
13491
     * the pseudo sibling.
13492
     */
13493
13494
0
    cur = fake->next;
13495
0
    fake->next = NULL;
13496
0
    node->last = fake;
13497
13498
0
    if (cur != NULL) {
13499
0
  cur->prev = NULL;
13500
0
    }
13501
13502
0
    *lst = cur;
13503
13504
0
    while (cur != NULL) {
13505
0
  cur->parent = NULL;
13506
0
  cur = cur->next;
13507
0
    }
13508
13509
0
    xmlUnlinkNode(fake);
13510
0
    xmlFreeNode(fake);
13511
13512
13513
0
    if (ret != XML_ERR_OK) {
13514
0
        xmlFreeNodeList(*lst);
13515
0
  *lst = NULL;
13516
0
    }
13517
13518
0
    if (doc->dict != NULL)
13519
0
        ctxt->dict = NULL;
13520
0
    xmlFreeParserCtxt(ctxt);
13521
13522
0
    return(ret);
13523
#else /* !SAX2 */
13524
    return(XML_ERR_INTERNAL_ERROR);
13525
#endif
13526
0
}
13527
13528
#ifdef LIBXML_SAX1_ENABLED
13529
/**
13530
 * xmlParseBalancedChunkMemoryRecover:
13531
 * @doc:  the document the chunk pertains to (must not be NULL)
13532
 * @sax:  the SAX handler block (possibly NULL)
13533
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13534
 * @depth:  Used for loop detection, use 0
13535
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13536
 * @lst:  the return value for the set of parsed nodes
13537
 * @recover: return nodes even if the data is broken (use 0)
13538
 *
13539
 *
13540
 * Parse a well-balanced chunk of an XML document
13541
 * called by the parser
13542
 * The allowed sequence for the Well Balanced Chunk is the one defined by
13543
 * the content production in the XML grammar:
13544
 *
13545
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13546
 *
13547
 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13548
 *    the parser error code otherwise
13549
 *
13550
 * In case recover is set to 1, the nodelist will not be empty even if
13551
 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13552
 * some extent.
13553
 */
13554
int
13555
xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13556
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
13557
0
     int recover) {
13558
0
    xmlParserCtxtPtr ctxt;
13559
0
    xmlDocPtr newDoc;
13560
0
    xmlSAXHandlerPtr oldsax = NULL;
13561
0
    xmlNodePtr content, newRoot;
13562
0
    int size;
13563
0
    int ret = 0;
13564
13565
0
    if (depth > 40) {
13566
0
  return(XML_ERR_ENTITY_LOOP);
13567
0
    }
13568
13569
13570
0
    if (lst != NULL)
13571
0
        *lst = NULL;
13572
0
    if (string == NULL)
13573
0
        return(-1);
13574
13575
0
    size = xmlStrlen(string);
13576
13577
0
    ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13578
0
    if (ctxt == NULL) return(-1);
13579
0
    ctxt->userData = ctxt;
13580
0
    if (sax != NULL) {
13581
0
  oldsax = ctxt->sax;
13582
0
        ctxt->sax = sax;
13583
0
  if (user_data != NULL)
13584
0
      ctxt->userData = user_data;
13585
0
    }
13586
0
    newDoc = xmlNewDoc(BAD_CAST "1.0");
13587
0
    if (newDoc == NULL) {
13588
0
  xmlFreeParserCtxt(ctxt);
13589
0
  return(-1);
13590
0
    }
13591
0
    newDoc->properties = XML_DOC_INTERNAL;
13592
0
    if ((doc != NULL) && (doc->dict != NULL)) {
13593
0
        xmlDictFree(ctxt->dict);
13594
0
  ctxt->dict = doc->dict;
13595
0
  xmlDictReference(ctxt->dict);
13596
0
  ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13597
0
  ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13598
0
  ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13599
0
  ctxt->dictNames = 1;
13600
0
    } else {
13601
0
  xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
13602
0
    }
13603
    /* doc == NULL is only supported for historic reasons */
13604
0
    if (doc != NULL) {
13605
0
  newDoc->intSubset = doc->intSubset;
13606
0
  newDoc->extSubset = doc->extSubset;
13607
0
    }
13608
0
    newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13609
0
    if (newRoot == NULL) {
13610
0
  if (sax != NULL)
13611
0
      ctxt->sax = oldsax;
13612
0
  xmlFreeParserCtxt(ctxt);
13613
0
  newDoc->intSubset = NULL;
13614
0
  newDoc->extSubset = NULL;
13615
0
        xmlFreeDoc(newDoc);
13616
0
  return(-1);
13617
0
    }
13618
0
    xmlAddChild((xmlNodePtr) newDoc, newRoot);
13619
0
    nodePush(ctxt, newRoot);
13620
    /* doc == NULL is only supported for historic reasons */
13621
0
    if (doc == NULL) {
13622
0
  ctxt->myDoc = newDoc;
13623
0
    } else {
13624
0
  ctxt->myDoc = newDoc;
13625
0
  newDoc->children->doc = doc;
13626
  /* Ensure that doc has XML spec namespace */
13627
0
  xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13628
0
  newDoc->oldNs = doc->oldNs;
13629
0
    }
13630
0
    ctxt->instate = XML_PARSER_CONTENT;
13631
0
    ctxt->input_id = 2;
13632
0
    ctxt->depth = depth;
13633
13634
    /*
13635
     * Doing validity checking on chunk doesn't make sense
13636
     */
13637
0
    ctxt->validate = 0;
13638
0
    ctxt->loadsubset = 0;
13639
0
    xmlDetectSAX2(ctxt);
13640
13641
0
    if ( doc != NULL ){
13642
0
        content = doc->children;
13643
0
        doc->children = NULL;
13644
0
        xmlParseContent(ctxt);
13645
0
        doc->children = content;
13646
0
    }
13647
0
    else {
13648
0
        xmlParseContent(ctxt);
13649
0
    }
13650
0
    if ((RAW == '<') && (NXT(1) == '/')) {
13651
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13652
0
    } else if (RAW != 0) {
13653
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13654
0
    }
13655
0
    if (ctxt->node != newDoc->children) {
13656
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13657
0
    }
13658
13659
0
    if (!ctxt->wellFormed) {
13660
0
        if (ctxt->errNo == 0)
13661
0
      ret = 1;
13662
0
  else
13663
0
      ret = ctxt->errNo;
13664
0
    } else {
13665
0
      ret = 0;
13666
0
    }
13667
13668
0
    if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13669
0
  xmlNodePtr cur;
13670
13671
  /*
13672
   * Return the newly created nodeset after unlinking it from
13673
   * they pseudo parent.
13674
   */
13675
0
  cur = newDoc->children->children;
13676
0
  *lst = cur;
13677
0
  while (cur != NULL) {
13678
0
      xmlSetTreeDoc(cur, doc);
13679
0
      cur->parent = NULL;
13680
0
      cur = cur->next;
13681
0
  }
13682
0
  newDoc->children->children = NULL;
13683
0
    }
13684
13685
0
    if (sax != NULL)
13686
0
  ctxt->sax = oldsax;
13687
0
    xmlFreeParserCtxt(ctxt);
13688
0
    newDoc->intSubset = NULL;
13689
0
    newDoc->extSubset = NULL;
13690
    /* This leaks the namespace list if doc == NULL */
13691
0
    newDoc->oldNs = NULL;
13692
0
    xmlFreeDoc(newDoc);
13693
13694
0
    return(ret);
13695
0
}
13696
13697
/**
13698
 * xmlSAXParseEntity:
13699
 * @sax:  the SAX handler block
13700
 * @filename:  the filename
13701
 *
13702
 * DEPRECATED: Don't use.
13703
 *
13704
 * parse an XML external entity out of context and build a tree.
13705
 * It use the given SAX function block to handle the parsing callback.
13706
 * If sax is NULL, fallback to the default DOM tree building routines.
13707
 *
13708
 * [78] extParsedEnt ::= TextDecl? content
13709
 *
13710
 * This correspond to a "Well Balanced" chunk
13711
 *
13712
 * Returns the resulting document tree
13713
 */
13714
13715
xmlDocPtr
13716
0
xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13717
0
    xmlDocPtr ret;
13718
0
    xmlParserCtxtPtr ctxt;
13719
13720
0
    ctxt = xmlCreateFileParserCtxt(filename);
13721
0
    if (ctxt == NULL) {
13722
0
  return(NULL);
13723
0
    }
13724
0
    if (sax != NULL) {
13725
0
  if (ctxt->sax != NULL)
13726
0
      xmlFree(ctxt->sax);
13727
0
        ctxt->sax = sax;
13728
0
        ctxt->userData = NULL;
13729
0
    }
13730
13731
0
    xmlParseExtParsedEnt(ctxt);
13732
13733
0
    if (ctxt->wellFormed)
13734
0
  ret = ctxt->myDoc;
13735
0
    else {
13736
0
        ret = NULL;
13737
0
        xmlFreeDoc(ctxt->myDoc);
13738
0
        ctxt->myDoc = NULL;
13739
0
    }
13740
0
    if (sax != NULL)
13741
0
        ctxt->sax = NULL;
13742
0
    xmlFreeParserCtxt(ctxt);
13743
13744
0
    return(ret);
13745
0
}
13746
13747
/**
13748
 * xmlParseEntity:
13749
 * @filename:  the filename
13750
 *
13751
 * parse an XML external entity out of context and build a tree.
13752
 *
13753
 * [78] extParsedEnt ::= TextDecl? content
13754
 *
13755
 * This correspond to a "Well Balanced" chunk
13756
 *
13757
 * Returns the resulting document tree
13758
 */
13759
13760
xmlDocPtr
13761
0
xmlParseEntity(const char *filename) {
13762
0
    return(xmlSAXParseEntity(NULL, filename));
13763
0
}
13764
#endif /* LIBXML_SAX1_ENABLED */
13765
13766
/**
13767
 * xmlCreateEntityParserCtxtInternal:
13768
 * @URL:  the entity URL
13769
 * @ID:  the entity PUBLIC ID
13770
 * @base:  a possible base for the target URI
13771
 * @pctx:  parser context used to set options on new context
13772
 *
13773
 * Create a parser context for an external entity
13774
 * Automatic support for ZLIB/Compress compressed document is provided
13775
 * by default if found at compile-time.
13776
 *
13777
 * Returns the new parser context or NULL
13778
 */
13779
static xmlParserCtxtPtr
13780
xmlCreateEntityParserCtxtInternal(xmlSAXHandlerPtr sax, void *userData,
13781
        const xmlChar *URL, const xmlChar *ID, const xmlChar *base,
13782
182k
        xmlParserCtxtPtr pctx) {
13783
182k
    xmlParserCtxtPtr ctxt;
13784
182k
    xmlParserInputPtr inputStream;
13785
182k
    char *directory = NULL;
13786
182k
    xmlChar *uri;
13787
13788
182k
    ctxt = xmlNewSAXParserCtxt(sax, userData);
13789
182k
    if (ctxt == NULL) {
13790
0
  return(NULL);
13791
0
    }
13792
13793
182k
    if (pctx != NULL) {
13794
182k
        ctxt->options = pctx->options;
13795
182k
        ctxt->_private = pctx->_private;
13796
182k
  ctxt->input_id = pctx->input_id;
13797
182k
    }
13798
13799
    /* Don't read from stdin. */
13800
182k
    if (xmlStrcmp(URL, BAD_CAST "-") == 0)
13801
24
        URL = BAD_CAST "./-";
13802
13803
182k
    uri = xmlBuildURI(URL, base);
13804
13805
182k
    if (uri == NULL) {
13806
6.50k
  inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
13807
6.50k
  if (inputStream == NULL) {
13808
6.39k
      xmlFreeParserCtxt(ctxt);
13809
6.39k
      return(NULL);
13810
6.39k
  }
13811
13812
110
  inputPush(ctxt, inputStream);
13813
13814
110
  if ((ctxt->directory == NULL) && (directory == NULL))
13815
110
      directory = xmlParserGetDirectory((char *)URL);
13816
110
  if ((ctxt->directory == NULL) && (directory != NULL))
13817
110
      ctxt->directory = directory;
13818
176k
    } else {
13819
176k
  inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
13820
176k
  if (inputStream == NULL) {
13821
143k
      xmlFree(uri);
13822
143k
      xmlFreeParserCtxt(ctxt);
13823
143k
      return(NULL);
13824
143k
  }
13825
13826
32.7k
  inputPush(ctxt, inputStream);
13827
13828
32.7k
  if ((ctxt->directory == NULL) && (directory == NULL))
13829
32.7k
      directory = xmlParserGetDirectory((char *)uri);
13830
32.7k
  if ((ctxt->directory == NULL) && (directory != NULL))
13831
32.7k
      ctxt->directory = directory;
13832
32.7k
  xmlFree(uri);
13833
32.7k
    }
13834
32.8k
    return(ctxt);
13835
182k
}
13836
13837
/**
13838
 * xmlCreateEntityParserCtxt:
13839
 * @URL:  the entity URL
13840
 * @ID:  the entity PUBLIC ID
13841
 * @base:  a possible base for the target URI
13842
 *
13843
 * Create a parser context for an external entity
13844
 * Automatic support for ZLIB/Compress compressed document is provided
13845
 * by default if found at compile-time.
13846
 *
13847
 * Returns the new parser context or NULL
13848
 */
13849
xmlParserCtxtPtr
13850
xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
13851
0
                    const xmlChar *base) {
13852
0
    return xmlCreateEntityParserCtxtInternal(NULL, NULL, URL, ID, base, NULL);
13853
13854
0
}
13855
13856
/************************************************************************
13857
 *                  *
13858
 *    Front ends when parsing from a file     *
13859
 *                  *
13860
 ************************************************************************/
13861
13862
/**
13863
 * xmlCreateURLParserCtxt:
13864
 * @filename:  the filename or URL
13865
 * @options:  a combination of xmlParserOption
13866
 *
13867
 * Create a parser context for a file or URL content.
13868
 * Automatic support for ZLIB/Compress compressed document is provided
13869
 * by default if found at compile-time and for file accesses
13870
 *
13871
 * Returns the new parser context or NULL
13872
 */
13873
xmlParserCtxtPtr
13874
xmlCreateURLParserCtxt(const char *filename, int options)
13875
0
{
13876
0
    xmlParserCtxtPtr ctxt;
13877
0
    xmlParserInputPtr inputStream;
13878
0
    char *directory = NULL;
13879
13880
0
    ctxt = xmlNewParserCtxt();
13881
0
    if (ctxt == NULL) {
13882
0
  xmlErrMemory(NULL, "cannot allocate parser context");
13883
0
  return(NULL);
13884
0
    }
13885
13886
0
    if (options)
13887
0
  xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13888
0
    ctxt->linenumbers = 1;
13889
13890
0
    inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
13891
0
    if (inputStream == NULL) {
13892
0
  xmlFreeParserCtxt(ctxt);
13893
0
  return(NULL);
13894
0
    }
13895
13896
0
    inputPush(ctxt, inputStream);
13897
0
    if ((ctxt->directory == NULL) && (directory == NULL))
13898
0
        directory = xmlParserGetDirectory(filename);
13899
0
    if ((ctxt->directory == NULL) && (directory != NULL))
13900
0
        ctxt->directory = directory;
13901
13902
0
    return(ctxt);
13903
0
}
13904
13905
/**
13906
 * xmlCreateFileParserCtxt:
13907
 * @filename:  the filename
13908
 *
13909
 * Create a parser context for a file content.
13910
 * Automatic support for ZLIB/Compress compressed document is provided
13911
 * by default if found at compile-time.
13912
 *
13913
 * Returns the new parser context or NULL
13914
 */
13915
xmlParserCtxtPtr
13916
xmlCreateFileParserCtxt(const char *filename)
13917
0
{
13918
0
    return(xmlCreateURLParserCtxt(filename, 0));
13919
0
}
13920
13921
#ifdef LIBXML_SAX1_ENABLED
13922
/**
13923
 * xmlSAXParseFileWithData:
13924
 * @sax:  the SAX handler block
13925
 * @filename:  the filename
13926
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
13927
 *             documents
13928
 * @data:  the userdata
13929
 *
13930
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
13931
 *
13932
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13933
 * compressed document is provided by default if found at compile-time.
13934
 * It use the given SAX function block to handle the parsing callback.
13935
 * If sax is NULL, fallback to the default DOM tree building routines.
13936
 *
13937
 * User data (void *) is stored within the parser context in the
13938
 * context's _private member, so it is available nearly everywhere in libxml
13939
 *
13940
 * Returns the resulting document tree
13941
 */
13942
13943
xmlDocPtr
13944
xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
13945
0
                        int recovery, void *data) {
13946
0
    xmlDocPtr ret;
13947
0
    xmlParserCtxtPtr ctxt;
13948
13949
0
    xmlInitParser();
13950
13951
0
    ctxt = xmlCreateFileParserCtxt(filename);
13952
0
    if (ctxt == NULL) {
13953
0
  return(NULL);
13954
0
    }
13955
0
    if (sax != NULL) {
13956
0
  if (ctxt->sax != NULL)
13957
0
      xmlFree(ctxt->sax);
13958
0
        ctxt->sax = sax;
13959
0
    }
13960
0
    xmlDetectSAX2(ctxt);
13961
0
    if (data!=NULL) {
13962
0
  ctxt->_private = data;
13963
0
    }
13964
13965
0
    if (ctxt->directory == NULL)
13966
0
        ctxt->directory = xmlParserGetDirectory(filename);
13967
13968
0
    ctxt->recovery = recovery;
13969
13970
0
    xmlParseDocument(ctxt);
13971
13972
0
    if ((ctxt->wellFormed) || recovery) {
13973
0
        ret = ctxt->myDoc;
13974
0
  if ((ret != NULL) && (ctxt->input->buf != NULL)) {
13975
0
      if (ctxt->input->buf->compressed > 0)
13976
0
    ret->compression = 9;
13977
0
      else
13978
0
    ret->compression = ctxt->input->buf->compressed;
13979
0
  }
13980
0
    }
13981
0
    else {
13982
0
       ret = NULL;
13983
0
       xmlFreeDoc(ctxt->myDoc);
13984
0
       ctxt->myDoc = NULL;
13985
0
    }
13986
0
    if (sax != NULL)
13987
0
        ctxt->sax = NULL;
13988
0
    xmlFreeParserCtxt(ctxt);
13989
13990
0
    return(ret);
13991
0
}
13992
13993
/**
13994
 * xmlSAXParseFile:
13995
 * @sax:  the SAX handler block
13996
 * @filename:  the filename
13997
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
13998
 *             documents
13999
 *
14000
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
14001
 *
14002
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14003
 * compressed document is provided by default if found at compile-time.
14004
 * It use the given SAX function block to handle the parsing callback.
14005
 * If sax is NULL, fallback to the default DOM tree building routines.
14006
 *
14007
 * Returns the resulting document tree
14008
 */
14009
14010
xmlDocPtr
14011
xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
14012
0
                          int recovery) {
14013
0
    return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
14014
0
}
14015
14016
/**
14017
 * xmlRecoverDoc:
14018
 * @cur:  a pointer to an array of xmlChar
14019
 *
14020
 * DEPRECATED: Use xmlReadDoc with XML_PARSE_RECOVER.
14021
 *
14022
 * parse an XML in-memory document and build a tree.
14023
 * In the case the document is not Well Formed, a attempt to build a
14024
 * tree is tried anyway
14025
 *
14026
 * Returns the resulting document tree or NULL in case of failure
14027
 */
14028
14029
xmlDocPtr
14030
0
xmlRecoverDoc(const xmlChar *cur) {
14031
0
    return(xmlSAXParseDoc(NULL, cur, 1));
14032
0
}
14033
14034
/**
14035
 * xmlParseFile:
14036
 * @filename:  the filename
14037
 *
14038
 * DEPRECATED: Use xmlReadFile.
14039
 *
14040
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14041
 * compressed document is provided by default if found at compile-time.
14042
 *
14043
 * Returns the resulting document tree if the file was wellformed,
14044
 * NULL otherwise.
14045
 */
14046
14047
xmlDocPtr
14048
0
xmlParseFile(const char *filename) {
14049
0
    return(xmlSAXParseFile(NULL, filename, 0));
14050
0
}
14051
14052
/**
14053
 * xmlRecoverFile:
14054
 * @filename:  the filename
14055
 *
14056
 * DEPRECATED: Use xmlReadFile with XML_PARSE_RECOVER.
14057
 *
14058
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14059
 * compressed document is provided by default if found at compile-time.
14060
 * In the case the document is not Well Formed, it attempts to build
14061
 * a tree anyway
14062
 *
14063
 * Returns the resulting document tree or NULL in case of failure
14064
 */
14065
14066
xmlDocPtr
14067
0
xmlRecoverFile(const char *filename) {
14068
0
    return(xmlSAXParseFile(NULL, filename, 1));
14069
0
}
14070
14071
14072
/**
14073
 * xmlSetupParserForBuffer:
14074
 * @ctxt:  an XML parser context
14075
 * @buffer:  a xmlChar * buffer
14076
 * @filename:  a file name
14077
 *
14078
 * DEPRECATED: Don't use.
14079
 *
14080
 * Setup the parser context to parse a new buffer; Clears any prior
14081
 * contents from the parser context. The buffer parameter must not be
14082
 * NULL, but the filename parameter can be
14083
 */
14084
void
14085
xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
14086
                             const char* filename)
14087
0
{
14088
0
    xmlParserInputPtr input;
14089
14090
0
    if ((ctxt == NULL) || (buffer == NULL))
14091
0
        return;
14092
14093
0
    input = xmlNewInputStream(ctxt);
14094
0
    if (input == NULL) {
14095
0
        xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
14096
0
        xmlClearParserCtxt(ctxt);
14097
0
        return;
14098
0
    }
14099
14100
0
    xmlClearParserCtxt(ctxt);
14101
0
    if (filename != NULL)
14102
0
        input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
14103
0
    input->base = buffer;
14104
0
    input->cur = buffer;
14105
0
    input->end = &buffer[xmlStrlen(buffer)];
14106
0
    inputPush(ctxt, input);
14107
0
}
14108
14109
/**
14110
 * xmlSAXUserParseFile:
14111
 * @sax:  a SAX handler
14112
 * @user_data:  The user data returned on SAX callbacks
14113
 * @filename:  a file name
14114
 *
14115
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
14116
 *
14117
 * parse an XML file and call the given SAX handler routines.
14118
 * Automatic support for ZLIB/Compress compressed document is provided
14119
 *
14120
 * Returns 0 in case of success or a error number otherwise
14121
 */
14122
int
14123
xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
14124
0
                    const char *filename) {
14125
0
    int ret = 0;
14126
0
    xmlParserCtxtPtr ctxt;
14127
14128
0
    ctxt = xmlCreateFileParserCtxt(filename);
14129
0
    if (ctxt == NULL) return -1;
14130
0
    if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14131
0
  xmlFree(ctxt->sax);
14132
0
    ctxt->sax = sax;
14133
0
    xmlDetectSAX2(ctxt);
14134
14135
0
    if (user_data != NULL)
14136
0
  ctxt->userData = user_data;
14137
14138
0
    xmlParseDocument(ctxt);
14139
14140
0
    if (ctxt->wellFormed)
14141
0
  ret = 0;
14142
0
    else {
14143
0
        if (ctxt->errNo != 0)
14144
0
      ret = ctxt->errNo;
14145
0
  else
14146
0
      ret = -1;
14147
0
    }
14148
0
    if (sax != NULL)
14149
0
  ctxt->sax = NULL;
14150
0
    if (ctxt->myDoc != NULL) {
14151
0
        xmlFreeDoc(ctxt->myDoc);
14152
0
  ctxt->myDoc = NULL;
14153
0
    }
14154
0
    xmlFreeParserCtxt(ctxt);
14155
14156
0
    return ret;
14157
0
}
14158
#endif /* LIBXML_SAX1_ENABLED */
14159
14160
/************************************************************************
14161
 *                  *
14162
 *    Front ends when parsing from memory     *
14163
 *                  *
14164
 ************************************************************************/
14165
14166
/**
14167
 * xmlCreateMemoryParserCtxt:
14168
 * @buffer:  a pointer to a char array
14169
 * @size:  the size of the array
14170
 *
14171
 * Create a parser context for an XML in-memory document.
14172
 *
14173
 * Returns the new parser context or NULL
14174
 */
14175
xmlParserCtxtPtr
14176
428k
xmlCreateMemoryParserCtxt(const char *buffer, int size) {
14177
428k
    xmlParserCtxtPtr ctxt;
14178
428k
    xmlParserInputPtr input;
14179
428k
    xmlParserInputBufferPtr buf;
14180
14181
428k
    if (buffer == NULL)
14182
0
  return(NULL);
14183
428k
    if (size <= 0)
14184
2.45k
  return(NULL);
14185
14186
426k
    ctxt = xmlNewParserCtxt();
14187
426k
    if (ctxt == NULL)
14188
0
  return(NULL);
14189
14190
426k
    buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14191
426k
    if (buf == NULL) {
14192
0
  xmlFreeParserCtxt(ctxt);
14193
0
  return(NULL);
14194
0
    }
14195
14196
426k
    input = xmlNewInputStream(ctxt);
14197
426k
    if (input == NULL) {
14198
0
  xmlFreeParserInputBuffer(buf);
14199
0
  xmlFreeParserCtxt(ctxt);
14200
0
  return(NULL);
14201
0
    }
14202
14203
426k
    input->filename = NULL;
14204
426k
    input->buf = buf;
14205
426k
    xmlBufResetInput(input->buf->buffer, input);
14206
14207
426k
    inputPush(ctxt, input);
14208
426k
    return(ctxt);
14209
426k
}
14210
14211
#ifdef LIBXML_SAX1_ENABLED
14212
/**
14213
 * xmlSAXParseMemoryWithData:
14214
 * @sax:  the SAX handler block
14215
 * @buffer:  an pointer to a char array
14216
 * @size:  the size of the array
14217
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14218
 *             documents
14219
 * @data:  the userdata
14220
 *
14221
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14222
 *
14223
 * parse an XML in-memory block and use the given SAX function block
14224
 * to handle the parsing callback. If sax is NULL, fallback to the default
14225
 * DOM tree building routines.
14226
 *
14227
 * User data (void *) is stored within the parser context in the
14228
 * context's _private member, so it is available nearly everywhere in libxml
14229
 *
14230
 * Returns the resulting document tree
14231
 */
14232
14233
xmlDocPtr
14234
xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14235
0
            int size, int recovery, void *data) {
14236
0
    xmlDocPtr ret;
14237
0
    xmlParserCtxtPtr ctxt;
14238
14239
0
    xmlInitParser();
14240
14241
0
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14242
0
    if (ctxt == NULL) return(NULL);
14243
0
    if (sax != NULL) {
14244
0
  if (ctxt->sax != NULL)
14245
0
      xmlFree(ctxt->sax);
14246
0
        ctxt->sax = sax;
14247
0
    }
14248
0
    xmlDetectSAX2(ctxt);
14249
0
    if (data!=NULL) {
14250
0
  ctxt->_private=data;
14251
0
    }
14252
14253
0
    ctxt->recovery = recovery;
14254
14255
0
    xmlParseDocument(ctxt);
14256
14257
0
    if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14258
0
    else {
14259
0
       ret = NULL;
14260
0
       xmlFreeDoc(ctxt->myDoc);
14261
0
       ctxt->myDoc = NULL;
14262
0
    }
14263
0
    if (sax != NULL)
14264
0
  ctxt->sax = NULL;
14265
0
    xmlFreeParserCtxt(ctxt);
14266
14267
0
    return(ret);
14268
0
}
14269
14270
/**
14271
 * xmlSAXParseMemory:
14272
 * @sax:  the SAX handler block
14273
 * @buffer:  an pointer to a char array
14274
 * @size:  the size of the array
14275
 * @recovery:  work in recovery mode, i.e. tries to read not Well Formed
14276
 *             documents
14277
 *
14278
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14279
 *
14280
 * parse an XML in-memory block and use the given SAX function block
14281
 * to handle the parsing callback. If sax is NULL, fallback to the default
14282
 * DOM tree building routines.
14283
 *
14284
 * Returns the resulting document tree
14285
 */
14286
xmlDocPtr
14287
xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14288
0
            int size, int recovery) {
14289
0
    return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
14290
0
}
14291
14292
/**
14293
 * xmlParseMemory:
14294
 * @buffer:  an pointer to a char array
14295
 * @size:  the size of the array
14296
 *
14297
 * DEPRECATED: Use xmlReadMemory.
14298
 *
14299
 * parse an XML in-memory block and build a tree.
14300
 *
14301
 * Returns the resulting document tree
14302
 */
14303
14304
0
xmlDocPtr xmlParseMemory(const char *buffer, int size) {
14305
0
   return(xmlSAXParseMemory(NULL, buffer, size, 0));
14306
0
}
14307
14308
/**
14309
 * xmlRecoverMemory:
14310
 * @buffer:  an pointer to a char array
14311
 * @size:  the size of the array
14312
 *
14313
 * DEPRECATED: Use xmlReadMemory with XML_PARSE_RECOVER.
14314
 *
14315
 * parse an XML in-memory block and build a tree.
14316
 * In the case the document is not Well Formed, an attempt to
14317
 * build a tree is tried anyway
14318
 *
14319
 * Returns the resulting document tree or NULL in case of error
14320
 */
14321
14322
0
xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
14323
0
   return(xmlSAXParseMemory(NULL, buffer, size, 1));
14324
0
}
14325
14326
/**
14327
 * xmlSAXUserParseMemory:
14328
 * @sax:  a SAX handler
14329
 * @user_data:  The user data returned on SAX callbacks
14330
 * @buffer:  an in-memory XML document input
14331
 * @size:  the length of the XML document in bytes
14332
 *
14333
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14334
 *
14335
 * parse an XML in-memory buffer and call the given SAX handler routines.
14336
 *
14337
 * Returns 0 in case of success or a error number otherwise
14338
 */
14339
int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
14340
0
        const char *buffer, int size) {
14341
0
    int ret = 0;
14342
0
    xmlParserCtxtPtr ctxt;
14343
14344
0
    xmlInitParser();
14345
14346
0
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14347
0
    if (ctxt == NULL) return -1;
14348
0
    if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14349
0
        xmlFree(ctxt->sax);
14350
0
    ctxt->sax = sax;
14351
0
    xmlDetectSAX2(ctxt);
14352
14353
0
    if (user_data != NULL)
14354
0
  ctxt->userData = user_data;
14355
14356
0
    xmlParseDocument(ctxt);
14357
14358
0
    if (ctxt->wellFormed)
14359
0
  ret = 0;
14360
0
    else {
14361
0
        if (ctxt->errNo != 0)
14362
0
      ret = ctxt->errNo;
14363
0
  else
14364
0
      ret = -1;
14365
0
    }
14366
0
    if (sax != NULL)
14367
0
        ctxt->sax = NULL;
14368
0
    if (ctxt->myDoc != NULL) {
14369
0
        xmlFreeDoc(ctxt->myDoc);
14370
0
  ctxt->myDoc = NULL;
14371
0
    }
14372
0
    xmlFreeParserCtxt(ctxt);
14373
14374
0
    return ret;
14375
0
}
14376
#endif /* LIBXML_SAX1_ENABLED */
14377
14378
/**
14379
 * xmlCreateDocParserCtxt:
14380
 * @cur:  a pointer to an array of xmlChar
14381
 *
14382
 * Creates a parser context for an XML in-memory document.
14383
 *
14384
 * Returns the new parser context or NULL
14385
 */
14386
xmlParserCtxtPtr
14387
0
xmlCreateDocParserCtxt(const xmlChar *cur) {
14388
0
    int len;
14389
14390
0
    if (cur == NULL)
14391
0
  return(NULL);
14392
0
    len = xmlStrlen(cur);
14393
0
    return(xmlCreateMemoryParserCtxt((const char *)cur, len));
14394
0
}
14395
14396
#ifdef LIBXML_SAX1_ENABLED
14397
/**
14398
 * xmlSAXParseDoc:
14399
 * @sax:  the SAX handler block
14400
 * @cur:  a pointer to an array of xmlChar
14401
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14402
 *             documents
14403
 *
14404
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadDoc.
14405
 *
14406
 * parse an XML in-memory document and build a tree.
14407
 * It use the given SAX function block to handle the parsing callback.
14408
 * If sax is NULL, fallback to the default DOM tree building routines.
14409
 *
14410
 * Returns the resulting document tree
14411
 */
14412
14413
xmlDocPtr
14414
0
xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
14415
0
    xmlDocPtr ret;
14416
0
    xmlParserCtxtPtr ctxt;
14417
0
    xmlSAXHandlerPtr oldsax = NULL;
14418
14419
0
    if (cur == NULL) return(NULL);
14420
14421
14422
0
    ctxt = xmlCreateDocParserCtxt(cur);
14423
0
    if (ctxt == NULL) return(NULL);
14424
0
    if (sax != NULL) {
14425
0
        oldsax = ctxt->sax;
14426
0
        ctxt->sax = sax;
14427
0
        ctxt->userData = NULL;
14428
0
    }
14429
0
    xmlDetectSAX2(ctxt);
14430
14431
0
    xmlParseDocument(ctxt);
14432
0
    if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14433
0
    else {
14434
0
       ret = NULL;
14435
0
       xmlFreeDoc(ctxt->myDoc);
14436
0
       ctxt->myDoc = NULL;
14437
0
    }
14438
0
    if (sax != NULL)
14439
0
  ctxt->sax = oldsax;
14440
0
    xmlFreeParserCtxt(ctxt);
14441
14442
0
    return(ret);
14443
0
}
14444
14445
/**
14446
 * xmlParseDoc:
14447
 * @cur:  a pointer to an array of xmlChar
14448
 *
14449
 * DEPRECATED: Use xmlReadDoc.
14450
 *
14451
 * parse an XML in-memory document and build a tree.
14452
 *
14453
 * Returns the resulting document tree
14454
 */
14455
14456
xmlDocPtr
14457
0
xmlParseDoc(const xmlChar *cur) {
14458
0
    return(xmlSAXParseDoc(NULL, cur, 0));
14459
0
}
14460
#endif /* LIBXML_SAX1_ENABLED */
14461
14462
#ifdef LIBXML_LEGACY_ENABLED
14463
/************************************************************************
14464
 *                  *
14465
 *  Specific function to keep track of entities references    *
14466
 *  and used by the XSLT debugger         *
14467
 *                  *
14468
 ************************************************************************/
14469
14470
static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14471
14472
/**
14473
 * xmlAddEntityReference:
14474
 * @ent : A valid entity
14475
 * @firstNode : A valid first node for children of entity
14476
 * @lastNode : A valid last node of children entity
14477
 *
14478
 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14479
 */
14480
static void
14481
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14482
                      xmlNodePtr lastNode)
14483
{
14484
    if (xmlEntityRefFunc != NULL) {
14485
        (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14486
    }
14487
}
14488
14489
14490
/**
14491
 * xmlSetEntityReferenceFunc:
14492
 * @func: A valid function
14493
 *
14494
 * Set the function to call call back when a xml reference has been made
14495
 */
14496
void
14497
xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14498
{
14499
    xmlEntityRefFunc = func;
14500
}
14501
#endif /* LIBXML_LEGACY_ENABLED */
14502
14503
/************************************************************************
14504
 *                  *
14505
 *        Miscellaneous       *
14506
 *                  *
14507
 ************************************************************************/
14508
14509
/* Set to 1 by xmlInitParser() and reset to 0 by xmlCleanupParser(). */
static int xmlParserInitialized = 0;
14510
14511
/**
14512
 * xmlInitParser:
14513
 *
14514
 * Initialization function for the XML parser.
14515
 * This is not reentrant. Call once before processing in case of
14516
 * use in multithreaded programs.
14517
 */
14518
14519
void
14520
345M
xmlInitParser(void) {
14521
    /*
14522
     * Note that the initialization code must not make memory allocations.
14523
     */
14524
345M
    if (xmlParserInitialized != 0)
14525
345M
  return;
14526
14527
3.70k
#ifdef LIBXML_THREAD_ENABLED
14528
3.70k
    __xmlGlobalInitMutexLock();
14529
3.70k
    if (xmlParserInitialized == 0) {
14530
3.70k
#endif
14531
#if defined(_WIN32) && (!defined(LIBXML_STATIC) || defined(LIBXML_STATIC_FOR_DLL))
14532
        if (xmlFree == free)
14533
            atexit(xmlCleanupParser);
14534
#endif
14535
14536
3.70k
  xmlInitThreadsInternal();
14537
3.70k
  xmlInitGlobalsInternal();
14538
3.70k
  xmlInitMemoryInternal();
14539
3.70k
        __xmlInitializeDict();
14540
3.70k
  xmlInitEncodingInternal();
14541
3.70k
  xmlRegisterDefaultInputCallbacks();
14542
3.70k
#ifdef LIBXML_OUTPUT_ENABLED
14543
3.70k
  xmlRegisterDefaultOutputCallbacks();
14544
3.70k
#endif /* LIBXML_OUTPUT_ENABLED */
14545
3.70k
#if defined(LIBXML_XPATH_ENABLED) || defined(LIBXML_SCHEMAS_ENABLED)
14546
3.70k
  xmlInitXPathInternal();
14547
3.70k
#endif
14548
3.70k
  xmlParserInitialized = 1;
14549
3.70k
#ifdef LIBXML_THREAD_ENABLED
14550
3.70k
    }
14551
3.70k
    __xmlGlobalInitMutexUnlock();
14552
3.70k
#endif
14553
3.70k
}
14554
14555
/**
14556
 * xmlCleanupParser:
14557
 *
14558
 * This function name is somewhat misleading. It does not clean up
14559
 * parser state, it cleans up memory allocated by the library itself.
14560
 * It is a cleanup function for the XML library. It tries to reclaim all
14561
 * related global memory allocated for the library processing.
14562
 * It doesn't deallocate any document related memory. One should
14563
 * call xmlCleanupParser() only when the process has finished using
14564
 * the library and all XML/HTML documents built with it.
14565
 * See also xmlInitParser() which has the opposite function of preparing
14566
 * the library for operations.
14567
 *
14568
 * WARNING: if your application is multithreaded or has plugin support
14569
 *          calling this may crash the application if another thread or
14570
 *          a plugin is still using libxml2. It's sometimes very hard to
14571
 *          guess if libxml2 is in use in the application, some libraries
14572
 *          or plugins may use it without notice. In case of doubt abstain
14573
 *          from calling this function or do it just before calling exit()
14574
 *          to avoid leak reports from valgrind !
14575
 */
14576
14577
void
14578
0
xmlCleanupParser(void) {
14579
0
    if (!xmlParserInitialized)
14580
0
  return;
14581
14582
0
    xmlCleanupCharEncodingHandlers();
14583
0
#ifdef LIBXML_CATALOG_ENABLED
14584
0
    xmlCatalogCleanup();
14585
0
#endif
14586
0
    xmlCleanupDictInternal();
14587
0
    xmlCleanupInputCallbacks();
14588
0
#ifdef LIBXML_OUTPUT_ENABLED
14589
0
    xmlCleanupOutputCallbacks();
14590
0
#endif
14591
0
#ifdef LIBXML_SCHEMAS_ENABLED
14592
0
    xmlSchemaCleanupTypes();
14593
0
    xmlRelaxNGCleanupTypes();
14594
0
#endif
14595
0
    xmlCleanupGlobalsInternal();
14596
0
    xmlCleanupThreadsInternal();
14597
0
    xmlCleanupMemoryInternal();
14598
0
    xmlParserInitialized = 0;
14599
0
}
14600
14601
#if defined(HAVE_ATTRIBUTE_DESTRUCTOR) && !defined(LIBXML_STATIC) && \
14602
    !defined(_WIN32)
14603
static void
14604
ATTRIBUTE_DESTRUCTOR
14605
0
xmlDestructor(void) {
14606
    /*
14607
     * Calling custom deallocation functions in a destructor can cause
14608
     * problems, for example with Nokogiri.
14609
     */
14610
0
    if (xmlFree == free)
14611
0
        xmlCleanupParser();
14612
0
}
14613
#endif
14614
14615
/************************************************************************
14616
 *                  *
14617
 *  New set (2.6.0) of simpler and more flexible APIs   *
14618
 *                  *
14619
 ************************************************************************/
14620
14621
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named "dict" must be visible where the
 * macro is expanded; when it is NULL the string is always freed.
 * NULL strings are ignored. Note that @str is evaluated several times.
 *
 * The body is wrapped in do { } while (0) so that the macro behaves as
 * a single statement (safe inside an unbraced if/else); invoke it with
 * a trailing semicolon.
 */
#define DICT_FREE(str)                                              \
    do {                                                            \
        if ((str) && ((!dict) ||                                    \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))      \
            xmlFree((char *)(str));                                 \
    } while (0)
14632
14633
/**
14634
 * xmlCtxtReset:
14635
 * @ctxt: an XML parser context
14636
 *
14637
 * Reset a parser context
14638
 */
14639
void
14640
xmlCtxtReset(xmlParserCtxtPtr ctxt)
14641
0
{
14642
0
    xmlParserInputPtr input;
14643
0
    xmlDictPtr dict;
14644
14645
0
    if (ctxt == NULL)
14646
0
        return;
14647
14648
0
    dict = ctxt->dict;
14649
14650
0
    while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14651
0
        xmlFreeInputStream(input);
14652
0
    }
14653
0
    ctxt->inputNr = 0;
14654
0
    ctxt->input = NULL;
14655
14656
0
    ctxt->spaceNr = 0;
14657
0
    if (ctxt->spaceTab != NULL) {
14658
0
  ctxt->spaceTab[0] = -1;
14659
0
  ctxt->space = &ctxt->spaceTab[0];
14660
0
    } else {
14661
0
        ctxt->space = NULL;
14662
0
    }
14663
14664
14665
0
    ctxt->nodeNr = 0;
14666
0
    ctxt->node = NULL;
14667
14668
0
    ctxt->nameNr = 0;
14669
0
    ctxt->name = NULL;
14670
14671
0
    ctxt->nsNr = 0;
14672
14673
0
    DICT_FREE(ctxt->version);
14674
0
    ctxt->version = NULL;
14675
0
    DICT_FREE(ctxt->encoding);
14676
0
    ctxt->encoding = NULL;
14677
0
    DICT_FREE(ctxt->directory);
14678
0
    ctxt->directory = NULL;
14679
0
    DICT_FREE(ctxt->extSubURI);
14680
0
    ctxt->extSubURI = NULL;
14681
0
    DICT_FREE(ctxt->extSubSystem);
14682
0
    ctxt->extSubSystem = NULL;
14683
0
    if (ctxt->myDoc != NULL)
14684
0
        xmlFreeDoc(ctxt->myDoc);
14685
0
    ctxt->myDoc = NULL;
14686
14687
0
    ctxt->standalone = -1;
14688
0
    ctxt->hasExternalSubset = 0;
14689
0
    ctxt->hasPErefs = 0;
14690
0
    ctxt->html = 0;
14691
0
    ctxt->external = 0;
14692
0
    ctxt->instate = XML_PARSER_START;
14693
0
    ctxt->token = 0;
14694
14695
0
    ctxt->wellFormed = 1;
14696
0
    ctxt->nsWellFormed = 1;
14697
0
    ctxt->disableSAX = 0;
14698
0
    ctxt->valid = 1;
14699
#if 0
14700
    ctxt->vctxt.userData = ctxt;
14701
    ctxt->vctxt.error = xmlParserValidityError;
14702
    ctxt->vctxt.warning = xmlParserValidityWarning;
14703
#endif
14704
0
    ctxt->record_info = 0;
14705
0
    ctxt->checkIndex = 0;
14706
0
    ctxt->endCheckState = 0;
14707
0
    ctxt->inSubset = 0;
14708
0
    ctxt->errNo = XML_ERR_OK;
14709
0
    ctxt->depth = 0;
14710
0
    ctxt->charset = XML_CHAR_ENCODING_UTF8;
14711
0
    ctxt->catalogs = NULL;
14712
0
    ctxt->sizeentities = 0;
14713
0
    ctxt->sizeentcopy = 0;
14714
0
    xmlInitNodeInfoSeq(&ctxt->node_seq);
14715
14716
0
    if (ctxt->attsDefault != NULL) {
14717
0
        xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
14718
0
        ctxt->attsDefault = NULL;
14719
0
    }
14720
0
    if (ctxt->attsSpecial != NULL) {
14721
0
        xmlHashFree(ctxt->attsSpecial, NULL);
14722
0
        ctxt->attsSpecial = NULL;
14723
0
    }
14724
14725
0
#ifdef LIBXML_CATALOG_ENABLED
14726
0
    if (ctxt->catalogs != NULL)
14727
0
  xmlCatalogFreeLocal(ctxt->catalogs);
14728
0
#endif
14729
0
    ctxt->nbErrors = 0;
14730
0
    ctxt->nbWarnings = 0;
14731
0
    if (ctxt->lastError.code != XML_ERR_OK)
14732
0
        xmlResetError(&ctxt->lastError);
14733
0
}
14734
14735
/**
14736
 * xmlCtxtResetPush:
14737
 * @ctxt: an XML parser context
14738
 * @chunk:  a pointer to an array of chars
14739
 * @size:  number of chars in the array
14740
 * @filename:  an optional file name or URI
14741
 * @encoding:  the document encoding, or NULL
14742
 *
14743
 * Reset a push parser context
14744
 *
14745
 * Returns 0 in case of success and 1 in case of error
14746
 */
14747
int
14748
xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14749
                 int size, const char *filename, const char *encoding)
14750
0
{
14751
0
    xmlParserInputPtr inputStream;
14752
0
    xmlParserInputBufferPtr buf;
14753
0
    xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14754
14755
0
    if (ctxt == NULL)
14756
0
        return(1);
14757
14758
0
    if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14759
0
        enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14760
14761
0
    buf = xmlAllocParserInputBuffer(enc);
14762
0
    if (buf == NULL)
14763
0
        return(1);
14764
14765
0
    if (ctxt == NULL) {
14766
0
        xmlFreeParserInputBuffer(buf);
14767
0
        return(1);
14768
0
    }
14769
14770
0
    xmlCtxtReset(ctxt);
14771
14772
0
    if (filename == NULL) {
14773
0
        ctxt->directory = NULL;
14774
0
    } else {
14775
0
        ctxt->directory = xmlParserGetDirectory(filename);
14776
0
    }
14777
14778
0
    inputStream = xmlNewInputStream(ctxt);
14779
0
    if (inputStream == NULL) {
14780
0
        xmlFreeParserInputBuffer(buf);
14781
0
        return(1);
14782
0
    }
14783
14784
0
    if (filename == NULL)
14785
0
        inputStream->filename = NULL;
14786
0
    else
14787
0
        inputStream->filename = (char *)
14788
0
            xmlCanonicPath((const xmlChar *) filename);
14789
0
    inputStream->buf = buf;
14790
0
    xmlBufResetInput(buf->buffer, inputStream);
14791
14792
0
    inputPush(ctxt, inputStream);
14793
14794
0
    if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14795
0
        (ctxt->input->buf != NULL)) {
14796
0
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
14797
0
        size_t cur = ctxt->input->cur - ctxt->input->base;
14798
14799
0
        xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14800
14801
0
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
14802
#ifdef DEBUG_PUSH
14803
        xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14804
#endif
14805
0
    }
14806
14807
0
    if (encoding != NULL) {
14808
0
        xmlCharEncodingHandlerPtr hdlr;
14809
14810
0
        if (ctxt->encoding != NULL)
14811
0
      xmlFree((xmlChar *) ctxt->encoding);
14812
0
        ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14813
14814
0
        hdlr = xmlFindCharEncodingHandler(encoding);
14815
0
        if (hdlr != NULL) {
14816
0
            xmlSwitchToEncoding(ctxt, hdlr);
14817
0
  } else {
14818
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14819
0
            "Unsupported encoding %s\n", BAD_CAST encoding);
14820
0
        }
14821
0
    } else if (enc != XML_CHAR_ENCODING_NONE) {
14822
0
        xmlSwitchEncoding(ctxt, enc);
14823
0
    }
14824
14825
0
    return(0);
14826
0
}
14827
14828
14829
/**
14830
 * xmlCtxtUseOptionsInternal:
14831
 * @ctxt: an XML parser context
14832
 * @options:  a combination of xmlParserOption
14833
 * @encoding:  the user provided encoding to use
14834
 *
14835
 * Applies the options to the parser context
14836
 *
14837
 * Returns 0 in case of success, the set of unknown or unimplemented options
14838
 *         in case of error.
14839
 */
14840
static int
14841
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
14842
1.10M
{
14843
1.10M
    if (ctxt == NULL)
14844
0
        return(-1);
14845
1.10M
    if (encoding != NULL) {
14846
0
        if (ctxt->encoding != NULL)
14847
0
      xmlFree((xmlChar *) ctxt->encoding);
14848
0
        ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14849
0
    }
14850
1.10M
    if (options & XML_PARSE_RECOVER) {
14851
641k
        ctxt->recovery = 1;
14852
641k
        options -= XML_PARSE_RECOVER;
14853
641k
  ctxt->options |= XML_PARSE_RECOVER;
14854
641k
    } else
14855
463k
        ctxt->recovery = 0;
14856
1.10M
    if (options & XML_PARSE_DTDLOAD) {
14857
772k
        ctxt->loadsubset = XML_DETECT_IDS;
14858
772k
        options -= XML_PARSE_DTDLOAD;
14859
772k
  ctxt->options |= XML_PARSE_DTDLOAD;
14860
772k
    } else
14861
331k
        ctxt->loadsubset = 0;
14862
1.10M
    if (options & XML_PARSE_DTDATTR) {
14863
483k
        ctxt->loadsubset |= XML_COMPLETE_ATTRS;
14864
483k
        options -= XML_PARSE_DTDATTR;
14865
483k
  ctxt->options |= XML_PARSE_DTDATTR;
14866
483k
    }
14867
1.10M
    if (options & XML_PARSE_NOENT) {
14868
731k
        ctxt->replaceEntities = 1;
14869
        /* ctxt->loadsubset |= XML_DETECT_IDS; */
14870
731k
        options -= XML_PARSE_NOENT;
14871
731k
  ctxt->options |= XML_PARSE_NOENT;
14872
731k
    } else
14873
372k
        ctxt->replaceEntities = 0;
14874
1.10M
    if (options & XML_PARSE_PEDANTIC) {
14875
236k
        ctxt->pedantic = 1;
14876
236k
        options -= XML_PARSE_PEDANTIC;
14877
236k
  ctxt->options |= XML_PARSE_PEDANTIC;
14878
236k
    } else
14879
867k
        ctxt->pedantic = 0;
14880
1.10M
    if (options & XML_PARSE_NOBLANKS) {
14881
454k
        ctxt->keepBlanks = 0;
14882
454k
        ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
14883
454k
        options -= XML_PARSE_NOBLANKS;
14884
454k
  ctxt->options |= XML_PARSE_NOBLANKS;
14885
454k
    } else
14886
649k
        ctxt->keepBlanks = 1;
14887
1.10M
    if (options & XML_PARSE_DTDVALID) {
14888
454k
        ctxt->validate = 1;
14889
454k
        if (options & XML_PARSE_NOWARNING)
14890
304k
            ctxt->vctxt.warning = NULL;
14891
454k
        if (options & XML_PARSE_NOERROR)
14892
350k
            ctxt->vctxt.error = NULL;
14893
454k
        options -= XML_PARSE_DTDVALID;
14894
454k
  ctxt->options |= XML_PARSE_DTDVALID;
14895
454k
    } else
14896
649k
        ctxt->validate = 0;
14897
1.10M
    if (options & XML_PARSE_NOWARNING) {
14898
436k
        ctxt->sax->warning = NULL;
14899
436k
        options -= XML_PARSE_NOWARNING;
14900
436k
    }
14901
1.10M
    if (options & XML_PARSE_NOERROR) {
14902
565k
        ctxt->sax->error = NULL;
14903
565k
        ctxt->sax->fatalError = NULL;
14904
565k
        options -= XML_PARSE_NOERROR;
14905
565k
    }
14906
1.10M
#ifdef LIBXML_SAX1_ENABLED
14907
1.10M
    if (options & XML_PARSE_SAX1) {
14908
379k
        ctxt->sax->startElement = xmlSAX2StartElement;
14909
379k
        ctxt->sax->endElement = xmlSAX2EndElement;
14910
379k
        ctxt->sax->startElementNs = NULL;
14911
379k
        ctxt->sax->endElementNs = NULL;
14912
379k
        ctxt->sax->initialized = 1;
14913
379k
        options -= XML_PARSE_SAX1;
14914
379k
  ctxt->options |= XML_PARSE_SAX1;
14915
379k
    }
14916
1.10M
#endif /* LIBXML_SAX1_ENABLED */
14917
1.10M
    if (options & XML_PARSE_NODICT) {
14918
403k
        ctxt->dictNames = 0;
14919
403k
        options -= XML_PARSE_NODICT;
14920
403k
  ctxt->options |= XML_PARSE_NODICT;
14921
700k
    } else {
14922
700k
        ctxt->dictNames = 1;
14923
700k
    }
14924
1.10M
    if (options & XML_PARSE_NOCDATA) {
14925
436k
        ctxt->sax->cdataBlock = NULL;
14926
436k
        options -= XML_PARSE_NOCDATA;
14927
436k
  ctxt->options |= XML_PARSE_NOCDATA;
14928
436k
    }
14929
1.10M
    if (options & XML_PARSE_NSCLEAN) {
14930
581k
  ctxt->options |= XML_PARSE_NSCLEAN;
14931
581k
        options -= XML_PARSE_NSCLEAN;
14932
581k
    }
14933
1.10M
    if (options & XML_PARSE_NONET) {
14934
496k
  ctxt->options |= XML_PARSE_NONET;
14935
496k
        options -= XML_PARSE_NONET;
14936
496k
    }
14937
1.10M
    if (options & XML_PARSE_COMPACT) {
14938
673k
  ctxt->options |= XML_PARSE_COMPACT;
14939
673k
        options -= XML_PARSE_COMPACT;
14940
673k
    }
14941
1.10M
    if (options & XML_PARSE_OLD10) {
14942
350k
  ctxt->options |= XML_PARSE_OLD10;
14943
350k
        options -= XML_PARSE_OLD10;
14944
350k
    }
14945
1.10M
    if (options & XML_PARSE_NOBASEFIX) {
14946
420k
  ctxt->options |= XML_PARSE_NOBASEFIX;
14947
420k
        options -= XML_PARSE_NOBASEFIX;
14948
420k
    }
14949
1.10M
    if (options & XML_PARSE_HUGE) {
14950
342k
  ctxt->options |= XML_PARSE_HUGE;
14951
342k
        options -= XML_PARSE_HUGE;
14952
342k
        if (ctxt->dict != NULL)
14953
342k
            xmlDictSetLimit(ctxt->dict, 0);
14954
342k
    }
14955
1.10M
    if (options & XML_PARSE_OLDSAX) {
14956
358k
  ctxt->options |= XML_PARSE_OLDSAX;
14957
358k
        options -= XML_PARSE_OLDSAX;
14958
358k
    }
14959
1.10M
    if (options & XML_PARSE_IGNORE_ENC) {
14960
532k
  ctxt->options |= XML_PARSE_IGNORE_ENC;
14961
532k
        options -= XML_PARSE_IGNORE_ENC;
14962
532k
    }
14963
1.10M
    if (options & XML_PARSE_BIG_LINES) {
14964
443k
  ctxt->options |= XML_PARSE_BIG_LINES;
14965
443k
        options -= XML_PARSE_BIG_LINES;
14966
443k
    }
14967
1.10M
    ctxt->linenumbers = 1;
14968
1.10M
    return (options);
14969
1.10M
}
14970
14971
/**
14972
 * xmlCtxtUseOptions:
14973
 * @ctxt: an XML parser context
14974
 * @options:  a combination of xmlParserOption
14975
 *
14976
 * Applies the options to the parser context
14977
 *
14978
 * Returns 0 in case of success, the set of unknown or unimplemented options
14979
 *         in case of error.
14980
 */
14981
int
14982
xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
14983
736k
{
14984
736k
   return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
14985
736k
}
14986
14987
/**
14988
 * xmlDoRead:
14989
 * @ctxt:  an XML parser context
14990
 * @URL:  the base URL to use for the document
14991
 * @encoding:  the document encoding, or NULL
14992
 * @options:  a combination of xmlParserOption
14993
 * @reuse:  keep the context for reuse
14994
 *
14995
 * Common front-end for the xmlRead functions
14996
 *
14997
 * Returns the resulting document tree or NULL
14998
 */
14999
static xmlDocPtr
15000
xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
15001
          int options, int reuse)
15002
367k
{
15003
367k
    xmlDocPtr ret;
15004
15005
367k
    xmlCtxtUseOptionsInternal(ctxt, options, encoding);
15006
367k
    if (encoding != NULL) {
15007
0
        xmlCharEncodingHandlerPtr hdlr;
15008
15009
0
  hdlr = xmlFindCharEncodingHandler(encoding);
15010
0
  if (hdlr != NULL)
15011
0
      xmlSwitchToEncoding(ctxt, hdlr);
15012
0
    }
15013
367k
    if ((URL != NULL) && (ctxt->input != NULL) &&
15014
367k
        (ctxt->input->filename == NULL))
15015
367k
        ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
15016
367k
    xmlParseDocument(ctxt);
15017
367k
    if ((ctxt->wellFormed) || ctxt->recovery)
15018
222k
        ret = ctxt->myDoc;
15019
145k
    else {
15020
145k
        ret = NULL;
15021
145k
  if (ctxt->myDoc != NULL) {
15022
122k
      xmlFreeDoc(ctxt->myDoc);
15023
122k
  }
15024
145k
    }
15025
367k
    ctxt->myDoc = NULL;
15026
367k
    if (!reuse) {
15027
367k
  xmlFreeParserCtxt(ctxt);
15028
367k
    }
15029
15030
367k
    return (ret);
15031
367k
}
15032
15033
/**
15034
 * xmlReadDoc:
15035
 * @cur:  a pointer to a zero terminated string
15036
 * @URL:  the base URL to use for the document
15037
 * @encoding:  the document encoding, or NULL
15038
 * @options:  a combination of xmlParserOption
15039
 *
15040
 * parse an XML in-memory document and build a tree.
15041
 *
15042
 * Returns the resulting document tree
15043
 */
15044
xmlDocPtr
15045
xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
15046
0
{
15047
0
    xmlParserCtxtPtr ctxt;
15048
15049
0
    if (cur == NULL)
15050
0
        return (NULL);
15051
0
    xmlInitParser();
15052
15053
0
    ctxt = xmlCreateDocParserCtxt(cur);
15054
0
    if (ctxt == NULL)
15055
0
        return (NULL);
15056
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15057
0
}
15058
15059
/**
15060
 * xmlReadFile:
15061
 * @filename:  a file or URL
15062
 * @encoding:  the document encoding, or NULL
15063
 * @options:  a combination of xmlParserOption
15064
 *
15065
 * parse an XML file from the filesystem or the network.
15066
 *
15067
 * Returns the resulting document tree
15068
 */
15069
xmlDocPtr
15070
xmlReadFile(const char *filename, const char *encoding, int options)
15071
0
{
15072
0
    xmlParserCtxtPtr ctxt;
15073
15074
0
    xmlInitParser();
15075
0
    ctxt = xmlCreateURLParserCtxt(filename, options);
15076
0
    if (ctxt == NULL)
15077
0
        return (NULL);
15078
0
    return (xmlDoRead(ctxt, NULL, encoding, options, 0));
15079
0
}
15080
15081
/**
15082
 * xmlReadMemory:
15083
 * @buffer:  a pointer to a char array
15084
 * @size:  the size of the array
15085
 * @URL:  the base URL to use for the document
15086
 * @encoding:  the document encoding, or NULL
15087
 * @options:  a combination of xmlParserOption
15088
 *
15089
 * parse an XML in-memory document and build a tree.
15090
 *
15091
 * Returns the resulting document tree
15092
 */
15093
xmlDocPtr
15094
xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
15095
368k
{
15096
368k
    xmlParserCtxtPtr ctxt;
15097
15098
368k
    xmlInitParser();
15099
368k
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
15100
368k
    if (ctxt == NULL)
15101
700
        return (NULL);
15102
367k
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15103
368k
}
15104
15105
/**
15106
 * xmlReadFd:
15107
 * @fd:  an open file descriptor
15108
 * @URL:  the base URL to use for the document
15109
 * @encoding:  the document encoding, or NULL
15110
 * @options:  a combination of xmlParserOption
15111
 *
15112
 * parse an XML from a file descriptor and build a tree.
15113
 * NOTE that the file descriptor will not be closed when the
15114
 *      reader is closed or reset.
15115
 *
15116
 * Returns the resulting document tree
15117
 */
15118
xmlDocPtr
15119
xmlReadFd(int fd, const char *URL, const char *encoding, int options)
15120
0
{
15121
0
    xmlParserCtxtPtr ctxt;
15122
0
    xmlParserInputBufferPtr input;
15123
0
    xmlParserInputPtr stream;
15124
15125
0
    if (fd < 0)
15126
0
        return (NULL);
15127
0
    xmlInitParser();
15128
15129
0
    input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15130
0
    if (input == NULL)
15131
0
        return (NULL);
15132
0
    input->closecallback = NULL;
15133
0
    ctxt = xmlNewParserCtxt();
15134
0
    if (ctxt == NULL) {
15135
0
        xmlFreeParserInputBuffer(input);
15136
0
        return (NULL);
15137
0
    }
15138
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15139
0
    if (stream == NULL) {
15140
0
        xmlFreeParserInputBuffer(input);
15141
0
  xmlFreeParserCtxt(ctxt);
15142
0
        return (NULL);
15143
0
    }
15144
0
    inputPush(ctxt, stream);
15145
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15146
0
}
15147
15148
/**
15149
 * xmlReadIO:
15150
 * @ioread:  an I/O read function
15151
 * @ioclose:  an I/O close function
15152
 * @ioctx:  an I/O handler
15153
 * @URL:  the base URL to use for the document
15154
 * @encoding:  the document encoding, or NULL
15155
 * @options:  a combination of xmlParserOption
15156
 *
15157
 * parse an XML document from I/O functions and source and build a tree.
15158
 *
15159
 * Returns the resulting document tree
15160
 */
15161
xmlDocPtr
15162
xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
15163
          void *ioctx, const char *URL, const char *encoding, int options)
15164
0
{
15165
0
    xmlParserCtxtPtr ctxt;
15166
0
    xmlParserInputBufferPtr input;
15167
0
    xmlParserInputPtr stream;
15168
15169
0
    if (ioread == NULL)
15170
0
        return (NULL);
15171
0
    xmlInitParser();
15172
15173
0
    input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15174
0
                                         XML_CHAR_ENCODING_NONE);
15175
0
    if (input == NULL) {
15176
0
        if (ioclose != NULL)
15177
0
            ioclose(ioctx);
15178
0
        return (NULL);
15179
0
    }
15180
0
    ctxt = xmlNewParserCtxt();
15181
0
    if (ctxt == NULL) {
15182
0
        xmlFreeParserInputBuffer(input);
15183
0
        return (NULL);
15184
0
    }
15185
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15186
0
    if (stream == NULL) {
15187
0
        xmlFreeParserInputBuffer(input);
15188
0
  xmlFreeParserCtxt(ctxt);
15189
0
        return (NULL);
15190
0
    }
15191
0
    inputPush(ctxt, stream);
15192
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15193
0
}
15194
15195
/**
15196
 * xmlCtxtReadDoc:
15197
 * @ctxt:  an XML parser context
15198
 * @cur:  a pointer to a zero terminated string
15199
 * @URL:  the base URL to use for the document
15200
 * @encoding:  the document encoding, or NULL
15201
 * @options:  a combination of xmlParserOption
15202
 *
15203
 * parse an XML in-memory document and build a tree.
15204
 * This reuses the existing @ctxt parser context
15205
 *
15206
 * Returns the resulting document tree
15207
 */
15208
xmlDocPtr
15209
xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
15210
               const char *URL, const char *encoding, int options)
15211
0
{
15212
0
    if (cur == NULL)
15213
0
        return (NULL);
15214
0
    return (xmlCtxtReadMemory(ctxt, (const char *) cur, xmlStrlen(cur), URL,
15215
0
                              encoding, options));
15216
0
}
15217
15218
/**
15219
 * xmlCtxtReadFile:
15220
 * @ctxt:  an XML parser context
15221
 * @filename:  a file or URL
15222
 * @encoding:  the document encoding, or NULL
15223
 * @options:  a combination of xmlParserOption
15224
 *
15225
 * parse an XML file from the filesystem or the network.
15226
 * This reuses the existing @ctxt parser context
15227
 *
15228
 * Returns the resulting document tree
15229
 */
15230
xmlDocPtr
15231
xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15232
                const char *encoding, int options)
15233
0
{
15234
0
    xmlParserInputPtr stream;
15235
15236
0
    if (filename == NULL)
15237
0
        return (NULL);
15238
0
    if (ctxt == NULL)
15239
0
        return (NULL);
15240
0
    xmlInitParser();
15241
15242
0
    xmlCtxtReset(ctxt);
15243
15244
0
    stream = xmlLoadExternalEntity(filename, NULL, ctxt);
15245
0
    if (stream == NULL) {
15246
0
        return (NULL);
15247
0
    }
15248
0
    inputPush(ctxt, stream);
15249
0
    return (xmlDoRead(ctxt, NULL, encoding, options, 1));
15250
0
}
15251
15252
/**
15253
 * xmlCtxtReadMemory:
15254
 * @ctxt:  an XML parser context
15255
 * @buffer:  a pointer to a char array
15256
 * @size:  the size of the array
15257
 * @URL:  the base URL to use for the document
15258
 * @encoding:  the document encoding, or NULL
15259
 * @options:  a combination of xmlParserOption
15260
 *
15261
 * parse an XML in-memory document and build a tree.
15262
 * This reuses the existing @ctxt parser context
15263
 *
15264
 * Returns the resulting document tree
15265
 */
15266
xmlDocPtr
15267
xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
15268
                  const char *URL, const char *encoding, int options)
15269
0
{
15270
0
    xmlParserInputBufferPtr input;
15271
0
    xmlParserInputPtr stream;
15272
15273
0
    if (ctxt == NULL)
15274
0
        return (NULL);
15275
0
    if (buffer == NULL)
15276
0
        return (NULL);
15277
0
    xmlInitParser();
15278
15279
0
    xmlCtxtReset(ctxt);
15280
15281
0
    input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
15282
0
    if (input == NULL) {
15283
0
  return(NULL);
15284
0
    }
15285
15286
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15287
0
    if (stream == NULL) {
15288
0
  xmlFreeParserInputBuffer(input);
15289
0
  return(NULL);
15290
0
    }
15291
15292
0
    inputPush(ctxt, stream);
15293
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15294
0
}
15295
15296
/**
15297
 * xmlCtxtReadFd:
15298
 * @ctxt:  an XML parser context
15299
 * @fd:  an open file descriptor
15300
 * @URL:  the base URL to use for the document
15301
 * @encoding:  the document encoding, or NULL
15302
 * @options:  a combination of xmlParserOption
15303
 *
15304
 * parse an XML from a file descriptor and build a tree.
15305
 * This reuses the existing @ctxt parser context
15306
 * NOTE that the file descriptor will not be closed when the
15307
 *      reader is closed or reset.
15308
 *
15309
 * Returns the resulting document tree
15310
 */
15311
xmlDocPtr
15312
xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15313
              const char *URL, const char *encoding, int options)
15314
0
{
15315
0
    xmlParserInputBufferPtr input;
15316
0
    xmlParserInputPtr stream;
15317
15318
0
    if (fd < 0)
15319
0
        return (NULL);
15320
0
    if (ctxt == NULL)
15321
0
        return (NULL);
15322
0
    xmlInitParser();
15323
15324
0
    xmlCtxtReset(ctxt);
15325
15326
15327
0
    input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15328
0
    if (input == NULL)
15329
0
        return (NULL);
15330
0
    input->closecallback = NULL;
15331
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15332
0
    if (stream == NULL) {
15333
0
        xmlFreeParserInputBuffer(input);
15334
0
        return (NULL);
15335
0
    }
15336
0
    inputPush(ctxt, stream);
15337
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15338
0
}
15339
15340
/**
15341
 * xmlCtxtReadIO:
15342
 * @ctxt:  an XML parser context
15343
 * @ioread:  an I/O read function
15344
 * @ioclose:  an I/O close function
15345
 * @ioctx:  an I/O handler
15346
 * @URL:  the base URL to use for the document
15347
 * @encoding:  the document encoding, or NULL
15348
 * @options:  a combination of xmlParserOption
15349
 *
15350
 * parse an XML document from I/O functions and source and build a tree.
15351
 * This reuses the existing @ctxt parser context
15352
 *
15353
 * Returns the resulting document tree
15354
 */
15355
xmlDocPtr
15356
xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15357
              xmlInputCloseCallback ioclose, void *ioctx,
15358
        const char *URL,
15359
              const char *encoding, int options)
15360
0
{
15361
0
    xmlParserInputBufferPtr input;
15362
0
    xmlParserInputPtr stream;
15363
15364
0
    if (ioread == NULL)
15365
0
        return (NULL);
15366
0
    if (ctxt == NULL)
15367
0
        return (NULL);
15368
0
    xmlInitParser();
15369
15370
0
    xmlCtxtReset(ctxt);
15371
15372
0
    input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15373
0
                                         XML_CHAR_ENCODING_NONE);
15374
0
    if (input == NULL) {
15375
0
        if (ioclose != NULL)
15376
0
            ioclose(ioctx);
15377
0
        return (NULL);
15378
0
    }
15379
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15380
0
    if (stream == NULL) {
15381
0
        xmlFreeParserInputBuffer(input);
15382
0
        return (NULL);
15383
0
    }
15384
0
    inputPush(ctxt, stream);
15385
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15386
0
}
15387