Coverage Report

Created: 2024-04-26 11:09

/src/libxml2/parser.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3
 *            implemented on top of the SAX interfaces
4
 *
5
 * References:
6
 *   The XML specification:
7
 *     http://www.w3.org/TR/REC-xml
8
 *   Original 1.0 version:
9
 *     http://www.w3.org/TR/1998/REC-xml-19980210
10
 *   XML second edition working draft
11
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
12
 *
13
 * Okay this is a big file, the parser core is around 7000 lines, then it
14
 * is followed by the progressive parser top routines, then the various
15
 * high level APIs to call the parser and a few miscellaneous functions.
16
 * A number of helper functions and deprecated ones have been moved to
17
 * parserInternals.c to reduce this file size.
18
 * As much as possible the functions are associated with their relative
19
 * production in the XML specification. A few productions defining the
20
 * different ranges of character are actually implemented either in
21
 * parserInternals.h or parserInternals.c
22
 * The DOM tree build is realized from the default SAX callbacks in
23
 * the module SAX.c.
24
 * The routines doing the validation checks are in valid.c and called either
25
 * from the SAX callbacks or as standalone functions using a preparsed
26
 * document.
27
 *
28
 * See Copyright for the status of this software.
29
 *
30
 * daniel@veillard.com
31
 */
32
33
/* To avoid EBCDIC trouble when parsing on zOS */
34
#if defined(__MVS__)
35
#pragma convert("ISO8859-1")
36
#endif
37
38
#define IN_LIBXML
39
#include "libxml.h"
40
41
#if defined(_WIN32)
42
#define XML_DIR_SEP '\\'
43
#else
44
#define XML_DIR_SEP '/'
45
#endif
46
47
#include <stdlib.h>
48
#include <limits.h>
49
#include <string.h>
50
#include <stdarg.h>
51
#include <stddef.h>
52
#include <ctype.h>
53
#include <stdlib.h>
54
#include <libxml/xmlmemory.h>
55
#include <libxml/threads.h>
56
#include <libxml/globals.h>
57
#include <libxml/tree.h>
58
#include <libxml/parser.h>
59
#include <libxml/parserInternals.h>
60
#include <libxml/HTMLparser.h>
61
#include <libxml/valid.h>
62
#include <libxml/entities.h>
63
#include <libxml/xmlerror.h>
64
#include <libxml/encoding.h>
65
#include <libxml/xmlIO.h>
66
#include <libxml/uri.h>
67
#ifdef LIBXML_CATALOG_ENABLED
68
#include <libxml/catalog.h>
69
#endif
70
#ifdef LIBXML_SCHEMAS_ENABLED
71
#include <libxml/xmlschemastypes.h>
72
#include <libxml/relaxng.h>
73
#endif
74
#if defined(LIBXML_XPATH_ENABLED) || defined(LIBXML_SCHEMAS_ENABLED)
75
#include <libxml/xpath.h>
76
#endif
77
78
#include "private/buf.h"
79
#include "private/dict.h"
80
#include "private/enc.h"
81
#include "private/entities.h"
82
#include "private/error.h"
83
#include "private/globals.h"
84
#include "private/html.h"
85
#include "private/io.h"
86
#include "private/memory.h"
87
#include "private/parser.h"
88
#include "private/threads.h"
89
#include "private/xpath.h"
90
91
struct _xmlStartTag {
92
    const xmlChar *prefix;
93
    const xmlChar *URI;
94
    int line;
95
    int nsNr;
96
};
97
98
static xmlParserCtxtPtr
99
xmlCreateEntityParserCtxtInternal(xmlSAXHandlerPtr sax, void *userData,
100
        const xmlChar *URL, const xmlChar *ID, const xmlChar *base,
101
        xmlParserCtxtPtr pctx);
102
103
static void xmlHaltParser(xmlParserCtxtPtr ctxt);
104
105
static int
106
xmlParseElementStart(xmlParserCtxtPtr ctxt);
107
108
static void
109
xmlParseElementEnd(xmlParserCtxtPtr ctxt);
110
111
/************************************************************************
112
 *                  *
113
 *  Arbitrary limits set in the parser. See XML_PARSE_HUGE    *
114
 *                  *
115
 ************************************************************************/
116
117
4.45M
#define XML_MAX_HUGE_LENGTH     1000000000

#define XML_PARSER_BIG_ENTITY   1000
#define XML_PARSER_LOT_ENTITY   5000

/*
 * XML_PARSER_NON_LINEAR is the threshold at which the ratio of parsed
 *    entity replacement to the size in bytes of the input indicates an
 *    exponential behaviour. A value of 10 corresponds to at least 3 entity
 *    replacements per byte of input.
 */
#define XML_PARSER_NON_LINEAR   10

/* Fixed cost added on each xmlParserEntityCheck() call. */
#define XML_ENT_FIXED_COST      50
131
132
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary limit on the depth of the XML documents we accept to
 * process. It is a safety boundary, not a limitation of the parser
 * itself, and can be disabled with the XML_PARSE_HUGE parser option.
 */
unsigned int xmlParserMaxDepth = 256;
141
142
143
144
#define SAX2                        1
#define XML_PARSER_BIG_BUFFER_SIZE  300
#define XML_PARSER_BUFFER_SIZE      100
#define SAX_COMPAT_MODE             BAD_CAST "SAX compatibility mode document"
148
149
/**
 * XML_PARSER_CHUNK_SIZE
 *
 * Minimal amount of data the parser expects to have received when
 * GROW is called. This is not a hard limit but an optimization used
 * when reading strings such as Names. It is not strictly needed as
 * long as the available input characters are followed by a 0, which
 * the I/O level should provide.
 */
#define XML_PARSER_CHUNK_SIZE 100
159
160
/*
 * NULL-terminated list of the "xml"-prefixed processing instruction
 * targets allowed by W3C specs.
 */
static const char *const xmlW3CPIs[] = {
    "xml-stylesheet",
    "xml-model",
    NULL
};
169
170
171
/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
172
static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
173
                                              const xmlChar **str);
174
175
static xmlParserErrors
176
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
177
                xmlSAXHandlerPtr sax,
178
          void *user_data, int depth, const xmlChar *URL,
179
          const xmlChar *ID, xmlNodePtr *list);
180
181
static int
182
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
183
                          const char *encoding);
184
#ifdef LIBXML_LEGACY_ENABLED
185
static void
186
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
187
                      xmlNodePtr lastNode);
188
#endif /* LIBXML_LEGACY_ENABLED */
189
190
static xmlParserErrors
191
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
192
          const xmlChar *string, void *user_data, xmlNodePtr *lst);
193
194
static int
195
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
196
197
/************************************************************************
198
 *                  *
199
 *    Some factorized error routines        *
200
 *                  *
201
 ************************************************************************/
202
203
/**
204
 * xmlErrAttributeDup:
205
 * @ctxt:  an XML parser context
206
 * @prefix:  the attribute prefix
207
 * @localname:  the attribute localname
208
 *
209
 * Handle a redefinition of attribute error
210
 */
211
static void
212
xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
213
                   const xmlChar * localname)
214
107k
{
215
107k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
216
107k
        (ctxt->instate == XML_PARSER_EOF))
217
0
  return;
218
107k
    if (ctxt != NULL)
219
107k
  ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
220
221
107k
    if (prefix == NULL)
222
83.9k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
223
83.9k
                        XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
224
83.9k
                        (const char *) localname, NULL, NULL, 0, 0,
225
83.9k
                        "Attribute %s redefined\n", localname);
226
23.4k
    else
227
23.4k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
228
23.4k
                        XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
229
23.4k
                        (const char *) prefix, (const char *) localname,
230
23.4k
                        NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
231
23.4k
                        localname);
232
107k
    if (ctxt != NULL) {
233
107k
  ctxt->wellFormed = 0;
234
107k
  if (ctxt->recovery == 0)
235
21.9k
      ctxt->disableSAX = 1;
236
107k
    }
237
107k
}
238
239
/**
240
 * xmlFatalErr:
241
 * @ctxt:  an XML parser context
242
 * @error:  the error number
243
 * @extra:  extra information string
244
 *
245
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
246
 */
247
static void
248
xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
249
19.5M
{
250
19.5M
    const char *errmsg;
251
252
19.5M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
253
19.5M
        (ctxt->instate == XML_PARSER_EOF))
254
85.5k
  return;
255
19.4M
    switch (error) {
256
250k
        case XML_ERR_INVALID_HEX_CHARREF:
257
250k
            errmsg = "CharRef: invalid hexadecimal value";
258
250k
            break;
259
453k
        case XML_ERR_INVALID_DEC_CHARREF:
260
453k
            errmsg = "CharRef: invalid decimal value";
261
453k
            break;
262
0
        case XML_ERR_INVALID_CHARREF:
263
0
            errmsg = "CharRef: invalid value";
264
0
            break;
265
7.52M
        case XML_ERR_INTERNAL_ERROR:
266
7.52M
            errmsg = "internal error";
267
7.52M
            break;
268
0
        case XML_ERR_PEREF_AT_EOF:
269
0
            errmsg = "PEReference at end of document";
270
0
            break;
271
0
        case XML_ERR_PEREF_IN_PROLOG:
272
0
            errmsg = "PEReference in prolog";
273
0
            break;
274
0
        case XML_ERR_PEREF_IN_EPILOG:
275
0
            errmsg = "PEReference in epilog";
276
0
            break;
277
0
        case XML_ERR_PEREF_NO_NAME:
278
0
            errmsg = "PEReference: no name";
279
0
            break;
280
13.9k
        case XML_ERR_PEREF_SEMICOL_MISSING:
281
13.9k
            errmsg = "PEReference: expecting ';'";
282
13.9k
            break;
283
186
        case XML_ERR_ENTITY_LOOP:
284
186
            errmsg = "Detected an entity reference loop";
285
186
            break;
286
0
        case XML_ERR_ENTITY_NOT_STARTED:
287
0
            errmsg = "EntityValue: \" or ' expected";
288
0
            break;
289
1.78k
        case XML_ERR_ENTITY_PE_INTERNAL:
290
1.78k
            errmsg = "PEReferences forbidden in internal subset";
291
1.78k
            break;
292
15.4k
        case XML_ERR_ENTITY_NOT_FINISHED:
293
15.4k
            errmsg = "EntityValue: \" or ' expected";
294
15.4k
            break;
295
376k
        case XML_ERR_ATTRIBUTE_NOT_STARTED:
296
376k
            errmsg = "AttValue: \" or ' expected";
297
376k
            break;
298
950k
        case XML_ERR_LT_IN_ATTRIBUTE:
299
950k
            errmsg = "Unescaped '<' not allowed in attributes values";
300
950k
            break;
301
36.9k
        case XML_ERR_LITERAL_NOT_STARTED:
302
36.9k
            errmsg = "SystemLiteral \" or ' expected";
303
36.9k
            break;
304
44.8k
        case XML_ERR_LITERAL_NOT_FINISHED:
305
44.8k
            errmsg = "Unfinished System or Public ID \" or ' expected";
306
44.8k
            break;
307
885k
        case XML_ERR_MISPLACED_CDATA_END:
308
885k
            errmsg = "Sequence ']]>' not allowed in content";
309
885k
            break;
310
31.7k
        case XML_ERR_URI_REQUIRED:
311
31.7k
            errmsg = "SYSTEM or PUBLIC, the URI is missing";
312
31.7k
            break;
313
5.28k
        case XML_ERR_PUBID_REQUIRED:
314
5.28k
            errmsg = "PUBLIC, the Public Identifier is missing";
315
5.28k
            break;
316
516k
        case XML_ERR_HYPHEN_IN_COMMENT:
317
516k
            errmsg = "Comment must not contain '--' (double-hyphen)";
318
516k
            break;
319
384k
        case XML_ERR_PI_NOT_STARTED:
320
384k
            errmsg = "xmlParsePI : no target name";
321
384k
            break;
322
18.8k
        case XML_ERR_RESERVED_XML_NAME:
323
18.8k
            errmsg = "Invalid PI name";
324
18.8k
            break;
325
2.38k
        case XML_ERR_NOTATION_NOT_STARTED:
326
2.38k
            errmsg = "NOTATION: Name expected here";
327
2.38k
            break;
328
7.19k
        case XML_ERR_NOTATION_NOT_FINISHED:
329
7.19k
            errmsg = "'>' required to close NOTATION declaration";
330
7.19k
            break;
331
28.1k
        case XML_ERR_VALUE_REQUIRED:
332
28.1k
            errmsg = "Entity value required";
333
28.1k
            break;
334
2.16k
        case XML_ERR_URI_FRAGMENT:
335
2.16k
            errmsg = "Fragment not allowed";
336
2.16k
            break;
337
38.5k
        case XML_ERR_ATTLIST_NOT_STARTED:
338
38.5k
            errmsg = "'(' required to start ATTLIST enumeration";
339
38.5k
            break;
340
1.45k
        case XML_ERR_NMTOKEN_REQUIRED:
341
1.45k
            errmsg = "NmToken expected in ATTLIST enumeration";
342
1.45k
            break;
343
2.71k
        case XML_ERR_ATTLIST_NOT_FINISHED:
344
2.71k
            errmsg = "')' required to finish ATTLIST enumeration";
345
2.71k
            break;
346
7.98k
        case XML_ERR_MIXED_NOT_STARTED:
347
7.98k
            errmsg = "MixedContentDecl : '|' or ')*' expected";
348
7.98k
            break;
349
0
        case XML_ERR_PCDATA_REQUIRED:
350
0
            errmsg = "MixedContentDecl : '#PCDATA' expected";
351
0
            break;
352
31.9k
        case XML_ERR_ELEMCONTENT_NOT_STARTED:
353
31.9k
            errmsg = "ContentDecl : Name or '(' expected";
354
31.9k
            break;
355
50.0k
        case XML_ERR_ELEMCONTENT_NOT_FINISHED:
356
50.0k
            errmsg = "ContentDecl : ',' '|' or ')' expected";
357
50.0k
            break;
358
0
        case XML_ERR_PEREF_IN_INT_SUBSET:
359
0
            errmsg =
360
0
                "PEReference: forbidden within markup decl in internal subset";
361
0
            break;
362
1.02M
        case XML_ERR_GT_REQUIRED:
363
1.02M
            errmsg = "expected '>'";
364
1.02M
            break;
365
76
        case XML_ERR_CONDSEC_INVALID:
366
76
            errmsg = "XML conditional section '[' expected";
367
76
            break;
368
5.43k
        case XML_ERR_EXT_SUBSET_NOT_FINISHED:
369
5.43k
            errmsg = "Content error in the external subset";
370
5.43k
            break;
371
458
        case XML_ERR_CONDSEC_INVALID_KEYWORD:
372
458
            errmsg =
373
458
                "conditional section INCLUDE or IGNORE keyword expected";
374
458
            break;
375
197
        case XML_ERR_CONDSEC_NOT_FINISHED:
376
197
            errmsg = "XML conditional section not closed";
377
197
            break;
378
93
        case XML_ERR_XMLDECL_NOT_STARTED:
379
93
            errmsg = "Text declaration '<?xml' required";
380
93
            break;
381
1.16M
        case XML_ERR_XMLDECL_NOT_FINISHED:
382
1.16M
            errmsg = "parsing XML declaration: '?>' expected";
383
1.16M
            break;
384
0
        case XML_ERR_EXT_ENTITY_STANDALONE:
385
0
            errmsg = "external parsed entities cannot be standalone";
386
0
            break;
387
1.34M
        case XML_ERR_ENTITYREF_SEMICOL_MISSING:
388
1.34M
            errmsg = "EntityRef: expecting ';'";
389
1.34M
            break;
390
310k
        case XML_ERR_DOCTYPE_NOT_FINISHED:
391
310k
            errmsg = "DOCTYPE improperly terminated";
392
310k
            break;
393
0
        case XML_ERR_LTSLASH_REQUIRED:
394
0
            errmsg = "EndTag: '</' not found";
395
0
            break;
396
77.6k
        case XML_ERR_EQUAL_REQUIRED:
397
77.6k
            errmsg = "expected '='";
398
77.6k
            break;
399
245k
        case XML_ERR_STRING_NOT_CLOSED:
400
245k
            errmsg = "String not closed expecting \" or '";
401
245k
            break;
402
51.5k
        case XML_ERR_STRING_NOT_STARTED:
403
51.5k
            errmsg = "String not started expecting ' or \"";
404
51.5k
            break;
405
14.5k
        case XML_ERR_ENCODING_NAME:
406
14.5k
            errmsg = "Invalid XML encoding name";
407
14.5k
            break;
408
6.31k
        case XML_ERR_STANDALONE_VALUE:
409
6.31k
            errmsg = "standalone accepts only 'yes' or 'no'";
410
6.31k
            break;
411
570k
        case XML_ERR_DOCUMENT_EMPTY:
412
570k
            errmsg = "Document is empty";
413
570k
            break;
414
2.31M
        case XML_ERR_DOCUMENT_END:
415
2.31M
            errmsg = "Extra content at the end of the document";
416
2.31M
            break;
417
1.85k
        case XML_ERR_NOT_WELL_BALANCED:
418
1.85k
            errmsg = "chunk is not well balanced";
419
1.85k
            break;
420
0
        case XML_ERR_EXTRA_CONTENT:
421
0
            errmsg = "extra content at the end of well balanced chunk";
422
0
            break;
423
620k
        case XML_ERR_VERSION_MISSING:
424
620k
            errmsg = "Malformed declaration expecting version";
425
620k
            break;
426
0
        case XML_ERR_NAME_TOO_LONG:
427
0
            errmsg = "Name too long";
428
0
            break;
429
#if 0
430
        case:
431
            errmsg = "";
432
            break;
433
#endif
434
8.01k
        default:
435
8.01k
            errmsg = "Unregistered error message";
436
19.4M
    }
437
19.4M
    if (ctxt != NULL)
438
19.4M
  ctxt->errNo = error;
439
19.4M
    if (info == NULL) {
440
11.9M
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
441
11.9M
                        XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s\n",
442
11.9M
                        errmsg);
443
11.9M
    } else {
444
7.52M
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
445
7.52M
                        XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s: %s\n",
446
7.52M
                        errmsg, info);
447
7.52M
    }
448
19.4M
    if (ctxt != NULL) {
449
19.4M
  ctxt->wellFormed = 0;
450
19.4M
  if (ctxt->recovery == 0)
451
2.97M
      ctxt->disableSAX = 1;
452
19.4M
    }
453
19.4M
}
454
455
/**
456
 * xmlFatalErrMsg:
457
 * @ctxt:  an XML parser context
458
 * @error:  the error number
459
 * @msg:  the error message
460
 *
461
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
462
 */
463
static void LIBXML_ATTR_FORMAT(3,0)
464
xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
465
               const char *msg)
466
25.3M
{
467
25.3M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
468
25.3M
        (ctxt->instate == XML_PARSER_EOF))
469
0
  return;
470
25.3M
    if (ctxt != NULL)
471
25.3M
  ctxt->errNo = error;
472
25.3M
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
473
25.3M
                    XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
474
25.3M
    if (ctxt != NULL) {
475
25.3M
  ctxt->wellFormed = 0;
476
25.3M
  if (ctxt->recovery == 0)
477
3.84M
      ctxt->disableSAX = 1;
478
25.3M
    }
479
25.3M
}
480
481
/**
482
 * xmlWarningMsg:
483
 * @ctxt:  an XML parser context
484
 * @error:  the error number
485
 * @msg:  the error message
486
 * @str1:  extra data
487
 * @str2:  extra data
488
 *
489
 * Handle a warning.
490
 */
491
static void LIBXML_ATTR_FORMAT(3,0)
492
xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
493
              const char *msg, const xmlChar *str1, const xmlChar *str2)
494
630k
{
495
630k
    xmlStructuredErrorFunc schannel = NULL;
496
497
630k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
498
630k
        (ctxt->instate == XML_PARSER_EOF))
499
0
  return;
500
630k
    if ((ctxt != NULL) && (ctxt->sax != NULL) &&
501
630k
        (ctxt->sax->initialized == XML_SAX2_MAGIC))
502
560k
        schannel = ctxt->sax->serror;
503
630k
    if (ctxt != NULL) {
504
630k
        __xmlRaiseError(schannel,
505
630k
                    (ctxt->sax) ? ctxt->sax->warning : NULL,
506
630k
                    ctxt->userData,
507
630k
                    ctxt, NULL, XML_FROM_PARSER, error,
508
630k
                    XML_ERR_WARNING, NULL, 0,
509
630k
        (const char *) str1, (const char *) str2, NULL, 0, 0,
510
630k
        msg, (const char *) str1, (const char *) str2);
511
630k
    } else {
512
0
        __xmlRaiseError(schannel, NULL, NULL,
513
0
                    ctxt, NULL, XML_FROM_PARSER, error,
514
0
                    XML_ERR_WARNING, NULL, 0,
515
0
        (const char *) str1, (const char *) str2, NULL, 0, 0,
516
0
        msg, (const char *) str1, (const char *) str2);
517
0
    }
518
630k
}
519
520
/**
521
 * xmlValidityError:
522
 * @ctxt:  an XML parser context
523
 * @error:  the error number
524
 * @msg:  the error message
525
 * @str1:  extra data
526
 *
527
 * Handle a validity error.
528
 */
529
static void LIBXML_ATTR_FORMAT(3,0)
530
xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
531
              const char *msg, const xmlChar *str1, const xmlChar *str2)
532
1.84k
{
533
1.84k
    xmlStructuredErrorFunc schannel = NULL;
534
535
1.84k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
536
1.84k
        (ctxt->instate == XML_PARSER_EOF))
537
0
  return;
538
1.84k
    if (ctxt != NULL) {
539
1.84k
  ctxt->errNo = error;
540
1.84k
  if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
541
1.09k
      schannel = ctxt->sax->serror;
542
1.84k
    }
543
1.84k
    if (ctxt != NULL) {
544
1.84k
        __xmlRaiseError(schannel,
545
1.84k
                    ctxt->vctxt.error, ctxt->vctxt.userData,
546
1.84k
                    ctxt, NULL, XML_FROM_DTD, error,
547
1.84k
                    XML_ERR_ERROR, NULL, 0, (const char *) str1,
548
1.84k
        (const char *) str2, NULL, 0, 0,
549
1.84k
        msg, (const char *) str1, (const char *) str2);
550
1.84k
  ctxt->valid = 0;
551
1.84k
    } else {
552
0
        __xmlRaiseError(schannel, NULL, NULL,
553
0
                    ctxt, NULL, XML_FROM_DTD, error,
554
0
                    XML_ERR_ERROR, NULL, 0, (const char *) str1,
555
0
        (const char *) str2, NULL, 0, 0,
556
0
        msg, (const char *) str1, (const char *) str2);
557
0
    }
558
1.84k
}
559
560
/**
561
 * xmlFatalErrMsgInt:
562
 * @ctxt:  an XML parser context
563
 * @error:  the error number
564
 * @msg:  the error message
565
 * @val:  an integer value
566
 *
567
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
568
 */
569
static void LIBXML_ATTR_FORMAT(3,0)
570
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
571
                  const char *msg, int val)
572
44.2M
{
573
44.2M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
574
44.2M
        (ctxt->instate == XML_PARSER_EOF))
575
0
  return;
576
44.2M
    if (ctxt != NULL)
577
44.2M
  ctxt->errNo = error;
578
44.2M
    __xmlRaiseError(NULL, NULL, NULL,
579
44.2M
                    ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
580
44.2M
                    NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
581
44.2M
    if (ctxt != NULL) {
582
44.2M
  ctxt->wellFormed = 0;
583
44.2M
  if (ctxt->recovery == 0)
584
1.82M
      ctxt->disableSAX = 1;
585
44.2M
    }
586
44.2M
}
587
588
/**
589
 * xmlFatalErrMsgStrIntStr:
590
 * @ctxt:  an XML parser context
591
 * @error:  the error number
592
 * @msg:  the error message
593
 * @str1:  an string info
594
 * @val:  an integer value
595
 * @str2:  an string info
596
 *
597
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
598
 */
599
static void LIBXML_ATTR_FORMAT(3,0)
600
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
601
                  const char *msg, const xmlChar *str1, int val,
602
      const xmlChar *str2)
603
5.89M
{
604
5.89M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
605
5.89M
        (ctxt->instate == XML_PARSER_EOF))
606
0
  return;
607
5.89M
    if (ctxt != NULL)
608
5.89M
  ctxt->errNo = error;
609
5.89M
    __xmlRaiseError(NULL, NULL, NULL,
610
5.89M
                    ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
611
5.89M
                    NULL, 0, (const char *) str1, (const char *) str2,
612
5.89M
        NULL, val, 0, msg, str1, val, str2);
613
5.89M
    if (ctxt != NULL) {
614
5.89M
  ctxt->wellFormed = 0;
615
5.89M
  if (ctxt->recovery == 0)
616
1.25M
      ctxt->disableSAX = 1;
617
5.89M
    }
618
5.89M
}
619
620
/**
621
 * xmlFatalErrMsgStr:
622
 * @ctxt:  an XML parser context
623
 * @error:  the error number
624
 * @msg:  the error message
625
 * @val:  a string value
626
 *
627
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
628
 */
629
static void LIBXML_ATTR_FORMAT(3,0)
630
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
631
                  const char *msg, const xmlChar * val)
632
13.7M
{
633
13.7M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
634
13.7M
        (ctxt->instate == XML_PARSER_EOF))
635
0
  return;
636
13.7M
    if (ctxt != NULL)
637
13.7M
  ctxt->errNo = error;
638
13.7M
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
639
13.7M
                    XML_FROM_PARSER, error, XML_ERR_FATAL,
640
13.7M
                    NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
641
13.7M
                    val);
642
13.7M
    if (ctxt != NULL) {
643
13.7M
  ctxt->wellFormed = 0;
644
13.7M
  if (ctxt->recovery == 0)
645
1.23M
      ctxt->disableSAX = 1;
646
13.7M
    }
647
13.7M
}
648
649
/**
650
 * xmlErrMsgStr:
651
 * @ctxt:  an XML parser context
652
 * @error:  the error number
653
 * @msg:  the error message
654
 * @val:  a string value
655
 *
656
 * Handle a non fatal parser error
657
 */
658
static void LIBXML_ATTR_FORMAT(3,0)
659
xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
660
                  const char *msg, const xmlChar * val)
661
79.7k
{
662
79.7k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
663
79.7k
        (ctxt->instate == XML_PARSER_EOF))
664
0
  return;
665
79.7k
    if (ctxt != NULL)
666
79.7k
  ctxt->errNo = error;
667
79.7k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
668
79.7k
                    XML_FROM_PARSER, error, XML_ERR_ERROR,
669
79.7k
                    NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
670
79.7k
                    val);
671
79.7k
}
672
673
/**
674
 * xmlNsErr:
675
 * @ctxt:  an XML parser context
676
 * @error:  the error number
677
 * @msg:  the message
678
 * @info1:  extra information string
679
 * @info2:  extra information string
680
 *
681
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
682
 */
683
static void LIBXML_ATTR_FORMAT(3,0)
684
xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
685
         const char *msg,
686
         const xmlChar * info1, const xmlChar * info2,
687
         const xmlChar * info3)
688
5.31M
{
689
5.31M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
690
5.31M
        (ctxt->instate == XML_PARSER_EOF))
691
0
  return;
692
5.31M
    if (ctxt != NULL)
693
5.31M
  ctxt->errNo = error;
694
5.31M
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
695
5.31M
                    XML_ERR_ERROR, NULL, 0, (const char *) info1,
696
5.31M
                    (const char *) info2, (const char *) info3, 0, 0, msg,
697
5.31M
                    info1, info2, info3);
698
5.31M
    if (ctxt != NULL)
699
5.31M
  ctxt->nsWellFormed = 0;
700
5.31M
}
701
702
/**
703
 * xmlNsWarn
704
 * @ctxt:  an XML parser context
705
 * @error:  the error number
706
 * @msg:  the message
707
 * @info1:  extra information string
708
 * @info2:  extra information string
709
 *
710
 * Handle a namespace warning error
711
 */
712
static void LIBXML_ATTR_FORMAT(3,0)
713
xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
714
         const char *msg,
715
         const xmlChar * info1, const xmlChar * info2,
716
         const xmlChar * info3)
717
241k
{
718
241k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
719
241k
        (ctxt->instate == XML_PARSER_EOF))
720
0
  return;
721
241k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
722
241k
                    XML_ERR_WARNING, NULL, 0, (const char *) info1,
723
241k
                    (const char *) info2, (const char *) info3, 0, 0, msg,
724
241k
                    info1, info2, info3);
725
241k
}
726
727
/*
 * xmlSaturatedAdd:
 * @dst:  accumulator, updated in place
 * @val:  amount to add
 *
 * Add @val to *@dst, clamping the result to ULONG_MAX instead of
 * wrapping around on overflow.
 */
static void
xmlSaturatedAdd(unsigned long *dst, unsigned long val) {
    /* Largest amount that still fits without overflowing. */
    const unsigned long room = ULONG_MAX - *dst;

    *dst = (val > room) ? ULONG_MAX : (*dst + val);
}
734
735
/*
 * xmlSaturatedAddSizeT:
 * @dst:  accumulator, updated in place
 * @val:  amount to add, as a size_t
 *
 * Add @val to *@dst, clamping the result to ULONG_MAX instead of
 * wrapping around on overflow.
 *
 * @val is taken as a size_t so that, on platforms where size_t is wider
 * than unsigned long, a large value is not truncated at the call site
 * before the saturation check; the comparison below is then carried out
 * in the wider type.
 */
static void
xmlSaturatedAddSizeT(unsigned long *dst, size_t val) {
    if (val > ULONG_MAX - *dst)
        *dst = ULONG_MAX;
    else
        *dst += (unsigned long) val;    /* fits: val <= ULONG_MAX - *dst */
}
742
743
/**
744
 * xmlParserEntityCheck:
745
 * @ctxt:  parser context
746
 * @extra:  sum of unexpanded entity sizes
747
 *
748
 * Check for non-linear entity expansion behaviour.
749
 *
750
 * In some cases like xmlStringDecodeEntities, this function is called
751
 * for each, possibly nested entity and its unexpanded content length.
752
 *
753
 * In other cases like xmlParseReference, it's only called for each
754
 * top-level entity with its unexpanded content length plus the sum of
755
 * the unexpanded content lengths (plus fixed cost) of all nested
756
 * entities.
757
 *
758
 * Summing the unexpanded lengths also adds the length of the reference.
759
 * This is by design. Taking the length of the entity name into account
760
 * discourages attacks that try to waste CPU time with abusively long
761
 * entity names. See test/recurse/lol6.xml for example. Each call also
762
 * adds some fixed cost XML_ENT_FIXED_COST to discourage attacks with
763
 * short entities.
764
 *
765
 * Returns 1 on error, 0 on success.
766
 */
767
static int
768
xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long extra)
769
19.9M
{
770
19.9M
    unsigned long consumed;
771
19.9M
    xmlParserInputPtr input = ctxt->input;
772
19.9M
    xmlEntityPtr entity = input->entity;
773
774
    /*
775
     * Compute total consumed bytes so far, including input streams of
776
     * external entities.
777
     */
778
19.9M
    consumed = input->parentConsumed;
779
19.9M
    if ((entity == NULL) ||
780
19.9M
        ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
781
12.2M
         ((entity->flags & XML_ENT_PARSED) == 0))) {
782
12.2M
        xmlSaturatedAdd(&consumed, input->consumed);
783
12.2M
        xmlSaturatedAddSizeT(&consumed, input->cur - input->base);
784
12.2M
    }
785
19.9M
    xmlSaturatedAdd(&consumed, ctxt->sizeentities);
786
787
    /*
788
     * Add extra cost and some fixed cost.
789
     */
790
19.9M
    xmlSaturatedAdd(&ctxt->sizeentcopy, extra);
791
19.9M
    xmlSaturatedAdd(&ctxt->sizeentcopy, XML_ENT_FIXED_COST);
792
793
    /*
794
     * It's important to always use saturation arithmetic when tracking
795
     * entity sizes to make the size checks reliable. If "sizeentcopy"
796
     * overflows, we have to abort.
797
     */
798
19.9M
    if ((ctxt->sizeentcopy > XML_MAX_TEXT_LENGTH) &&
799
19.9M
        ((ctxt->sizeentcopy >= ULONG_MAX) ||
800
372
         (ctxt->sizeentcopy / XML_PARSER_NON_LINEAR > consumed))) {
801
372
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_LOOP,
802
372
                       "Maximum entity amplification factor exceeded");
803
372
        xmlHaltParser(ctxt);
804
372
        return(1);
805
372
    }
806
807
19.9M
    return(0);
808
19.9M
}
809
810
/************************************************************************
811
 *                  *
812
 *    Library wide options          *
813
 *                  *
814
 ************************************************************************/
815
816
/**
817
  * xmlHasFeature:
818
  * @feature: the feature to be examined
819
  *
820
  * Examines if the library has been compiled with a given feature.
821
  *
822
  * Returns a non-zero value if the feature exist, otherwise zero.
823
  * Returns zero (0) if the feature does not exist or an unknown
824
  * feature is requested, non-zero otherwise.
825
  */
826
int
827
xmlHasFeature(xmlFeature feature)
828
0
{
829
0
    switch (feature) {
830
0
  case XML_WITH_THREAD:
831
0
#ifdef LIBXML_THREAD_ENABLED
832
0
      return(1);
833
#else
834
      return(0);
835
#endif
836
0
        case XML_WITH_TREE:
837
0
#ifdef LIBXML_TREE_ENABLED
838
0
            return(1);
839
#else
840
            return(0);
841
#endif
842
0
        case XML_WITH_OUTPUT:
843
0
#ifdef LIBXML_OUTPUT_ENABLED
844
0
            return(1);
845
#else
846
            return(0);
847
#endif
848
0
        case XML_WITH_PUSH:
849
0
#ifdef LIBXML_PUSH_ENABLED
850
0
            return(1);
851
#else
852
            return(0);
853
#endif
854
0
        case XML_WITH_READER:
855
0
#ifdef LIBXML_READER_ENABLED
856
0
            return(1);
857
#else
858
            return(0);
859
#endif
860
0
        case XML_WITH_PATTERN:
861
0
#ifdef LIBXML_PATTERN_ENABLED
862
0
            return(1);
863
#else
864
            return(0);
865
#endif
866
0
        case XML_WITH_WRITER:
867
0
#ifdef LIBXML_WRITER_ENABLED
868
0
            return(1);
869
#else
870
            return(0);
871
#endif
872
0
        case XML_WITH_SAX1:
873
0
#ifdef LIBXML_SAX1_ENABLED
874
0
            return(1);
875
#else
876
            return(0);
877
#endif
878
0
        case XML_WITH_FTP:
879
#ifdef LIBXML_FTP_ENABLED
880
            return(1);
881
#else
882
0
            return(0);
883
0
#endif
884
0
        case XML_WITH_HTTP:
885
#ifdef LIBXML_HTTP_ENABLED
886
            return(1);
887
#else
888
0
            return(0);
889
0
#endif
890
0
        case XML_WITH_VALID:
891
0
#ifdef LIBXML_VALID_ENABLED
892
0
            return(1);
893
#else
894
            return(0);
895
#endif
896
0
        case XML_WITH_HTML:
897
0
#ifdef LIBXML_HTML_ENABLED
898
0
            return(1);
899
#else
900
            return(0);
901
#endif
902
0
        case XML_WITH_LEGACY:
903
#ifdef LIBXML_LEGACY_ENABLED
904
            return(1);
905
#else
906
0
            return(0);
907
0
#endif
908
0
        case XML_WITH_C14N:
909
0
#ifdef LIBXML_C14N_ENABLED
910
0
            return(1);
911
#else
912
            return(0);
913
#endif
914
0
        case XML_WITH_CATALOG:
915
0
#ifdef LIBXML_CATALOG_ENABLED
916
0
            return(1);
917
#else
918
            return(0);
919
#endif
920
0
        case XML_WITH_XPATH:
921
0
#ifdef LIBXML_XPATH_ENABLED
922
0
            return(1);
923
#else
924
            return(0);
925
#endif
926
0
        case XML_WITH_XPTR:
927
0
#ifdef LIBXML_XPTR_ENABLED
928
0
            return(1);
929
#else
930
            return(0);
931
#endif
932
0
        case XML_WITH_XINCLUDE:
933
0
#ifdef LIBXML_XINCLUDE_ENABLED
934
0
            return(1);
935
#else
936
            return(0);
937
#endif
938
0
        case XML_WITH_ICONV:
939
0
#ifdef LIBXML_ICONV_ENABLED
940
0
            return(1);
941
#else
942
            return(0);
943
#endif
944
0
        case XML_WITH_ISO8859X:
945
0
#ifdef LIBXML_ISO8859X_ENABLED
946
0
            return(1);
947
#else
948
            return(0);
949
#endif
950
0
        case XML_WITH_UNICODE:
951
0
#ifdef LIBXML_UNICODE_ENABLED
952
0
            return(1);
953
#else
954
            return(0);
955
#endif
956
0
        case XML_WITH_REGEXP:
957
0
#ifdef LIBXML_REGEXP_ENABLED
958
0
            return(1);
959
#else
960
            return(0);
961
#endif
962
0
        case XML_WITH_AUTOMATA:
963
0
#ifdef LIBXML_AUTOMATA_ENABLED
964
0
            return(1);
965
#else
966
            return(0);
967
#endif
968
0
        case XML_WITH_EXPR:
969
#ifdef LIBXML_EXPR_ENABLED
970
            return(1);
971
#else
972
0
            return(0);
973
0
#endif
974
0
        case XML_WITH_SCHEMAS:
975
0
#ifdef LIBXML_SCHEMAS_ENABLED
976
0
            return(1);
977
#else
978
            return(0);
979
#endif
980
0
        case XML_WITH_SCHEMATRON:
981
0
#ifdef LIBXML_SCHEMATRON_ENABLED
982
0
            return(1);
983
#else
984
            return(0);
985
#endif
986
0
        case XML_WITH_MODULES:
987
0
#ifdef LIBXML_MODULES_ENABLED
988
0
            return(1);
989
#else
990
            return(0);
991
#endif
992
0
        case XML_WITH_DEBUG:
993
#ifdef LIBXML_DEBUG_ENABLED
994
            return(1);
995
#else
996
0
            return(0);
997
0
#endif
998
0
        case XML_WITH_DEBUG_MEM:
999
#ifdef DEBUG_MEMORY_LOCATION
1000
            return(1);
1001
#else
1002
0
            return(0);
1003
0
#endif
1004
0
        case XML_WITH_DEBUG_RUN:
1005
0
            return(0);
1006
0
        case XML_WITH_ZLIB:
1007
0
#ifdef LIBXML_ZLIB_ENABLED
1008
0
            return(1);
1009
#else
1010
            return(0);
1011
#endif
1012
0
        case XML_WITH_LZMA:
1013
0
#ifdef LIBXML_LZMA_ENABLED
1014
0
            return(1);
1015
#else
1016
            return(0);
1017
#endif
1018
0
        case XML_WITH_ICU:
1019
#ifdef LIBXML_ICU_ENABLED
1020
            return(1);
1021
#else
1022
0
            return(0);
1023
0
#endif
1024
0
        default:
1025
0
      break;
1026
0
     }
1027
0
     return(0);
1028
0
}
1029
1030
/************************************************************************
1031
 *                  *
1032
 *    SAX2 defaulted attributes handling      *
1033
 *                  *
1034
 ************************************************************************/
1035
1036
/**
1037
 * xmlDetectSAX2:
1038
 * @ctxt:  an XML parser context
1039
 *
1040
 * Do the SAX2 detection and specific initialization
1041
 */
1042
static void
1043
9.96M
xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1044
9.96M
    xmlSAXHandlerPtr sax;
1045
1046
    /* Avoid unused variable warning if features are disabled. */
1047
9.96M
    (void) sax;
1048
1049
9.96M
    if (ctxt == NULL) return;
1050
9.96M
    sax = ctxt->sax;
1051
9.96M
#ifdef LIBXML_SAX1_ENABLED
1052
9.96M
    if ((sax) &&  (sax->initialized == XML_SAX2_MAGIC) &&
1053
9.96M
        ((sax->startElementNs != NULL) ||
1054
6.28M
         (sax->endElementNs != NULL) ||
1055
6.28M
         ((sax->startElement == NULL) && (sax->endElement == NULL))))
1056
6.28M
        ctxt->sax2 = 1;
1057
#else
1058
    ctxt->sax2 = 1;
1059
#endif /* LIBXML_SAX1_ENABLED */
1060
1061
9.96M
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1062
9.96M
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1063
9.96M
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
1064
9.96M
    if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1065
9.96M
    (ctxt->str_xml_ns == NULL)) {
1066
0
        xmlErrMemory(ctxt, NULL);
1067
0
    }
1068
9.96M
}
1069
1070
typedef struct _xmlDefAttrs xmlDefAttrs;
1071
typedef xmlDefAttrs *xmlDefAttrsPtr;
1072
struct _xmlDefAttrs {
1073
    int nbAttrs;  /* number of defaulted attributes on that element */
1074
    int maxAttrs;       /* the size of the array */
1075
#if __STDC_VERSION__ >= 199901L
1076
    /* Using a C99 flexible array member avoids UBSan errors. */
1077
    const xmlChar *values[]; /* array of localname/prefix/values/external */
1078
#else
1079
    const xmlChar *values[5];
1080
#endif
1081
};
1082
1083
/**
1084
 * xmlAttrNormalizeSpace:
1085
 * @src: the source string
1086
 * @dst: the target string
1087
 *
1088
 * Normalize the space in non CDATA attribute values:
1089
 * If the attribute type is not CDATA, then the XML processor MUST further
1090
 * process the normalized attribute value by discarding any leading and
1091
 * trailing space (#x20) characters, and by replacing sequences of space
1092
 * (#x20) characters by a single space (#x20) character.
1093
 * Note that the size of dst need to be at least src, and if one doesn't need
1094
 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1095
 * passing src as dst is just fine.
1096
 *
1097
 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1098
 *         is needed.
1099
 */
1100
static xmlChar *
1101
xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1102
32.5k
{
1103
32.5k
    if ((src == NULL) || (dst == NULL))
1104
0
        return(NULL);
1105
1106
34.9k
    while (*src == 0x20) src++;
1107
1.09M
    while (*src != 0) {
1108
1.06M
  if (*src == 0x20) {
1109
68.1k
      while (*src == 0x20) src++;
1110
25.7k
      if (*src != 0)
1111
20.3k
    *dst++ = 0x20;
1112
1.03M
  } else {
1113
1.03M
      *dst++ = *src++;
1114
1.03M
  }
1115
1.06M
    }
1116
32.5k
    *dst = 0;
1117
32.5k
    if (dst == src)
1118
25.5k
       return(NULL);
1119
7.03k
    return(dst);
1120
32.5k
}
1121
1122
/**
1123
 * xmlAttrNormalizeSpace2:
1124
 * @src: the source string
1125
 *
1126
 * Normalize the space in non CDATA attribute values, a slightly more complex
1127
 * front end to avoid allocation problems when running on attribute values
1128
 * coming from the input.
1129
 *
1130
 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1131
 *         is needed.
1132
 */
1133
static const xmlChar *
1134
xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
1135
19.3k
{
1136
19.3k
    int i;
1137
19.3k
    int remove_head = 0;
1138
19.3k
    int need_realloc = 0;
1139
19.3k
    const xmlChar *cur;
1140
1141
19.3k
    if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1142
0
        return(NULL);
1143
19.3k
    i = *len;
1144
19.3k
    if (i <= 0)
1145
944
        return(NULL);
1146
1147
18.4k
    cur = src;
1148
25.1k
    while (*cur == 0x20) {
1149
6.73k
        cur++;
1150
6.73k
  remove_head++;
1151
6.73k
    }
1152
304k
    while (*cur != 0) {
1153
288k
  if (*cur == 0x20) {
1154
21.4k
      cur++;
1155
21.4k
      if ((*cur == 0x20) || (*cur == 0)) {
1156
2.62k
          need_realloc = 1;
1157
2.62k
    break;
1158
2.62k
      }
1159
21.4k
  } else
1160
266k
      cur++;
1161
288k
    }
1162
18.4k
    if (need_realloc) {
1163
2.62k
        xmlChar *ret;
1164
1165
2.62k
  ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1166
2.62k
  if (ret == NULL) {
1167
0
      xmlErrMemory(ctxt, NULL);
1168
0
      return(NULL);
1169
0
  }
1170
2.62k
  xmlAttrNormalizeSpace(ret, ret);
1171
2.62k
  *len = strlen((const char *)ret);
1172
2.62k
        return(ret);
1173
15.8k
    } else if (remove_head) {
1174
2.54k
        *len -= remove_head;
1175
2.54k
        memmove(src, src + remove_head, 1 + *len);
1176
2.54k
  return(src);
1177
2.54k
    }
1178
13.2k
    return(NULL);
1179
18.4k
}
1180
1181
/**
1182
 * xmlAddDefAttrs:
1183
 * @ctxt:  an XML parser context
1184
 * @fullname:  the element fullname
1185
 * @fullattr:  the attribute fullname
1186
 * @value:  the attribute value
1187
 *
1188
 * Add a defaulted attribute for an element
1189
 */
1190
static void
1191
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1192
               const xmlChar *fullname,
1193
               const xmlChar *fullattr,
1194
40.9k
               const xmlChar *value) {
1195
40.9k
    xmlDefAttrsPtr defaults;
1196
40.9k
    int len;
1197
40.9k
    const xmlChar *name;
1198
40.9k
    const xmlChar *prefix;
1199
1200
    /*
1201
     * Allows to detect attribute redefinitions
1202
     */
1203
40.9k
    if (ctxt->attsSpecial != NULL) {
1204
16.9k
        if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1205
1.50k
      return;
1206
16.9k
    }
1207
1208
39.4k
    if (ctxt->attsDefault == NULL) {
1209
25.4k
        ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1210
25.4k
  if (ctxt->attsDefault == NULL)
1211
0
      goto mem_error;
1212
25.4k
    }
1213
1214
    /*
1215
     * split the element name into prefix:localname , the string found
1216
     * are within the DTD and then not associated to namespace names.
1217
     */
1218
39.4k
    name = xmlSplitQName3(fullname, &len);
1219
39.4k
    if (name == NULL) {
1220
36.3k
        name = xmlDictLookup(ctxt->dict, fullname, -1);
1221
36.3k
  prefix = NULL;
1222
36.3k
    } else {
1223
3.13k
        name = xmlDictLookup(ctxt->dict, name, -1);
1224
3.13k
  prefix = xmlDictLookup(ctxt->dict, fullname, len);
1225
3.13k
    }
1226
1227
    /*
1228
     * make sure there is some storage
1229
     */
1230
39.4k
    defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1231
39.4k
    if (defaults == NULL) {
1232
31.5k
        defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
1233
31.5k
                     (4 * 5) * sizeof(const xmlChar *));
1234
31.5k
  if (defaults == NULL)
1235
0
      goto mem_error;
1236
31.5k
  defaults->nbAttrs = 0;
1237
31.5k
  defaults->maxAttrs = 4;
1238
31.5k
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1239
31.5k
                          defaults, NULL) < 0) {
1240
0
      xmlFree(defaults);
1241
0
      goto mem_error;
1242
0
  }
1243
31.5k
    } else if (defaults->nbAttrs >= defaults->maxAttrs) {
1244
86
        xmlDefAttrsPtr temp;
1245
1246
86
        temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
1247
86
           (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1248
86
  if (temp == NULL)
1249
0
      goto mem_error;
1250
86
  defaults = temp;
1251
86
  defaults->maxAttrs *= 2;
1252
86
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1253
86
                          defaults, NULL) < 0) {
1254
0
      xmlFree(defaults);
1255
0
      goto mem_error;
1256
0
  }
1257
86
    }
1258
1259
    /*
1260
     * Split the element name into prefix:localname , the string found
1261
     * are within the DTD and hen not associated to namespace names.
1262
     */
1263
39.4k
    name = xmlSplitQName3(fullattr, &len);
1264
39.4k
    if (name == NULL) {
1265
31.9k
        name = xmlDictLookup(ctxt->dict, fullattr, -1);
1266
31.9k
  prefix = NULL;
1267
31.9k
    } else {
1268
7.52k
        name = xmlDictLookup(ctxt->dict, name, -1);
1269
7.52k
  prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1270
7.52k
    }
1271
1272
39.4k
    defaults->values[5 * defaults->nbAttrs] = name;
1273
39.4k
    defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1274
    /* intern the string and precompute the end */
1275
39.4k
    len = xmlStrlen(value);
1276
39.4k
    value = xmlDictLookup(ctxt->dict, value, len);
1277
39.4k
    if (value == NULL)
1278
0
        goto mem_error;
1279
39.4k
    defaults->values[5 * defaults->nbAttrs + 2] = value;
1280
39.4k
    defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1281
39.4k
    if (ctxt->external)
1282
6.64k
        defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1283
32.7k
    else
1284
32.7k
        defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1285
39.4k
    defaults->nbAttrs++;
1286
1287
39.4k
    return;
1288
1289
0
mem_error:
1290
0
    xmlErrMemory(ctxt, NULL);
1291
0
    return;
1292
39.4k
}
1293
1294
/**
1295
 * xmlAddSpecialAttr:
1296
 * @ctxt:  an XML parser context
1297
 * @fullname:  the element fullname
1298
 * @fullattr:  the attribute fullname
1299
 * @type:  the attribute type
1300
 *
1301
 * Register this attribute type
1302
 */
1303
static void
1304
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1305
      const xmlChar *fullname,
1306
      const xmlChar *fullattr,
1307
      int type)
1308
301k
{
1309
301k
    if (ctxt->attsSpecial == NULL) {
1310
90.4k
        ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1311
90.4k
  if (ctxt->attsSpecial == NULL)
1312
0
      goto mem_error;
1313
90.4k
    }
1314
1315
301k
    if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1316
6.10k
        return;
1317
1318
295k
    xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1319
295k
                     (void *) (ptrdiff_t) type);
1320
295k
    return;
1321
1322
0
mem_error:
1323
0
    xmlErrMemory(ctxt, NULL);
1324
0
    return;
1325
301k
}
1326
1327
/**
1328
 * xmlCleanSpecialAttrCallback:
1329
 *
1330
 * Removes CDATA attributes from the special attribute table
1331
 */
1332
static void
1333
xmlCleanSpecialAttrCallback(void *payload, void *data,
1334
                            const xmlChar *fullname, const xmlChar *fullattr,
1335
203k
                            const xmlChar *unused ATTRIBUTE_UNUSED) {
1336
203k
    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1337
1338
203k
    if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) {
1339
75.1k
        xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1340
75.1k
    }
1341
203k
}
1342
1343
/**
1344
 * xmlCleanSpecialAttr:
1345
 * @ctxt:  an XML parser context
1346
 *
1347
 * Trim the list of attributes defined to remove all those of type
1348
 * CDATA as they are not special. This call should be done when finishing
1349
 * to parse the DTD and before starting to parse the document root.
1350
 */
1351
static void
1352
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1353
379k
{
1354
379k
    if (ctxt->attsSpecial == NULL)
1355
329k
        return;
1356
1357
50.3k
    xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1358
1359
50.3k
    if (xmlHashSize(ctxt->attsSpecial) == 0) {
1360
5.75k
        xmlHashFree(ctxt->attsSpecial, NULL);
1361
5.75k
        ctxt->attsSpecial = NULL;
1362
5.75k
    }
1363
50.3k
    return;
1364
379k
}
1365
1366
/**
1367
 * xmlCheckLanguageID:
1368
 * @lang:  pointer to the string value
1369
 *
1370
 * Checks that the value conforms to the LanguageID production:
1371
 *
1372
 * NOTE: this is somewhat deprecated, those productions were removed from
1373
 *       the XML Second edition.
1374
 *
1375
 * [33] LanguageID ::= Langcode ('-' Subcode)*
1376
 * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
1377
 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1378
 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1379
 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1380
 * [38] Subcode ::= ([a-z] | [A-Z])+
1381
 *
1382
 * The current REC reference the successors of RFC 1766, currently 5646
1383
 *
1384
 * http://www.rfc-editor.org/rfc/rfc5646.txt
1385
 * langtag       = language
1386
 *                 ["-" script]
1387
 *                 ["-" region]
1388
 *                 *("-" variant)
1389
 *                 *("-" extension)
1390
 *                 ["-" privateuse]
1391
 * language      = 2*3ALPHA            ; shortest ISO 639 code
1392
 *                 ["-" extlang]       ; sometimes followed by
1393
 *                                     ; extended language subtags
1394
 *               / 4ALPHA              ; or reserved for future use
1395
 *               / 5*8ALPHA            ; or registered language subtag
1396
 *
1397
 * extlang       = 3ALPHA              ; selected ISO 639 codes
1398
 *                 *2("-" 3ALPHA)      ; permanently reserved
1399
 *
1400
 * script        = 4ALPHA              ; ISO 15924 code
1401
 *
1402
 * region        = 2ALPHA              ; ISO 3166-1 code
1403
 *               / 3DIGIT              ; UN M.49 code
1404
 *
1405
 * variant       = 5*8alphanum         ; registered variants
1406
 *               / (DIGIT 3alphanum)
1407
 *
1408
 * extension     = singleton 1*("-" (2*8alphanum))
1409
 *
1410
 *                                     ; Single alphanumerics
1411
 *                                     ; "x" reserved for private use
1412
 * singleton     = DIGIT               ; 0 - 9
1413
 *               / %x41-57             ; A - W
1414
 *               / %x59-5A             ; Y - Z
1415
 *               / %x61-77             ; a - w
1416
 *               / %x79-7A             ; y - z
1417
 *
1418
 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1419
 * The parser below doesn't try to cope with extension or privateuse
1420
 * that could be added but that's not interoperable anyway
1421
 *
1422
 * Returns 1 if correct 0 otherwise
1423
 **/
1424
int
1425
xmlCheckLanguageID(const xmlChar * lang)
1426
55.3k
{
1427
55.3k
    const xmlChar *cur = lang, *nxt;
1428
1429
55.3k
    if (cur == NULL)
1430
1.78k
        return (0);
1431
53.5k
    if (((cur[0] == 'i') && (cur[1] == '-')) ||
1432
53.5k
        ((cur[0] == 'I') && (cur[1] == '-')) ||
1433
53.5k
        ((cur[0] == 'x') && (cur[1] == '-')) ||
1434
53.5k
        ((cur[0] == 'X') && (cur[1] == '-'))) {
1435
        /*
1436
         * Still allow IANA code and user code which were coming
1437
         * from the previous version of the XML-1.0 specification
1438
         * it's deprecated but we should not fail
1439
         */
1440
2.40k
        cur += 2;
1441
10.8k
        while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1442
10.8k
               ((cur[0] >= 'a') && (cur[0] <= 'z')))
1443
8.44k
            cur++;
1444
2.40k
        return(cur[0] == 0);
1445
2.40k
    }
1446
51.1k
    nxt = cur;
1447
180k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1448
180k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1449
129k
           nxt++;
1450
51.1k
    if (nxt - cur >= 4) {
1451
        /*
1452
         * Reserved
1453
         */
1454
5.00k
        if ((nxt - cur > 8) || (nxt[0] != 0))
1455
3.89k
            return(0);
1456
1.11k
        return(1);
1457
5.00k
    }
1458
46.1k
    if (nxt - cur < 2)
1459
8.18k
        return(0);
1460
    /* we got an ISO 639 code */
1461
37.9k
    if (nxt[0] == 0)
1462
17.1k
        return(1);
1463
20.8k
    if (nxt[0] != '-')
1464
4.56k
        return(0);
1465
1466
16.2k
    nxt++;
1467
16.2k
    cur = nxt;
1468
    /* now we can have extlang or script or region or variant */
1469
16.2k
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1470
1.69k
        goto region_m49;
1471
1472
64.9k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1473
64.9k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1474
50.3k
           nxt++;
1475
14.5k
    if (nxt - cur == 4)
1476
3.29k
        goto script;
1477
11.2k
    if (nxt - cur == 2)
1478
2.92k
        goto region;
1479
8.33k
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1480
1.35k
        goto variant;
1481
6.98k
    if (nxt - cur != 3)
1482
2.63k
        return(0);
1483
    /* we parsed an extlang */
1484
4.35k
    if (nxt[0] == 0)
1485
419
        return(1);
1486
3.93k
    if (nxt[0] != '-')
1487
614
        return(0);
1488
1489
3.31k
    nxt++;
1490
3.31k
    cur = nxt;
1491
    /* now we can have script or region or variant */
1492
3.31k
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1493
557
        goto region_m49;
1494
1495
19.2k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1496
19.2k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1497
16.5k
           nxt++;
1498
2.76k
    if (nxt - cur == 2)
1499
565
        goto region;
1500
2.19k
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1501
323
        goto variant;
1502
1.87k
    if (nxt - cur != 4)
1503
1.36k
        return(0);
1504
    /* we parsed a script */
1505
3.79k
script:
1506
3.79k
    if (nxt[0] == 0)
1507
359
        return(1);
1508
3.43k
    if (nxt[0] != '-')
1509
618
        return(0);
1510
1511
2.82k
    nxt++;
1512
2.82k
    cur = nxt;
1513
    /* now we can have region or variant */
1514
2.82k
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1515
492
        goto region_m49;
1516
1517
15.3k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1518
15.3k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1519
12.9k
           nxt++;
1520
1521
2.32k
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1522
341
        goto variant;
1523
1.98k
    if (nxt - cur != 2)
1524
1.58k
        return(0);
1525
    /* we parsed a region */
1526
4.50k
region:
1527
4.50k
    if (nxt[0] == 0)
1528
712
        return(1);
1529
3.79k
    if (nxt[0] != '-')
1530
1.66k
        return(0);
1531
1532
2.13k
    nxt++;
1533
2.13k
    cur = nxt;
1534
    /* now we can just have a variant */
1535
14.2k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1536
14.2k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1537
12.1k
           nxt++;
1538
1539
2.13k
    if ((nxt - cur < 5) || (nxt - cur > 8))
1540
1.79k
        return(0);
1541
1542
    /* we parsed a variant */
1543
2.35k
variant:
1544
2.35k
    if (nxt[0] == 0)
1545
677
        return(1);
1546
1.67k
    if (nxt[0] != '-')
1547
1.23k
        return(0);
1548
    /* extensions and private use subtags not checked */
1549
446
    return (1);
1550
1551
2.74k
region_m49:
1552
2.74k
    if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1553
2.74k
        ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1554
620
        nxt += 3;
1555
620
        goto region;
1556
620
    }
1557
2.12k
    return(0);
1558
2.74k
}
1559
1560
/************************************************************************
1561
 *                  *
1562
 *    Parser stacks related functions and macros    *
1563
 *                  *
1564
 ************************************************************************/
1565
1566
static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1567
                                            const xmlChar ** str);
1568
1569
#ifdef SAX2
1570
/**
1571
 * nsPush:
1572
 * @ctxt:  an XML parser context
1573
 * @prefix:  the namespace prefix or NULL
1574
 * @URL:  the namespace name
1575
 *
1576
 * Pushes a new parser namespace on top of the ns stack
1577
 *
1578
 * Returns -1 in case of error, -2 if the namespace should be discarded
1579
 *     and the index in the stack otherwise.
1580
 */
1581
static int
1582
nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1583
1.23M
{
1584
1.23M
    if (ctxt->options & XML_PARSE_NSCLEAN) {
1585
617k
        int i;
1586
813k
  for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
1587
427k
      if (ctxt->nsTab[i] == prefix) {
1588
    /* in scope */
1589
232k
          if (ctxt->nsTab[i + 1] == URL)
1590
109k
        return(-2);
1591
    /* out of scope keep it */
1592
122k
    break;
1593
232k
      }
1594
427k
  }
1595
617k
    }
1596
1.12M
    if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1597
481k
  ctxt->nsMax = 10;
1598
481k
  ctxt->nsNr = 0;
1599
481k
  ctxt->nsTab = (const xmlChar **)
1600
481k
                xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1601
481k
  if (ctxt->nsTab == NULL) {
1602
0
      xmlErrMemory(ctxt, NULL);
1603
0
      ctxt->nsMax = 0;
1604
0
            return (-1);
1605
0
  }
1606
639k
    } else if (ctxt->nsNr >= ctxt->nsMax) {
1607
27.1k
        const xmlChar ** tmp;
1608
27.1k
        ctxt->nsMax *= 2;
1609
27.1k
        tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1610
27.1k
            ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1611
27.1k
        if (tmp == NULL) {
1612
0
            xmlErrMemory(ctxt, NULL);
1613
0
      ctxt->nsMax /= 2;
1614
0
            return (-1);
1615
0
        }
1616
27.1k
  ctxt->nsTab = tmp;
1617
27.1k
    }
1618
1.12M
    ctxt->nsTab[ctxt->nsNr++] = prefix;
1619
1.12M
    ctxt->nsTab[ctxt->nsNr++] = URL;
1620
1.12M
    return (ctxt->nsNr);
1621
1.12M
}
1622
/**
1623
 * nsPop:
1624
 * @ctxt: an XML parser context
1625
 * @nr:  the number to pop
1626
 *
1627
 * Pops the top @nr parser prefix/namespace from the ns stack
1628
 *
1629
 * Returns the number of namespaces removed
1630
 */
1631
static int
1632
nsPop(xmlParserCtxtPtr ctxt, int nr)
1633
457k
{
1634
457k
    int i;
1635
1636
457k
    if (ctxt->nsTab == NULL) return(0);
1637
457k
    if (ctxt->nsNr < nr) {
1638
0
        xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1639
0
        nr = ctxt->nsNr;
1640
0
    }
1641
457k
    if (ctxt->nsNr <= 0)
1642
0
        return (0);
1643
1644
1.43M
    for (i = 0;i < nr;i++) {
1645
972k
         ctxt->nsNr--;
1646
972k
   ctxt->nsTab[ctxt->nsNr] = NULL;
1647
972k
    }
1648
457k
    return(nr);
1649
457k
}
1650
#endif
1651
1652
static int
1653
695k
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1654
695k
    const xmlChar **atts;
1655
695k
    int *attallocs;
1656
695k
    int maxatts;
1657
1658
695k
    if (nr + 5 > ctxt->maxatts) {
1659
695k
  maxatts = ctxt->maxatts == 0 ? 55 : (nr + 5) * 2;
1660
695k
  atts = (const xmlChar **) xmlMalloc(
1661
695k
             maxatts * sizeof(const xmlChar *));
1662
695k
  if (atts == NULL) goto mem_error;
1663
695k
  attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1664
695k
                               (maxatts / 5) * sizeof(int));
1665
695k
  if (attallocs == NULL) {
1666
0
            xmlFree(atts);
1667
0
            goto mem_error;
1668
0
        }
1669
695k
        if (ctxt->maxatts > 0)
1670
757
            memcpy(atts, ctxt->atts, ctxt->maxatts * sizeof(const xmlChar *));
1671
695k
        xmlFree(ctxt->atts);
1672
695k
  ctxt->atts = atts;
1673
695k
  ctxt->attallocs = attallocs;
1674
695k
  ctxt->maxatts = maxatts;
1675
695k
    }
1676
695k
    return(ctxt->maxatts);
1677
0
mem_error:
1678
0
    xmlErrMemory(ctxt, NULL);
1679
0
    return(-1);
1680
695k
}
1681
1682
/**
1683
 * inputPush:
1684
 * @ctxt:  an XML parser context
1685
 * @value:  the parser input
1686
 *
1687
 * Pushes a new parser input on top of the input stack
1688
 *
1689
 * Returns -1 in case of error, the index in the stack otherwise
1690
 */
1691
int
1692
inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1693
14.9M
{
1694
14.9M
    if ((ctxt == NULL) || (value == NULL))
1695
0
        return(-1);
1696
14.9M
    if (ctxt->inputNr >= ctxt->inputMax) {
1697
141
        size_t newSize = ctxt->inputMax * 2;
1698
141
        xmlParserInputPtr *tmp;
1699
1700
141
        tmp = (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1701
141
                                               newSize * sizeof(*tmp));
1702
141
        if (tmp == NULL) {
1703
0
            xmlErrMemory(ctxt, NULL);
1704
0
            return (-1);
1705
0
        }
1706
141
        ctxt->inputTab = tmp;
1707
141
        ctxt->inputMax = newSize;
1708
141
    }
1709
14.9M
    ctxt->inputTab[ctxt->inputNr] = value;
1710
14.9M
    ctxt->input = value;
1711
14.9M
    return (ctxt->inputNr++);
1712
14.9M
}
1713
/**
1714
 * inputPop:
1715
 * @ctxt: an XML parser context
1716
 *
1717
 * Pops the top parser input from the input stack
1718
 *
1719
 * Returns the input just removed
1720
 */
1721
xmlParserInputPtr
1722
inputPop(xmlParserCtxtPtr ctxt)
1723
29.4M
{
1724
29.4M
    xmlParserInputPtr ret;
1725
1726
29.4M
    if (ctxt == NULL)
1727
0
        return(NULL);
1728
29.4M
    if (ctxt->inputNr <= 0)
1729
14.5M
        return (NULL);
1730
14.9M
    ctxt->inputNr--;
1731
14.9M
    if (ctxt->inputNr > 0)
1732
7.66M
        ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1733
7.24M
    else
1734
7.24M
        ctxt->input = NULL;
1735
14.9M
    ret = ctxt->inputTab[ctxt->inputNr];
1736
14.9M
    ctxt->inputTab[ctxt->inputNr] = NULL;
1737
14.9M
    return (ret);
1738
29.4M
}
1739
/**
1740
 * nodePush:
1741
 * @ctxt:  an XML parser context
1742
 * @value:  the element node
1743
 *
1744
 * Pushes a new element node on top of the node stack
1745
 *
1746
 * Returns -1 in case of error, the index in the stack otherwise
1747
 */
1748
int
1749
nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1750
19.9M
{
1751
19.9M
    if (ctxt == NULL) return(0);
1752
19.9M
    if (ctxt->nodeNr >= ctxt->nodeMax) {
1753
88.1k
        xmlNodePtr *tmp;
1754
1755
88.1k
  tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1756
88.1k
                                      ctxt->nodeMax * 2 *
1757
88.1k
                                      sizeof(ctxt->nodeTab[0]));
1758
88.1k
        if (tmp == NULL) {
1759
0
            xmlErrMemory(ctxt, NULL);
1760
0
            return (-1);
1761
0
        }
1762
88.1k
        ctxt->nodeTab = tmp;
1763
88.1k
  ctxt->nodeMax *= 2;
1764
88.1k
    }
1765
19.9M
    if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1766
19.9M
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
1767
0
  xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1768
0
     "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1769
0
        xmlParserMaxDepth);
1770
0
  xmlHaltParser(ctxt);
1771
0
  return(-1);
1772
0
    }
1773
19.9M
    ctxt->nodeTab[ctxt->nodeNr] = value;
1774
19.9M
    ctxt->node = value;
1775
19.9M
    return (ctxt->nodeNr++);
1776
19.9M
}
1777
1778
/**
1779
 * nodePop:
1780
 * @ctxt: an XML parser context
1781
 *
1782
 * Pops the top element node from the node stack
1783
 *
1784
 * Returns the node just removed
1785
 */
1786
xmlNodePtr
1787
nodePop(xmlParserCtxtPtr ctxt)
1788
15.7M
{
1789
15.7M
    xmlNodePtr ret;
1790
1791
15.7M
    if (ctxt == NULL) return(NULL);
1792
15.7M
    if (ctxt->nodeNr <= 0)
1793
1.22M
        return (NULL);
1794
14.5M
    ctxt->nodeNr--;
1795
14.5M
    if (ctxt->nodeNr > 0)
1796
10.1M
        ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1797
4.34M
    else
1798
4.34M
        ctxt->node = NULL;
1799
14.5M
    ret = ctxt->nodeTab[ctxt->nodeNr];
1800
14.5M
    ctxt->nodeTab[ctxt->nodeNr] = NULL;
1801
14.5M
    return (ret);
1802
15.7M
}
1803
1804
/**
1805
 * nameNsPush:
1806
 * @ctxt:  an XML parser context
1807
 * @value:  the element name
1808
 * @prefix:  the element prefix
1809
 * @URI:  the element namespace name
1810
 * @line:  the current line number for error messages
1811
 * @nsNr:  the number of namespaces pushed on the namespace table
1812
 *
1813
 * Pushes a new element name/prefix/URL on top of the name stack
1814
 *
1815
 * Returns -1 in case of error, the index in the stack otherwise
1816
 */
1817
static int
1818
nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1819
           const xmlChar *prefix, const xmlChar *URI, int line, int nsNr)
1820
20.9M
{
1821
20.9M
    xmlStartTag *tag;
1822
1823
20.9M
    if (ctxt->nameNr >= ctxt->nameMax) {
1824
383k
        const xmlChar * *tmp;
1825
383k
        xmlStartTag *tmp2;
1826
383k
        ctxt->nameMax *= 2;
1827
383k
        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1828
383k
                                    ctxt->nameMax *
1829
383k
                                    sizeof(ctxt->nameTab[0]));
1830
383k
        if (tmp == NULL) {
1831
0
      ctxt->nameMax /= 2;
1832
0
      goto mem_error;
1833
0
        }
1834
383k
  ctxt->nameTab = tmp;
1835
383k
        tmp2 = (xmlStartTag *) xmlRealloc((void * *)ctxt->pushTab,
1836
383k
                                    ctxt->nameMax *
1837
383k
                                    sizeof(ctxt->pushTab[0]));
1838
383k
        if (tmp2 == NULL) {
1839
0
      ctxt->nameMax /= 2;
1840
0
      goto mem_error;
1841
0
        }
1842
383k
  ctxt->pushTab = tmp2;
1843
20.6M
    } else if (ctxt->pushTab == NULL) {
1844
4.38M
        ctxt->pushTab = (xmlStartTag *) xmlMalloc(ctxt->nameMax *
1845
4.38M
                                            sizeof(ctxt->pushTab[0]));
1846
4.38M
        if (ctxt->pushTab == NULL)
1847
0
            goto mem_error;
1848
4.38M
    }
1849
20.9M
    ctxt->nameTab[ctxt->nameNr] = value;
1850
20.9M
    ctxt->name = value;
1851
20.9M
    tag = &ctxt->pushTab[ctxt->nameNr];
1852
20.9M
    tag->prefix = prefix;
1853
20.9M
    tag->URI = URI;
1854
20.9M
    tag->line = line;
1855
20.9M
    tag->nsNr = nsNr;
1856
20.9M
    return (ctxt->nameNr++);
1857
0
mem_error:
1858
0
    xmlErrMemory(ctxt, NULL);
1859
0
    return (-1);
1860
20.9M
}
1861
#ifdef LIBXML_PUSH_ENABLED
/**
 * nameNsPop:
 * @ctxt: an XML parser context
 *
 * Pops the top element name from the name stack. The name below it,
 * if any, becomes ctxt->name; otherwise ctxt->name is reset to NULL.
 *
 * Returns the name just removed, or NULL if the stack is empty
 */
static const xmlChar *
nameNsPop(xmlParserCtxtPtr ctxt)
{
    const xmlChar *popped;
    int top;

    if (ctxt->nameNr <= 0)
        return (NULL);

    /* index of the slot being vacated */
    top = --ctxt->nameNr;
    ctxt->name = (top > 0) ? ctxt->nameTab[top - 1] : NULL;

    popped = ctxt->nameTab[top];
    ctxt->nameTab[top] = NULL;
    return (popped);
}
#endif /* LIBXML_PUSH_ENABLED */
1887
1888
/**
1889
 * namePush:
1890
 * @ctxt:  an XML parser context
1891
 * @value:  the element name
1892
 *
1893
 * Pushes a new element name on top of the name stack
1894
 *
1895
 * Returns -1 in case of error, the index in the stack otherwise
1896
 */
1897
int
1898
namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1899
0
{
1900
0
    if (ctxt == NULL) return (-1);
1901
1902
0
    if (ctxt->nameNr >= ctxt->nameMax) {
1903
0
        const xmlChar * *tmp;
1904
0
        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1905
0
                                    ctxt->nameMax * 2 *
1906
0
                                    sizeof(ctxt->nameTab[0]));
1907
0
        if (tmp == NULL) {
1908
0
      goto mem_error;
1909
0
        }
1910
0
  ctxt->nameTab = tmp;
1911
0
        ctxt->nameMax *= 2;
1912
0
    }
1913
0
    ctxt->nameTab[ctxt->nameNr] = value;
1914
0
    ctxt->name = value;
1915
0
    return (ctxt->nameNr++);
1916
0
mem_error:
1917
0
    xmlErrMemory(ctxt, NULL);
1918
0
    return (-1);
1919
0
}
1920
/**
1921
 * namePop:
1922
 * @ctxt: an XML parser context
1923
 *
1924
 * Pops the top element name from the name stack
1925
 *
1926
 * Returns the name just removed
1927
 */
1928
const xmlChar *
1929
namePop(xmlParserCtxtPtr ctxt)
1930
5.84M
{
1931
5.84M
    const xmlChar *ret;
1932
1933
5.84M
    if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1934
0
        return (NULL);
1935
5.84M
    ctxt->nameNr--;
1936
5.84M
    if (ctxt->nameNr > 0)
1937
5.05M
        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1938
789k
    else
1939
789k
        ctxt->name = NULL;
1940
5.84M
    ret = ctxt->nameTab[ctxt->nameNr];
1941
5.84M
    ctxt->nameTab[ctxt->nameNr] = NULL;
1942
5.84M
    return (ret);
1943
5.84M
}
1944
1945
25.3M
/*
 * spacePush: push @val on the space stack of @ctxt, doubling the stack
 * when it is full, and point ctxt->space at the new top entry.
 *
 * Returns -1 on allocation failure, the index of the new entry otherwise.
 */
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
    int slot;

    if (ctxt->spaceNr >= ctxt->spaceMax) {
        int *grown;

        ctxt->spaceMax *= 2;
        grown = (int *) xmlRealloc(ctxt->spaceTab,
                                   ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
        if (grown == NULL) {
            xmlErrMemory(ctxt, NULL);
            /* the old array is still in place: undo the doubling */
            ctxt->spaceMax /= 2;
            return(-1);
        }
        ctxt->spaceTab = grown;
    }

    slot = ctxt->spaceNr;
    ctxt->spaceTab[slot] = val;
    ctxt->space = &ctxt->spaceTab[slot];
    ctxt->spaceNr = slot + 1;
    return(slot);
}
1963
1964
19.5M
/*
 * spacePop: pop the top entry of the space stack of @ctxt. ctxt->space
 * is moved to the entry below, or to slot 0 when the stack becomes
 * empty, and the vacated slot is overwritten with -1.
 *
 * Returns the popped value, or 0 when the stack was already empty.
 */
static int spacePop(xmlParserCtxtPtr ctxt) {
    int top;
    int popped;

    if (ctxt->spaceNr <= 0)
        return(0);

    top = --ctxt->spaceNr;
    ctxt->space = &ctxt->spaceTab[(top > 0) ? (top - 1) : 0];

    popped = ctxt->spaceTab[top];
    ctxt->spaceTab[top] = -1;
    return(popped);
}
1976
1977
/*
1978
 * Macros for accessing the content. Those should be used only by the parser,
1979
 * and not exported.
1980
 *
1981
 * Dirty macros, i.e. one often needs to make assumptions about the context to
1982
 * use them
1983
 *
1984
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
1985
 *           To be used with extreme caution since operations consuming
1986
 *           characters may move the input buffer to a different location !
1987
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
1988
 *           This should be used internally by the parser
1989
 *           only to compare to ASCII values otherwise it would break when
1990
 *           running with UTF-8 encoding.
1991
 *   RAW     same as CUR but in the input buffer, bypass any token
1992
 *           extraction that may have been done
1993
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR it should be used only
1994
 *           to compare on ASCII based substring.
1995
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
1996
 *           strings without newlines within the parser.
1997
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
1998
 *           defined char within the parser.
1999
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
2000
 *
2001
 *   NEXT    Skip to the next character, this does the proper decoding
2002
 *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
2003
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
2004
 *   CUR_CHAR(l) returns the current unicode character (int), set l
2005
 *           to the number of xmlChars used for the encoding [0-5].
2006
 *   CUR_SCHAR  same but operate on a string instead of the context
2007
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
2008
 *            the index
2009
 *   GROW, SHRINK  handling of input buffers
2010
 */
2011
2012
330M
/*
 * Raw accessors on the current input. They all expect a variable named
 * ctxt to be in scope at the point of use. RAW and CUR have the same
 * expansion.
 */
#define RAW      (*ctxt->input->cur)
#define CUR      (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR  ctxt->input->cur
#define BASE_PTR ctxt->input->base
2017
2018
/*
 * CMPn(s, c1, ..., cn): true when the first n bytes at s are exactly
 * c1..cn. Bytes are compared as unsigned char, left to right, and the
 * comparison stops at the first mismatch, so s is never read past the
 * first differing byte. Every argument is parenthesized so that
 * expression arguments expand safely; s may be evaluated several times.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((const unsigned char *) (s))[ 0 ] == (c1) && \
    ((const unsigned char *) (s))[ 1 ] == (c2) && \
    ((const unsigned char *) (s))[ 2 ] == (c3) && \
    ((const unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && \
    ((const unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && \
    ((const unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && \
    ((const unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((const unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((const unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((const unsigned char *) (s))[ 9 ] == (c10) )
2035
2036
49.6M
/*
 * SKIP(val): move the cursor and the column counter forward by val
 * xmlChars, then ask for more input if the cursor now sits on a 0 byte.
 * val is evaluated twice. Line numbers are not updated.
 */
#define SKIP(val) do {							\
    ctxt->input->cur += (val),ctxt->input->col+=(val);			\
    if (*ctxt->input->cur == 0)						\
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
  } while (0)

/*
 * SKIPL(val): move the cursor forward by val xmlChars one at a time,
 * bumping the line counter (and resetting the column to 1) on each '\n'
 * and the column counter otherwise, then ask for more input if the
 * cursor now sits on a 0 byte. val is re-evaluated on every iteration;
 * it is parenthesized so that expression arguments compare as a whole.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == 0)						\
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
  } while (0)
2053
2054
257M
#define SHRINK if ((ctxt->progressive == 0) &&       \
2055
257M
       (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
2056
257M
       (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
2057
257M
  xmlSHRINK (ctxt);
2058
2059
9.64M
static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
2060
    /* Don't shrink memory buffers. */
2061
9.64M
    if ((ctxt->input->buf) &&
2062
9.64M
        ((ctxt->input->buf->encoder) || (ctxt->input->buf->readcallback)))
2063
45.6k
        xmlParserInputShrink(ctxt->input);
2064
9.64M
    if (*ctxt->input->cur == 0)
2065
345k
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2066
9.64M
}
2067
2068
528M
#define GROW if ((ctxt->progressive == 0) &&       \
2069
528M
     (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
2070
528M
  xmlGROW (ctxt);
2071
2072
65.8M
static void xmlGROW (xmlParserCtxtPtr ctxt) {
2073
65.8M
    ptrdiff_t curEnd = ctxt->input->end - ctxt->input->cur;
2074
65.8M
    ptrdiff_t curBase = ctxt->input->cur - ctxt->input->base;
2075
2076
65.8M
    if (((curEnd > XML_MAX_LOOKUP_LIMIT) ||
2077
65.8M
         (curBase > XML_MAX_LOOKUP_LIMIT)) &&
2078
65.8M
         ((ctxt->input->buf) &&
2079
0
          (ctxt->input->buf->readcallback != NULL)) &&
2080
65.8M
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
2081
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
2082
0
        xmlHaltParser(ctxt);
2083
0
  return;
2084
0
    }
2085
65.8M
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2086
65.8M
    if ((ctxt->input->cur > ctxt->input->end) ||
2087
65.8M
        (ctxt->input->cur < ctxt->input->base)) {
2088
0
        xmlHaltParser(ctxt);
2089
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "cur index out of bound");
2090
0
  return;
2091
0
    }
2092
65.8M
    if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0))
2093
2.88M
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2094
65.8M
}
2095
2096
95.1M
/* Shorthands for the two character-consuming helpers. */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: step over exactly one xmlChar, counting one column, and ask
 * for more input if the cursor now sits on a 0 byte. Line numbers are
 * not updated. Expands to a plain brace block.
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL(l): step over the current character, which is l xmlChars long.
 * A '\n' bumps the line counter and resets the column to 1, anything
 * else counts as one column. No input refill is attempted.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);						\
  } while (0)

/* Decode the current character; l receives its length in xmlChars. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l,b,i,v): append character v to buffer b at index i and
 * advance i. When l is 1 the value is stored as a single xmlChar;
 * otherwise xmlCopyCharMultiByte() writes it and its return value is
 * added to i. Wrapped in do/while (0) so that it behaves as a single
 * statement next to if/else.
 */
#define COPY_BUF(l,b,i,v)						\
    do {								\
	if ((l) == 1) (b)[(i)++] = (v);					\
	else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v));		\
    } while (0)
2120
2121
/**
2122
 * xmlSkipBlankChars:
2123
 * @ctxt:  the XML parser context
2124
 *
2125
 * skip all blanks character found at that point in the input streams.
2126
 * It pops up finished entities in the process if allowable at that point.
2127
 *
2128
 * Returns the number of space chars skipped
2129
 */
2130
2131
int
2132
95.1M
xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2133
95.1M
    int res = 0;
2134
2135
    /*
2136
     * It's Okay to use CUR/NEXT here since all the blanks are on
2137
     * the ASCII range.
2138
     */
2139
95.1M
    if (((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) ||
2140
95.1M
        (ctxt->instate == XML_PARSER_START)) {
2141
77.8M
  const xmlChar *cur;
2142
  /*
2143
   * if we are in the document content, go really fast
2144
   */
2145
77.8M
  cur = ctxt->input->cur;
2146
77.8M
  while (IS_BLANK_CH(*cur)) {
2147
28.6M
      if (*cur == '\n') {
2148
3.47M
    ctxt->input->line++; ctxt->input->col = 1;
2149
25.1M
      } else {
2150
25.1M
    ctxt->input->col++;
2151
25.1M
      }
2152
28.6M
      cur++;
2153
28.6M
      if (res < INT_MAX)
2154
28.6M
    res++;
2155
28.6M
      if (*cur == 0) {
2156
106k
    ctxt->input->cur = cur;
2157
106k
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2158
106k
    cur = ctxt->input->cur;
2159
106k
      }
2160
28.6M
  }
2161
77.8M
  ctxt->input->cur = cur;
2162
77.8M
    } else {
2163
17.3M
        int expandPE = ((ctxt->external != 0) || (ctxt->inputNr != 1));
2164
2165
45.1M
  while (ctxt->instate != XML_PARSER_EOF) {
2166
45.1M
            if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2167
12.0M
    NEXT;
2168
33.0M
      } else if (CUR == '%') {
2169
                /*
2170
                 * Need to handle support of entities branching here
2171
                 */
2172
8.37M
          if ((expandPE == 0) || (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2173
275k
                    break;
2174
8.10M
          xmlParsePEReference(ctxt);
2175
24.7M
            } else if (CUR == 0) {
2176
7.72M
                unsigned long consumed;
2177
7.72M
                xmlEntityPtr ent;
2178
2179
7.72M
                if (ctxt->inputNr <= 1)
2180
65.5k
                    break;
2181
2182
7.66M
                consumed = ctxt->input->consumed;
2183
7.66M
                xmlSaturatedAddSizeT(&consumed,
2184
7.66M
                                     ctxt->input->cur - ctxt->input->base);
2185
2186
                /*
2187
                 * Add to sizeentities when parsing an external entity
2188
                 * for the first time.
2189
                 */
2190
7.66M
                ent = ctxt->input->entity;
2191
7.66M
                if ((ent->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
2192
7.66M
                    ((ent->flags & XML_ENT_PARSED) == 0)) {
2193
440
                    ent->flags |= XML_ENT_PARSED;
2194
2195
440
                    xmlSaturatedAdd(&ctxt->sizeentities, consumed);
2196
440
                }
2197
2198
7.66M
                xmlParserEntityCheck(ctxt, consumed);
2199
2200
7.66M
                xmlPopInput(ctxt);
2201
16.9M
            } else {
2202
16.9M
                break;
2203
16.9M
            }
2204
2205
            /*
2206
             * Also increase the counter when entering or exiting a PERef.
2207
             * The spec says: "When a parameter-entity reference is recognized
2208
             * in the DTD and included, its replacement text MUST be enlarged
2209
             * by the attachment of one leading and one following space (#x20)
2210
             * character."
2211
             */
2212
27.8M
      if (res < INT_MAX)
2213
27.8M
    res++;
2214
27.8M
        }
2215
17.3M
    }
2216
95.1M
    return(res);
2217
95.1M
}
2218
2219
/************************************************************************
2220
 *                  *
2221
 *    Commodity functions to handle entities      *
2222
 *                  *
2223
 ************************************************************************/
2224
2225
/**
2226
 * xmlPopInput:
2227
 * @ctxt:  an XML parser context
2228
 *
2229
 * xmlPopInput: the current input pointed by ctxt->input came to an end
2230
 *          pop it and return the next char.
2231
 *
2232
 * Returns the current xmlChar in the parser context
2233
 */
2234
xmlChar
2235
7.66M
xmlPopInput(xmlParserCtxtPtr ctxt) {
2236
7.66M
    xmlParserInputPtr input;
2237
2238
7.66M
    if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2239
7.66M
    if (xmlParserDebugEntities)
2240
0
  xmlGenericError(xmlGenericErrorContext,
2241
0
    "Popping input %d\n", ctxt->inputNr);
2242
7.66M
    if ((ctxt->inputNr > 1) && (ctxt->inSubset == 0) &&
2243
7.66M
        (ctxt->instate != XML_PARSER_EOF))
2244
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2245
0
                    "Unfinished entity outside the DTD");
2246
7.66M
    input = inputPop(ctxt);
2247
7.66M
    if (input->entity != NULL)
2248
7.66M
        input->entity->flags &= ~XML_ENT_EXPANDING;
2249
7.66M
    xmlFreeInputStream(input);
2250
7.66M
    if (*ctxt->input->cur == 0)
2251
3.78M
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2252
7.66M
    return(CUR);
2253
7.66M
}
2254
2255
/**
2256
 * xmlPushInput:
2257
 * @ctxt:  an XML parser context
2258
 * @input:  an XML parser input fragment (entity, XML fragment ...).
2259
 *
2260
 * xmlPushInput: switch to a new input stream which is stacked on top
2261
 *               of the previous one(s).
2262
 * Returns -1 in case of error or the index in the input stack
2263
 */
2264
int
2265
7.69M
xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2266
7.69M
    int ret;
2267
7.69M
    if (input == NULL) return(-1);
2268
2269
7.67M
    if (xmlParserDebugEntities) {
2270
0
  if ((ctxt->input != NULL) && (ctxt->input->filename))
2271
0
      xmlGenericError(xmlGenericErrorContext,
2272
0
        "%s(%d): ", ctxt->input->filename,
2273
0
        ctxt->input->line);
2274
0
  xmlGenericError(xmlGenericErrorContext,
2275
0
    "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2276
0
    }
2277
7.67M
    if (((ctxt->inputNr > 40) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2278
7.67M
        (ctxt->inputNr > 100)) {
2279
0
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2280
0
        while (ctxt->inputNr > 1)
2281
0
            xmlFreeInputStream(inputPop(ctxt));
2282
0
  return(-1);
2283
0
    }
2284
7.67M
    ret = inputPush(ctxt, input);
2285
7.67M
    if (ctxt->instate == XML_PARSER_EOF)
2286
0
        return(-1);
2287
7.67M
    GROW;
2288
7.67M
    return(ret);
2289
7.67M
}
2290
2291
/**
2292
 * xmlParseCharRef:
2293
 * @ctxt:  an XML parser context
2294
 *
2295
 * DEPRECATED: Internal function, don't use.
2296
 *
2297
 * Parse a numeric character reference. Always consumes '&'.
2298
 *
2299
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2300
 *                  '&#x' [0-9a-fA-F]+ ';'
2301
 *
2302
 * [ WFC: Legal Character ]
2303
 * Characters referred to using character references must match the
2304
 * production for Char.
2305
 *
2306
 * Returns the value parsed (as an int), 0 in case of error
2307
 */
2308
int
2309
2.27M
xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2310
2.27M
    int val = 0;
2311
2.27M
    int count = 0;
2312
2313
    /*
2314
     * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2315
     */
2316
2.27M
    if ((RAW == '&') && (NXT(1) == '#') &&
2317
2.27M
        (NXT(2) == 'x')) {
2318
666k
  SKIP(3);
2319
666k
  GROW;
2320
2.60M
  while (RAW != ';') { /* loop blocked by count */
2321
2.18M
      if (count++ > 20) {
2322
71.7k
    count = 0;
2323
71.7k
    GROW;
2324
71.7k
                if (ctxt->instate == XML_PARSER_EOF)
2325
0
                    return(0);
2326
71.7k
      }
2327
2.18M
      if ((RAW >= '0') && (RAW <= '9'))
2328
1.36M
          val = val * 16 + (CUR - '0');
2329
821k
      else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2330
294k
          val = val * 16 + (CUR - 'a') + 10;
2331
526k
      else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2332
283k
          val = val * 16 + (CUR - 'A') + 10;
2333
243k
      else {
2334
243k
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2335
243k
    val = 0;
2336
243k
    break;
2337
243k
      }
2338
1.94M
      if (val > 0x110000)
2339
776k
          val = 0x110000;
2340
2341
1.94M
      NEXT;
2342
1.94M
      count++;
2343
1.94M
  }
2344
666k
  if (RAW == ';') {
2345
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2346
422k
      ctxt->input->col++;
2347
422k
      ctxt->input->cur++;
2348
422k
  }
2349
1.61M
    } else if  ((RAW == '&') && (NXT(1) == '#')) {
2350
1.61M
  SKIP(2);
2351
1.61M
  GROW;
2352
5.59M
  while (RAW != ';') { /* loop blocked by count */
2353
4.42M
      if (count++ > 20) {
2354
75.4k
    count = 0;
2355
75.4k
    GROW;
2356
75.4k
                if (ctxt->instate == XML_PARSER_EOF)
2357
0
                    return(0);
2358
75.4k
      }
2359
4.42M
      if ((RAW >= '0') && (RAW <= '9'))
2360
3.98M
          val = val * 10 + (CUR - '0');
2361
439k
      else {
2362
439k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2363
439k
    val = 0;
2364
439k
    break;
2365
439k
      }
2366
3.98M
      if (val > 0x110000)
2367
689k
          val = 0x110000;
2368
2369
3.98M
      NEXT;
2370
3.98M
      count++;
2371
3.98M
  }
2372
1.61M
  if (RAW == ';') {
2373
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2374
1.17M
      ctxt->input->col++;
2375
1.17M
      ctxt->input->cur++;
2376
1.17M
  }
2377
1.61M
    } else {
2378
0
        if (RAW == '&')
2379
0
            SKIP(1);
2380
0
        xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2381
0
    }
2382
2383
    /*
2384
     * [ WFC: Legal Character ]
2385
     * Characters referred to using character references must match the
2386
     * production for Char.
2387
     */
2388
2.27M
    if (val >= 0x110000) {
2389
13.8k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2390
13.8k
                "xmlParseCharRef: character reference out of bounds\n",
2391
13.8k
          val);
2392
2.26M
    } else if (IS_CHAR(val)) {
2393
1.43M
        return(val);
2394
1.43M
    } else {
2395
823k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2396
823k
                          "xmlParseCharRef: invalid xmlChar value %d\n",
2397
823k
                    val);
2398
823k
    }
2399
836k
    return(0);
2400
2.27M
}
2401
2402
/**
2403
 * xmlParseStringCharRef:
2404
 * @ctxt:  an XML parser context
2405
 * @str:  a pointer to an index in the string
2406
 *
2407
 * parse Reference declarations, variant parsing from a string rather
2408
 * than an an input flow.
2409
 *
2410
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2411
 *                  '&#x' [0-9a-fA-F]+ ';'
2412
 *
2413
 * [ WFC: Legal Character ]
2414
 * Characters referred to using character references must match the
2415
 * production for Char.
2416
 *
2417
 * Returns the value parsed (as an int), 0 in case of error, str will be
2418
 *         updated to the current value of the index
2419
 */
2420
static int
2421
179k
xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2422
179k
    const xmlChar *ptr;
2423
179k
    xmlChar cur;
2424
179k
    int val = 0;
2425
2426
179k
    if ((str == NULL) || (*str == NULL)) return(0);
2427
179k
    ptr = *str;
2428
179k
    cur = *ptr;
2429
179k
    if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2430
61.5k
  ptr += 3;
2431
61.5k
  cur = *ptr;
2432
133k
  while (cur != ';') { /* Non input consuming loop */
2433
77.9k
      if ((cur >= '0') && (cur <= '9'))
2434
13.1k
          val = val * 16 + (cur - '0');
2435
64.8k
      else if ((cur >= 'a') && (cur <= 'f'))
2436
4.05k
          val = val * 16 + (cur - 'a') + 10;
2437
60.7k
      else if ((cur >= 'A') && (cur <= 'F'))
2438
54.3k
          val = val * 16 + (cur - 'A') + 10;
2439
6.46k
      else {
2440
6.46k
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2441
6.46k
    val = 0;
2442
6.46k
    break;
2443
6.46k
      }
2444
71.4k
      if (val > 0x110000)
2445
5.04k
          val = 0x110000;
2446
2447
71.4k
      ptr++;
2448
71.4k
      cur = *ptr;
2449
71.4k
  }
2450
61.5k
  if (cur == ';')
2451
55.0k
      ptr++;
2452
118k
    } else if  ((cur == '&') && (ptr[1] == '#')){
2453
118k
  ptr += 2;
2454
118k
  cur = *ptr;
2455
423k
  while (cur != ';') { /* Non input consuming loops */
2456
319k
      if ((cur >= '0') && (cur <= '9'))
2457
305k
          val = val * 10 + (cur - '0');
2458
13.9k
      else {
2459
13.9k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2460
13.9k
    val = 0;
2461
13.9k
    break;
2462
13.9k
      }
2463
305k
      if (val > 0x110000)
2464
10.0k
          val = 0x110000;
2465
2466
305k
      ptr++;
2467
305k
      cur = *ptr;
2468
305k
  }
2469
118k
  if (cur == ';')
2470
104k
      ptr++;
2471
118k
    } else {
2472
0
  xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2473
0
  return(0);
2474
0
    }
2475
179k
    *str = ptr;
2476
2477
    /*
2478
     * [ WFC: Legal Character ]
2479
     * Characters referred to using character references must match the
2480
     * production for Char.
2481
     */
2482
179k
    if (val >= 0x110000) {
2483
111
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2484
111
                "xmlParseStringCharRef: character reference out of bounds\n",
2485
111
                val);
2486
179k
    } else if (IS_CHAR(val)) {
2487
157k
        return(val);
2488
157k
    } else {
2489
21.8k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2490
21.8k
        "xmlParseStringCharRef: invalid xmlChar value %d\n",
2491
21.8k
        val);
2492
21.8k
    }
2493
22.0k
    return(0);
2494
179k
}
2495
2496
/**
2497
 * xmlParserHandlePEReference:
2498
 * @ctxt:  the parser context
2499
 *
2500
 * [69] PEReference ::= '%' Name ';'
2501
 *
2502
 * [ WFC: No Recursion ]
2503
 * A parsed entity must not contain a recursive
2504
 * reference to itself, either directly or indirectly.
2505
 *
2506
 * [ WFC: Entity Declared ]
2507
 * In a document without any DTD, a document with only an internal DTD
2508
 * subset which contains no parameter entity references, or a document
2509
 * with "standalone='yes'", ...  ... The declaration of a parameter
2510
 * entity must precede any reference to it...
2511
 *
2512
 * [ VC: Entity Declared ]
2513
 * In a document with an external subset or external parameter entities
2514
 * with "standalone='no'", ...  ... The declaration of a parameter entity
2515
 * must precede any reference to it...
2516
 *
2517
 * [ WFC: In DTD ]
2518
 * Parameter-entity references may only appear in the DTD.
2519
 * NOTE: misleading but this is handled.
2520
 *
2521
 * A PEReference may have been detected in the current input stream
2522
 * the handling is done accordingly to
2523
 *      http://www.w3.org/TR/REC-xml#entproc
2524
 * i.e.
2525
 *   - Included in literal in entity values
2526
 *   - Included as Parameter Entity reference within DTDs
2527
 */
2528
void
2529
0
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2530
0
    switch(ctxt->instate) {
2531
0
  case XML_PARSER_CDATA_SECTION:
2532
0
      return;
2533
0
        case XML_PARSER_COMMENT:
2534
0
      return;
2535
0
  case XML_PARSER_START_TAG:
2536
0
      return;
2537
0
  case XML_PARSER_END_TAG:
2538
0
      return;
2539
0
        case XML_PARSER_EOF:
2540
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2541
0
      return;
2542
0
        case XML_PARSER_PROLOG:
2543
0
  case XML_PARSER_START:
2544
0
  case XML_PARSER_MISC:
2545
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2546
0
      return;
2547
0
  case XML_PARSER_ENTITY_DECL:
2548
0
        case XML_PARSER_CONTENT:
2549
0
        case XML_PARSER_ATTRIBUTE_VALUE:
2550
0
        case XML_PARSER_PI:
2551
0
  case XML_PARSER_SYSTEM_LITERAL:
2552
0
  case XML_PARSER_PUBLIC_LITERAL:
2553
      /* we just ignore it there */
2554
0
      return;
2555
0
        case XML_PARSER_EPILOG:
2556
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2557
0
      return;
2558
0
  case XML_PARSER_ENTITY_VALUE:
2559
      /*
2560
       * NOTE: in the case of entity values, we don't do the
2561
       *       substitution here since we need the literal
2562
       *       entity value to be able to save the internal
2563
       *       subset of the document.
2564
       *       This will be handled by xmlStringDecodeEntities
2565
       */
2566
0
      return;
2567
0
        case XML_PARSER_DTD:
2568
      /*
2569
       * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2570
       * In the internal DTD subset, parameter-entity references
2571
       * can occur only where markup declarations can occur, not
2572
       * within markup declarations.
2573
       * In that case this is handled in xmlParseMarkupDecl
2574
       */
2575
0
      if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2576
0
    return;
2577
0
      if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2578
0
    return;
2579
0
            break;
2580
0
        case XML_PARSER_IGNORE:
2581
0
            return;
2582
0
    }
2583
2584
0
    xmlParsePEReference(ctxt);
2585
0
}
2586
2587
/*
 * Macro used to grow the current buffer to twice its size plus n bytes.
 * buffer##_size is expected to be a size_t
 * mem_error: is expected to handle memory allocation failures; it is
 * also the target when the new size wraps around. On failure buffer
 * and buffer##_size are left unchanged.
 * n is parenthesized so that expression arguments are added as a whole.
 */
#define growBuffer(buffer, n) {						\
    xmlChar *tmp;							\
    size_t new_size = buffer##_size * 2 + (n);                          \
    if (new_size < buffer##_size) goto mem_error;                       \
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);                     \
    if (tmp == NULL) goto mem_error;					\
    buffer = tmp;							\
    buffer##_size = new_size;                                           \
}
2601
2602
/**
2603
 * xmlStringDecodeEntitiesInt:
2604
 * @ctxt:  the parser context
2605
 * @str:  the input string
2606
 * @len: the string length
2607
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2608
 * @end:  an end marker xmlChar, 0 if none
2609
 * @end2:  an end marker xmlChar, 0 if none
2610
 * @end3:  an end marker xmlChar, 0 if none
2611
 * @check:  whether to perform entity checks
2612
 */
2613
static xmlChar *
2614
xmlStringDecodeEntitiesInt(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2615
               int what, xmlChar end, xmlChar  end2, xmlChar end3,
2616
12.5M
                           int check) {
2617
12.5M
    xmlChar *buffer = NULL;
2618
12.5M
    size_t buffer_size = 0;
2619
12.5M
    size_t nbchars = 0;
2620
2621
12.5M
    xmlChar *current = NULL;
2622
12.5M
    xmlChar *rep = NULL;
2623
12.5M
    const xmlChar *last;
2624
12.5M
    xmlEntityPtr ent;
2625
12.5M
    int c,l;
2626
2627
12.5M
    if (str == NULL)
2628
5.53k
        return(NULL);
2629
12.5M
    last = str + len;
2630
2631
12.5M
    if (((ctxt->depth > 40) &&
2632
12.5M
         ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2633
12.5M
  (ctxt->depth > 100)) {
2634
0
  xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_LOOP,
2635
0
                       "Maximum entity nesting depth exceeded");
2636
0
  return(NULL);
2637
0
    }
2638
2639
    /*
2640
     * allocate a translation buffer.
2641
     */
2642
12.5M
    buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2643
12.5M
    buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
2644
12.5M
    if (buffer == NULL) goto mem_error;
2645
2646
    /*
2647
     * OK loop until we reach one of the ending char or a size limit.
2648
     * we are operating on already parsed values.
2649
     */
2650
12.5M
    if (str < last)
2651
12.5M
  c = CUR_SCHAR(str, l);
2652
31.7k
    else
2653
31.7k
        c = 0;
2654
1.38G
    while ((c != 0) && (c != end) && /* non input consuming loop */
2655
1.38G
           (c != end2) && (c != end3) &&
2656
1.38G
           (ctxt->instate != XML_PARSER_EOF)) {
2657
2658
1.37G
  if (c == 0) break;
2659
1.37G
        if ((c == '&') && (str[1] == '#')) {
2660
179k
      int val = xmlParseStringCharRef(ctxt, &str);
2661
179k
      if (val == 0)
2662
22.0k
                goto int_error;
2663
157k
      COPY_BUF(0,buffer,nbchars,val);
2664
157k
      if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2665
190
          growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2666
190
      }
2667
1.37G
  } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2668
11.4M
      if (xmlParserDebugEntities)
2669
0
    xmlGenericError(xmlGenericErrorContext,
2670
0
      "String decoding Entity Reference: %.30s\n",
2671
0
      str);
2672
11.4M
      ent = xmlParseStringEntityRef(ctxt, &str);
2673
11.4M
      if ((ent != NULL) &&
2674
11.4M
    (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2675
147
    if (ent->content != NULL) {
2676
147
        COPY_BUF(0,buffer,nbchars,ent->content[0]);
2677
147
        if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2678
0
      growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2679
0
        }
2680
147
    } else {
2681
0
        xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2682
0
          "predefined entity has no content\n");
2683
0
                    goto int_error;
2684
0
    }
2685
11.4M
      } else if ((ent != NULL) && (ent->content != NULL)) {
2686
11.4M
          if ((check) && (xmlParserEntityCheck(ctxt, ent->length)))
2687
138
                    goto int_error;
2688
2689
11.4M
                if (ent->flags & XML_ENT_EXPANDING) {
2690
75
              xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2691
75
                    xmlHaltParser(ctxt);
2692
75
                    ent->content[0] = 0;
2693
75
                    goto int_error;
2694
75
                }
2695
2696
11.4M
                ent->flags |= XML_ENT_EXPANDING;
2697
11.4M
    ctxt->depth++;
2698
11.4M
    rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
2699
11.4M
                        ent->length, what, 0, 0, 0, check);
2700
11.4M
    ctxt->depth--;
2701
11.4M
                ent->flags &= ~XML_ENT_EXPANDING;
2702
2703
11.4M
    if (rep == NULL) {
2704
2.45k
                    ent->content[0] = 0;
2705
2.45k
                    goto int_error;
2706
2.45k
                }
2707
2708
11.4M
                current = rep;
2709
3.47G
                while (*current != 0) { /* non input consuming loop */
2710
3.46G
                    buffer[nbchars++] = *current++;
2711
3.46G
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2712
1.07M
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2713
1.07M
                    }
2714
3.46G
                }
2715
11.4M
                xmlFree(rep);
2716
11.4M
                rep = NULL;
2717
11.4M
      } else if (ent != NULL) {
2718
40
    int i = xmlStrlen(ent->name);
2719
40
    const xmlChar *cur = ent->name;
2720
2721
40
    buffer[nbchars++] = '&';
2722
40
    if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
2723
0
        growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
2724
0
    }
2725
140
    for (;i > 0;i--)
2726
100
        buffer[nbchars++] = *cur++;
2727
40
    buffer[nbchars++] = ';';
2728
40
      }
2729
1.36G
  } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2730
48.0k
      if (xmlParserDebugEntities)
2731
0
    xmlGenericError(xmlGenericErrorContext,
2732
0
      "String decoding PE Reference: %.30s\n", str);
2733
48.0k
      ent = xmlParseStringPEReference(ctxt, &str);
2734
48.0k
      if (ent != NULL) {
2735
31.7k
                if (ent->content == NULL) {
2736
        /*
2737
         * Note: external parsed entities will not be loaded,
2738
         * it is not required for a non-validating parser to
2739
         * complete external PEReferences coming from the
2740
         * internal subset
2741
         */
2742
45
        if (((ctxt->options & XML_PARSE_NOENT) != 0) ||
2743
45
      ((ctxt->options & XML_PARSE_DTDVALID) != 0) ||
2744
45
      (ctxt->validate != 0)) {
2745
45
      xmlLoadEntityContent(ctxt, ent);
2746
45
        } else {
2747
0
      xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
2748
0
      "not validating will not read content for PE entity %s\n",
2749
0
                          ent->name, NULL);
2750
0
        }
2751
45
    }
2752
2753
31.7k
          if ((check) && (xmlParserEntityCheck(ctxt, ent->length)))
2754
93
                    goto int_error;
2755
2756
31.6k
                if (ent->flags & XML_ENT_EXPANDING) {
2757
0
              xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2758
0
                    xmlHaltParser(ctxt);
2759
0
                    if (ent->content != NULL)
2760
0
                        ent->content[0] = 0;
2761
0
                    goto int_error;
2762
0
                }
2763
2764
31.6k
                ent->flags |= XML_ENT_EXPANDING;
2765
31.6k
    ctxt->depth++;
2766
31.6k
    rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
2767
31.6k
                        ent->length, what, 0, 0, 0, check);
2768
31.6k
    ctxt->depth--;
2769
31.6k
                ent->flags &= ~XML_ENT_EXPANDING;
2770
2771
31.6k
    if (rep == NULL) {
2772
0
                    if (ent->content != NULL)
2773
0
                        ent->content[0] = 0;
2774
0
                    goto int_error;
2775
0
                }
2776
31.6k
                current = rep;
2777
814M
                while (*current != 0) { /* non input consuming loop */
2778
814M
                    buffer[nbchars++] = *current++;
2779
814M
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2780
13.3k
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2781
13.3k
                    }
2782
814M
                }
2783
31.6k
                xmlFree(rep);
2784
31.6k
                rep = NULL;
2785
31.6k
      }
2786
1.36G
  } else {
2787
1.36G
      COPY_BUF(l,buffer,nbchars,c);
2788
1.36G
      str += l;
2789
1.36G
      if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2790
247k
          growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2791
247k
      }
2792
1.36G
  }
2793
1.37G
  if (str < last)
2794
1.36G
      c = CUR_SCHAR(str, l);
2795
12.4M
  else
2796
12.4M
      c = 0;
2797
1.37G
    }
2798
12.5M
    buffer[nbchars] = 0;
2799
12.5M
    return(buffer);
2800
2801
0
mem_error:
2802
0
    xmlErrMemory(ctxt, NULL);
2803
24.7k
int_error:
2804
24.7k
    if (rep != NULL)
2805
0
        xmlFree(rep);
2806
24.7k
    if (buffer != NULL)
2807
24.7k
        xmlFree(buffer);
2808
24.7k
    return(NULL);
2809
0
}
2810
2811
/**
2812
 * xmlStringLenDecodeEntities:
2813
 * @ctxt:  the parser context
2814
 * @str:  the input string
2815
 * @len: the string length
2816
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2817
 * @end:  an end marker xmlChar, 0 if none
2818
 * @end2:  an end marker xmlChar, 0 if none
2819
 * @end3:  an end marker xmlChar, 0 if none
2820
 *
2821
 * DEPRECATED: Internal function, don't use.
2822
 *
2823
 * Takes a entity string content and process to do the adequate substitutions.
2824
 *
2825
 * [67] Reference ::= EntityRef | CharRef
2826
 *
2827
 * [69] PEReference ::= '%' Name ';'
2828
 *
2829
 * Returns A newly allocated string with the substitution done. The caller
2830
 *      must deallocate it !
2831
 */
2832
xmlChar *
2833
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2834
                           int what, xmlChar end, xmlChar  end2,
2835
276
                           xmlChar end3) {
2836
276
    if ((ctxt == NULL) || (str == NULL) || (len < 0))
2837
0
        return(NULL);
2838
276
    return(xmlStringDecodeEntitiesInt(ctxt, str, len, what,
2839
276
                                      end, end2, end3, 0));
2840
276
}
2841
2842
/**
2843
 * xmlStringDecodeEntities:
2844
 * @ctxt:  the parser context
2845
 * @str:  the input string
2846
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2847
 * @end:  an end marker xmlChar, 0 if none
2848
 * @end2:  an end marker xmlChar, 0 if none
2849
 * @end3:  an end marker xmlChar, 0 if none
2850
 *
2851
 * DEPRECATED: Internal function, don't use.
2852
 *
2853
 * Takes a entity string content and process to do the adequate substitutions.
2854
 *
2855
 * [67] Reference ::= EntityRef | CharRef
2856
 *
2857
 * [69] PEReference ::= '%' Name ';'
2858
 *
2859
 * Returns A newly allocated string with the substitution done. The caller
2860
 *      must deallocate it !
2861
 */
2862
xmlChar *
2863
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2864
393k
            xmlChar end, xmlChar  end2, xmlChar end3) {
2865
393k
    if ((ctxt == NULL) || (str == NULL)) return(NULL);
2866
393k
    return(xmlStringDecodeEntitiesInt(ctxt, str, xmlStrlen(str), what,
2867
393k
                                      end, end2, end3, 0));
2868
393k
}
2869
2870
/************************************************************************
2871
 *                  *
2872
 *    Commodity functions, cleanup needed ?     *
2873
 *                  *
2874
 ************************************************************************/
2875
2876
/**
2877
 * areBlanks:
2878
 * @ctxt:  an XML parser context
2879
 * @str:  a xmlChar *
2880
 * @len:  the size of @str
2881
 * @blank_chars: we know the chars are blanks
2882
 *
2883
 * Is this a sequence of blank chars that one can ignore ?
2884
 *
2885
 * Returns 1 if ignorable 0 otherwise.
2886
 */
2887
2888
static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2889
17.0M
                     int blank_chars) {
2890
17.0M
    int i, ret;
2891
17.0M
    xmlNodePtr lastChild;
2892
2893
    /*
2894
     * Don't spend time trying to differentiate them, the same callback is
2895
     * used !
2896
     */
2897
17.0M
    if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2898
6.50M
  return(0);
2899
2900
    /*
2901
     * Check for xml:space value.
2902
     */
2903
10.5M
    if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2904
10.5M
        (*(ctxt->space) == -2))
2905
6.31M
  return(0);
2906
2907
    /*
2908
     * Check that the string is made of blanks
2909
     */
2910
4.27M
    if (blank_chars == 0) {
2911
5.53M
  for (i = 0;i < len;i++)
2912
5.22M
      if (!(IS_BLANK_CH(str[i]))) return(0);
2913
2.78M
    }
2914
2915
    /*
2916
     * Look if the element is mixed content in the DTD if available
2917
     */
2918
1.80M
    if (ctxt->node == NULL) return(0);
2919
1.58M
    if (ctxt->myDoc != NULL) {
2920
1.58M
  ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2921
1.58M
        if (ret == 0) return(1);
2922
1.55M
        if (ret == 1) return(0);
2923
1.55M
    }
2924
2925
    /*
2926
     * Otherwise, heuristic :-\
2927
     */
2928
1.55M
    if ((RAW != '<') && (RAW != 0xD)) return(0);
2929
1.36M
    if ((ctxt->node->children == NULL) &&
2930
1.36M
  (RAW == '<') && (NXT(1) == '/')) return(0);
2931
2932
1.35M
    lastChild = xmlGetLastChild(ctxt->node);
2933
1.35M
    if (lastChild == NULL) {
2934
863k
        if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2935
863k
            (ctxt->node->content != NULL)) return(0);
2936
863k
    } else if (xmlNodeIsText(lastChild))
2937
107k
        return(0);
2938
379k
    else if ((ctxt->node->children != NULL) &&
2939
379k
             (xmlNodeIsText(ctxt->node->children)))
2940
42.1k
        return(0);
2941
1.20M
    return(1);
2942
1.35M
}
2943
2944
/************************************************************************
2945
 *                  *
2946
 *    Extra stuff for namespace support     *
2947
 *  Relates to http://www.w3.org/TR/WD-xml-names      *
2948
 *                  *
2949
 ************************************************************************/
2950
2951
/**
2952
 * xmlSplitQName:
2953
 * @ctxt:  an XML parser context
2954
 * @name:  an XML parser context
2955
 * @prefix:  a xmlChar **
2956
 *
2957
 * parse an UTF8 encoded XML qualified name string
2958
 *
2959
 * [NS 5] QName ::= (Prefix ':')? LocalPart
2960
 *
2961
 * [NS 6] Prefix ::= NCName
2962
 *
2963
 * [NS 7] LocalPart ::= NCName
2964
 *
2965
 * Returns the local part, and prefix is updated
2966
 *   to get the Prefix if any.
2967
 */
2968
2969
xmlChar *
2970
10.0M
xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2971
10.0M
    xmlChar buf[XML_MAX_NAMELEN + 5];
2972
10.0M
    xmlChar *buffer = NULL;
2973
10.0M
    int len = 0;
2974
10.0M
    int max = XML_MAX_NAMELEN;
2975
10.0M
    xmlChar *ret = NULL;
2976
10.0M
    const xmlChar *cur = name;
2977
10.0M
    int c;
2978
2979
10.0M
    if (prefix == NULL) return(NULL);
2980
10.0M
    *prefix = NULL;
2981
2982
10.0M
    if (cur == NULL) return(NULL);
2983
2984
#ifndef XML_XML_NAMESPACE
2985
    /* xml: prefix is not really a namespace */
2986
    if ((cur[0] == 'x') && (cur[1] == 'm') &&
2987
        (cur[2] == 'l') && (cur[3] == ':'))
2988
  return(xmlStrdup(name));
2989
#endif
2990
2991
    /* nasty but well=formed */
2992
10.0M
    if (cur[0] == ':')
2993
150k
  return(xmlStrdup(name));
2994
2995
9.90M
    c = *cur++;
2996
58.8M
    while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2997
48.8M
  buf[len++] = c;
2998
48.8M
  c = *cur++;
2999
48.8M
    }
3000
9.90M
    if (len >= max) {
3001
  /*
3002
   * Okay someone managed to make a huge name, so he's ready to pay
3003
   * for the processing speed.
3004
   */
3005
59.1k
  max = len * 2;
3006
3007
59.1k
  buffer = (xmlChar *) xmlMallocAtomic(max);
3008
59.1k
  if (buffer == NULL) {
3009
0
      xmlErrMemory(ctxt, NULL);
3010
0
      return(NULL);
3011
0
  }
3012
59.1k
  memcpy(buffer, buf, len);
3013
3.51M
  while ((c != 0) && (c != ':')) { /* tested bigname.xml */
3014
3.45M
      if (len + 10 > max) {
3015
13.1k
          xmlChar *tmp;
3016
3017
13.1k
    max *= 2;
3018
13.1k
    tmp = (xmlChar *) xmlRealloc(buffer, max);
3019
13.1k
    if (tmp == NULL) {
3020
0
        xmlFree(buffer);
3021
0
        xmlErrMemory(ctxt, NULL);
3022
0
        return(NULL);
3023
0
    }
3024
13.1k
    buffer = tmp;
3025
13.1k
      }
3026
3.45M
      buffer[len++] = c;
3027
3.45M
      c = *cur++;
3028
3.45M
  }
3029
59.1k
  buffer[len] = 0;
3030
59.1k
    }
3031
3032
9.90M
    if ((c == ':') && (*cur == 0)) {
3033
217k
        if (buffer != NULL)
3034
2.30k
      xmlFree(buffer);
3035
217k
  *prefix = NULL;
3036
217k
  return(xmlStrdup(name));
3037
217k
    }
3038
3039
9.68M
    if (buffer == NULL)
3040
9.63M
  ret = xmlStrndup(buf, len);
3041
56.8k
    else {
3042
56.8k
  ret = buffer;
3043
56.8k
  buffer = NULL;
3044
56.8k
  max = XML_MAX_NAMELEN;
3045
56.8k
    }
3046
3047
3048
9.68M
    if (c == ':') {
3049
3.18M
  c = *cur;
3050
3.18M
        *prefix = ret;
3051
3.18M
  if (c == 0) {
3052
0
      return(xmlStrndup(BAD_CAST "", 0));
3053
0
  }
3054
3.18M
  len = 0;
3055
3056
  /*
3057
   * Check that the first character is proper to start
3058
   * a new name
3059
   */
3060
3.18M
  if (!(((c >= 0x61) && (c <= 0x7A)) ||
3061
3.18M
        ((c >= 0x41) && (c <= 0x5A)) ||
3062
3.18M
        (c == '_') || (c == ':'))) {
3063
215k
      int l;
3064
215k
      int first = CUR_SCHAR(cur, l);
3065
3066
215k
      if (!IS_LETTER(first) && (first != '_')) {
3067
69.1k
    xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3068
69.1k
          "Name %s is not XML Namespace compliant\n",
3069
69.1k
          name);
3070
69.1k
      }
3071
215k
  }
3072
3.18M
  cur++;
3073
3074
23.6M
  while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3075
20.4M
      buf[len++] = c;
3076
20.4M
      c = *cur++;
3077
20.4M
  }
3078
3.18M
  if (len >= max) {
3079
      /*
3080
       * Okay someone managed to make a huge name, so he's ready to pay
3081
       * for the processing speed.
3082
       */
3083
37.1k
      max = len * 2;
3084
3085
37.1k
      buffer = (xmlChar *) xmlMallocAtomic(max);
3086
37.1k
      if (buffer == NULL) {
3087
0
          xmlErrMemory(ctxt, NULL);
3088
0
    return(NULL);
3089
0
      }
3090
37.1k
      memcpy(buffer, buf, len);
3091
2.38M
      while (c != 0) { /* tested bigname2.xml */
3092
2.34M
    if (len + 10 > max) {
3093
8.89k
        xmlChar *tmp;
3094
3095
8.89k
        max *= 2;
3096
8.89k
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3097
8.89k
        if (tmp == NULL) {
3098
0
      xmlErrMemory(ctxt, NULL);
3099
0
      xmlFree(buffer);
3100
0
      return(NULL);
3101
0
        }
3102
8.89k
        buffer = tmp;
3103
8.89k
    }
3104
2.34M
    buffer[len++] = c;
3105
2.34M
    c = *cur++;
3106
2.34M
      }
3107
37.1k
      buffer[len] = 0;
3108
37.1k
  }
3109
3110
3.18M
  if (buffer == NULL)
3111
3.14M
      ret = xmlStrndup(buf, len);
3112
37.1k
  else {
3113
37.1k
      ret = buffer;
3114
37.1k
  }
3115
3.18M
    }
3116
3117
9.68M
    return(ret);
3118
9.68M
}
3119
3120
/************************************************************************
3121
 *                  *
3122
 *      The parser itself       *
3123
 *  Relates to http://www.w3.org/TR/REC-xml       *
3124
 *                  *
3125
 ************************************************************************/
3126
3127
/************************************************************************
3128
 *                  *
3129
 *  Routines to parse Name, NCName and NmToken      *
3130
 *                  *
3131
 ************************************************************************/
3132
#ifdef DEBUG
/*
 * Call counters of the name parsing routines, only present in DEBUG
 * builds. Objects with static storage duration start out as zero.
 */
static unsigned long nbParseName,
                     nbParseNmToken,
                     nbParseNCName,
                     nbParseNCNameComplex,
                     nbParseNameComplex,
                     nbParseStringName;
#endif
3140
3141
/*
3142
 * The two following functions are related to the change of accepted
3143
 * characters for Name and NmToken in the Revision 5 of XML-1.0
3144
 * They correspond to the modified production [4] and the new production [4a]
3145
 * changes in that revision. Also note that the macros used for the
3146
 * productions Letter, Digit, CombiningChar and Extender are not needed
3147
 * anymore.
3148
 * We still keep compatibility to pre-revision5 parsing semantic if the
3149
 * new XML_PARSE_OLD10 option is given to the parser.
3150
 */
3151
static int
3152
22.1M
xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3153
22.1M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3154
        /*
3155
   * Use the new checks of production [4] [4a] amd [5] of the
3156
   * Update 5 of XML-1.0
3157
   */
3158
18.9M
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3159
18.9M
      (((c >= 'a') && (c <= 'z')) ||
3160
18.9M
       ((c >= 'A') && (c <= 'Z')) ||
3161
18.9M
       (c == '_') || (c == ':') ||
3162
18.9M
       ((c >= 0xC0) && (c <= 0xD6)) ||
3163
18.9M
       ((c >= 0xD8) && (c <= 0xF6)) ||
3164
18.9M
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3165
18.9M
       ((c >= 0x370) && (c <= 0x37D)) ||
3166
18.9M
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3167
18.9M
       ((c >= 0x200C) && (c <= 0x200D)) ||
3168
18.9M
       ((c >= 0x2070) && (c <= 0x218F)) ||
3169
18.9M
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3170
18.9M
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3171
18.9M
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3172
18.9M
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3173
18.9M
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3174
14.7M
      return(1);
3175
18.9M
    } else {
3176
3.14M
        if (IS_LETTER(c) || (c == '_') || (c == ':'))
3177
1.39M
      return(1);
3178
3.14M
    }
3179
5.95M
    return(0);
3180
22.1M
}
3181
3182
static int
3183
506M
xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3184
506M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3185
        /*
3186
   * Use the new checks of production [4] [4a] amd [5] of the
3187
   * Update 5 of XML-1.0
3188
   */
3189
486M
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3190
486M
      (((c >= 'a') && (c <= 'z')) ||
3191
486M
       ((c >= 'A') && (c <= 'Z')) ||
3192
486M
       ((c >= '0') && (c <= '9')) || /* !start */
3193
486M
       (c == '_') || (c == ':') ||
3194
486M
       (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3195
486M
       ((c >= 0xC0) && (c <= 0xD6)) ||
3196
486M
       ((c >= 0xD8) && (c <= 0xF6)) ||
3197
486M
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3198
486M
       ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3199
486M
       ((c >= 0x370) && (c <= 0x37D)) ||
3200
486M
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3201
486M
       ((c >= 0x200C) && (c <= 0x200D)) ||
3202
486M
       ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3203
486M
       ((c >= 0x2070) && (c <= 0x218F)) ||
3204
486M
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3205
486M
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3206
486M
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3207
486M
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3208
486M
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3209
472M
       return(1);
3210
486M
    } else {
3211
20.2M
        if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3212
20.2M
            (c == '.') || (c == '-') ||
3213
20.2M
      (c == '_') || (c == ':') ||
3214
20.2M
      (IS_COMBINING(c)) ||
3215
20.2M
      (IS_EXTENDER(c)))
3216
19.2M
      return(1);
3217
20.2M
    }
3218
14.8M
    return(0);
3219
506M
}
3220
3221
static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
3222
                                          int *len, int *alloc, int normalize);
3223
3224
static const xmlChar *
3225
13.5M
xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3226
13.5M
    int len = 0, l;
3227
13.5M
    int c;
3228
13.5M
    int count = 0;
3229
13.5M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3230
5.15M
                    XML_MAX_TEXT_LENGTH :
3231
13.5M
                    XML_MAX_NAME_LENGTH;
3232
3233
#ifdef DEBUG
3234
    nbParseNameComplex++;
3235
#endif
3236
3237
    /*
3238
     * Handler for more complex cases
3239
     */
3240
13.5M
    GROW;
3241
13.5M
    if (ctxt->instate == XML_PARSER_EOF)
3242
0
        return(NULL);
3243
13.5M
    c = CUR_CHAR(l);
3244
13.5M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3245
        /*
3246
   * Use the new checks of production [4] [4a] amd [5] of the
3247
   * Update 5 of XML-1.0
3248
   */
3249
7.71M
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3250
7.71M
      (!(((c >= 'a') && (c <= 'z')) ||
3251
7.38M
         ((c >= 'A') && (c <= 'Z')) ||
3252
7.38M
         (c == '_') || (c == ':') ||
3253
7.38M
         ((c >= 0xC0) && (c <= 0xD6)) ||
3254
7.38M
         ((c >= 0xD8) && (c <= 0xF6)) ||
3255
7.38M
         ((c >= 0xF8) && (c <= 0x2FF)) ||
3256
7.38M
         ((c >= 0x370) && (c <= 0x37D)) ||
3257
7.38M
         ((c >= 0x37F) && (c <= 0x1FFF)) ||
3258
7.38M
         ((c >= 0x200C) && (c <= 0x200D)) ||
3259
7.38M
         ((c >= 0x2070) && (c <= 0x218F)) ||
3260
7.38M
         ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3261
7.38M
         ((c >= 0x3001) && (c <= 0xD7FF)) ||
3262
7.38M
         ((c >= 0xF900) && (c <= 0xFDCF)) ||
3263
7.38M
         ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3264
7.38M
         ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3265
5.14M
      return(NULL);
3266
5.14M
  }
3267
2.56M
  len += l;
3268
2.56M
  NEXTL(l);
3269
2.56M
  c = CUR_CHAR(l);
3270
39.7M
  while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3271
39.7M
         (((c >= 'a') && (c <= 'z')) ||
3272
38.9M
          ((c >= 'A') && (c <= 'Z')) ||
3273
38.9M
          ((c >= '0') && (c <= '9')) || /* !start */
3274
38.9M
          (c == '_') || (c == ':') ||
3275
38.9M
          (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3276
38.9M
          ((c >= 0xC0) && (c <= 0xD6)) ||
3277
38.9M
          ((c >= 0xD8) && (c <= 0xF6)) ||
3278
38.9M
          ((c >= 0xF8) && (c <= 0x2FF)) ||
3279
38.9M
          ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3280
38.9M
          ((c >= 0x370) && (c <= 0x37D)) ||
3281
38.9M
          ((c >= 0x37F) && (c <= 0x1FFF)) ||
3282
38.9M
          ((c >= 0x200C) && (c <= 0x200D)) ||
3283
38.9M
          ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3284
38.9M
          ((c >= 0x2070) && (c <= 0x218F)) ||
3285
38.9M
          ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3286
38.9M
          ((c >= 0x3001) && (c <= 0xD7FF)) ||
3287
38.9M
          ((c >= 0xF900) && (c <= 0xFDCF)) ||
3288
38.9M
          ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3289
38.9M
          ((c >= 0x10000) && (c <= 0xEFFFF))
3290
38.9M
    )) {
3291
37.1M
      if (count++ > XML_PARSER_CHUNK_SIZE) {
3292
88.9k
    count = 0;
3293
88.9k
    GROW;
3294
88.9k
                if (ctxt->instate == XML_PARSER_EOF)
3295
0
                    return(NULL);
3296
88.9k
      }
3297
37.1M
            if (len <= INT_MAX - l)
3298
37.1M
          len += l;
3299
37.1M
      NEXTL(l);
3300
37.1M
      c = CUR_CHAR(l);
3301
37.1M
  }
3302
5.81M
    } else {
3303
5.81M
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3304
5.81M
      (!IS_LETTER(c) && (c != '_') &&
3305
5.56M
       (c != ':'))) {
3306
3.86M
      return(NULL);
3307
3.86M
  }
3308
1.95M
  len += l;
3309
1.95M
  NEXTL(l);
3310
1.95M
  c = CUR_CHAR(l);
3311
3312
30.3M
  while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3313
30.3M
         ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3314
29.7M
    (c == '.') || (c == '-') ||
3315
29.7M
    (c == '_') || (c == ':') ||
3316
29.7M
    (IS_COMBINING(c)) ||
3317
29.7M
    (IS_EXTENDER(c)))) {
3318
28.4M
      if (count++ > XML_PARSER_CHUNK_SIZE) {
3319
74.5k
    count = 0;
3320
74.5k
    GROW;
3321
74.5k
                if (ctxt->instate == XML_PARSER_EOF)
3322
0
                    return(NULL);
3323
74.5k
      }
3324
28.4M
            if (len <= INT_MAX - l)
3325
28.4M
          len += l;
3326
28.4M
      NEXTL(l);
3327
28.4M
      c = CUR_CHAR(l);
3328
28.4M
  }
3329
1.95M
    }
3330
4.52M
    if (len > maxLength) {
3331
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3332
0
        return(NULL);
3333
0
    }
3334
4.52M
    if (ctxt->input->cur - ctxt->input->base < len) {
3335
        /*
3336
         * There were a couple of bugs where PERefs lead to to a change
3337
         * of the buffer. Check the buffer size to avoid passing an invalid
3338
         * pointer to xmlDictLookup.
3339
         */
3340
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3341
0
                    "unexpected change of input buffer");
3342
0
        return (NULL);
3343
0
    }
3344
4.52M
    if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3345
12.5k
        return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3346
4.51M
    return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3347
4.52M
}
3348
3349
/**
3350
 * xmlParseName:
3351
 * @ctxt:  an XML parser context
3352
 *
3353
 * DEPRECATED: Internal function, don't use.
3354
 *
3355
 * parse an XML name.
3356
 *
3357
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3358
 *                  CombiningChar | Extender
3359
 *
3360
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3361
 *
3362
 * [6] Names ::= Name (#x20 Name)*
3363
 *
3364
 * Returns the Name parsed or NULL
3365
 */
3366
3367
const xmlChar *
3368
39.5M
xmlParseName(xmlParserCtxtPtr ctxt) {
3369
39.5M
    const xmlChar *in;
3370
39.5M
    const xmlChar *ret;
3371
39.5M
    size_t count = 0;
3372
39.5M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3373
11.4M
                       XML_MAX_TEXT_LENGTH :
3374
39.5M
                       XML_MAX_NAME_LENGTH;
3375
3376
39.5M
    GROW;
3377
3378
#ifdef DEBUG
3379
    nbParseName++;
3380
#endif
3381
3382
    /*
3383
     * Accelerator for simple ASCII names
3384
     */
3385
39.5M
    in = ctxt->input->cur;
3386
39.5M
    if (((*in >= 0x61) && (*in <= 0x7A)) ||
3387
39.5M
  ((*in >= 0x41) && (*in <= 0x5A)) ||
3388
39.5M
  (*in == '_') || (*in == ':')) {
3389
28.6M
  in++;
3390
127M
  while (((*in >= 0x61) && (*in <= 0x7A)) ||
3391
127M
         ((*in >= 0x41) && (*in <= 0x5A)) ||
3392
127M
         ((*in >= 0x30) && (*in <= 0x39)) ||
3393
127M
         (*in == '_') || (*in == '-') ||
3394
127M
         (*in == ':') || (*in == '.'))
3395
99.2M
      in++;
3396
28.6M
  if ((*in > 0) && (*in < 0x80)) {
3397
25.9M
      count = in - ctxt->input->cur;
3398
25.9M
            if (count > maxLength) {
3399
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3400
0
                return(NULL);
3401
0
            }
3402
25.9M
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3403
25.9M
      ctxt->input->cur = in;
3404
25.9M
      ctxt->input->col += count;
3405
25.9M
      if (ret == NULL)
3406
0
          xmlErrMemory(ctxt, NULL);
3407
25.9M
      return(ret);
3408
25.9M
  }
3409
28.6M
    }
3410
    /* accelerator for special cases */
3411
13.5M
    return(xmlParseNameComplex(ctxt));
3412
39.5M
}
3413
3414
static const xmlChar *
3415
11.0M
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3416
11.0M
    int len = 0, l;
3417
11.0M
    int c;
3418
11.0M
    int count = 0;
3419
11.0M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3420
3.17M
                    XML_MAX_TEXT_LENGTH :
3421
11.0M
                    XML_MAX_NAME_LENGTH;
3422
11.0M
    size_t startPosition = 0;
3423
3424
#ifdef DEBUG
3425
    nbParseNCNameComplex++;
3426
#endif
3427
3428
    /*
3429
     * Handler for more complex cases
3430
     */
3431
11.0M
    GROW;
3432
11.0M
    startPosition = CUR_PTR - BASE_PTR;
3433
11.0M
    c = CUR_CHAR(l);
3434
11.0M
    if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3435
11.0M
  (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3436
6.93M
  return(NULL);
3437
6.93M
    }
3438
3439
58.0M
    while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3440
58.0M
     (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3441
53.9M
  if (count++ > XML_PARSER_CHUNK_SIZE) {
3442
105k
      count = 0;
3443
105k
      GROW;
3444
105k
            if (ctxt->instate == XML_PARSER_EOF)
3445
0
                return(NULL);
3446
105k
  }
3447
53.9M
        if (len <= INT_MAX - l)
3448
53.9M
      len += l;
3449
53.9M
  NEXTL(l);
3450
53.9M
  c = CUR_CHAR(l);
3451
53.9M
  if (c == 0) {
3452
325k
      count = 0;
3453
      /*
3454
       * when shrinking to extend the buffer we really need to preserve
3455
       * the part of the name we already parsed. Hence rolling back
3456
       * by current length.
3457
       */
3458
325k
      ctxt->input->cur -= l;
3459
325k
      GROW;
3460
325k
            if (ctxt->instate == XML_PARSER_EOF)
3461
0
                return(NULL);
3462
325k
      ctxt->input->cur += l;
3463
325k
      c = CUR_CHAR(l);
3464
325k
  }
3465
53.9M
    }
3466
4.11M
    if (len > maxLength) {
3467
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3468
0
        return(NULL);
3469
0
    }
3470
4.11M
    return(xmlDictLookup(ctxt->dict, (BASE_PTR + startPosition), len));
3471
4.11M
}
3472
3473
/**
3474
 * xmlParseNCName:
3475
 * @ctxt:  an XML parser context
3476
 * @len:  length of the string parsed
3477
 *
3478
 * parse an XML name.
3479
 *
3480
 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3481
 *                      CombiningChar | Extender
3482
 *
3483
 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3484
 *
3485
 * Returns the Name parsed or NULL
3486
 */
3487
3488
static const xmlChar *
3489
34.1M
xmlParseNCName(xmlParserCtxtPtr ctxt) {
3490
34.1M
    const xmlChar *in, *e;
3491
34.1M
    const xmlChar *ret;
3492
34.1M
    size_t count = 0;
3493
34.1M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3494
8.94M
                       XML_MAX_TEXT_LENGTH :
3495
34.1M
                       XML_MAX_NAME_LENGTH;
3496
3497
#ifdef DEBUG
3498
    nbParseNCName++;
3499
#endif
3500
3501
    /*
3502
     * Accelerator for simple ASCII names
3503
     */
3504
34.1M
    in = ctxt->input->cur;
3505
34.1M
    e = ctxt->input->end;
3506
34.1M
    if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3507
34.1M
   ((*in >= 0x41) && (*in <= 0x5A)) ||
3508
34.1M
   (*in == '_')) && (in < e)) {
3509
25.3M
  in++;
3510
95.1M
  while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3511
95.1M
          ((*in >= 0x41) && (*in <= 0x5A)) ||
3512
95.1M
          ((*in >= 0x30) && (*in <= 0x39)) ||
3513
95.1M
          (*in == '_') || (*in == '-') ||
3514
95.1M
          (*in == '.')) && (in < e))
3515
69.7M
      in++;
3516
25.3M
  if (in >= e)
3517
36.1k
      goto complex;
3518
25.3M
  if ((*in > 0) && (*in < 0x80)) {
3519
23.0M
      count = in - ctxt->input->cur;
3520
23.0M
            if (count > maxLength) {
3521
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3522
0
                return(NULL);
3523
0
            }
3524
23.0M
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3525
23.0M
      ctxt->input->cur = in;
3526
23.0M
      ctxt->input->col += count;
3527
23.0M
      if (ret == NULL) {
3528
0
          xmlErrMemory(ctxt, NULL);
3529
0
      }
3530
23.0M
      return(ret);
3531
23.0M
  }
3532
25.3M
    }
3533
11.0M
complex:
3534
11.0M
    return(xmlParseNCNameComplex(ctxt));
3535
34.1M
}
3536
3537
/**
3538
 * xmlParseNameAndCompare:
3539
 * @ctxt:  an XML parser context
3540
 *
3541
 * parse an XML name and compares for match
3542
 * (specialized for endtag parsing)
3543
 *
3544
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3545
 * and the name for mismatch
3546
 */
3547
3548
static const xmlChar *
3549
2.40M
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3550
2.40M
    register const xmlChar *cmp = other;
3551
2.40M
    register const xmlChar *in;
3552
2.40M
    const xmlChar *ret;
3553
3554
2.40M
    GROW;
3555
2.40M
    if (ctxt->instate == XML_PARSER_EOF)
3556
0
        return(NULL);
3557
3558
2.40M
    in = ctxt->input->cur;
3559
8.50M
    while (*in != 0 && *in == *cmp) {
3560
6.10M
  ++in;
3561
6.10M
  ++cmp;
3562
6.10M
    }
3563
2.40M
    if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3564
  /* success */
3565
1.28M
  ctxt->input->col += in - ctxt->input->cur;
3566
1.28M
  ctxt->input->cur = in;
3567
1.28M
  return (const xmlChar*) 1;
3568
1.28M
    }
3569
    /* failure (or end of input buffer), check with full function */
3570
1.12M
    ret = xmlParseName (ctxt);
3571
    /* strings coming from the dictionary direct compare possible */
3572
1.12M
    if (ret == other) {
3573
81.6k
  return (const xmlChar*) 1;
3574
81.6k
    }
3575
1.04M
    return ret;
3576
1.12M
}
3577
3578
/**
3579
 * xmlParseStringName:
3580
 * @ctxt:  an XML parser context
3581
 * @str:  a pointer to the string pointer (IN/OUT)
3582
 *
3583
 * parse an XML name.
3584
 *
3585
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3586
 *                  CombiningChar | Extender
3587
 *
3588
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3589
 *
3590
 * [6] Names ::= Name (#x20 Name)*
3591
 *
3592
 * Returns the Name parsed or NULL. The @str pointer
3593
 * is updated to the current location in the string.
3594
 */
3595
3596
static xmlChar *
3597
11.6M
xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3598
11.6M
    xmlChar buf[XML_MAX_NAMELEN + 5];
3599
11.6M
    const xmlChar *cur = *str;
3600
11.6M
    int len = 0, l;
3601
11.6M
    int c;
3602
11.6M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3603
40.3k
                    XML_MAX_TEXT_LENGTH :
3604
11.6M
                    XML_MAX_NAME_LENGTH;
3605
3606
#ifdef DEBUG
3607
    nbParseStringName++;
3608
#endif
3609
3610
11.6M
    c = CUR_SCHAR(cur, l);
3611
11.6M
    if (!xmlIsNameStartChar(ctxt, c)) {
3612
11.5k
  return(NULL);
3613
11.5k
    }
3614
3615
11.6M
    COPY_BUF(l,buf,len,c);
3616
11.6M
    cur += l;
3617
11.6M
    c = CUR_SCHAR(cur, l);
3618
170M
    while (xmlIsNameChar(ctxt, c)) {
3619
159M
  COPY_BUF(l,buf,len,c);
3620
159M
  cur += l;
3621
159M
  c = CUR_SCHAR(cur, l);
3622
159M
  if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3623
      /*
3624
       * Okay someone managed to make a huge name, so he's ready to pay
3625
       * for the processing speed.
3626
       */
3627
1.19M
      xmlChar *buffer;
3628
1.19M
      int max = len * 2;
3629
3630
1.19M
      buffer = (xmlChar *) xmlMallocAtomic(max);
3631
1.19M
      if (buffer == NULL) {
3632
0
          xmlErrMemory(ctxt, NULL);
3633
0
    return(NULL);
3634
0
      }
3635
1.19M
      memcpy(buffer, buf, len);
3636
270M
      while (xmlIsNameChar(ctxt, c)) {
3637
269M
    if (len + 10 > max) {
3638
1.19M
        xmlChar *tmp;
3639
3640
1.19M
        max *= 2;
3641
1.19M
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3642
1.19M
        if (tmp == NULL) {
3643
0
      xmlErrMemory(ctxt, NULL);
3644
0
      xmlFree(buffer);
3645
0
      return(NULL);
3646
0
        }
3647
1.19M
        buffer = tmp;
3648
1.19M
    }
3649
269M
    COPY_BUF(l,buffer,len,c);
3650
269M
    cur += l;
3651
269M
    c = CUR_SCHAR(cur, l);
3652
269M
                if (len > maxLength) {
3653
0
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3654
0
                    xmlFree(buffer);
3655
0
                    return(NULL);
3656
0
                }
3657
269M
      }
3658
1.19M
      buffer[len] = 0;
3659
1.19M
      *str = cur;
3660
1.19M
      return(buffer);
3661
1.19M
  }
3662
159M
    }
3663
10.4M
    if (len > maxLength) {
3664
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3665
0
        return(NULL);
3666
0
    }
3667
10.4M
    *str = cur;
3668
10.4M
    return(xmlStrndup(buf, len));
3669
10.4M
}
3670
3671
/**
3672
 * xmlParseNmtoken:
3673
 * @ctxt:  an XML parser context
3674
 *
3675
 * DEPRECATED: Internal function, don't use.
3676
 *
3677
 * parse an XML Nmtoken.
3678
 *
3679
 * [7] Nmtoken ::= (NameChar)+
3680
 *
3681
 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3682
 *
3683
 * Returns the Nmtoken parsed or NULL
3684
 */
3685
3686
xmlChar *
3687
842k
xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3688
842k
    xmlChar buf[XML_MAX_NAMELEN + 5];
3689
842k
    int len = 0, l;
3690
842k
    int c;
3691
842k
    int count = 0;
3692
842k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3693
254k
                    XML_MAX_TEXT_LENGTH :
3694
842k
                    XML_MAX_NAME_LENGTH;
3695
3696
#ifdef DEBUG
3697
    nbParseNmToken++;
3698
#endif
3699
3700
842k
    GROW;
3701
842k
    if (ctxt->instate == XML_PARSER_EOF)
3702
0
        return(NULL);
3703
842k
    c = CUR_CHAR(l);
3704
3705
7.25M
    while (xmlIsNameChar(ctxt, c)) {
3706
6.43M
  if (count++ > XML_PARSER_CHUNK_SIZE) {
3707
0
      count = 0;
3708
0
      GROW;
3709
0
  }
3710
6.43M
  COPY_BUF(l,buf,len,c);
3711
6.43M
  NEXTL(l);
3712
6.43M
  c = CUR_CHAR(l);
3713
6.43M
  if (c == 0) {
3714
18.6k
      count = 0;
3715
18.6k
      GROW;
3716
18.6k
      if (ctxt->instate == XML_PARSER_EOF)
3717
0
    return(NULL);
3718
18.6k
            c = CUR_CHAR(l);
3719
18.6k
  }
3720
6.43M
  if (len >= XML_MAX_NAMELEN) {
3721
      /*
3722
       * Okay someone managed to make a huge token, so he's ready to pay
3723
       * for the processing speed.
3724
       */
3725
28.4k
      xmlChar *buffer;
3726
28.4k
      int max = len * 2;
3727
3728
28.4k
      buffer = (xmlChar *) xmlMallocAtomic(max);
3729
28.4k
      if (buffer == NULL) {
3730
0
          xmlErrMemory(ctxt, NULL);
3731
0
    return(NULL);
3732
0
      }
3733
28.4k
      memcpy(buffer, buf, len);
3734
2.04M
      while (xmlIsNameChar(ctxt, c)) {
3735
2.01M
    if (count++ > XML_PARSER_CHUNK_SIZE) {
3736
36.6k
        count = 0;
3737
36.6k
        GROW;
3738
36.6k
                    if (ctxt->instate == XML_PARSER_EOF) {
3739
0
                        xmlFree(buffer);
3740
0
                        return(NULL);
3741
0
                    }
3742
36.6k
    }
3743
2.01M
    if (len + 10 > max) {
3744
8.93k
        xmlChar *tmp;
3745
3746
8.93k
        max *= 2;
3747
8.93k
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3748
8.93k
        if (tmp == NULL) {
3749
0
      xmlErrMemory(ctxt, NULL);
3750
0
      xmlFree(buffer);
3751
0
      return(NULL);
3752
0
        }
3753
8.93k
        buffer = tmp;
3754
8.93k
    }
3755
2.01M
    COPY_BUF(l,buffer,len,c);
3756
2.01M
    NEXTL(l);
3757
2.01M
    c = CUR_CHAR(l);
3758
2.01M
                if (len > maxLength) {
3759
0
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3760
0
                    xmlFree(buffer);
3761
0
                    return(NULL);
3762
0
                }
3763
2.01M
      }
3764
28.4k
      buffer[len] = 0;
3765
28.4k
      return(buffer);
3766
28.4k
  }
3767
6.43M
    }
3768
813k
    if (len == 0)
3769
472k
        return(NULL);
3770
341k
    if (len > maxLength) {
3771
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3772
0
        return(NULL);
3773
0
    }
3774
341k
    return(xmlStrndup(buf, len));
3775
341k
}
3776
3777
/**
3778
 * xmlParseEntityValue:
3779
 * @ctxt:  an XML parser context
3780
 * @orig:  if non-NULL store a copy of the original entity value
3781
 *
3782
 * DEPRECATED: Internal function, don't use.
3783
 *
3784
 * parse a value for ENTITY declarations
3785
 *
3786
 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3787
 *                 "'" ([^%&'] | PEReference | Reference)* "'"
3788
 *
3789
 * Returns the EntityValue parsed with reference substituted or NULL
3790
 */
3791
3792
xmlChar *
3793
332k
xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3794
332k
    xmlChar *buf = NULL;
3795
332k
    int len = 0;
3796
332k
    int size = XML_PARSER_BUFFER_SIZE;
3797
332k
    int c, l;
3798
332k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3799
79.5k
                    XML_MAX_HUGE_LENGTH :
3800
332k
                    XML_MAX_TEXT_LENGTH;
3801
332k
    xmlChar stop;
3802
332k
    xmlChar *ret = NULL;
3803
332k
    const xmlChar *cur = NULL;
3804
332k
    xmlParserInputPtr input;
3805
3806
332k
    if (RAW == '"') stop = '"';
3807
55.8k
    else if (RAW == '\'') stop = '\'';
3808
0
    else {
3809
0
  xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
3810
0
  return(NULL);
3811
0
    }
3812
332k
    buf = (xmlChar *) xmlMallocAtomic(size);
3813
332k
    if (buf == NULL) {
3814
0
  xmlErrMemory(ctxt, NULL);
3815
0
  return(NULL);
3816
0
    }
3817
3818
    /*
3819
     * The content of the entity definition is copied in a buffer.
3820
     */
3821
3822
332k
    ctxt->instate = XML_PARSER_ENTITY_VALUE;
3823
332k
    input = ctxt->input;
3824
332k
    GROW;
3825
332k
    if (ctxt->instate == XML_PARSER_EOF)
3826
0
        goto error;
3827
332k
    NEXT;
3828
332k
    c = CUR_CHAR(l);
3829
    /*
3830
     * NOTE: 4.4.5 Included in Literal
3831
     * When a parameter entity reference appears in a literal entity
3832
     * value, ... a single or double quote character in the replacement
3833
     * text is always treated as a normal data character and will not
3834
     * terminate the literal.
3835
     * In practice it means we stop the loop only when back at parsing
3836
     * the initial entity and the quote is found
3837
     */
3838
15.2M
    while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3839
15.1M
      (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
3840
14.8M
  if (len + 5 >= size) {
3841
41.7k
      xmlChar *tmp;
3842
3843
41.7k
      size *= 2;
3844
41.7k
      tmp = (xmlChar *) xmlRealloc(buf, size);
3845
41.7k
      if (tmp == NULL) {
3846
0
    xmlErrMemory(ctxt, NULL);
3847
0
                goto error;
3848
0
      }
3849
41.7k
      buf = tmp;
3850
41.7k
  }
3851
14.8M
  COPY_BUF(l,buf,len,c);
3852
14.8M
  NEXTL(l);
3853
3854
14.8M
  GROW;
3855
14.8M
  c = CUR_CHAR(l);
3856
14.8M
  if (c == 0) {
3857
8.67k
      GROW;
3858
8.67k
      c = CUR_CHAR(l);
3859
8.67k
  }
3860
3861
14.8M
        if (len > maxLength) {
3862
0
            xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
3863
0
                           "entity value too long\n");
3864
0
            goto error;
3865
0
        }
3866
14.8M
    }
3867
332k
    buf[len] = 0;
3868
332k
    if (ctxt->instate == XML_PARSER_EOF)
3869
0
        goto error;
3870
332k
    if (c != stop) {
3871
15.4k
        xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3872
15.4k
        goto error;
3873
15.4k
    }
3874
316k
    NEXT;
3875
3876
    /*
3877
     * Raise problem w.r.t. '&' and '%' being used in non-entities
3878
     * reference constructs. Note Charref will be handled in
3879
     * xmlStringDecodeEntities()
3880
     */
3881
316k
    cur = buf;
3882
8.86M
    while (*cur != 0) { /* non input consuming */
3883
8.57M
  if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3884
180k
      xmlChar *name;
3885
180k
      xmlChar tmp = *cur;
3886
180k
            int nameOk = 0;
3887
3888
180k
      cur++;
3889
180k
      name = xmlParseStringName(ctxt, &cur);
3890
180k
            if (name != NULL) {
3891
168k
                nameOk = 1;
3892
168k
                xmlFree(name);
3893
168k
            }
3894
180k
            if ((nameOk == 0) || (*cur != ';')) {
3895
21.2k
    xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3896
21.2k
      "EntityValue: '%c' forbidden except for entities references\n",
3897
21.2k
                            tmp);
3898
21.2k
                goto error;
3899
21.2k
      }
3900
158k
      if ((tmp == '%') && (ctxt->inSubset == 1) &&
3901
158k
    (ctxt->inputNr == 1)) {
3902
1.78k
    xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3903
1.78k
                goto error;
3904
1.78k
      }
3905
157k
      if (*cur == 0)
3906
0
          break;
3907
157k
  }
3908
8.54M
  cur++;
3909
8.54M
    }
3910
3911
    /*
3912
     * Then PEReference entities are substituted.
3913
     *
3914
     * NOTE: 4.4.7 Bypassed
3915
     * When a general entity reference appears in the EntityValue in
3916
     * an entity declaration, it is bypassed and left as is.
3917
     * so XML_SUBSTITUTE_REF is not set here.
3918
     */
3919
293k
    ++ctxt->depth;
3920
293k
    ret = xmlStringDecodeEntitiesInt(ctxt, buf, len, XML_SUBSTITUTE_PEREF,
3921
293k
                                     0, 0, 0, /* check */ 1);
3922
293k
    --ctxt->depth;
3923
3924
293k
    if (orig != NULL) {
3925
293k
        *orig = buf;
3926
293k
        buf = NULL;
3927
293k
    }
3928
3929
332k
error:
3930
332k
    if (buf != NULL)
3931
38.4k
        xmlFree(buf);
3932
332k
    return(ret);
3933
293k
}
3934
3935
/**
3936
 * xmlParseAttValueComplex:
3937
 * @ctxt:  an XML parser context
3938
 * @len:   the resulting attribute len
3939
 * @normalize:  whether to apply the inner normalization
3940
 *
3941
 * parse a value for an attribute, this is the fallback function
3942
 * of xmlParseAttValue() when the attribute parsing requires handling
3943
 * of non-ASCII characters, or normalization compaction.
3944
 *
3945
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3946
 */
3947
static xmlChar *
3948
2.31M
xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
3949
2.31M
    xmlChar limit = 0;
3950
2.31M
    xmlChar *buf = NULL;
3951
2.31M
    xmlChar *rep = NULL;
3952
2.31M
    size_t len = 0;
3953
2.31M
    size_t buf_size = 0;
3954
2.31M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3955
767k
                       XML_MAX_HUGE_LENGTH :
3956
2.31M
                       XML_MAX_TEXT_LENGTH;
3957
2.31M
    int c, l, in_space = 0;
3958
2.31M
    xmlChar *current = NULL;
3959
2.31M
    xmlEntityPtr ent;
3960
3961
2.31M
    if (NXT(0) == '"') {
3962
1.32M
  ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3963
1.32M
  limit = '"';
3964
1.32M
        NEXT;
3965
1.32M
    } else if (NXT(0) == '\'') {
3966
995k
  limit = '\'';
3967
995k
  ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3968
995k
        NEXT;
3969
995k
    } else {
3970
0
  xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3971
0
  return(NULL);
3972
0
    }
3973
3974
    /*
3975
     * allocate a translation buffer.
3976
     */
3977
2.31M
    buf_size = XML_PARSER_BUFFER_SIZE;
3978
2.31M
    buf = (xmlChar *) xmlMallocAtomic(buf_size);
3979
2.31M
    if (buf == NULL) goto mem_error;
3980
3981
    /*
3982
     * OK loop until we reach one of the ending char or a size limit.
3983
     */
3984
2.31M
    c = CUR_CHAR(l);
3985
69.6M
    while (((NXT(0) != limit) && /* checked */
3986
69.6M
            (IS_CHAR(c)) && (c != '<')) &&
3987
69.6M
            (ctxt->instate != XML_PARSER_EOF)) {
3988
67.3M
  if (c == '&') {
3989
1.54M
      in_space = 0;
3990
1.54M
      if (NXT(1) == '#') {
3991
425k
    int val = xmlParseCharRef(ctxt);
3992
3993
425k
    if (val == '&') {
3994
78.6k
        if (ctxt->replaceEntities) {
3995
30.1k
      if (len + 10 > buf_size) {
3996
78
          growBuffer(buf, 10);
3997
78
      }
3998
30.1k
      buf[len++] = '&';
3999
48.5k
        } else {
4000
      /*
4001
       * The reparsing will be done in xmlStringGetNodeList()
4002
       * called by the attribute() function in SAX.c
4003
       */
4004
48.5k
      if (len + 10 > buf_size) {
4005
116
          growBuffer(buf, 10);
4006
116
      }
4007
48.5k
      buf[len++] = '&';
4008
48.5k
      buf[len++] = '#';
4009
48.5k
      buf[len++] = '3';
4010
48.5k
      buf[len++] = '8';
4011
48.5k
      buf[len++] = ';';
4012
48.5k
        }
4013
346k
    } else if (val != 0) {
4014
216k
        if (len + 10 > buf_size) {
4015
1.18k
      growBuffer(buf, 10);
4016
1.18k
        }
4017
216k
        len += xmlCopyChar(0, &buf[len], val);
4018
216k
    }
4019
1.11M
      } else {
4020
1.11M
    ent = xmlParseEntityRef(ctxt);
4021
1.11M
    if ((ent != NULL) &&
4022
1.11M
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
4023
137k
        if (len + 10 > buf_size) {
4024
166
      growBuffer(buf, 10);
4025
166
        }
4026
137k
        if ((ctxt->replaceEntities == 0) &&
4027
137k
            (ent->content[0] == '&')) {
4028
32.3k
      buf[len++] = '&';
4029
32.3k
      buf[len++] = '#';
4030
32.3k
      buf[len++] = '3';
4031
32.3k
      buf[len++] = '8';
4032
32.3k
      buf[len++] = ';';
4033
105k
        } else {
4034
105k
      buf[len++] = ent->content[0];
4035
105k
        }
4036
982k
    } else if ((ent != NULL) &&
4037
982k
               (ctxt->replaceEntities != 0)) {
4038
380k
        if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
4039
380k
                        if (xmlParserEntityCheck(ctxt, ent->length))
4040
0
                            goto error;
4041
4042
380k
      ++ctxt->depth;
4043
380k
      rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
4044
380k
                                ent->length, XML_SUBSTITUTE_REF, 0, 0, 0,
4045
380k
                                /* check */ 1);
4046
380k
      --ctxt->depth;
4047
380k
      if (rep != NULL) {
4048
374k
          current = rep;
4049
76.0M
          while (*current != 0) { /* non input consuming */
4050
75.6M
                                if ((*current == 0xD) || (*current == 0xA) ||
4051
75.6M
                                    (*current == 0x9)) {
4052
18.8k
                                    buf[len++] = 0x20;
4053
18.8k
                                    current++;
4054
18.8k
                                } else
4055
75.6M
                                    buf[len++] = *current++;
4056
75.6M
        if (len + 10 > buf_size) {
4057
1.45k
            growBuffer(buf, 10);
4058
1.45k
        }
4059
75.6M
          }
4060
374k
          xmlFree(rep);
4061
374k
          rep = NULL;
4062
374k
      }
4063
380k
        } else {
4064
0
      if (len + 10 > buf_size) {
4065
0
          growBuffer(buf, 10);
4066
0
      }
4067
0
      if (ent->content != NULL)
4068
0
          buf[len++] = ent->content[0];
4069
0
        }
4070
601k
    } else if (ent != NULL) {
4071
33.1k
        int i = xmlStrlen(ent->name);
4072
33.1k
        const xmlChar *cur = ent->name;
4073
4074
        /*
4075
                     * We also check for recursion and amplification
4076
                     * when entities are not substituted. They're
4077
                     * often expanded later.
4078
         */
4079
33.1k
        if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
4080
33.1k
      (ent->content != NULL)) {
4081
29.0k
                        if ((ent->flags & XML_ENT_CHECKED) == 0) {
4082
4.19k
                            unsigned long oldCopy = ctxt->sizeentcopy;
4083
4084
4.19k
                            ctxt->sizeentcopy = ent->length;
4085
4086
4.19k
                            ++ctxt->depth;
4087
4.19k
                            rep = xmlStringDecodeEntitiesInt(ctxt,
4088
4.19k
                                    ent->content, ent->length,
4089
4.19k
                                    XML_SUBSTITUTE_REF, 0, 0, 0,
4090
4.19k
                                    /* check */ 1);
4091
4.19k
                            --ctxt->depth;
4092
4093
                            /*
4094
                             * If we're parsing DTD content, the entity
4095
                             * might reference other entities which
4096
                             * weren't defined yet, so the check isn't
4097
                             * reliable.
4098
                             */
4099
4.19k
                            if (ctxt->inSubset == 0) {
4100
4.17k
                                ent->flags |= XML_ENT_CHECKED;
4101
4.17k
                                ent->expandedSize = ctxt->sizeentcopy;
4102
4.17k
                            }
4103
4104
4.19k
                            if (rep != NULL) {
4105
4.18k
                                xmlFree(rep);
4106
4.18k
                                rep = NULL;
4107
4.18k
                            } else {
4108
10
                                ent->content[0] = 0;
4109
10
                            }
4110
4111
4.19k
                            if (xmlParserEntityCheck(ctxt, oldCopy))
4112
0
                                goto error;
4113
24.8k
                        } else {
4114
24.8k
                            if (xmlParserEntityCheck(ctxt, ent->expandedSize))
4115
0
                                goto error;
4116
24.8k
                        }
4117
29.0k
        }
4118
4119
        /*
4120
         * Just output the reference
4121
         */
4122
33.1k
        buf[len++] = '&';
4123
33.1k
        while (len + i + 10 > buf_size) {
4124
80
      growBuffer(buf, i + 10);
4125
80
        }
4126
79.7k
        for (;i > 0;i--)
4127
46.5k
      buf[len++] = *cur++;
4128
33.1k
        buf[len++] = ';';
4129
33.1k
    }
4130
1.11M
      }
4131
65.7M
  } else {
4132
65.7M
      if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
4133
4.22M
          if ((len != 0) || (!normalize)) {
4134
4.21M
        if ((!normalize) || (!in_space)) {
4135
4.19M
      COPY_BUF(l,buf,len,0x20);
4136
4.20M
      while (len + 10 > buf_size) {
4137
26.5k
          growBuffer(buf, 10);
4138
26.5k
      }
4139
4.19M
        }
4140
4.21M
        in_space = 1;
4141
4.21M
    }
4142
61.5M
      } else {
4143
61.5M
          in_space = 0;
4144
61.5M
    COPY_BUF(l,buf,len,c);
4145
61.5M
    if (len + 10 > buf_size) {
4146
336k
        growBuffer(buf, 10);
4147
336k
    }
4148
61.5M
      }
4149
65.7M
      NEXTL(l);
4150
65.7M
  }
4151
67.3M
  GROW;
4152
67.3M
  c = CUR_CHAR(l);
4153
67.3M
        if (len > maxLength) {
4154
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4155
0
                           "AttValue length too long\n");
4156
0
            goto mem_error;
4157
0
        }
4158
67.3M
    }
4159
2.31M
    if (ctxt->instate == XML_PARSER_EOF)
4160
213
        goto error;
4161
4162
2.31M
    if ((in_space) && (normalize)) {
4163
13.2k
        while ((len > 0) && (buf[len - 1] == 0x20)) len--;
4164
6.57k
    }
4165
2.31M
    buf[len] = 0;
4166
2.31M
    if (RAW == '<') {
4167
950k
  xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4168
1.36M
    } else if (RAW != limit) {
4169
554k
  if ((c != 0) && (!IS_CHAR(c))) {
4170
282k
      xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4171
282k
         "invalid character in attribute value\n");
4172
282k
  } else {
4173
272k
      xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4174
272k
         "AttValue: ' expected\n");
4175
272k
        }
4176
554k
    } else
4177
814k
  NEXT;
4178
4179
2.31M
    if (attlen != NULL) *attlen = len;
4180
2.31M
    return(buf);
4181
4182
0
mem_error:
4183
0
    xmlErrMemory(ctxt, NULL);
4184
213
error:
4185
213
    if (buf != NULL)
4186
213
        xmlFree(buf);
4187
213
    if (rep != NULL)
4188
0
        xmlFree(rep);
4189
213
    return(NULL);
4190
0
}
4191
4192
/**
4193
 * xmlParseAttValue:
4194
 * @ctxt:  an XML parser context
4195
 *
4196
 * DEPRECATED: Internal function, don't use.
4197
 *
4198
 * parse a value for an attribute
4199
 * Note: the parser won't do substitution of entities here, this
4200
 * will be handled later in xmlStringGetNodeList
4201
 *
4202
 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4203
 *                   "'" ([^<&'] | Reference)* "'"
4204
 *
4205
 * 3.3.3 Attribute-Value Normalization:
4206
 * Before the value of an attribute is passed to the application or
4207
 * checked for validity, the XML processor must normalize it as follows:
4208
 * - a character reference is processed by appending the referenced
4209
 *   character to the attribute value
4210
 * - an entity reference is processed by recursively processing the
4211
 *   replacement text of the entity
4212
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4213
 *   appending #x20 to the normalized value, except that only a single
4214
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
4215
 *   parsed entity or the literal entity value of an internal parsed entity
4216
 * - other characters are processed by appending them to the normalized value
4217
 * If the declared value is not CDATA, then the XML processor must further
4218
 * process the normalized attribute value by discarding any leading and
4219
 * trailing space (#x20) characters, and by replacing sequences of space
4220
 * (#x20) characters by a single space (#x20) character.
4221
 * All attributes for which no declaration has been read should be treated
4222
 * by a non-validating parser as if declared CDATA.
4223
 *
4224
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4225
 */
4226
4227
4228
xmlChar *
4229
2.83M
xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4230
2.83M
    if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4231
2.83M
    return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
4232
2.83M
}
4233
4234
/**
4235
 * xmlParseSystemLiteral:
4236
 * @ctxt:  an XML parser context
4237
 *
4238
 * DEPRECATED: Internal function, don't use.
4239
 *
4240
 * parse an XML Literal
4241
 *
4242
 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4243
 *
4244
 * Returns the SystemLiteral parsed or NULL
4245
 */
4246
4247
xmlChar *
4248
341k
xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4249
341k
    xmlChar *buf = NULL;
4250
341k
    int len = 0;
4251
341k
    int size = XML_PARSER_BUFFER_SIZE;
4252
341k
    int cur, l;
4253
341k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4254
102k
                    XML_MAX_TEXT_LENGTH :
4255
341k
                    XML_MAX_NAME_LENGTH;
4256
341k
    xmlChar stop;
4257
341k
    int state = ctxt->instate;
4258
341k
    int count = 0;
4259
4260
341k
    SHRINK;
4261
341k
    if (RAW == '"') {
4262
251k
        NEXT;
4263
251k
  stop = '"';
4264
251k
    } else if (RAW == '\'') {
4265
59.1k
        NEXT;
4266
59.1k
  stop = '\'';
4267
59.1k
    } else {
4268
31.7k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4269
31.7k
  return(NULL);
4270
31.7k
    }
4271
4272
310k
    buf = (xmlChar *) xmlMallocAtomic(size);
4273
310k
    if (buf == NULL) {
4274
0
        xmlErrMemory(ctxt, NULL);
4275
0
  return(NULL);
4276
0
    }
4277
310k
    ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4278
310k
    cur = CUR_CHAR(l);
4279
15.6M
    while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4280
15.3M
  if (len + 5 >= size) {
4281
49.3k
      xmlChar *tmp;
4282
4283
49.3k
      size *= 2;
4284
49.3k
      tmp = (xmlChar *) xmlRealloc(buf, size);
4285
49.3k
      if (tmp == NULL) {
4286
0
          xmlFree(buf);
4287
0
    xmlErrMemory(ctxt, NULL);
4288
0
    ctxt->instate = (xmlParserInputState) state;
4289
0
    return(NULL);
4290
0
      }
4291
49.3k
      buf = tmp;
4292
49.3k
  }
4293
15.3M
  count++;
4294
15.3M
  if (count > 50) {
4295
177k
      SHRINK;
4296
177k
      GROW;
4297
177k
      count = 0;
4298
177k
            if (ctxt->instate == XML_PARSER_EOF) {
4299
0
          xmlFree(buf);
4300
0
    return(NULL);
4301
0
            }
4302
177k
  }
4303
15.3M
  COPY_BUF(l,buf,len,cur);
4304
15.3M
  NEXTL(l);
4305
15.3M
  cur = CUR_CHAR(l);
4306
15.3M
  if (cur == 0) {
4307
14.3k
      GROW;
4308
14.3k
      SHRINK;
4309
14.3k
      cur = CUR_CHAR(l);
4310
14.3k
  }
4311
15.3M
        if (len > maxLength) {
4312
0
            xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4313
0
            xmlFree(buf);
4314
0
            ctxt->instate = (xmlParserInputState) state;
4315
0
            return(NULL);
4316
0
        }
4317
15.3M
    }
4318
310k
    buf[len] = 0;
4319
310k
    ctxt->instate = (xmlParserInputState) state;
4320
310k
    if (!IS_CHAR(cur)) {
4321
24.9k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4322
285k
    } else {
4323
285k
  NEXT;
4324
285k
    }
4325
310k
    return(buf);
4326
310k
}
4327
4328
/**
4329
 * xmlParsePubidLiteral:
4330
 * @ctxt:  an XML parser context
4331
 *
4332
 * DEPRECATED: Internal function, don't use.
4333
 *
4334
 * parse an XML public literal
4335
 *
4336
 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4337
 *
4338
 * Returns the PubidLiteral parsed or NULL.
4339
 */
4340
4341
xmlChar *
4342
155k
xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4343
155k
    xmlChar *buf = NULL;
4344
155k
    int len = 0;
4345
155k
    int size = XML_PARSER_BUFFER_SIZE;
4346
155k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4347
47.2k
                    XML_MAX_TEXT_LENGTH :
4348
155k
                    XML_MAX_NAME_LENGTH;
4349
155k
    xmlChar cur;
4350
155k
    xmlChar stop;
4351
155k
    int count = 0;
4352
155k
    xmlParserInputState oldstate = ctxt->instate;
4353
4354
155k
    SHRINK;
4355
155k
    if (RAW == '"') {
4356
111k
        NEXT;
4357
111k
  stop = '"';
4358
111k
    } else if (RAW == '\'') {
4359
38.5k
        NEXT;
4360
38.5k
  stop = '\'';
4361
38.5k
    } else {
4362
5.28k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4363
5.28k
  return(NULL);
4364
5.28k
    }
4365
149k
    buf = (xmlChar *) xmlMallocAtomic(size);
4366
149k
    if (buf == NULL) {
4367
0
  xmlErrMemory(ctxt, NULL);
4368
0
  return(NULL);
4369
0
    }
4370
149k
    ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
4371
149k
    cur = CUR;
4372
4.27M
    while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
4373
4.12M
  if (len + 1 >= size) {
4374
5.10k
      xmlChar *tmp;
4375
4376
5.10k
      size *= 2;
4377
5.10k
      tmp = (xmlChar *) xmlRealloc(buf, size);
4378
5.10k
      if (tmp == NULL) {
4379
0
    xmlErrMemory(ctxt, NULL);
4380
0
    xmlFree(buf);
4381
0
    return(NULL);
4382
0
      }
4383
5.10k
      buf = tmp;
4384
5.10k
  }
4385
4.12M
  buf[len++] = cur;
4386
4.12M
  count++;
4387
4.12M
  if (count > 50) {
4388
16.4k
      SHRINK;
4389
16.4k
      GROW;
4390
16.4k
      count = 0;
4391
16.4k
            if (ctxt->instate == XML_PARSER_EOF) {
4392
0
    xmlFree(buf);
4393
0
    return(NULL);
4394
0
            }
4395
16.4k
  }
4396
4.12M
  NEXT;
4397
4.12M
  cur = CUR;
4398
4.12M
  if (cur == 0) {
4399
2.62k
      GROW;
4400
2.62k
      SHRINK;
4401
2.62k
      cur = CUR;
4402
2.62k
  }
4403
4.12M
        if (len > maxLength) {
4404
0
            xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4405
0
            xmlFree(buf);
4406
0
            return(NULL);
4407
0
        }
4408
4.12M
    }
4409
149k
    buf[len] = 0;
4410
149k
    if (cur != stop) {
4411
19.8k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4412
130k
    } else {
4413
130k
  NEXT;
4414
130k
    }
4415
149k
    ctxt->instate = oldstate;
4416
149k
    return(buf);
4417
149k
}
4418
4419
static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt);
4420
4421
/*
 * Lookup table used by the inner scanning loop of xmlParseCharData().
 * The table is indexed by a byte value; a non-zero entry means the byte
 * is plain ASCII character data that the fast scanner may step over.
 * Entries are zero for every byte that needs special handling: control
 * characters other than TAB (this includes LF and CR), '&', '<', ']'
 * and every byte >= 0x80.
 */
4424
static const unsigned char test_char_data[256] = {
4425
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4426
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* TAB (0x09) accepted; LF (0x0A) and CR (0x0D) are zero */
4427
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4428
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4429
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* '&' (0x26) is zero */
4430
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
4431
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
4432
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* '<' (0x3C) is zero */
4433
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
4434
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
4435
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
4436
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ']' (0x5D) is zero */
4437
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
4438
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
4439
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
4440
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
4441
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x80 and above: non-ASCII, all zero */
4442
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4443
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4444
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4445
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4446
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4447
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4448
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4449
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4450
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4451
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4452
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4453
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4454
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4455
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4456
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
4457
};
4458
4459
/**
4460
 * xmlParseCharData:
4461
 * @ctxt:  an XML parser context
4462
 * @cdata:  unused
4463
 *
4464
 * DEPRECATED: Internal function, don't use.
4465
 *
4466
 * Parse character data. Always makes progress if the first char isn't
4467
 * '<' or '&'.
4468
 *
4469
 * if we are within a CDATA section ']]>' marks an end of section.
4470
 *
4471
 * The right angle bracket (>) may be represented using the string "&gt;",
4472
 * and must, for compatibility, be escaped using "&gt;" or a character
4473
 * reference when it appears in the string "]]>" in content, when that
4474
 * string is not marking the end of a CDATA section.
4475
 *
4476
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4477
 */
4478
4479
void
4480
72.9M
xmlParseCharData(xmlParserCtxtPtr ctxt, ATTRIBUTE_UNUSED int cdata) {
4481
72.9M
    const xmlChar *in;
4482
72.9M
    int nbchar = 0;
4483
72.9M
    int line = ctxt->input->line;
4484
72.9M
    int col = ctxt->input->col;
4485
72.9M
    int ccol;
4486
4487
72.9M
    SHRINK;
4488
72.9M
    GROW;
4489
    /*
4490
     * Accelerated common case where input don't need to be
4491
     * modified before passing it to the handler.
4492
     */
4493
72.9M
    in = ctxt->input->cur;
4494
75.0M
    do {
4495
80.1M
get_more_space:
4496
98.4M
        while (*in == 0x20) { in++; ctxt->input->col++; }
4497
80.1M
        if (*in == 0xA) {
4498
6.33M
            do {
4499
6.33M
                ctxt->input->line++; ctxt->input->col = 1;
4500
6.33M
                in++;
4501
6.33M
            } while (*in == 0xA);
4502
5.09M
            goto get_more_space;
4503
5.09M
        }
4504
75.0M
        if (*in == '<') {
4505
4.96M
            nbchar = in - ctxt->input->cur;
4506
4.96M
            if (nbchar > 0) {
4507
4.96M
                const xmlChar *tmp = ctxt->input->cur;
4508
4.96M
                ctxt->input->cur = in;
4509
4510
4.96M
                if ((ctxt->sax != NULL) &&
4511
4.96M
                    (ctxt->sax->ignorableWhitespace !=
4512
4.96M
                     ctxt->sax->characters)) {
4513
2.24M
                    if (areBlanks(ctxt, tmp, nbchar, 1)) {
4514
1.17M
                        if (ctxt->sax->ignorableWhitespace != NULL)
4515
1.17M
                            ctxt->sax->ignorableWhitespace(ctxt->userData,
4516
1.17M
                                                   tmp, nbchar);
4517
1.17M
                    } else {
4518
1.06M
                        if (ctxt->sax->characters != NULL)
4519
1.06M
                            ctxt->sax->characters(ctxt->userData,
4520
1.06M
                                                  tmp, nbchar);
4521
1.06M
                        if (*ctxt->space == -1)
4522
320k
                            *ctxt->space = -2;
4523
1.06M
                    }
4524
2.72M
                } else if ((ctxt->sax != NULL) &&
4525
2.72M
                           (ctxt->sax->characters != NULL)) {
4526
2.72M
                    ctxt->sax->characters(ctxt->userData,
4527
2.72M
                                          tmp, nbchar);
4528
2.72M
                }
4529
4.96M
            }
4530
4.96M
            return;
4531
4.96M
        }
4532
4533
79.1M
get_more:
4534
79.1M
        ccol = ctxt->input->col;
4535
433M
        while (test_char_data[*in]) {
4536
354M
            in++;
4537
354M
            ccol++;
4538
354M
        }
4539
79.1M
        ctxt->input->col = ccol;
4540
79.1M
        if (*in == 0xA) {
4541
7.78M
            do {
4542
7.78M
                ctxt->input->line++; ctxt->input->col = 1;
4543
7.78M
                in++;
4544
7.78M
            } while (*in == 0xA);
4545
5.79M
            goto get_more;
4546
5.79M
        }
4547
73.3M
        if (*in == ']') {
4548
3.84M
            if ((in[1] == ']') && (in[2] == '>')) {
4549
639k
                xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4550
639k
                ctxt->input->cur = in + 1;
4551
639k
                return;
4552
639k
            }
4553
3.20M
            in++;
4554
3.20M
            ctxt->input->col++;
4555
3.20M
            goto get_more;
4556
3.84M
        }
4557
69.4M
        nbchar = in - ctxt->input->cur;
4558
69.4M
        if (nbchar > 0) {
4559
26.6M
            if ((ctxt->sax != NULL) &&
4560
26.6M
                (ctxt->sax->ignorableWhitespace !=
4561
26.6M
                 ctxt->sax->characters) &&
4562
26.6M
                (IS_BLANK_CH(*ctxt->input->cur))) {
4563
1.62M
                const xmlChar *tmp = ctxt->input->cur;
4564
1.62M
                ctxt->input->cur = in;
4565
4566
1.62M
                if (areBlanks(ctxt, tmp, nbchar, 0)) {
4567
29.3k
                    if (ctxt->sax->ignorableWhitespace != NULL)
4568
29.3k
                        ctxt->sax->ignorableWhitespace(ctxt->userData,
4569
29.3k
                                                       tmp, nbchar);
4570
1.59M
                } else {
4571
1.59M
                    if (ctxt->sax->characters != NULL)
4572
1.59M
                        ctxt->sax->characters(ctxt->userData,
4573
1.59M
                                              tmp, nbchar);
4574
1.59M
                    if (*ctxt->space == -1)
4575
847k
                        *ctxt->space = -2;
4576
1.59M
                }
4577
1.62M
                line = ctxt->input->line;
4578
1.62M
                col = ctxt->input->col;
4579
25.0M
            } else if (ctxt->sax != NULL) {
4580
25.0M
                if (ctxt->sax->characters != NULL)
4581
25.0M
                    ctxt->sax->characters(ctxt->userData,
4582
25.0M
                                          ctxt->input->cur, nbchar);
4583
25.0M
                line = ctxt->input->line;
4584
25.0M
                col = ctxt->input->col;
4585
25.0M
            }
4586
26.6M
        }
4587
69.4M
        ctxt->input->cur = in;
4588
69.4M
        if (*in == 0xD) {
4589
2.83M
            in++;
4590
2.83M
            if (*in == 0xA) {
4591
2.18M
                ctxt->input->cur = in;
4592
2.18M
                in++;
4593
2.18M
                ctxt->input->line++; ctxt->input->col = 1;
4594
2.18M
                continue; /* while */
4595
2.18M
            }
4596
653k
            in--;
4597
653k
        }
4598
67.2M
        if (*in == '<') {
4599
10.8M
            return;
4600
10.8M
        }
4601
56.4M
        if (*in == '&') {
4602
2.55M
            return;
4603
2.55M
        }
4604
53.8M
        SHRINK;
4605
53.8M
        GROW;
4606
53.8M
        if (ctxt->instate == XML_PARSER_EOF)
4607
0
            return;
4608
53.8M
        in = ctxt->input->cur;
4609
56.0M
    } while (((*in >= 0x20) && (*in <= 0x7F)) ||
4610
56.0M
             (*in == 0x09) || (*in == 0x0a));
4611
53.9M
    ctxt->input->line = line;
4612
53.9M
    ctxt->input->col = col;
4613
53.9M
    xmlParseCharDataComplex(ctxt);
4614
53.9M
}
4615
4616
/**
4617
 * xmlParseCharDataComplex:
4618
 * @ctxt:  an XML parser context
4619
 * @cdata:  int indicating whether we are within a CDATA section
4620
 *
4621
 * Always makes progress if the first char isn't '<' or '&'.
4622
 *
4623
 * parse a CharData section.this is the fallback function
4624
 * of xmlParseCharData() when the parsing requires handling
4625
 * of non-ASCII characters.
4626
 */
4627
static void
4628
53.9M
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt) {
4629
53.9M
    xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4630
53.9M
    int nbchar = 0;
4631
53.9M
    int cur, l;
4632
53.9M
    int count = 0;
4633
4634
53.9M
    SHRINK;
4635
53.9M
    GROW;
4636
53.9M
    cur = CUR_CHAR(l);
4637
412M
    while ((cur != '<') && /* checked */
4638
412M
           (cur != '&') &&
4639
412M
     (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
4640
358M
  if ((cur == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4641
246k
      xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4642
246k
  }
4643
358M
  COPY_BUF(l,buf,nbchar,cur);
4644
  /* move current position before possible calling of ctxt->sax->characters */
4645
358M
  NEXTL(l);
4646
358M
  cur = CUR_CHAR(l);
4647
358M
  if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4648
91.9k
      buf[nbchar] = 0;
4649
4650
      /*
4651
       * OK the segment is to be consumed as chars.
4652
       */
4653
91.9k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4654
81.0k
    if (areBlanks(ctxt, buf, nbchar, 0)) {
4655
0
        if (ctxt->sax->ignorableWhitespace != NULL)
4656
0
      ctxt->sax->ignorableWhitespace(ctxt->userData,
4657
0
                                     buf, nbchar);
4658
81.0k
    } else {
4659
81.0k
        if (ctxt->sax->characters != NULL)
4660
81.0k
      ctxt->sax->characters(ctxt->userData, buf, nbchar);
4661
81.0k
        if ((ctxt->sax->characters !=
4662
81.0k
             ctxt->sax->ignorableWhitespace) &&
4663
81.0k
      (*ctxt->space == -1))
4664
9.10k
      *ctxt->space = -2;
4665
81.0k
    }
4666
81.0k
      }
4667
91.9k
      nbchar = 0;
4668
            /* something really bad happened in the SAX callback */
4669
91.9k
            if (ctxt->instate != XML_PARSER_CONTENT)
4670
0
                return;
4671
91.9k
  }
4672
358M
  count++;
4673
358M
  if (count > 50) {
4674
2.96M
      SHRINK;
4675
2.96M
      GROW;
4676
2.96M
      count = 0;
4677
2.96M
            if (ctxt->instate == XML_PARSER_EOF)
4678
0
    return;
4679
2.96M
  }
4680
358M
    }
4681
53.9M
    if (nbchar != 0) {
4682
14.3M
        buf[nbchar] = 0;
4683
  /*
4684
   * OK the segment is to be consumed as chars.
4685
   */
4686
14.3M
  if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4687
13.1M
      if (areBlanks(ctxt, buf, nbchar, 0)) {
4688
21.9k
    if (ctxt->sax->ignorableWhitespace != NULL)
4689
21.9k
        ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4690
13.1M
      } else {
4691
13.1M
    if (ctxt->sax->characters != NULL)
4692
13.1M
        ctxt->sax->characters(ctxt->userData, buf, nbchar);
4693
13.1M
    if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4694
13.1M
        (*ctxt->space == -1))
4695
1.87M
        *ctxt->space = -2;
4696
13.1M
      }
4697
13.1M
  }
4698
14.3M
    }
4699
53.9M
    if ((ctxt->input->cur < ctxt->input->end) && (!IS_CHAR(cur))) {
4700
  /* Generate the error and skip the offending character */
4701
43.1M
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4702
43.1M
                          "PCDATA invalid Char value %d\n",
4703
43.1M
                    cur ? cur : CUR);
4704
43.1M
  NEXT;
4705
43.1M
    }
4706
53.9M
}
4707
4708
/**
4709
 * xmlParseExternalID:
4710
 * @ctxt:  an XML parser context
4711
 * @publicID:  a xmlChar** receiving PubidLiteral
4712
 * @strict: indicate whether we should restrict parsing to only
4713
 *          production [75], see NOTE below
4714
 *
4715
 * DEPRECATED: Internal function, don't use.
4716
 *
4717
 * Parse an External ID or a Public ID
4718
 *
4719
 * NOTE: Productions [75] and [83] interact badly since [75] can generate
4720
 *       'PUBLIC' S PubidLiteral S SystemLiteral
4721
 *
4722
 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4723
 *                   | 'PUBLIC' S PubidLiteral S SystemLiteral
4724
 *
4725
 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4726
 *
4727
 * Returns the function returns SystemLiteral and in the second
4728
 *                case publicID receives PubidLiteral, is strict is off
4729
 *                it is possible to return NULL and have publicID set.
4730
 */
4731
4732
xmlChar *
4733
1.12M
xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4734
1.12M
    xmlChar *URI = NULL;
4735
4736
1.12M
    SHRINK;
4737
4738
1.12M
    *publicID = NULL;
4739
1.12M
    if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4740
192k
        SKIP(6);
4741
192k
  if (SKIP_BLANKS == 0) {
4742
3.35k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4743
3.35k
                     "Space required after 'SYSTEM'\n");
4744
3.35k
  }
4745
192k
  URI = xmlParseSystemLiteral(ctxt);
4746
192k
  if (URI == NULL) {
4747
4.41k
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4748
4.41k
        }
4749
932k
    } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4750
155k
        SKIP(6);
4751
155k
  if (SKIP_BLANKS == 0) {
4752
4.13k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4753
4.13k
        "Space required after 'PUBLIC'\n");
4754
4.13k
  }
4755
155k
  *publicID = xmlParsePubidLiteral(ctxt);
4756
155k
  if (*publicID == NULL) {
4757
5.28k
      xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4758
5.28k
  }
4759
155k
  if (strict) {
4760
      /*
4761
       * We don't handle [83] so "S SystemLiteral" is required.
4762
       */
4763
149k
      if (SKIP_BLANKS == 0) {
4764
27.0k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4765
27.0k
      "Space required after the Public Identifier\n");
4766
27.0k
      }
4767
149k
  } else {
4768
      /*
4769
       * We handle [83] so we return immediately, if
4770
       * "S SystemLiteral" is not detected. We skip blanks if no
4771
             * system literal was found, but this is harmless since we must
4772
             * be at the end of a NotationDecl.
4773
       */
4774
5.66k
      if (SKIP_BLANKS == 0) return(NULL);
4775
77
      if ((CUR != '\'') && (CUR != '"')) return(NULL);
4776
77
  }
4777
149k
  URI = xmlParseSystemLiteral(ctxt);
4778
149k
  if (URI == NULL) {
4779
27.2k
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4780
27.2k
        }
4781
149k
    }
4782
1.11M
    return(URI);
4783
1.12M
}
4784
4785
/**
4786
 * xmlParseCommentComplex:
4787
 * @ctxt:  an XML parser context
4788
 * @buf:  the already parsed part of the buffer
4789
 * @len:  number of bytes in the buffer
4790
 * @size:  allocated size of the buffer
4791
 *
4792
 * Skip an XML (SGML) comment <!-- .... -->
4793
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4794
 *  must not occur within comments. "
4795
 * This is the slow routine in case the accelerator for ascii didn't work
4796
 *
4797
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4798
 */
4799
static void
4800
xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4801
625k
                       size_t len, size_t size) {
4802
625k
    int q, ql;
4803
625k
    int r, rl;
4804
625k
    int cur, l;
4805
625k
    size_t count = 0;
4806
625k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4807
201k
                       XML_MAX_HUGE_LENGTH :
4808
625k
                       XML_MAX_TEXT_LENGTH;
4809
625k
    int inputid;
4810
4811
625k
    inputid = ctxt->input->id;
4812
4813
625k
    if (buf == NULL) {
4814
56.5k
        len = 0;
4815
56.5k
  size = XML_PARSER_BUFFER_SIZE;
4816
56.5k
  buf = (xmlChar *) xmlMallocAtomic(size);
4817
56.5k
  if (buf == NULL) {
4818
0
      xmlErrMemory(ctxt, NULL);
4819
0
      return;
4820
0
  }
4821
56.5k
    }
4822
625k
    GROW; /* Assure there's enough input data */
4823
625k
    q = CUR_CHAR(ql);
4824
625k
    if (q == 0)
4825
56.5k
        goto not_terminated;
4826
569k
    if (!IS_CHAR(q)) {
4827
78.2k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4828
78.2k
                          "xmlParseComment: invalid xmlChar value %d\n",
4829
78.2k
                    q);
4830
78.2k
  xmlFree (buf);
4831
78.2k
  return;
4832
78.2k
    }
4833
490k
    NEXTL(ql);
4834
490k
    r = CUR_CHAR(rl);
4835
490k
    if (r == 0)
4836
8.94k
        goto not_terminated;
4837
481k
    if (!IS_CHAR(r)) {
4838
18.2k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4839
18.2k
                          "xmlParseComment: invalid xmlChar value %d\n",
4840
18.2k
                    r);
4841
18.2k
  xmlFree (buf);
4842
18.2k
  return;
4843
18.2k
    }
4844
463k
    NEXTL(rl);
4845
463k
    cur = CUR_CHAR(l);
4846
463k
    if (cur == 0)
4847
6.55k
        goto not_terminated;
4848
42.9M
    while (IS_CHAR(cur) && /* checked */
4849
42.9M
           ((cur != '>') ||
4850
42.7M
      (r != '-') || (q != '-'))) {
4851
42.5M
  if ((r == '-') && (q == '-')) {
4852
516k
      xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4853
516k
  }
4854
42.5M
  if (len + 5 >= size) {
4855
212k
      xmlChar *new_buf;
4856
212k
            size_t new_size;
4857
4858
212k
      new_size = size * 2;
4859
212k
      new_buf = (xmlChar *) xmlRealloc(buf, new_size);
4860
212k
      if (new_buf == NULL) {
4861
0
    xmlFree (buf);
4862
0
    xmlErrMemory(ctxt, NULL);
4863
0
    return;
4864
0
      }
4865
212k
      buf = new_buf;
4866
212k
            size = new_size;
4867
212k
  }
4868
42.5M
  COPY_BUF(ql,buf,len,q);
4869
42.5M
  q = r;
4870
42.5M
  ql = rl;
4871
42.5M
  r = cur;
4872
42.5M
  rl = l;
4873
4874
42.5M
  count++;
4875
42.5M
  if (count > 50) {
4876
656k
      SHRINK;
4877
656k
      GROW;
4878
656k
      count = 0;
4879
656k
            if (ctxt->instate == XML_PARSER_EOF) {
4880
0
    xmlFree(buf);
4881
0
    return;
4882
0
            }
4883
656k
  }
4884
42.5M
  NEXTL(l);
4885
42.5M
  cur = CUR_CHAR(l);
4886
42.5M
  if (cur == 0) {
4887
98.9k
      SHRINK;
4888
98.9k
      GROW;
4889
98.9k
      cur = CUR_CHAR(l);
4890
98.9k
  }
4891
4892
42.5M
        if (len > maxLength) {
4893
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4894
0
                         "Comment too big found", NULL);
4895
0
            xmlFree (buf);
4896
0
            return;
4897
0
        }
4898
42.5M
    }
4899
457k
    buf[len] = 0;
4900
457k
    if (cur == 0) {
4901
98.9k
  xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4902
98.9k
                       "Comment not terminated \n<!--%.50s\n", buf);
4903
358k
    } else if (!IS_CHAR(cur)) {
4904
129k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4905
129k
                          "xmlParseComment: invalid xmlChar value %d\n",
4906
129k
                    cur);
4907
228k
    } else {
4908
228k
  if (inputid != ctxt->input->id) {
4909
0
      xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4910
0
               "Comment doesn't start and stop in the same"
4911
0
                           " entity\n");
4912
0
  }
4913
228k
        NEXT;
4914
228k
  if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4915
228k
      (!ctxt->disableSAX))
4916
187k
      ctxt->sax->comment(ctxt->userData, buf);
4917
228k
    }
4918
457k
    xmlFree(buf);
4919
457k
    return;
4920
72.0k
not_terminated:
4921
72.0k
    xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4922
72.0k
       "Comment not terminated\n", NULL);
4923
72.0k
    xmlFree(buf);
4924
72.0k
    return;
4925
457k
}
4926
4927
/**
4928
 * xmlParseComment:
4929
 * @ctxt:  an XML parser context
4930
 *
4931
 * DEPRECATED: Internal function, don't use.
4932
 *
4933
 * Parse an XML (SGML) comment. Always consumes '<!'.
4934
 *
4935
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4936
 *  must not occur within comments. "
4937
 *
4938
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4939
 */
4940
void
4941
9.04M
xmlParseComment(xmlParserCtxtPtr ctxt) {
4942
9.04M
    xmlChar *buf = NULL;
4943
9.04M
    size_t size = XML_PARSER_BUFFER_SIZE;
4944
9.04M
    size_t len = 0;
4945
9.04M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4946
443k
                       XML_MAX_HUGE_LENGTH :
4947
9.04M
                       XML_MAX_TEXT_LENGTH;
4948
9.04M
    xmlParserInputState state;
4949
9.04M
    const xmlChar *in;
4950
9.04M
    size_t nbchar = 0;
4951
9.04M
    int ccol;
4952
9.04M
    int inputid;
4953
4954
    /*
4955
     * Check that there is a comment right here.
4956
     */
4957
9.04M
    if ((RAW != '<') || (NXT(1) != '!'))
4958
0
        return;
4959
9.04M
    SKIP(2);
4960
9.04M
    if ((RAW != '-') || (NXT(1) != '-'))
4961
2.59k
        return;
4962
9.03M
    state = ctxt->instate;
4963
9.03M
    ctxt->instate = XML_PARSER_COMMENT;
4964
9.03M
    inputid = ctxt->input->id;
4965
9.03M
    SKIP(2);
4966
9.03M
    SHRINK;
4967
9.03M
    GROW;
4968
4969
    /*
4970
     * Accelerated common case where input don't need to be
4971
     * modified before passing it to the handler.
4972
     */
4973
9.03M
    in = ctxt->input->cur;
4974
9.03M
    do {
4975
9.03M
  if (*in == 0xA) {
4976
118k
      do {
4977
118k
    ctxt->input->line++; ctxt->input->col = 1;
4978
118k
    in++;
4979
118k
      } while (*in == 0xA);
4980
49.1k
  }
4981
10.9M
get_more:
4982
10.9M
        ccol = ctxt->input->col;
4983
59.9M
  while (((*in > '-') && (*in <= 0x7F)) ||
4984
59.9M
         ((*in >= 0x20) && (*in < '-')) ||
4985
59.9M
         (*in == 0x09)) {
4986
49.0M
        in++;
4987
49.0M
        ccol++;
4988
49.0M
  }
4989
10.9M
  ctxt->input->col = ccol;
4990
10.9M
  if (*in == 0xA) {
4991
836k
      do {
4992
836k
    ctxt->input->line++; ctxt->input->col = 1;
4993
836k
    in++;
4994
836k
      } while (*in == 0xA);
4995
598k
      goto get_more;
4996
598k
  }
4997
10.3M
  nbchar = in - ctxt->input->cur;
4998
  /*
4999
   * save current set of data
5000
   */
5001
10.3M
  if (nbchar > 0) {
5002
2.60M
      if ((ctxt->sax != NULL) &&
5003
2.60M
    (ctxt->sax->comment != NULL)) {
5004
2.60M
    if (buf == NULL) {
5005
1.38M
        if ((*in == '-') && (in[1] == '-'))
5006
782k
            size = nbchar + 1;
5007
597k
        else
5008
597k
            size = XML_PARSER_BUFFER_SIZE + nbchar;
5009
1.38M
        buf = (xmlChar *) xmlMallocAtomic(size);
5010
1.38M
        if (buf == NULL) {
5011
0
            xmlErrMemory(ctxt, NULL);
5012
0
      ctxt->instate = state;
5013
0
      return;
5014
0
        }
5015
1.38M
        len = 0;
5016
1.38M
    } else if (len + nbchar + 1 >= size) {
5017
181k
        xmlChar *new_buf;
5018
181k
        size  += len + nbchar + XML_PARSER_BUFFER_SIZE;
5019
181k
        new_buf = (xmlChar *) xmlRealloc(buf, size);
5020
181k
        if (new_buf == NULL) {
5021
0
            xmlFree (buf);
5022
0
      xmlErrMemory(ctxt, NULL);
5023
0
      ctxt->instate = state;
5024
0
      return;
5025
0
        }
5026
181k
        buf = new_buf;
5027
181k
    }
5028
2.60M
    memcpy(&buf[len], ctxt->input->cur, nbchar);
5029
2.60M
    len += nbchar;
5030
2.60M
    buf[len] = 0;
5031
2.60M
      }
5032
2.60M
  }
5033
10.3M
        if (len > maxLength) {
5034
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5035
0
                         "Comment too big found", NULL);
5036
0
            xmlFree (buf);
5037
0
            return;
5038
0
        }
5039
10.3M
  ctxt->input->cur = in;
5040
10.3M
  if (*in == 0xA) {
5041
0
      in++;
5042
0
      ctxt->input->line++; ctxt->input->col = 1;
5043
0
  }
5044
10.3M
  if (*in == 0xD) {
5045
241k
      in++;
5046
241k
      if (*in == 0xA) {
5047
209k
    ctxt->input->cur = in;
5048
209k
    in++;
5049
209k
    ctxt->input->line++; ctxt->input->col = 1;
5050
209k
    goto get_more;
5051
209k
      }
5052
31.7k
      in--;
5053
31.7k
  }
5054
10.1M
  SHRINK;
5055
10.1M
  GROW;
5056
10.1M
        if (ctxt->instate == XML_PARSER_EOF) {
5057
0
            xmlFree(buf);
5058
0
            return;
5059
0
        }
5060
10.1M
  in = ctxt->input->cur;
5061
10.1M
  if (*in == '-') {
5062
9.49M
      if (in[1] == '-') {
5063
8.83M
          if (in[2] == '>') {
5064
8.41M
        if (ctxt->input->id != inputid) {
5065
0
      xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5066
0
                     "comment doesn't start and stop in the"
5067
0
                                       " same entity\n");
5068
0
        }
5069
8.41M
        SKIP(3);
5070
8.41M
        if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5071
8.41M
            (!ctxt->disableSAX)) {
5072
8.28M
      if (buf != NULL)
5073
689k
          ctxt->sax->comment(ctxt->userData, buf);
5074
7.59M
      else
5075
7.59M
          ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5076
8.28M
        }
5077
8.41M
        if (buf != NULL)
5078
811k
            xmlFree(buf);
5079
8.41M
        if (ctxt->instate != XML_PARSER_EOF)
5080
8.41M
      ctxt->instate = state;
5081
8.41M
        return;
5082
8.41M
    }
5083
422k
    if (buf != NULL) {
5084
391k
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5085
391k
                          "Double hyphen within comment: "
5086
391k
                                      "<!--%.50s\n",
5087
391k
              buf);
5088
391k
    } else
5089
31.0k
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5090
31.0k
                          "Double hyphen within comment\n", NULL);
5091
422k
                if (ctxt->instate == XML_PARSER_EOF) {
5092
0
                    xmlFree(buf);
5093
0
                    return;
5094
0
                }
5095
422k
    in++;
5096
422k
    ctxt->input->col++;
5097
422k
      }
5098
1.08M
      in++;
5099
1.08M
      ctxt->input->col++;
5100
1.08M
      goto get_more;
5101
9.49M
  }
5102
10.1M
    } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
5103
625k
    xmlParseCommentComplex(ctxt, buf, len, size);
5104
625k
    ctxt->instate = state;
5105
625k
    return;
5106
9.03M
}
5107
5108
5109
/**
5110
 * xmlParsePITarget:
5111
 * @ctxt:  an XML parser context
5112
 *
5113
 * DEPRECATED: Internal function, don't use.
5114
 *
5115
 * parse the name of a PI
5116
 *
5117
 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5118
 *
5119
 * Returns the PITarget name or NULL
5120
 */
5121
5122
const xmlChar *
5123
1.74M
xmlParsePITarget(xmlParserCtxtPtr ctxt) {
5124
1.74M
    const xmlChar *name;
5125
5126
1.74M
    name = xmlParseName(ctxt);
5127
1.74M
    if ((name != NULL) &&
5128
1.74M
        ((name[0] == 'x') || (name[0] == 'X')) &&
5129
1.74M
        ((name[1] == 'm') || (name[1] == 'M')) &&
5130
1.74M
        ((name[2] == 'l') || (name[2] == 'L'))) {
5131
438k
  int i;
5132
438k
  if ((name[0] == 'x') && (name[1] == 'm') &&
5133
438k
      (name[2] == 'l') && (name[3] == 0)) {
5134
319k
      xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5135
319k
     "XML declaration allowed only at the start of the document\n");
5136
319k
      return(name);
5137
319k
  } else if (name[3] == 0) {
5138
18.8k
      xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5139
18.8k
      return(name);
5140
18.8k
  }
5141
299k
  for (i = 0;;i++) {
5142
299k
      if (xmlW3CPIs[i] == NULL) break;
5143
199k
      if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5144
970
          return(name);
5145
199k
  }
5146
99.2k
  xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5147
99.2k
          "xmlParsePITarget: invalid name prefix 'xml'\n",
5148
99.2k
          NULL, NULL);
5149
99.2k
    }
5150
1.40M
    if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5151
52.5k
  xmlNsErr(ctxt, XML_NS_ERR_COLON,
5152
52.5k
     "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5153
52.5k
    }
5154
1.40M
    return(name);
5155
1.74M
}
5156
5157
#ifdef LIBXML_CATALOG_ENABLED
5158
/**
5159
 * xmlParseCatalogPI:
5160
 * @ctxt:  an XML parser context
5161
 * @catalog:  the PI value string
5162
 *
5163
 * parse an XML Catalog Processing Instruction.
5164
 *
5165
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
5166
 *
5167
 * Occurs only if allowed by the user and if happening in the Misc
5168
 * part of the document before any doctype information
5169
 * This will add the given catalog to the parsing context in order
5170
 * to be used if there is a resolution need further down in the document
5171
 */
5172
5173
static void
5174
28
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
5175
28
    xmlChar *URL = NULL;
5176
28
    const xmlChar *tmp, *base;
5177
28
    xmlChar marker;
5178
5179
28
    tmp = catalog;
5180
28
    while (IS_BLANK_CH(*tmp)) tmp++;
5181
28
    if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
5182
28
  goto error;
5183
0
    tmp += 7;
5184
0
    while (IS_BLANK_CH(*tmp)) tmp++;
5185
0
    if (*tmp != '=') {
5186
0
  return;
5187
0
    }
5188
0
    tmp++;
5189
0
    while (IS_BLANK_CH(*tmp)) tmp++;
5190
0
    marker = *tmp;
5191
0
    if ((marker != '\'') && (marker != '"'))
5192
0
  goto error;
5193
0
    tmp++;
5194
0
    base = tmp;
5195
0
    while ((*tmp != 0) && (*tmp != marker)) tmp++;
5196
0
    if (*tmp == 0)
5197
0
  goto error;
5198
0
    URL = xmlStrndup(base, tmp - base);
5199
0
    tmp++;
5200
0
    while (IS_BLANK_CH(*tmp)) tmp++;
5201
0
    if (*tmp != 0)
5202
0
  goto error;
5203
5204
0
    if (URL != NULL) {
5205
0
  ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
5206
0
  xmlFree(URL);
5207
0
    }
5208
0
    return;
5209
5210
28
error:
5211
28
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
5212
28
            "Catalog PI syntax error: %s\n",
5213
28
      catalog, NULL);
5214
28
    if (URL != NULL)
5215
0
  xmlFree(URL);
5216
28
}
5217
#endif
5218
5219
/**
5220
 * xmlParsePI:
5221
 * @ctxt:  an XML parser context
5222
 *
5223
 * DEPRECATED: Internal function, don't use.
5224
 *
5225
 * parse an XML Processing Instruction.
5226
 *
5227
 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5228
 *
5229
 * The processing is transferred to SAX once parsed.
5230
 */
5231
5232
void
5233
1.74M
xmlParsePI(xmlParserCtxtPtr ctxt) {
5234
1.74M
    xmlChar *buf = NULL;
5235
1.74M
    size_t len = 0;
5236
1.74M
    size_t size = XML_PARSER_BUFFER_SIZE;
5237
1.74M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5238
599k
                       XML_MAX_HUGE_LENGTH :
5239
1.74M
                       XML_MAX_TEXT_LENGTH;
5240
1.74M
    int cur, l;
5241
1.74M
    const xmlChar *target;
5242
1.74M
    xmlParserInputState state;
5243
1.74M
    int count = 0;
5244
5245
1.74M
    if ((RAW == '<') && (NXT(1) == '?')) {
5246
1.74M
  int inputid = ctxt->input->id;
5247
1.74M
  state = ctxt->instate;
5248
1.74M
        ctxt->instate = XML_PARSER_PI;
5249
  /*
5250
   * this is a Processing Instruction.
5251
   */
5252
1.74M
  SKIP(2);
5253
1.74M
  SHRINK;
5254
5255
  /*
5256
   * Parse the target name and check for special support like
5257
   * namespace.
5258
   */
5259
1.74M
        target = xmlParsePITarget(ctxt);
5260
1.74M
  if (target != NULL) {
5261
1.36M
      if ((RAW == '?') && (NXT(1) == '>')) {
5262
264k
    if (inputid != ctxt->input->id) {
5263
0
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5264
0
                             "PI declaration doesn't start and stop in"
5265
0
                                   " the same entity\n");
5266
0
    }
5267
264k
    SKIP(2);
5268
5269
    /*
5270
     * SAX: PI detected.
5271
     */
5272
264k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5273
264k
        (ctxt->sax->processingInstruction != NULL))
5274
239k
        ctxt->sax->processingInstruction(ctxt->userData,
5275
239k
                                         target, NULL);
5276
264k
    if (ctxt->instate != XML_PARSER_EOF)
5277
264k
        ctxt->instate = state;
5278
264k
    return;
5279
264k
      }
5280
1.09M
      buf = (xmlChar *) xmlMallocAtomic(size);
5281
1.09M
      if (buf == NULL) {
5282
0
    xmlErrMemory(ctxt, NULL);
5283
0
    ctxt->instate = state;
5284
0
    return;
5285
0
      }
5286
1.09M
      if (SKIP_BLANKS == 0) {
5287
535k
    xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5288
535k
        "ParsePI: PI %s space expected\n", target);
5289
535k
      }
5290
1.09M
      cur = CUR_CHAR(l);
5291
60.8M
      while (IS_CHAR(cur) && /* checked */
5292
60.8M
       ((cur != '?') || (NXT(1) != '>'))) {
5293
59.7M
    if (len + 5 >= size) {
5294
255k
        xmlChar *tmp;
5295
255k
                    size_t new_size = size * 2;
5296
255k
        tmp = (xmlChar *) xmlRealloc(buf, new_size);
5297
255k
        if (tmp == NULL) {
5298
0
      xmlErrMemory(ctxt, NULL);
5299
0
      xmlFree(buf);
5300
0
      ctxt->instate = state;
5301
0
      return;
5302
0
        }
5303
255k
        buf = tmp;
5304
255k
                    size = new_size;
5305
255k
    }
5306
59.7M
    count++;
5307
59.7M
    if (count > 50) {
5308
788k
        SHRINK;
5309
788k
        GROW;
5310
788k
                    if (ctxt->instate == XML_PARSER_EOF) {
5311
0
                        xmlFree(buf);
5312
0
                        return;
5313
0
                    }
5314
788k
        count = 0;
5315
788k
    }
5316
59.7M
    COPY_BUF(l,buf,len,cur);
5317
59.7M
    NEXTL(l);
5318
59.7M
    cur = CUR_CHAR(l);
5319
59.7M
    if (cur == 0) {
5320
222k
        SHRINK;
5321
222k
        GROW;
5322
222k
        cur = CUR_CHAR(l);
5323
222k
    }
5324
59.7M
                if (len > maxLength) {
5325
0
                    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5326
0
                                      "PI %s too big found", target);
5327
0
                    xmlFree(buf);
5328
0
                    ctxt->instate = state;
5329
0
                    return;
5330
0
                }
5331
59.7M
      }
5332
1.09M
      buf[len] = 0;
5333
1.09M
      if (cur != '?') {
5334
485k
    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5335
485k
          "ParsePI: PI %s never end ...\n", target);
5336
611k
      } else {
5337
611k
    if (inputid != ctxt->input->id) {
5338
0
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5339
0
                             "PI declaration doesn't start and stop in"
5340
0
                                   " the same entity\n");
5341
0
    }
5342
611k
    SKIP(2);
5343
5344
611k
#ifdef LIBXML_CATALOG_ENABLED
5345
611k
    if (((state == XML_PARSER_MISC) ||
5346
611k
               (state == XML_PARSER_START)) &&
5347
611k
        (xmlStrEqual(target, XML_CATALOG_PI))) {
5348
28
        xmlCatalogAllow allow = xmlCatalogGetDefaults();
5349
28
        if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5350
28
      (allow == XML_CATA_ALLOW_ALL))
5351
28
      xmlParseCatalogPI(ctxt, buf);
5352
28
    }
5353
611k
#endif
5354
5355
5356
    /*
5357
     * SAX: PI detected.
5358
     */
5359
611k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5360
611k
        (ctxt->sax->processingInstruction != NULL))
5361
514k
        ctxt->sax->processingInstruction(ctxt->userData,
5362
514k
                                         target, buf);
5363
611k
      }
5364
1.09M
      xmlFree(buf);
5365
1.09M
  } else {
5366
384k
      xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5367
384k
  }
5368
1.48M
  if (ctxt->instate != XML_PARSER_EOF)
5369
1.48M
      ctxt->instate = state;
5370
1.48M
    }
5371
1.74M
}
5372
5373
/**
5374
 * xmlParseNotationDecl:
5375
 * @ctxt:  an XML parser context
5376
 *
5377
 * DEPRECATED: Internal function, don't use.
5378
 *
5379
 * Parse a notation declaration. Always consumes '<!'.
5380
 *
5381
 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID |  PublicID) S? '>'
5382
 *
5383
 * Hence there is actually 3 choices:
5384
 *     'PUBLIC' S PubidLiteral
5385
 *     'PUBLIC' S PubidLiteral S SystemLiteral
5386
 * and 'SYSTEM' S SystemLiteral
5387
 *
5388
 * See the NOTE on xmlParseExternalID().
5389
 */
5390
5391
void
5392
29.4k
xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5393
29.4k
    const xmlChar *name;
5394
29.4k
    xmlChar *Pubid;
5395
29.4k
    xmlChar *Systemid;
5396
5397
29.4k
    if ((CUR != '<') || (NXT(1) != '!'))
5398
0
        return;
5399
29.4k
    SKIP(2);
5400
5401
29.4k
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5402
19.3k
  int inputid = ctxt->input->id;
5403
19.3k
  SHRINK;
5404
19.3k
  SKIP(8);
5405
19.3k
  if (SKIP_BLANKS == 0) {
5406
2.70k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5407
2.70k
         "Space required after '<!NOTATION'\n");
5408
2.70k
      return;
5409
2.70k
  }
5410
5411
16.6k
        name = xmlParseName(ctxt);
5412
16.6k
  if (name == NULL) {
5413
1.34k
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5414
1.34k
      return;
5415
1.34k
  }
5416
15.2k
  if (xmlStrchr(name, ':') != NULL) {
5417
348
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5418
348
         "colons are forbidden from notation names '%s'\n",
5419
348
         name, NULL, NULL);
5420
348
  }
5421
15.2k
  if (SKIP_BLANKS == 0) {
5422
1.87k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5423
1.87k
         "Space required after the NOTATION name'\n");
5424
1.87k
      return;
5425
1.87k
  }
5426
5427
  /*
5428
   * Parse the IDs.
5429
   */
5430
13.4k
  Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5431
13.4k
  SKIP_BLANKS;
5432
5433
13.4k
  if (RAW == '>') {
5434
6.95k
      if (inputid != ctxt->input->id) {
5435
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5436
0
                         "Notation declaration doesn't start and stop"
5437
0
                               " in the same entity\n");
5438
0
      }
5439
6.95k
      NEXT;
5440
6.95k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5441
6.95k
    (ctxt->sax->notationDecl != NULL))
5442
5.48k
    ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5443
6.95k
  } else {
5444
6.46k
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5445
6.46k
  }
5446
13.4k
  if (Systemid != NULL) xmlFree(Systemid);
5447
13.4k
  if (Pubid != NULL) xmlFree(Pubid);
5448
13.4k
    }
5449
29.4k
}
5450
5451
/**
5452
 * xmlParseEntityDecl:
5453
 * @ctxt:  an XML parser context
5454
 *
5455
 * DEPRECATED: Internal function, don't use.
5456
 *
5457
 * Parse an entity declaration. Always consumes '<!'.
5458
 *
5459
 * [70] EntityDecl ::= GEDecl | PEDecl
5460
 *
5461
 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5462
 *
5463
 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5464
 *
5465
 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5466
 *
5467
 * [74] PEDef ::= EntityValue | ExternalID
5468
 *
5469
 * [76] NDataDecl ::= S 'NDATA' S Name
5470
 *
5471
 * [ VC: Notation Declared ]
5472
 * The Name must match the declared name of a notation.
5473
 */
5474
5475
void
5476
492k
xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5477
492k
    const xmlChar *name = NULL;
5478
492k
    xmlChar *value = NULL;
5479
492k
    xmlChar *URI = NULL, *literal = NULL;
5480
492k
    const xmlChar *ndata = NULL;
5481
492k
    int isParameter = 0;
5482
492k
    xmlChar *orig = NULL;
5483
5484
492k
    if ((CUR != '<') || (NXT(1) != '!'))
5485
0
        return;
5486
492k
    SKIP(2);
5487
5488
    /* GROW; done in the caller */
5489
492k
    if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5490
478k
  int inputid = ctxt->input->id;
5491
478k
  SHRINK;
5492
478k
  SKIP(6);
5493
478k
  if (SKIP_BLANKS == 0) {
5494
13.7k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5495
13.7k
         "Space required after '<!ENTITY'\n");
5496
13.7k
  }
5497
5498
478k
  if (RAW == '%') {
5499
126k
      NEXT;
5500
126k
      if (SKIP_BLANKS == 0) {
5501
5.83k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5502
5.83k
             "Space required after '%%'\n");
5503
5.83k
      }
5504
126k
      isParameter = 1;
5505
126k
  }
5506
5507
478k
        name = xmlParseName(ctxt);
5508
478k
  if (name == NULL) {
5509
17.4k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5510
17.4k
                     "xmlParseEntityDecl: no name\n");
5511
17.4k
            return;
5512
17.4k
  }
5513
461k
  if (xmlStrchr(name, ':') != NULL) {
5514
2.95k
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5515
2.95k
         "colons are forbidden from entities names '%s'\n",
5516
2.95k
         name, NULL, NULL);
5517
2.95k
  }
5518
461k
  if (SKIP_BLANKS == 0) {
5519
23.6k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5520
23.6k
         "Space required after the entity name\n");
5521
23.6k
  }
5522
5523
461k
  ctxt->instate = XML_PARSER_ENTITY_DECL;
5524
  /*
5525
   * handle the various case of definitions...
5526
   */
5527
461k
  if (isParameter) {
5528
122k
      if ((RAW == '"') || (RAW == '\'')) {
5529
71.3k
          value = xmlParseEntityValue(ctxt, &orig);
5530
71.3k
    if (value) {
5531
55.1k
        if ((ctxt->sax != NULL) &&
5532
55.1k
      (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5533
47.8k
      ctxt->sax->entityDecl(ctxt->userData, name,
5534
47.8k
                        XML_INTERNAL_PARAMETER_ENTITY,
5535
47.8k
            NULL, NULL, value);
5536
55.1k
    }
5537
71.3k
      } else {
5538
51.1k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5539
51.1k
    if ((URI == NULL) && (literal == NULL)) {
5540
7.67k
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5541
7.67k
    }
5542
51.1k
    if (URI) {
5543
42.4k
        xmlURIPtr uri;
5544
5545
42.4k
        uri = xmlParseURI((const char *) URI);
5546
42.4k
        if (uri == NULL) {
5547
4.91k
            xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5548
4.91k
             "Invalid URI: %s\n", URI);
5549
      /*
5550
       * This really ought to be a well formedness error
5551
       * but the XML Core WG decided otherwise c.f. issue
5552
       * E26 of the XML erratas.
5553
       */
5554
37.5k
        } else {
5555
37.5k
      if (uri->fragment != NULL) {
5556
          /*
5557
           * Okay this is foolish to block those but not
5558
           * invalid URIs.
5559
           */
5560
1.44k
          xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5561
36.1k
      } else {
5562
36.1k
          if ((ctxt->sax != NULL) &&
5563
36.1k
        (!ctxt->disableSAX) &&
5564
36.1k
        (ctxt->sax->entityDecl != NULL))
5565
33.6k
        ctxt->sax->entityDecl(ctxt->userData, name,
5566
33.6k
              XML_EXTERNAL_PARAMETER_ENTITY,
5567
33.6k
              literal, URI, NULL);
5568
36.1k
      }
5569
37.5k
      xmlFreeURI(uri);
5570
37.5k
        }
5571
42.4k
    }
5572
51.1k
      }
5573
338k
  } else {
5574
338k
      if ((RAW == '"') || (RAW == '\'')) {
5575
260k
          value = xmlParseEntityValue(ctxt, &orig);
5576
260k
    if ((ctxt->sax != NULL) &&
5577
260k
        (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5578
190k
        ctxt->sax->entityDecl(ctxt->userData, name,
5579
190k
        XML_INTERNAL_GENERAL_ENTITY,
5580
190k
        NULL, NULL, value);
5581
    /*
5582
     * For expat compatibility in SAX mode.
5583
     */
5584
260k
    if ((ctxt->myDoc == NULL) ||
5585
260k
        (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5586
8.17k
        if (ctxt->myDoc == NULL) {
5587
2.88k
      ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5588
2.88k
      if (ctxt->myDoc == NULL) {
5589
0
          xmlErrMemory(ctxt, "New Doc failed");
5590
0
          return;
5591
0
      }
5592
2.88k
      ctxt->myDoc->properties = XML_DOC_INTERNAL;
5593
2.88k
        }
5594
8.17k
        if (ctxt->myDoc->intSubset == NULL)
5595
2.88k
      ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5596
2.88k
              BAD_CAST "fake", NULL, NULL);
5597
5598
8.17k
        xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5599
8.17k
                    NULL, NULL, value);
5600
8.17k
    }
5601
260k
      } else {
5602
77.8k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5603
77.8k
    if ((URI == NULL) && (literal == NULL)) {
5604
20.5k
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5605
20.5k
    }
5606
77.8k
    if (URI) {
5607
53.5k
        xmlURIPtr uri;
5608
5609
53.5k
        uri = xmlParseURI((const char *)URI);
5610
53.5k
        if (uri == NULL) {
5611
10.9k
            xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5612
10.9k
             "Invalid URI: %s\n", URI);
5613
      /*
5614
       * This really ought to be a well formedness error
5615
       * but the XML Core WG decided otherwise c.f. issue
5616
       * E26 of the XML erratas.
5617
       */
5618
42.5k
        } else {
5619
42.5k
      if (uri->fragment != NULL) {
5620
          /*
5621
           * Okay this is foolish to block those but not
5622
           * invalid URIs.
5623
           */
5624
725
          xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5625
725
      }
5626
42.5k
      xmlFreeURI(uri);
5627
42.5k
        }
5628
53.5k
    }
5629
77.8k
    if ((RAW != '>') && (SKIP_BLANKS == 0)) {
5630
28.7k
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5631
28.7k
           "Space required before 'NDATA'\n");
5632
28.7k
    }
5633
77.8k
    if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5634
20.2k
        SKIP(5);
5635
20.2k
        if (SKIP_BLANKS == 0) {
5636
1.33k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5637
1.33k
               "Space required after 'NDATA'\n");
5638
1.33k
        }
5639
20.2k
        ndata = xmlParseName(ctxt);
5640
20.2k
        if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5641
20.2k
            (ctxt->sax->unparsedEntityDecl != NULL))
5642
18.4k
      ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5643
18.4k
            literal, URI, ndata);
5644
57.5k
    } else {
5645
57.5k
        if ((ctxt->sax != NULL) &&
5646
57.5k
            (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5647
43.2k
      ctxt->sax->entityDecl(ctxt->userData, name,
5648
43.2k
            XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5649
43.2k
            literal, URI, NULL);
5650
        /*
5651
         * For expat compatibility in SAX mode.
5652
         * assuming the entity replacement was asked for
5653
         */
5654
57.5k
        if ((ctxt->replaceEntities != 0) &&
5655
57.5k
      ((ctxt->myDoc == NULL) ||
5656
33.6k
      (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5657
1.41k
      if (ctxt->myDoc == NULL) {
5658
787
          ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5659
787
          if (ctxt->myDoc == NULL) {
5660
0
              xmlErrMemory(ctxt, "New Doc failed");
5661
0
        return;
5662
0
          }
5663
787
          ctxt->myDoc->properties = XML_DOC_INTERNAL;
5664
787
      }
5665
5666
1.41k
      if (ctxt->myDoc->intSubset == NULL)
5667
787
          ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5668
787
            BAD_CAST "fake", NULL, NULL);
5669
1.41k
      xmlSAX2EntityDecl(ctxt, name,
5670
1.41k
                  XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5671
1.41k
                  literal, URI, NULL);
5672
1.41k
        }
5673
57.5k
    }
5674
77.8k
      }
5675
338k
  }
5676
461k
  if (ctxt->instate == XML_PARSER_EOF)
5677
93
      goto done;
5678
461k
  SKIP_BLANKS;
5679
461k
  if (RAW != '>') {
5680
83.2k
      xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5681
83.2k
              "xmlParseEntityDecl: entity %s not terminated\n", name);
5682
83.2k
      xmlHaltParser(ctxt);
5683
377k
  } else {
5684
377k
      if (inputid != ctxt->input->id) {
5685
3
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5686
3
                         "Entity declaration doesn't start and stop in"
5687
3
                               " the same entity\n");
5688
3
      }
5689
377k
      NEXT;
5690
377k
  }
5691
461k
  if (orig != NULL) {
5692
      /*
5693
       * Ugly mechanism to save the raw entity value.
5694
       */
5695
293k
      xmlEntityPtr cur = NULL;
5696
5697
293k
      if (isParameter) {
5698
57.9k
          if ((ctxt->sax != NULL) &&
5699
57.9k
        (ctxt->sax->getParameterEntity != NULL))
5700
57.9k
        cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5701
235k
      } else {
5702
235k
          if ((ctxt->sax != NULL) &&
5703
235k
        (ctxt->sax->getEntity != NULL))
5704
235k
        cur = ctxt->sax->getEntity(ctxt->userData, name);
5705
235k
    if ((cur == NULL) && (ctxt->userData==ctxt)) {
5706
51.0k
        cur = xmlSAX2GetEntity(ctxt, name);
5707
51.0k
    }
5708
235k
      }
5709
293k
            if ((cur != NULL) && (cur->orig == NULL)) {
5710
215k
    cur->orig = orig;
5711
215k
                orig = NULL;
5712
215k
      }
5713
293k
  }
5714
5715
461k
done:
5716
461k
  if (value != NULL) xmlFree(value);
5717
461k
  if (URI != NULL) xmlFree(URI);
5718
461k
  if (literal != NULL) xmlFree(literal);
5719
461k
        if (orig != NULL) xmlFree(orig);
5720
461k
    }
5721
492k
}
5722
5723
/**
5724
 * xmlParseDefaultDecl:
5725
 * @ctxt:  an XML parser context
5726
 * @value:  Receive a possible fixed default value for the attribute
5727
 *
5728
 * DEPRECATED: Internal function, don't use.
5729
 *
5730
 * Parse an attribute default declaration
5731
 *
5732
 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5733
 *
5734
 * [ VC: Required Attribute ]
5735
 * if the default declaration is the keyword #REQUIRED, then the
5736
 * attribute must be specified for all elements of the type in the
5737
 * attribute-list declaration.
5738
 *
5739
 * [ VC: Attribute Default Legal ]
5740
 * The declared default value must meet the lexical constraints of
5741
 * the declared attribute type c.f. xmlValidateAttributeDecl()
5742
 *
5743
 * [ VC: Fixed Attribute Default ]
5744
 * if an attribute has a default value declared with the #FIXED
5745
 * keyword, instances of that attribute must match the default value.
5746
 *
5747
 * [ WFC: No < in Attribute Values ]
5748
 * handled in xmlParseAttValue()
5749
 *
5750
 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5751
 *          or XML_ATTRIBUTE_FIXED.
5752
 */
5753
5754
int
5755
437k
xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5756
437k
    int val;
5757
437k
    xmlChar *ret;
5758
5759
437k
    *value = NULL;
5760
437k
    if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5761
32.5k
  SKIP(9);
5762
32.5k
  return(XML_ATTRIBUTE_REQUIRED);
5763
32.5k
    }
5764
404k
    if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5765
305k
  SKIP(8);
5766
305k
  return(XML_ATTRIBUTE_IMPLIED);
5767
305k
    }
5768
99.4k
    val = XML_ATTRIBUTE_NONE;
5769
99.4k
    if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5770
21.0k
  SKIP(6);
5771
21.0k
  val = XML_ATTRIBUTE_FIXED;
5772
21.0k
  if (SKIP_BLANKS == 0) {
5773
644
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5774
644
         "Space required after '#FIXED'\n");
5775
644
  }
5776
21.0k
    }
5777
99.4k
    ret = xmlParseAttValue(ctxt);
5778
99.4k
    ctxt->instate = XML_PARSER_DTD;
5779
99.4k
    if (ret == NULL) {
5780
25.7k
  xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5781
25.7k
           "Attribute default value declaration error\n");
5782
25.7k
    } else
5783
73.7k
        *value = ret;
5784
99.4k
    return(val);
5785
404k
}
5786
5787
/**
5788
 * xmlParseNotationType:
5789
 * @ctxt:  an XML parser context
5790
 *
5791
 * DEPRECATED: Internal function, don't use.
5792
 *
5793
 * parse an Notation attribute type.
5794
 *
5795
 * Note: the leading 'NOTATION' S part has already being parsed...
5796
 *
5797
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5798
 *
5799
 * [ VC: Notation Attributes ]
5800
 * Values of this type must match one of the notation names included
5801
 * in the declaration; all notation names in the declaration must be declared.
5802
 *
5803
 * Returns: the notation attribute tree built while parsing
5804
 */
5805
5806
xmlEnumerationPtr
5807
6.19k
xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5808
6.19k
    const xmlChar *name;
5809
6.19k
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5810
5811
6.19k
    if (RAW != '(') {
5812
1.03k
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5813
1.03k
  return(NULL);
5814
1.03k
    }
5815
5.16k
    SHRINK;
5816
5.20k
    do {
5817
5.20k
        NEXT;
5818
5.20k
  SKIP_BLANKS;
5819
5.20k
        name = xmlParseName(ctxt);
5820
5.20k
  if (name == NULL) {
5821
510
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5822
510
         "Name expected in NOTATION declaration\n");
5823
510
            xmlFreeEnumeration(ret);
5824
510
      return(NULL);
5825
510
  }
5826
4.69k
  tmp = ret;
5827
4.72k
  while (tmp != NULL) {
5828
33
      if (xmlStrEqual(name, tmp->name)) {
5829
3
    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5830
3
    "standalone: attribute notation value token %s duplicated\n",
5831
3
         name, NULL);
5832
3
    if (!xmlDictOwns(ctxt->dict, name))
5833
0
        xmlFree((xmlChar *) name);
5834
3
    break;
5835
3
      }
5836
30
      tmp = tmp->next;
5837
30
  }
5838
4.69k
  if (tmp == NULL) {
5839
4.69k
      cur = xmlCreateEnumeration(name);
5840
4.69k
      if (cur == NULL) {
5841
0
                xmlFreeEnumeration(ret);
5842
0
                return(NULL);
5843
0
            }
5844
4.69k
      if (last == NULL) ret = last = cur;
5845
24
      else {
5846
24
    last->next = cur;
5847
24
    last = cur;
5848
24
      }
5849
4.69k
  }
5850
4.69k
  SKIP_BLANKS;
5851
4.69k
    } while (RAW == '|');
5852
4.65k
    if (RAW != ')') {
5853
727
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5854
727
        xmlFreeEnumeration(ret);
5855
727
  return(NULL);
5856
727
    }
5857
3.92k
    NEXT;
5858
3.92k
    return(ret);
5859
4.65k
}
5860
5861
/**
5862
 * xmlParseEnumerationType:
5863
 * @ctxt:  an XML parser context
5864
 *
5865
 * DEPRECATED: Internal function, don't use.
5866
 *
5867
 * parse an Enumeration attribute type.
5868
 *
5869
 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5870
 *
5871
 * [ VC: Enumeration ]
5872
 * Values of this type must match one of the Nmtoken tokens in
5873
 * the declaration
5874
 *
5875
 * Returns: the enumeration attribute tree built while parsing
5876
 */
5877
5878
xmlEnumerationPtr
5879
75.9k
xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5880
75.9k
    xmlChar *name;
5881
75.9k
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5882
5883
75.9k
    if (RAW != '(') {
5884
38.5k
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5885
38.5k
  return(NULL);
5886
38.5k
    }
5887
37.3k
    SHRINK;
5888
87.4k
    do {
5889
87.4k
        NEXT;
5890
87.4k
  SKIP_BLANKS;
5891
87.4k
        name = xmlParseNmtoken(ctxt);
5892
87.4k
  if (name == NULL) {
5893
1.45k
      xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5894
1.45k
      return(ret);
5895
1.45k
  }
5896
86.0k
  tmp = ret;
5897
197k
  while (tmp != NULL) {
5898
112k
      if (xmlStrEqual(name, tmp->name)) {
5899
923
    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5900
923
    "standalone: attribute enumeration value token %s duplicated\n",
5901
923
         name, NULL);
5902
923
    if (!xmlDictOwns(ctxt->dict, name))
5903
923
        xmlFree(name);
5904
923
    break;
5905
923
      }
5906
111k
      tmp = tmp->next;
5907
111k
  }
5908
86.0k
  if (tmp == NULL) {
5909
85.0k
      cur = xmlCreateEnumeration(name);
5910
85.0k
      if (!xmlDictOwns(ctxt->dict, name))
5911
85.0k
    xmlFree(name);
5912
85.0k
      if (cur == NULL) {
5913
0
                xmlFreeEnumeration(ret);
5914
0
                return(NULL);
5915
0
            }
5916
85.0k
      if (last == NULL) ret = last = cur;
5917
48.9k
      else {
5918
48.9k
    last->next = cur;
5919
48.9k
    last = cur;
5920
48.9k
      }
5921
85.0k
  }
5922
86.0k
  SKIP_BLANKS;
5923
86.0k
    } while (RAW == '|');
5924
35.9k
    if (RAW != ')') {
5925
2.71k
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5926
2.71k
  return(ret);
5927
2.71k
    }
5928
33.2k
    NEXT;
5929
33.2k
    return(ret);
5930
35.9k
}
5931
5932
/**
5933
 * xmlParseEnumeratedType:
5934
 * @ctxt:  an XML parser context
5935
 * @tree:  the enumeration tree built while parsing
5936
 *
5937
 * DEPRECATED: Internal function, don't use.
5938
 *
5939
 * parse an Enumerated attribute type.
5940
 *
5941
 * [57] EnumeratedType ::= NotationType | Enumeration
5942
 *
5943
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5944
 *
5945
 *
5946
 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5947
 */
5948
5949
int
5950
83.1k
xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5951
83.1k
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5952
7.17k
  SKIP(8);
5953
7.17k
  if (SKIP_BLANKS == 0) {
5954
975
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5955
975
         "Space required after 'NOTATION'\n");
5956
975
      return(0);
5957
975
  }
5958
6.19k
  *tree = xmlParseNotationType(ctxt);
5959
6.19k
  if (*tree == NULL) return(0);
5960
3.92k
  return(XML_ATTRIBUTE_NOTATION);
5961
6.19k
    }
5962
75.9k
    *tree = xmlParseEnumerationType(ctxt);
5963
75.9k
    if (*tree == NULL) return(0);
5964
36.1k
    return(XML_ATTRIBUTE_ENUMERATION);
5965
75.9k
}
5966
5967
/**
5968
 * xmlParseAttributeType:
5969
 * @ctxt:  an XML parser context
5970
 * @tree:  the enumeration tree built while parsing
5971
 *
5972
 * DEPRECATED: Internal function, don't use.
5973
 *
5974
 * parse the Attribute list def for an element
5975
 *
5976
 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5977
 *
5978
 * [55] StringType ::= 'CDATA'
5979
 *
5980
 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5981
 *                        'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5982
 *
5983
 * Validity constraints for attribute values syntax are checked in
5984
 * xmlValidateAttributeValue()
5985
 *
5986
 * [ VC: ID ]
5987
 * Values of type ID must match the Name production. A name must not
5988
 * appear more than once in an XML document as a value of this type;
5989
 * i.e., ID values must uniquely identify the elements which bear them.
5990
 *
5991
 * [ VC: One ID per Element Type ]
5992
 * No element type may have more than one ID attribute specified.
5993
 *
5994
 * [ VC: ID Attribute Default ]
5995
 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5996
 *
5997
 * [ VC: IDREF ]
5998
 * Values of type IDREF must match the Name production, and values
5999
 * of type IDREFS must match Names; each IDREF Name must match the value
6000
 * of an ID attribute on some element in the XML document; i.e. IDREF
6001
 * values must match the value of some ID attribute.
6002
 *
6003
 * [ VC: Entity Name ]
6004
 * Values of type ENTITY must match the Name production, values
6005
 * of type ENTITIES must match Names; each Entity Name must match the
6006
 * name of an unparsed entity declared in the DTD.
6007
 *
6008
 * [ VC: Name Token ]
6009
 * Values of type NMTOKEN must match the Nmtoken production; values
6010
 * of type NMTOKENS must match Nmtokens.
6011
 *
6012
 * Returns the attribute type
6013
 */
6014
int
6015
493k
xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
6016
493k
    SHRINK;
6017
493k
    if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
6018
174k
  SKIP(5);
6019
174k
  return(XML_ATTRIBUTE_CDATA);
6020
319k
     } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
6021
25.2k
  SKIP(6);
6022
25.2k
  return(XML_ATTRIBUTE_IDREFS);
6023
293k
     } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
6024
9.52k
  SKIP(5);
6025
9.52k
  return(XML_ATTRIBUTE_IDREF);
6026
284k
     } else if ((RAW == 'I') && (NXT(1) == 'D')) {
6027
85.5k
        SKIP(2);
6028
85.5k
  return(XML_ATTRIBUTE_ID);
6029
198k
     } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
6030
16.1k
  SKIP(6);
6031
16.1k
  return(XML_ATTRIBUTE_ENTITY);
6032
182k
     } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
6033
3.64k
  SKIP(8);
6034
3.64k
  return(XML_ATTRIBUTE_ENTITIES);
6035
178k
     } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
6036
55.1k
  SKIP(8);
6037
55.1k
  return(XML_ATTRIBUTE_NMTOKENS);
6038
123k
     } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
6039
40.5k
  SKIP(7);
6040
40.5k
  return(XML_ATTRIBUTE_NMTOKEN);
6041
40.5k
     }
6042
83.1k
     return(xmlParseEnumeratedType(ctxt, tree));
6043
493k
}
6044
6045
/**
6046
 * xmlParseAttributeListDecl:
6047
 * @ctxt:  an XML parser context
6048
 *
6049
 * DEPRECATED: Internal function, don't use.
6050
 *
6051
 * Parse an attribute list declaration for an element. Always consumes '<!'.
6052
 *
6053
 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
6054
 *
6055
 * [53] AttDef ::= S Name S AttType S DefaultDecl
6056
 *
6057
 */
6058
void
6059
441k
xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
6060
441k
    const xmlChar *elemName;
6061
441k
    const xmlChar *attrName;
6062
441k
    xmlEnumerationPtr tree;
6063
6064
441k
    if ((CUR != '<') || (NXT(1) != '!'))
6065
0
        return;
6066
441k
    SKIP(2);
6067
6068
441k
    if (CMP7(CUR_PTR, 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
6069
420k
  int inputid = ctxt->input->id;
6070
6071
420k
  SKIP(7);
6072
420k
  if (SKIP_BLANKS == 0) {
6073
26.6k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6074
26.6k
                     "Space required after '<!ATTLIST'\n");
6075
26.6k
  }
6076
420k
        elemName = xmlParseName(ctxt);
6077
420k
  if (elemName == NULL) {
6078
17.9k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6079
17.9k
         "ATTLIST: no name for Element\n");
6080
17.9k
      return;
6081
17.9k
  }
6082
402k
  SKIP_BLANKS;
6083
402k
  GROW;
6084
794k
  while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
6085
557k
      int type;
6086
557k
      int def;
6087
557k
      xmlChar *defaultValue = NULL;
6088
6089
557k
      GROW;
6090
557k
            tree = NULL;
6091
557k
      attrName = xmlParseName(ctxt);
6092
557k
      if (attrName == NULL) {
6093
38.0k
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6094
38.0k
             "ATTLIST: no name for Attribute\n");
6095
38.0k
    break;
6096
38.0k
      }
6097
519k
      GROW;
6098
519k
      if (SKIP_BLANKS == 0) {
6099
25.5k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6100
25.5k
            "Space required after the attribute name\n");
6101
25.5k
    break;
6102
25.5k
      }
6103
6104
493k
      type = xmlParseAttributeType(ctxt, &tree);
6105
493k
      if (type <= 0) {
6106
43.0k
          break;
6107
43.0k
      }
6108
6109
450k
      GROW;
6110
450k
      if (SKIP_BLANKS == 0) {
6111
12.9k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6112
12.9k
             "Space required after the attribute type\n");
6113
12.9k
          if (tree != NULL)
6114
3.87k
        xmlFreeEnumeration(tree);
6115
12.9k
    break;
6116
12.9k
      }
6117
6118
437k
      def = xmlParseDefaultDecl(ctxt, &defaultValue);
6119
437k
      if (def <= 0) {
6120
0
                if (defaultValue != NULL)
6121
0
        xmlFree(defaultValue);
6122
0
          if (tree != NULL)
6123
0
        xmlFreeEnumeration(tree);
6124
0
          break;
6125
0
      }
6126
437k
      if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6127
29.9k
          xmlAttrNormalizeSpace(defaultValue, defaultValue);
6128
6129
437k
      GROW;
6130
437k
            if (RAW != '>') {
6131
257k
    if (SKIP_BLANKS == 0) {
6132
45.2k
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6133
45.2k
      "Space required after the attribute default value\n");
6134
45.2k
        if (defaultValue != NULL)
6135
17.9k
      xmlFree(defaultValue);
6136
45.2k
        if (tree != NULL)
6137
3.82k
      xmlFreeEnumeration(tree);
6138
45.2k
        break;
6139
45.2k
    }
6140
257k
      }
6141
392k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6142
392k
    (ctxt->sax->attributeDecl != NULL))
6143
353k
    ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6144
353k
                          type, def, defaultValue, tree);
6145
38.5k
      else if (tree != NULL)
6146
3.63k
    xmlFreeEnumeration(tree);
6147
6148
392k
      if ((ctxt->sax2) && (defaultValue != NULL) &&
6149
392k
          (def != XML_ATTRIBUTE_IMPLIED) &&
6150
392k
    (def != XML_ATTRIBUTE_REQUIRED)) {
6151
40.9k
    xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6152
40.9k
      }
6153
392k
      if (ctxt->sax2) {
6154
301k
    xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6155
301k
      }
6156
392k
      if (defaultValue != NULL)
6157
55.7k
          xmlFree(defaultValue);
6158
392k
      GROW;
6159
392k
  }
6160
402k
  if (RAW == '>') {
6161
239k
      if (inputid != ctxt->input->id) {
6162
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6163
0
                               "Attribute list declaration doesn't start and"
6164
0
                               " stop in the same entity\n");
6165
0
      }
6166
239k
      NEXT;
6167
239k
  }
6168
402k
    }
6169
441k
}
6170
6171
/**
6172
 * xmlParseElementMixedContentDecl:
6173
 * @ctxt:  an XML parser context
6174
 * @inputchk:  the input used for the current entity, needed for boundary checks
6175
 *
6176
 * DEPRECATED: Internal function, don't use.
6177
 *
6178
 * parse the declaration for a Mixed Element content
6179
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6180
 *
6181
 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6182
 *                '(' S? '#PCDATA' S? ')'
6183
 *
6184
 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6185
 *
6186
 * [ VC: No Duplicate Types ]
6187
 * The same name must not appear more than once in a single
6188
 * mixed-content declaration.
6189
 *
6190
 * returns: the list of the xmlElementContentPtr describing the element choices
6191
 */
6192
xmlElementContentPtr
6193
123k
xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6194
123k
    xmlElementContentPtr ret = NULL, cur = NULL, n;
6195
123k
    const xmlChar *elem = NULL;
6196
6197
123k
    GROW;
6198
123k
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6199
123k
  SKIP(7);
6200
123k
  SKIP_BLANKS;
6201
123k
  SHRINK;
6202
123k
  if (RAW == ')') {
6203
94.2k
      if (ctxt->input->id != inputchk) {
6204
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6205
0
                               "Element content declaration doesn't start and"
6206
0
                               " stop in the same entity\n");
6207
0
      }
6208
94.2k
      NEXT;
6209
94.2k
      ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6210
94.2k
      if (ret == NULL)
6211
0
          return(NULL);
6212
94.2k
      if (RAW == '*') {
6213
70
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6214
70
    NEXT;
6215
70
      }
6216
94.2k
      return(ret);
6217
94.2k
  }
6218
29.0k
  if ((RAW == '(') || (RAW == '|')) {
6219
26.7k
      ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6220
26.7k
      if (ret == NULL) return(NULL);
6221
26.7k
  }
6222
173k
  while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
6223
145k
      NEXT;
6224
145k
      if (elem == NULL) {
6225
26.5k
          ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6226
26.5k
    if (ret == NULL) {
6227
0
        xmlFreeDocElementContent(ctxt->myDoc, cur);
6228
0
                    return(NULL);
6229
0
                }
6230
26.5k
    ret->c1 = cur;
6231
26.5k
    if (cur != NULL)
6232
26.5k
        cur->parent = ret;
6233
26.5k
    cur = ret;
6234
119k
      } else {
6235
119k
          n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6236
119k
    if (n == NULL) {
6237
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6238
0
                    return(NULL);
6239
0
                }
6240
119k
    n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6241
119k
    if (n->c1 != NULL)
6242
119k
        n->c1->parent = n;
6243
119k
          cur->c2 = n;
6244
119k
    if (n != NULL)
6245
119k
        n->parent = cur;
6246
119k
    cur = n;
6247
119k
      }
6248
145k
      SKIP_BLANKS;
6249
145k
      elem = xmlParseName(ctxt);
6250
145k
      if (elem == NULL) {
6251
910
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6252
910
      "xmlParseElementMixedContentDecl : Name expected\n");
6253
910
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6254
910
    return(NULL);
6255
910
      }
6256
144k
      SKIP_BLANKS;
6257
144k
      GROW;
6258
144k
  }
6259
28.1k
  if ((RAW == ')') && (NXT(1) == '*')) {
6260
20.1k
      if (elem != NULL) {
6261
20.1k
    cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6262
20.1k
                                   XML_ELEMENT_CONTENT_ELEMENT);
6263
20.1k
    if (cur->c2 != NULL)
6264
20.1k
        cur->c2->parent = cur;
6265
20.1k
            }
6266
20.1k
            if (ret != NULL)
6267
20.1k
                ret->ocur = XML_ELEMENT_CONTENT_MULT;
6268
20.1k
      if (ctxt->input->id != inputchk) {
6269
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6270
0
                               "Element content declaration doesn't start and"
6271
0
                               " stop in the same entity\n");
6272
0
      }
6273
20.1k
      SKIP(2);
6274
20.1k
  } else {
6275
7.98k
      xmlFreeDocElementContent(ctxt->myDoc, ret);
6276
7.98k
      xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6277
7.98k
      return(NULL);
6278
7.98k
  }
6279
6280
28.1k
    } else {
6281
0
  xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6282
0
    }
6283
20.1k
    return(ret);
6284
123k
}
6285
6286
/**
6287
 * xmlParseElementChildrenContentDeclPriv:
6288
 * @ctxt:  an XML parser context
6289
 * @inputchk:  the input used for the current entity, needed for boundary checks
6290
 * @depth: the level of recursion
6291
 *
6292
 * parse the declaration for a Mixed Element content
6293
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6294
 *
6295
 *
6296
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6297
 *
6298
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6299
 *
6300
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6301
 *
6302
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6303
 *
6304
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6305
 * TODO Parameter-entity replacement text must be properly nested
6306
 *  with parenthesized groups. That is to say, if either of the
6307
 *  opening or closing parentheses in a choice, seq, or Mixed
6308
 *  construct is contained in the replacement text for a parameter
6309
 *  entity, both must be contained in the same replacement text. For
6310
 *  interoperability, if a parameter-entity reference appears in a
6311
 *  choice, seq, or Mixed construct, its replacement text should not
6312
 *  be empty, and neither the first nor last non-blank character of
6313
 *  the replacement text should be a connector (| or ,).
6314
 *
6315
 * Returns the tree of xmlElementContentPtr describing the element
6316
 *          hierarchy.
6317
 */
6318
static xmlElementContentPtr
6319
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6320
429k
                                       int depth) {
6321
429k
    xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6322
429k
    const xmlChar *elem;
6323
429k
    xmlChar type = 0;
6324
6325
429k
    if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6326
429k
        (depth >  2048)) {
6327
50
        xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6328
50
"xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6329
50
                          depth);
6330
50
  return(NULL);
6331
50
    }
6332
429k
    SKIP_BLANKS;
6333
429k
    GROW;
6334
429k
    if (RAW == '(') {
6335
184k
  int inputid = ctxt->input->id;
6336
6337
        /* Recurse on first child */
6338
184k
  NEXT;
6339
184k
  SKIP_BLANKS;
6340
184k
        cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6341
184k
                                                           depth + 1);
6342
184k
        if (cur == NULL)
6343
179k
            return(NULL);
6344
4.91k
  SKIP_BLANKS;
6345
4.91k
  GROW;
6346
245k
    } else {
6347
245k
  elem = xmlParseName(ctxt);
6348
245k
  if (elem == NULL) {
6349
29.1k
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6350
29.1k
      return(NULL);
6351
29.1k
  }
6352
216k
        cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6353
216k
  if (cur == NULL) {
6354
0
      xmlErrMemory(ctxt, NULL);
6355
0
      return(NULL);
6356
0
  }
6357
216k
  GROW;
6358
216k
  if (RAW == '?') {
6359
5.80k
      cur->ocur = XML_ELEMENT_CONTENT_OPT;
6360
5.80k
      NEXT;
6361
210k
  } else if (RAW == '*') {
6362
36.9k
      cur->ocur = XML_ELEMENT_CONTENT_MULT;
6363
36.9k
      NEXT;
6364
173k
  } else if (RAW == '+') {
6365
34.0k
      cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6366
34.0k
      NEXT;
6367
139k
  } else {
6368
139k
      cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6369
139k
  }
6370
216k
  GROW;
6371
216k
    }
6372
220k
    SKIP_BLANKS;
6373
220k
    SHRINK;
6374
405k
    while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
6375
        /*
6376
   * Each loop we parse one separator and one element.
6377
   */
6378
239k
        if (RAW == ',') {
6379
67.7k
      if (type == 0) type = CUR;
6380
6381
      /*
6382
       * Detect "Name | Name , Name" error
6383
       */
6384
36.0k
      else if (type != CUR) {
6385
213
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6386
213
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6387
213
                      type);
6388
213
    if ((last != NULL) && (last != ret))
6389
213
        xmlFreeDocElementContent(ctxt->myDoc, last);
6390
213
    if (ret != NULL)
6391
213
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6392
213
    return(NULL);
6393
213
      }
6394
67.4k
      NEXT;
6395
6396
67.4k
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6397
67.4k
      if (op == NULL) {
6398
0
    if ((last != NULL) && (last != ret))
6399
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6400
0
          xmlFreeDocElementContent(ctxt->myDoc, ret);
6401
0
    return(NULL);
6402
0
      }
6403
67.4k
      if (last == NULL) {
6404
31.6k
    op->c1 = ret;
6405
31.6k
    if (ret != NULL)
6406
31.6k
        ret->parent = op;
6407
31.6k
    ret = cur = op;
6408
35.8k
      } else {
6409
35.8k
          cur->c2 = op;
6410
35.8k
    if (op != NULL)
6411
35.8k
        op->parent = cur;
6412
35.8k
    op->c1 = last;
6413
35.8k
    if (last != NULL)
6414
35.8k
        last->parent = op;
6415
35.8k
    cur =op;
6416
35.8k
    last = NULL;
6417
35.8k
      }
6418
171k
  } else if (RAW == '|') {
6419
121k
      if (type == 0) type = CUR;
6420
6421
      /*
6422
       * Detect "Name , Name | Name" error
6423
       */
6424
87.1k
      else if (type != CUR) {
6425
212
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6426
212
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6427
212
          type);
6428
212
    if ((last != NULL) && (last != ret))
6429
212
        xmlFreeDocElementContent(ctxt->myDoc, last);
6430
212
    if (ret != NULL)
6431
212
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6432
212
    return(NULL);
6433
212
      }
6434
121k
      NEXT;
6435
6436
121k
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6437
121k
      if (op == NULL) {
6438
0
    if ((last != NULL) && (last != ret))
6439
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6440
0
    if (ret != NULL)
6441
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6442
0
    return(NULL);
6443
0
      }
6444
121k
      if (last == NULL) {
6445
34.4k
    op->c1 = ret;
6446
34.4k
    if (ret != NULL)
6447
34.4k
        ret->parent = op;
6448
34.4k
    ret = cur = op;
6449
86.9k
      } else {
6450
86.9k
          cur->c2 = op;
6451
86.9k
    if (op != NULL)
6452
86.9k
        op->parent = cur;
6453
86.9k
    op->c1 = last;
6454
86.9k
    if (last != NULL)
6455
86.9k
        last->parent = op;
6456
86.9k
    cur =op;
6457
86.9k
    last = NULL;
6458
86.9k
      }
6459
121k
  } else {
6460
50.0k
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6461
50.0k
      if ((last != NULL) && (last != ret))
6462
9.20k
          xmlFreeDocElementContent(ctxt->myDoc, last);
6463
50.0k
      if (ret != NULL)
6464
50.0k
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6465
50.0k
      return(NULL);
6466
50.0k
  }
6467
188k
  GROW;
6468
188k
  SKIP_BLANKS;
6469
188k
  GROW;
6470
188k
  if (RAW == '(') {
6471
11.3k
      int inputid = ctxt->input->id;
6472
      /* Recurse on second child */
6473
11.3k
      NEXT;
6474
11.3k
      SKIP_BLANKS;
6475
11.3k
      last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6476
11.3k
                                                          depth + 1);
6477
11.3k
            if (last == NULL) {
6478
1.74k
    if (ret != NULL)
6479
1.74k
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6480
1.74k
    return(NULL);
6481
1.74k
            }
6482
9.55k
      SKIP_BLANKS;
6483
177k
  } else {
6484
177k
      elem = xmlParseName(ctxt);
6485
177k
      if (elem == NULL) {
6486
2.77k
    xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6487
2.77k
    if (ret != NULL)
6488
2.77k
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6489
2.77k
    return(NULL);
6490
2.77k
      }
6491
174k
      last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6492
174k
      if (last == NULL) {
6493
0
    if (ret != NULL)
6494
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6495
0
    return(NULL);
6496
0
      }
6497
174k
      if (RAW == '?') {
6498
30.5k
    last->ocur = XML_ELEMENT_CONTENT_OPT;
6499
30.5k
    NEXT;
6500
144k
      } else if (RAW == '*') {
6501
14.9k
    last->ocur = XML_ELEMENT_CONTENT_MULT;
6502
14.9k
    NEXT;
6503
129k
      } else if (RAW == '+') {
6504
6.76k
    last->ocur = XML_ELEMENT_CONTENT_PLUS;
6505
6.76k
    NEXT;
6506
122k
      } else {
6507
122k
    last->ocur = XML_ELEMENT_CONTENT_ONCE;
6508
122k
      }
6509
174k
  }
6510
184k
  SKIP_BLANKS;
6511
184k
  GROW;
6512
184k
    }
6513
165k
    if ((cur != NULL) && (last != NULL)) {
6514
51.9k
        cur->c2 = last;
6515
51.9k
  if (last != NULL)
6516
51.9k
      last->parent = cur;
6517
51.9k
    }
6518
165k
    if (ctxt->input->id != inputchk) {
6519
0
  xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6520
0
                       "Element content declaration doesn't start and stop in"
6521
0
                       " the same entity\n");
6522
0
    }
6523
165k
    NEXT;
6524
165k
    if (RAW == '?') {
6525
2.35k
  if (ret != NULL) {
6526
2.35k
      if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6527
2.35k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6528
169
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6529
2.18k
      else
6530
2.18k
          ret->ocur = XML_ELEMENT_CONTENT_OPT;
6531
2.35k
  }
6532
2.35k
  NEXT;
6533
163k
    } else if (RAW == '*') {
6534
23.8k
  if (ret != NULL) {
6535
23.8k
      ret->ocur = XML_ELEMENT_CONTENT_MULT;
6536
23.8k
      cur = ret;
6537
      /*
6538
       * Some normalization:
6539
       * (a | b* | c?)* == (a | b | c)*
6540
       */
6541
95.5k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6542
71.7k
    if ((cur->c1 != NULL) &&
6543
71.7k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6544
71.7k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6545
11.5k
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6546
71.7k
    if ((cur->c2 != NULL) &&
6547
71.7k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6548
71.7k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6549
1.88k
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6550
71.7k
    cur = cur->c2;
6551
71.7k
      }
6552
23.8k
  }
6553
23.8k
  NEXT;
6554
139k
    } else if (RAW == '+') {
6555
6.34k
  if (ret != NULL) {
6556
6.34k
      int found = 0;
6557
6558
6.34k
      if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6559
6.34k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6560
26
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6561
6.32k
      else
6562
6.32k
          ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6563
      /*
6564
       * Some normalization:
6565
       * (a | b*)+ == (a | b)*
6566
       * (a | b?)+ == (a | b)*
6567
       */
6568
9.36k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6569
3.01k
    if ((cur->c1 != NULL) &&
6570
3.01k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6571
3.01k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6572
13
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6573
13
        found = 1;
6574
13
    }
6575
3.01k
    if ((cur->c2 != NULL) &&
6576
3.01k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6577
3.01k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6578
0
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6579
0
        found = 1;
6580
0
    }
6581
3.01k
    cur = cur->c2;
6582
3.01k
      }
6583
6.34k
      if (found)
6584
13
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6585
6.34k
  }
6586
6.34k
  NEXT;
6587
6.34k
    }
6588
165k
    return(ret);
6589
220k
}
6590
6591
/**
6592
 * xmlParseElementChildrenContentDecl:
6593
 * @ctxt:  an XML parser context
6594
 * @inputchk:  the input used for the current entity, needed for boundary checks
6595
 *
6596
 * DEPRECATED: Internal function, don't use.
6597
 *
6598
 * parse the declaration for a Mixed Element content
6599
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6600
 *
6601
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6602
 *
6603
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6604
 *
6605
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6606
 *
6607
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6608
 *
6609
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6610
 * TODO Parameter-entity replacement text must be properly nested
6611
 *  with parenthesized groups. That is to say, if either of the
6612
 *  opening or closing parentheses in a choice, seq, or Mixed
6613
 *  construct is contained in the replacement text for a parameter
6614
 *  entity, both must be contained in the same replacement text. For
6615
 *  interoperability, if a parameter-entity reference appears in a
6616
 *  choice, seq, or Mixed construct, its replacement text should not
6617
 *  be empty, and neither the first nor last non-blank character of
6618
 *  the replacement text should be a connector (| or ,).
6619
 *
6620
 * Returns the tree of xmlElementContentPtr describing the element
6621
 *          hierarchy.
6622
 */
6623
xmlElementContentPtr
6624
0
xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6625
    /* stub left for API/ABI compat */
6626
0
    return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6627
0
}
6628
6629
/**
6630
 * xmlParseElementContentDecl:
6631
 * @ctxt:  an XML parser context
6632
 * @name:  the name of the element being defined.
6633
 * @result:  the Element Content pointer will be stored here if any
6634
 *
6635
 * DEPRECATED: Internal function, don't use.
6636
 *
6637
 * parse the declaration for an Element content either Mixed or Children,
6638
 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6639
 *
6640
 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6641
 *
6642
 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6643
 */
6644
6645
int
6646
xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6647
357k
                           xmlElementContentPtr *result) {
6648
6649
357k
    xmlElementContentPtr tree = NULL;
6650
357k
    int inputid = ctxt->input->id;
6651
357k
    int res;
6652
6653
357k
    *result = NULL;
6654
6655
357k
    if (RAW != '(') {
6656
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6657
0
    "xmlParseElementContentDecl : %s '(' expected\n", name);
6658
0
  return(-1);
6659
0
    }
6660
357k
    NEXT;
6661
357k
    GROW;
6662
357k
    if (ctxt->instate == XML_PARSER_EOF)
6663
0
        return(-1);
6664
357k
    SKIP_BLANKS;
6665
357k
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6666
123k
        tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6667
123k
  res = XML_ELEMENT_TYPE_MIXED;
6668
233k
    } else {
6669
233k
        tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6670
233k
  res = XML_ELEMENT_TYPE_ELEMENT;
6671
233k
    }
6672
357k
    SKIP_BLANKS;
6673
357k
    *result = tree;
6674
357k
    return(res);
6675
357k
}
6676
6677
/**
6678
 * xmlParseElementDecl:
6679
 * @ctxt:  an XML parser context
6680
 *
6681
 * DEPRECATED: Internal function, don't use.
6682
 *
6683
 * Parse an element declaration. Always consumes '<!'.
6684
 *
6685
 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6686
 *
6687
 * [ VC: Unique Element Type Declaration ]
6688
 * No element type may be declared more than once
6689
 *
6690
 * Returns the type of the element, or -1 in case of error
6691
 */
6692
int
6693
527k
xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6694
527k
    const xmlChar *name;
6695
527k
    int ret = -1;
6696
527k
    xmlElementContentPtr content  = NULL;
6697
6698
527k
    if ((CUR != '<') || (NXT(1) != '!'))
6699
0
        return(ret);
6700
527k
    SKIP(2);
6701
6702
    /* GROW; done in the caller */
6703
527k
    if (CMP7(CUR_PTR, 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6704
509k
  int inputid = ctxt->input->id;
6705
6706
509k
  SKIP(7);
6707
509k
  if (SKIP_BLANKS == 0) {
6708
11.2k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6709
11.2k
               "Space required after 'ELEMENT'\n");
6710
11.2k
      return(-1);
6711
11.2k
  }
6712
498k
        name = xmlParseName(ctxt);
6713
498k
  if (name == NULL) {
6714
24.1k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6715
24.1k
         "xmlParseElementDecl: no name for Element\n");
6716
24.1k
      return(-1);
6717
24.1k
  }
6718
473k
  if (SKIP_BLANKS == 0) {
6719
52.5k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6720
52.5k
         "Space required after the element name\n");
6721
52.5k
  }
6722
473k
  if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6723
72.3k
      SKIP(5);
6724
      /*
6725
       * Element must always be empty.
6726
       */
6727
72.3k
      ret = XML_ELEMENT_TYPE_EMPTY;
6728
401k
  } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6729
401k
             (NXT(2) == 'Y')) {
6730
3.64k
      SKIP(3);
6731
      /*
6732
       * Element is a generic container.
6733
       */
6734
3.64k
      ret = XML_ELEMENT_TYPE_ANY;
6735
397k
  } else if (RAW == '(') {
6736
357k
      ret = xmlParseElementContentDecl(ctxt, name, &content);
6737
357k
  } else {
6738
      /*
6739
       * [ WFC: PEs in Internal Subset ] error handling.
6740
       */
6741
40.6k
      if ((RAW == '%') && (ctxt->external == 0) &&
6742
40.6k
          (ctxt->inputNr == 1)) {
6743
2.72k
    xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
6744
2.72k
    "PEReference: forbidden within markup decl in internal subset\n");
6745
37.9k
      } else {
6746
37.9k
    xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6747
37.9k
          "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6748
37.9k
            }
6749
40.6k
      return(-1);
6750
40.6k
  }
6751
6752
433k
  SKIP_BLANKS;
6753
6754
433k
  if (RAW != '>') {
6755
90.3k
      xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6756
90.3k
      if (content != NULL) {
6757
4.38k
    xmlFreeDocElementContent(ctxt->myDoc, content);
6758
4.38k
      }
6759
342k
  } else {
6760
342k
      if (inputid != ctxt->input->id) {
6761
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6762
0
                               "Element declaration doesn't start and stop in"
6763
0
                               " the same entity\n");
6764
0
      }
6765
6766
342k
      NEXT;
6767
342k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6768
342k
    (ctxt->sax->elementDecl != NULL)) {
6769
301k
    if (content != NULL)
6770
230k
        content->parent = NULL;
6771
301k
          ctxt->sax->elementDecl(ctxt->userData, name, ret,
6772
301k
                           content);
6773
301k
    if ((content != NULL) && (content->parent == NULL)) {
6774
        /*
6775
         * this is a trick: if xmlAddElementDecl is called,
6776
         * instead of copying the full tree it is plugged directly
6777
         * if called from the parser. Avoid duplicating the
6778
         * interfaces or change the API/ABI
6779
         */
6780
5.83k
        xmlFreeDocElementContent(ctxt->myDoc, content);
6781
5.83k
    }
6782
301k
      } else if (content != NULL) {
6783
31.3k
    xmlFreeDocElementContent(ctxt->myDoc, content);
6784
31.3k
      }
6785
342k
  }
6786
433k
    }
6787
451k
    return(ret);
6788
527k
}
6789
6790
/**
6791
 * xmlParseConditionalSections
6792
 * @ctxt:  an XML parser context
6793
 *
6794
 * Parse a conditional section. Always consumes '<!['.
6795
 *
6796
 * [61] conditionalSect ::= includeSect | ignoreSect
6797
 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6798
 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6799
 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6800
 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6801
 */
6802
6803
static void
6804
1.56k
xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6805
1.56k
    int *inputIds = NULL;
6806
1.56k
    size_t inputIdsSize = 0;
6807
1.56k
    size_t depth = 0;
6808
6809
6.59k
    while (ctxt->instate != XML_PARSER_EOF) {
6810
6.57k
        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6811
3.64k
            int id = ctxt->input->id;
6812
6813
3.64k
            SKIP(3);
6814
3.64k
            SKIP_BLANKS;
6815
6816
3.64k
            if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6817
2.82k
                SKIP(7);
6818
2.82k
                SKIP_BLANKS;
6819
2.82k
                if (RAW != '[') {
6820
61
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6821
61
                    xmlHaltParser(ctxt);
6822
61
                    goto error;
6823
61
                }
6824
2.76k
                if (ctxt->input->id != id) {
6825
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6826
0
                                   "All markup of the conditional section is"
6827
0
                                   " not in the same entity\n");
6828
0
                }
6829
2.76k
                NEXT;
6830
6831
2.76k
                if (inputIdsSize <= depth) {
6832
905
                    int *tmp;
6833
6834
905
                    inputIdsSize = (inputIdsSize == 0 ? 4 : inputIdsSize * 2);
6835
905
                    tmp = (int *) xmlRealloc(inputIds,
6836
905
                            inputIdsSize * sizeof(int));
6837
905
                    if (tmp == NULL) {
6838
0
                        xmlErrMemory(ctxt, NULL);
6839
0
                        goto error;
6840
0
                    }
6841
905
                    inputIds = tmp;
6842
905
                }
6843
2.76k
                inputIds[depth] = id;
6844
2.76k
                depth++;
6845
2.76k
            } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6846
358
                size_t ignoreDepth = 0;
6847
6848
358
                SKIP(6);
6849
358
                SKIP_BLANKS;
6850
358
                if (RAW != '[') {
6851
15
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6852
15
                    xmlHaltParser(ctxt);
6853
15
                    goto error;
6854
15
                }
6855
343
                if (ctxt->input->id != id) {
6856
3
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6857
3
                                   "All markup of the conditional section is"
6858
3
                                   " not in the same entity\n");
6859
3
                }
6860
343
                NEXT;
6861
6862
48.2k
                while (RAW != 0) {
6863
48.0k
                    if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6864
548
                        SKIP(3);
6865
548
                        ignoreDepth++;
6866
                        /* Check for integer overflow */
6867
548
                        if (ignoreDepth == 0) {
6868
0
                            xmlErrMemory(ctxt, NULL);
6869
0
                            goto error;
6870
0
                        }
6871
47.5k
                    } else if ((RAW == ']') && (NXT(1) == ']') &&
6872
47.5k
                               (NXT(2) == '>')) {
6873
461
                        if (ignoreDepth == 0)
6874
146
                            break;
6875
315
                        SKIP(3);
6876
315
                        ignoreDepth--;
6877
47.0k
                    } else {
6878
47.0k
                        NEXT;
6879
47.0k
                    }
6880
48.0k
                }
6881
6882
343
    if (RAW == 0) {
6883
197
        xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6884
197
                    goto error;
6885
197
    }
6886
146
                if (ctxt->input->id != id) {
6887
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6888
0
                                   "All markup of the conditional section is"
6889
0
                                   " not in the same entity\n");
6890
0
                }
6891
146
                SKIP(3);
6892
458
            } else {
6893
458
                xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6894
458
                xmlHaltParser(ctxt);
6895
458
                goto error;
6896
458
            }
6897
3.64k
        } else if ((depth > 0) &&
6898
2.93k
                   (RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6899
1.47k
            depth--;
6900
1.47k
            if (ctxt->input->id != inputIds[depth]) {
6901
48
                xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6902
48
                               "All markup of the conditional section is not"
6903
48
                               " in the same entity\n");
6904
48
            }
6905
1.47k
            SKIP(3);
6906
1.47k
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
6907
1.05k
            xmlParseMarkupDecl(ctxt);
6908
1.05k
        } else {
6909
399
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6910
399
            xmlHaltParser(ctxt);
6911
399
            goto error;
6912
399
        }
6913
6914
5.44k
        if (depth == 0)
6915
416
            break;
6916
6917
5.03k
        SKIP_BLANKS;
6918
5.03k
        GROW;
6919
5.03k
    }
6920
6921
1.56k
error:
6922
1.56k
    xmlFree(inputIds);
6923
1.56k
}
6924
6925
/**
6926
 * xmlParseMarkupDecl:
6927
 * @ctxt:  an XML parser context
6928
 *
6929
 * DEPRECATED: Internal function, don't use.
6930
 *
6931
 * Parse markup declarations. Always consumes '<!' or '<?'.
6932
 *
6933
 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6934
 *                     NotationDecl | PI | Comment
6935
 *
6936
 * [ VC: Proper Declaration/PE Nesting ]
6937
 * Parameter-entity replacement text must be properly nested with
6938
 * markup declarations. That is to say, if either the first character
6939
 * or the last character of a markup declaration (markupdecl above) is
6940
 * contained in the replacement text for a parameter-entity reference,
6941
 * both must be contained in the same replacement text.
6942
 *
6943
 * [ WFC: PEs in Internal Subset ]
6944
 * In the internal DTD subset, parameter-entity references can occur
6945
 * only where markup declarations can occur, not within markup declarations.
6946
 * (This does not apply to references that occur in external parameter
6947
 * entities or to the external subset.)
6948
 */
6949
void
6950
9.34M
xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6951
9.34M
    GROW;
6952
9.34M
    if (CUR == '<') {
6953
9.34M
        if (NXT(1) == '!') {
6954
9.33M
      switch (NXT(2)) {
6955
1.02M
          case 'E':
6956
1.02M
        if (NXT(3) == 'L')
6957
527k
      xmlParseElementDecl(ctxt);
6958
497k
        else if (NXT(3) == 'N')
6959
492k
      xmlParseEntityDecl(ctxt);
6960
5.37k
                    else
6961
5.37k
                        SKIP(2);
6962
1.02M
        break;
6963
441k
          case 'A':
6964
441k
        xmlParseAttributeListDecl(ctxt);
6965
441k
        break;
6966
29.4k
          case 'N':
6967
29.4k
        xmlParseNotationDecl(ctxt);
6968
29.4k
        break;
6969
7.76M
          case '-':
6970
7.76M
        xmlParseComment(ctxt);
6971
7.76M
        break;
6972
72.2k
    default:
6973
        /* there is an error but it will be detected later */
6974
72.2k
                    SKIP(2);
6975
72.2k
        break;
6976
9.33M
      }
6977
9.33M
  } else if (NXT(1) == '?') {
6978
12.7k
      xmlParsePI(ctxt);
6979
12.7k
  }
6980
9.34M
    }
6981
6982
    /*
6983
     * detect requirement to exit there and act accordingly
6984
     * and avoid having instate overridden later on
6985
     */
6986
9.34M
    if (ctxt->instate == XML_PARSER_EOF)
6987
83.3k
        return;
6988
6989
9.26M
    ctxt->instate = XML_PARSER_DTD;
6990
9.26M
}
6991
6992
/**
6993
 * xmlParseTextDecl:
6994
 * @ctxt:  an XML parser context
6995
 *
6996
 * DEPRECATED: Internal function, don't use.
6997
 *
6998
 * parse an XML declaration header for external entities
6999
 *
7000
 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
7001
 */
7002
7003
void
7004
2.94k
xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
7005
2.94k
    xmlChar *version;
7006
2.94k
    const xmlChar *encoding;
7007
2.94k
    int oldstate;
7008
7009
    /*
7010
     * We know that '<?xml' is here.
7011
     */
7012
2.94k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
7013
2.85k
  SKIP(5);
7014
2.85k
    } else {
7015
93
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
7016
93
  return;
7017
93
    }
7018
7019
    /* Avoid expansion of parameter entities when skipping blanks. */
7020
2.85k
    oldstate = ctxt->instate;
7021
2.85k
    ctxt->instate = XML_PARSER_START;
7022
7023
2.85k
    if (SKIP_BLANKS == 0) {
7024
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7025
0
           "Space needed after '<?xml'\n");
7026
0
    }
7027
7028
    /*
7029
     * We may have the VersionInfo here.
7030
     */
7031
2.85k
    version = xmlParseVersionInfo(ctxt);
7032
2.85k
    if (version == NULL)
7033
585
  version = xmlCharStrdup(XML_DEFAULT_VERSION);
7034
2.26k
    else {
7035
2.26k
  if (SKIP_BLANKS == 0) {
7036
119
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7037
119
               "Space needed here\n");
7038
119
  }
7039
2.26k
    }
7040
2.85k
    ctxt->input->version = version;
7041
7042
    /*
7043
     * We must have the encoding declaration
7044
     */
7045
2.85k
    encoding = xmlParseEncodingDecl(ctxt);
7046
2.85k
    if (ctxt->instate == XML_PARSER_EOF)
7047
0
        return;
7048
2.85k
    if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7049
  /*
7050
   * The XML REC instructs us to stop parsing right here
7051
   */
7052
74
        ctxt->instate = oldstate;
7053
74
        return;
7054
74
    }
7055
2.78k
    if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
7056
1.13k
  xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
7057
1.13k
           "Missing encoding in text declaration\n");
7058
1.13k
    }
7059
7060
2.78k
    SKIP_BLANKS;
7061
2.78k
    if ((RAW == '?') && (NXT(1) == '>')) {
7062
1.18k
        SKIP(2);
7063
1.59k
    } else if (RAW == '>') {
7064
        /* Deprecated old WD ... */
7065
50
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7066
50
  NEXT;
7067
1.54k
    } else {
7068
1.54k
        int c;
7069
7070
1.54k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7071
65.8k
        while ((c = CUR) != 0) {
7072
65.4k
            NEXT;
7073
65.4k
            if (c == '>')
7074
1.15k
                break;
7075
65.4k
        }
7076
1.54k
    }
7077
7078
2.78k
    ctxt->instate = oldstate;
7079
2.78k
}
7080
7081
/**
7082
 * xmlParseExternalSubset:
7083
 * @ctxt:  an XML parser context
7084
 * @ExternalID: the external identifier
7085
 * @SystemID: the system identifier (or URL)
7086
 *
7087
 * parse Markup declarations from an external subset
7088
 *
7089
 * [30] extSubset ::= textDecl? extSubsetDecl
7090
 *
7091
 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
7092
 */
7093
void
7094
xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
7095
11.4k
                       const xmlChar *SystemID) {
7096
11.4k
    xmlDetectSAX2(ctxt);
7097
11.4k
    GROW;
7098
7099
11.4k
    if ((ctxt->encoding == NULL) &&
7100
11.4k
        (ctxt->input->end - ctxt->input->cur >= 4)) {
7101
11.4k
        xmlChar start[4];
7102
11.4k
  xmlCharEncoding enc;
7103
7104
11.4k
  start[0] = RAW;
7105
11.4k
  start[1] = NXT(1);
7106
11.4k
  start[2] = NXT(2);
7107
11.4k
  start[3] = NXT(3);
7108
11.4k
  enc = xmlDetectCharEncoding(start, 4);
7109
11.4k
  if (enc != XML_CHAR_ENCODING_NONE)
7110
2.33k
      xmlSwitchEncoding(ctxt, enc);
7111
11.4k
    }
7112
7113
11.4k
    if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
7114
2.12k
  xmlParseTextDecl(ctxt);
7115
2.12k
  if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7116
      /*
7117
       * The XML REC instructs us to stop parsing right here
7118
       */
7119
26
      xmlHaltParser(ctxt);
7120
26
      return;
7121
26
  }
7122
2.12k
    }
7123
11.4k
    if (ctxt->myDoc == NULL) {
7124
0
        ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
7125
0
  if (ctxt->myDoc == NULL) {
7126
0
      xmlErrMemory(ctxt, "New Doc failed");
7127
0
      return;
7128
0
  }
7129
0
  ctxt->myDoc->properties = XML_DOC_INTERNAL;
7130
0
    }
7131
11.4k
    if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
7132
0
        xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
7133
7134
11.4k
    ctxt->instate = XML_PARSER_DTD;
7135
11.4k
    ctxt->external = 1;
7136
11.4k
    SKIP_BLANKS;
7137
144k
    while ((ctxt->instate != XML_PARSER_EOF) && (RAW != 0)) {
7138
138k
  GROW;
7139
138k
        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7140
1.56k
            xmlParseConditionalSections(ctxt);
7141
136k
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
7142
131k
            xmlParseMarkupDecl(ctxt);
7143
131k
        } else {
7144
5.03k
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7145
5.03k
            xmlHaltParser(ctxt);
7146
5.03k
            return;
7147
5.03k
        }
7148
133k
        SKIP_BLANKS;
7149
133k
    }
7150
7151
6.41k
    if (RAW != 0) {
7152
0
  xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7153
0
    }
7154
7155
6.41k
}
7156
7157
/**
7158
 * xmlParseReference:
7159
 * @ctxt:  an XML parser context
7160
 *
7161
 * DEPRECATED: Internal function, don't use.
7162
 *
7163
 * parse and handle entity references in content, depending on the SAX
7164
 * interface, this may end-up in a call to character() if this is a
7165
 * CharRef, a predefined entity, if there is no reference() callback.
7166
 * or if the parser was asked to switch to that mode.
7167
 *
7168
 * Always consumes '&'.
7169
 *
7170
 * [67] Reference ::= EntityRef | CharRef
7171
 */
7172
void
7173
7.81M
xmlParseReference(xmlParserCtxtPtr ctxt) {
7174
7.81M
    xmlEntityPtr ent;
7175
7.81M
    xmlChar *val;
7176
7.81M
    int was_checked;
7177
7.81M
    xmlNodePtr list = NULL;
7178
7.81M
    xmlParserErrors ret = XML_ERR_OK;
7179
7180
7181
7.81M
    if (RAW != '&')
7182
0
        return;
7183
7184
    /*
7185
     * Simple case of a CharRef
7186
     */
7187
7.81M
    if (NXT(1) == '#') {
7188
1.85M
  int i = 0;
7189
1.85M
  xmlChar out[16];
7190
1.85M
  int hex = NXT(2);
7191
1.85M
  int value = xmlParseCharRef(ctxt);
7192
7193
1.85M
  if (value == 0)
7194
706k
      return;
7195
1.14M
  if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
7196
      /*
7197
       * So we are using non-UTF-8 buffers
7198
       * Check that the char fit on 8bits, if not
7199
       * generate a CharRef.
7200
       */
7201
939k
      if (value <= 0xFF) {
7202
873k
    out[0] = value;
7203
873k
    out[1] = 0;
7204
873k
    if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7205
873k
        (!ctxt->disableSAX))
7206
769k
        ctxt->sax->characters(ctxt->userData, out, 1);
7207
873k
      } else {
7208
66.2k
    if ((hex == 'x') || (hex == 'X'))
7209
17.9k
        snprintf((char *)out, sizeof(out), "#x%X", value);
7210
48.2k
    else
7211
48.2k
        snprintf((char *)out, sizeof(out), "#%d", value);
7212
66.2k
    if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7213
66.2k
        (!ctxt->disableSAX))
7214
59.3k
        ctxt->sax->reference(ctxt->userData, out);
7215
66.2k
      }
7216
939k
  } else {
7217
      /*
7218
       * Just encode the value in UTF-8
7219
       */
7220
204k
      COPY_BUF(0 ,out, i, value);
7221
204k
      out[i] = 0;
7222
204k
      if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7223
204k
    (!ctxt->disableSAX))
7224
181k
    ctxt->sax->characters(ctxt->userData, out, i);
7225
204k
  }
7226
1.14M
  return;
7227
1.85M
    }
7228
7229
    /*
7230
     * We are seeing an entity reference
7231
     */
7232
5.96M
    ent = xmlParseEntityRef(ctxt);
7233
5.96M
    if (ent == NULL) return;
7234
1.16M
    if (!ctxt->wellFormed)
7235
699k
  return;
7236
463k
    was_checked = ent->flags & XML_ENT_PARSED;
7237
7238
    /* special case of predefined entities */
7239
463k
    if ((ent->name == NULL) ||
7240
463k
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7241
65.1k
  val = ent->content;
7242
65.1k
  if (val == NULL) return;
7243
  /*
7244
   * inline the entity.
7245
   */
7246
65.1k
  if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7247
65.1k
      (!ctxt->disableSAX))
7248
65.1k
      ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7249
65.1k
  return;
7250
65.1k
    }
7251
7252
    /*
7253
     * The first reference to the entity trigger a parsing phase
7254
     * where the ent->children is filled with the result from
7255
     * the parsing.
7256
     * Note: external parsed entities will not be loaded, it is not
7257
     * required for a non-validating parser, unless the parsing option
7258
     * of validating, or substituting entities were given. Doing so is
7259
     * far more secure as the parser will only process data coming from
7260
     * the document entity by default.
7261
     */
7262
398k
    if (((ent->flags & XML_ENT_PARSED) == 0) &&
7263
398k
        ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
7264
19.4k
         (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
7265
19.1k
  unsigned long oldsizeentcopy = ctxt->sizeentcopy;
7266
7267
  /*
7268
   * This is a bit hackish but this seems the best
7269
   * way to make sure both SAX and DOM entity support
7270
   * behaves okay.
7271
   */
7272
19.1k
  void *user_data;
7273
19.1k
  if (ctxt->userData == ctxt)
7274
19.1k
      user_data = NULL;
7275
0
  else
7276
0
      user_data = ctxt->userData;
7277
7278
        /* Avoid overflow as much as possible */
7279
19.1k
        ctxt->sizeentcopy = 0;
7280
7281
19.1k
        if (ent->flags & XML_ENT_EXPANDING) {
7282
63
            xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7283
63
            xmlHaltParser(ctxt);
7284
63
            return;
7285
63
        }
7286
7287
19.1k
        ent->flags |= XML_ENT_EXPANDING;
7288
7289
  /*
7290
   * Check that this entity is well formed
7291
   * 4.3.2: An internal general parsed entity is well-formed
7292
   * if its replacement text matches the production labeled
7293
   * content.
7294
   */
7295
19.1k
  if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7296
14.5k
      ctxt->depth++;
7297
14.5k
      ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
7298
14.5k
                                                user_data, &list);
7299
14.5k
      ctxt->depth--;
7300
7301
14.5k
  } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7302
4.52k
      ctxt->depth++;
7303
4.52k
      ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
7304
4.52k
                                     user_data, ctxt->depth, ent->URI,
7305
4.52k
             ent->ExternalID, &list);
7306
4.52k
      ctxt->depth--;
7307
4.52k
  } else {
7308
0
      ret = XML_ERR_ENTITY_PE_INTERNAL;
7309
0
      xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7310
0
       "invalid entity type found\n", NULL);
7311
0
  }
7312
7313
19.1k
        ent->flags &= ~XML_ENT_EXPANDING;
7314
19.1k
        ent->flags |= XML_ENT_PARSED | XML_ENT_CHECKED;
7315
19.1k
        ent->expandedSize = ctxt->sizeentcopy;
7316
19.1k
  if (ret == XML_ERR_ENTITY_LOOP) {
7317
2.63k
            xmlHaltParser(ctxt);
7318
2.63k
      xmlFreeNodeList(list);
7319
2.63k
      return;
7320
2.63k
  }
7321
16.4k
  if (xmlParserEntityCheck(ctxt, oldsizeentcopy)) {
7322
0
      xmlFreeNodeList(list);
7323
0
      return;
7324
0
  }
7325
7326
16.4k
  if ((ret == XML_ERR_OK) && (list != NULL)) {
7327
11.0k
            ent->children = list;
7328
            /*
7329
             * Prune it directly in the generated document
7330
             * except for single text nodes.
7331
             */
7332
11.0k
            if ((ctxt->replaceEntities == 0) ||
7333
11.0k
                (ctxt->parseMode == XML_PARSE_READER) ||
7334
11.0k
                ((list->type == XML_TEXT_NODE) &&
7335
8.73k
                 (list->next == NULL))) {
7336
8.73k
                ent->owner = 1;
7337
27.0k
                while (list != NULL) {
7338
18.2k
                    list->parent = (xmlNodePtr) ent;
7339
18.2k
                    if (list->doc != ent->doc)
7340
0
                        xmlSetTreeDoc(list, ent->doc);
7341
18.2k
                    if (list->next == NULL)
7342
8.73k
                        ent->last = list;
7343
18.2k
                    list = list->next;
7344
18.2k
                }
7345
8.73k
                list = NULL;
7346
8.73k
            } else {
7347
2.35k
                ent->owner = 0;
7348
16.0k
                while (list != NULL) {
7349
13.6k
                    list->parent = (xmlNodePtr) ctxt->node;
7350
13.6k
                    list->doc = ctxt->myDoc;
7351
13.6k
                    if (list->next == NULL)
7352
2.35k
                        ent->last = list;
7353
13.6k
                    list = list->next;
7354
13.6k
                }
7355
2.35k
                list = ent->children;
7356
#ifdef LIBXML_LEGACY_ENABLED
7357
                if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7358
                    xmlAddEntityReference(ent, list, NULL);
7359
#endif /* LIBXML_LEGACY_ENABLED */
7360
2.35k
            }
7361
11.0k
  } else if ((ret != XML_ERR_OK) &&
7362
5.40k
       (ret != XML_WAR_UNDECLARED_ENTITY)) {
7363
3.34k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7364
3.34k
         "Entity '%s' failed to parse\n", ent->name);
7365
3.34k
            if (ent->content != NULL)
7366
1.38k
                ent->content[0] = 0;
7367
3.34k
  } else if (list != NULL) {
7368
0
      xmlFreeNodeList(list);
7369
0
      list = NULL;
7370
0
  }
7371
7372
        /* Prevent entity from being parsed and expanded twice (Bug 760367). */
7373
16.4k
        was_checked = 0;
7374
16.4k
    }
7375
7376
    /*
7377
     * Now that the entity content has been gathered
7378
     * provide it to the application, this can take different forms based
7379
     * on the parsing modes.
7380
     */
7381
395k
    if (ent->children == NULL) {
7382
  /*
7383
   * Probably running in SAX mode and the callbacks don't
7384
   * build the entity content. So unless we already went
7385
   * though parsing for first checking go though the entity
7386
   * content to generate callbacks associated to the entity
7387
   */
7388
5.80k
  if (was_checked != 0) {
7389
92
      void *user_data;
7390
      /*
7391
       * This is a bit hackish but this seems the best
7392
       * way to make sure both SAX and DOM entity support
7393
       * behaves okay.
7394
       */
7395
92
      if (ctxt->userData == ctxt)
7396
92
    user_data = NULL;
7397
0
      else
7398
0
    user_data = ctxt->userData;
7399
7400
92
      if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7401
0
    ctxt->depth++;
7402
0
    ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7403
0
           ent->content, user_data, NULL);
7404
0
    ctxt->depth--;
7405
92
      } else if (ent->etype ==
7406
92
           XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7407
92
          unsigned long oldsizeentities = ctxt->sizeentities;
7408
7409
92
    ctxt->depth++;
7410
92
    ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7411
92
         ctxt->sax, user_data, ctxt->depth,
7412
92
         ent->URI, ent->ExternalID, NULL);
7413
92
    ctxt->depth--;
7414
7415
                /* Undo the change to sizeentities */
7416
92
                ctxt->sizeentities = oldsizeentities;
7417
92
      } else {
7418
0
    ret = XML_ERR_ENTITY_PE_INTERNAL;
7419
0
    xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7420
0
           "invalid entity type found\n", NULL);
7421
0
      }
7422
92
      if (ret == XML_ERR_ENTITY_LOOP) {
7423
0
    xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7424
0
    return;
7425
0
      }
7426
92
            if (xmlParserEntityCheck(ctxt, 0))
7427
0
                return;
7428
92
  }
7429
5.80k
  if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7430
5.80k
      (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7431
      /*
7432
       * Entity reference callback comes second, it's somewhat
7433
       * superfluous but a compatibility to historical behaviour
7434
       */
7435
966
      ctxt->sax->reference(ctxt->userData, ent->name);
7436
966
  }
7437
5.80k
  return;
7438
5.80k
    }
7439
7440
    /*
7441
     * We also check for amplification if entities aren't substituted.
7442
     * They might be expanded later.
7443
     */
7444
389k
    if ((was_checked != 0) &&
7445
389k
        (xmlParserEntityCheck(ctxt, ent->expandedSize)))
7446
93
        return;
7447
7448
    /*
7449
     * If we didn't get any children for the entity being built
7450
     */
7451
389k
    if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7452
389k
  (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7453
  /*
7454
   * Create a node.
7455
   */
7456
2.53k
  ctxt->sax->reference(ctxt->userData, ent->name);
7457
2.53k
  return;
7458
2.53k
    }
7459
7460
386k
    if (ctxt->replaceEntities)  {
7461
  /*
7462
   * There is a problem on the handling of _private for entities
7463
   * (bug 155816): Should we copy the content of the field from
7464
   * the entity (possibly overwriting some value set by the user
7465
   * when a copy is created), should we leave it alone, or should
7466
   * we try to take care of different situations?  The problem
7467
   * is exacerbated by the usage of this field by the xmlReader.
7468
   * To fix this bug, we look at _private on the created node
7469
   * and, if it's NULL, we copy in whatever was in the entity.
7470
   * If it's not NULL we leave it alone.  This is somewhat of a
7471
   * hack - maybe we should have further tests to determine
7472
   * what to do.
7473
   */
7474
386k
  if (ctxt->node != NULL) {
7475
      /*
7476
       * Seems we are generating the DOM content, do
7477
       * a simple tree copy for all references except the first
7478
       * In the first occurrence list contains the replacement.
7479
       */
7480
386k
      if (((list == NULL) && (ent->owner == 0)) ||
7481
386k
    (ctxt->parseMode == XML_PARSE_READER)) {
7482
125k
    xmlNodePtr nw = NULL, cur, firstChild = NULL;
7483
7484
    /*
7485
     * when operating on a reader, the entities definitions
7486
     * are always owning the entities subtree.
7487
    if (ctxt->parseMode == XML_PARSE_READER)
7488
        ent->owner = 1;
7489
     */
7490
7491
125k
    cur = ent->children;
7492
132k
    while (cur != NULL) {
7493
132k
        nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7494
132k
        if (nw != NULL) {
7495
132k
      if (nw->_private == NULL)
7496
132k
          nw->_private = cur->_private;
7497
132k
      if (firstChild == NULL){
7498
125k
          firstChild = nw;
7499
125k
      }
7500
132k
      nw = xmlAddChild(ctxt->node, nw);
7501
132k
        }
7502
132k
        if (cur == ent->last) {
7503
      /*
7504
       * needed to detect some strange empty
7505
       * node cases in the reader tests
7506
       */
7507
125k
      if ((ctxt->parseMode == XML_PARSE_READER) &&
7508
125k
          (nw != NULL) &&
7509
125k
          (nw->type == XML_ELEMENT_NODE) &&
7510
125k
          (nw->children == NULL))
7511
1.09k
          nw->extra = 1;
7512
7513
125k
      break;
7514
125k
        }
7515
7.10k
        cur = cur->next;
7516
7.10k
    }
7517
#ifdef LIBXML_LEGACY_ENABLED
7518
    if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7519
      xmlAddEntityReference(ent, firstChild, nw);
7520
#endif /* LIBXML_LEGACY_ENABLED */
7521
261k
      } else if ((list == NULL) || (ctxt->inputNr > 0)) {
7522
261k
    xmlNodePtr nw = NULL, cur, next, last,
7523
261k
         firstChild = NULL;
7524
7525
    /*
7526
     * Copy the entity child list and make it the new
7527
     * entity child list. The goal is to make sure any
7528
     * ID or REF referenced will be the one from the
7529
     * document content and not the entity copy.
7530
     */
7531
261k
    cur = ent->children;
7532
261k
    ent->children = NULL;
7533
261k
    last = ent->last;
7534
261k
    ent->last = NULL;
7535
275k
    while (cur != NULL) {
7536
275k
        next = cur->next;
7537
275k
        cur->next = NULL;
7538
275k
        cur->parent = NULL;
7539
275k
        nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7540
275k
        if (nw != NULL) {
7541
275k
      if (nw->_private == NULL)
7542
275k
          nw->_private = cur->_private;
7543
275k
      if (firstChild == NULL){
7544
261k
          firstChild = cur;
7545
261k
      }
7546
275k
      xmlAddChild((xmlNodePtr) ent, nw);
7547
275k
        }
7548
275k
        xmlAddChild(ctxt->node, cur);
7549
275k
        if (cur == last)
7550
261k
      break;
7551
14.2k
        cur = next;
7552
14.2k
    }
7553
261k
    if (ent->owner == 0)
7554
2.35k
        ent->owner = 1;
7555
#ifdef LIBXML_LEGACY_ENABLED
7556
    if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7557
      xmlAddEntityReference(ent, firstChild, nw);
7558
#endif /* LIBXML_LEGACY_ENABLED */
7559
261k
      } else {
7560
0
    const xmlChar *nbktext;
7561
7562
    /*
7563
     * the name change is to avoid coalescing of the
7564
     * node with a possible previous text one which
7565
     * would make ent->children a dangling pointer
7566
     */
7567
0
    nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7568
0
          -1);
7569
0
    if (ent->children->type == XML_TEXT_NODE)
7570
0
        ent->children->name = nbktext;
7571
0
    if ((ent->last != ent->children) &&
7572
0
        (ent->last->type == XML_TEXT_NODE))
7573
0
        ent->last->name = nbktext;
7574
0
    xmlAddChildList(ctxt->node, ent->children);
7575
0
      }
7576
7577
      /*
7578
       * This is to avoid a nasty side effect, see
7579
       * characters() in SAX.c
7580
       */
7581
386k
      ctxt->nodemem = 0;
7582
386k
      ctxt->nodelen = 0;
7583
386k
      return;
7584
386k
  }
7585
386k
    }
7586
386k
}
7587
7588
/**
7589
 * xmlParseEntityRef:
7590
 * @ctxt:  an XML parser context
7591
 *
7592
 * DEPRECATED: Internal function, don't use.
7593
 *
7594
 * Parse an entitiy reference. Always consumes '&'.
7595
 *
7596
 * [68] EntityRef ::= '&' Name ';'
7597
 *
7598
 * [ WFC: Entity Declared ]
7599
 * In a document without any DTD, a document with only an internal DTD
7600
 * subset which contains no parameter entity references, or a document
7601
 * with "standalone='yes'", the Name given in the entity reference
7602
 * must match that in an entity declaration, except that well-formed
7603
 * documents need not declare any of the following entities: amp, lt,
7604
 * gt, apos, quot.  The declaration of a parameter entity must precede
7605
 * any reference to it.  Similarly, the declaration of a general entity
7606
 * must precede any reference to it which appears in a default value in an
7607
 * attribute-list declaration. Note that if entities are declared in the
7608
 * external subset or in external parameter entities, a non-validating
7609
 * processor is not obligated to read and process their declarations;
7610
 * for such documents, the rule that an entity must be declared is a
7611
 * well-formedness constraint only if standalone='yes'.
7612
 *
7613
 * [ WFC: Parsed Entity ]
7614
 * An entity reference must not contain the name of an unparsed entity
7615
 *
7616
 * Returns the xmlEntityPtr if found, or NULL otherwise.
7617
 */
7618
xmlEntityPtr
7619
7.08M
xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7620
7.08M
    const xmlChar *name;
7621
7.08M
    xmlEntityPtr ent = NULL;
7622
7623
7.08M
    GROW;
7624
7.08M
    if (ctxt->instate == XML_PARSER_EOF)
7625
0
        return(NULL);
7626
7627
7.08M
    if (RAW != '&')
7628
0
        return(NULL);
7629
7.08M
    NEXT;
7630
7.08M
    name = xmlParseName(ctxt);
7631
7.08M
    if (name == NULL) {
7632
3.27M
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7633
3.27M
           "xmlParseEntityRef: no name\n");
7634
3.27M
        return(NULL);
7635
3.27M
    }
7636
3.81M
    if (RAW != ';') {
7637
1.34M
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7638
1.34M
  return(NULL);
7639
1.34M
    }
7640
2.46M
    NEXT;
7641
7642
    /*
7643
     * Predefined entities override any extra definition
7644
     */
7645
2.46M
    if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7646
1.88M
        ent = xmlGetPredefinedEntity(name);
7647
1.88M
        if (ent != NULL)
7648
586k
            return(ent);
7649
1.88M
    }
7650
7651
    /*
7652
     * Ask first SAX for entity resolution, otherwise try the
7653
     * entities which may have stored in the parser context.
7654
     */
7655
1.88M
    if (ctxt->sax != NULL) {
7656
1.88M
  if (ctxt->sax->getEntity != NULL)
7657
1.88M
      ent = ctxt->sax->getEntity(ctxt->userData, name);
7658
1.88M
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7659
1.88M
      (ctxt->options & XML_PARSE_OLDSAX))
7660
11.7k
      ent = xmlGetPredefinedEntity(name);
7661
1.88M
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7662
1.88M
      (ctxt->userData==ctxt)) {
7663
33.3k
      ent = xmlSAX2GetEntity(ctxt, name);
7664
33.3k
  }
7665
1.88M
    }
7666
1.88M
    if (ctxt->instate == XML_PARSER_EOF)
7667
0
  return(NULL);
7668
    /*
7669
     * [ WFC: Entity Declared ]
7670
     * In a document without any DTD, a document with only an
7671
     * internal DTD subset which contains no parameter entity
7672
     * references, or a document with "standalone='yes'", the
7673
     * Name given in the entity reference must match that in an
7674
     * entity declaration, except that well-formed documents
7675
     * need not declare any of the following entities: amp, lt,
7676
     * gt, apos, quot.
7677
     * The declaration of a parameter entity must precede any
7678
     * reference to it.
7679
     * Similarly, the declaration of a general entity must
7680
     * precede any reference to it which appears in a default
7681
     * value in an attribute-list declaration. Note that if
7682
     * entities are declared in the external subset or in
7683
     * external parameter entities, a non-validating processor
7684
     * is not obligated to read and process their declarations;
7685
     * for such documents, the rule that an entity must be
7686
     * declared is a well-formedness constraint only if
7687
     * standalone='yes'.
7688
     */
7689
1.88M
    if (ent == NULL) {
7690
754k
  if ((ctxt->standalone == 1) ||
7691
754k
      ((ctxt->hasExternalSubset == 0) &&
7692
749k
       (ctxt->hasPErefs == 0))) {
7693
690k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7694
690k
         "Entity '%s' not defined\n", name);
7695
690k
  } else {
7696
63.8k
      xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7697
63.8k
         "Entity '%s' not defined\n", name);
7698
63.8k
      if ((ctxt->inSubset == 0) &&
7699
63.8k
    (ctxt->sax != NULL) &&
7700
63.8k
    (ctxt->sax->reference != NULL)) {
7701
63.8k
    ctxt->sax->reference(ctxt->userData, name);
7702
63.8k
      }
7703
63.8k
  }
7704
754k
  ctxt->valid = 0;
7705
754k
    }
7706
7707
    /*
7708
     * [ WFC: Parsed Entity ]
7709
     * An entity reference must not contain the name of an
7710
     * unparsed entity
7711
     */
7712
1.12M
    else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7713
912
  xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7714
912
     "Entity reference to unparsed entity %s\n", name);
7715
912
    }
7716
7717
    /*
7718
     * [ WFC: No External Entity References ]
7719
     * Attribute values cannot contain direct or indirect
7720
     * entity references to external entities.
7721
     */
7722
1.12M
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7723
1.12M
       (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7724
185
  xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7725
185
       "Attribute references external entity '%s'\n", name);
7726
185
    }
7727
    /*
7728
     * [ WFC: No < in Attribute Values ]
7729
     * The replacement text of any entity referred to directly or
7730
     * indirectly in an attribute value (other than "&lt;") must
7731
     * not contain a <.
7732
     */
7733
1.12M
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7734
1.12M
       (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7735
413k
  if ((ent->flags & XML_ENT_CHECKED_LT) == 0) {
7736
12.8k
            if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7737
228
                ent->flags |= XML_ENT_CONTAINS_LT;
7738
12.8k
            ent->flags |= XML_ENT_CHECKED_LT;
7739
12.8k
        }
7740
413k
        if (ent->flags & XML_ENT_CONTAINS_LT)
7741
515
            xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7742
515
                    "'<' in entity '%s' is not allowed in attributes "
7743
515
                    "values\n", name);
7744
413k
    }
7745
7746
    /*
7747
     * Internal check, no parameter entities here ...
7748
     */
7749
713k
    else {
7750
713k
  switch (ent->etype) {
7751
0
      case XML_INTERNAL_PARAMETER_ENTITY:
7752
0
      case XML_EXTERNAL_PARAMETER_ENTITY:
7753
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7754
0
       "Attempt to reference the parameter entity '%s'\n",
7755
0
            name);
7756
0
      break;
7757
713k
      default:
7758
713k
      break;
7759
713k
  }
7760
713k
    }
7761
7762
    /*
7763
     * [ WFC: No Recursion ]
7764
     * A parsed entity must not contain a recursive reference
7765
     * to itself, either directly or indirectly.
7766
     * Done somewhere else
7767
     */
7768
1.88M
    return(ent);
7769
1.88M
}
7770
7771
/**
7772
 * xmlParseStringEntityRef:
7773
 * @ctxt:  an XML parser context
7774
 * @str:  a pointer to an index in the string
7775
 *
7776
 * parse ENTITY references declarations, but this version parses it from
7777
 * a string value.
7778
 *
7779
 * [68] EntityRef ::= '&' Name ';'
7780
 *
7781
 * [ WFC: Entity Declared ]
7782
 * In a document without any DTD, a document with only an internal DTD
7783
 * subset which contains no parameter entity references, or a document
7784
 * with "standalone='yes'", the Name given in the entity reference
7785
 * must match that in an entity declaration, except that well-formed
7786
 * documents need not declare any of the following entities: amp, lt,
7787
 * gt, apos, quot.  The declaration of a parameter entity must precede
7788
 * any reference to it.  Similarly, the declaration of a general entity
7789
 * must precede any reference to it which appears in a default value in an
7790
 * attribute-list declaration. Note that if entities are declared in the
7791
 * external subset or in external parameter entities, a non-validating
7792
 * processor is not obligated to read and process their declarations;
7793
 * for such documents, the rule that an entity must be declared is a
7794
 * well-formedness constraint only if standalone='yes'.
7795
 *
7796
 * [ WFC: Parsed Entity ]
7797
 * An entity reference must not contain the name of an unparsed entity
7798
 *
7799
 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7800
 * is updated to the current location in the string.
7801
 */
7802
static xmlEntityPtr
7803
11.4M
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7804
11.4M
    xmlChar *name;
7805
11.4M
    const xmlChar *ptr;
7806
11.4M
    xmlChar cur;
7807
11.4M
    xmlEntityPtr ent = NULL;
7808
7809
11.4M
    if ((str == NULL) || (*str == NULL))
7810
0
        return(NULL);
7811
11.4M
    ptr = *str;
7812
11.4M
    cur = *ptr;
7813
11.4M
    if (cur != '&')
7814
0
  return(NULL);
7815
7816
11.4M
    ptr++;
7817
11.4M
    name = xmlParseStringName(ctxt, &ptr);
7818
11.4M
    if (name == NULL) {
7819
0
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7820
0
           "xmlParseStringEntityRef: no name\n");
7821
0
  *str = ptr;
7822
0
  return(NULL);
7823
0
    }
7824
11.4M
    if (*ptr != ';') {
7825
2
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7826
2
        xmlFree(name);
7827
2
  *str = ptr;
7828
2
  return(NULL);
7829
2
    }
7830
11.4M
    ptr++;
7831
7832
7833
    /*
7834
     * Predefined entities override any extra definition
7835
     */
7836
11.4M
    if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7837
11.4M
        ent = xmlGetPredefinedEntity(name);
7838
11.4M
        if (ent != NULL) {
7839
116
            xmlFree(name);
7840
116
            *str = ptr;
7841
116
            return(ent);
7842
116
        }
7843
11.4M
    }
7844
7845
    /*
7846
     * Ask first SAX for entity resolution, otherwise try the
7847
     * entities which may have stored in the parser context.
7848
     */
7849
11.4M
    if (ctxt->sax != NULL) {
7850
11.4M
  if (ctxt->sax->getEntity != NULL)
7851
11.4M
      ent = ctxt->sax->getEntity(ctxt->userData, name);
7852
11.4M
  if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7853
20
      ent = xmlGetPredefinedEntity(name);
7854
11.4M
  if ((ent == NULL) && (ctxt->userData==ctxt)) {
7855
126
      ent = xmlSAX2GetEntity(ctxt, name);
7856
126
  }
7857
11.4M
    }
7858
11.4M
    if (ctxt->instate == XML_PARSER_EOF) {
7859
0
  xmlFree(name);
7860
0
  return(NULL);
7861
0
    }
7862
7863
    /*
7864
     * [ WFC: Entity Declared ]
7865
     * In a document without any DTD, a document with only an
7866
     * internal DTD subset which contains no parameter entity
7867
     * references, or a document with "standalone='yes'", the
7868
     * Name given in the entity reference must match that in an
7869
     * entity declaration, except that well-formed documents
7870
     * need not declare any of the following entities: amp, lt,
7871
     * gt, apos, quot.
7872
     * The declaration of a parameter entity must precede any
7873
     * reference to it.
7874
     * Similarly, the declaration of a general entity must
7875
     * precede any reference to it which appears in a default
7876
     * value in an attribute-list declaration. Note that if
7877
     * entities are declared in the external subset or in
7878
     * external parameter entities, a non-validating processor
7879
     * is not obligated to read and process their declarations;
7880
     * for such documents, the rule that an entity must be
7881
     * declared is a well-formedness constraint only if
7882
     * standalone='yes'.
7883
     */
7884
11.4M
    if (ent == NULL) {
7885
126
  if ((ctxt->standalone == 1) ||
7886
126
      ((ctxt->hasExternalSubset == 0) &&
7887
126
       (ctxt->hasPErefs == 0))) {
7888
81
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7889
81
         "Entity '%s' not defined\n", name);
7890
81
  } else {
7891
45
      xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7892
45
        "Entity '%s' not defined\n",
7893
45
        name);
7894
45
  }
7895
  /* TODO ? check regressions ctxt->valid = 0; */
7896
126
    }
7897
7898
    /*
7899
     * [ WFC: Parsed Entity ]
7900
     * An entity reference must not contain the name of an
7901
     * unparsed entity
7902
     */
7903
11.4M
    else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7904
8
  xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7905
8
     "Entity reference to unparsed entity %s\n", name);
7906
8
    }
7907
7908
    /*
7909
     * [ WFC: No External Entity References ]
7910
     * Attribute values cannot contain direct or indirect
7911
     * entity references to external entities.
7912
     */
7913
11.4M
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7914
11.4M
       (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7915
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7916
0
   "Attribute references external entity '%s'\n", name);
7917
0
    }
7918
    /*
7919
     * [ WFC: No < in Attribute Values ]
7920
     * The replacement text of any entity referred to directly or
7921
     * indirectly in an attribute value (other than "&lt;") must
7922
     * not contain a <.
7923
     */
7924
11.4M
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7925
11.4M
       (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7926
11.4M
  if ((ent->flags & XML_ENT_CHECKED_LT) == 0) {
7927
2.75k
            if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7928
0
                ent->flags |= XML_ENT_CONTAINS_LT;
7929
2.75k
            ent->flags |= XML_ENT_CHECKED_LT;
7930
2.75k
        }
7931
11.4M
        if (ent->flags & XML_ENT_CONTAINS_LT)
7932
44
            xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7933
44
                    "'<' in entity '%s' is not allowed in attributes "
7934
44
                    "values\n", name);
7935
11.4M
    }
7936
7937
    /*
7938
     * Internal check, no parameter entities here ...
7939
     */
7940
1.15k
    else {
7941
1.15k
  switch (ent->etype) {
7942
0
      case XML_INTERNAL_PARAMETER_ENTITY:
7943
0
      case XML_EXTERNAL_PARAMETER_ENTITY:
7944
0
    xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7945
0
       "Attempt to reference the parameter entity '%s'\n",
7946
0
          name);
7947
0
      break;
7948
1.15k
      default:
7949
1.15k
      break;
7950
1.15k
  }
7951
1.15k
    }
7952
7953
    /*
7954
     * [ WFC: No Recursion ]
7955
     * A parsed entity must not contain a recursive reference
7956
     * to itself, either directly or indirectly.
7957
     * Done somewhere else
7958
     */
7959
7960
11.4M
    xmlFree(name);
7961
11.4M
    *str = ptr;
7962
11.4M
    return(ent);
7963
11.4M
}
7964
7965
/**
7966
 * xmlParsePEReference:
7967
 * @ctxt:  an XML parser context
7968
 *
7969
 * DEPRECATED: Internal function, don't use.
7970
 *
7971
 * Parse a parameter entity reference. Always consumes '%'.
7972
 *
7973
 * The entity content is handled directly by pushing it's content as
7974
 * a new input stream.
7975
 *
7976
 * [69] PEReference ::= '%' Name ';'
7977
 *
7978
 * [ WFC: No Recursion ]
7979
 * A parsed entity must not contain a recursive
7980
 * reference to itself, either directly or indirectly.
7981
 *
7982
 * [ WFC: Entity Declared ]
7983
 * In a document without any DTD, a document with only an internal DTD
7984
 * subset which contains no parameter entity references, or a document
7985
 * with "standalone='yes'", ...  ... The declaration of a parameter
7986
 * entity must precede any reference to it...
7987
 *
7988
 * [ VC: Entity Declared ]
7989
 * In a document with an external subset or external parameter entities
7990
 * with "standalone='no'", ...  ... The declaration of a parameter entity
7991
 * must precede any reference to it...
7992
 *
7993
 * [ WFC: In DTD ]
7994
 * Parameter-entity references may only appear in the DTD.
7995
 * NOTE: misleading but this is handled.
7996
 */
7997
void
7998
xmlParsePEReference(xmlParserCtxtPtr ctxt)
7999
8.23M
{
8000
8.23M
    const xmlChar *name;
8001
8.23M
    xmlEntityPtr entity = NULL;
8002
8.23M
    xmlParserInputPtr input;
8003
8004
8.23M
    if (RAW != '%')
8005
0
        return;
8006
8.23M
    NEXT;
8007
8.23M
    name = xmlParseName(ctxt);
8008
8.23M
    if (name == NULL) {
8009
81.0k
  xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
8010
81.0k
  return;
8011
81.0k
    }
8012
8.15M
    if (xmlParserDebugEntities)
8013
0
  xmlGenericError(xmlGenericErrorContext,
8014
0
    "PEReference: %s\n", name);
8015
8.15M
    if (RAW != ';') {
8016
13.9k
  xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
8017
13.9k
        return;
8018
13.9k
    }
8019
8020
8.13M
    NEXT;
8021
8022
    /*
8023
     * Request the entity from SAX
8024
     */
8025
8.13M
    if ((ctxt->sax != NULL) &&
8026
8.13M
  (ctxt->sax->getParameterEntity != NULL))
8027
8.13M
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8028
8.13M
    if (ctxt->instate == XML_PARSER_EOF)
8029
0
  return;
8030
8.13M
    if (entity == NULL) {
8031
  /*
8032
   * [ WFC: Entity Declared ]
8033
   * In a document without any DTD, a document with only an
8034
   * internal DTD subset which contains no parameter entity
8035
   * references, or a document with "standalone='yes'", ...
8036
   * ... The declaration of a parameter entity must precede
8037
   * any reference to it...
8038
   */
8039
449k
  if ((ctxt->standalone == 1) ||
8040
449k
      ((ctxt->hasExternalSubset == 0) &&
8041
449k
       (ctxt->hasPErefs == 0))) {
8042
4.32k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8043
4.32k
            "PEReference: %%%s; not found\n",
8044
4.32k
            name);
8045
445k
  } else {
8046
      /*
8047
       * [ VC: Entity Declared ]
8048
       * In a document with an external subset or external
8049
       * parameter entities with "standalone='no'", ...
8050
       * ... The declaration of a parameter entity must
8051
       * precede any reference to it...
8052
       */
8053
445k
            if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
8054
915
                xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
8055
915
                                 "PEReference: %%%s; not found\n",
8056
915
                                 name, NULL);
8057
915
            } else
8058
444k
                xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8059
444k
                              "PEReference: %%%s; not found\n",
8060
444k
                              name, NULL);
8061
445k
            ctxt->valid = 0;
8062
445k
  }
8063
7.68M
    } else {
8064
  /*
8065
   * Internal checking in case the entity quest barfed
8066
   */
8067
7.68M
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8068
7.68M
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8069
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8070
0
      "Internal: %%%s; is not a parameter entity\n",
8071
0
        name, NULL);
8072
7.68M
  } else {
8073
7.68M
            xmlChar start[4];
8074
7.68M
            xmlCharEncoding enc;
8075
7.68M
            unsigned long parentConsumed;
8076
7.68M
            xmlEntityPtr oldEnt;
8077
8078
7.68M
      if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
8079
7.68M
          ((ctxt->options & XML_PARSE_NOENT) == 0) &&
8080
7.68M
    ((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
8081
7.68M
    ((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
8082
7.68M
    ((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
8083
7.68M
    (ctxt->replaceEntities == 0) &&
8084
7.68M
    (ctxt->validate == 0))
8085
546
    return;
8086
8087
7.68M
            if (entity->flags & XML_ENT_EXPANDING) {
8088
48
                xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
8089
48
                xmlHaltParser(ctxt);
8090
48
                return;
8091
48
            }
8092
8093
            /* Must be computed from old input before pushing new input. */
8094
7.68M
            parentConsumed = ctxt->input->parentConsumed;
8095
7.68M
            oldEnt = ctxt->input->entity;
8096
7.68M
            if ((oldEnt == NULL) ||
8097
7.68M
                ((oldEnt->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
8098
7.61M
                 ((oldEnt->flags & XML_ENT_PARSED) == 0))) {
8099
106k
                xmlSaturatedAdd(&parentConsumed, ctxt->input->consumed);
8100
106k
                xmlSaturatedAddSizeT(&parentConsumed,
8101
106k
                                     ctxt->input->cur - ctxt->input->base);
8102
106k
            }
8103
8104
7.68M
      input = xmlNewEntityInputStream(ctxt, entity);
8105
7.68M
      if (xmlPushInput(ctxt, input) < 0) {
8106
21.7k
                xmlFreeInputStream(input);
8107
21.7k
    return;
8108
21.7k
            }
8109
8110
7.66M
            entity->flags |= XML_ENT_EXPANDING;
8111
8112
7.66M
            input->parentConsumed = parentConsumed;
8113
8114
7.66M
      if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
8115
                /*
8116
                 * Get the 4 first bytes and decode the charset
8117
                 * if enc != XML_CHAR_ENCODING_NONE
8118
                 * plug some encoding conversion routines.
8119
                 * Note that, since we may have some non-UTF8
8120
                 * encoding (like UTF16, bug 135229), the 'length'
8121
                 * is not known, but we can calculate based upon
8122
                 * the amount of data in the buffer.
8123
                 */
8124
1.44k
                GROW
8125
1.44k
                if (ctxt->instate == XML_PARSER_EOF)
8126
0
                    return;
8127
1.44k
                if ((ctxt->input->end - ctxt->input->cur)>=4) {
8128
1.44k
                    start[0] = RAW;
8129
1.44k
                    start[1] = NXT(1);
8130
1.44k
                    start[2] = NXT(2);
8131
1.44k
                    start[3] = NXT(3);
8132
1.44k
                    enc = xmlDetectCharEncoding(start, 4);
8133
1.44k
                    if (enc != XML_CHAR_ENCODING_NONE) {
8134
0
                        xmlSwitchEncoding(ctxt, enc);
8135
0
                    }
8136
1.44k
                }
8137
8138
1.44k
                if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
8139
1.44k
                    (IS_BLANK_CH(NXT(5)))) {
8140
0
                    xmlParseTextDecl(ctxt);
8141
0
                }
8142
1.44k
            }
8143
7.66M
  }
8144
7.68M
    }
8145
8.11M
    ctxt->hasPErefs = 1;
8146
8.11M
}
8147
8148
/**
8149
 * xmlLoadEntityContent:
8150
 * @ctxt:  an XML parser context
8151
 * @entity: an unloaded system entity
8152
 *
8153
 * Load the original content of the given system entity from the
8154
 * ExternalID/SystemID given. This is to be used for Included in Literal
8155
 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
8156
 *
8157
 * Returns 0 in case of success and -1 in case of failure
8158
 */
8159
static int
8160
45
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
8161
45
    xmlParserInputPtr input;
8162
45
    xmlBufferPtr buf;
8163
45
    int l, c;
8164
45
    int count = 0;
8165
8166
45
    if ((ctxt == NULL) || (entity == NULL) ||
8167
45
        ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
8168
45
   (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
8169
45
  (entity->content != NULL)) {
8170
0
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8171
0
              "xmlLoadEntityContent parameter error");
8172
0
        return(-1);
8173
0
    }
8174
8175
45
    if (xmlParserDebugEntities)
8176
0
  xmlGenericError(xmlGenericErrorContext,
8177
0
    "Reading %s entity content input\n", entity->name);
8178
8179
45
    buf = xmlBufferCreate();
8180
45
    if (buf == NULL) {
8181
0
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8182
0
              "xmlLoadEntityContent parameter error");
8183
0
        return(-1);
8184
0
    }
8185
45
    xmlBufferSetAllocationScheme(buf, XML_BUFFER_ALLOC_DOUBLEIT);
8186
8187
45
    input = xmlNewEntityInputStream(ctxt, entity);
8188
45
    if (input == NULL) {
8189
0
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8190
0
              "xmlLoadEntityContent input error");
8191
0
  xmlBufferFree(buf);
8192
0
        return(-1);
8193
0
    }
8194
8195
    /*
8196
     * Push the entity as the current input, read char by char
8197
     * saving to the buffer until the end of the entity or an error
8198
     */
8199
45
    if (xmlPushInput(ctxt, input) < 0) {
8200
0
        xmlBufferFree(buf);
8201
0
  xmlFreeInputStream(input);
8202
0
  return(-1);
8203
0
    }
8204
8205
45
    GROW;
8206
45
    c = CUR_CHAR(l);
8207
1.93k
    while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
8208
1.93k
           (IS_CHAR(c))) {
8209
1.89k
        xmlBufferAdd(buf, ctxt->input->cur, l);
8210
1.89k
  if (count++ > XML_PARSER_CHUNK_SIZE) {
8211
0
      count = 0;
8212
0
      GROW;
8213
0
            if (ctxt->instate == XML_PARSER_EOF) {
8214
0
                xmlBufferFree(buf);
8215
0
                return(-1);
8216
0
            }
8217
0
  }
8218
1.89k
  NEXTL(l);
8219
1.89k
  c = CUR_CHAR(l);
8220
1.89k
  if (c == 0) {
8221
45
      count = 0;
8222
45
      GROW;
8223
45
            if (ctxt->instate == XML_PARSER_EOF) {
8224
0
                xmlBufferFree(buf);
8225
0
                return(-1);
8226
0
            }
8227
45
      c = CUR_CHAR(l);
8228
45
  }
8229
1.89k
    }
8230
8231
45
    if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
8232
45
        xmlSaturatedAdd(&ctxt->sizeentities, ctxt->input->consumed);
8233
45
        xmlPopInput(ctxt);
8234
45
    } else if (!IS_CHAR(c)) {
8235
0
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8236
0
                          "xmlLoadEntityContent: invalid char value %d\n",
8237
0
                    c);
8238
0
  xmlBufferFree(buf);
8239
0
  return(-1);
8240
0
    }
8241
45
    entity->content = buf->content;
8242
45
    entity->length = buf->use;
8243
45
    buf->content = NULL;
8244
45
    xmlBufferFree(buf);
8245
8246
45
    return(0);
8247
45
}
8248
8249
/**
8250
 * xmlParseStringPEReference:
8251
 * @ctxt:  an XML parser context
8252
 * @str:  a pointer to an index in the string
8253
 *
8254
 * parse PEReference declarations
8255
 *
8256
 * [69] PEReference ::= '%' Name ';'
8257
 *
8258
 * [ WFC: No Recursion ]
8259
 * A parsed entity must not contain a recursive
8260
 * reference to itself, either directly or indirectly.
8261
 *
8262
 * [ WFC: Entity Declared ]
8263
 * In a document without any DTD, a document with only an internal DTD
8264
 * subset which contains no parameter entity references, or a document
8265
 * with "standalone='yes'", ...  ... The declaration of a parameter
8266
 * entity must precede any reference to it...
8267
 *
8268
 * [ VC: Entity Declared ]
8269
 * In a document with an external subset or external parameter entities
8270
 * with "standalone='no'", ...  ... The declaration of a parameter entity
8271
 * must precede any reference to it...
8272
 *
8273
 * [ WFC: In DTD ]
8274
 * Parameter-entity references may only appear in the DTD.
8275
 * NOTE: misleading but this is handled.
8276
 *
8277
 * Returns the string of the entity content.
8278
 *         str is updated to the current value of the index
8279
 */
8280
static xmlEntityPtr
8281
48.0k
xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8282
48.0k
    const xmlChar *ptr;
8283
48.0k
    xmlChar cur;
8284
48.0k
    xmlChar *name;
8285
48.0k
    xmlEntityPtr entity = NULL;
8286
8287
48.0k
    if ((str == NULL) || (*str == NULL)) return(NULL);
8288
48.0k
    ptr = *str;
8289
48.0k
    cur = *ptr;
8290
48.0k
    if (cur != '%')
8291
0
        return(NULL);
8292
48.0k
    ptr++;
8293
48.0k
    name = xmlParseStringName(ctxt, &ptr);
8294
48.0k
    if (name == NULL) {
8295
3
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8296
3
           "xmlParseStringPEReference: no name\n");
8297
3
  *str = ptr;
8298
3
  return(NULL);
8299
3
    }
8300
48.0k
    cur = *ptr;
8301
48.0k
    if (cur != ';') {
8302
0
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8303
0
  xmlFree(name);
8304
0
  *str = ptr;
8305
0
  return(NULL);
8306
0
    }
8307
48.0k
    ptr++;
8308
8309
    /*
8310
     * Request the entity from SAX
8311
     */
8312
48.0k
    if ((ctxt->sax != NULL) &&
8313
48.0k
  (ctxt->sax->getParameterEntity != NULL))
8314
48.0k
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8315
48.0k
    if (ctxt->instate == XML_PARSER_EOF) {
8316
0
  xmlFree(name);
8317
0
  *str = ptr;
8318
0
  return(NULL);
8319
0
    }
8320
48.0k
    if (entity == NULL) {
8321
  /*
8322
   * [ WFC: Entity Declared ]
8323
   * In a document without any DTD, a document with only an
8324
   * internal DTD subset which contains no parameter entity
8325
   * references, or a document with "standalone='yes'", ...
8326
   * ... The declaration of a parameter entity must precede
8327
   * any reference to it...
8328
   */
8329
16.3k
  if ((ctxt->standalone == 1) ||
8330
16.3k
      ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8331
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8332
0
     "PEReference: %%%s; not found\n", name);
8333
16.3k
  } else {
8334
      /*
8335
       * [ VC: Entity Declared ]
8336
       * In a document with an external subset or external
8337
       * parameter entities with "standalone='no'", ...
8338
       * ... The declaration of a parameter entity must
8339
       * precede any reference to it...
8340
       */
8341
16.3k
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8342
16.3k
        "PEReference: %%%s; not found\n",
8343
16.3k
        name, NULL);
8344
16.3k
      ctxt->valid = 0;
8345
16.3k
  }
8346
31.7k
    } else {
8347
  /*
8348
   * Internal checking in case the entity quest barfed
8349
   */
8350
31.7k
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8351
31.7k
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8352
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8353
0
        "%%%s; is not a parameter entity\n",
8354
0
        name, NULL);
8355
0
  }
8356
31.7k
    }
8357
48.0k
    ctxt->hasPErefs = 1;
8358
48.0k
    xmlFree(name);
8359
48.0k
    *str = ptr;
8360
48.0k
    return(entity);
8361
48.0k
}
8362
8363
/**
8364
 * xmlParseDocTypeDecl:
8365
 * @ctxt:  an XML parser context
8366
 *
8367
 * DEPRECATED: Internal function, don't use.
8368
 *
8369
 * parse a DOCTYPE declaration
8370
 *
8371
 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8372
 *                      ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8373
 *
8374
 * [ VC: Root Element Type ]
8375
 * The Name in the document type declaration must match the element
8376
 * type of the root element.
8377
 */
8378
8379
void
8380
982k
xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
8381
982k
    const xmlChar *name = NULL;
8382
982k
    xmlChar *ExternalID = NULL;
8383
982k
    xmlChar *URI = NULL;
8384
8385
    /*
8386
     * We know that '<!DOCTYPE' has been detected.
8387
     */
8388
982k
    SKIP(9);
8389
8390
982k
    SKIP_BLANKS;
8391
8392
    /*
8393
     * Parse the DOCTYPE name.
8394
     */
8395
982k
    name = xmlParseName(ctxt);
8396
982k
    if (name == NULL) {
8397
12.8k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8398
12.8k
           "xmlParseDocTypeDecl : no DOCTYPE name !\n");
8399
12.8k
    }
8400
982k
    ctxt->intSubName = name;
8401
8402
982k
    SKIP_BLANKS;
8403
8404
    /*
8405
     * Check for SystemID and ExternalID
8406
     */
8407
982k
    URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8408
8409
982k
    if ((URI != NULL) || (ExternalID != NULL)) {
8410
228k
        ctxt->hasExternalSubset = 1;
8411
228k
    }
8412
982k
    ctxt->extSubURI = URI;
8413
982k
    ctxt->extSubSystem = ExternalID;
8414
8415
982k
    SKIP_BLANKS;
8416
8417
    /*
8418
     * Create and update the internal subset.
8419
     */
8420
982k
    if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8421
982k
  (!ctxt->disableSAX))
8422
822k
  ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8423
982k
    if (ctxt->instate == XML_PARSER_EOF)
8424
0
  return;
8425
8426
    /*
8427
     * Is there any internal subset declarations ?
8428
     * they are handled separately in xmlParseInternalSubset()
8429
     */
8430
982k
    if (RAW == '[')
8431
494k
  return;
8432
8433
    /*
8434
     * We should be at the end of the DOCTYPE declaration.
8435
     */
8436
488k
    if (RAW != '>') {
8437
286k
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8438
286k
    }
8439
488k
    NEXT;
8440
488k
}
8441
8442
/**
8443
 * xmlParseInternalSubset:
8444
 * @ctxt:  an XML parser context
8445
 *
8446
 * parse the internal subset declaration
8447
 *
8448
 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8449
 */
8450
8451
static void
8452
580k
xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8453
    /*
8454
     * Is there any DTD definition ?
8455
     */
8456
580k
    if (RAW == '[') {
8457
580k
        int baseInputNr = ctxt->inputNr;
8458
580k
        ctxt->instate = XML_PARSER_DTD;
8459
580k
        NEXT;
8460
  /*
8461
   * Parse the succession of Markup declarations and
8462
   * PEReferences.
8463
   * Subsequence (markupdecl | PEReference | S)*
8464
   */
8465
580k
  SKIP_BLANKS;
8466
9.92M
  while (((RAW != ']') || (ctxt->inputNr > baseInputNr)) &&
8467
9.92M
               (ctxt->instate != XML_PARSER_EOF)) {
8468
8469
            /*
8470
             * Conditional sections are allowed from external entities included
8471
             * by PE References in the internal subset.
8472
             */
8473
9.69M
            if ((ctxt->inputNr > 1) && (ctxt->input->filename != NULL) &&
8474
9.69M
                (RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
8475
0
                xmlParseConditionalSections(ctxt);
8476
9.69M
            } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
8477
9.21M
          xmlParseMarkupDecl(ctxt);
8478
9.21M
            } else if (RAW == '%') {
8479
129k
          xmlParsePEReference(ctxt);
8480
348k
            } else {
8481
348k
    xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8482
348k
                        "xmlParseInternalSubset: error detected in"
8483
348k
                        " Markup declaration\n");
8484
348k
                xmlHaltParser(ctxt);
8485
348k
                return;
8486
348k
            }
8487
9.34M
      SKIP_BLANKS;
8488
9.34M
  }
8489
232k
  if (RAW == ']') {
8490
150k
      NEXT;
8491
150k
      SKIP_BLANKS;
8492
150k
  }
8493
232k
    }
8494
8495
    /*
8496
     * We should be at the end of the DOCTYPE declaration.
8497
     */
8498
232k
    if (RAW != '>') {
8499
105k
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8500
105k
  return;
8501
105k
    }
8502
126k
    NEXT;
8503
126k
}
8504
8505
#ifdef LIBXML_SAX1_ENABLED
8506
/**
8507
 * xmlParseAttribute:
8508
 * @ctxt:  an XML parser context
8509
 * @value:  a xmlChar ** used to store the value of the attribute
8510
 *
8511
 * DEPRECATED: Internal function, don't use.
8512
 *
8513
 * parse an attribute
8514
 *
8515
 * [41] Attribute ::= Name Eq AttValue
8516
 *
8517
 * [ WFC: No External Entity References ]
8518
 * Attribute values cannot contain direct or indirect entity references
8519
 * to external entities.
8520
 *
8521
 * [ WFC: No < in Attribute Values ]
8522
 * The replacement text of any entity referred to directly or indirectly in
8523
 * an attribute value (other than "&lt;") must not contain a <.
8524
 *
8525
 * [ VC: Attribute Value Type ]
8526
 * The attribute must have been declared; the value must be of the type
8527
 * declared for it.
8528
 *
8529
 * [25] Eq ::= S? '=' S?
8530
 *
8531
 * With namespace:
8532
 *
8533
 * [NS 11] Attribute ::= QName Eq AttValue
8534
 *
8535
 * Also the case QName == xmlns:??? is handled independently as a namespace
8536
 * definition.
8537
 *
8538
 * Returns the attribute name, and the value in *value.
8539
 */
8540
8541
const xmlChar *
8542
7.83M
xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8543
7.83M
    const xmlChar *name;
8544
7.83M
    xmlChar *val;
8545
8546
7.83M
    *value = NULL;
8547
7.83M
    GROW;
8548
7.83M
    name = xmlParseName(ctxt);
8549
7.83M
    if (name == NULL) {
8550
3.65M
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8551
3.65M
                 "error parsing attribute name\n");
8552
3.65M
        return(NULL);
8553
3.65M
    }
8554
8555
    /*
8556
     * read the value
8557
     */
8558
4.18M
    SKIP_BLANKS;
8559
4.18M
    if (RAW == '=') {
8560
2.74M
        NEXT;
8561
2.74M
  SKIP_BLANKS;
8562
2.74M
  val = xmlParseAttValue(ctxt);
8563
2.74M
  ctxt->instate = XML_PARSER_CONTENT;
8564
2.74M
    } else {
8565
1.44M
  xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8566
1.44M
         "Specification mandates value for attribute %s\n", name);
8567
1.44M
  return(name);
8568
1.44M
    }
8569
8570
    /*
8571
     * Check that xml:lang conforms to the specification
8572
     * No more registered as an error, just generate a warning now
8573
     * since this was deprecated in XML second edition
8574
     */
8575
2.74M
    if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8576
24.8k
  if (!xmlCheckLanguageID(val)) {
8577
15.6k
      xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8578
15.6k
              "Malformed value for xml:lang : %s\n",
8579
15.6k
        val, NULL);
8580
15.6k
  }
8581
24.8k
    }
8582
8583
    /*
8584
     * Check that xml:space conforms to the specification
8585
     */
8586
2.74M
    if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8587
3.86k
  if (xmlStrEqual(val, BAD_CAST "default"))
8588
32
      *(ctxt->space) = 0;
8589
3.82k
  else if (xmlStrEqual(val, BAD_CAST "preserve"))
8590
18
      *(ctxt->space) = 1;
8591
3.81k
  else {
8592
3.81k
    xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8593
3.81k
"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8594
3.81k
                                 val, NULL);
8595
3.81k
  }
8596
3.86k
    }
8597
8598
2.74M
    *value = val;
8599
2.74M
    return(name);
8600
4.18M
}
8601
8602
/**
8603
 * xmlParseStartTag:
8604
 * @ctxt:  an XML parser context
8605
 *
8606
 * DEPRECATED: Internal function, don't use.
8607
 *
8608
 * Parse a start tag. Always consumes '<'.
8609
 *
8610
 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8611
 *
8612
 * [ WFC: Unique Att Spec ]
8613
 * No attribute name may appear more than once in the same start-tag or
8614
 * empty-element tag.
8615
 *
8616
 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8617
 *
8618
 * [ WFC: Unique Att Spec ]
8619
 * No attribute name may appear more than once in the same start-tag or
8620
 * empty-element tag.
8621
 *
8622
 * With namespace:
8623
 *
8624
 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8625
 *
8626
 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8627
 *
8628
 * Returns the element name parsed
8629
 */
8630
8631
const xmlChar *
8632
9.32M
xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8633
9.32M
    const xmlChar *name;
8634
9.32M
    const xmlChar *attname;
8635
9.32M
    xmlChar *attvalue;
8636
9.32M
    const xmlChar **atts = ctxt->atts;
8637
9.32M
    int nbatts = 0;
8638
9.32M
    int maxatts = ctxt->maxatts;
8639
9.32M
    int i;
8640
8641
9.32M
    if (RAW != '<') return(NULL);
8642
9.32M
    NEXT1;
8643
8644
9.32M
    name = xmlParseName(ctxt);
8645
9.32M
    if (name == NULL) {
8646
1.27M
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8647
1.27M
       "xmlParseStartTag: invalid element name\n");
8648
1.27M
        return(NULL);
8649
1.27M
    }
8650
8651
    /*
8652
     * Now parse the attributes, it ends up with the ending
8653
     *
8654
     * (S Attribute)* S?
8655
     */
8656
8.04M
    SKIP_BLANKS;
8657
8.04M
    GROW;
8658
8659
11.1M
    while (((RAW != '>') &&
8660
11.1M
     ((RAW != '/') || (NXT(1) != '>')) &&
8661
11.1M
     (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
8662
7.83M
  attname = xmlParseAttribute(ctxt, &attvalue);
8663
7.83M
        if (attname == NULL) {
8664
3.65M
      xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8665
3.65M
         "xmlParseStartTag: problem parsing attributes\n");
8666
3.65M
      break;
8667
3.65M
  }
8668
4.18M
        if (attvalue != NULL) {
8669
      /*
8670
       * [ WFC: Unique Att Spec ]
8671
       * No attribute name may appear more than once in the same
8672
       * start-tag or empty-element tag.
8673
       */
8674
3.44M
      for (i = 0; i < nbatts;i += 2) {
8675
907k
          if (xmlStrEqual(atts[i], attname)) {
8676
45.7k
        xmlErrAttributeDup(ctxt, NULL, attname);
8677
45.7k
        xmlFree(attvalue);
8678
45.7k
        goto failed;
8679
45.7k
    }
8680
907k
      }
8681
      /*
8682
       * Add the pair to atts
8683
       */
8684
2.54M
      if (atts == NULL) {
8685
654k
          maxatts = 22; /* allow for 10 attrs by default */
8686
654k
          atts = (const xmlChar **)
8687
654k
           xmlMalloc(maxatts * sizeof(xmlChar *));
8688
654k
    if (atts == NULL) {
8689
0
        xmlErrMemory(ctxt, NULL);
8690
0
        if (attvalue != NULL)
8691
0
      xmlFree(attvalue);
8692
0
        goto failed;
8693
0
    }
8694
654k
    ctxt->atts = atts;
8695
654k
    ctxt->maxatts = maxatts;
8696
1.88M
      } else if (nbatts + 4 > maxatts) {
8697
180
          const xmlChar **n;
8698
8699
180
          maxatts *= 2;
8700
180
          n = (const xmlChar **) xmlRealloc((void *) atts,
8701
180
               maxatts * sizeof(const xmlChar *));
8702
180
    if (n == NULL) {
8703
0
        xmlErrMemory(ctxt, NULL);
8704
0
        if (attvalue != NULL)
8705
0
      xmlFree(attvalue);
8706
0
        goto failed;
8707
0
    }
8708
180
    atts = n;
8709
180
    ctxt->atts = atts;
8710
180
    ctxt->maxatts = maxatts;
8711
180
      }
8712
2.54M
      atts[nbatts++] = attname;
8713
2.54M
      atts[nbatts++] = attvalue;
8714
2.54M
      atts[nbatts] = NULL;
8715
2.54M
      atts[nbatts + 1] = NULL;
8716
2.54M
  } else {
8717
1.59M
      if (attvalue != NULL)
8718
0
    xmlFree(attvalue);
8719
1.59M
  }
8720
8721
4.18M
failed:
8722
8723
4.18M
  GROW
8724
4.18M
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8725
1.05M
      break;
8726
3.12M
  if (SKIP_BLANKS == 0) {
8727
2.36M
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8728
2.36M
         "attributes construct error\n");
8729
2.36M
  }
8730
3.12M
  SHRINK;
8731
3.12M
        GROW;
8732
3.12M
    }
8733
8734
    /*
8735
     * SAX: Start of Element !
8736
     */
8737
8.04M
    if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8738
8.04M
  (!ctxt->disableSAX)) {
8739
7.37M
  if (nbatts > 0)
8740
1.80M
      ctxt->sax->startElement(ctxt->userData, name, atts);
8741
5.56M
  else
8742
5.56M
      ctxt->sax->startElement(ctxt->userData, name, NULL);
8743
7.37M
    }
8744
8745
8.04M
    if (atts != NULL) {
8746
        /* Free only the content strings */
8747
7.00M
        for (i = 1;i < nbatts;i+=2)
8748
2.54M
      if (atts[i] != NULL)
8749
2.54M
         xmlFree((xmlChar *) atts[i]);
8750
4.45M
    }
8751
8.04M
    return(name);
8752
8.04M
}
8753
8754
/**
8755
 * xmlParseEndTag1:
8756
 * @ctxt:  an XML parser context
8757
 * @line:  line of the start tag
8758
 * @nsNr:  number of namespaces on the start tag
8759
 *
8760
 * Parse an end tag. Always consumes '</'.
8761
 *
8762
 * [42] ETag ::= '</' Name S? '>'
8763
 *
8764
 * With namespace
8765
 *
8766
 * [NS 9] ETag ::= '</' QName S? '>'
8767
 */
8768
8769
static void
8770
1.04M
xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8771
1.04M
    const xmlChar *name;
8772
8773
1.04M
    GROW;
8774
1.04M
    if ((RAW != '<') || (NXT(1) != '/')) {
8775
0
  xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8776
0
           "xmlParseEndTag: '</' not found\n");
8777
0
  return;
8778
0
    }
8779
1.04M
    SKIP(2);
8780
8781
1.04M
    name = xmlParseNameAndCompare(ctxt,ctxt->name);
8782
8783
    /*
8784
     * We should definitely be at the ending "S? '>'" part
8785
     */
8786
1.04M
    GROW;
8787
1.04M
    SKIP_BLANKS;
8788
1.04M
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8789
353k
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8790
353k
    } else
8791
689k
  NEXT1;
8792
8793
    /*
8794
     * [ WFC: Element Type Match ]
8795
     * The Name in an element's end-tag must match the element type in the
8796
     * start-tag.
8797
     *
8798
     */
8799
1.04M
    if (name != (xmlChar*)1) {
8800
550k
        if (name == NULL) name = BAD_CAST "unparsable";
8801
550k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8802
550k
         "Opening and ending tag mismatch: %s line %d and %s\n",
8803
550k
                    ctxt->name, line, name);
8804
550k
    }
8805
8806
    /*
8807
     * SAX: End of Tag
8808
     */
8809
1.04M
    if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8810
1.04M
  (!ctxt->disableSAX))
8811
936k
        ctxt->sax->endElement(ctxt->userData, ctxt->name);
8812
8813
1.04M
    namePop(ctxt);
8814
1.04M
    spacePop(ctxt);
8815
1.04M
    return;
8816
1.04M
}
8817
8818
/**
8819
 * xmlParseEndTag:
8820
 * @ctxt:  an XML parser context
8821
 *
8822
 * DEPRECATED: Internal function, don't use.
8823
 *
8824
 * parse an end of tag
8825
 *
8826
 * [42] ETag ::= '</' Name S? '>'
8827
 *
8828
 * With namespace
8829
 *
8830
 * [NS 9] ETag ::= '</' QName S? '>'
8831
 */
8832
8833
void
8834
0
xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8835
0
    xmlParseEndTag1(ctxt, 0);
8836
0
}
8837
#endif /* LIBXML_SAX1_ENABLED */
8838
8839
/************************************************************************
8840
 *                  *
8841
 *          SAX 2 specific operations       *
8842
 *                  *
8843
 ************************************************************************/
8844
8845
/*
8846
 * xmlGetNamespace:
8847
 * @ctxt:  an XML parser context
8848
 * @prefix:  the prefix to lookup
8849
 *
8850
 * Lookup the namespace name for the @prefix (which ca be NULL)
8851
 * The prefix must come from the @ctxt->dict dictionary
8852
 *
8853
 * Returns the namespace name or NULL if not bound
8854
 */
8855
static const xmlChar *
8856
14.7M
xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8857
14.7M
    int i;
8858
8859
14.7M
    if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
8860
18.2M
    for (i = ctxt->nsNr - 2;i >= 0;i-=2)
8861
5.89M
        if (ctxt->nsTab[i] == prefix) {
8862
2.04M
      if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8863
58.9k
          return(NULL);
8864
1.98M
      return(ctxt->nsTab[i + 1]);
8865
2.04M
  }
8866
12.3M
    return(NULL);
8867
14.3M
}
8868
8869
/**
8870
 * xmlParseQName:
8871
 * @ctxt:  an XML parser context
8872
 * @prefix:  pointer to store the prefix part
8873
 *
8874
 * parse an XML Namespace QName
8875
 *
8876
 * [6]  QName  ::= (Prefix ':')? LocalPart
8877
 * [7]  Prefix  ::= NCName
8878
 * [8]  LocalPart  ::= NCName
8879
 *
8880
 * Returns the Name parsed or NULL
8881
 */
8882
8883
static const xmlChar *
8884
27.0M
xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8885
27.0M
    const xmlChar *l, *p;
8886
8887
27.0M
    GROW;
8888
8889
27.0M
    l = xmlParseNCName(ctxt);
8890
27.0M
    if (l == NULL) {
8891
6.18M
        if (CUR == ':') {
8892
266k
      l = xmlParseName(ctxt);
8893
266k
      if (l != NULL) {
8894
266k
          xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8895
266k
             "Failed to parse QName '%s'\n", l, NULL, NULL);
8896
266k
    *prefix = NULL;
8897
266k
    return(l);
8898
266k
      }
8899
266k
  }
8900
5.91M
        return(NULL);
8901
6.18M
    }
8902
20.8M
    if (CUR == ':') {
8903
7.08M
        NEXT;
8904
7.08M
  p = l;
8905
7.08M
  l = xmlParseNCName(ctxt);
8906
7.08M
  if (l == NULL) {
8907
754k
      xmlChar *tmp;
8908
8909
754k
            if (ctxt->instate == XML_PARSER_EOF)
8910
0
                return(NULL);
8911
754k
            xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8912
754k
               "Failed to parse QName '%s:'\n", p, NULL, NULL);
8913
754k
      l = xmlParseNmtoken(ctxt);
8914
754k
      if (l == NULL) {
8915
471k
                if (ctxt->instate == XML_PARSER_EOF)
8916
0
                    return(NULL);
8917
471k
    tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8918
471k
            } else {
8919
283k
    tmp = xmlBuildQName(l, p, NULL, 0);
8920
283k
    xmlFree((char *)l);
8921
283k
      }
8922
754k
      p = xmlDictLookup(ctxt->dict, tmp, -1);
8923
754k
      if (tmp != NULL) xmlFree(tmp);
8924
754k
      *prefix = NULL;
8925
754k
      return(p);
8926
754k
  }
8927
6.33M
  if (CUR == ':') {
8928
354k
      xmlChar *tmp;
8929
8930
354k
            xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8931
354k
               "Failed to parse QName '%s:%s:'\n", p, l, NULL);
8932
354k
      NEXT;
8933
354k
      tmp = (xmlChar *) xmlParseName(ctxt);
8934
354k
      if (tmp != NULL) {
8935
303k
          tmp = xmlBuildQName(tmp, l, NULL, 0);
8936
303k
    l = xmlDictLookup(ctxt->dict, tmp, -1);
8937
303k
    if (tmp != NULL) xmlFree(tmp);
8938
303k
    *prefix = p;
8939
303k
    return(l);
8940
303k
      }
8941
51.2k
            if (ctxt->instate == XML_PARSER_EOF)
8942
0
                return(NULL);
8943
51.2k
      tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8944
51.2k
      l = xmlDictLookup(ctxt->dict, tmp, -1);
8945
51.2k
      if (tmp != NULL) xmlFree(tmp);
8946
51.2k
      *prefix = p;
8947
51.2k
      return(l);
8948
51.2k
  }
8949
5.97M
  *prefix = p;
8950
5.97M
    } else
8951
13.7M
        *prefix = NULL;
8952
19.7M
    return(l);
8953
20.8M
}
8954
8955
/**
8956
 * xmlParseQNameAndCompare:
8957
 * @ctxt:  an XML parser context
8958
 * @name:  the localname
8959
 * @prefix:  the prefix, if any.
8960
 *
8961
 * parse an XML name and compares for match
8962
 * (specialized for endtag parsing)
8963
 *
8964
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8965
 * and the name for mismatch
8966
 */
8967
8968
static const xmlChar *
8969
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8970
789k
                        xmlChar const *prefix) {
8971
789k
    const xmlChar *cmp;
8972
789k
    const xmlChar *in;
8973
789k
    const xmlChar *ret;
8974
789k
    const xmlChar *prefix2;
8975
8976
789k
    if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8977
8978
789k
    GROW;
8979
789k
    in = ctxt->input->cur;
8980
8981
789k
    cmp = prefix;
8982
1.81M
    while (*in != 0 && *in == *cmp) {
8983
1.02M
  ++in;
8984
1.02M
  ++cmp;
8985
1.02M
    }
8986
789k
    if ((*cmp == 0) && (*in == ':')) {
8987
512k
        in++;
8988
512k
  cmp = name;
8989
2.14M
  while (*in != 0 && *in == *cmp) {
8990
1.63M
      ++in;
8991
1.63M
      ++cmp;
8992
1.63M
  }
8993
512k
  if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8994
      /* success */
8995
298k
            ctxt->input->col += in - ctxt->input->cur;
8996
298k
      ctxt->input->cur = in;
8997
298k
      return((const xmlChar*) 1);
8998
298k
  }
8999
512k
    }
9000
    /*
9001
     * all strings coms from the dictionary, equality can be done directly
9002
     */
9003
491k
    ret = xmlParseQName (ctxt, &prefix2);
9004
491k
    if ((ret == name) && (prefix == prefix2))
9005
19.4k
  return((const xmlChar*) 1);
9006
471k
    return ret;
9007
491k
}
9008
9009
/**
9010
 * xmlParseAttValueInternal:
9011
 * @ctxt:  an XML parser context
9012
 * @len:  attribute len result
9013
 * @alloc:  whether the attribute was reallocated as a new string
9014
 * @normalize:  if 1 then further non-CDATA normalization must be done
9015
 *
9016
 * parse a value for an attribute.
9017
 * NOTE: if no normalization is needed, the routine will return pointers
9018
 *       directly from the data buffer.
9019
 *
9020
 * 3.3.3 Attribute-Value Normalization:
9021
 * Before the value of an attribute is passed to the application or
9022
 * checked for validity, the XML processor must normalize it as follows:
9023
 * - a character reference is processed by appending the referenced
9024
 *   character to the attribute value
9025
 * - an entity reference is processed by recursively processing the
9026
 *   replacement text of the entity
9027
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
9028
 *   appending #x20 to the normalized value, except that only a single
9029
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
9030
 *   parsed entity or the literal entity value of an internal parsed entity
9031
 * - other characters are processed by appending them to the normalized value
9032
 * If the declared value is not CDATA, then the XML processor must further
9033
 * process the normalized attribute value by discarding any leading and
9034
 * trailing space (#x20) characters, and by replacing sequences of space
9035
 * (#x20) characters by a single space (#x20) character.
9036
 * All attributes for which no declaration has been read should be treated
9037
 * by a non-validating parser as if declared CDATA.
9038
 *
9039
 * Returns the AttValue parsed or NULL. The value has to be freed by the
9040
 *     caller if it was copied, this can be detected by val[*len] == 0.
9041
 */
9042
9043
#define GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end) \
9044
19.6k
    const xmlChar *oldbase = ctxt->input->base;\
9045
19.6k
    GROW;\
9046
19.6k
    if (ctxt->instate == XML_PARSER_EOF)\
9047
19.6k
        return(NULL);\
9048
19.6k
    if (oldbase != ctxt->input->base) {\
9049
0
        ptrdiff_t delta = ctxt->input->base - oldbase;\
9050
0
        start = start + delta;\
9051
0
        in = in + delta;\
9052
0
    }\
9053
19.6k
    end = ctxt->input->end;
9054
9055
static xmlChar *
9056
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
9057
                         int normalize)
9058
8.16M
{
9059
8.16M
    xmlChar limit = 0;
9060
8.16M
    const xmlChar *in = NULL, *start, *end, *last;
9061
8.16M
    xmlChar *ret = NULL;
9062
8.16M
    int line, col;
9063
8.16M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9064
2.29M
                    XML_MAX_HUGE_LENGTH :
9065
8.16M
                    XML_MAX_TEXT_LENGTH;
9066
9067
8.16M
    GROW;
9068
8.16M
    in = (xmlChar *) CUR_PTR;
9069
8.16M
    line = ctxt->input->line;
9070
8.16M
    col = ctxt->input->col;
9071
8.16M
    if (*in != '"' && *in != '\'') {
9072
376k
        xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
9073
376k
        return (NULL);
9074
376k
    }
9075
7.78M
    ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
9076
9077
    /*
9078
     * try to handle in this routine the most common case where no
9079
     * allocation of a new string is required and where content is
9080
     * pure ASCII.
9081
     */
9082
7.78M
    limit = *in++;
9083
7.78M
    col++;
9084
7.78M
    end = ctxt->input->end;
9085
7.78M
    start = in;
9086
7.78M
    if (in >= end) {
9087
4.19k
        GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9088
4.19k
    }
9089
7.78M
    if (normalize) {
9090
        /*
9091
   * Skip any leading spaces
9092
   */
9093
125k
  while ((in < end) && (*in != limit) &&
9094
125k
         ((*in == 0x20) || (*in == 0x9) ||
9095
125k
          (*in == 0xA) || (*in == 0xD))) {
9096
17.5k
      if (*in == 0xA) {
9097
12.3k
          line++; col = 1;
9098
12.3k
      } else {
9099
5.13k
          col++;
9100
5.13k
      }
9101
17.5k
      in++;
9102
17.5k
      start = in;
9103
17.5k
      if (in >= end) {
9104
5
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9105
5
                if ((in - start) > maxLength) {
9106
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9107
0
                                   "AttValue length too long\n");
9108
0
                    return(NULL);
9109
0
                }
9110
5
      }
9111
17.5k
  }
9112
928k
  while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9113
928k
         (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9114
821k
      col++;
9115
821k
      if ((*in++ == 0x20) && (*in == 0x20)) break;
9116
820k
      if (in >= end) {
9117
59
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9118
59
                if ((in - start) > maxLength) {
9119
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9120
0
                                   "AttValue length too long\n");
9121
0
                    return(NULL);
9122
0
                }
9123
59
      }
9124
820k
  }
9125
107k
  last = in;
9126
  /*
9127
   * skip the trailing blanks
9128
   */
9129
109k
  while ((last[-1] == 0x20) && (last > start)) last--;
9130
112k
  while ((in < end) && (*in != limit) &&
9131
112k
         ((*in == 0x20) || (*in == 0x9) ||
9132
24.2k
          (*in == 0xA) || (*in == 0xD))) {
9133
4.97k
      if (*in == 0xA) {
9134
2.95k
          line++, col = 1;
9135
2.95k
      } else {
9136
2.01k
          col++;
9137
2.01k
      }
9138
4.97k
      in++;
9139
4.97k
      if (in >= end) {
9140
15
    const xmlChar *oldbase = ctxt->input->base;
9141
15
    GROW;
9142
15
                if (ctxt->instate == XML_PARSER_EOF)
9143
0
                    return(NULL);
9144
15
    if (oldbase != ctxt->input->base) {
9145
0
        ptrdiff_t delta = ctxt->input->base - oldbase;
9146
0
        start = start + delta;
9147
0
        in = in + delta;
9148
0
        last = last + delta;
9149
0
    }
9150
15
    end = ctxt->input->end;
9151
15
                if ((in - start) > maxLength) {
9152
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9153
0
                                   "AttValue length too long\n");
9154
0
                    return(NULL);
9155
0
                }
9156
15
      }
9157
4.97k
  }
9158
107k
        if ((in - start) > maxLength) {
9159
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9160
0
                           "AttValue length too long\n");
9161
0
            return(NULL);
9162
0
        }
9163
107k
  if (*in != limit) goto need_complex;
9164
7.67M
    } else {
9165
78.5M
  while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9166
78.5M
         (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9167
70.8M
      in++;
9168
70.8M
      col++;
9169
70.8M
      if (in >= end) {
9170
15.4k
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9171
15.4k
                if ((in - start) > maxLength) {
9172
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9173
0
                                   "AttValue length too long\n");
9174
0
                    return(NULL);
9175
0
                }
9176
15.4k
      }
9177
70.8M
  }
9178
7.67M
  last = in;
9179
7.67M
        if ((in - start) > maxLength) {
9180
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9181
0
                           "AttValue length too long\n");
9182
0
            return(NULL);
9183
0
        }
9184
7.67M
  if (*in != limit) goto need_complex;
9185
7.67M
    }
9186
5.46M
    in++;
9187
5.46M
    col++;
9188
5.46M
    if (len != NULL) {
9189
3.74M
        if (alloc) *alloc = 0;
9190
3.74M
        *len = last - start;
9191
3.74M
        ret = (xmlChar *) start;
9192
3.74M
    } else {
9193
1.72M
        if (alloc) *alloc = 1;
9194
1.72M
        ret = xmlStrndup(start, last - start);
9195
1.72M
    }
9196
5.46M
    CUR_PTR = in;
9197
5.46M
    ctxt->input->line = line;
9198
5.46M
    ctxt->input->col = col;
9199
5.46M
    return ret;
9200
2.31M
need_complex:
9201
2.31M
    if (alloc) *alloc = 1;
9202
2.31M
    return xmlParseAttValueComplex(ctxt, len, normalize);
9203
7.78M
}
9204
9205
/**
9206
 * xmlParseAttribute2:
9207
 * @ctxt:  an XML parser context
9208
 * @pref:  the element prefix
9209
 * @elem:  the element name
9210
 * @prefix:  a xmlChar ** used to store the value of the attribute prefix
9211
 * @value:  a xmlChar ** used to store the value of the attribute
9212
 * @len:  an int * to save the length of the attribute
9213
 * @alloc:  an int * to indicate if the attribute was allocated
9214
 *
9215
 * parse an attribute in the new SAX2 framework.
9216
 *
9217
 * Returns the attribute name, and the value in *value, .
9218
 */
9219
9220
static const xmlChar *
9221
xmlParseAttribute2(xmlParserCtxtPtr ctxt,
9222
                   const xmlChar * pref, const xmlChar * elem,
9223
                   const xmlChar ** prefix, xmlChar ** value,
9224
                   int *len, int *alloc)
9225
10.5M
{
9226
10.5M
    const xmlChar *name;
9227
10.5M
    xmlChar *val, *internal_val = NULL;
9228
10.5M
    int normalize = 0;
9229
9230
10.5M
    *value = NULL;
9231
10.5M
    GROW;
9232
10.5M
    name = xmlParseQName(ctxt, prefix);
9233
10.5M
    if (name == NULL) {
9234
3.78M
        xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9235
3.78M
                       "error parsing attribute name\n");
9236
3.78M
        return (NULL);
9237
3.78M
    }
9238
9239
    /*
9240
     * get the type if needed
9241
     */
9242
6.74M
    if (ctxt->attsSpecial != NULL) {
9243
261k
        int type;
9244
9245
261k
        type = (int) (ptrdiff_t) xmlHashQLookup2(ctxt->attsSpecial,
9246
261k
                                                 pref, elem, *prefix, name);
9247
261k
        if (type != 0)
9248
109k
            normalize = 1;
9249
261k
    }
9250
9251
    /*
9252
     * read the value
9253
     */
9254
6.74M
    SKIP_BLANKS;
9255
6.74M
    if (RAW == '=') {
9256
5.32M
        NEXT;
9257
5.32M
        SKIP_BLANKS;
9258
5.32M
        val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9259
5.32M
        if (val == NULL)
9260
197k
            return (NULL);
9261
5.12M
  if (normalize) {
9262
      /*
9263
       * Sometimes a second normalisation pass for spaces is needed
9264
       * but that only happens if charrefs or entities references
9265
       * have been used in the attribute value, i.e. the attribute
9266
       * value have been extracted in an allocated string already.
9267
       */
9268
107k
      if (*alloc) {
9269
19.3k
          const xmlChar *val2;
9270
9271
19.3k
          val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
9272
19.3k
    if ((val2 != NULL) && (val2 != val)) {
9273
2.62k
        xmlFree(val);
9274
2.62k
        val = (xmlChar *) val2;
9275
2.62k
    }
9276
19.3k
      }
9277
107k
  }
9278
5.12M
        ctxt->instate = XML_PARSER_CONTENT;
9279
5.12M
    } else {
9280
1.42M
        xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
9281
1.42M
                          "Specification mandates value for attribute %s\n",
9282
1.42M
                          name);
9283
1.42M
        return (name);
9284
1.42M
    }
9285
9286
5.12M
    if (*prefix == ctxt->str_xml) {
9287
        /*
9288
         * Check that xml:lang conforms to the specification
9289
         * No more registered as an error, just generate a warning now
9290
         * since this was deprecated in XML second edition
9291
         */
9292
322k
        if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
9293
30.4k
            internal_val = xmlStrndup(val, *len);
9294
30.4k
            if (!xmlCheckLanguageID(internal_val)) {
9295
18.3k
                xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9296
18.3k
                              "Malformed value for xml:lang : %s\n",
9297
18.3k
                              internal_val, NULL);
9298
18.3k
            }
9299
30.4k
        }
9300
9301
        /*
9302
         * Check that xml:space conforms to the specification
9303
         */
9304
322k
        if (xmlStrEqual(name, BAD_CAST "space")) {
9305
6.95k
            internal_val = xmlStrndup(val, *len);
9306
6.95k
            if (xmlStrEqual(internal_val, BAD_CAST "default"))
9307
31
                *(ctxt->space) = 0;
9308
6.92k
            else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9309
59
                *(ctxt->space) = 1;
9310
6.86k
            else {
9311
6.86k
                xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9312
6.86k
                              "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9313
6.86k
                              internal_val, NULL);
9314
6.86k
            }
9315
6.95k
        }
9316
322k
        if (internal_val) {
9317
37.4k
            xmlFree(internal_val);
9318
37.4k
        }
9319
322k
    }
9320
9321
5.12M
    *value = val;
9322
5.12M
    return (name);
9323
6.74M
}
9324
/**
9325
 * xmlParseStartTag2:
9326
 * @ctxt:  an XML parser context
9327
 *
9328
 * Parse a start tag. Always consumes '<'.
9329
 *
9330
 * This routine is called when running SAX2 parsing
9331
 *
9332
 * [40] STag ::= '<' Name (S Attribute)* S? '>'
9333
 *
9334
 * [ WFC: Unique Att Spec ]
9335
 * No attribute name may appear more than once in the same start-tag or
9336
 * empty-element tag.
9337
 *
9338
 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9339
 *
9340
 * [ WFC: Unique Att Spec ]
9341
 * No attribute name may appear more than once in the same start-tag or
9342
 * empty-element tag.
9343
 *
9344
 * With namespace:
9345
 *
9346
 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9347
 *
9348
 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9349
 *
9350
 * Returns the element name parsed
9351
 */
9352
9353
static const xmlChar *
9354
xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
9355
16.0M
                  const xmlChar **URI, int *tlen) {
9356
16.0M
    const xmlChar *localname;
9357
16.0M
    const xmlChar *prefix;
9358
16.0M
    const xmlChar *attname;
9359
16.0M
    const xmlChar *aprefix;
9360
16.0M
    const xmlChar *nsname;
9361
16.0M
    xmlChar *attvalue;
9362
16.0M
    const xmlChar **atts = ctxt->atts;
9363
16.0M
    int maxatts = ctxt->maxatts;
9364
16.0M
    int nratts, nbatts, nbdef, inputid;
9365
16.0M
    int i, j, nbNs, attval;
9366
16.0M
    unsigned long cur;
9367
16.0M
    int nsNr = ctxt->nsNr;
9368
9369
16.0M
    if (RAW != '<') return(NULL);
9370
16.0M
    NEXT1;
9371
9372
    /*
9373
     * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
9374
     *       point since the attribute values may be stored as pointers to
9375
     *       the buffer and calling SHRINK would destroy them !
9376
     *       The Shrinking is only possible once the full set of attribute
9377
     *       callbacks have been done.
9378
     */
9379
16.0M
    SHRINK;
9380
16.0M
    cur = ctxt->input->cur - ctxt->input->base;
9381
16.0M
    inputid = ctxt->input->id;
9382
16.0M
    nbatts = 0;
9383
16.0M
    nratts = 0;
9384
16.0M
    nbdef = 0;
9385
16.0M
    nbNs = 0;
9386
16.0M
    attval = 0;
9387
    /* Forget any namespaces added during an earlier parse of this element. */
9388
16.0M
    ctxt->nsNr = nsNr;
9389
9390
16.0M
    localname = xmlParseQName(ctxt, &prefix);
9391
16.0M
    if (localname == NULL) {
9392
2.09M
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9393
2.09M
           "StartTag: invalid element name\n");
9394
2.09M
        return(NULL);
9395
2.09M
    }
9396
13.9M
    *tlen = ctxt->input->cur - ctxt->input->base - cur;
9397
9398
    /*
9399
     * Now parse the attributes, it ends up with the ending
9400
     *
9401
     * (S Attribute)* S?
9402
     */
9403
13.9M
    SKIP_BLANKS;
9404
13.9M
    GROW;
9405
9406
15.6M
    while (((RAW != '>') &&
9407
15.6M
     ((RAW != '/') || (NXT(1) != '>')) &&
9408
15.6M
     (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
9409
10.5M
  int len = -1, alloc = 0;
9410
9411
10.5M
  attname = xmlParseAttribute2(ctxt, prefix, localname,
9412
10.5M
                               &aprefix, &attvalue, &len, &alloc);
9413
10.5M
        if (attname == NULL) {
9414
3.98M
      xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9415
3.98M
           "xmlParseStartTag: problem parsing attributes\n");
9416
3.98M
      break;
9417
3.98M
  }
9418
6.54M
        if (attvalue == NULL)
9419
1.42M
            goto next_attr;
9420
5.12M
  if (len < 0) len = xmlStrlen(attvalue);
9421
9422
5.12M
        if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9423
413k
            const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9424
413k
            xmlURIPtr uri;
9425
9426
413k
            if (URL == NULL) {
9427
0
                xmlErrMemory(ctxt, "dictionary allocation failure");
9428
0
                if ((attvalue != NULL) && (alloc != 0))
9429
0
                    xmlFree(attvalue);
9430
0
                localname = NULL;
9431
0
                goto done;
9432
0
            }
9433
413k
            if (*URL != 0) {
9434
389k
                uri = xmlParseURI((const char *) URL);
9435
389k
                if (uri == NULL) {
9436
160k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
9437
160k
                             "xmlns: '%s' is not a valid URI\n",
9438
160k
                                       URL, NULL, NULL);
9439
229k
                } else {
9440
229k
                    if (uri->scheme == NULL) {
9441
123k
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9442
123k
                                  "xmlns: URI %s is not absolute\n",
9443
123k
                                  URL, NULL, NULL);
9444
123k
                    }
9445
229k
                    xmlFreeURI(uri);
9446
229k
                }
9447
389k
                if (URL == ctxt->str_xml_ns) {
9448
0
                    if (attname != ctxt->str_xml) {
9449
0
                        xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9450
0
                     "xml namespace URI cannot be the default namespace\n",
9451
0
                                 NULL, NULL, NULL);
9452
0
                    }
9453
0
                    goto next_attr;
9454
0
                }
9455
389k
                if ((len == 29) &&
9456
389k
                    (xmlStrEqual(URL,
9457
8.46k
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9458
111
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9459
111
                         "reuse of the xmlns namespace name is forbidden\n",
9460
111
                             NULL, NULL, NULL);
9461
111
                    goto next_attr;
9462
111
                }
9463
389k
            }
9464
            /*
9465
             * check that it's not a defined namespace
9466
             */
9467
436k
            for (j = 1;j <= nbNs;j++)
9468
32.6k
                if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9469
10.0k
                    break;
9470
413k
            if (j <= nbNs)
9471
10.0k
                xmlErrAttributeDup(ctxt, NULL, attname);
9472
403k
            else
9473
403k
                if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
9474
9475
4.70M
        } else if (aprefix == ctxt->str_xmlns) {
9476
870k
            const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9477
870k
            xmlURIPtr uri;
9478
9479
870k
            if (attname == ctxt->str_xml) {
9480
2.26k
                if (URL != ctxt->str_xml_ns) {
9481
2.26k
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9482
2.26k
                             "xml namespace prefix mapped to wrong URI\n",
9483
2.26k
                             NULL, NULL, NULL);
9484
2.26k
                }
9485
                /*
9486
                 * Do not keep a namespace definition node
9487
                 */
9488
2.26k
                goto next_attr;
9489
2.26k
            }
9490
867k
            if (URL == ctxt->str_xml_ns) {
9491
0
                if (attname != ctxt->str_xml) {
9492
0
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9493
0
                             "xml namespace URI mapped to wrong prefix\n",
9494
0
                             NULL, NULL, NULL);
9495
0
                }
9496
0
                goto next_attr;
9497
0
            }
9498
867k
            if (attname == ctxt->str_xmlns) {
9499
485
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9500
485
                         "redefinition of the xmlns prefix is forbidden\n",
9501
485
                         NULL, NULL, NULL);
9502
485
                goto next_attr;
9503
485
            }
9504
867k
            if ((len == 29) &&
9505
867k
                (xmlStrEqual(URL,
9506
16.7k
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9507
281
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9508
281
                         "reuse of the xmlns namespace name is forbidden\n",
9509
281
                         NULL, NULL, NULL);
9510
281
                goto next_attr;
9511
281
            }
9512
867k
            if ((URL == NULL) || (URL[0] == 0)) {
9513
27.7k
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9514
27.7k
                         "xmlns:%s: Empty XML namespace is not allowed\n",
9515
27.7k
                              attname, NULL, NULL);
9516
27.7k
                goto next_attr;
9517
839k
            } else {
9518
839k
                uri = xmlParseURI((const char *) URL);
9519
839k
                if (uri == NULL) {
9520
274k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
9521
274k
                         "xmlns:%s: '%s' is not a valid URI\n",
9522
274k
                                       attname, URL, NULL);
9523
565k
                } else {
9524
565k
                    if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9525
117k
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9526
117k
                                  "xmlns:%s: URI %s is not absolute\n",
9527
117k
                                  attname, URL, NULL);
9528
117k
                    }
9529
565k
                    xmlFreeURI(uri);
9530
565k
                }
9531
839k
            }
9532
9533
            /*
9534
             * check that it's not a defined namespace
9535
             */
9536
933k
            for (j = 1;j <= nbNs;j++)
9537
106k
                if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9538
11.5k
                    break;
9539
839k
            if (j <= nbNs)
9540
11.5k
                xmlErrAttributeDup(ctxt, aprefix, attname);
9541
827k
            else
9542
827k
                if (nsPush(ctxt, attname, URL) > 0) nbNs++;
9543
9544
3.83M
        } else {
9545
            /*
9546
             * Add the pair to atts
9547
             */
9548
3.83M
            if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9549
692k
                if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9550
0
                    goto next_attr;
9551
0
                }
9552
692k
                maxatts = ctxt->maxatts;
9553
692k
                atts = ctxt->atts;
9554
692k
            }
9555
3.83M
            ctxt->attallocs[nratts++] = alloc;
9556
3.83M
            atts[nbatts++] = attname;
9557
3.83M
            atts[nbatts++] = aprefix;
9558
            /*
9559
             * The namespace URI field is used temporarily to point at the
9560
             * base of the current input buffer for non-alloced attributes.
9561
             * When the input buffer is reallocated, all the pointers become
9562
             * invalid, but they can be reconstructed later.
9563
             */
9564
3.83M
            if (alloc)
9565
917k
                atts[nbatts++] = NULL;
9566
2.92M
            else
9567
2.92M
                atts[nbatts++] = ctxt->input->base;
9568
3.83M
            atts[nbatts++] = attvalue;
9569
3.83M
            attvalue += len;
9570
3.83M
            atts[nbatts++] = attvalue;
9571
            /*
9572
             * tag if some deallocation is needed
9573
             */
9574
3.83M
            if (alloc != 0) attval = 1;
9575
3.83M
            attvalue = NULL; /* moved into atts */
9576
3.83M
        }
9577
9578
6.54M
next_attr:
9579
6.54M
        if ((attvalue != NULL) && (alloc != 0)) {
9580
465k
            xmlFree(attvalue);
9581
465k
            attvalue = NULL;
9582
465k
        }
9583
9584
6.54M
  GROW
9585
6.54M
        if (ctxt->instate == XML_PARSER_EOF)
9586
0
            break;
9587
6.54M
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9588
2.27M
      break;
9589
4.26M
  if (SKIP_BLANKS == 0) {
9590
2.53M
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9591
2.53M
         "attributes construct error\n");
9592
2.53M
      break;
9593
2.53M
  }
9594
1.73M
        GROW;
9595
1.73M
    }
9596
9597
13.9M
    if (ctxt->input->id != inputid) {
9598
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9599
0
                    "Unexpected change of input\n");
9600
0
        localname = NULL;
9601
0
        goto done;
9602
0
    }
9603
9604
    /* Reconstruct attribute value pointers. */
9605
17.7M
    for (i = 0, j = 0; j < nratts; i += 5, j++) {
9606
3.83M
        if (atts[i+2] != NULL) {
9607
            /*
9608
             * Arithmetic on dangling pointers is technically undefined
9609
             * behavior, but well...
9610
             */
9611
2.92M
            const xmlChar *old = atts[i+2];
9612
2.92M
            atts[i+2]  = NULL;    /* Reset repurposed namespace URI */
9613
2.92M
            atts[i+3] = ctxt->input->base + (atts[i+3] - old);  /* value */
9614
2.92M
            atts[i+4] = ctxt->input->base + (atts[i+4] - old);  /* valuend */
9615
2.92M
        }
9616
3.83M
    }
9617
9618
    /*
9619
     * The attributes defaulting
9620
     */
9621
13.9M
    if (ctxt->attsDefault != NULL) {
9622
267k
        xmlDefAttrsPtr defaults;
9623
9624
267k
  defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9625
267k
  if (defaults != NULL) {
9626
63.5k
      for (i = 0;i < defaults->nbAttrs;i++) {
9627
41.7k
          attname = defaults->values[5 * i];
9628
41.7k
    aprefix = defaults->values[5 * i + 1];
9629
9630
                /*
9631
     * special work for namespaces defaulted defs
9632
     */
9633
41.7k
    if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9634
        /*
9635
         * check that it's not a defined namespace
9636
         */
9637
254
        for (j = 1;j <= nbNs;j++)
9638
187
            if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9639
102
          break;
9640
169
              if (j <= nbNs) continue;
9641
9642
67
        nsname = xmlGetNamespace(ctxt, NULL);
9643
67
        if (nsname != defaults->values[5 * i + 2]) {
9644
66
      if (nsPush(ctxt, NULL,
9645
66
                 defaults->values[5 * i + 2]) > 0)
9646
66
          nbNs++;
9647
66
        }
9648
41.6k
    } else if (aprefix == ctxt->str_xmlns) {
9649
        /*
9650
         * check that it's not a defined namespace
9651
         */
9652
621
        for (j = 1;j <= nbNs;j++)
9653
232
            if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9654
162
          break;
9655
551
              if (j <= nbNs) continue;
9656
9657
389
        nsname = xmlGetNamespace(ctxt, attname);
9658
389
        if (nsname != defaults->values[5 * i + 2]) {
9659
327
      if (nsPush(ctxt, attname,
9660
327
                 defaults->values[5 * i + 2]) > 0)
9661
327
          nbNs++;
9662
327
        }
9663
41.0k
    } else {
9664
        /*
9665
         * check that it's not a defined attribute
9666
         */
9667
102k
        for (j = 0;j < nbatts;j+=5) {
9668
62.1k
      if ((attname == atts[j]) && (aprefix == atts[j+1]))
9669
1.06k
          break;
9670
62.1k
        }
9671
41.0k
        if (j < nbatts) continue;
9672
9673
39.9k
        if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9674
2.35k
      if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9675
0
                            localname = NULL;
9676
0
                            goto done;
9677
0
      }
9678
2.35k
      maxatts = ctxt->maxatts;
9679
2.35k
      atts = ctxt->atts;
9680
2.35k
        }
9681
39.9k
        atts[nbatts++] = attname;
9682
39.9k
        atts[nbatts++] = aprefix;
9683
39.9k
        if (aprefix == NULL)
9684
33.7k
      atts[nbatts++] = NULL;
9685
6.29k
        else
9686
6.29k
            atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
9687
39.9k
        atts[nbatts++] = defaults->values[5 * i + 2];
9688
39.9k
        atts[nbatts++] = defaults->values[5 * i + 3];
9689
39.9k
        if ((ctxt->standalone == 1) &&
9690
39.9k
            (defaults->values[5 * i + 4] != NULL)) {
9691
0
      xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9692
0
    "standalone: attribute %s on %s defaulted from external subset\n",
9693
0
                                   attname, localname);
9694
0
        }
9695
39.9k
        nbdef++;
9696
39.9k
    }
9697
41.7k
      }
9698
21.7k
  }
9699
267k
    }
9700
9701
    /*
9702
     * The attributes checkings
9703
     */
9704
17.8M
    for (i = 0; i < nbatts;i += 5) {
9705
        /*
9706
  * The default namespace does not apply to attribute names.
9707
  */
9708
3.87M
  if (atts[i + 1] != NULL) {
9709
812k
      nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9710
812k
      if (nsname == NULL) {
9711
468k
    xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9712
468k
        "Namespace prefix %s for %s on %s is not defined\n",
9713
468k
        atts[i + 1], atts[i], localname);
9714
468k
      }
9715
812k
      atts[i + 2] = nsname;
9716
812k
  } else
9717
3.06M
      nsname = NULL;
9718
  /*
9719
   * [ WFC: Unique Att Spec ]
9720
   * No attribute name may appear more than once in the same
9721
   * start-tag or empty-element tag.
9722
   * As extended by the Namespace in XML REC.
9723
   */
9724
5.40M
        for (j = 0; j < i;j += 5) {
9725
1.56M
      if (atts[i] == atts[j]) {
9726
139k
          if (atts[i+1] == atts[j+1]) {
9727
40.0k
        xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9728
40.0k
        break;
9729
40.0k
    }
9730
99.1k
    if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9731
66
        xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9732
66
           "Namespaced Attribute %s in '%s' redefined\n",
9733
66
           atts[i], nsname, NULL);
9734
66
        break;
9735
66
    }
9736
99.1k
      }
9737
1.56M
  }
9738
3.87M
    }
9739
9740
13.9M
    nsname = xmlGetNamespace(ctxt, prefix);
9741
13.9M
    if ((prefix != NULL) && (nsname == NULL)) {
9742
2.95M
  xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9743
2.95M
           "Namespace prefix %s on %s is not defined\n",
9744
2.95M
     prefix, localname, NULL);
9745
2.95M
    }
9746
13.9M
    *pref = prefix;
9747
13.9M
    *URI = nsname;
9748
9749
    /*
9750
     * SAX: Start of Element !
9751
     */
9752
13.9M
    if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9753
13.9M
  (!ctxt->disableSAX)) {
9754
12.5M
  if (nbNs > 0)
9755
914k
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9756
914k
        nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9757
914k
        nbatts / 5, nbdef, atts);
9758
11.6M
  else
9759
11.6M
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9760
11.6M
                    nsname, 0, NULL, nbatts / 5, nbdef, atts);
9761
12.5M
    }
9762
9763
13.9M
done:
9764
    /*
9765
     * Free up attribute allocated strings if needed
9766
     */
9767
13.9M
    if (attval != 0) {
9768
2.00M
  for (i = 3,j = 0; j < nratts;i += 5,j++)
9769
1.14M
      if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9770
917k
          xmlFree((xmlChar *) atts[i]);
9771
857k
    }
9772
9773
13.9M
    return(localname);
9774
13.9M
}
9775
9776
/**
9777
 * xmlParseEndTag2:
9778
 * @ctxt:  an XML parser context
9779
 * @line:  line of the start tag
9780
 * @nsNr:  number of namespaces on the start tag
9781
 *
9782
 * Parse an end tag. Always consumes '</'.
9783
 *
9784
 * [42] ETag ::= '</' Name S? '>'
9785
 *
9786
 * With namespace
9787
 *
9788
 * [NS 9] ETag ::= '</' QName S? '>'
9789
 */
9790
9791
static void
9792
2.15M
xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlStartTag *tag) {
9793
2.15M
    const xmlChar *name;
9794
9795
2.15M
    GROW;
9796
2.15M
    if ((RAW != '<') || (NXT(1) != '/')) {
9797
0
  xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9798
0
  return;
9799
0
    }
9800
2.15M
    SKIP(2);
9801
9802
2.15M
    if (tag->prefix == NULL)
9803
1.36M
        name = xmlParseNameAndCompare(ctxt, ctxt->name);
9804
789k
    else
9805
789k
        name = xmlParseQNameAndCompare(ctxt, ctxt->name, tag->prefix);
9806
9807
    /*
9808
     * We should definitely be at the ending "S? '>'" part
9809
     */
9810
2.15M
    GROW;
9811
2.15M
    if (ctxt->instate == XML_PARSER_EOF)
9812
0
        return;
9813
2.15M
    SKIP_BLANKS;
9814
2.15M
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9815
582k
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9816
582k
    } else
9817
1.56M
  NEXT1;
9818
9819
    /*
9820
     * [ WFC: Element Type Match ]
9821
     * The Name in an element's end-tag must match the element type in the
9822
     * start-tag.
9823
     *
9824
     */
9825
2.15M
    if (name != (xmlChar*)1) {
9826
962k
        if (name == NULL) name = BAD_CAST "unparsable";
9827
962k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9828
962k
         "Opening and ending tag mismatch: %s line %d and %s\n",
9829
962k
                    ctxt->name, tag->line, name);
9830
962k
    }
9831
9832
    /*
9833
     * SAX: End of Tag
9834
     */
9835
2.15M
    if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9836
2.15M
  (!ctxt->disableSAX))
9837
1.88M
  ctxt->sax->endElementNs(ctxt->userData, ctxt->name, tag->prefix,
9838
1.88M
                                tag->URI);
9839
9840
2.15M
    spacePop(ctxt);
9841
2.15M
    if (tag->nsNr != 0)
9842
174k
  nsPop(ctxt, tag->nsNr);
9843
2.15M
}
9844
9845
/**
9846
 * xmlParseCDSect:
9847
 * @ctxt:  an XML parser context
9848
 *
9849
 * DEPRECATED: Internal function, don't use.
9850
 *
9851
 * Parse escaped pure raw content. Always consumes '<!['.
9852
 *
9853
 * [18] CDSect ::= CDStart CData CDEnd
9854
 *
9855
 * [19] CDStart ::= '<![CDATA['
9856
 *
9857
 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9858
 *
9859
 * [21] CDEnd ::= ']]>'
9860
 */
9861
void
9862
238k
xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9863
238k
    xmlChar *buf = NULL;
9864
238k
    int len = 0;
9865
238k
    int size = XML_PARSER_BUFFER_SIZE;
9866
238k
    int r, rl;
9867
238k
    int s, sl;
9868
238k
    int cur, l;
9869
238k
    int count = 0;
9870
238k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9871
75.5k
                    XML_MAX_HUGE_LENGTH :
9872
238k
                    XML_MAX_TEXT_LENGTH;
9873
9874
238k
    if ((CUR != '<') || (NXT(1) != '!') || (NXT(2) != '['))
9875
0
        return;
9876
238k
    SKIP(3);
9877
9878
238k
    if (!CMP6(CUR_PTR, 'C', 'D', 'A', 'T', 'A', '['))
9879
0
        return;
9880
238k
    SKIP(6);
9881
9882
238k
    ctxt->instate = XML_PARSER_CDATA_SECTION;
9883
238k
    r = CUR_CHAR(rl);
9884
238k
    if (!IS_CHAR(r)) {
9885
3.59k
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9886
3.59k
        goto out;
9887
3.59k
    }
9888
235k
    NEXTL(rl);
9889
235k
    s = CUR_CHAR(sl);
9890
235k
    if (!IS_CHAR(s)) {
9891
4.41k
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9892
4.41k
        goto out;
9893
4.41k
    }
9894
230k
    NEXTL(sl);
9895
230k
    cur = CUR_CHAR(l);
9896
230k
    buf = (xmlChar *) xmlMallocAtomic(size);
9897
230k
    if (buf == NULL) {
9898
0
  xmlErrMemory(ctxt, NULL);
9899
0
        goto out;
9900
0
    }
9901
29.2M
    while (IS_CHAR(cur) &&
9902
29.2M
           ((r != ']') || (s != ']') || (cur != '>'))) {
9903
28.9M
  if (len + 5 >= size) {
9904
150k
      xmlChar *tmp;
9905
9906
150k
      tmp = (xmlChar *) xmlRealloc(buf, size * 2);
9907
150k
      if (tmp == NULL) {
9908
0
    xmlErrMemory(ctxt, NULL);
9909
0
                goto out;
9910
0
      }
9911
150k
      buf = tmp;
9912
150k
      size *= 2;
9913
150k
  }
9914
28.9M
  COPY_BUF(rl,buf,len,r);
9915
28.9M
  r = s;
9916
28.9M
  rl = sl;
9917
28.9M
  s = cur;
9918
28.9M
  sl = l;
9919
28.9M
  count++;
9920
28.9M
  if (count > 50) {
9921
500k
      SHRINK;
9922
500k
      GROW;
9923
500k
            if (ctxt->instate == XML_PARSER_EOF) {
9924
0
                goto out;
9925
0
            }
9926
500k
      count = 0;
9927
500k
  }
9928
28.9M
  NEXTL(l);
9929
28.9M
  cur = CUR_CHAR(l);
9930
28.9M
        if (len > maxLength) {
9931
0
            xmlFatalErrMsg(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9932
0
                           "CData section too big found\n");
9933
0
            goto out;
9934
0
        }
9935
28.9M
    }
9936
230k
    buf[len] = 0;
9937
230k
    if (cur != '>') {
9938
83.7k
  xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9939
83.7k
                       "CData section not finished\n%.50s\n", buf);
9940
83.7k
        goto out;
9941
83.7k
    }
9942
146k
    NEXTL(l);
9943
9944
    /*
9945
     * OK the buffer is to be consumed as cdata.
9946
     */
9947
146k
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9948
112k
  if (ctxt->sax->cdataBlock != NULL)
9949
73.5k
      ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9950
39.2k
  else if (ctxt->sax->characters != NULL)
9951
39.2k
      ctxt->sax->characters(ctxt->userData, buf, len);
9952
112k
    }
9953
9954
238k
out:
9955
238k
    if (ctxt->instate != XML_PARSER_EOF)
9956
238k
        ctxt->instate = XML_PARSER_CONTENT;
9957
238k
    xmlFree(buf);
9958
238k
}
9959
9960
/**
9961
 * xmlParseContentInternal:
9962
 * @ctxt:  an XML parser context
9963
 *
9964
 * Parse a content sequence. Stops at EOF or '</'. Leaves checking of
9965
 * unexpected EOF to the caller.
9966
 */
9967
9968
static void
9969
888k
xmlParseContentInternal(xmlParserCtxtPtr ctxt) {
9970
888k
    int nameNr = ctxt->nameNr;
9971
9972
888k
    GROW;
9973
28.9M
    while ((RAW != 0) &&
9974
28.9M
     (ctxt->instate != XML_PARSER_EOF)) {
9975
28.1M
  const xmlChar *cur = ctxt->input->cur;
9976
9977
  /*
9978
   * First case : a Processing Instruction.
9979
   */
9980
28.1M
  if ((*cur == '<') && (cur[1] == '?')) {
9981
307k
      xmlParsePI(ctxt);
9982
307k
  }
9983
9984
  /*
9985
   * Second case : a CDSection
9986
   */
9987
  /* 2.6.0 test was *cur not RAW */
9988
27.8M
  else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9989
238k
      xmlParseCDSect(ctxt);
9990
238k
  }
9991
9992
  /*
9993
   * Third case :  a comment
9994
   */
9995
27.5M
  else if ((*cur == '<') && (NXT(1) == '!') &&
9996
27.5M
     (NXT(2) == '-') && (NXT(3) == '-')) {
9997
412k
      xmlParseComment(ctxt);
9998
412k
      ctxt->instate = XML_PARSER_CONTENT;
9999
412k
  }
10000
10001
  /*
10002
   * Fourth case :  a sub-element.
10003
   */
10004
27.1M
  else if (*cur == '<') {
10005
9.67M
            if (NXT(1) == '/') {
10006
1.05M
                if (ctxt->nameNr <= nameNr)
10007
61.4k
                    break;
10008
995k
          xmlParseElementEnd(ctxt);
10009
8.61M
            } else {
10010
8.61M
          xmlParseElementStart(ctxt);
10011
8.61M
            }
10012
9.67M
  }
10013
10014
  /*
10015
   * Fifth case : a reference. If if has not been resolved,
10016
   *    parsing returns it's Name, create the node
10017
   */
10018
10019
17.5M
  else if (*cur == '&') {
10020
2.70M
      xmlParseReference(ctxt);
10021
2.70M
  }
10022
10023
  /*
10024
   * Last case, text. Note that References are handled directly.
10025
   */
10026
14.7M
  else {
10027
14.7M
      xmlParseCharData(ctxt, 0);
10028
14.7M
  }
10029
10030
28.0M
  GROW;
10031
28.0M
  SHRINK;
10032
28.0M
    }
10033
888k
}
10034
10035
/**
10036
 * xmlParseContent:
10037
 * @ctxt:  an XML parser context
10038
 *
10039
 * Parse a content sequence. Stops at EOF or '</'.
10040
 *
10041
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10042
 */
10043
10044
void
10045
16.9k
xmlParseContent(xmlParserCtxtPtr ctxt) {
10046
16.9k
    int nameNr = ctxt->nameNr;
10047
10048
16.9k
    xmlParseContentInternal(ctxt);
10049
10050
16.9k
    if ((ctxt->instate != XML_PARSER_EOF) && (ctxt->nameNr > nameNr)) {
10051
524
        const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
10052
524
        int line = ctxt->pushTab[ctxt->nameNr - 1].line;
10053
524
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
10054
524
                "Premature end of data in tag %s line %d\n",
10055
524
    name, line, NULL);
10056
524
    }
10057
16.9k
}
10058
10059
/**
10060
 * xmlParseElement:
10061
 * @ctxt:  an XML parser context
10062
 *
10063
 * DEPRECATED: Internal function, don't use.
10064
 *
10065
 * parse an XML element
10066
 *
10067
 * [39] element ::= EmptyElemTag | STag content ETag
10068
 *
10069
 * [ WFC: Element Type Match ]
10070
 * The Name in an element's end-tag must match the element type in the
10071
 * start-tag.
10072
 *
10073
 */
10074
10075
void
10076
1.63M
xmlParseElement(xmlParserCtxtPtr ctxt) {
10077
1.63M
    if (xmlParseElementStart(ctxt) != 0)
10078
767k
        return;
10079
10080
871k
    xmlParseContentInternal(ctxt);
10081
871k
    if (ctxt->instate == XML_PARSER_EOF)
10082
73
  return;
10083
10084
871k
    if (CUR == 0) {
10085
810k
        const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
10086
810k
        int line = ctxt->pushTab[ctxt->nameNr - 1].line;
10087
810k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
10088
810k
                "Premature end of data in tag %s line %d\n",
10089
810k
    name, line, NULL);
10090
810k
        return;
10091
810k
    }
10092
10093
61.1k
    xmlParseElementEnd(ctxt);
10094
61.1k
}
10095
10096
/**
10097
 * xmlParseElementStart:
10098
 * @ctxt:  an XML parser context
10099
 *
10100
 * Parse the start of an XML element. Returns -1 in case of error, 0 if an
10101
 * opening tag was parsed, 1 if an empty element was parsed.
10102
 *
10103
 * Always consumes '<'.
10104
 */
10105
static int
10106
10.2M
xmlParseElementStart(xmlParserCtxtPtr ctxt) {
10107
10.2M
    const xmlChar *name;
10108
10.2M
    const xmlChar *prefix = NULL;
10109
10.2M
    const xmlChar *URI = NULL;
10110
10.2M
    xmlParserNodeInfo node_info;
10111
10.2M
    int line, tlen = 0;
10112
10.2M
    xmlNodePtr ret;
10113
10.2M
    int nsNr = ctxt->nsNr;
10114
10115
10.2M
    if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
10116
10.2M
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
10117
0
  xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
10118
0
     "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
10119
0
        xmlParserMaxDepth);
10120
0
  xmlHaltParser(ctxt);
10121
0
  return(-1);
10122
0
    }
10123
10124
    /* Capture start position */
10125
10.2M
    if (ctxt->record_info) {
10126
0
        node_info.begin_pos = ctxt->input->consumed +
10127
0
                          (CUR_PTR - ctxt->input->base);
10128
0
  node_info.begin_line = ctxt->input->line;
10129
0
    }
10130
10131
10.2M
    if (ctxt->spaceNr == 0)
10132
0
  spacePush(ctxt, -1);
10133
10.2M
    else if (*ctxt->space == -2)
10134
2.68M
  spacePush(ctxt, -1);
10135
7.57M
    else
10136
7.57M
  spacePush(ctxt, *ctxt->space);
10137
10138
10.2M
    line = ctxt->input->line;
10139
10.2M
#ifdef LIBXML_SAX1_ENABLED
10140
10.2M
    if (ctxt->sax2)
10141
6.52M
#endif /* LIBXML_SAX1_ENABLED */
10142
6.52M
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
10143
3.73M
#ifdef LIBXML_SAX1_ENABLED
10144
3.73M
    else
10145
3.73M
  name = xmlParseStartTag(ctxt);
10146
10.2M
#endif /* LIBXML_SAX1_ENABLED */
10147
10.2M
    if (ctxt->instate == XML_PARSER_EOF)
10148
76
  return(-1);
10149
10.2M
    if (name == NULL) {
10150
2.92M
  spacePop(ctxt);
10151
2.92M
        return(-1);
10152
2.92M
    }
10153
7.33M
    nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
10154
7.33M
    ret = ctxt->node;
10155
10156
7.33M
#ifdef LIBXML_VALID_ENABLED
10157
    /*
10158
     * [ VC: Root Element Type ]
10159
     * The Name in the document type declaration must match the element
10160
     * type of the root element.
10161
     */
10162
7.33M
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10163
7.33M
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
10164
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
10165
7.33M
#endif /* LIBXML_VALID_ENABLED */
10166
10167
    /*
10168
     * Check for an Empty Element.
10169
     */
10170
7.33M
    if ((RAW == '/') && (NXT(1) == '>')) {
10171
506k
        SKIP(2);
10172
506k
  if (ctxt->sax2) {
10173
378k
      if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
10174
378k
    (!ctxt->disableSAX))
10175
308k
    ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
10176
378k
#ifdef LIBXML_SAX1_ENABLED
10177
378k
  } else {
10178
128k
      if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
10179
128k
    (!ctxt->disableSAX))
10180
93.3k
    ctxt->sax->endElement(ctxt->userData, name);
10181
128k
#endif /* LIBXML_SAX1_ENABLED */
10182
128k
  }
10183
506k
  namePop(ctxt);
10184
506k
  spacePop(ctxt);
10185
506k
  if (nsNr != ctxt->nsNr)
10186
31.8k
      nsPop(ctxt, ctxt->nsNr - nsNr);
10187
506k
  if ( ret != NULL && ctxt->record_info ) {
10188
0
     node_info.end_pos = ctxt->input->consumed +
10189
0
            (CUR_PTR - ctxt->input->base);
10190
0
     node_info.end_line = ctxt->input->line;
10191
0
     node_info.node = ret;
10192
0
     xmlParserAddNodeInfo(ctxt, &node_info);
10193
0
  }
10194
506k
  return(1);
10195
506k
    }
10196
6.82M
    if (RAW == '>') {
10197
3.25M
        NEXT1;
10198
3.56M
    } else {
10199
3.56M
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
10200
3.56M
         "Couldn't find end of Start Tag %s line %d\n",
10201
3.56M
                    name, line, NULL);
10202
10203
  /*
10204
   * end of parsing of this node.
10205
   */
10206
3.56M
  nodePop(ctxt);
10207
3.56M
  namePop(ctxt);
10208
3.56M
  spacePop(ctxt);
10209
3.56M
  if (nsNr != ctxt->nsNr)
10210
180k
      nsPop(ctxt, ctxt->nsNr - nsNr);
10211
10212
  /*
10213
   * Capture end position and add node
10214
   */
10215
3.56M
  if ( ret != NULL && ctxt->record_info ) {
10216
0
     node_info.end_pos = ctxt->input->consumed +
10217
0
            (CUR_PTR - ctxt->input->base);
10218
0
     node_info.end_line = ctxt->input->line;
10219
0
     node_info.node = ret;
10220
0
     xmlParserAddNodeInfo(ctxt, &node_info);
10221
0
  }
10222
3.56M
  return(-1);
10223
3.56M
    }
10224
10225
3.25M
    return(0);
10226
6.82M
}
10227
10228
/**
10229
 * xmlParseElementEnd:
10230
 * @ctxt:  an XML parser context
10231
 *
10232
 * Parse the end of an XML element. Always consumes '</'.
10233
 */
10234
static void
10235
1.05M
xmlParseElementEnd(xmlParserCtxtPtr ctxt) {
10236
1.05M
    xmlParserNodeInfo node_info;
10237
1.05M
    xmlNodePtr ret = ctxt->node;
10238
10239
1.05M
    if (ctxt->nameNr <= 0) {
10240
0
        if ((RAW == '<') && (NXT(1) == '/'))
10241
0
            SKIP(2);
10242
0
        return;
10243
0
    }
10244
10245
    /*
10246
     * parse the end of tag: '</' should be here.
10247
     */
10248
1.05M
    if (ctxt->sax2) {
10249
728k
  xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
10250
728k
  namePop(ctxt);
10251
728k
    }
10252
327k
#ifdef LIBXML_SAX1_ENABLED
10253
327k
    else
10254
327k
  xmlParseEndTag1(ctxt, 0);
10255
1.05M
#endif /* LIBXML_SAX1_ENABLED */
10256
10257
    /*
10258
     * Capture end position and add node
10259
     */
10260
1.05M
    if ( ret != NULL && ctxt->record_info ) {
10261
0
       node_info.end_pos = ctxt->input->consumed +
10262
0
                          (CUR_PTR - ctxt->input->base);
10263
0
       node_info.end_line = ctxt->input->line;
10264
0
       node_info.node = ret;
10265
0
       xmlParserAddNodeInfo(ctxt, &node_info);
10266
0
    }
10267
1.05M
}
10268
10269
/**
10270
 * xmlParseVersionNum:
10271
 * @ctxt:  an XML parser context
10272
 *
10273
 * DEPRECATED: Internal function, don't use.
10274
 *
10275
 * parse the XML version value.
10276
 *
10277
 * [26] VersionNum ::= '1.' [0-9]+
10278
 *
10279
 * In practice allow [0-9].[0-9]+ at that level
10280
 *
10281
 * Returns the string giving the XML version number, or NULL
10282
 */
10283
xmlChar *
10284
1.14M
xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10285
1.14M
    xmlChar *buf = NULL;
10286
1.14M
    int len = 0;
10287
1.14M
    int size = 10;
10288
1.14M
    xmlChar cur;
10289
10290
1.14M
    buf = (xmlChar *) xmlMallocAtomic(size);
10291
1.14M
    if (buf == NULL) {
10292
0
  xmlErrMemory(ctxt, NULL);
10293
0
  return(NULL);
10294
0
    }
10295
1.14M
    cur = CUR;
10296
1.14M
    if (!((cur >= '0') && (cur <= '9'))) {
10297
32.3k
  xmlFree(buf);
10298
32.3k
  return(NULL);
10299
32.3k
    }
10300
1.10M
    buf[len++] = cur;
10301
1.10M
    NEXT;
10302
1.10M
    cur=CUR;
10303
1.10M
    if (cur != '.') {
10304
28.2k
  xmlFree(buf);
10305
28.2k
  return(NULL);
10306
28.2k
    }
10307
1.08M
    buf[len++] = cur;
10308
1.08M
    NEXT;
10309
1.08M
    cur=CUR;
10310
2.35M
    while ((cur >= '0') && (cur <= '9')) {
10311
1.27M
  if (len + 1 >= size) {
10312
10.7k
      xmlChar *tmp;
10313
10314
10.7k
      size *= 2;
10315
10.7k
      tmp = (xmlChar *) xmlRealloc(buf, size);
10316
10.7k
      if (tmp == NULL) {
10317
0
          xmlFree(buf);
10318
0
    xmlErrMemory(ctxt, NULL);
10319
0
    return(NULL);
10320
0
      }
10321
10.7k
      buf = tmp;
10322
10.7k
  }
10323
1.27M
  buf[len++] = cur;
10324
1.27M
  NEXT;
10325
1.27M
  cur=CUR;
10326
1.27M
    }
10327
1.08M
    buf[len] = 0;
10328
1.08M
    return(buf);
10329
1.08M
}
10330
10331
/**
10332
 * xmlParseVersionInfo:
10333
 * @ctxt:  an XML parser context
10334
 *
10335
 * DEPRECATED: Internal function, don't use.
10336
 *
10337
 * parse the XML version.
10338
 *
10339
 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
10340
 *
10341
 * [25] Eq ::= S? '=' S?
10342
 *
10343
 * Returns the version string, e.g. "1.0"
10344
 */
10345
10346
xmlChar *
10347
1.70M
xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10348
1.70M
    xmlChar *version = NULL;
10349
10350
1.70M
    if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
10351
1.24M
  SKIP(7);
10352
1.24M
  SKIP_BLANKS;
10353
1.24M
  if (RAW != '=') {
10354
61.4k
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10355
61.4k
      return(NULL);
10356
61.4k
        }
10357
1.18M
  NEXT;
10358
1.18M
  SKIP_BLANKS;
10359
1.18M
  if (RAW == '"') {
10360
597k
      NEXT;
10361
597k
      version = xmlParseVersionNum(ctxt);
10362
597k
      if (RAW != '"') {
10363
62.8k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10364
62.8k
      } else
10365
534k
          NEXT;
10366
597k
  } else if (RAW == '\''){
10367
545k
      NEXT;
10368
545k
      version = xmlParseVersionNum(ctxt);
10369
545k
      if (RAW != '\'') {
10370
70.3k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10371
70.3k
      } else
10372
474k
          NEXT;
10373
545k
  } else {
10374
38.7k
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10375
38.7k
  }
10376
1.18M
    }
10377
1.64M
    return(version);
10378
1.70M
}
10379
10380
/**
10381
 * xmlParseEncName:
10382
 * @ctxt:  an XML parser context
10383
 *
10384
 * DEPRECATED: Internal function, don't use.
10385
 *
10386
 * parse the XML encoding name
10387
 *
10388
 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10389
 *
10390
 * Returns the encoding name value or NULL
10391
 */
10392
xmlChar *
10393
583k
xmlParseEncName(xmlParserCtxtPtr ctxt) {
10394
583k
    xmlChar *buf = NULL;
10395
583k
    int len = 0;
10396
583k
    int size = 10;
10397
583k
    xmlChar cur;
10398
10399
583k
    cur = CUR;
10400
583k
    if (((cur >= 'a') && (cur <= 'z')) ||
10401
583k
        ((cur >= 'A') && (cur <= 'Z'))) {
10402
569k
  buf = (xmlChar *) xmlMallocAtomic(size);
10403
569k
  if (buf == NULL) {
10404
0
      xmlErrMemory(ctxt, NULL);
10405
0
      return(NULL);
10406
0
  }
10407
10408
569k
  buf[len++] = cur;
10409
569k
  NEXT;
10410
569k
  cur = CUR;
10411
5.78M
  while (((cur >= 'a') && (cur <= 'z')) ||
10412
5.78M
         ((cur >= 'A') && (cur <= 'Z')) ||
10413
5.78M
         ((cur >= '0') && (cur <= '9')) ||
10414
5.78M
         (cur == '.') || (cur == '_') ||
10415
5.78M
         (cur == '-')) {
10416
5.21M
      if (len + 1 >= size) {
10417
459k
          xmlChar *tmp;
10418
10419
459k
    size *= 2;
10420
459k
    tmp = (xmlChar *) xmlRealloc(buf, size);
10421
459k
    if (tmp == NULL) {
10422
0
        xmlErrMemory(ctxt, NULL);
10423
0
        xmlFree(buf);
10424
0
        return(NULL);
10425
0
    }
10426
459k
    buf = tmp;
10427
459k
      }
10428
5.21M
      buf[len++] = cur;
10429
5.21M
      NEXT;
10430
5.21M
      cur = CUR;
10431
5.21M
      if (cur == 0) {
10432
5.07k
          SHRINK;
10433
5.07k
    GROW;
10434
5.07k
    cur = CUR;
10435
5.07k
      }
10436
5.21M
        }
10437
569k
  buf[len] = 0;
10438
569k
    } else {
10439
14.5k
  xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10440
14.5k
    }
10441
583k
    return(buf);
10442
583k
}
10443
10444
/**
10445
 * xmlParseEncodingDecl:
10446
 * @ctxt:  an XML parser context
10447
 *
10448
 * DEPRECATED: Internal function, don't use.
10449
 *
10450
 * parse the XML encoding declaration
10451
 *
10452
 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' |  "'" EncName "'")
10453
 *
10454
 * this setups the conversion filters.
10455
 *
10456
 * Returns the encoding value or NULL
10457
 */
10458
10459
const xmlChar *
10460
1.52M
xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10461
1.52M
    xmlChar *encoding = NULL;
10462
10463
1.52M
    SKIP_BLANKS;
10464
1.52M
    if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
10465
607k
  SKIP(8);
10466
607k
  SKIP_BLANKS;
10467
607k
  if (RAW != '=') {
10468
12.1k
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10469
12.1k
      return(NULL);
10470
12.1k
        }
10471
595k
  NEXT;
10472
595k
  SKIP_BLANKS;
10473
595k
  if (RAW == '"') {
10474
218k
      NEXT;
10475
218k
      encoding = xmlParseEncName(ctxt);
10476
218k
      if (RAW != '"') {
10477
33.9k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10478
33.9k
    xmlFree((xmlChar *) encoding);
10479
33.9k
    return(NULL);
10480
33.9k
      } else
10481
184k
          NEXT;
10482
377k
  } else if (RAW == '\''){
10483
365k
      NEXT;
10484
365k
      encoding = xmlParseEncName(ctxt);
10485
365k
      if (RAW != '\'') {
10486
71.4k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10487
71.4k
    xmlFree((xmlChar *) encoding);
10488
71.4k
    return(NULL);
10489
71.4k
      } else
10490
294k
          NEXT;
10491
365k
  } else {
10492
11.6k
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10493
11.6k
  }
10494
10495
        /*
10496
         * Non standard parsing, allowing the user to ignore encoding
10497
         */
10498
490k
        if (ctxt->options & XML_PARSE_IGNORE_ENC) {
10499
163k
      xmlFree((xmlChar *) encoding);
10500
163k
            return(NULL);
10501
163k
  }
10502
10503
  /*
10504
   * UTF-16 encoding switch has already taken place at this stage,
10505
   * more over the little-endian/big-endian selection is already done
10506
   */
10507
326k
        if ((encoding != NULL) &&
10508
326k
      ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10509
319k
       (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
10510
      /*
10511
       * If no encoding was passed to the parser, that we are
10512
       * using UTF-16 and no decoder is present i.e. the
10513
       * document is apparently UTF-8 compatible, then raise an
10514
       * encoding mismatch fatal error
10515
       */
10516
441
      if ((ctxt->encoding == NULL) &&
10517
441
          (ctxt->input->buf != NULL) &&
10518
441
          (ctxt->input->buf->encoder == NULL)) {
10519
441
    xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10520
441
      "Document labelled UTF-16 but has UTF-8 content\n");
10521
441
      }
10522
441
      if (ctxt->encoding != NULL)
10523
0
    xmlFree((xmlChar *) ctxt->encoding);
10524
441
      ctxt->encoding = encoding;
10525
441
  }
10526
  /*
10527
   * UTF-8 encoding is handled natively
10528
   */
10529
325k
        else if ((encoding != NULL) &&
10530
325k
      ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10531
319k
       (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
10532
29.0k
      if (ctxt->encoding != NULL)
10533
0
    xmlFree((xmlChar *) ctxt->encoding);
10534
29.0k
      ctxt->encoding = encoding;
10535
29.0k
  }
10536
296k
  else if (encoding != NULL) {
10537
290k
      xmlCharEncodingHandlerPtr handler;
10538
10539
290k
      if (ctxt->input->encoding != NULL)
10540
0
    xmlFree((xmlChar *) ctxt->input->encoding);
10541
290k
      ctxt->input->encoding = encoding;
10542
10543
290k
            handler = xmlFindCharEncodingHandler((const char *) encoding);
10544
290k
      if (handler != NULL) {
10545
275k
    if (xmlSwitchToEncoding(ctxt, handler) < 0) {
10546
        /* failed to convert */
10547
280
        ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
10548
280
        return(NULL);
10549
280
    }
10550
275k
      } else {
10551
14.9k
    xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
10552
14.9k
      "Unsupported encoding %s\n", encoding);
10553
14.9k
    return(NULL);
10554
14.9k
      }
10555
290k
  }
10556
326k
    }
10557
1.22M
    return(encoding);
10558
1.52M
}
10559
10560
/**
10561
 * xmlParseSDDecl:
10562
 * @ctxt:  an XML parser context
10563
 *
10564
 * DEPRECATED: Internal function, don't use.
10565
 *
10566
 * parse the XML standalone declaration
10567
 *
10568
 * [32] SDDecl ::= S 'standalone' Eq
10569
 *                 (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10570
 *
10571
 * [ VC: Standalone Document Declaration ]
10572
 * TODO The standalone document declaration must have the value "no"
10573
 * if any external markup declarations contain declarations of:
10574
 *  - attributes with default values, if elements to which these
10575
 *    attributes apply appear in the document without specifications
10576
 *    of values for these attributes, or
10577
 *  - entities (other than amp, lt, gt, apos, quot), if references
10578
 *    to those entities appear in the document, or
10579
 *  - attributes with values subject to normalization, where the
10580
 *    attribute appears in the document with a value which will change
10581
 *    as a result of normalization, or
10582
 *  - element types with element content, if white space occurs directly
10583
 *    within any instance of those types.
10584
 *
10585
 * Returns:
10586
 *   1 if standalone="yes"
10587
 *   0 if standalone="no"
10588
 *  -2 if standalone attribute is missing or invalid
10589
 *    (A standalone value of -2 means that the XML declaration was found,
10590
 *     but no value was specified for the standalone attribute).
10591
 */
10592
10593
int
10594
1.34M
xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10595
1.34M
    int standalone = -2;
10596
10597
1.34M
    SKIP_BLANKS;
10598
1.34M
    if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10599
51.5k
  SKIP(10);
10600
51.5k
        SKIP_BLANKS;
10601
51.5k
  if (RAW != '=') {
10602
4.08k
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10603
4.08k
      return(standalone);
10604
4.08k
        }
10605
47.4k
  NEXT;
10606
47.4k
  SKIP_BLANKS;
10607
47.4k
        if (RAW == '\''){
10608
3.90k
      NEXT;
10609
3.90k
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10610
3.02k
          standalone = 0;
10611
3.02k
                SKIP(2);
10612
3.02k
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10613
879
                 (NXT(2) == 's')) {
10614
75
          standalone = 1;
10615
75
    SKIP(3);
10616
804
            } else {
10617
804
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10618
804
      }
10619
3.90k
      if (RAW != '\'') {
10620
1.00k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10621
1.00k
      } else
10622
2.89k
          NEXT;
10623
43.5k
  } else if (RAW == '"'){
10624
42.4k
      NEXT;
10625
42.4k
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10626
18.7k
          standalone = 0;
10627
18.7k
    SKIP(2);
10628
23.7k
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10629
23.7k
                 (NXT(2) == 's')) {
10630
18.2k
          standalone = 1;
10631
18.2k
                SKIP(3);
10632
18.2k
            } else {
10633
5.51k
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10634
5.51k
      }
10635
42.4k
      if (RAW != '"') {
10636
6.43k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10637
6.43k
      } else
10638
36.0k
          NEXT;
10639
42.4k
  } else {
10640
1.06k
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10641
1.06k
        }
10642
47.4k
    }
10643
1.33M
    return(standalone);
10644
1.34M
}
10645
10646
/**
10647
 * xmlParseXMLDecl:
10648
 * @ctxt:  an XML parser context
10649
 *
10650
 * DEPRECATED: Internal function, don't use.
10651
 *
10652
 * parse an XML declaration header
10653
 *
10654
 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10655
 */
10656
10657
void
10658
1.69M
xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10659
1.69M
    xmlChar *version;
10660
10661
    /*
10662
     * This value for standalone indicates that the document has an
10663
     * XML declaration but it does not have a standalone attribute.
10664
     * It will be overwritten later if a standalone attribute is found.
10665
     */
10666
1.69M
    ctxt->input->standalone = -2;
10667
10668
    /*
10669
     * We know that '<?xml' is here.
10670
     */
10671
1.69M
    SKIP(5);
10672
10673
1.69M
    if (!IS_BLANK_CH(RAW)) {
10674
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10675
0
                 "Blank needed after '<?xml'\n");
10676
0
    }
10677
1.69M
    SKIP_BLANKS;
10678
10679
    /*
10680
     * We must have the VersionInfo here.
10681
     */
10682
1.69M
    version = xmlParseVersionInfo(ctxt);
10683
1.69M
    if (version == NULL) {
10684
620k
  xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10685
1.07M
    } else {
10686
1.07M
  if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10687
      /*
10688
       * Changed here for XML-1.0 5th edition
10689
       */
10690
373k
      if (ctxt->options & XML_PARSE_OLD10) {
10691
123k
    xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10692
123k
                "Unsupported version '%s'\n",
10693
123k
                version);
10694
250k
      } else {
10695
250k
          if ((version[0] == '1') && ((version[1] == '.'))) {
10696
25.1k
        xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10697
25.1k
                      "Unsupported version '%s'\n",
10698
25.1k
          version, NULL);
10699
225k
    } else {
10700
225k
        xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10701
225k
              "Unsupported version '%s'\n",
10702
225k
              version);
10703
225k
    }
10704
250k
      }
10705
373k
  }
10706
1.07M
  if (ctxt->version != NULL)
10707
0
      xmlFree((void *) ctxt->version);
10708
1.07M
  ctxt->version = version;
10709
1.07M
    }
10710
10711
    /*
10712
     * We may have the encoding declaration
10713
     */
10714
1.69M
    if (!IS_BLANK_CH(RAW)) {
10715
887k
        if ((RAW == '?') && (NXT(1) == '>')) {
10716
179k
      SKIP(2);
10717
179k
      return;
10718
179k
  }
10719
707k
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10720
707k
    }
10721
1.52M
    xmlParseEncodingDecl(ctxt);
10722
1.52M
    if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10723
1.52M
         (ctxt->instate == XML_PARSER_EOF)) {
10724
  /*
10725
   * The XML REC instructs us to stop parsing right here
10726
   */
10727
15.1k
        return;
10728
15.1k
    }
10729
10730
    /*
10731
     * We may have the standalone status.
10732
     */
10733
1.50M
    if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10734
256k
        if ((RAW == '?') && (NXT(1) == '>')) {
10735
163k
      SKIP(2);
10736
163k
      return;
10737
163k
  }
10738
92.6k
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10739
92.6k
    }
10740
10741
    /*
10742
     * We can grow the input buffer freely at that point
10743
     */
10744
1.34M
    GROW;
10745
10746
1.34M
    SKIP_BLANKS;
10747
1.34M
    ctxt->input->standalone = xmlParseSDDecl(ctxt);
10748
10749
1.34M
    SKIP_BLANKS;
10750
1.34M
    if ((RAW == '?') && (NXT(1) == '>')) {
10751
182k
        SKIP(2);
10752
1.15M
    } else if (RAW == '>') {
10753
        /* Deprecated old WD ... */
10754
11.1k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10755
11.1k
  NEXT;
10756
1.14M
    } else {
10757
1.14M
        int c;
10758
10759
1.14M
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10760
44.1M
        while ((c = CUR) != 0) {
10761
43.9M
            NEXT;
10762
43.9M
            if (c == '>')
10763
1.00M
                break;
10764
43.9M
        }
10765
1.14M
    }
10766
1.34M
}
10767
10768
/**
10769
 * xmlParseMisc:
10770
 * @ctxt:  an XML parser context
10771
 *
10772
 * DEPRECATED: Internal function, don't use.
10773
 *
10774
 * parse an XML Misc* optional field.
10775
 *
10776
 * [27] Misc ::= Comment | PI |  S
10777
 */
10778
10779
void
10780
4.10M
xmlParseMisc(xmlParserCtxtPtr ctxt) {
10781
4.45M
    while (ctxt->instate != XML_PARSER_EOF) {
10782
4.45M
        SKIP_BLANKS;
10783
4.45M
        GROW;
10784
4.45M
        if ((RAW == '<') && (NXT(1) == '?')) {
10785
298k
      xmlParsePI(ctxt);
10786
4.15M
        } else if (CMP4(CUR_PTR, '<', '!', '-', '-')) {
10787
52.1k
      xmlParseComment(ctxt);
10788
4.10M
        } else {
10789
4.10M
            break;
10790
4.10M
        }
10791
4.45M
    }
10792
4.10M
}
10793
10794
/**
10795
 * xmlParseDocument:
10796
 * @ctxt:  an XML parser context
10797
 *
10798
 * parse an XML document (and build a tree if using the standard SAX
10799
 * interface).
10800
 *
10801
 * [1] document ::= prolog element Misc*
10802
 *
10803
 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10804
 *
10805
 * Returns 0, -1 in case of error. the parser context is augmented
10806
 *                as a result of the parsing.
10807
 */
10808
10809
int
10810
2.39M
xmlParseDocument(xmlParserCtxtPtr ctxt) {
10811
2.39M
    xmlChar start[4];
10812
2.39M
    xmlCharEncoding enc;
10813
10814
2.39M
    xmlInitParser();
10815
10816
2.39M
    if ((ctxt == NULL) || (ctxt->input == NULL))
10817
0
        return(-1);
10818
10819
2.39M
    GROW;
10820
10821
    /*
10822
     * SAX: detecting the level.
10823
     */
10824
2.39M
    xmlDetectSAX2(ctxt);
10825
10826
    /*
10827
     * SAX: beginning of the document processing.
10828
     */
10829
2.39M
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10830
2.39M
        ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10831
2.39M
    if (ctxt->instate == XML_PARSER_EOF)
10832
0
  return(-1);
10833
10834
2.39M
    if ((ctxt->encoding == NULL) &&
10835
2.39M
        ((ctxt->input->end - ctxt->input->cur) >= 4)) {
10836
  /*
10837
   * Get the 4 first bytes and decode the charset
10838
   * if enc != XML_CHAR_ENCODING_NONE
10839
   * plug some encoding conversion routines.
10840
   */
10841
2.26M
  start[0] = RAW;
10842
2.26M
  start[1] = NXT(1);
10843
2.26M
  start[2] = NXT(2);
10844
2.26M
  start[3] = NXT(3);
10845
2.26M
  enc = xmlDetectCharEncoding(&start[0], 4);
10846
2.26M
  if (enc != XML_CHAR_ENCODING_NONE) {
10847
832k
      xmlSwitchEncoding(ctxt, enc);
10848
832k
  }
10849
2.26M
    }
10850
10851
10852
2.39M
    if (CUR == 0) {
10853
69.2k
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10854
69.2k
  return(-1);
10855
69.2k
    }
10856
10857
    /*
10858
     * Check for the XMLDecl in the Prolog.
10859
     * do not GROW here to avoid the detected encoder to decode more
10860
     * than just the first line, unless the amount of data is really
10861
     * too small to hold "<?xml version="1.0" encoding="foo"
10862
     */
10863
2.32M
    if ((ctxt->input->end - ctxt->input->cur) < 35) {
10864
399k
       GROW;
10865
399k
    }
10866
2.32M
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10867
10868
  /*
10869
   * Note that we will switch encoding on the fly.
10870
   */
10871
566k
  xmlParseXMLDecl(ctxt);
10872
566k
  if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10873
566k
      (ctxt->instate == XML_PARSER_EOF)) {
10874
      /*
10875
       * The XML REC instructs us to stop parsing right here
10876
       */
10877
5.04k
      return(-1);
10878
5.04k
  }
10879
561k
  ctxt->standalone = ctxt->input->standalone;
10880
561k
  SKIP_BLANKS;
10881
1.76M
    } else {
10882
1.76M
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10883
1.76M
    }
10884
2.32M
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10885
2.17M
        ctxt->sax->startDocument(ctxt->userData);
10886
2.32M
    if (ctxt->instate == XML_PARSER_EOF)
10887
0
  return(-1);
10888
2.32M
    if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10889
2.32M
        (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10890
0
  ctxt->myDoc->compression = ctxt->input->buf->compressed;
10891
0
    }
10892
10893
    /*
10894
     * The Misc part of the Prolog
10895
     */
10896
2.32M
    xmlParseMisc(ctxt);
10897
10898
    /*
10899
     * Then possibly doc type declaration(s) and more Misc
10900
     * (doctypedecl Misc*)?
10901
     */
10902
2.32M
    GROW;
10903
2.32M
    if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10904
10905
355k
  ctxt->inSubset = 1;
10906
355k
  xmlParseDocTypeDecl(ctxt);
10907
355k
  if (RAW == '[') {
10908
264k
      ctxt->instate = XML_PARSER_DTD;
10909
264k
      xmlParseInternalSubset(ctxt);
10910
264k
      if (ctxt->instate == XML_PARSER_EOF)
10911
209k
    return(-1);
10912
264k
  }
10913
10914
  /*
10915
   * Create and update the external subset.
10916
   */
10917
146k
  ctxt->inSubset = 2;
10918
146k
  if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10919
146k
      (!ctxt->disableSAX))
10920
112k
      ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10921
112k
                                ctxt->extSubSystem, ctxt->extSubURI);
10922
146k
  if (ctxt->instate == XML_PARSER_EOF)
10923
2.73k
      return(-1);
10924
143k
  ctxt->inSubset = 0;
10925
10926
143k
        xmlCleanSpecialAttr(ctxt);
10927
10928
143k
  ctxt->instate = XML_PARSER_PROLOG;
10929
143k
  xmlParseMisc(ctxt);
10930
143k
    }
10931
10932
    /*
10933
     * Time to start parsing the tree itself
10934
     */
10935
2.11M
    GROW;
10936
2.11M
    if (RAW != '<') {
10937
470k
  xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10938
470k
           "Start tag expected, '<' not found\n");
10939
1.63M
    } else {
10940
1.63M
  ctxt->instate = XML_PARSER_CONTENT;
10941
1.63M
  xmlParseElement(ctxt);
10942
1.63M
  ctxt->instate = XML_PARSER_EPILOG;
10943
10944
10945
  /*
10946
   * The Misc part at the end
10947
   */
10948
1.63M
  xmlParseMisc(ctxt);
10949
10950
1.63M
  if (RAW != 0) {
10951
672k
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10952
672k
  }
10953
1.63M
  ctxt->instate = XML_PARSER_EOF;
10954
1.63M
    }
10955
10956
    /*
10957
     * SAX: end of the document processing.
10958
     */
10959
2.11M
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10960
2.11M
        ctxt->sax->endDocument(ctxt->userData);
10961
10962
    /*
10963
     * Remove locally kept entity definitions if the tree was not built
10964
     */
10965
2.11M
    if ((ctxt->myDoc != NULL) &&
10966
2.11M
  (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10967
399
  xmlFreeDoc(ctxt->myDoc);
10968
399
  ctxt->myDoc = NULL;
10969
399
    }
10970
10971
2.11M
    if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10972
7.94k
        ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10973
7.94k
  if (ctxt->valid)
10974
6.38k
      ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10975
7.94k
  if (ctxt->nsWellFormed)
10976
7.31k
      ctxt->myDoc->properties |= XML_DOC_NSVALID;
10977
7.94k
  if (ctxt->options & XML_PARSE_OLD10)
10978
1.24k
      ctxt->myDoc->properties |= XML_DOC_OLD10;
10979
7.94k
    }
10980
2.11M
    if (! ctxt->wellFormed) {
10981
2.10M
  ctxt->valid = 0;
10982
2.10M
  return(-1);
10983
2.10M
    }
10984
7.94k
    return(0);
10985
2.11M
}
10986
10987
/**
10988
 * xmlParseExtParsedEnt:
10989
 * @ctxt:  an XML parser context
10990
 *
10991
 * parse a general parsed entity
10992
 * An external general parsed entity is well-formed if it matches the
10993
 * production labeled extParsedEnt.
10994
 *
10995
 * [78] extParsedEnt ::= TextDecl? content
10996
 *
10997
 * Returns 0, -1 in case of error. the parser context is augmented
10998
 *                as a result of the parsing.
10999
 */
11000
11001
int
11002
0
xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
11003
0
    xmlChar start[4];
11004
0
    xmlCharEncoding enc;
11005
11006
0
    if ((ctxt == NULL) || (ctxt->input == NULL))
11007
0
        return(-1);
11008
11009
0
    xmlDetectSAX2(ctxt);
11010
11011
0
    GROW;
11012
11013
    /*
11014
     * SAX: beginning of the document processing.
11015
     */
11016
0
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11017
0
        ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
11018
11019
    /*
11020
     * Get the 4 first bytes and decode the charset
11021
     * if enc != XML_CHAR_ENCODING_NONE
11022
     * plug some encoding conversion routines.
11023
     */
11024
0
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11025
0
  start[0] = RAW;
11026
0
  start[1] = NXT(1);
11027
0
  start[2] = NXT(2);
11028
0
  start[3] = NXT(3);
11029
0
  enc = xmlDetectCharEncoding(start, 4);
11030
0
  if (enc != XML_CHAR_ENCODING_NONE) {
11031
0
      xmlSwitchEncoding(ctxt, enc);
11032
0
  }
11033
0
    }
11034
11035
11036
0
    if (CUR == 0) {
11037
0
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11038
0
    }
11039
11040
    /*
11041
     * Check for the XMLDecl in the Prolog.
11042
     */
11043
0
    GROW;
11044
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
11045
11046
  /*
11047
   * Note that we will switch encoding on the fly.
11048
   */
11049
0
  xmlParseXMLDecl(ctxt);
11050
0
  if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11051
      /*
11052
       * The XML REC instructs us to stop parsing right here
11053
       */
11054
0
      return(-1);
11055
0
  }
11056
0
  SKIP_BLANKS;
11057
0
    } else {
11058
0
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11059
0
    }
11060
0
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
11061
0
        ctxt->sax->startDocument(ctxt->userData);
11062
0
    if (ctxt->instate == XML_PARSER_EOF)
11063
0
  return(-1);
11064
11065
    /*
11066
     * Doing validity checking on chunk doesn't make sense
11067
     */
11068
0
    ctxt->instate = XML_PARSER_CONTENT;
11069
0
    ctxt->validate = 0;
11070
0
    ctxt->loadsubset = 0;
11071
0
    ctxt->depth = 0;
11072
11073
0
    xmlParseContent(ctxt);
11074
0
    if (ctxt->instate == XML_PARSER_EOF)
11075
0
  return(-1);
11076
11077
0
    if ((RAW == '<') && (NXT(1) == '/')) {
11078
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11079
0
    } else if (RAW != 0) {
11080
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
11081
0
    }
11082
11083
    /*
11084
     * SAX: end of the document processing.
11085
     */
11086
0
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11087
0
        ctxt->sax->endDocument(ctxt->userData);
11088
11089
0
    if (! ctxt->wellFormed) return(-1);
11090
0
    return(0);
11091
0
}
11092
11093
#ifdef LIBXML_PUSH_ENABLED
11094
/************************************************************************
11095
 *                  *
11096
 *    Progressive parsing interfaces        *
11097
 *                  *
11098
 ************************************************************************/
11099
11100
/**
11101
 * xmlParseLookupChar:
11102
 * @ctxt:  an XML parser context
11103
 * @c:  character
11104
 *
11105
 * Check whether the input buffer contains a character.
11106
 */
11107
static int
11108
5.37M
xmlParseLookupChar(xmlParserCtxtPtr ctxt, int c) {
11109
5.37M
    const xmlChar *cur;
11110
11111
5.37M
    if (ctxt->checkIndex == 0) {
11112
4.81M
        cur = ctxt->input->cur + 1;
11113
4.81M
    } else {
11114
564k
        cur = ctxt->input->cur + ctxt->checkIndex;
11115
564k
    }
11116
11117
5.37M
    if (memchr(cur, c, ctxt->input->end - cur) == NULL) {
11118
808k
        ctxt->checkIndex = ctxt->input->end - ctxt->input->cur;
11119
808k
        return(0);
11120
4.56M
    } else {
11121
4.56M
        ctxt->checkIndex = 0;
11122
4.56M
        return(1);
11123
4.56M
    }
11124
5.37M
}
11125
11126
/**
11127
 * xmlParseLookupString:
11128
 * @ctxt:  an XML parser context
11129
 * @startDelta: delta to apply at the start
11130
 * @str:  string
11131
 * @strLen:  length of string
11132
 *
11133
 * Check whether the input buffer contains a string.
11134
 */
11135
static const xmlChar *
11136
xmlParseLookupString(xmlParserCtxtPtr ctxt, size_t startDelta,
11137
6.49M
                     const char *str, size_t strLen) {
11138
6.49M
    const xmlChar *cur, *term;
11139
11140
6.49M
    if (ctxt->checkIndex == 0) {
11141
3.57M
        cur = ctxt->input->cur + startDelta;
11142
3.57M
    } else {
11143
2.91M
        cur = ctxt->input->cur + ctxt->checkIndex;
11144
2.91M
    }
11145
11146
6.49M
    term = BAD_CAST strstr((const char *) cur, str);
11147
6.49M
    if (term == NULL) {
11148
4.04M
        const xmlChar *end = ctxt->input->end;
11149
11150
        /* Rescan (strLen - 1) characters. */
11151
4.04M
        if ((size_t) (end - cur) < strLen)
11152
328k
            end = cur;
11153
3.71M
        else
11154
3.71M
            end -= strLen - 1;
11155
4.04M
        ctxt->checkIndex = end - ctxt->input->cur;
11156
4.04M
    } else {
11157
2.44M
        ctxt->checkIndex = 0;
11158
2.44M
    }
11159
11160
6.49M
    return(term);
11161
6.49M
}
11162
11163
/**
11164
 * xmlParseLookupCharData:
11165
 * @ctxt:  an XML parser context
11166
 *
11167
 * Check whether the input buffer contains terminated char data.
11168
 */
11169
static int
11170
22.5M
xmlParseLookupCharData(xmlParserCtxtPtr ctxt) {
11171
22.5M
    const xmlChar *cur = ctxt->input->cur + ctxt->checkIndex;
11172
22.5M
    const xmlChar *end = ctxt->input->end;
11173
11174
788M
    while (cur < end) {
11175
787M
        if ((*cur == '<') || (*cur == '&')) {
11176
21.5M
            ctxt->checkIndex = 0;
11177
21.5M
            return(1);
11178
21.5M
        }
11179
765M
        cur++;
11180
765M
    }
11181
11182
1.00M
    ctxt->checkIndex = cur - ctxt->input->cur;
11183
1.00M
    return(0);
11184
22.5M
}
11185
11186
/**
11187
 * xmlParseLookupGt:
11188
 * @ctxt:  an XML parser context
11189
 *
11190
 * Check whether there's enough data in the input buffer to finish parsing
11191
 * a start tag. This has to take quotes into account.
11192
 */
11193
static int
11194
15.1M
xmlParseLookupGt(xmlParserCtxtPtr ctxt) {
11195
15.1M
    const xmlChar *cur;
11196
15.1M
    const xmlChar *end = ctxt->input->end;
11197
15.1M
    int state = ctxt->endCheckState;
11198
11199
15.1M
    if (ctxt->checkIndex == 0)
11200
12.0M
        cur = ctxt->input->cur + 1;
11201
3.13M
    else
11202
3.13M
        cur = ctxt->input->cur + ctxt->checkIndex;
11203
11204
921M
    while (cur < end) {
11205
917M
        if (state) {
11206
461M
            if (*cur == state)
11207
10.3M
                state = 0;
11208
461M
        } else if (*cur == '\'' || *cur == '"') {
11209
11.0M
            state = *cur;
11210
444M
        } else if (*cur == '>') {
11211
10.6M
            ctxt->checkIndex = 0;
11212
10.6M
            ctxt->endCheckState = 0;
11213
10.6M
            return(1);
11214
10.6M
        }
11215
906M
        cur++;
11216
906M
    }
11217
11218
4.51M
    ctxt->checkIndex = cur - ctxt->input->cur;
11219
4.51M
    ctxt->endCheckState = state;
11220
4.51M
    return(0);
11221
15.1M
}
11222
11223
/**
11224
 * xmlParseLookupInternalSubset:
11225
 * @ctxt:  an XML parser context
11226
 *
11227
 * Check whether there's enough data in the input buffer to finish parsing
11228
 * the internal subset.
11229
 */
11230
static int
11231
892k
xmlParseLookupInternalSubset(xmlParserCtxtPtr ctxt) {
11232
    /*
11233
     * Sorry, but progressive parsing of the internal subset is not
11234
     * supported. We first check that the full content of the internal
11235
     * subset is available and parsing is launched only at that point.
11236
     * Internal subset ends with "']' S? '>'" in an unescaped section and
11237
     * not in a ']]>' sequence which are conditional sections.
11238
     */
11239
892k
    const xmlChar *cur, *start;
11240
892k
    const xmlChar *end = ctxt->input->end;
11241
892k
    int state = ctxt->endCheckState;
11242
11243
892k
    if (ctxt->checkIndex == 0) {
11244
345k
        cur = ctxt->input->cur + 1;
11245
547k
    } else {
11246
547k
        cur = ctxt->input->cur + ctxt->checkIndex;
11247
547k
    }
11248
892k
    start = cur;
11249
11250
136M
    while (cur < end) {
11251
135M
        if (state == '-') {
11252
18.9M
            if ((*cur == '-') &&
11253
18.9M
                (cur[1] == '-') &&
11254
18.9M
                (cur[2] == '>')) {
11255
149k
                state = 0;
11256
149k
                cur += 3;
11257
149k
                start = cur;
11258
149k
                continue;
11259
149k
            }
11260
18.9M
        }
11261
116M
        else if (state == ']') {
11262
626k
            if (*cur == '>') {
11263
138k
                ctxt->checkIndex = 0;
11264
138k
                ctxt->endCheckState = 0;
11265
138k
                return(1);
11266
138k
            }
11267
488k
            if (IS_BLANK_CH(*cur)) {
11268
26.8k
                state = ' ';
11269
461k
            } else if (*cur != ']') {
11270
85.1k
                state = 0;
11271
85.1k
                start = cur;
11272
85.1k
                continue;
11273
85.1k
            }
11274
488k
        }
11275
116M
        else if (state == ' ') {
11276
119k
            if (*cur == '>') {
11277
2.44k
                ctxt->checkIndex = 0;
11278
2.44k
                ctxt->endCheckState = 0;
11279
2.44k
                return(1);
11280
2.44k
            }
11281
117k
            if (!IS_BLANK_CH(*cur)) {
11282
24.1k
                state = 0;
11283
24.1k
                start = cur;
11284
24.1k
                continue;
11285
24.1k
            }
11286
117k
        }
11287
116M
        else if (state != 0) {
11288
47.5M
            if (*cur == state) {
11289
1.22M
                state = 0;
11290
1.22M
                start = cur + 1;
11291
1.22M
            }
11292
47.5M
        }
11293
68.6M
        else if (*cur == '<') {
11294
1.94M
            if ((cur[1] == '!') &&
11295
1.94M
                (cur[2] == '-') &&
11296
1.94M
                (cur[3] == '-')) {
11297
166k
                state = '-';
11298
166k
                cur += 4;
11299
                /* Don't treat <!--> as comment */
11300
166k
                start = cur;
11301
166k
                continue;
11302
166k
            }
11303
1.94M
        }
11304
66.6M
        else if ((*cur == '"') || (*cur == '\'') || (*cur == ']')) {
11305
1.55M
            state = *cur;
11306
1.55M
        }
11307
11308
135M
        cur++;
11309
135M
    }
11310
11311
    /*
11312
     * Rescan the three last characters to detect "<!--" and "-->"
11313
     * split across chunks.
11314
     */
11315
752k
    if ((state == 0) || (state == '-')) {
11316
449k
        if (cur - start < 3)
11317
25.7k
            cur = start;
11318
423k
        else
11319
423k
            cur -= 3;
11320
449k
    }
11321
752k
    ctxt->checkIndex = cur - ctxt->input->cur;
11322
752k
    ctxt->endCheckState = state;
11323
752k
    return(0);
11324
892k
}
11325
11326
/**
11327
 * xmlCheckCdataPush:
11328
 * @cur: pointer to the block of characters
11329
 * @len: length of the block in bytes
11330
 * @complete: 1 if complete CDATA block is passed in, 0 if partial block
11331
 *
11332
 * Check that the block of characters is okay as SCdata content [20]
11333
 *
11334
 * Returns the number of bytes to pass if okay, a negative index where an
11335
 *         UTF-8 error occurred otherwise
11336
 */
11337
static int
11338
746k
xmlCheckCdataPush(const xmlChar *utf, int len, int complete) {
11339
746k
    int ix;
11340
746k
    unsigned char c;
11341
746k
    int codepoint;
11342
11343
746k
    if ((utf == NULL) || (len <= 0))
11344
7.73k
        return(0);
11345
11346
30.3M
    for (ix = 0; ix < len;) {      /* string is 0-terminated */
11347
30.0M
        c = utf[ix];
11348
30.0M
        if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
11349
23.7M
      if (c >= 0x20)
11350
22.3M
    ix++;
11351
1.38M
      else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11352
1.25M
          ix++;
11353
133k
      else
11354
133k
          return(-ix);
11355
23.7M
  } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
11356
5.87M
      if (ix + 2 > len) return(complete ? -ix : ix);
11357
5.85M
      if ((utf[ix+1] & 0xc0 ) != 0x80)
11358
76.5k
          return(-ix);
11359
5.77M
      codepoint = (utf[ix] & 0x1f) << 6;
11360
5.77M
      codepoint |= utf[ix+1] & 0x3f;
11361
5.77M
      if (!xmlIsCharQ(codepoint))
11362
1.19k
          return(-ix);
11363
5.77M
      ix += 2;
11364
5.77M
  } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
11365
242k
      if (ix + 3 > len) return(complete ? -ix : ix);
11366
238k
      if (((utf[ix+1] & 0xc0) != 0x80) ||
11367
238k
          ((utf[ix+2] & 0xc0) != 0x80))
11368
54.2k
        return(-ix);
11369
184k
      codepoint = (utf[ix] & 0xf) << 12;
11370
184k
      codepoint |= (utf[ix+1] & 0x3f) << 6;
11371
184k
      codepoint |= utf[ix+2] & 0x3f;
11372
184k
      if (!xmlIsCharQ(codepoint))
11373
444
          return(-ix);
11374
183k
      ix += 3;
11375
256k
  } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
11376
120k
      if (ix + 4 > len) return(complete ? -ix : ix);
11377
117k
      if (((utf[ix+1] & 0xc0) != 0x80) ||
11378
117k
          ((utf[ix+2] & 0xc0) != 0x80) ||
11379
117k
    ((utf[ix+3] & 0xc0) != 0x80))
11380
39.2k
        return(-ix);
11381
78.6k
      codepoint = (utf[ix] & 0x7) << 18;
11382
78.6k
      codepoint |= (utf[ix+1] & 0x3f) << 12;
11383
78.6k
      codepoint |= (utf[ix+2] & 0x3f) << 6;
11384
78.6k
      codepoint |= utf[ix+3] & 0x3f;
11385
78.6k
      if (!xmlIsCharQ(codepoint))
11386
1.96k
          return(-ix);
11387
76.6k
      ix += 4;
11388
76.6k
  } else       /* unknown encoding */
11389
135k
      return(-ix);
11390
30.0M
      }
11391
271k
      return(ix);
11392
738k
}
11393
11394
/**
11395
 * xmlParseTryOrFinish:
11396
 * @ctxt:  an XML parser context
11397
 * @terminate:  last chunk indicator
11398
 *
11399
 * Try to progress on parsing
11400
 *
11401
 * Returns zero if no parsing was possible
11402
 */
11403
static int
11404
15.7M
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11405
15.7M
    int ret = 0;
11406
15.7M
    int avail, tlen;
11407
15.7M
    xmlChar cur, next;
11408
11409
15.7M
    if (ctxt->input == NULL)
11410
0
        return(0);
11411
11412
#ifdef DEBUG_PUSH
11413
    switch (ctxt->instate) {
11414
  case XML_PARSER_EOF:
11415
      xmlGenericError(xmlGenericErrorContext,
11416
        "PP: try EOF\n"); break;
11417
  case XML_PARSER_START:
11418
      xmlGenericError(xmlGenericErrorContext,
11419
        "PP: try START\n"); break;
11420
  case XML_PARSER_MISC:
11421
      xmlGenericError(xmlGenericErrorContext,
11422
        "PP: try MISC\n");break;
11423
  case XML_PARSER_COMMENT:
11424
      xmlGenericError(xmlGenericErrorContext,
11425
        "PP: try COMMENT\n");break;
11426
  case XML_PARSER_PROLOG:
11427
      xmlGenericError(xmlGenericErrorContext,
11428
        "PP: try PROLOG\n");break;
11429
  case XML_PARSER_START_TAG:
11430
      xmlGenericError(xmlGenericErrorContext,
11431
        "PP: try START_TAG\n");break;
11432
  case XML_PARSER_CONTENT:
11433
      xmlGenericError(xmlGenericErrorContext,
11434
        "PP: try CONTENT\n");break;
11435
  case XML_PARSER_CDATA_SECTION:
11436
      xmlGenericError(xmlGenericErrorContext,
11437
        "PP: try CDATA_SECTION\n");break;
11438
  case XML_PARSER_END_TAG:
11439
      xmlGenericError(xmlGenericErrorContext,
11440
        "PP: try END_TAG\n");break;
11441
  case XML_PARSER_ENTITY_DECL:
11442
      xmlGenericError(xmlGenericErrorContext,
11443
        "PP: try ENTITY_DECL\n");break;
11444
  case XML_PARSER_ENTITY_VALUE:
11445
      xmlGenericError(xmlGenericErrorContext,
11446
        "PP: try ENTITY_VALUE\n");break;
11447
  case XML_PARSER_ATTRIBUTE_VALUE:
11448
      xmlGenericError(xmlGenericErrorContext,
11449
        "PP: try ATTRIBUTE_VALUE\n");break;
11450
  case XML_PARSER_DTD:
11451
      xmlGenericError(xmlGenericErrorContext,
11452
        "PP: try DTD\n");break;
11453
  case XML_PARSER_EPILOG:
11454
      xmlGenericError(xmlGenericErrorContext,
11455
        "PP: try EPILOG\n");break;
11456
  case XML_PARSER_PI:
11457
      xmlGenericError(xmlGenericErrorContext,
11458
        "PP: try PI\n");break;
11459
        case XML_PARSER_IGNORE:
11460
            xmlGenericError(xmlGenericErrorContext,
11461
        "PP: try IGNORE\n");break;
11462
    }
11463
#endif
11464
11465
15.7M
    if ((ctxt->input != NULL) &&
11466
15.7M
        (ctxt->input->cur - ctxt->input->base > 4096)) {
11467
18.6k
        xmlParserInputShrink(ctxt->input);
11468
18.6k
    }
11469
11470
128M
    while (ctxt->instate != XML_PARSER_EOF) {
11471
128M
  if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11472
1.03M
      return(0);
11473
11474
127M
  if (ctxt->input == NULL) break;
11475
127M
  if (ctxt->input->buf == NULL)
11476
0
      avail = ctxt->input->length -
11477
0
              (ctxt->input->cur - ctxt->input->base);
11478
127M
  else {
11479
      /*
11480
       * If we are operating on converted input, try to flush
11481
       * remaining chars to avoid them stalling in the non-converted
11482
       * buffer. But do not do this in document start where
11483
       * encoding="..." may not have been read and we work on a
11484
       * guessed encoding.
11485
       */
11486
127M
      if ((ctxt->instate != XML_PARSER_START) &&
11487
127M
          (ctxt->input->buf->raw != NULL) &&
11488
127M
    (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
11489
2.60M
                size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11490
2.60M
                                                 ctxt->input);
11491
2.60M
    size_t current = ctxt->input->cur - ctxt->input->base;
11492
11493
2.60M
    xmlParserInputBufferPush(ctxt->input->buf, 0, "");
11494
2.60M
                xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11495
2.60M
                                      base, current);
11496
2.60M
      }
11497
127M
      avail = xmlBufUse(ctxt->input->buf->buffer) -
11498
127M
        (ctxt->input->cur - ctxt->input->base);
11499
127M
  }
11500
127M
        if (avail < 1)
11501
1.15M
      goto done;
11502
126M
        switch (ctxt->instate) {
11503
0
            case XML_PARSER_EOF:
11504
          /*
11505
     * Document parsing is done !
11506
     */
11507
0
          goto done;
11508
11.4M
            case XML_PARSER_START:
11509
11.4M
    if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11510
4.33M
        xmlChar start[4];
11511
4.33M
        xmlCharEncoding enc;
11512
11513
        /*
11514
         * Very first chars read from the document flow.
11515
         */
11516
4.33M
        if (avail < 4)
11517
631k
      goto done;
11518
11519
        /*
11520
         * Get the 4 first bytes and decode the charset
11521
         * if enc != XML_CHAR_ENCODING_NONE
11522
         * plug some encoding conversion routines,
11523
         * else xmlSwitchEncoding will set to (default)
11524
         * UTF8.
11525
         */
11526
3.70M
        start[0] = RAW;
11527
3.70M
        start[1] = NXT(1);
11528
3.70M
        start[2] = NXT(2);
11529
3.70M
        start[3] = NXT(3);
11530
3.70M
        enc = xmlDetectCharEncoding(start, 4);
11531
3.70M
        xmlSwitchEncoding(ctxt, enc);
11532
3.70M
        break;
11533
4.33M
    }
11534
11535
7.15M
    if (avail < 2)
11536
5.76k
        goto done;
11537
7.15M
    cur = ctxt->input->cur[0];
11538
7.15M
    next = ctxt->input->cur[1];
11539
7.15M
    if (cur == 0) {
11540
110k
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11541
110k
      ctxt->sax->setDocumentLocator(ctxt->userData,
11542
110k
                  &xmlDefaultSAXLocator);
11543
110k
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11544
110k
        xmlHaltParser(ctxt);
11545
#ifdef DEBUG_PUSH
11546
        xmlGenericError(xmlGenericErrorContext,
11547
          "PP: entering EOF\n");
11548
#endif
11549
110k
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11550
110k
      ctxt->sax->endDocument(ctxt->userData);
11551
110k
        goto done;
11552
110k
    }
11553
7.03M
          if ((cur == '<') && (next == '?')) {
11554
        /* PI or XML decl */
11555
4.26M
        if (avail < 5) goto done;
11556
4.24M
        if ((!terminate) &&
11557
4.24M
                        (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11558
2.66M
      goto done;
11559
1.57M
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11560
1.57M
      ctxt->sax->setDocumentLocator(ctxt->userData,
11561
1.57M
                  &xmlDefaultSAXLocator);
11562
1.57M
        if ((ctxt->input->cur[2] == 'x') &&
11563
1.57M
      (ctxt->input->cur[3] == 'm') &&
11564
1.57M
      (ctxt->input->cur[4] == 'l') &&
11565
1.57M
      (IS_BLANK_CH(ctxt->input->cur[5]))) {
11566
1.13M
      ret += 5;
11567
#ifdef DEBUG_PUSH
11568
      xmlGenericError(xmlGenericErrorContext,
11569
        "PP: Parsing XML Decl\n");
11570
#endif
11571
1.13M
      xmlParseXMLDecl(ctxt);
11572
1.13M
      if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11573
          /*
11574
           * The XML REC instructs us to stop parsing right
11575
           * here
11576
           */
11577
10.0k
          xmlHaltParser(ctxt);
11578
10.0k
          return(0);
11579
10.0k
      }
11580
1.12M
      ctxt->standalone = ctxt->input->standalone;
11581
1.12M
      if ((ctxt->encoding == NULL) &&
11582
1.12M
          (ctxt->input->encoding != NULL))
11583
183k
          ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11584
1.12M
      if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11585
1.12M
          (!ctxt->disableSAX))
11586
827k
          ctxt->sax->startDocument(ctxt->userData);
11587
1.12M
      ctxt->instate = XML_PARSER_MISC;
11588
#ifdef DEBUG_PUSH
11589
      xmlGenericError(xmlGenericErrorContext,
11590
        "PP: entering MISC\n");
11591
#endif
11592
1.12M
        } else {
11593
445k
      ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11594
445k
      if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11595
445k
          (!ctxt->disableSAX))
11596
445k
          ctxt->sax->startDocument(ctxt->userData);
11597
445k
      ctxt->instate = XML_PARSER_MISC;
11598
#ifdef DEBUG_PUSH
11599
      xmlGenericError(xmlGenericErrorContext,
11600
        "PP: entering MISC\n");
11601
#endif
11602
445k
        }
11603
2.77M
    } else {
11604
2.77M
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11605
2.77M
      ctxt->sax->setDocumentLocator(ctxt->userData,
11606
2.77M
                  &xmlDefaultSAXLocator);
11607
2.77M
        ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11608
2.77M
        if (ctxt->version == NULL) {
11609
0
            xmlErrMemory(ctxt, NULL);
11610
0
      break;
11611
0
        }
11612
2.77M
        if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11613
2.77M
            (!ctxt->disableSAX))
11614
2.77M
      ctxt->sax->startDocument(ctxt->userData);
11615
2.77M
        ctxt->instate = XML_PARSER_MISC;
11616
#ifdef DEBUG_PUSH
11617
        xmlGenericError(xmlGenericErrorContext,
11618
          "PP: entering MISC\n");
11619
#endif
11620
2.77M
    }
11621
4.34M
    break;
11622
19.6M
            case XML_PARSER_START_TAG: {
11623
19.6M
          const xmlChar *name;
11624
19.6M
    const xmlChar *prefix = NULL;
11625
19.6M
    const xmlChar *URI = NULL;
11626
19.6M
                int line = ctxt->input->line;
11627
19.6M
    int nsNr = ctxt->nsNr;
11628
11629
19.6M
    if ((avail < 2) && (ctxt->inputNr == 1))
11630
0
        goto done;
11631
19.6M
    cur = ctxt->input->cur[0];
11632
19.6M
          if (cur != '<') {
11633
394k
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11634
394k
        xmlHaltParser(ctxt);
11635
394k
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11636
394k
      ctxt->sax->endDocument(ctxt->userData);
11637
394k
        goto done;
11638
394k
    }
11639
19.2M
    if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11640
4.14M
                    goto done;
11641
15.0M
    if (ctxt->spaceNr == 0)
11642
582k
        spacePush(ctxt, -1);
11643
14.5M
    else if (*ctxt->space == -2)
11644
3.78M
        spacePush(ctxt, -1);
11645
10.7M
    else
11646
10.7M
        spacePush(ctxt, *ctxt->space);
11647
15.0M
#ifdef LIBXML_SAX1_ENABLED
11648
15.0M
    if (ctxt->sax2)
11649
9.50M
#endif /* LIBXML_SAX1_ENABLED */
11650
9.50M
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
11651
5.58M
#ifdef LIBXML_SAX1_ENABLED
11652
5.58M
    else
11653
5.58M
        name = xmlParseStartTag(ctxt);
11654
15.0M
#endif /* LIBXML_SAX1_ENABLED */
11655
15.0M
    if (ctxt->instate == XML_PARSER_EOF)
11656
128
        goto done;
11657
15.0M
    if (name == NULL) {
11658
443k
        spacePop(ctxt);
11659
443k
        xmlHaltParser(ctxt);
11660
443k
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11661
443k
      ctxt->sax->endDocument(ctxt->userData);
11662
443k
        goto done;
11663
443k
    }
11664
14.6M
#ifdef LIBXML_VALID_ENABLED
11665
    /*
11666
     * [ VC: Root Element Type ]
11667
     * The Name in the document type declaration must match
11668
     * the element type of the root element.
11669
     */
11670
14.6M
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11671
14.6M
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
11672
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11673
14.6M
#endif /* LIBXML_VALID_ENABLED */
11674
11675
    /*
11676
     * Check for an Empty Element.
11677
     */
11678
14.6M
    if ((RAW == '/') && (NXT(1) == '>')) {
11679
997k
        SKIP(2);
11680
11681
997k
        if (ctxt->sax2) {
11682
735k
      if ((ctxt->sax != NULL) &&
11683
735k
          (ctxt->sax->endElementNs != NULL) &&
11684
735k
          (!ctxt->disableSAX))
11685
733k
          ctxt->sax->endElementNs(ctxt->userData, name,
11686
733k
                                  prefix, URI);
11687
735k
      if (ctxt->nsNr - nsNr > 0)
11688
70.3k
          nsPop(ctxt, ctxt->nsNr - nsNr);
11689
735k
#ifdef LIBXML_SAX1_ENABLED
11690
735k
        } else {
11691
261k
      if ((ctxt->sax != NULL) &&
11692
261k
          (ctxt->sax->endElement != NULL) &&
11693
261k
          (!ctxt->disableSAX))
11694
260k
          ctxt->sax->endElement(ctxt->userData, name);
11695
261k
#endif /* LIBXML_SAX1_ENABLED */
11696
261k
        }
11697
997k
        if (ctxt->instate == XML_PARSER_EOF)
11698
0
      goto done;
11699
997k
        spacePop(ctxt);
11700
997k
        if (ctxt->nameNr == 0) {
11701
12.5k
      ctxt->instate = XML_PARSER_EPILOG;
11702
984k
        } else {
11703
984k
      ctxt->instate = XML_PARSER_CONTENT;
11704
984k
        }
11705
997k
        break;
11706
997k
    }
11707
13.6M
    if (RAW == '>') {
11708
5.70M
        NEXT;
11709
7.94M
    } else {
11710
7.94M
        xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11711
7.94M
           "Couldn't find end of Start Tag %s\n",
11712
7.94M
           name);
11713
7.94M
        nodePop(ctxt);
11714
7.94M
        spacePop(ctxt);
11715
7.94M
    }
11716
13.6M
                nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
11717
11718
13.6M
    ctxt->instate = XML_PARSER_CONTENT;
11719
13.6M
                break;
11720
14.6M
      }
11721
85.6M
            case XML_PARSER_CONTENT: {
11722
85.6M
    if ((avail < 2) && (ctxt->inputNr == 1))
11723
246k
        goto done;
11724
85.3M
    cur = ctxt->input->cur[0];
11725
85.3M
    next = ctxt->input->cur[1];
11726
11727
85.3M
    if ((cur == '<') && (next == '/')) {
11728
2.15M
        ctxt->instate = XML_PARSER_END_TAG;
11729
2.15M
        break;
11730
83.2M
          } else if ((cur == '<') && (next == '?')) {
11731
998k
        if ((!terminate) &&
11732
998k
            (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11733
359k
      goto done;
11734
639k
        xmlParsePI(ctxt);
11735
639k
        ctxt->instate = XML_PARSER_CONTENT;
11736
82.2M
    } else if ((cur == '<') && (next != '!')) {
11737
12.4M
        ctxt->instate = XML_PARSER_START_TAG;
11738
12.4M
        break;
11739
69.7M
    } else if ((cur == '<') && (next == '!') &&
11740
69.7M
               (ctxt->input->cur[2] == '-') &&
11741
69.7M
         (ctxt->input->cur[3] == '-')) {
11742
1.13M
        if ((!terminate) &&
11743
1.13M
            (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11744
392k
      goto done;
11745
741k
        xmlParseComment(ctxt);
11746
741k
        ctxt->instate = XML_PARSER_CONTENT;
11747
68.6M
    } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11748
68.6M
        (ctxt->input->cur[2] == '[') &&
11749
68.6M
        (ctxt->input->cur[3] == 'C') &&
11750
68.6M
        (ctxt->input->cur[4] == 'D') &&
11751
68.6M
        (ctxt->input->cur[5] == 'A') &&
11752
68.6M
        (ctxt->input->cur[6] == 'T') &&
11753
68.6M
        (ctxt->input->cur[7] == 'A') &&
11754
68.6M
        (ctxt->input->cur[8] == '[')) {
11755
384k
        SKIP(9);
11756
384k
        ctxt->instate = XML_PARSER_CDATA_SECTION;
11757
384k
        break;
11758
68.2M
    } else if ((cur == '<') && (next == '!') &&
11759
68.2M
               (avail < 9)) {
11760
73.6k
        goto done;
11761
68.1M
    } else if (cur == '<') {
11762
3.19M
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11763
3.19M
                    "detected an error in element content\n");
11764
3.19M
                    SKIP(1);
11765
64.9M
    } else if (cur == '&') {
11766
5.81M
        if ((!terminate) && (!xmlParseLookupChar(ctxt, ';')))
11767
703k
      goto done;
11768
5.10M
        xmlParseReference(ctxt);
11769
59.1M
    } else {
11770
        /* TODO Avoid the extra copy, handle directly !!! */
11771
        /*
11772
         * Goal of the following test is:
11773
         *  - minimize calls to the SAX 'character' callback
11774
         *    when they are mergeable
11775
         *  - handle a problem for isBlank when we only parse
11776
         *    a sequence of blank chars and the next one is
11777
         *    not available to check against '<' presence.
11778
         *  - tries to homogenize the differences in SAX
11779
         *    callbacks between the push and pull versions
11780
         *    of the parser.
11781
         */
11782
59.1M
        if ((ctxt->inputNr == 1) &&
11783
59.1M
            (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11784
35.4M
      if ((!terminate) && (!xmlParseLookupCharData(ctxt)))
11785
1.00M
          goto done;
11786
35.4M
                    }
11787
58.1M
                    ctxt->checkIndex = 0;
11788
58.1M
        xmlParseCharData(ctxt, 0);
11789
58.1M
    }
11790
67.8M
    break;
11791
85.3M
      }
11792
67.8M
            case XML_PARSER_END_TAG:
11793
2.24M
    if (avail < 2)
11794
0
        goto done;
11795
2.24M
    if ((!terminate) && (!xmlParseLookupChar(ctxt, '>')))
11796
105k
        goto done;
11797
2.13M
    if (ctxt->sax2) {
11798
1.42M
              xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
11799
1.42M
        nameNsPop(ctxt);
11800
1.42M
    }
11801
714k
#ifdef LIBXML_SAX1_ENABLED
11802
714k
      else
11803
714k
        xmlParseEndTag1(ctxt, 0);
11804
2.13M
#endif /* LIBXML_SAX1_ENABLED */
11805
2.13M
    if (ctxt->instate == XML_PARSER_EOF) {
11806
        /* Nothing */
11807
2.13M
    } else if (ctxt->nameNr == 0) {
11808
61.4k
        ctxt->instate = XML_PARSER_EPILOG;
11809
2.07M
    } else {
11810
2.07M
        ctxt->instate = XML_PARSER_CONTENT;
11811
2.07M
    }
11812
2.13M
    break;
11813
997k
            case XML_PARSER_CDATA_SECTION: {
11814
          /*
11815
     * The Push mode need to have the SAX callback for
11816
     * cdataBlock merge back contiguous callbacks.
11817
     */
11818
997k
    const xmlChar *term;
11819
11820
997k
                if (terminate) {
11821
                    /*
11822
                     * Don't call xmlParseLookupString. If 'terminate'
11823
                     * is set, checkIndex is invalid.
11824
                     */
11825
163k
                    term = BAD_CAST strstr((const char *) ctxt->input->cur,
11826
163k
                                           "]]>");
11827
833k
                } else {
11828
833k
        term = xmlParseLookupString(ctxt, 0, "]]>", 3);
11829
833k
                }
11830
11831
997k
    if (term == NULL) {
11832
574k
        int tmp, size;
11833
11834
574k
                    if (terminate) {
11835
                        /* Unfinished CDATA section */
11836
84.5k
                        size = ctxt->input->end - ctxt->input->cur;
11837
490k
                    } else {
11838
490k
                        if (avail < XML_PARSER_BIG_BUFFER_SIZE + 2)
11839
251k
                            goto done;
11840
239k
                        ctxt->checkIndex = 0;
11841
                        /* XXX: Why don't we pass the full buffer? */
11842
239k
                        size = XML_PARSER_BIG_BUFFER_SIZE;
11843
239k
                    }
11844
323k
                    tmp = xmlCheckCdataPush(ctxt->input->cur, size, 0);
11845
323k
                    if (tmp <= 0) {
11846
244k
                        tmp = -tmp;
11847
244k
                        ctxt->input->cur += tmp;
11848
244k
                        goto encoding_error;
11849
244k
                    }
11850
79.1k
                    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11851
79.1k
                        if (ctxt->sax->cdataBlock != NULL)
11852
45.7k
                            ctxt->sax->cdataBlock(ctxt->userData,
11853
45.7k
                                                  ctxt->input->cur, tmp);
11854
33.4k
                        else if (ctxt->sax->characters != NULL)
11855
33.4k
                            ctxt->sax->characters(ctxt->userData,
11856
33.4k
                                                  ctxt->input->cur, tmp);
11857
79.1k
                    }
11858
79.1k
                    if (ctxt->instate == XML_PARSER_EOF)
11859
0
                        goto done;
11860
79.1k
                    SKIPL(tmp);
11861
422k
    } else {
11862
422k
                    int base = term - CUR_PTR;
11863
422k
        int tmp;
11864
11865
422k
        tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1);
11866
422k
        if ((tmp < 0) || (tmp != base)) {
11867
207k
      tmp = -tmp;
11868
207k
      ctxt->input->cur += tmp;
11869
207k
      goto encoding_error;
11870
207k
        }
11871
215k
        if ((ctxt->sax != NULL) && (base == 0) &&
11872
215k
            (ctxt->sax->cdataBlock != NULL) &&
11873
215k
            (!ctxt->disableSAX)) {
11874
      /*
11875
       * Special case to provide identical behaviour
11876
       * between pull and push parsers on empty CDATA
11877
       * sections
11878
       */
11879
4.73k
       if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11880
4.73k
           (!strncmp((const char *)&ctxt->input->cur[-9],
11881
4.73k
                     "<![CDATA[", 9)))
11882
4.73k
           ctxt->sax->cdataBlock(ctxt->userData,
11883
4.73k
                                 BAD_CAST "", 0);
11884
210k
        } else if ((ctxt->sax != NULL) && (base > 0) &&
11885
210k
      (!ctxt->disableSAX)) {
11886
207k
      if (ctxt->sax->cdataBlock != NULL)
11887
132k
          ctxt->sax->cdataBlock(ctxt->userData,
11888
132k
              ctxt->input->cur, base);
11889
75.0k
      else if (ctxt->sax->characters != NULL)
11890
75.0k
          ctxt->sax->characters(ctxt->userData,
11891
75.0k
              ctxt->input->cur, base);
11892
207k
        }
11893
215k
        if (ctxt->instate == XML_PARSER_EOF)
11894
0
      goto done;
11895
215k
        SKIPL(base + 3);
11896
215k
        ctxt->instate = XML_PARSER_CONTENT;
11897
#ifdef DEBUG_PUSH
11898
        xmlGenericError(xmlGenericErrorContext,
11899
          "PP: entering CONTENT\n");
11900
#endif
11901
215k
    }
11902
294k
    break;
11903
997k
      }
11904
4.85M
            case XML_PARSER_MISC:
11905
5.08M
            case XML_PARSER_PROLOG:
11906
5.15M
            case XML_PARSER_EPILOG:
11907
5.15M
    SKIP_BLANKS;
11908
5.15M
    if (ctxt->input->buf == NULL)
11909
0
        avail = ctxt->input->length -
11910
0
                (ctxt->input->cur - ctxt->input->base);
11911
5.15M
    else
11912
5.15M
        avail = xmlBufUse(ctxt->input->buf->buffer) -
11913
5.15M
                (ctxt->input->cur - ctxt->input->base);
11914
5.15M
    if (avail < 2)
11915
30.8k
        goto done;
11916
5.12M
    cur = ctxt->input->cur[0];
11917
5.12M
    next = ctxt->input->cur[1];
11918
5.12M
          if ((cur == '<') && (next == '?')) {
11919
549k
        if ((!terminate) &&
11920
549k
                        (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11921
62.1k
      goto done;
11922
#ifdef DEBUG_PUSH
11923
        xmlGenericError(xmlGenericErrorContext,
11924
          "PP: Parsing PI\n");
11925
#endif
11926
487k
        xmlParsePI(ctxt);
11927
487k
        if (ctxt->instate == XML_PARSER_EOF)
11928
0
      goto done;
11929
4.57M
    } else if ((cur == '<') && (next == '!') &&
11930
4.57M
        (ctxt->input->cur[2] == '-') &&
11931
4.57M
        (ctxt->input->cur[3] == '-')) {
11932
140k
        if ((!terminate) &&
11933
140k
                        (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11934
71.6k
      goto done;
11935
#ifdef DEBUG_PUSH
11936
        xmlGenericError(xmlGenericErrorContext,
11937
          "PP: Parsing Comment\n");
11938
#endif
11939
68.5k
        xmlParseComment(ctxt);
11940
68.5k
        if (ctxt->instate == XML_PARSER_EOF)
11941
0
      goto done;
11942
4.43M
    } else if ((ctxt->instate == XML_PARSER_MISC) &&
11943
4.43M
                    (cur == '<') && (next == '!') &&
11944
4.43M
        (ctxt->input->cur[2] == 'D') &&
11945
4.43M
        (ctxt->input->cur[3] == 'O') &&
11946
4.43M
        (ctxt->input->cur[4] == 'C') &&
11947
4.43M
        (ctxt->input->cur[5] == 'T') &&
11948
4.43M
        (ctxt->input->cur[6] == 'Y') &&
11949
4.43M
        (ctxt->input->cur[7] == 'P') &&
11950
4.43M
        (ctxt->input->cur[8] == 'E')) {
11951
991k
        if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11952
363k
                        goto done;
11953
#ifdef DEBUG_PUSH
11954
        xmlGenericError(xmlGenericErrorContext,
11955
          "PP: Parsing internal subset\n");
11956
#endif
11957
627k
        ctxt->inSubset = 1;
11958
627k
        xmlParseDocTypeDecl(ctxt);
11959
627k
        if (ctxt->instate == XML_PARSER_EOF)
11960
0
      goto done;
11961
627k
        if (RAW == '[') {
11962
486k
      ctxt->instate = XML_PARSER_DTD;
11963
#ifdef DEBUG_PUSH
11964
      xmlGenericError(xmlGenericErrorContext,
11965
        "PP: entering DTD\n");
11966
#endif
11967
486k
        } else {
11968
      /*
11969
       * Create and update the external subset.
11970
       */
11971
140k
      ctxt->inSubset = 2;
11972
140k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11973
140k
          (ctxt->sax->externalSubset != NULL))
11974
118k
          ctxt->sax->externalSubset(ctxt->userData,
11975
118k
            ctxt->intSubName, ctxt->extSubSystem,
11976
118k
            ctxt->extSubURI);
11977
140k
      ctxt->inSubset = 0;
11978
140k
      xmlCleanSpecialAttr(ctxt);
11979
140k
      ctxt->instate = XML_PARSER_PROLOG;
11980
#ifdef DEBUG_PUSH
11981
      xmlGenericError(xmlGenericErrorContext,
11982
        "PP: entering PROLOG\n");
11983
#endif
11984
140k
        }
11985
3.44M
    } else if ((cur == '<') && (next == '!') &&
11986
3.44M
               (avail <
11987
62.0k
                            (ctxt->instate == XML_PARSER_MISC ? 9 : 4))) {
11988
25.0k
        goto done;
11989
3.42M
    } else if (ctxt->instate == XML_PARSER_EPILOG) {
11990
44.4k
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11991
44.4k
        xmlHaltParser(ctxt);
11992
#ifdef DEBUG_PUSH
11993
        xmlGenericError(xmlGenericErrorContext,
11994
          "PP: entering EOF\n");
11995
#endif
11996
44.4k
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11997
44.4k
      ctxt->sax->endDocument(ctxt->userData);
11998
44.4k
        goto done;
11999
3.37M
                } else {
12000
3.37M
        ctxt->instate = XML_PARSER_START_TAG;
12001
#ifdef DEBUG_PUSH
12002
        xmlGenericError(xmlGenericErrorContext,
12003
          "PP: entering START_TAG\n");
12004
#endif
12005
3.37M
    }
12006
4.56M
    break;
12007
4.56M
            case XML_PARSER_DTD: {
12008
1.06M
                if ((!terminate) && (!xmlParseLookupInternalSubset(ctxt)))
12009
752k
                    goto done;
12010
316k
    xmlParseInternalSubset(ctxt);
12011
316k
    if (ctxt->instate == XML_PARSER_EOF)
12012
220k
        goto done;
12013
95.3k
    ctxt->inSubset = 2;
12014
95.3k
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
12015
95.3k
        (ctxt->sax->externalSubset != NULL))
12016
89.9k
        ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
12017
89.9k
          ctxt->extSubSystem, ctxt->extSubURI);
12018
95.3k
    ctxt->inSubset = 0;
12019
95.3k
    xmlCleanSpecialAttr(ctxt);
12020
95.3k
    if (ctxt->instate == XML_PARSER_EOF)
12021
614
        goto done;
12022
94.7k
    ctxt->instate = XML_PARSER_PROLOG;
12023
#ifdef DEBUG_PUSH
12024
    xmlGenericError(xmlGenericErrorContext,
12025
      "PP: entering PROLOG\n");
12026
#endif
12027
94.7k
                break;
12028
95.3k
      }
12029
0
            case XML_PARSER_COMMENT:
12030
0
    xmlGenericError(xmlGenericErrorContext,
12031
0
      "PP: internal error, state == COMMENT\n");
12032
0
    ctxt->instate = XML_PARSER_CONTENT;
12033
#ifdef DEBUG_PUSH
12034
    xmlGenericError(xmlGenericErrorContext,
12035
      "PP: entering CONTENT\n");
12036
#endif
12037
0
    break;
12038
0
            case XML_PARSER_IGNORE:
12039
0
    xmlGenericError(xmlGenericErrorContext,
12040
0
      "PP: internal error, state == IGNORE");
12041
0
          ctxt->instate = XML_PARSER_DTD;
12042
#ifdef DEBUG_PUSH
12043
    xmlGenericError(xmlGenericErrorContext,
12044
      "PP: entering DTD\n");
12045
#endif
12046
0
          break;
12047
0
            case XML_PARSER_PI:
12048
0
    xmlGenericError(xmlGenericErrorContext,
12049
0
      "PP: internal error, state == PI\n");
12050
0
    ctxt->instate = XML_PARSER_CONTENT;
12051
#ifdef DEBUG_PUSH
12052
    xmlGenericError(xmlGenericErrorContext,
12053
      "PP: entering CONTENT\n");
12054
#endif
12055
0
    break;
12056
0
            case XML_PARSER_ENTITY_DECL:
12057
0
    xmlGenericError(xmlGenericErrorContext,
12058
0
      "PP: internal error, state == ENTITY_DECL\n");
12059
0
    ctxt->instate = XML_PARSER_DTD;
12060
#ifdef DEBUG_PUSH
12061
    xmlGenericError(xmlGenericErrorContext,
12062
      "PP: entering DTD\n");
12063
#endif
12064
0
    break;
12065
0
            case XML_PARSER_ENTITY_VALUE:
12066
0
    xmlGenericError(xmlGenericErrorContext,
12067
0
      "PP: internal error, state == ENTITY_VALUE\n");
12068
0
    ctxt->instate = XML_PARSER_CONTENT;
12069
#ifdef DEBUG_PUSH
12070
    xmlGenericError(xmlGenericErrorContext,
12071
      "PP: entering DTD\n");
12072
#endif
12073
0
    break;
12074
0
            case XML_PARSER_ATTRIBUTE_VALUE:
12075
0
    xmlGenericError(xmlGenericErrorContext,
12076
0
      "PP: internal error, state == ATTRIBUTE_VALUE\n");
12077
0
    ctxt->instate = XML_PARSER_START_TAG;
12078
#ifdef DEBUG_PUSH
12079
    xmlGenericError(xmlGenericErrorContext,
12080
      "PP: entering START_TAG\n");
12081
#endif
12082
0
    break;
12083
0
            case XML_PARSER_SYSTEM_LITERAL:
12084
0
    xmlGenericError(xmlGenericErrorContext,
12085
0
      "PP: internal error, state == SYSTEM_LITERAL\n");
12086
0
    ctxt->instate = XML_PARSER_START_TAG;
12087
#ifdef DEBUG_PUSH
12088
    xmlGenericError(xmlGenericErrorContext,
12089
      "PP: entering START_TAG\n");
12090
#endif
12091
0
    break;
12092
0
            case XML_PARSER_PUBLIC_LITERAL:
12093
0
    xmlGenericError(xmlGenericErrorContext,
12094
0
      "PP: internal error, state == PUBLIC_LITERAL\n");
12095
0
    ctxt->instate = XML_PARSER_START_TAG;
12096
#ifdef DEBUG_PUSH
12097
    xmlGenericError(xmlGenericErrorContext,
12098
      "PP: entering START_TAG\n");
12099
#endif
12100
0
    break;
12101
126M
  }
12102
126M
    }
12103
14.2M
done:
12104
#ifdef DEBUG_PUSH
12105
    xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
12106
#endif
12107
14.2M
    return(ret);
12108
452k
encoding_error:
12109
452k
    {
12110
452k
        char buffer[150];
12111
12112
452k
  snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12113
452k
      ctxt->input->cur[0], ctxt->input->cur[1],
12114
452k
      ctxt->input->cur[2], ctxt->input->cur[3]);
12115
452k
  __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12116
452k
         "Input is not proper UTF-8, indicate encoding !\n%s",
12117
452k
         BAD_CAST buffer, NULL);
12118
452k
    }
12119
452k
    return(0);
12120
15.7M
}
12121
12122
/**
12123
 * xmlParseChunk:
12124
 * @ctxt:  an XML parser context
12125
 * @chunk:  an char array
12126
 * @size:  the size in byte of the chunk
12127
 * @terminate:  last chunk indicator
12128
 *
12129
 * Parse a Chunk of memory
12130
 *
12131
 * Returns zero if no error, the xmlParserErrors otherwise.
12132
 */
12133
int
12134
xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
12135
18.2M
              int terminate) {
12136
18.2M
    int end_in_lf = 0;
12137
18.2M
    int remain = 0;
12138
12139
18.2M
    if (ctxt == NULL)
12140
0
        return(XML_ERR_INTERNAL_ERROR);
12141
18.2M
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12142
2.74M
        return(ctxt->errNo);
12143
15.4M
    if (ctxt->instate == XML_PARSER_EOF)
12144
24.8k
        return(-1);
12145
15.4M
    if (ctxt->input == NULL)
12146
0
        return(-1);
12147
12148
15.4M
    ctxt->progressive = 1;
12149
15.4M
    if (ctxt->instate == XML_PARSER_START)
12150
7.53M
        xmlDetectSAX2(ctxt);
12151
15.4M
    if ((size > 0) && (chunk != NULL) && (!terminate) &&
12152
15.4M
        (chunk[size - 1] == '\r')) {
12153
86.1k
  end_in_lf = 1;
12154
86.1k
  size--;
12155
86.1k
    }
12156
12157
15.8M
xmldecl_done:
12158
12159
15.8M
    if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12160
15.8M
        (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF))  {
12161
12.3M
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12162
12.3M
  size_t cur = ctxt->input->cur - ctxt->input->base;
12163
12.3M
  int res;
12164
12165
        /*
12166
         * Specific handling if we autodetected an encoding, we should not
12167
         * push more than the first line ... which depend on the encoding
12168
         * And only push the rest once the final encoding was detected
12169
         */
12170
12.3M
        if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
12171
12.3M
            (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
12172
770k
            unsigned int len = 45;
12173
12174
770k
            if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12175
770k
                               BAD_CAST "UTF-16")) ||
12176
770k
                (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12177
10.4k
                               BAD_CAST "UTF16")))
12178
760k
                len = 90;
12179
10.4k
            else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12180
10.4k
                                    BAD_CAST "UCS-4")) ||
12181
10.4k
                     (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12182
312
                                    BAD_CAST "UCS4")))
12183
10.1k
                len = 180;
12184
12185
770k
            if (ctxt->input->buf->rawconsumed < len)
12186
212k
                len -= ctxt->input->buf->rawconsumed;
12187
12188
            /*
12189
             * Change size for reading the initial declaration only
12190
             * if size is greater than len. Otherwise, memmove in xmlBufferAdd
12191
             * will blindly copy extra bytes from memory.
12192
             */
12193
770k
            if ((unsigned int) size > len) {
12194
452k
                remain = size - len;
12195
452k
                size = len;
12196
452k
            } else {
12197
318k
                remain = 0;
12198
318k
            }
12199
770k
        }
12200
12.3M
  res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12201
12.3M
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12202
12.3M
  if (res < 0) {
12203
42.9k
      ctxt->errNo = XML_PARSER_EOF;
12204
42.9k
      xmlHaltParser(ctxt);
12205
42.9k
      return (XML_PARSER_EOF);
12206
42.9k
  }
12207
#ifdef DEBUG_PUSH
12208
  xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12209
#endif
12210
12211
12.3M
    } else if (ctxt->instate != XML_PARSER_EOF) {
12212
3.49M
  if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
12213
3.49M
      xmlParserInputBufferPtr in = ctxt->input->buf;
12214
3.49M
      if ((in->encoder != NULL) && (in->buffer != NULL) &&
12215
3.49M
        (in->raw != NULL)) {
12216
495k
    int nbchars;
12217
495k
    size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
12218
495k
    size_t current = ctxt->input->cur - ctxt->input->base;
12219
12220
495k
    nbchars = xmlCharEncInput(in, terminate);
12221
495k
    xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
12222
495k
    if (nbchars < 0) {
12223
        /* TODO 2.6.0 */
12224
48.9k
        xmlGenericError(xmlGenericErrorContext,
12225
48.9k
            "xmlParseChunk: encoder error\n");
12226
48.9k
                    xmlHaltParser(ctxt);
12227
48.9k
        return(XML_ERR_INVALID_ENCODING);
12228
48.9k
    }
12229
495k
      }
12230
3.49M
  }
12231
3.49M
    }
12232
12233
15.7M
    if (remain != 0) {
12234
440k
        xmlParseTryOrFinish(ctxt, 0);
12235
15.3M
    } else {
12236
15.3M
        xmlParseTryOrFinish(ctxt, terminate);
12237
15.3M
    }
12238
15.7M
    if (ctxt->instate == XML_PARSER_EOF)
12239
1.22M
        return(ctxt->errNo);
12240
12241
14.5M
    if ((ctxt->input != NULL) &&
12242
14.5M
         (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
12243
14.5M
         ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
12244
14.5M
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
12245
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
12246
0
        xmlHaltParser(ctxt);
12247
0
    }
12248
14.5M
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12249
1.04M
        return(ctxt->errNo);
12250
12251
13.5M
    if (remain != 0) {
12252
425k
        chunk += size;
12253
425k
        size = remain;
12254
425k
        remain = 0;
12255
425k
        goto xmldecl_done;
12256
425k
    }
12257
13.0M
    if ((end_in_lf == 1) && (ctxt->input != NULL) &&
12258
13.0M
        (ctxt->input->buf != NULL)) {
12259
76.5k
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
12260
76.5k
           ctxt->input);
12261
76.5k
  size_t current = ctxt->input->cur - ctxt->input->base;
12262
12263
76.5k
  xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
12264
12265
76.5k
  xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
12266
76.5k
            base, current);
12267
76.5k
    }
12268
13.0M
    if (terminate) {
12269
  /*
12270
   * Check for termination
12271
   */
12272
1.61M
  int cur_avail = 0;
12273
12274
1.61M
  if (ctxt->input != NULL) {
12275
1.61M
      if (ctxt->input->buf == NULL)
12276
0
    cur_avail = ctxt->input->length -
12277
0
          (ctxt->input->cur - ctxt->input->base);
12278
1.61M
      else
12279
1.61M
    cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
12280
1.61M
                    (ctxt->input->cur - ctxt->input->base);
12281
1.61M
  }
12282
12283
1.61M
  if ((ctxt->instate != XML_PARSER_EOF) &&
12284
1.61M
      (ctxt->instate != XML_PARSER_EPILOG)) {
12285
1.59M
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12286
1.59M
  }
12287
1.61M
  if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) {
12288
2.91k
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12289
2.91k
  }
12290
1.61M
  if (ctxt->instate != XML_PARSER_EOF) {
12291
1.61M
      if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
12292
1.61M
    ctxt->sax->endDocument(ctxt->userData);
12293
1.61M
  }
12294
1.61M
  ctxt->instate = XML_PARSER_EOF;
12295
1.61M
    }
12296
13.0M
    if (ctxt->wellFormed == 0)
12297
7.51M
  return((xmlParserErrors) ctxt->errNo);
12298
5.56M
    else
12299
5.56M
        return(0);
12300
13.0M
}
12301
12302
/************************************************************************
12303
 *                  *
12304
 *    I/O front end functions to the parser     *
12305
 *                  *
12306
 ************************************************************************/
12307
12308
/**
12309
 * xmlCreatePushParserCtxt:
12310
 * @sax:  a SAX handler
12311
 * @user_data:  The user data returned on SAX callbacks
12312
 * @chunk:  a pointer to an array of chars
12313
 * @size:  number of chars in the array
12314
 * @filename:  an optional file name or URI
12315
 *
12316
 * Create a parser context for using the XML parser in push mode.
12317
 * If @buffer and @size are non-NULL, the data is used to detect
12318
 * the encoding.  The remaining characters will be parsed so they
12319
 * don't need to be fed in again through xmlParseChunk.
12320
 * To allow content encoding detection, @size should be >= 4
12321
 * The value of @filename is used for fetching external entities
12322
 * and error/warning reports.
12323
 *
12324
 * Returns the new parser context or NULL
12325
 */
12326
12327
xmlParserCtxtPtr
12328
xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12329
4.83M
                        const char *chunk, int size, const char *filename) {
12330
4.83M
    xmlParserCtxtPtr ctxt;
12331
4.83M
    xmlParserInputPtr inputStream;
12332
4.83M
    xmlParserInputBufferPtr buf;
12333
4.83M
    xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12334
12335
    /*
12336
     * plug some encoding conversion routines
12337
     */
12338
4.83M
    if ((chunk != NULL) && (size >= 4))
12339
2.26M
  enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12340
12341
4.83M
    buf = xmlAllocParserInputBuffer(enc);
12342
4.83M
    if (buf == NULL) return(NULL);
12343
12344
4.83M
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
12345
4.83M
    if (ctxt == NULL) {
12346
0
        xmlErrMemory(NULL, "creating parser: out of memory\n");
12347
0
  xmlFreeParserInputBuffer(buf);
12348
0
  return(NULL);
12349
0
    }
12350
4.83M
    ctxt->dictNames = 1;
12351
4.83M
    if (filename == NULL) {
12352
2.41M
  ctxt->directory = NULL;
12353
2.41M
    } else {
12354
2.41M
        ctxt->directory = xmlParserGetDirectory(filename);
12355
2.41M
    }
12356
12357
4.83M
    inputStream = xmlNewInputStream(ctxt);
12358
4.83M
    if (inputStream == NULL) {
12359
0
  xmlFreeParserCtxt(ctxt);
12360
0
  xmlFreeParserInputBuffer(buf);
12361
0
  return(NULL);
12362
0
    }
12363
12364
4.83M
    if (filename == NULL)
12365
2.41M
  inputStream->filename = NULL;
12366
2.41M
    else {
12367
2.41M
  inputStream->filename = (char *)
12368
2.41M
      xmlCanonicPath((const xmlChar *) filename);
12369
2.41M
  if (inputStream->filename == NULL) {
12370
0
            xmlFreeInputStream(inputStream);
12371
0
      xmlFreeParserCtxt(ctxt);
12372
0
      xmlFreeParserInputBuffer(buf);
12373
0
      return(NULL);
12374
0
  }
12375
2.41M
    }
12376
4.83M
    inputStream->buf = buf;
12377
4.83M
    xmlBufResetInput(inputStream->buf->buffer, inputStream);
12378
4.83M
    inputPush(ctxt, inputStream);
12379
12380
    /*
12381
     * If the caller didn't provide an initial 'chunk' for determining
12382
     * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12383
     * that it can be automatically determined later
12384
     */
12385
4.83M
    ctxt->charset = XML_CHAR_ENCODING_NONE;
12386
12387
4.83M
    if ((size != 0) && (chunk != NULL) &&
12388
4.83M
        (ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
12389
2.26M
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12390
2.26M
  size_t cur = ctxt->input->cur - ctxt->input->base;
12391
12392
2.26M
  xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12393
12394
2.26M
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12395
#ifdef DEBUG_PUSH
12396
  xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12397
#endif
12398
2.26M
    }
12399
12400
4.83M
    if (enc != XML_CHAR_ENCODING_NONE) {
12401
832k
        xmlSwitchEncoding(ctxt, enc);
12402
832k
    }
12403
12404
4.83M
    return(ctxt);
12405
4.83M
}
12406
#endif /* LIBXML_PUSH_ENABLED */
12407
12408
/**
12409
 * xmlHaltParser:
12410
 * @ctxt:  an XML parser context
12411
 *
12412
 * Blocks further parser processing don't override error
12413
 * for internal use
12414
 */
12415
static void
12416
3.96M
xmlHaltParser(xmlParserCtxtPtr ctxt) {
12417
3.96M
    if (ctxt == NULL)
12418
0
        return;
12419
3.96M
    ctxt->instate = XML_PARSER_EOF;
12420
3.96M
    ctxt->disableSAX = 1;
12421
3.96M
    while (ctxt->inputNr > 1)
12422
3.17k
        xmlFreeInputStream(inputPop(ctxt));
12423
3.96M
    if (ctxt->input != NULL) {
12424
        /*
12425
   * in case there was a specific allocation deallocate before
12426
   * overriding base
12427
   */
12428
3.96M
        if (ctxt->input->free != NULL) {
12429
0
      ctxt->input->free((xmlChar *) ctxt->input->base);
12430
0
      ctxt->input->free = NULL;
12431
0
  }
12432
3.96M
        if (ctxt->input->buf != NULL) {
12433
3.40M
            xmlFreeParserInputBuffer(ctxt->input->buf);
12434
3.40M
            ctxt->input->buf = NULL;
12435
3.40M
        }
12436
3.96M
  ctxt->input->cur = BAD_CAST"";
12437
3.96M
        ctxt->input->length = 0;
12438
3.96M
  ctxt->input->base = ctxt->input->cur;
12439
3.96M
        ctxt->input->end = ctxt->input->cur;
12440
3.96M
    }
12441
3.96M
}
12442
12443
/**
12444
 * xmlStopParser:
12445
 * @ctxt:  an XML parser context
12446
 *
12447
 * Blocks further parser processing
12448
 */
12449
void
12450
2.42M
xmlStopParser(xmlParserCtxtPtr ctxt) {
12451
2.42M
    if (ctxt == NULL)
12452
0
        return;
12453
2.42M
    xmlHaltParser(ctxt);
12454
2.42M
    ctxt->errNo = XML_ERR_USER_STOP;
12455
2.42M
}
12456
12457
/**
12458
 * xmlCreateIOParserCtxt:
12459
 * @sax:  a SAX handler
12460
 * @user_data:  The user data returned on SAX callbacks
12461
 * @ioread:  an I/O read function
12462
 * @ioclose:  an I/O close function
12463
 * @ioctx:  an I/O handler
12464
 * @enc:  the charset encoding if known
12465
 *
12466
 * Create a parser context for using the XML parser with an existing
12467
 * I/O stream
12468
 *
12469
 * Returns the new parser context or NULL
12470
 */
12471
xmlParserCtxtPtr
12472
xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12473
  xmlInputReadCallback   ioread, xmlInputCloseCallback  ioclose,
12474
0
  void *ioctx, xmlCharEncoding enc) {
12475
0
    xmlParserCtxtPtr ctxt;
12476
0
    xmlParserInputPtr inputStream;
12477
0
    xmlParserInputBufferPtr buf;
12478
12479
0
    if (ioread == NULL) return(NULL);
12480
12481
0
    buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
12482
0
    if (buf == NULL) {
12483
0
        if (ioclose != NULL)
12484
0
            ioclose(ioctx);
12485
0
        return (NULL);
12486
0
    }
12487
12488
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
12489
0
    if (ctxt == NULL) {
12490
0
  xmlFreeParserInputBuffer(buf);
12491
0
  return(NULL);
12492
0
    }
12493
12494
0
    inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12495
0
    if (inputStream == NULL) {
12496
0
  xmlFreeParserCtxt(ctxt);
12497
0
  return(NULL);
12498
0
    }
12499
0
    inputPush(ctxt, inputStream);
12500
12501
0
    return(ctxt);
12502
0
}
12503
12504
#ifdef LIBXML_VALID_ENABLED
12505
/************************************************************************
12506
 *                  *
12507
 *    Front ends when parsing a DTD       *
12508
 *                  *
12509
 ************************************************************************/
12510
12511
/**
12512
 * xmlIOParseDTD:
12513
 * @sax:  the SAX handler block or NULL
12514
 * @input:  an Input Buffer
12515
 * @enc:  the charset encoding if known
12516
 *
12517
 * Load and parse a DTD
12518
 *
12519
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12520
 * @input will be freed by the function in any case.
12521
 */
12522
12523
xmlDtdPtr
12524
xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12525
0
        xmlCharEncoding enc) {
12526
0
    xmlDtdPtr ret = NULL;
12527
0
    xmlParserCtxtPtr ctxt;
12528
0
    xmlParserInputPtr pinput = NULL;
12529
0
    xmlChar start[4];
12530
12531
0
    if (input == NULL)
12532
0
  return(NULL);
12533
12534
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12535
0
    if (ctxt == NULL) {
12536
0
        xmlFreeParserInputBuffer(input);
12537
0
  return(NULL);
12538
0
    }
12539
12540
    /* We are loading a DTD */
12541
0
    ctxt->options |= XML_PARSE_DTDLOAD;
12542
12543
0
    xmlDetectSAX2(ctxt);
12544
12545
    /*
12546
     * generate a parser input from the I/O handler
12547
     */
12548
12549
0
    pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12550
0
    if (pinput == NULL) {
12551
0
        xmlFreeParserInputBuffer(input);
12552
0
  xmlFreeParserCtxt(ctxt);
12553
0
  return(NULL);
12554
0
    }
12555
12556
    /*
12557
     * plug some encoding conversion routines here.
12558
     */
12559
0
    if (xmlPushInput(ctxt, pinput) < 0) {
12560
0
  xmlFreeParserCtxt(ctxt);
12561
0
  return(NULL);
12562
0
    }
12563
0
    if (enc != XML_CHAR_ENCODING_NONE) {
12564
0
        xmlSwitchEncoding(ctxt, enc);
12565
0
    }
12566
12567
0
    pinput->filename = NULL;
12568
0
    pinput->line = 1;
12569
0
    pinput->col = 1;
12570
0
    pinput->base = ctxt->input->cur;
12571
0
    pinput->cur = ctxt->input->cur;
12572
0
    pinput->free = NULL;
12573
12574
    /*
12575
     * let's parse that entity knowing it's an external subset.
12576
     */
12577
0
    ctxt->inSubset = 2;
12578
0
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12579
0
    if (ctxt->myDoc == NULL) {
12580
0
  xmlErrMemory(ctxt, "New Doc failed");
12581
0
  return(NULL);
12582
0
    }
12583
0
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
12584
0
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12585
0
                                 BAD_CAST "none", BAD_CAST "none");
12586
12587
0
    if ((enc == XML_CHAR_ENCODING_NONE) &&
12588
0
        ((ctxt->input->end - ctxt->input->cur) >= 4)) {
12589
  /*
12590
   * Get the 4 first bytes and decode the charset
12591
   * if enc != XML_CHAR_ENCODING_NONE
12592
   * plug some encoding conversion routines.
12593
   */
12594
0
  start[0] = RAW;
12595
0
  start[1] = NXT(1);
12596
0
  start[2] = NXT(2);
12597
0
  start[3] = NXT(3);
12598
0
  enc = xmlDetectCharEncoding(start, 4);
12599
0
  if (enc != XML_CHAR_ENCODING_NONE) {
12600
0
      xmlSwitchEncoding(ctxt, enc);
12601
0
  }
12602
0
    }
12603
12604
0
    xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12605
12606
0
    if (ctxt->myDoc != NULL) {
12607
0
  if (ctxt->wellFormed) {
12608
0
      ret = ctxt->myDoc->extSubset;
12609
0
      ctxt->myDoc->extSubset = NULL;
12610
0
      if (ret != NULL) {
12611
0
    xmlNodePtr tmp;
12612
12613
0
    ret->doc = NULL;
12614
0
    tmp = ret->children;
12615
0
    while (tmp != NULL) {
12616
0
        tmp->doc = NULL;
12617
0
        tmp = tmp->next;
12618
0
    }
12619
0
      }
12620
0
  } else {
12621
0
      ret = NULL;
12622
0
  }
12623
0
        xmlFreeDoc(ctxt->myDoc);
12624
0
        ctxt->myDoc = NULL;
12625
0
    }
12626
0
    xmlFreeParserCtxt(ctxt);
12627
12628
0
    return(ret);
12629
0
}
12630
12631
/**
12632
 * xmlSAXParseDTD:
12633
 * @sax:  the SAX handler block
12634
 * @ExternalID:  a NAME* containing the External ID of the DTD
12635
 * @SystemID:  a NAME* containing the URL to the DTD
12636
 *
12637
 * DEPRECATED: Don't use.
12638
 *
12639
 * Load and parse an external subset.
12640
 *
12641
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12642
 */
12643
12644
xmlDtdPtr
12645
xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12646
0
                          const xmlChar *SystemID) {
12647
0
    xmlDtdPtr ret = NULL;
12648
0
    xmlParserCtxtPtr ctxt;
12649
0
    xmlParserInputPtr input = NULL;
12650
0
    xmlCharEncoding enc;
12651
0
    xmlChar* systemIdCanonic;
12652
12653
0
    if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12654
12655
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12656
0
    if (ctxt == NULL) {
12657
0
  return(NULL);
12658
0
    }
12659
12660
    /* We are loading a DTD */
12661
0
    ctxt->options |= XML_PARSE_DTDLOAD;
12662
12663
    /*
12664
     * Canonicalise the system ID
12665
     */
12666
0
    systemIdCanonic = xmlCanonicPath(SystemID);
12667
0
    if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
12668
0
  xmlFreeParserCtxt(ctxt);
12669
0
  return(NULL);
12670
0
    }
12671
12672
    /*
12673
     * Ask the Entity resolver to load the damn thing
12674
     */
12675
12676
0
    if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
12677
0
  input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12678
0
                                   systemIdCanonic);
12679
0
    if (input == NULL) {
12680
0
  xmlFreeParserCtxt(ctxt);
12681
0
  if (systemIdCanonic != NULL)
12682
0
      xmlFree(systemIdCanonic);
12683
0
  return(NULL);
12684
0
    }
12685
12686
    /*
12687
     * plug some encoding conversion routines here.
12688
     */
12689
0
    if (xmlPushInput(ctxt, input) < 0) {
12690
0
  xmlFreeParserCtxt(ctxt);
12691
0
  if (systemIdCanonic != NULL)
12692
0
      xmlFree(systemIdCanonic);
12693
0
  return(NULL);
12694
0
    }
12695
0
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12696
0
  enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12697
0
  xmlSwitchEncoding(ctxt, enc);
12698
0
    }
12699
12700
0
    if (input->filename == NULL)
12701
0
  input->filename = (char *) systemIdCanonic;
12702
0
    else
12703
0
  xmlFree(systemIdCanonic);
12704
0
    input->line = 1;
12705
0
    input->col = 1;
12706
0
    input->base = ctxt->input->cur;
12707
0
    input->cur = ctxt->input->cur;
12708
0
    input->free = NULL;
12709
12710
    /*
12711
     * let's parse that entity knowing it's an external subset.
12712
     */
12713
0
    ctxt->inSubset = 2;
12714
0
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12715
0
    if (ctxt->myDoc == NULL) {
12716
0
  xmlErrMemory(ctxt, "New Doc failed");
12717
0
  xmlFreeParserCtxt(ctxt);
12718
0
  return(NULL);
12719
0
    }
12720
0
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
12721
0
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12722
0
                                 ExternalID, SystemID);
12723
0
    xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12724
12725
0
    if (ctxt->myDoc != NULL) {
12726
0
  if (ctxt->wellFormed) {
12727
0
      ret = ctxt->myDoc->extSubset;
12728
0
      ctxt->myDoc->extSubset = NULL;
12729
0
      if (ret != NULL) {
12730
0
    xmlNodePtr tmp;
12731
12732
0
    ret->doc = NULL;
12733
0
    tmp = ret->children;
12734
0
    while (tmp != NULL) {
12735
0
        tmp->doc = NULL;
12736
0
        tmp = tmp->next;
12737
0
    }
12738
0
      }
12739
0
  } else {
12740
0
      ret = NULL;
12741
0
  }
12742
0
        xmlFreeDoc(ctxt->myDoc);
12743
0
        ctxt->myDoc = NULL;
12744
0
    }
12745
0
    xmlFreeParserCtxt(ctxt);
12746
12747
0
    return(ret);
12748
0
}
12749
12750
12751
/**
12752
 * xmlParseDTD:
12753
 * @ExternalID:  a NAME* containing the External ID of the DTD
12754
 * @SystemID:  a NAME* containing the URL to the DTD
12755
 *
12756
 * Load and parse an external subset.
12757
 *
12758
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12759
 */
12760
12761
xmlDtdPtr
12762
0
xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12763
0
    return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12764
0
}
12765
#endif /* LIBXML_VALID_ENABLED */
12766
12767
/************************************************************************
12768
 *                  *
12769
 *    Front ends when parsing an Entity     *
12770
 *                  *
12771
 ************************************************************************/
12772
12773
/**
12774
 * xmlParseCtxtExternalEntity:
12775
 * @ctx:  the existing parsing context
12776
 * @URL:  the URL for the entity to load
12777
 * @ID:  the System ID for the entity to load
12778
 * @lst:  the return value for the set of parsed nodes
12779
 *
12780
 * Parse an external general entity within an existing parsing context
12781
 * An external general parsed entity is well-formed if it matches the
12782
 * production labeled extParsedEnt.
12783
 *
12784
 * [78] extParsedEnt ::= TextDecl? content
12785
 *
12786
 * Returns 0 if the entity is well formed, -1 in case of args problem and
12787
 *    the parser error code otherwise
12788
 */
12789
12790
int
12791
xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
12792
0
                 const xmlChar *ID, xmlNodePtr *lst) {
12793
0
    void *userData;
12794
12795
0
    if (ctx == NULL) return(-1);
12796
    /*
12797
     * If the user provided their own SAX callbacks, then reuse the
12798
     * userData callback field, otherwise the expected setup in a
12799
     * DOM builder is to have userData == ctxt
12800
     */
12801
0
    if (ctx->userData == ctx)
12802
0
        userData = NULL;
12803
0
    else
12804
0
        userData = ctx->userData;
12805
0
    return xmlParseExternalEntityPrivate(ctx->myDoc, ctx, ctx->sax,
12806
0
                                         userData, ctx->depth + 1,
12807
0
                                         URL, ID, lst);
12808
0
}
12809
12810
/**
12811
 * xmlParseExternalEntityPrivate:
12812
 * @doc:  the document the chunk pertains to
12813
 * @oldctxt:  the previous parser context if available
12814
 * @sax:  the SAX handler block (possibly NULL)
12815
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12816
 * @depth:  Used for loop detection, use 0
12817
 * @URL:  the URL for the entity to load
12818
 * @ID:  the System ID for the entity to load
12819
 * @list:  the return value for the set of parsed nodes
12820
 *
12821
 * Private version of xmlParseExternalEntity()
12822
 *
12823
 * Returns 0 if the entity is well formed, -1 in case of args problem and
12824
 *    the parser error code otherwise
12825
 */
12826
12827
static xmlParserErrors
12828
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
12829
                xmlSAXHandlerPtr sax,
12830
          void *user_data, int depth, const xmlChar *URL,
12831
4.61k
          const xmlChar *ID, xmlNodePtr *list) {
12832
4.61k
    xmlParserCtxtPtr ctxt;
12833
4.61k
    xmlDocPtr newDoc;
12834
4.61k
    xmlNodePtr newRoot;
12835
4.61k
    xmlParserErrors ret = XML_ERR_OK;
12836
4.61k
    xmlChar start[4];
12837
4.61k
    xmlCharEncoding enc;
12838
12839
4.61k
    if (((depth > 40) &&
12840
4.61k
  ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12841
4.61k
  (depth > 100)) {
12842
0
  xmlFatalErrMsg(oldctxt, XML_ERR_ENTITY_LOOP,
12843
0
                       "Maximum entity nesting depth exceeded");
12844
0
        return(XML_ERR_ENTITY_LOOP);
12845
0
    }
12846
12847
4.61k
    if (list != NULL)
12848
4.52k
        *list = NULL;
12849
4.61k
    if ((URL == NULL) && (ID == NULL))
12850
24
  return(XML_ERR_INTERNAL_ERROR);
12851
4.58k
    if (doc == NULL)
12852
0
  return(XML_ERR_INTERNAL_ERROR);
12853
12854
4.58k
    ctxt = xmlCreateEntityParserCtxtInternal(sax, user_data, URL, ID, NULL,
12855
4.58k
                                             oldctxt);
12856
4.58k
    if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
12857
2.47k
    if (oldctxt != NULL) {
12858
2.47k
        ctxt->nbErrors = oldctxt->nbErrors;
12859
2.47k
        ctxt->nbWarnings = oldctxt->nbWarnings;
12860
2.47k
    }
12861
2.47k
    xmlDetectSAX2(ctxt);
12862
12863
2.47k
    newDoc = xmlNewDoc(BAD_CAST "1.0");
12864
2.47k
    if (newDoc == NULL) {
12865
0
  xmlFreeParserCtxt(ctxt);
12866
0
  return(XML_ERR_INTERNAL_ERROR);
12867
0
    }
12868
2.47k
    newDoc->properties = XML_DOC_INTERNAL;
12869
2.47k
    if (doc) {
12870
2.47k
        newDoc->intSubset = doc->intSubset;
12871
2.47k
        newDoc->extSubset = doc->extSubset;
12872
2.47k
        if (doc->dict) {
12873
1.51k
            newDoc->dict = doc->dict;
12874
1.51k
            xmlDictReference(newDoc->dict);
12875
1.51k
        }
12876
2.47k
        if (doc->URL != NULL) {
12877
1.58k
            newDoc->URL = xmlStrdup(doc->URL);
12878
1.58k
        }
12879
2.47k
    }
12880
2.47k
    newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12881
2.47k
    if (newRoot == NULL) {
12882
0
  if (sax != NULL)
12883
0
  xmlFreeParserCtxt(ctxt);
12884
0
  newDoc->intSubset = NULL;
12885
0
  newDoc->extSubset = NULL;
12886
0
        xmlFreeDoc(newDoc);
12887
0
  return(XML_ERR_INTERNAL_ERROR);
12888
0
    }
12889
2.47k
    xmlAddChild((xmlNodePtr) newDoc, newRoot);
12890
2.47k
    nodePush(ctxt, newDoc->children);
12891
2.47k
    if (doc == NULL) {
12892
0
        ctxt->myDoc = newDoc;
12893
2.47k
    } else {
12894
2.47k
        ctxt->myDoc = doc;
12895
2.47k
        newRoot->doc = doc;
12896
2.47k
    }
12897
12898
    /*
12899
     * Get the 4 first bytes and decode the charset
12900
     * if enc != XML_CHAR_ENCODING_NONE
12901
     * plug some encoding conversion routines.
12902
     */
12903
2.47k
    GROW;
12904
2.47k
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12905
2.42k
  start[0] = RAW;
12906
2.42k
  start[1] = NXT(1);
12907
2.42k
  start[2] = NXT(2);
12908
2.42k
  start[3] = NXT(3);
12909
2.42k
  enc = xmlDetectCharEncoding(start, 4);
12910
2.42k
  if (enc != XML_CHAR_ENCODING_NONE) {
12911
973
      xmlSwitchEncoding(ctxt, enc);
12912
973
  }
12913
2.42k
    }
12914
12915
    /*
12916
     * Parse a possible text declaration first
12917
     */
12918
2.47k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
12919
819
  xmlParseTextDecl(ctxt);
12920
        /*
12921
         * An XML-1.0 document can't reference an entity not XML-1.0
12922
         */
12923
819
        if ((xmlStrEqual(oldctxt->version, BAD_CAST "1.0")) &&
12924
819
            (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
12925
325
            xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
12926
325
                           "Version mismatch between document and entity\n");
12927
325
        }
12928
819
    }
12929
12930
2.47k
    ctxt->instate = XML_PARSER_CONTENT;
12931
2.47k
    ctxt->depth = depth;
12932
2.47k
    if (oldctxt != NULL) {
12933
2.47k
  ctxt->_private = oldctxt->_private;
12934
2.47k
  ctxt->loadsubset = oldctxt->loadsubset;
12935
2.47k
  ctxt->validate = oldctxt->validate;
12936
2.47k
  ctxt->valid = oldctxt->valid;
12937
2.47k
  ctxt->replaceEntities = oldctxt->replaceEntities;
12938
2.47k
        if (oldctxt->validate) {
12939
1.00k
            ctxt->vctxt.error = oldctxt->vctxt.error;
12940
1.00k
            ctxt->vctxt.warning = oldctxt->vctxt.warning;
12941
1.00k
            ctxt->vctxt.userData = oldctxt->vctxt.userData;
12942
1.00k
            ctxt->vctxt.flags = oldctxt->vctxt.flags;
12943
1.00k
        }
12944
2.47k
  ctxt->external = oldctxt->external;
12945
2.47k
        if (ctxt->dict) xmlDictFree(ctxt->dict);
12946
2.47k
        ctxt->dict = oldctxt->dict;
12947
2.47k
        ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12948
2.47k
        ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12949
2.47k
        ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12950
2.47k
        ctxt->dictNames = oldctxt->dictNames;
12951
2.47k
        ctxt->attsDefault = oldctxt->attsDefault;
12952
2.47k
        ctxt->attsSpecial = oldctxt->attsSpecial;
12953
2.47k
        ctxt->linenumbers = oldctxt->linenumbers;
12954
2.47k
  ctxt->record_info = oldctxt->record_info;
12955
2.47k
  ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
12956
2.47k
  ctxt->node_seq.length = oldctxt->node_seq.length;
12957
2.47k
  ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
12958
2.47k
    } else {
12959
  /*
12960
   * Doing validity checking on chunk without context
12961
   * doesn't make sense
12962
   */
12963
0
  ctxt->_private = NULL;
12964
0
  ctxt->validate = 0;
12965
0
  ctxt->external = 2;
12966
0
  ctxt->loadsubset = 0;
12967
0
    }
12968
12969
2.47k
    xmlParseContent(ctxt);
12970
12971
2.47k
    if ((RAW == '<') && (NXT(1) == '/')) {
12972
203
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12973
2.27k
    } else if (RAW != 0) {
12974
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12975
0
    }
12976
2.47k
    if (ctxt->node != newDoc->children) {
12977
1.02k
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12978
1.02k
    }
12979
12980
2.47k
    if (!ctxt->wellFormed) {
12981
2.03k
  ret = (xmlParserErrors)ctxt->errNo;
12982
2.03k
        if (oldctxt != NULL) {
12983
2.03k
            oldctxt->errNo = ctxt->errNo;
12984
2.03k
            oldctxt->wellFormed = 0;
12985
2.03k
            xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12986
2.03k
        }
12987
2.03k
    } else {
12988
445
  if (list != NULL) {
12989
445
      xmlNodePtr cur;
12990
12991
      /*
12992
       * Return the newly created nodeset after unlinking it from
12993
       * they pseudo parent.
12994
       */
12995
445
      cur = newDoc->children->children;
12996
445
      *list = cur;
12997
1.20k
      while (cur != NULL) {
12998
757
    cur->parent = NULL;
12999
757
    cur = cur->next;
13000
757
      }
13001
445
            newDoc->children->children = NULL;
13002
445
  }
13003
445
  ret = XML_ERR_OK;
13004
445
    }
13005
13006
    /*
13007
     * Also record the size of the entity parsed
13008
     */
13009
2.47k
    if (ctxt->input != NULL && oldctxt != NULL) {
13010
2.47k
        unsigned long consumed = ctxt->input->consumed;
13011
13012
2.47k
        xmlSaturatedAddSizeT(&consumed, ctxt->input->cur - ctxt->input->base);
13013
13014
2.47k
        xmlSaturatedAdd(&oldctxt->sizeentities, consumed);
13015
2.47k
        xmlSaturatedAdd(&oldctxt->sizeentities, ctxt->sizeentities);
13016
13017
2.47k
        xmlSaturatedAdd(&oldctxt->sizeentcopy, consumed);
13018
2.47k
        xmlSaturatedAdd(&oldctxt->sizeentcopy, ctxt->sizeentcopy);
13019
2.47k
    }
13020
13021
2.47k
    if (oldctxt != NULL) {
13022
2.47k
        ctxt->dict = NULL;
13023
2.47k
        ctxt->attsDefault = NULL;
13024
2.47k
        ctxt->attsSpecial = NULL;
13025
2.47k
        oldctxt->nbErrors = ctxt->nbErrors;
13026
2.47k
        oldctxt->nbWarnings = ctxt->nbWarnings;
13027
2.47k
        oldctxt->validate = ctxt->validate;
13028
2.47k
        oldctxt->valid = ctxt->valid;
13029
2.47k
        oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
13030
2.47k
        oldctxt->node_seq.length = ctxt->node_seq.length;
13031
2.47k
        oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
13032
2.47k
    }
13033
2.47k
    ctxt->node_seq.maximum = 0;
13034
2.47k
    ctxt->node_seq.length = 0;
13035
2.47k
    ctxt->node_seq.buffer = NULL;
13036
2.47k
    xmlFreeParserCtxt(ctxt);
13037
2.47k
    newDoc->intSubset = NULL;
13038
2.47k
    newDoc->extSubset = NULL;
13039
2.47k
    xmlFreeDoc(newDoc);
13040
13041
2.47k
    return(ret);
13042
2.47k
}
13043
13044
#ifdef LIBXML_SAX1_ENABLED
13045
/**
13046
 * xmlParseExternalEntity:
13047
 * @doc:  the document the chunk pertains to
13048
 * @sax:  the SAX handler block (possibly NULL)
13049
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13050
 * @depth:  Used for loop detection, use 0
13051
 * @URL:  the URL for the entity to load
13052
 * @ID:  the System ID for the entity to load
13053
 * @lst:  the return value for the set of parsed nodes
13054
 *
13055
 * Parse an external general entity
13056
 * An external general parsed entity is well-formed if it matches the
13057
 * production labeled extParsedEnt.
13058
 *
13059
 * [78] extParsedEnt ::= TextDecl? content
13060
 *
13061
 * Returns 0 if the entity is well formed, -1 in case of args problem and
13062
 *    the parser error code otherwise
13063
 */
13064
13065
int
13066
xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
13067
0
    int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
13068
0
    return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
13069
0
                           ID, lst));
13070
0
}
13071
13072
/**
13073
 * xmlParseBalancedChunkMemory:
13074
 * @doc:  the document the chunk pertains to (must not be NULL)
13075
 * @sax:  the SAX handler block (possibly NULL)
13076
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13077
 * @depth:  Used for loop detection, use 0
13078
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13079
 * @lst:  the return value for the set of parsed nodes
13080
 *
13081
 * Parse a well-balanced chunk of an XML document
13082
 * called by the parser
13083
 * The allowed sequence for the Well Balanced Chunk is the one defined by
13084
 * the content production in the XML grammar:
13085
 *
13086
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13087
 *
13088
 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13089
 *    the parser error code otherwise
13090
 */
13091
13092
int
13093
xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13094
0
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
13095
0
    return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
13096
0
                                                depth, string, lst, 0 );
13097
0
}
13098
#endif /* LIBXML_SAX1_ENABLED */
13099
13100
/**
13101
 * xmlParseBalancedChunkMemoryInternal:
13102
 * @oldctxt:  the existing parsing context
13103
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13104
 * @user_data:  the user data field for the parser context
13105
 * @lst:  the return value for the set of parsed nodes
13106
 *
13107
 *
13108
 * Parse a well-balanced chunk of an XML document
13109
 * called by the parser
13110
 * The allowed sequence for the Well Balanced Chunk is the one defined by
13111
 * the content production in the XML grammar:
13112
 *
13113
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13114
 *
13115
 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13116
 * error code otherwise
13117
 *
13118
 * In case recover is set to 1, the nodelist will not be empty even if
13119
 * the parsed chunk is not well balanced.
13120
 */
13121
static xmlParserErrors
13122
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
13123
14.5k
  const xmlChar *string, void *user_data, xmlNodePtr *lst) {
13124
14.5k
    xmlParserCtxtPtr ctxt;
13125
14.5k
    xmlDocPtr newDoc = NULL;
13126
14.5k
    xmlNodePtr newRoot;
13127
14.5k
    xmlSAXHandlerPtr oldsax = NULL;
13128
14.5k
    xmlNodePtr content = NULL;
13129
14.5k
    xmlNodePtr last = NULL;
13130
14.5k
    int size;
13131
14.5k
    xmlParserErrors ret = XML_ERR_OK;
13132
14.5k
#ifdef SAX2
13133
14.5k
    int i;
13134
14.5k
#endif
13135
13136
14.5k
    if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13137
14.5k
        (oldctxt->depth >  100)) {
13138
57
  xmlFatalErrMsg(oldctxt, XML_ERR_ENTITY_LOOP,
13139
57
                       "Maximum entity nesting depth exceeded");
13140
57
  return(XML_ERR_ENTITY_LOOP);
13141
57
    }
13142
13143
13144
14.5k
    if (lst != NULL)
13145
14.5k
        *lst = NULL;
13146
14.5k
    if (string == NULL)
13147
0
        return(XML_ERR_INTERNAL_ERROR);
13148
13149
14.5k
    size = xmlStrlen(string);
13150
13151
14.5k
    ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13152
14.5k
    if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13153
14.5k
    ctxt->nbErrors = oldctxt->nbErrors;
13154
14.5k
    ctxt->nbWarnings = oldctxt->nbWarnings;
13155
14.5k
    if (user_data != NULL)
13156
0
  ctxt->userData = user_data;
13157
14.5k
    else
13158
14.5k
  ctxt->userData = ctxt;
13159
14.5k
    if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13160
14.5k
    ctxt->dict = oldctxt->dict;
13161
14.5k
    ctxt->input_id = oldctxt->input_id;
13162
14.5k
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13163
14.5k
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13164
14.5k
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13165
13166
14.5k
#ifdef SAX2
13167
    /* propagate namespaces down the entity */
13168
14.5k
    for (i = 0;i < oldctxt->nsNr;i += 2) {
13169
6
        nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
13170
6
    }
13171
14.5k
#endif
13172
13173
14.5k
    oldsax = ctxt->sax;
13174
14.5k
    ctxt->sax = oldctxt->sax;
13175
14.5k
    xmlDetectSAX2(ctxt);
13176
14.5k
    ctxt->replaceEntities = oldctxt->replaceEntities;
13177
14.5k
    ctxt->options = oldctxt->options;
13178
13179
14.5k
    ctxt->_private = oldctxt->_private;
13180
14.5k
    if (oldctxt->myDoc == NULL) {
13181
0
  newDoc = xmlNewDoc(BAD_CAST "1.0");
13182
0
  if (newDoc == NULL) {
13183
0
      ctxt->sax = oldsax;
13184
0
      ctxt->dict = NULL;
13185
0
      xmlFreeParserCtxt(ctxt);
13186
0
      return(XML_ERR_INTERNAL_ERROR);
13187
0
  }
13188
0
  newDoc->properties = XML_DOC_INTERNAL;
13189
0
  newDoc->dict = ctxt->dict;
13190
0
  xmlDictReference(newDoc->dict);
13191
0
  ctxt->myDoc = newDoc;
13192
14.5k
    } else {
13193
14.5k
  ctxt->myDoc = oldctxt->myDoc;
13194
14.5k
        content = ctxt->myDoc->children;
13195
14.5k
  last = ctxt->myDoc->last;
13196
14.5k
    }
13197
14.5k
    newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
13198
14.5k
    if (newRoot == NULL) {
13199
0
  ctxt->sax = oldsax;
13200
0
  ctxt->dict = NULL;
13201
0
  xmlFreeParserCtxt(ctxt);
13202
0
  if (newDoc != NULL) {
13203
0
      xmlFreeDoc(newDoc);
13204
0
  }
13205
0
  return(XML_ERR_INTERNAL_ERROR);
13206
0
    }
13207
14.5k
    ctxt->myDoc->children = NULL;
13208
14.5k
    ctxt->myDoc->last = NULL;
13209
14.5k
    xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
13210
14.5k
    nodePush(ctxt, ctxt->myDoc->children);
13211
14.5k
    ctxt->instate = XML_PARSER_CONTENT;
13212
14.5k
    ctxt->depth = oldctxt->depth;
13213
13214
14.5k
    ctxt->validate = 0;
13215
14.5k
    ctxt->loadsubset = oldctxt->loadsubset;
13216
14.5k
    if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
13217
  /*
13218
   * ID/IDREF registration will be done in xmlValidateElement below
13219
   */
13220
13.2k
  ctxt->loadsubset |= XML_SKIP_IDS;
13221
13.2k
    }
13222
14.5k
    ctxt->dictNames = oldctxt->dictNames;
13223
14.5k
    ctxt->attsDefault = oldctxt->attsDefault;
13224
14.5k
    ctxt->attsSpecial = oldctxt->attsSpecial;
13225
13226
14.5k
    xmlParseContent(ctxt);
13227
14.5k
    if ((RAW == '<') && (NXT(1) == '/')) {
13228
102
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13229
14.4k
    } else if (RAW != 0) {
13230
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13231
0
    }
13232
14.5k
    if (ctxt->node != ctxt->myDoc->children) {
13233
580
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13234
580
    }
13235
13236
14.5k
    if (!ctxt->wellFormed) {
13237
3.86k
  ret = (xmlParserErrors)ctxt->errNo;
13238
3.86k
        oldctxt->errNo = ctxt->errNo;
13239
3.86k
        oldctxt->wellFormed = 0;
13240
3.86k
        xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13241
10.6k
    } else {
13242
10.6k
        ret = XML_ERR_OK;
13243
10.6k
    }
13244
13245
14.5k
    if ((lst != NULL) && (ret == XML_ERR_OK)) {
13246
10.6k
  xmlNodePtr cur;
13247
13248
  /*
13249
   * Return the newly created nodeset after unlinking it from
13250
   * they pseudo parent.
13251
   */
13252
10.6k
  cur = ctxt->myDoc->children->children;
13253
10.6k
  *lst = cur;
13254
41.8k
  while (cur != NULL) {
13255
31.2k
#ifdef LIBXML_VALID_ENABLED
13256
31.2k
      if ((oldctxt->validate) && (oldctxt->wellFormed) &&
13257
31.2k
    (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
13258
31.2k
    (cur->type == XML_ELEMENT_NODE)) {
13259
7.41k
    oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
13260
7.41k
      oldctxt->myDoc, cur);
13261
7.41k
      }
13262
31.2k
#endif /* LIBXML_VALID_ENABLED */
13263
31.2k
      cur->parent = NULL;
13264
31.2k
      cur = cur->next;
13265
31.2k
  }
13266
10.6k
  ctxt->myDoc->children->children = NULL;
13267
10.6k
    }
13268
14.5k
    if (ctxt->myDoc != NULL) {
13269
14.5k
  xmlFreeNode(ctxt->myDoc->children);
13270
14.5k
        ctxt->myDoc->children = content;
13271
14.5k
        ctxt->myDoc->last = last;
13272
14.5k
    }
13273
13274
    /*
13275
     * Also record the size of the entity parsed
13276
     */
13277
14.5k
    if (ctxt->input != NULL && oldctxt != NULL) {
13278
14.5k
        unsigned long consumed = ctxt->input->consumed;
13279
13280
14.5k
        xmlSaturatedAddSizeT(&consumed, ctxt->input->cur - ctxt->input->base);
13281
13282
14.5k
        xmlSaturatedAdd(&oldctxt->sizeentcopy, consumed);
13283
14.5k
        xmlSaturatedAdd(&oldctxt->sizeentcopy, ctxt->sizeentcopy);
13284
14.5k
    }
13285
13286
14.5k
    oldctxt->nbErrors = ctxt->nbErrors;
13287
14.5k
    oldctxt->nbWarnings = ctxt->nbWarnings;
13288
14.5k
    ctxt->sax = oldsax;
13289
14.5k
    ctxt->dict = NULL;
13290
14.5k
    ctxt->attsDefault = NULL;
13291
14.5k
    ctxt->attsSpecial = NULL;
13292
14.5k
    xmlFreeParserCtxt(ctxt);
13293
14.5k
    if (newDoc != NULL) {
13294
0
  xmlFreeDoc(newDoc);
13295
0
    }
13296
13297
14.5k
    return(ret);
13298
14.5k
}
13299
13300
/**
13301
 * xmlParseInNodeContext:
13302
 * @node:  the context node
13303
 * @data:  the input string
13304
 * @datalen:  the input string length in bytes
13305
 * @options:  a combination of xmlParserOption
13306
 * @lst:  the return value for the set of parsed nodes
13307
 *
13308
 * Parse a well-balanced chunk of an XML document
13309
 * within the context (DTD, namespaces, etc ...) of the given node.
13310
 *
13311
 * The allowed sequence for the data is a Well Balanced Chunk defined by
13312
 * the content production in the XML grammar:
13313
 *
13314
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13315
 *
13316
 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13317
 * error code otherwise
13318
 */
13319
xmlParserErrors
13320
xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13321
0
                      int options, xmlNodePtr *lst) {
13322
0
#ifdef SAX2
13323
0
    xmlParserCtxtPtr ctxt;
13324
0
    xmlDocPtr doc = NULL;
13325
0
    xmlNodePtr fake, cur;
13326
0
    int nsnr = 0;
13327
13328
0
    xmlParserErrors ret = XML_ERR_OK;
13329
13330
    /*
13331
     * check all input parameters, grab the document
13332
     */
13333
0
    if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13334
0
        return(XML_ERR_INTERNAL_ERROR);
13335
0
    switch (node->type) {
13336
0
        case XML_ELEMENT_NODE:
13337
0
        case XML_ATTRIBUTE_NODE:
13338
0
        case XML_TEXT_NODE:
13339
0
        case XML_CDATA_SECTION_NODE:
13340
0
        case XML_ENTITY_REF_NODE:
13341
0
        case XML_PI_NODE:
13342
0
        case XML_COMMENT_NODE:
13343
0
        case XML_DOCUMENT_NODE:
13344
0
        case XML_HTML_DOCUMENT_NODE:
13345
0
      break;
13346
0
  default:
13347
0
      return(XML_ERR_INTERNAL_ERROR);
13348
13349
0
    }
13350
0
    while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13351
0
           (node->type != XML_DOCUMENT_NODE) &&
13352
0
     (node->type != XML_HTML_DOCUMENT_NODE))
13353
0
  node = node->parent;
13354
0
    if (node == NULL)
13355
0
  return(XML_ERR_INTERNAL_ERROR);
13356
0
    if (node->type == XML_ELEMENT_NODE)
13357
0
  doc = node->doc;
13358
0
    else
13359
0
        doc = (xmlDocPtr) node;
13360
0
    if (doc == NULL)
13361
0
  return(XML_ERR_INTERNAL_ERROR);
13362
13363
    /*
13364
     * allocate a context and set-up everything not related to the
13365
     * node position in the tree
13366
     */
13367
0
    if (doc->type == XML_DOCUMENT_NODE)
13368
0
  ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13369
0
#ifdef LIBXML_HTML_ENABLED
13370
0
    else if (doc->type == XML_HTML_DOCUMENT_NODE) {
13371
0
  ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
13372
        /*
13373
         * When parsing in context, it makes no sense to add implied
13374
         * elements like html/body/etc...
13375
         */
13376
0
        options |= HTML_PARSE_NOIMPLIED;
13377
0
    }
13378
0
#endif
13379
0
    else
13380
0
        return(XML_ERR_INTERNAL_ERROR);
13381
13382
0
    if (ctxt == NULL)
13383
0
        return(XML_ERR_NO_MEMORY);
13384
13385
    /*
13386
     * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13387
     * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13388
     * we must wait until the last moment to free the original one.
13389
     */
13390
0
    if (doc->dict != NULL) {
13391
0
        if (ctxt->dict != NULL)
13392
0
      xmlDictFree(ctxt->dict);
13393
0
  ctxt->dict = doc->dict;
13394
0
    } else
13395
0
        options |= XML_PARSE_NODICT;
13396
13397
0
    if (doc->encoding != NULL) {
13398
0
        xmlCharEncodingHandlerPtr hdlr;
13399
13400
0
        if (ctxt->encoding != NULL)
13401
0
      xmlFree((xmlChar *) ctxt->encoding);
13402
0
        ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13403
13404
0
        hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding);
13405
0
        if (hdlr != NULL) {
13406
0
            xmlSwitchToEncoding(ctxt, hdlr);
13407
0
  } else {
13408
0
            return(XML_ERR_UNSUPPORTED_ENCODING);
13409
0
        }
13410
0
    }
13411
13412
0
    xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13413
0
    xmlDetectSAX2(ctxt);
13414
0
    ctxt->myDoc = doc;
13415
    /* parsing in context, i.e. as within existing content */
13416
0
    ctxt->input_id = 2;
13417
0
    ctxt->instate = XML_PARSER_CONTENT;
13418
13419
0
    fake = xmlNewDocComment(node->doc, NULL);
13420
0
    if (fake == NULL) {
13421
0
        xmlFreeParserCtxt(ctxt);
13422
0
  return(XML_ERR_NO_MEMORY);
13423
0
    }
13424
0
    xmlAddChild(node, fake);
13425
13426
0
    if (node->type == XML_ELEMENT_NODE) {
13427
0
  nodePush(ctxt, node);
13428
  /*
13429
   * initialize the SAX2 namespaces stack
13430
   */
13431
0
  cur = node;
13432
0
  while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13433
0
      xmlNsPtr ns = cur->nsDef;
13434
0
      const xmlChar *iprefix, *ihref;
13435
13436
0
      while (ns != NULL) {
13437
0
    if (ctxt->dict) {
13438
0
        iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13439
0
        ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13440
0
    } else {
13441
0
        iprefix = ns->prefix;
13442
0
        ihref = ns->href;
13443
0
    }
13444
13445
0
          if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13446
0
        nsPush(ctxt, iprefix, ihref);
13447
0
        nsnr++;
13448
0
    }
13449
0
    ns = ns->next;
13450
0
      }
13451
0
      cur = cur->parent;
13452
0
  }
13453
0
    }
13454
13455
0
    if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13456
  /*
13457
   * ID/IDREF registration will be done in xmlValidateElement below
13458
   */
13459
0
  ctxt->loadsubset |= XML_SKIP_IDS;
13460
0
    }
13461
13462
0
#ifdef LIBXML_HTML_ENABLED
13463
0
    if (doc->type == XML_HTML_DOCUMENT_NODE)
13464
0
        __htmlParseContent(ctxt);
13465
0
    else
13466
0
#endif
13467
0
  xmlParseContent(ctxt);
13468
13469
0
    nsPop(ctxt, nsnr);
13470
0
    if ((RAW == '<') && (NXT(1) == '/')) {
13471
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13472
0
    } else if (RAW != 0) {
13473
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13474
0
    }
13475
0
    if ((ctxt->node != NULL) && (ctxt->node != node)) {
13476
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13477
0
  ctxt->wellFormed = 0;
13478
0
    }
13479
13480
0
    if (!ctxt->wellFormed) {
13481
0
        if (ctxt->errNo == 0)
13482
0
      ret = XML_ERR_INTERNAL_ERROR;
13483
0
  else
13484
0
      ret = (xmlParserErrors)ctxt->errNo;
13485
0
    } else {
13486
0
        ret = XML_ERR_OK;
13487
0
    }
13488
13489
    /*
13490
     * Return the newly created nodeset after unlinking it from
13491
     * the pseudo sibling.
13492
     */
13493
13494
0
    cur = fake->next;
13495
0
    fake->next = NULL;
13496
0
    node->last = fake;
13497
13498
0
    if (cur != NULL) {
13499
0
  cur->prev = NULL;
13500
0
    }
13501
13502
0
    *lst = cur;
13503
13504
0
    while (cur != NULL) {
13505
0
  cur->parent = NULL;
13506
0
  cur = cur->next;
13507
0
    }
13508
13509
0
    xmlUnlinkNode(fake);
13510
0
    xmlFreeNode(fake);
13511
13512
13513
0
    if (ret != XML_ERR_OK) {
13514
0
        xmlFreeNodeList(*lst);
13515
0
  *lst = NULL;
13516
0
    }
13517
13518
0
    if (doc->dict != NULL)
13519
0
        ctxt->dict = NULL;
13520
0
    xmlFreeParserCtxt(ctxt);
13521
13522
0
    return(ret);
13523
#else /* !SAX2 */
13524
    return(XML_ERR_INTERNAL_ERROR);
13525
#endif
13526
0
}
13527
13528
#ifdef LIBXML_SAX1_ENABLED
13529
/**
13530
 * xmlParseBalancedChunkMemoryRecover:
13531
 * @doc:  the document the chunk pertains to (must not be NULL)
13532
 * @sax:  the SAX handler block (possibly NULL)
13533
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13534
 * @depth:  Used for loop detection, use 0
13535
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13536
 * @lst:  the return value for the set of parsed nodes
13537
 * @recover: return nodes even if the data is broken (use 0)
13538
 *
13539
 *
13540
 * Parse a well-balanced chunk of an XML document
13541
 * called by the parser
13542
 * The allowed sequence for the Well Balanced Chunk is the one defined by
13543
 * the content production in the XML grammar:
13544
 *
13545
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13546
 *
13547
 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13548
 *    the parser error code otherwise
13549
 *
13550
 * In case recover is set to 1, the nodelist will not be empty even if
13551
 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13552
 * some extent.
13553
 */
13554
int
13555
xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13556
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
13557
0
     int recover) {
13558
0
    xmlParserCtxtPtr ctxt;
13559
0
    xmlDocPtr newDoc;
13560
0
    xmlSAXHandlerPtr oldsax = NULL;
13561
0
    xmlNodePtr content, newRoot;
13562
0
    int size;
13563
0
    int ret = 0;
13564
13565
0
    if (depth > 40) {
13566
0
  return(XML_ERR_ENTITY_LOOP);
13567
0
    }
13568
13569
13570
0
    if (lst != NULL)
13571
0
        *lst = NULL;
13572
0
    if (string == NULL)
13573
0
        return(-1);
13574
13575
0
    size = xmlStrlen(string);
13576
13577
0
    ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13578
0
    if (ctxt == NULL) return(-1);
13579
0
    ctxt->userData = ctxt;
13580
0
    if (sax != NULL) {
13581
0
  oldsax = ctxt->sax;
13582
0
        ctxt->sax = sax;
13583
0
  if (user_data != NULL)
13584
0
      ctxt->userData = user_data;
13585
0
    }
13586
0
    newDoc = xmlNewDoc(BAD_CAST "1.0");
13587
0
    if (newDoc == NULL) {
13588
0
  xmlFreeParserCtxt(ctxt);
13589
0
  return(-1);
13590
0
    }
13591
0
    newDoc->properties = XML_DOC_INTERNAL;
13592
0
    if ((doc != NULL) && (doc->dict != NULL)) {
13593
0
        xmlDictFree(ctxt->dict);
13594
0
  ctxt->dict = doc->dict;
13595
0
  xmlDictReference(ctxt->dict);
13596
0
  ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13597
0
  ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13598
0
  ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13599
0
  ctxt->dictNames = 1;
13600
0
    } else {
13601
0
  xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
13602
0
    }
13603
    /* doc == NULL is only supported for historic reasons */
13604
0
    if (doc != NULL) {
13605
0
  newDoc->intSubset = doc->intSubset;
13606
0
  newDoc->extSubset = doc->extSubset;
13607
0
    }
13608
0
    newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13609
0
    if (newRoot == NULL) {
13610
0
  if (sax != NULL)
13611
0
      ctxt->sax = oldsax;
13612
0
  xmlFreeParserCtxt(ctxt);
13613
0
  newDoc->intSubset = NULL;
13614
0
  newDoc->extSubset = NULL;
13615
0
        xmlFreeDoc(newDoc);
13616
0
  return(-1);
13617
0
    }
13618
0
    xmlAddChild((xmlNodePtr) newDoc, newRoot);
13619
0
    nodePush(ctxt, newRoot);
13620
    /* doc == NULL is only supported for historic reasons */
13621
0
    if (doc == NULL) {
13622
0
  ctxt->myDoc = newDoc;
13623
0
    } else {
13624
0
  ctxt->myDoc = newDoc;
13625
0
  newDoc->children->doc = doc;
13626
  /* Ensure that doc has XML spec namespace */
13627
0
  xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13628
0
  newDoc->oldNs = doc->oldNs;
13629
0
    }
13630
0
    ctxt->instate = XML_PARSER_CONTENT;
13631
0
    ctxt->input_id = 2;
13632
0
    ctxt->depth = depth;
13633
13634
    /*
13635
     * Doing validity checking on chunk doesn't make sense
13636
     */
13637
0
    ctxt->validate = 0;
13638
0
    ctxt->loadsubset = 0;
13639
0
    xmlDetectSAX2(ctxt);
13640
13641
0
    if ( doc != NULL ){
13642
0
        content = doc->children;
13643
0
        doc->children = NULL;
13644
0
        xmlParseContent(ctxt);
13645
0
        doc->children = content;
13646
0
    }
13647
0
    else {
13648
0
        xmlParseContent(ctxt);
13649
0
    }
13650
0
    if ((RAW == '<') && (NXT(1) == '/')) {
13651
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13652
0
    } else if (RAW != 0) {
13653
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13654
0
    }
13655
0
    if (ctxt->node != newDoc->children) {
13656
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13657
0
    }
13658
13659
0
    if (!ctxt->wellFormed) {
13660
0
        if (ctxt->errNo == 0)
13661
0
      ret = 1;
13662
0
  else
13663
0
      ret = ctxt->errNo;
13664
0
    } else {
13665
0
      ret = 0;
13666
0
    }
13667
13668
0
    if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13669
0
  xmlNodePtr cur;
13670
13671
  /*
13672
   * Return the newly created nodeset after unlinking it from
13673
   * they pseudo parent.
13674
   */
13675
0
  cur = newDoc->children->children;
13676
0
  *lst = cur;
13677
0
  while (cur != NULL) {
13678
0
      xmlSetTreeDoc(cur, doc);
13679
0
      cur->parent = NULL;
13680
0
      cur = cur->next;
13681
0
  }
13682
0
  newDoc->children->children = NULL;
13683
0
    }
13684
13685
0
    if (sax != NULL)
13686
0
  ctxt->sax = oldsax;
13687
0
    xmlFreeParserCtxt(ctxt);
13688
0
    newDoc->intSubset = NULL;
13689
0
    newDoc->extSubset = NULL;
13690
    /* This leaks the namespace list if doc == NULL */
13691
0
    newDoc->oldNs = NULL;
13692
0
    xmlFreeDoc(newDoc);
13693
13694
0
    return(ret);
13695
0
}
13696
13697
/**
13698
 * xmlSAXParseEntity:
13699
 * @sax:  the SAX handler block
13700
 * @filename:  the filename
13701
 *
13702
 * DEPRECATED: Don't use.
13703
 *
13704
 * parse an XML external entity out of context and build a tree.
13705
 * It use the given SAX function block to handle the parsing callback.
13706
 * If sax is NULL, fallback to the default DOM tree building routines.
13707
 *
13708
 * [78] extParsedEnt ::= TextDecl? content
13709
 *
13710
 * This correspond to a "Well Balanced" chunk
13711
 *
13712
 * Returns the resulting document tree
13713
 */
13714
13715
xmlDocPtr
13716
0
xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13717
0
    xmlDocPtr ret;
13718
0
    xmlParserCtxtPtr ctxt;
13719
13720
0
    ctxt = xmlCreateFileParserCtxt(filename);
13721
0
    if (ctxt == NULL) {
13722
0
  return(NULL);
13723
0
    }
13724
0
    if (sax != NULL) {
13725
0
  if (ctxt->sax != NULL)
13726
0
      xmlFree(ctxt->sax);
13727
0
        ctxt->sax = sax;
13728
0
        ctxt->userData = NULL;
13729
0
    }
13730
13731
0
    xmlParseExtParsedEnt(ctxt);
13732
13733
0
    if (ctxt->wellFormed)
13734
0
  ret = ctxt->myDoc;
13735
0
    else {
13736
0
        ret = NULL;
13737
0
        xmlFreeDoc(ctxt->myDoc);
13738
0
        ctxt->myDoc = NULL;
13739
0
    }
13740
0
    if (sax != NULL)
13741
0
        ctxt->sax = NULL;
13742
0
    xmlFreeParserCtxt(ctxt);
13743
13744
0
    return(ret);
13745
0
}
13746
13747
/**
13748
 * xmlParseEntity:
13749
 * @filename:  the filename
13750
 *
13751
 * parse an XML external entity out of context and build a tree.
13752
 *
13753
 * [78] extParsedEnt ::= TextDecl? content
13754
 *
13755
 * This correspond to a "Well Balanced" chunk
13756
 *
13757
 * Returns the resulting document tree
13758
 */
13759
13760
xmlDocPtr
13761
0
xmlParseEntity(const char *filename) {
13762
0
    return(xmlSAXParseEntity(NULL, filename));
13763
0
}
13764
#endif /* LIBXML_SAX1_ENABLED */
13765
13766
/**
13767
 * xmlCreateEntityParserCtxtInternal:
13768
 * @URL:  the entity URL
13769
 * @ID:  the entity PUBLIC ID
13770
 * @base:  a possible base for the target URI
13771
 * @pctx:  parser context used to set options on new context
13772
 *
13773
 * Create a parser context for an external entity
13774
 * Automatic support for ZLIB/Compress compressed document is provided
13775
 * by default if found at compile-time.
13776
 *
13777
 * Returns the new parser context or NULL
13778
 */
13779
static xmlParserCtxtPtr
13780
xmlCreateEntityParserCtxtInternal(xmlSAXHandlerPtr sax, void *userData,
13781
        const xmlChar *URL, const xmlChar *ID, const xmlChar *base,
13782
4.58k
        xmlParserCtxtPtr pctx) {
13783
4.58k
    xmlParserCtxtPtr ctxt;
13784
4.58k
    xmlParserInputPtr inputStream;
13785
4.58k
    char *directory = NULL;
13786
4.58k
    xmlChar *uri;
13787
13788
4.58k
    ctxt = xmlNewSAXParserCtxt(sax, userData);
13789
4.58k
    if (ctxt == NULL) {
13790
0
  return(NULL);
13791
0
    }
13792
13793
4.58k
    if (pctx != NULL) {
13794
4.58k
        ctxt->options = pctx->options;
13795
4.58k
        ctxt->_private = pctx->_private;
13796
4.58k
  ctxt->input_id = pctx->input_id;
13797
4.58k
    }
13798
13799
    /* Don't read from stdin. */
13800
4.58k
    if (xmlStrcmp(URL, BAD_CAST "-") == 0)
13801
0
        URL = BAD_CAST "./-";
13802
13803
4.58k
    uri = xmlBuildURI(URL, base);
13804
13805
4.58k
    if (uri == NULL) {
13806
268
  inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
13807
268
  if (inputStream == NULL) {
13808
268
      xmlFreeParserCtxt(ctxt);
13809
268
      return(NULL);
13810
268
  }
13811
13812
0
  inputPush(ctxt, inputStream);
13813
13814
0
  if ((ctxt->directory == NULL) && (directory == NULL))
13815
0
      directory = xmlParserGetDirectory((char *)URL);
13816
0
  if ((ctxt->directory == NULL) && (directory != NULL))
13817
0
      ctxt->directory = directory;
13818
4.32k
    } else {
13819
4.32k
  inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
13820
4.32k
  if (inputStream == NULL) {
13821
1.84k
      xmlFree(uri);
13822
1.84k
      xmlFreeParserCtxt(ctxt);
13823
1.84k
      return(NULL);
13824
1.84k
  }
13825
13826
2.47k
  inputPush(ctxt, inputStream);
13827
13828
2.47k
  if ((ctxt->directory == NULL) && (directory == NULL))
13829
2.47k
      directory = xmlParserGetDirectory((char *)uri);
13830
2.47k
  if ((ctxt->directory == NULL) && (directory != NULL))
13831
2.47k
      ctxt->directory = directory;
13832
2.47k
  xmlFree(uri);
13833
2.47k
    }
13834
2.47k
    return(ctxt);
13835
4.58k
}
13836
13837
/**
13838
 * xmlCreateEntityParserCtxt:
13839
 * @URL:  the entity URL
13840
 * @ID:  the entity PUBLIC ID
13841
 * @base:  a possible base for the target URI
13842
 *
13843
 * Create a parser context for an external entity
13844
 * Automatic support for ZLIB/Compress compressed document is provided
13845
 * by default if found at compile-time.
13846
 *
13847
 * Returns the new parser context or NULL
13848
 */
13849
xmlParserCtxtPtr
13850
xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
13851
0
                    const xmlChar *base) {
13852
0
    return xmlCreateEntityParserCtxtInternal(NULL, NULL, URL, ID, base, NULL);
13853
13854
0
}
13855
13856
/************************************************************************
13857
 *                  *
13858
 *    Front ends when parsing from a file     *
13859
 *                  *
13860
 ************************************************************************/
13861
13862
/**
13863
 * xmlCreateURLParserCtxt:
13864
 * @filename:  the filename or URL
13865
 * @options:  a combination of xmlParserOption
13866
 *
13867
 * Create a parser context for a file or URL content.
13868
 * Automatic support for ZLIB/Compress compressed document is provided
13869
 * by default if found at compile-time and for file accesses
13870
 *
13871
 * Returns the new parser context or NULL
13872
 */
13873
xmlParserCtxtPtr
13874
xmlCreateURLParserCtxt(const char *filename, int options)
13875
0
{
13876
0
    xmlParserCtxtPtr ctxt;
13877
0
    xmlParserInputPtr inputStream;
13878
0
    char *directory = NULL;
13879
13880
0
    ctxt = xmlNewParserCtxt();
13881
0
    if (ctxt == NULL) {
13882
0
  xmlErrMemory(NULL, "cannot allocate parser context");
13883
0
  return(NULL);
13884
0
    }
13885
13886
0
    if (options)
13887
0
  xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13888
0
    ctxt->linenumbers = 1;
13889
13890
0
    inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
13891
0
    if (inputStream == NULL) {
13892
0
  xmlFreeParserCtxt(ctxt);
13893
0
  return(NULL);
13894
0
    }
13895
13896
0
    inputPush(ctxt, inputStream);
13897
0
    if ((ctxt->directory == NULL) && (directory == NULL))
13898
0
        directory = xmlParserGetDirectory(filename);
13899
0
    if ((ctxt->directory == NULL) && (directory != NULL))
13900
0
        ctxt->directory = directory;
13901
13902
0
    return(ctxt);
13903
0
}
13904
13905
/**
13906
 * xmlCreateFileParserCtxt:
13907
 * @filename:  the filename
13908
 *
13909
 * Create a parser context for a file content.
13910
 * Automatic support for ZLIB/Compress compressed document is provided
13911
 * by default if found at compile-time.
13912
 *
13913
 * Returns the new parser context or NULL
13914
 */
13915
xmlParserCtxtPtr
13916
xmlCreateFileParserCtxt(const char *filename)
13917
0
{
13918
0
    return(xmlCreateURLParserCtxt(filename, 0));
13919
0
}
13920
13921
#ifdef LIBXML_SAX1_ENABLED
13922
/**
13923
 * xmlSAXParseFileWithData:
13924
 * @sax:  the SAX handler block
13925
 * @filename:  the filename
13926
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
13927
 *             documents
13928
 * @data:  the userdata
13929
 *
13930
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
13931
 *
13932
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13933
 * compressed document is provided by default if found at compile-time.
13934
 * It use the given SAX function block to handle the parsing callback.
13935
 * If sax is NULL, fallback to the default DOM tree building routines.
13936
 *
13937
 * User data (void *) is stored within the parser context in the
13938
 * context's _private member, so it is available nearly everywhere in libxml
13939
 *
13940
 * Returns the resulting document tree
13941
 */
13942
13943
xmlDocPtr
13944
xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
13945
0
                        int recovery, void *data) {
13946
0
    xmlDocPtr ret;
13947
0
    xmlParserCtxtPtr ctxt;
13948
13949
0
    xmlInitParser();
13950
13951
0
    ctxt = xmlCreateFileParserCtxt(filename);
13952
0
    if (ctxt == NULL) {
13953
0
  return(NULL);
13954
0
    }
13955
0
    if (sax != NULL) {
13956
0
  if (ctxt->sax != NULL)
13957
0
      xmlFree(ctxt->sax);
13958
0
        ctxt->sax = sax;
13959
0
    }
13960
0
    xmlDetectSAX2(ctxt);
13961
0
    if (data!=NULL) {
13962
0
  ctxt->_private = data;
13963
0
    }
13964
13965
0
    if (ctxt->directory == NULL)
13966
0
        ctxt->directory = xmlParserGetDirectory(filename);
13967
13968
0
    ctxt->recovery = recovery;
13969
13970
0
    xmlParseDocument(ctxt);
13971
13972
0
    if ((ctxt->wellFormed) || recovery) {
13973
0
        ret = ctxt->myDoc;
13974
0
  if ((ret != NULL) && (ctxt->input->buf != NULL)) {
13975
0
      if (ctxt->input->buf->compressed > 0)
13976
0
    ret->compression = 9;
13977
0
      else
13978
0
    ret->compression = ctxt->input->buf->compressed;
13979
0
  }
13980
0
    }
13981
0
    else {
13982
0
       ret = NULL;
13983
0
       xmlFreeDoc(ctxt->myDoc);
13984
0
       ctxt->myDoc = NULL;
13985
0
    }
13986
0
    if (sax != NULL)
13987
0
        ctxt->sax = NULL;
13988
0
    xmlFreeParserCtxt(ctxt);
13989
13990
0
    return(ret);
13991
0
}
13992
13993
/**
13994
 * xmlSAXParseFile:
13995
 * @sax:  the SAX handler block
13996
 * @filename:  the filename
13997
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
13998
 *             documents
13999
 *
14000
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
14001
 *
14002
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14003
 * compressed document is provided by default if found at compile-time.
14004
 * It use the given SAX function block to handle the parsing callback.
14005
 * If sax is NULL, fallback to the default DOM tree building routines.
14006
 *
14007
 * Returns the resulting document tree
14008
 */
14009
14010
xmlDocPtr
14011
xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
14012
0
                          int recovery) {
14013
0
    return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
14014
0
}
14015
14016
/**
14017
 * xmlRecoverDoc:
14018
 * @cur:  a pointer to an array of xmlChar
14019
 *
14020
 * DEPRECATED: Use xmlReadDoc with XML_PARSE_RECOVER.
14021
 *
14022
 * parse an XML in-memory document and build a tree.
14023
 * In the case the document is not Well Formed, a attempt to build a
14024
 * tree is tried anyway
14025
 *
14026
 * Returns the resulting document tree or NULL in case of failure
14027
 */
14028
14029
xmlDocPtr
14030
0
xmlRecoverDoc(const xmlChar *cur) {
14031
0
    return(xmlSAXParseDoc(NULL, cur, 1));
14032
0
}
14033
14034
/**
14035
 * xmlParseFile:
14036
 * @filename:  the filename
14037
 *
14038
 * DEPRECATED: Use xmlReadFile.
14039
 *
14040
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14041
 * compressed document is provided by default if found at compile-time.
14042
 *
14043
 * Returns the resulting document tree if the file was wellformed,
14044
 * NULL otherwise.
14045
 */
14046
14047
xmlDocPtr
14048
0
xmlParseFile(const char *filename) {
14049
0
    return(xmlSAXParseFile(NULL, filename, 0));
14050
0
}
14051
14052
/**
14053
 * xmlRecoverFile:
14054
 * @filename:  the filename
14055
 *
14056
 * DEPRECATED: Use xmlReadFile with XML_PARSE_RECOVER.
14057
 *
14058
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14059
 * compressed document is provided by default if found at compile-time.
14060
 * In the case the document is not Well Formed, it attempts to build
14061
 * a tree anyway
14062
 *
14063
 * Returns the resulting document tree or NULL in case of failure
14064
 */
14065
14066
xmlDocPtr
14067
0
xmlRecoverFile(const char *filename) {
14068
0
    return(xmlSAXParseFile(NULL, filename, 1));
14069
0
}
14070
14071
14072
/**
14073
 * xmlSetupParserForBuffer:
14074
 * @ctxt:  an XML parser context
14075
 * @buffer:  a xmlChar * buffer
14076
 * @filename:  a file name
14077
 *
14078
 * DEPRECATED: Don't use.
14079
 *
14080
 * Setup the parser context to parse a new buffer; Clears any prior
14081
 * contents from the parser context. The buffer parameter must not be
14082
 * NULL, but the filename parameter can be
14083
 */
14084
void
14085
xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
14086
                             const char* filename)
14087
0
{
14088
0
    xmlParserInputPtr input;
14089
14090
0
    if ((ctxt == NULL) || (buffer == NULL))
14091
0
        return;
14092
14093
0
    input = xmlNewInputStream(ctxt);
14094
0
    if (input == NULL) {
14095
0
        xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
14096
0
        xmlClearParserCtxt(ctxt);
14097
0
        return;
14098
0
    }
14099
14100
0
    xmlClearParserCtxt(ctxt);
14101
0
    if (filename != NULL)
14102
0
        input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
14103
0
    input->base = buffer;
14104
0
    input->cur = buffer;
14105
0
    input->end = &buffer[xmlStrlen(buffer)];
14106
0
    inputPush(ctxt, input);
14107
0
}
14108
14109
/**
14110
 * xmlSAXUserParseFile:
14111
 * @sax:  a SAX handler
14112
 * @user_data:  The user data returned on SAX callbacks
14113
 * @filename:  a file name
14114
 *
14115
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
14116
 *
14117
 * parse an XML file and call the given SAX handler routines.
14118
 * Automatic support for ZLIB/Compress compressed document is provided
14119
 *
14120
 * Returns 0 in case of success or a error number otherwise
14121
 */
14122
int
14123
xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
14124
0
                    const char *filename) {
14125
0
    int ret = 0;
14126
0
    xmlParserCtxtPtr ctxt;
14127
14128
0
    ctxt = xmlCreateFileParserCtxt(filename);
14129
0
    if (ctxt == NULL) return -1;
14130
0
    if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14131
0
  xmlFree(ctxt->sax);
14132
0
    ctxt->sax = sax;
14133
0
    xmlDetectSAX2(ctxt);
14134
14135
0
    if (user_data != NULL)
14136
0
  ctxt->userData = user_data;
14137
14138
0
    xmlParseDocument(ctxt);
14139
14140
0
    if (ctxt->wellFormed)
14141
0
  ret = 0;
14142
0
    else {
14143
0
        if (ctxt->errNo != 0)
14144
0
      ret = ctxt->errNo;
14145
0
  else
14146
0
      ret = -1;
14147
0
    }
14148
0
    if (sax != NULL)
14149
0
  ctxt->sax = NULL;
14150
0
    if (ctxt->myDoc != NULL) {
14151
0
        xmlFreeDoc(ctxt->myDoc);
14152
0
  ctxt->myDoc = NULL;
14153
0
    }
14154
0
    xmlFreeParserCtxt(ctxt);
14155
14156
0
    return ret;
14157
0
}
14158
#endif /* LIBXML_SAX1_ENABLED */
14159
14160
/************************************************************************
14161
 *                  *
14162
 *    Front ends when parsing from memory     *
14163
 *                  *
14164
 ************************************************************************/
14165
14166
/**
14167
 * xmlCreateMemoryParserCtxt:
14168
 * @buffer:  a pointer to a char array
14169
 * @size:  the size of the array
14170
 *
14171
 * Create a parser context for an XML in-memory document.
14172
 *
14173
 * Returns the new parser context or NULL
14174
 */
14175
xmlParserCtxtPtr
14176
2.43M
xmlCreateMemoryParserCtxt(const char *buffer, int size) {
14177
2.43M
    xmlParserCtxtPtr ctxt;
14178
2.43M
    xmlParserInputPtr input;
14179
2.43M
    xmlParserInputBufferPtr buf;
14180
14181
2.43M
    if (buffer == NULL)
14182
0
  return(NULL);
14183
2.43M
    if (size <= 0)
14184
21.8k
  return(NULL);
14185
14186
2.41M
    ctxt = xmlNewParserCtxt();
14187
2.41M
    if (ctxt == NULL)
14188
0
  return(NULL);
14189
14190
2.41M
    buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14191
2.41M
    if (buf == NULL) {
14192
0
  xmlFreeParserCtxt(ctxt);
14193
0
  return(NULL);
14194
0
    }
14195
14196
2.41M
    input = xmlNewInputStream(ctxt);
14197
2.41M
    if (input == NULL) {
14198
0
  xmlFreeParserInputBuffer(buf);
14199
0
  xmlFreeParserCtxt(ctxt);
14200
0
  return(NULL);
14201
0
    }
14202
14203
2.41M
    input->filename = NULL;
14204
2.41M
    input->buf = buf;
14205
2.41M
    xmlBufResetInput(input->buf->buffer, input);
14206
14207
2.41M
    inputPush(ctxt, input);
14208
2.41M
    return(ctxt);
14209
2.41M
}
14210
14211
#ifdef LIBXML_SAX1_ENABLED
14212
/**
14213
 * xmlSAXParseMemoryWithData:
14214
 * @sax:  the SAX handler block
14215
 * @buffer:  an pointer to a char array
14216
 * @size:  the size of the array
14217
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14218
 *             documents
14219
 * @data:  the userdata
14220
 *
14221
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14222
 *
14223
 * parse an XML in-memory block and use the given SAX function block
14224
 * to handle the parsing callback. If sax is NULL, fallback to the default
14225
 * DOM tree building routines.
14226
 *
14227
 * User data (void *) is stored within the parser context in the
14228
 * context's _private member, so it is available nearly everywhere in libxml
14229
 *
14230
 * Returns the resulting document tree
14231
 */
14232
14233
xmlDocPtr
14234
xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14235
0
            int size, int recovery, void *data) {
14236
0
    xmlDocPtr ret;
14237
0
    xmlParserCtxtPtr ctxt;
14238
14239
0
    xmlInitParser();
14240
14241
0
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14242
0
    if (ctxt == NULL) return(NULL);
14243
0
    if (sax != NULL) {
14244
0
  if (ctxt->sax != NULL)
14245
0
      xmlFree(ctxt->sax);
14246
0
        ctxt->sax = sax;
14247
0
    }
14248
0
    xmlDetectSAX2(ctxt);
14249
0
    if (data!=NULL) {
14250
0
  ctxt->_private=data;
14251
0
    }
14252
14253
0
    ctxt->recovery = recovery;
14254
14255
0
    xmlParseDocument(ctxt);
14256
14257
0
    if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14258
0
    else {
14259
0
       ret = NULL;
14260
0
       xmlFreeDoc(ctxt->myDoc);
14261
0
       ctxt->myDoc = NULL;
14262
0
    }
14263
0
    if (sax != NULL)
14264
0
  ctxt->sax = NULL;
14265
0
    xmlFreeParserCtxt(ctxt);
14266
14267
0
    return(ret);
14268
0
}
14269
14270
/**
14271
 * xmlSAXParseMemory:
14272
 * @sax:  the SAX handler block
14273
 * @buffer:  an pointer to a char array
14274
 * @size:  the size of the array
14275
 * @recovery:  work in recovery mode, i.e. tries to read not Well Formed
14276
 *             documents
14277
 *
14278
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14279
 *
14280
 * parse an XML in-memory block and use the given SAX function block
14281
 * to handle the parsing callback. If sax is NULL, fallback to the default
14282
 * DOM tree building routines.
14283
 *
14284
 * Returns the resulting document tree
14285
 */
14286
xmlDocPtr
14287
xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14288
0
            int size, int recovery) {
14289
0
    return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
14290
0
}
14291
14292
/**
14293
 * xmlParseMemory:
14294
 * @buffer:  an pointer to a char array
14295
 * @size:  the size of the array
14296
 *
14297
 * DEPRECATED: Use xmlReadMemory.
14298
 *
14299
 * parse an XML in-memory block and build a tree.
14300
 *
14301
 * Returns the resulting document tree
14302
 */
14303
14304
0
xmlDocPtr xmlParseMemory(const char *buffer, int size) {
14305
0
   return(xmlSAXParseMemory(NULL, buffer, size, 0));
14306
0
}
14307
14308
/**
14309
 * xmlRecoverMemory:
14310
 * @buffer:  an pointer to a char array
14311
 * @size:  the size of the array
14312
 *
14313
 * DEPRECATED: Use xmlReadMemory with XML_PARSE_RECOVER.
14314
 *
14315
 * parse an XML in-memory block and build a tree.
14316
 * In the case the document is not Well Formed, an attempt to
14317
 * build a tree is tried anyway
14318
 *
14319
 * Returns the resulting document tree or NULL in case of error
14320
 */
14321
14322
0
xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
14323
0
   return(xmlSAXParseMemory(NULL, buffer, size, 1));
14324
0
}
14325
14326
/**
14327
 * xmlSAXUserParseMemory:
14328
 * @sax:  a SAX handler
14329
 * @user_data:  The user data returned on SAX callbacks
14330
 * @buffer:  an in-memory XML document input
14331
 * @size:  the length of the XML document in bytes
14332
 *
14333
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14334
 *
14335
 * parse an XML in-memory buffer and call the given SAX handler routines.
14336
 *
14337
 * Returns 0 in case of success or a error number otherwise
14338
 */
14339
int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
14340
0
        const char *buffer, int size) {
14341
0
    int ret = 0;
14342
0
    xmlParserCtxtPtr ctxt;
14343
14344
0
    xmlInitParser();
14345
14346
0
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14347
0
    if (ctxt == NULL) return -1;
14348
0
    if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14349
0
        xmlFree(ctxt->sax);
14350
0
    ctxt->sax = sax;
14351
0
    xmlDetectSAX2(ctxt);
14352
14353
0
    if (user_data != NULL)
14354
0
  ctxt->userData = user_data;
14355
14356
0
    xmlParseDocument(ctxt);
14357
14358
0
    if (ctxt->wellFormed)
14359
0
  ret = 0;
14360
0
    else {
14361
0
        if (ctxt->errNo != 0)
14362
0
      ret = ctxt->errNo;
14363
0
  else
14364
0
      ret = -1;
14365
0
    }
14366
0
    if (sax != NULL)
14367
0
        ctxt->sax = NULL;
14368
0
    if (ctxt->myDoc != NULL) {
14369
0
        xmlFreeDoc(ctxt->myDoc);
14370
0
  ctxt->myDoc = NULL;
14371
0
    }
14372
0
    xmlFreeParserCtxt(ctxt);
14373
14374
0
    return ret;
14375
0
}
14376
#endif /* LIBXML_SAX1_ENABLED */
14377
14378
/**
14379
 * xmlCreateDocParserCtxt:
14380
 * @cur:  a pointer to an array of xmlChar
14381
 *
14382
 * Creates a parser context for an XML in-memory document.
14383
 *
14384
 * Returns the new parser context or NULL
14385
 */
14386
xmlParserCtxtPtr
14387
0
xmlCreateDocParserCtxt(const xmlChar *cur) {
14388
0
    int len;
14389
14390
0
    if (cur == NULL)
14391
0
  return(NULL);
14392
0
    len = xmlStrlen(cur);
14393
0
    return(xmlCreateMemoryParserCtxt((const char *)cur, len));
14394
0
}
14395
14396
#ifdef LIBXML_SAX1_ENABLED
14397
/**
14398
 * xmlSAXParseDoc:
14399
 * @sax:  the SAX handler block
14400
 * @cur:  a pointer to an array of xmlChar
14401
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14402
 *             documents
14403
 *
14404
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadDoc.
14405
 *
14406
 * parse an XML in-memory document and build a tree.
14407
 * It use the given SAX function block to handle the parsing callback.
14408
 * If sax is NULL, fallback to the default DOM tree building routines.
14409
 *
14410
 * Returns the resulting document tree
14411
 */
14412
14413
xmlDocPtr
14414
0
xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
14415
0
    xmlDocPtr ret;
14416
0
    xmlParserCtxtPtr ctxt;
14417
0
    xmlSAXHandlerPtr oldsax = NULL;
14418
14419
0
    if (cur == NULL) return(NULL);
14420
14421
14422
0
    ctxt = xmlCreateDocParserCtxt(cur);
14423
0
    if (ctxt == NULL) return(NULL);
14424
0
    if (sax != NULL) {
14425
0
        oldsax = ctxt->sax;
14426
0
        ctxt->sax = sax;
14427
0
        ctxt->userData = NULL;
14428
0
    }
14429
0
    xmlDetectSAX2(ctxt);
14430
14431
0
    xmlParseDocument(ctxt);
14432
0
    if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14433
0
    else {
14434
0
       ret = NULL;
14435
0
       xmlFreeDoc(ctxt->myDoc);
14436
0
       ctxt->myDoc = NULL;
14437
0
    }
14438
0
    if (sax != NULL)
14439
0
  ctxt->sax = oldsax;
14440
0
    xmlFreeParserCtxt(ctxt);
14441
14442
0
    return(ret);
14443
0
}
14444
14445
/**
14446
 * xmlParseDoc:
14447
 * @cur:  a pointer to an array of xmlChar
14448
 *
14449
 * DEPRECATED: Use xmlReadDoc.
14450
 *
14451
 * parse an XML in-memory document and build a tree.
14452
 *
14453
 * Returns the resulting document tree
14454
 */
14455
14456
xmlDocPtr
14457
0
xmlParseDoc(const xmlChar *cur) {
14458
0
    return(xmlSAXParseDoc(NULL, cur, 0));
14459
0
}
14460
#endif /* LIBXML_SAX1_ENABLED */
14461
14462
#ifdef LIBXML_LEGACY_ENABLED
14463
/************************************************************************
14464
 *                  *
14465
 *  Specific function to keep track of entities references    *
14466
 *  and used by the XSLT debugger         *
14467
 *                  *
14468
 ************************************************************************/
14469
14470
static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14471
14472
/**
14473
 * xmlAddEntityReference:
14474
 * @ent : A valid entity
14475
 * @firstNode : A valid first node for children of entity
14476
 * @lastNode : A valid last node of children entity
14477
 *
14478
 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14479
 */
14480
static void
14481
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14482
                      xmlNodePtr lastNode)
14483
{
14484
    if (xmlEntityRefFunc != NULL) {
14485
        (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14486
    }
14487
}
14488
14489
14490
/**
14491
 * xmlSetEntityReferenceFunc:
14492
 * @func: A valid function
14493
 *
14494
 * Set the function to call call back when a xml reference has been made
14495
 */
14496
void
14497
xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14498
{
14499
    xmlEntityRefFunc = func;
14500
}
14501
#endif /* LIBXML_LEGACY_ENABLED */
14502
14503
/************************************************************************
14504
 *                  *
14505
 *        Miscellaneous       *
14506
 *                  *
14507
 ************************************************************************/
14508
14509
static int xmlParserInitialized = 0;
14510
14511
/**
14512
 * xmlInitParser:
14513
 *
14514
 * Initialization function for the XML parser.
14515
 * This is not reentrant. Call once before processing in case of
14516
 * use in multithreaded programs.
14517
 */
14518
14519
void
14520
955M
xmlInitParser(void) {
14521
    /*
14522
     * Note that the initialization code must not make memory allocations.
14523
     */
14524
955M
    if (xmlParserInitialized != 0)
14525
955M
  return;
14526
14527
1.88k
#ifdef LIBXML_THREAD_ENABLED
14528
1.88k
    __xmlGlobalInitMutexLock();
14529
1.88k
    if (xmlParserInitialized == 0) {
14530
1.88k
#endif
14531
#if defined(_WIN32) && (!defined(LIBXML_STATIC) || defined(LIBXML_STATIC_FOR_DLL))
14532
        if (xmlFree == free)
14533
            atexit(xmlCleanupParser);
14534
#endif
14535
14536
1.88k
  xmlInitThreadsInternal();
14537
1.88k
  xmlInitGlobalsInternal();
14538
1.88k
  xmlInitMemoryInternal();
14539
1.88k
        __xmlInitializeDict();
14540
1.88k
  xmlInitEncodingInternal();
14541
1.88k
  xmlRegisterDefaultInputCallbacks();
14542
1.88k
#ifdef LIBXML_OUTPUT_ENABLED
14543
1.88k
  xmlRegisterDefaultOutputCallbacks();
14544
1.88k
#endif /* LIBXML_OUTPUT_ENABLED */
14545
1.88k
#if defined(LIBXML_XPATH_ENABLED) || defined(LIBXML_SCHEMAS_ENABLED)
14546
1.88k
  xmlInitXPathInternal();
14547
1.88k
#endif
14548
1.88k
  xmlParserInitialized = 1;
14549
1.88k
#ifdef LIBXML_THREAD_ENABLED
14550
1.88k
    }
14551
1.88k
    __xmlGlobalInitMutexUnlock();
14552
1.88k
#endif
14553
1.88k
}
14554
14555
/**
14556
 * xmlCleanupParser:
14557
 *
14558
 * This function name is somewhat misleading. It does not clean up
14559
 * parser state, it cleans up memory allocated by the library itself.
14560
 * It is a cleanup function for the XML library. It tries to reclaim all
14561
 * related global memory allocated for the library processing.
14562
 * It doesn't deallocate any document related memory. One should
14563
 * call xmlCleanupParser() only when the process has finished using
14564
 * the library and all XML/HTML documents built with it.
14565
 * See also xmlInitParser() which has the opposite function of preparing
14566
 * the library for operations.
14567
 *
14568
 * WARNING: if your application is multithreaded or has plugin support
14569
 *          calling this may crash the application if another thread or
14570
 *          a plugin is still using libxml2. It's sometimes very hard to
14571
 *          guess if libxml2 is in use in the application, some libraries
14572
 *          or plugins may use it without notice. In case of doubt abstain
14573
 *          from calling this function or do it just before calling exit()
14574
 *          to avoid leak reports from valgrind !
14575
 */
14576
14577
void
14578
0
xmlCleanupParser(void) {
14579
0
    if (!xmlParserInitialized)
14580
0
  return;
14581
14582
0
    xmlCleanupCharEncodingHandlers();
14583
0
#ifdef LIBXML_CATALOG_ENABLED
14584
0
    xmlCatalogCleanup();
14585
0
#endif
14586
0
    xmlCleanupDictInternal();
14587
0
    xmlCleanupInputCallbacks();
14588
0
#ifdef LIBXML_OUTPUT_ENABLED
14589
0
    xmlCleanupOutputCallbacks();
14590
0
#endif
14591
0
#ifdef LIBXML_SCHEMAS_ENABLED
14592
0
    xmlSchemaCleanupTypes();
14593
0
    xmlRelaxNGCleanupTypes();
14594
0
#endif
14595
0
    xmlCleanupGlobalsInternal();
14596
0
    xmlCleanupThreadsInternal();
14597
0
    xmlCleanupMemoryInternal();
14598
0
    xmlParserInitialized = 0;
14599
0
}
14600
14601
#if defined(HAVE_ATTRIBUTE_DESTRUCTOR) && !defined(LIBXML_STATIC) && \
14602
    !defined(_WIN32)
14603
static void
14604
ATTRIBUTE_DESTRUCTOR
14605
0
xmlDestructor(void) {
14606
    /*
14607
     * Calling custom deallocation functions in a destructor can cause
14608
     * problems, for example with Nokogiri.
14609
     */
14610
0
    if (xmlFree == free)
14611
0
        xmlCleanupParser();
14612
0
}
14613
#endif
14614
14615
/************************************************************************
14616
 *                  *
14617
 *  New set (2.6.0) of simpler and more flexible APIs   *
14618
 *                  *
14619
 ************************************************************************/
14620
14621
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A NULL @str is ignored; when "dict" is NULL the string
 * is always freed.
 *
 * The expansion is wrapped in do { } while (0) so that the macro behaves
 * as a single statement (safe inside unbraced if/else); invoke it with a
 * trailing semicolon.
 */
#define DICT_FREE(str)						\
    do {							\
        if ((str) && ((!dict) ||				\
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
            xmlFree((char *)(str));				\
    } while (0)
14632
14633
/**
14634
 * xmlCtxtReset:
14635
 * @ctxt: an XML parser context
14636
 *
14637
 * Reset a parser context
14638
 */
14639
void
14640
xmlCtxtReset(xmlParserCtxtPtr ctxt)
14641
0
{
14642
0
    xmlParserInputPtr input;
14643
0
    xmlDictPtr dict;
14644
14645
0
    if (ctxt == NULL)
14646
0
        return;
14647
14648
0
    dict = ctxt->dict;
14649
14650
0
    while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14651
0
        xmlFreeInputStream(input);
14652
0
    }
14653
0
    ctxt->inputNr = 0;
14654
0
    ctxt->input = NULL;
14655
14656
0
    ctxt->spaceNr = 0;
14657
0
    if (ctxt->spaceTab != NULL) {
14658
0
  ctxt->spaceTab[0] = -1;
14659
0
  ctxt->space = &ctxt->spaceTab[0];
14660
0
    } else {
14661
0
        ctxt->space = NULL;
14662
0
    }
14663
14664
14665
0
    ctxt->nodeNr = 0;
14666
0
    ctxt->node = NULL;
14667
14668
0
    ctxt->nameNr = 0;
14669
0
    ctxt->name = NULL;
14670
14671
0
    ctxt->nsNr = 0;
14672
14673
0
    DICT_FREE(ctxt->version);
14674
0
    ctxt->version = NULL;
14675
0
    DICT_FREE(ctxt->encoding);
14676
0
    ctxt->encoding = NULL;
14677
0
    DICT_FREE(ctxt->directory);
14678
0
    ctxt->directory = NULL;
14679
0
    DICT_FREE(ctxt->extSubURI);
14680
0
    ctxt->extSubURI = NULL;
14681
0
    DICT_FREE(ctxt->extSubSystem);
14682
0
    ctxt->extSubSystem = NULL;
14683
0
    if (ctxt->myDoc != NULL)
14684
0
        xmlFreeDoc(ctxt->myDoc);
14685
0
    ctxt->myDoc = NULL;
14686
14687
0
    ctxt->standalone = -1;
14688
0
    ctxt->hasExternalSubset = 0;
14689
0
    ctxt->hasPErefs = 0;
14690
0
    ctxt->html = 0;
14691
0
    ctxt->external = 0;
14692
0
    ctxt->instate = XML_PARSER_START;
14693
0
    ctxt->token = 0;
14694
14695
0
    ctxt->wellFormed = 1;
14696
0
    ctxt->nsWellFormed = 1;
14697
0
    ctxt->disableSAX = 0;
14698
0
    ctxt->valid = 1;
14699
#if 0
14700
    ctxt->vctxt.userData = ctxt;
14701
    ctxt->vctxt.error = xmlParserValidityError;
14702
    ctxt->vctxt.warning = xmlParserValidityWarning;
14703
#endif
14704
0
    ctxt->record_info = 0;
14705
0
    ctxt->checkIndex = 0;
14706
0
    ctxt->endCheckState = 0;
14707
0
    ctxt->inSubset = 0;
14708
0
    ctxt->errNo = XML_ERR_OK;
14709
0
    ctxt->depth = 0;
14710
0
    ctxt->charset = XML_CHAR_ENCODING_UTF8;
14711
0
    ctxt->catalogs = NULL;
14712
0
    ctxt->sizeentities = 0;
14713
0
    ctxt->sizeentcopy = 0;
14714
0
    xmlInitNodeInfoSeq(&ctxt->node_seq);
14715
14716
0
    if (ctxt->attsDefault != NULL) {
14717
0
        xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
14718
0
        ctxt->attsDefault = NULL;
14719
0
    }
14720
0
    if (ctxt->attsSpecial != NULL) {
14721
0
        xmlHashFree(ctxt->attsSpecial, NULL);
14722
0
        ctxt->attsSpecial = NULL;
14723
0
    }
14724
14725
0
#ifdef LIBXML_CATALOG_ENABLED
14726
0
    if (ctxt->catalogs != NULL)
14727
0
  xmlCatalogFreeLocal(ctxt->catalogs);
14728
0
#endif
14729
0
    ctxt->nbErrors = 0;
14730
0
    ctxt->nbWarnings = 0;
14731
0
    if (ctxt->lastError.code != XML_ERR_OK)
14732
0
        xmlResetError(&ctxt->lastError);
14733
0
}
14734
14735
/**
14736
 * xmlCtxtResetPush:
14737
 * @ctxt: an XML parser context
14738
 * @chunk:  a pointer to an array of chars
14739
 * @size:  number of chars in the array
14740
 * @filename:  an optional file name or URI
14741
 * @encoding:  the document encoding, or NULL
14742
 *
14743
 * Reset a push parser context
14744
 *
14745
 * Returns 0 in case of success and 1 in case of error
14746
 */
14747
int
14748
xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14749
                 int size, const char *filename, const char *encoding)
14750
0
{
14751
0
    xmlParserInputPtr inputStream;
14752
0
    xmlParserInputBufferPtr buf;
14753
0
    xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14754
14755
0
    if (ctxt == NULL)
14756
0
        return(1);
14757
14758
0
    if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14759
0
        enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14760
14761
0
    buf = xmlAllocParserInputBuffer(enc);
14762
0
    if (buf == NULL)
14763
0
        return(1);
14764
14765
0
    if (ctxt == NULL) {
14766
0
        xmlFreeParserInputBuffer(buf);
14767
0
        return(1);
14768
0
    }
14769
14770
0
    xmlCtxtReset(ctxt);
14771
14772
0
    if (filename == NULL) {
14773
0
        ctxt->directory = NULL;
14774
0
    } else {
14775
0
        ctxt->directory = xmlParserGetDirectory(filename);
14776
0
    }
14777
14778
0
    inputStream = xmlNewInputStream(ctxt);
14779
0
    if (inputStream == NULL) {
14780
0
        xmlFreeParserInputBuffer(buf);
14781
0
        return(1);
14782
0
    }
14783
14784
0
    if (filename == NULL)
14785
0
        inputStream->filename = NULL;
14786
0
    else
14787
0
        inputStream->filename = (char *)
14788
0
            xmlCanonicPath((const xmlChar *) filename);
14789
0
    inputStream->buf = buf;
14790
0
    xmlBufResetInput(buf->buffer, inputStream);
14791
14792
0
    inputPush(ctxt, inputStream);
14793
14794
0
    if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14795
0
        (ctxt->input->buf != NULL)) {
14796
0
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
14797
0
        size_t cur = ctxt->input->cur - ctxt->input->base;
14798
14799
0
        xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14800
14801
0
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
14802
#ifdef DEBUG_PUSH
14803
        xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14804
#endif
14805
0
    }
14806
14807
0
    if (encoding != NULL) {
14808
0
        xmlCharEncodingHandlerPtr hdlr;
14809
14810
0
        if (ctxt->encoding != NULL)
14811
0
      xmlFree((xmlChar *) ctxt->encoding);
14812
0
        ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14813
14814
0
        hdlr = xmlFindCharEncodingHandler(encoding);
14815
0
        if (hdlr != NULL) {
14816
0
            xmlSwitchToEncoding(ctxt, hdlr);
14817
0
  } else {
14818
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14819
0
            "Unsupported encoding %s\n", BAD_CAST encoding);
14820
0
        }
14821
0
    } else if (enc != XML_CHAR_ENCODING_NONE) {
14822
0
        xmlSwitchEncoding(ctxt, enc);
14823
0
    }
14824
14825
0
    return(0);
14826
0
}
14827
14828
14829
/**
14830
 * xmlCtxtUseOptionsInternal:
14831
 * @ctxt: an XML parser context
14832
 * @options:  a combination of xmlParserOption
14833
 * @encoding:  the user provided encoding to use
14834
 *
14835
 * Applies the options to the parser context
14836
 *
14837
 * Returns 0 in case of success, the set of unknown or unimplemented options
14838
 *         in case of error.
14839
 */
14840
static int
14841
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
14842
7.23M
{
14843
7.23M
    if (ctxt == NULL)
14844
0
        return(-1);
14845
7.23M
    if (encoding != NULL) {
14846
0
        if (ctxt->encoding != NULL)
14847
0
      xmlFree((xmlChar *) ctxt->encoding);
14848
0
        ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14849
0
    }
14850
7.23M
    if (options & XML_PARSE_RECOVER) {
14851
4.79M
        ctxt->recovery = 1;
14852
4.79M
        options -= XML_PARSE_RECOVER;
14853
4.79M
  ctxt->options |= XML_PARSE_RECOVER;
14854
4.79M
    } else
14855
2.43M
        ctxt->recovery = 0;
14856
7.23M
    if (options & XML_PARSE_DTDLOAD) {
14857
4.70M
        ctxt->loadsubset = XML_DETECT_IDS;
14858
4.70M
        options -= XML_PARSE_DTDLOAD;
14859
4.70M
  ctxt->options |= XML_PARSE_DTDLOAD;
14860
4.70M
    } else
14861
2.52M
        ctxt->loadsubset = 0;
14862
7.23M
    if (options & XML_PARSE_DTDATTR) {
14863
4.07M
        ctxt->loadsubset |= XML_COMPLETE_ATTRS;
14864
4.07M
        options -= XML_PARSE_DTDATTR;
14865
4.07M
  ctxt->options |= XML_PARSE_DTDATTR;
14866
4.07M
    }
14867
7.23M
    if (options & XML_PARSE_NOENT) {
14868
3.54M
        ctxt->replaceEntities = 1;
14869
        /* ctxt->loadsubset |= XML_DETECT_IDS; */
14870
3.54M
        options -= XML_PARSE_NOENT;
14871
3.54M
  ctxt->options |= XML_PARSE_NOENT;
14872
3.54M
    } else
14873
3.68M
        ctxt->replaceEntities = 0;
14874
7.23M
    if (options & XML_PARSE_PEDANTIC) {
14875
1.88M
        ctxt->pedantic = 1;
14876
1.88M
        options -= XML_PARSE_PEDANTIC;
14877
1.88M
  ctxt->options |= XML_PARSE_PEDANTIC;
14878
1.88M
    } else
14879
5.34M
        ctxt->pedantic = 0;
14880
7.23M
    if (options & XML_PARSE_NOBLANKS) {
14881
3.50M
        ctxt->keepBlanks = 0;
14882
3.50M
        ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
14883
3.50M
        options -= XML_PARSE_NOBLANKS;
14884
3.50M
  ctxt->options |= XML_PARSE_NOBLANKS;
14885
3.50M
    } else
14886
3.72M
        ctxt->keepBlanks = 1;
14887
7.23M
    if (options & XML_PARSE_DTDVALID) {
14888
3.33M
        ctxt->validate = 1;
14889
3.33M
        if (options & XML_PARSE_NOWARNING)
14890
1.88M
            ctxt->vctxt.warning = NULL;
14891
3.33M
        if (options & XML_PARSE_NOERROR)
14892
2.68M
            ctxt->vctxt.error = NULL;
14893
3.33M
        options -= XML_PARSE_DTDVALID;
14894
3.33M
  ctxt->options |= XML_PARSE_DTDVALID;
14895
3.33M
    } else
14896
3.89M
        ctxt->validate = 0;
14897
7.23M
    if (options & XML_PARSE_NOWARNING) {
14898
2.95M
        ctxt->sax->warning = NULL;
14899
2.95M
        options -= XML_PARSE_NOWARNING;
14900
2.95M
    }
14901
7.23M
    if (options & XML_PARSE_NOERROR) {
14902
5.44M
        ctxt->sax->error = NULL;
14903
5.44M
        ctxt->sax->fatalError = NULL;
14904
5.44M
        options -= XML_PARSE_NOERROR;
14905
5.44M
    }
14906
7.23M
#ifdef LIBXML_SAX1_ENABLED
14907
7.23M
    if (options & XML_PARSE_SAX1) {
14908
2.71M
        ctxt->sax->startElement = xmlSAX2StartElement;
14909
2.71M
        ctxt->sax->endElement = xmlSAX2EndElement;
14910
2.71M
        ctxt->sax->startElementNs = NULL;
14911
2.71M
        ctxt->sax->endElementNs = NULL;
14912
2.71M
        ctxt->sax->initialized = 1;
14913
2.71M
        options -= XML_PARSE_SAX1;
14914
2.71M
  ctxt->options |= XML_PARSE_SAX1;
14915
2.71M
    }
14916
7.23M
#endif /* LIBXML_SAX1_ENABLED */
14917
7.23M
    if (options & XML_PARSE_NODICT) {
14918
3.02M
        ctxt->dictNames = 0;
14919
3.02M
        options -= XML_PARSE_NODICT;
14920
3.02M
  ctxt->options |= XML_PARSE_NODICT;
14921
4.21M
    } else {
14922
4.21M
        ctxt->dictNames = 1;
14923
4.21M
    }
14924
7.23M
    if (options & XML_PARSE_NOCDATA) {
14925
2.87M
        ctxt->sax->cdataBlock = NULL;
14926
2.87M
        options -= XML_PARSE_NOCDATA;
14927
2.87M
  ctxt->options |= XML_PARSE_NOCDATA;
14928
2.87M
    }
14929
7.23M
    if (options & XML_PARSE_NSCLEAN) {
14930
4.35M
  ctxt->options |= XML_PARSE_NSCLEAN;
14931
4.35M
        options -= XML_PARSE_NSCLEAN;
14932
4.35M
    }
14933
7.23M
    if (options & XML_PARSE_NONET) {
14934
3.72M
  ctxt->options |= XML_PARSE_NONET;
14935
3.72M
        options -= XML_PARSE_NONET;
14936
3.72M
    }
14937
7.23M
    if (options & XML_PARSE_COMPACT) {
14938
4.93M
  ctxt->options |= XML_PARSE_COMPACT;
14939
4.93M
        options -= XML_PARSE_COMPACT;
14940
4.93M
    }
14941
7.23M
    if (options & XML_PARSE_OLD10) {
14942
2.56M
  ctxt->options |= XML_PARSE_OLD10;
14943
2.56M
        options -= XML_PARSE_OLD10;
14944
2.56M
    }
14945
7.23M
    if (options & XML_PARSE_NOBASEFIX) {
14946
2.84M
  ctxt->options |= XML_PARSE_NOBASEFIX;
14947
2.84M
        options -= XML_PARSE_NOBASEFIX;
14948
2.84M
    }
14949
7.23M
    if (options & XML_PARSE_HUGE) {
14950
2.67M
  ctxt->options |= XML_PARSE_HUGE;
14951
2.67M
        options -= XML_PARSE_HUGE;
14952
2.67M
        if (ctxt->dict != NULL)
14953
2.67M
            xmlDictSetLimit(ctxt->dict, 0);
14954
2.67M
    }
14955
7.23M
    if (options & XML_PARSE_OLDSAX) {
14956
2.54M
  ctxt->options |= XML_PARSE_OLDSAX;
14957
2.54M
        options -= XML_PARSE_OLDSAX;
14958
2.54M
    }
14959
7.23M
    if (options & XML_PARSE_IGNORE_ENC) {
14960
3.98M
  ctxt->options |= XML_PARSE_IGNORE_ENC;
14961
3.98M
        options -= XML_PARSE_IGNORE_ENC;
14962
3.98M
    }
14963
7.23M
    if (options & XML_PARSE_BIG_LINES) {
14964
3.16M
  ctxt->options |= XML_PARSE_BIG_LINES;
14965
3.16M
        options -= XML_PARSE_BIG_LINES;
14966
3.16M
    }
14967
7.23M
    ctxt->linenumbers = 1;
14968
7.23M
    return (options);
14969
7.23M
}
14970
14971
/**
14972
 * xmlCtxtUseOptions:
14973
 * @ctxt: an XML parser context
14974
 * @options:  a combination of xmlParserOption
14975
 *
14976
 * Applies the options to the parser context
14977
 *
14978
 * Returns 0 in case of success, the set of unknown or unimplemented options
14979
 *         in case of error.
14980
 */
14981
int
14982
xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
14983
4.83M
{
14984
4.83M
   return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
14985
4.83M
}
14986
14987
/**
14988
 * xmlDoRead:
14989
 * @ctxt:  an XML parser context
14990
 * @URL:  the base URL to use for the document
14991
 * @encoding:  the document encoding, or NULL
14992
 * @options:  a combination of xmlParserOption
14993
 * @reuse:  keep the context for reuse
14994
 *
14995
 * Common front-end for the xmlRead functions
14996
 *
14997
 * Returns the resulting document tree or NULL
14998
 */
14999
static xmlDocPtr
15000
xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
15001
          int options, int reuse)
15002
2.39M
{
15003
2.39M
    xmlDocPtr ret;
15004
15005
2.39M
    xmlCtxtUseOptionsInternal(ctxt, options, encoding);
15006
2.39M
    if (encoding != NULL) {
15007
0
        xmlCharEncodingHandlerPtr hdlr;
15008
15009
0
  hdlr = xmlFindCharEncodingHandler(encoding);
15010
0
  if (hdlr != NULL)
15011
0
      xmlSwitchToEncoding(ctxt, hdlr);
15012
0
    }
15013
2.39M
    if ((URL != NULL) && (ctxt->input != NULL) &&
15014
2.39M
        (ctxt->input->filename == NULL))
15015
2.39M
        ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
15016
2.39M
    xmlParseDocument(ctxt);
15017
2.39M
    if ((ctxt->wellFormed) || ctxt->recovery)
15018
1.59M
        ret = ctxt->myDoc;
15019
796k
    else {
15020
796k
        ret = NULL;
15021
796k
  if (ctxt->myDoc != NULL) {
15022
613k
      xmlFreeDoc(ctxt->myDoc);
15023
613k
  }
15024
796k
    }
15025
2.39M
    ctxt->myDoc = NULL;
15026
2.39M
    if (!reuse) {
15027
2.39M
  xmlFreeParserCtxt(ctxt);
15028
2.39M
    }
15029
15030
2.39M
    return (ret);
15031
2.39M
}
15032
15033
/**
15034
 * xmlReadDoc:
15035
 * @cur:  a pointer to a zero terminated string
15036
 * @URL:  the base URL to use for the document
15037
 * @encoding:  the document encoding, or NULL
15038
 * @options:  a combination of xmlParserOption
15039
 *
15040
 * parse an XML in-memory document and build a tree.
15041
 *
15042
 * Returns the resulting document tree
15043
 */
15044
xmlDocPtr
15045
xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
15046
0
{
15047
0
    xmlParserCtxtPtr ctxt;
15048
15049
0
    if (cur == NULL)
15050
0
        return (NULL);
15051
0
    xmlInitParser();
15052
15053
0
    ctxt = xmlCreateDocParserCtxt(cur);
15054
0
    if (ctxt == NULL)
15055
0
        return (NULL);
15056
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15057
0
}
15058
15059
/**
15060
 * xmlReadFile:
15061
 * @filename:  a file or URL
15062
 * @encoding:  the document encoding, or NULL
15063
 * @options:  a combination of xmlParserOption
15064
 *
15065
 * parse an XML file from the filesystem or the network.
15066
 *
15067
 * Returns the resulting document tree
15068
 */
15069
xmlDocPtr
15070
xmlReadFile(const char *filename, const char *encoding, int options)
15071
0
{
15072
0
    xmlParserCtxtPtr ctxt;
15073
15074
0
    xmlInitParser();
15075
0
    ctxt = xmlCreateURLParserCtxt(filename, options);
15076
0
    if (ctxt == NULL)
15077
0
        return (NULL);
15078
0
    return (xmlDoRead(ctxt, NULL, encoding, options, 0));
15079
0
}
15080
15081
/**
15082
 * xmlReadMemory:
15083
 * @buffer:  a pointer to a char array
15084
 * @size:  the size of the array
15085
 * @URL:  the base URL to use for the document
15086
 * @encoding:  the document encoding, or NULL
15087
 * @options:  a combination of xmlParserOption
15088
 *
15089
 * parse an XML in-memory document and build a tree.
15090
 *
15091
 * Returns the resulting document tree
15092
 */
15093
xmlDocPtr
15094
xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
15095
2.41M
{
15096
2.41M
    xmlParserCtxtPtr ctxt;
15097
15098
2.41M
    xmlInitParser();
15099
2.41M
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
15100
2.41M
    if (ctxt == NULL)
15101
21.7k
        return (NULL);
15102
2.39M
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15103
2.41M
}
15104
15105
/**
15106
 * xmlReadFd:
15107
 * @fd:  an open file descriptor
15108
 * @URL:  the base URL to use for the document
15109
 * @encoding:  the document encoding, or NULL
15110
 * @options:  a combination of xmlParserOption
15111
 *
15112
 * parse an XML from a file descriptor and build a tree.
15113
 * NOTE that the file descriptor will not be closed when the
15114
 *      reader is closed or reset.
15115
 *
15116
 * Returns the resulting document tree
15117
 */
15118
xmlDocPtr
15119
xmlReadFd(int fd, const char *URL, const char *encoding, int options)
15120
0
{
15121
0
    xmlParserCtxtPtr ctxt;
15122
0
    xmlParserInputBufferPtr input;
15123
0
    xmlParserInputPtr stream;
15124
15125
0
    if (fd < 0)
15126
0
        return (NULL);
15127
0
    xmlInitParser();
15128
15129
0
    input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15130
0
    if (input == NULL)
15131
0
        return (NULL);
15132
0
    input->closecallback = NULL;
15133
0
    ctxt = xmlNewParserCtxt();
15134
0
    if (ctxt == NULL) {
15135
0
        xmlFreeParserInputBuffer(input);
15136
0
        return (NULL);
15137
0
    }
15138
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15139
0
    if (stream == NULL) {
15140
0
        xmlFreeParserInputBuffer(input);
15141
0
  xmlFreeParserCtxt(ctxt);
15142
0
        return (NULL);
15143
0
    }
15144
0
    inputPush(ctxt, stream);
15145
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15146
0
}
15147
15148
/**
15149
 * xmlReadIO:
15150
 * @ioread:  an I/O read function
15151
 * @ioclose:  an I/O close function
15152
 * @ioctx:  an I/O handler
15153
 * @URL:  the base URL to use for the document
15154
 * @encoding:  the document encoding, or NULL
15155
 * @options:  a combination of xmlParserOption
15156
 *
15157
 * parse an XML document from I/O functions and source and build a tree.
15158
 *
15159
 * Returns the resulting document tree
15160
 */
15161
xmlDocPtr
15162
xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
15163
          void *ioctx, const char *URL, const char *encoding, int options)
15164
0
{
15165
0
    xmlParserCtxtPtr ctxt;
15166
0
    xmlParserInputBufferPtr input;
15167
0
    xmlParserInputPtr stream;
15168
15169
0
    if (ioread == NULL)
15170
0
        return (NULL);
15171
0
    xmlInitParser();
15172
15173
0
    input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15174
0
                                         XML_CHAR_ENCODING_NONE);
15175
0
    if (input == NULL) {
15176
0
        if (ioclose != NULL)
15177
0
            ioclose(ioctx);
15178
0
        return (NULL);
15179
0
    }
15180
0
    ctxt = xmlNewParserCtxt();
15181
0
    if (ctxt == NULL) {
15182
0
        xmlFreeParserInputBuffer(input);
15183
0
        return (NULL);
15184
0
    }
15185
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15186
0
    if (stream == NULL) {
15187
0
        xmlFreeParserInputBuffer(input);
15188
0
  xmlFreeParserCtxt(ctxt);
15189
0
        return (NULL);
15190
0
    }
15191
0
    inputPush(ctxt, stream);
15192
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15193
0
}
15194
15195
/**
15196
 * xmlCtxtReadDoc:
15197
 * @ctxt:  an XML parser context
15198
 * @cur:  a pointer to a zero terminated string
15199
 * @URL:  the base URL to use for the document
15200
 * @encoding:  the document encoding, or NULL
15201
 * @options:  a combination of xmlParserOption
15202
 *
15203
 * parse an XML in-memory document and build a tree.
15204
 * This reuses the existing @ctxt parser context
15205
 *
15206
 * Returns the resulting document tree
15207
 */
15208
xmlDocPtr
15209
xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
15210
               const char *URL, const char *encoding, int options)
15211
0
{
15212
0
    if (cur == NULL)
15213
0
        return (NULL);
15214
0
    return (xmlCtxtReadMemory(ctxt, (const char *) cur, xmlStrlen(cur), URL,
15215
0
                              encoding, options));
15216
0
}
15217
15218
/**
15219
 * xmlCtxtReadFile:
15220
 * @ctxt:  an XML parser context
15221
 * @filename:  a file or URL
15222
 * @encoding:  the document encoding, or NULL
15223
 * @options:  a combination of xmlParserOption
15224
 *
15225
 * parse an XML file from the filesystem or the network.
15226
 * This reuses the existing @ctxt parser context
15227
 *
15228
 * Returns the resulting document tree
15229
 */
15230
xmlDocPtr
15231
xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15232
                const char *encoding, int options)
15233
0
{
15234
0
    xmlParserInputPtr stream;
15235
15236
0
    if (filename == NULL)
15237
0
        return (NULL);
15238
0
    if (ctxt == NULL)
15239
0
        return (NULL);
15240
0
    xmlInitParser();
15241
15242
0
    xmlCtxtReset(ctxt);
15243
15244
0
    stream = xmlLoadExternalEntity(filename, NULL, ctxt);
15245
0
    if (stream == NULL) {
15246
0
        return (NULL);
15247
0
    }
15248
0
    inputPush(ctxt, stream);
15249
0
    return (xmlDoRead(ctxt, NULL, encoding, options, 1));
15250
0
}
15251
15252
/**
15253
 * xmlCtxtReadMemory:
15254
 * @ctxt:  an XML parser context
15255
 * @buffer:  a pointer to a char array
15256
 * @size:  the size of the array
15257
 * @URL:  the base URL to use for the document
15258
 * @encoding:  the document encoding, or NULL
15259
 * @options:  a combination of xmlParserOption
15260
 *
15261
 * parse an XML in-memory document and build a tree.
15262
 * This reuses the existing @ctxt parser context
15263
 *
15264
 * Returns the resulting document tree
15265
 */
15266
xmlDocPtr
15267
xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
15268
                  const char *URL, const char *encoding, int options)
15269
0
{
15270
0
    xmlParserInputBufferPtr input;
15271
0
    xmlParserInputPtr stream;
15272
15273
0
    if (ctxt == NULL)
15274
0
        return (NULL);
15275
0
    if (buffer == NULL)
15276
0
        return (NULL);
15277
0
    xmlInitParser();
15278
15279
0
    xmlCtxtReset(ctxt);
15280
15281
0
    input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
15282
0
    if (input == NULL) {
15283
0
  return(NULL);
15284
0
    }
15285
15286
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15287
0
    if (stream == NULL) {
15288
0
  xmlFreeParserInputBuffer(input);
15289
0
  return(NULL);
15290
0
    }
15291
15292
0
    inputPush(ctxt, stream);
15293
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15294
0
}
15295
15296
/**
15297
 * xmlCtxtReadFd:
15298
 * @ctxt:  an XML parser context
15299
 * @fd:  an open file descriptor
15300
 * @URL:  the base URL to use for the document
15301
 * @encoding:  the document encoding, or NULL
15302
 * @options:  a combination of xmlParserOption
15303
 *
15304
 * parse an XML from a file descriptor and build a tree.
15305
 * This reuses the existing @ctxt parser context
15306
 * NOTE that the file descriptor will not be closed when the
15307
 *      reader is closed or reset.
15308
 *
15309
 * Returns the resulting document tree
15310
 */
15311
xmlDocPtr
15312
xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15313
              const char *URL, const char *encoding, int options)
15314
0
{
15315
0
    xmlParserInputBufferPtr input;
15316
0
    xmlParserInputPtr stream;
15317
15318
0
    if (fd < 0)
15319
0
        return (NULL);
15320
0
    if (ctxt == NULL)
15321
0
        return (NULL);
15322
0
    xmlInitParser();
15323
15324
0
    xmlCtxtReset(ctxt);
15325
15326
15327
0
    input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15328
0
    if (input == NULL)
15329
0
        return (NULL);
15330
0
    input->closecallback = NULL;
15331
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15332
0
    if (stream == NULL) {
15333
0
        xmlFreeParserInputBuffer(input);
15334
0
        return (NULL);
15335
0
    }
15336
0
    inputPush(ctxt, stream);
15337
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15338
0
}
15339
15340
/**
15341
 * xmlCtxtReadIO:
15342
 * @ctxt:  an XML parser context
15343
 * @ioread:  an I/O read function
15344
 * @ioclose:  an I/O close function
15345
 * @ioctx:  an I/O handler
15346
 * @URL:  the base URL to use for the document
15347
 * @encoding:  the document encoding, or NULL
15348
 * @options:  a combination of xmlParserOption
15349
 *
15350
 * parse an XML document from I/O functions and source and build a tree.
15351
 * This reuses the existing @ctxt parser context
15352
 *
15353
 * Returns the resulting document tree
15354
 */
15355
xmlDocPtr
15356
xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15357
              xmlInputCloseCallback ioclose, void *ioctx,
15358
        const char *URL,
15359
              const char *encoding, int options)
15360
0
{
15361
0
    xmlParserInputBufferPtr input;
15362
0
    xmlParserInputPtr stream;
15363
15364
0
    if (ioread == NULL)
15365
0
        return (NULL);
15366
0
    if (ctxt == NULL)
15367
0
        return (NULL);
15368
0
    xmlInitParser();
15369
15370
0
    xmlCtxtReset(ctxt);
15371
15372
0
    input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15373
0
                                         XML_CHAR_ENCODING_NONE);
15374
0
    if (input == NULL) {
15375
0
        if (ioclose != NULL)
15376
0
            ioclose(ioctx);
15377
0
        return (NULL);
15378
0
    }
15379
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15380
0
    if (stream == NULL) {
15381
0
        xmlFreeParserInputBuffer(input);
15382
0
        return (NULL);
15383
0
    }
15384
0
    inputPush(ctxt, stream);
15385
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15386
0
}
15387