Coverage Report

Created: 2023-07-31 08:15

/src/libxml2/parser.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3
 *            implemented on top of the SAX interfaces
4
 *
5
 * References:
6
 *   The XML specification:
7
 *     http://www.w3.org/TR/REC-xml
8
 *   Original 1.0 version:
9
 *     http://www.w3.org/TR/1998/REC-xml-19980210
10
 *   XML second edition working draft
11
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
12
 *
13
 * Okay this is a big file, the parser core is around 7000 lines, then it
14
 * is followed by the progressive parser top routines, then the various
15
 * high level APIs to call the parser and a few miscellaneous functions.
16
 * A number of helper functions and deprecated ones have been moved to
17
 * parserInternals.c to reduce this file size.
18
 * As much as possible the functions are associated with their relative
19
 * production in the XML specification. A few productions defining the
20
 * different ranges of characters are actually implemented either in
21
 * parserInternals.h or parserInternals.c
22
 * The DOM tree build is realized from the default SAX callbacks in
23
 * the module SAX.c.
24
 * The routines doing the validation checks are in valid.c and called either
25
 * from the SAX callbacks or as standalone functions using a preparsed
26
 * document.
27
 *
28
 * See Copyright for the status of this software.
29
 *
30
 * daniel@veillard.com
31
 */
32
33
/* To avoid EBCDIC trouble when parsing on zOS */
34
#if defined(__MVS__)
35
#pragma convert("ISO8859-1")
36
#endif
37
38
#define IN_LIBXML
39
#include "libxml.h"
40
41
#if defined(_WIN32)
42
#define XML_DIR_SEP '\\'
43
#else
44
#define XML_DIR_SEP '/'
45
#endif
46
47
#include <stdlib.h>
48
#include <limits.h>
49
#include <string.h>
50
#include <stdarg.h>
51
#include <stddef.h>
52
#include <ctype.h>
53
#include <stdlib.h>
54
#include <libxml/xmlmemory.h>
55
#include <libxml/threads.h>
56
#include <libxml/globals.h>
57
#include <libxml/tree.h>
58
#include <libxml/parser.h>
59
#include <libxml/parserInternals.h>
60
#include <libxml/HTMLparser.h>
61
#include <libxml/valid.h>
62
#include <libxml/entities.h>
63
#include <libxml/xmlerror.h>
64
#include <libxml/encoding.h>
65
#include <libxml/xmlIO.h>
66
#include <libxml/uri.h>
67
#ifdef LIBXML_CATALOG_ENABLED
68
#include <libxml/catalog.h>
69
#endif
70
#ifdef LIBXML_SCHEMAS_ENABLED
71
#include <libxml/xmlschemastypes.h>
72
#include <libxml/relaxng.h>
73
#endif
74
#if defined(LIBXML_XPATH_ENABLED) || defined(LIBXML_SCHEMAS_ENABLED)
75
#include <libxml/xpath.h>
76
#endif
77
78
#include "private/buf.h"
79
#include "private/dict.h"
80
#include "private/enc.h"
81
#include "private/entities.h"
82
#include "private/error.h"
83
#include "private/globals.h"
84
#include "private/html.h"
85
#include "private/io.h"
86
#include "private/memory.h"
87
#include "private/parser.h"
88
#include "private/threads.h"
89
#include "private/xpath.h"
90
91
struct _xmlStartTag {
92
    const xmlChar *prefix;
93
    const xmlChar *URI;
94
    int line;
95
    int nsNr;
96
};
97
98
static xmlParserCtxtPtr
99
xmlCreateEntityParserCtxtInternal(xmlSAXHandlerPtr sax, void *userData,
100
        const xmlChar *URL, const xmlChar *ID, const xmlChar *base,
101
        xmlParserCtxtPtr pctx);
102
103
static void xmlHaltParser(xmlParserCtxtPtr ctxt);
104
105
static int
106
xmlParseElementStart(xmlParserCtxtPtr ctxt);
107
108
static void
109
xmlParseElementEnd(xmlParserCtxtPtr ctxt);
110
111
/************************************************************************
112
 *                  *
113
 *  Arbitrary limits set in the parser. See XML_PARSE_HUGE    *
114
 *                  *
115
 ************************************************************************/
116
117
185M
#define XML_MAX_HUGE_LENGTH 1000000000
118
119
#define XML_PARSER_BIG_ENTITY 1000
120
#define XML_PARSER_LOT_ENTITY 5000
121
122
/*
123
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
124
 *    replacement over the size in byte of the input indicates that you have
125
 *    an exponential behaviour. A value of 10 corresponds to at least 3 entity
126
 *    replacements per byte of input.
127
 */
128
6.62k
#define XML_PARSER_NON_LINEAR 10
129
130
513M
#define XML_ENT_FIXED_COST 50
131
132
/**
133
 * xmlParserMaxDepth:
134
 *
135
 * arbitrary depth limit for the XML documents that we allow to
136
 * process. This is not a limitation of the parser but a safety
137
 * boundary feature. It can be disabled with the XML_PARSE_HUGE
138
 * parser option.
139
 */
140
unsigned int xmlParserMaxDepth = 256;
141
142
143
144
#define SAX2 1
145
315M
#define XML_PARSER_BIG_BUFFER_SIZE 300
146
65.7G
#define XML_PARSER_BUFFER_SIZE 100
147
2.44M
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
148
149
/**
150
 * XML_PARSER_CHUNK_SIZE
151
 *
152
 * When calling GROW, this is the minimal amount of data
153
 * the parser expects to have received. It is not a hard
154
 * limit but an optimization when reading strings like Names.
155
 * It is not strictly needed as long as the input's available characters
156
 * are followed by 0, which should be provided by the I/O level.
157
 */
158
384M
#define XML_PARSER_CHUNK_SIZE 100
159
160
/*
161
 * List of XML prefixed PI allowed by W3C specs
162
 */
163
164
static const char* const xmlW3CPIs[] = {
165
    "xml-stylesheet",
166
    "xml-model",
167
    NULL
168
};
169
170
171
/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
172
static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
173
                                              const xmlChar **str);
174
175
static xmlParserErrors
176
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
177
                xmlSAXHandlerPtr sax,
178
          void *user_data, int depth, const xmlChar *URL,
179
          const xmlChar *ID, xmlNodePtr *list);
180
181
static int
182
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
183
                          const char *encoding);
184
#ifdef LIBXML_LEGACY_ENABLED
185
static void
186
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
187
                      xmlNodePtr lastNode);
188
#endif /* LIBXML_LEGACY_ENABLED */
189
190
static xmlParserErrors
191
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
192
          const xmlChar *string, void *user_data, xmlNodePtr *lst);
193
194
static int
195
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
196
197
/************************************************************************
198
 *                  *
199
 *    Some factorized error routines        *
200
 *                  *
201
 ************************************************************************/
202
203
/**
204
 * xmlErrAttributeDup:
205
 * @ctxt:  an XML parser context
206
 * @prefix:  the attribute prefix
207
 * @localname:  the attribute localname
208
 *
209
 * Handle a redefinition of attribute error
210
 */
211
static void
212
xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
213
                   const xmlChar * localname)
214
131k
{
215
131k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
216
131k
        (ctxt->instate == XML_PARSER_EOF))
217
89
  return;
218
131k
    if (ctxt != NULL)
219
131k
  ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
220
221
131k
    if (prefix == NULL)
222
78.0k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
223
78.0k
                        XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
224
78.0k
                        (const char *) localname, NULL, NULL, 0, 0,
225
78.0k
                        "Attribute %s redefined\n", localname);
226
53.0k
    else
227
53.0k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
228
53.0k
                        XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
229
53.0k
                        (const char *) prefix, (const char *) localname,
230
53.0k
                        NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
231
53.0k
                        localname);
232
131k
    if (ctxt != NULL) {
233
131k
  ctxt->wellFormed = 0;
234
131k
  if (ctxt->recovery == 0)
235
54.7k
      ctxt->disableSAX = 1;
236
131k
    }
237
131k
}
238
239
/**
240
 * xmlFatalErr:
241
 * @ctxt:  an XML parser context
242
 * @error:  the error number
243
 * @info:  extra information string
244
 *
245
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
246
 */
247
static void
248
xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
249
82.2M
{
250
82.2M
    const char *errmsg;
251
252
82.2M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
253
82.2M
        (ctxt->instate == XML_PARSER_EOF))
254
30.7k
  return;
255
82.2M
    switch (error) {
256
113k
        case XML_ERR_INVALID_HEX_CHARREF:
257
113k
            errmsg = "CharRef: invalid hexadecimal value";
258
113k
            break;
259
152k
        case XML_ERR_INVALID_DEC_CHARREF:
260
152k
            errmsg = "CharRef: invalid decimal value";
261
152k
            break;
262
0
        case XML_ERR_INVALID_CHARREF:
263
0
            errmsg = "CharRef: invalid value";
264
0
            break;
265
1.08M
        case XML_ERR_INTERNAL_ERROR:
266
1.08M
            errmsg = "internal error";
267
1.08M
            break;
268
0
        case XML_ERR_PEREF_AT_EOF:
269
0
            errmsg = "PEReference at end of document";
270
0
            break;
271
0
        case XML_ERR_PEREF_IN_PROLOG:
272
0
            errmsg = "PEReference in prolog";
273
0
            break;
274
0
        case XML_ERR_PEREF_IN_EPILOG:
275
0
            errmsg = "PEReference in epilog";
276
0
            break;
277
0
        case XML_ERR_PEREF_NO_NAME:
278
0
            errmsg = "PEReference: no name";
279
0
            break;
280
7.06M
        case XML_ERR_PEREF_SEMICOL_MISSING:
281
7.06M
            errmsg = "PEReference: expecting ';'";
282
7.06M
            break;
283
4.59k
        case XML_ERR_ENTITY_LOOP:
284
4.59k
            errmsg = "Detected an entity reference loop";
285
4.59k
            break;
286
0
        case XML_ERR_ENTITY_NOT_STARTED:
287
0
            errmsg = "EntityValue: \" or ' expected";
288
0
            break;
289
5.76k
        case XML_ERR_ENTITY_PE_INTERNAL:
290
5.76k
            errmsg = "PEReferences forbidden in internal subset";
291
5.76k
            break;
292
5.94k
        case XML_ERR_ENTITY_NOT_FINISHED:
293
5.94k
            errmsg = "EntityValue: \" or ' expected";
294
5.94k
            break;
295
116k
        case XML_ERR_ATTRIBUTE_NOT_STARTED:
296
116k
            errmsg = "AttValue: \" or ' expected";
297
116k
            break;
298
539k
        case XML_ERR_LT_IN_ATTRIBUTE:
299
539k
            errmsg = "Unescaped '<' not allowed in attributes values";
300
539k
            break;
301
149k
        case XML_ERR_LITERAL_NOT_STARTED:
302
149k
            errmsg = "SystemLiteral \" or ' expected";
303
149k
            break;
304
58.4k
        case XML_ERR_LITERAL_NOT_FINISHED:
305
58.4k
            errmsg = "Unfinished System or Public ID \" or ' expected";
306
58.4k
            break;
307
56.0k
        case XML_ERR_MISPLACED_CDATA_END:
308
56.0k
            errmsg = "Sequence ']]>' not allowed in content";
309
56.0k
            break;
310
128k
        case XML_ERR_URI_REQUIRED:
311
128k
            errmsg = "SYSTEM or PUBLIC, the URI is missing";
312
128k
            break;
313
20.8k
        case XML_ERR_PUBID_REQUIRED:
314
20.8k
            errmsg = "PUBLIC, the Public Identifier is missing";
315
20.8k
            break;
316
70.7M
        case XML_ERR_HYPHEN_IN_COMMENT:
317
70.7M
            errmsg = "Comment must not contain '--' (double-hyphen)";
318
70.7M
            break;
319
39.8k
        case XML_ERR_PI_NOT_STARTED:
320
39.8k
            errmsg = "xmlParsePI : no target name";
321
39.8k
            break;
322
516k
        case XML_ERR_RESERVED_XML_NAME:
323
516k
            errmsg = "Invalid PI name";
324
516k
            break;
325
7.77k
        case XML_ERR_NOTATION_NOT_STARTED:
326
7.77k
            errmsg = "NOTATION: Name expected here";
327
7.77k
            break;
328
36.4k
        case XML_ERR_NOTATION_NOT_FINISHED:
329
36.4k
            errmsg = "'>' required to close NOTATION declaration";
330
36.4k
            break;
331
45.5k
        case XML_ERR_VALUE_REQUIRED:
332
45.5k
            errmsg = "Entity value required";
333
45.5k
            break;
334
5.08k
        case XML_ERR_URI_FRAGMENT:
335
5.08k
            errmsg = "Fragment not allowed";
336
5.08k
            break;
337
24.9k
        case XML_ERR_ATTLIST_NOT_STARTED:
338
24.9k
            errmsg = "'(' required to start ATTLIST enumeration";
339
24.9k
            break;
340
2.55k
        case XML_ERR_NMTOKEN_REQUIRED:
341
2.55k
            errmsg = "NmToken expected in ATTLIST enumeration";
342
2.55k
            break;
343
6.78k
        case XML_ERR_ATTLIST_NOT_FINISHED:
344
6.78k
            errmsg = "')' required to finish ATTLIST enumeration";
345
6.78k
            break;
346
5.34k
        case XML_ERR_MIXED_NOT_STARTED:
347
5.34k
            errmsg = "MixedContentDecl : '|' or ')*' expected";
348
5.34k
            break;
349
0
        case XML_ERR_PCDATA_REQUIRED:
350
0
            errmsg = "MixedContentDecl : '#PCDATA' expected";
351
0
            break;
352
24.2k
        case XML_ERR_ELEMCONTENT_NOT_STARTED:
353
24.2k
            errmsg = "ContentDecl : Name or '(' expected";
354
24.2k
            break;
355
27.6k
        case XML_ERR_ELEMCONTENT_NOT_FINISHED:
356
27.6k
            errmsg = "ContentDecl : ',' '|' or ')' expected";
357
27.6k
            break;
358
0
        case XML_ERR_PEREF_IN_INT_SUBSET:
359
0
            errmsg =
360
0
                "PEReference: forbidden within markup decl in internal subset";
361
0
            break;
362
285k
        case XML_ERR_GT_REQUIRED:
363
285k
            errmsg = "expected '>'";
364
285k
            break;
365
518
        case XML_ERR_CONDSEC_INVALID:
366
518
            errmsg = "XML conditional section '[' expected";
367
518
            break;
368
41.2k
        case XML_ERR_EXT_SUBSET_NOT_FINISHED:
369
41.2k
            errmsg = "Content error in the external subset";
370
41.2k
            break;
371
2.63k
        case XML_ERR_CONDSEC_INVALID_KEYWORD:
372
2.63k
            errmsg =
373
2.63k
                "conditional section INCLUDE or IGNORE keyword expected";
374
2.63k
            break;
375
3.08k
        case XML_ERR_CONDSEC_NOT_FINISHED:
376
3.08k
            errmsg = "XML conditional section not closed";
377
3.08k
            break;
378
514
        case XML_ERR_XMLDECL_NOT_STARTED:
379
514
            errmsg = "Text declaration '<?xml' required";
380
514
            break;
381
134k
        case XML_ERR_XMLDECL_NOT_FINISHED:
382
134k
            errmsg = "parsing XML declaration: '?>' expected";
383
134k
            break;
384
0
        case XML_ERR_EXT_ENTITY_STANDALONE:
385
0
            errmsg = "external parsed entities cannot be standalone";
386
0
            break;
387
368k
        case XML_ERR_ENTITYREF_SEMICOL_MISSING:
388
368k
            errmsg = "EntityRef: expecting ';'";
389
368k
            break;
390
37.1k
        case XML_ERR_DOCTYPE_NOT_FINISHED:
391
37.1k
            errmsg = "DOCTYPE improperly terminated";
392
37.1k
            break;
393
0
        case XML_ERR_LTSLASH_REQUIRED:
394
0
            errmsg = "EndTag: '</' not found";
395
0
            break;
396
6.85k
        case XML_ERR_EQUAL_REQUIRED:
397
6.85k
            errmsg = "expected '='";
398
6.85k
            break;
399
28.6k
        case XML_ERR_STRING_NOT_CLOSED:
400
28.6k
            errmsg = "String not closed expecting \" or '";
401
28.6k
            break;
402
8.06k
        case XML_ERR_STRING_NOT_STARTED:
403
8.06k
            errmsg = "String not started expecting ' or \"";
404
8.06k
            break;
405
970
        case XML_ERR_ENCODING_NAME:
406
970
            errmsg = "Invalid XML encoding name";
407
970
            break;
408
1.66k
        case XML_ERR_STANDALONE_VALUE:
409
1.66k
            errmsg = "standalone accepts only 'yes' or 'no'";
410
1.66k
            break;
411
36.8k
        case XML_ERR_DOCUMENT_EMPTY:
412
36.8k
            errmsg = "Document is empty";
413
36.8k
            break;
414
252k
        case XML_ERR_DOCUMENT_END:
415
252k
            errmsg = "Extra content at the end of the document";
416
252k
            break;
417
17.4k
        case XML_ERR_NOT_WELL_BALANCED:
418
17.4k
            errmsg = "chunk is not well balanced";
419
17.4k
            break;
420
0
        case XML_ERR_EXTRA_CONTENT:
421
0
            errmsg = "extra content at the end of well balanced chunk";
422
0
            break;
423
73.3k
        case XML_ERR_VERSION_MISSING:
424
73.3k
            errmsg = "Malformed declaration expecting version";
425
73.3k
            break;
426
1.59k
        case XML_ERR_NAME_TOO_LONG:
427
1.59k
            errmsg = "Name too long";
428
1.59k
            break;
429
#if 0
430
        case:
431
            errmsg = "";
432
            break;
433
#endif
434
6.00k
        default:
435
6.00k
            errmsg = "Unregistered error message";
436
82.2M
    }
437
82.2M
    if (ctxt != NULL)
438
82.2M
  ctxt->errNo = error;
439
82.2M
    if (info == NULL) {
440
81.1M
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
441
81.1M
                        XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s\n",
442
81.1M
                        errmsg);
443
81.1M
    } else {
444
1.08M
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
445
1.08M
                        XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s: %s\n",
446
1.08M
                        errmsg, info);
447
1.08M
    }
448
82.2M
    if (ctxt != NULL) {
449
82.2M
  ctxt->wellFormed = 0;
450
82.2M
  if (ctxt->recovery == 0)
451
5.00M
      ctxt->disableSAX = 1;
452
82.2M
    }
453
82.2M
}
454
455
/**
456
 * xmlFatalErrMsg:
457
 * @ctxt:  an XML parser context
458
 * @error:  the error number
459
 * @msg:  the error message
460
 *
461
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
462
 */
463
static void LIBXML_ATTR_FORMAT(3,0)
464
xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
465
               const char *msg)
466
8.51M
{
467
8.51M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
468
8.51M
        (ctxt->instate == XML_PARSER_EOF))
469
1.73k
  return;
470
8.51M
    if (ctxt != NULL)
471
8.51M
  ctxt->errNo = error;
472
8.51M
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
473
8.51M
                    XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
474
8.51M
    if (ctxt != NULL) {
475
8.51M
  ctxt->wellFormed = 0;
476
8.51M
  if (ctxt->recovery == 0)
477
2.01M
      ctxt->disableSAX = 1;
478
8.51M
    }
479
8.51M
}
480
481
/**
482
 * xmlWarningMsg:
483
 * @ctxt:  an XML parser context
484
 * @error:  the error number
485
 * @msg:  the error message
486
 * @str1:  extra data
487
 * @str2:  extra data
488
 *
489
 * Handle a warning.
490
 */
491
static void LIBXML_ATTR_FORMAT(3,0)
492
xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
493
              const char *msg, const xmlChar *str1, const xmlChar *str2)
494
47.7M
{
495
47.7M
    xmlStructuredErrorFunc schannel = NULL;
496
497
47.7M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
498
47.7M
        (ctxt->instate == XML_PARSER_EOF))
499
0
  return;
500
47.7M
    if ((ctxt != NULL) && (ctxt->sax != NULL) &&
501
47.7M
        (ctxt->sax->initialized == XML_SAX2_MAGIC))
502
23.4M
        schannel = ctxt->sax->serror;
503
47.7M
    if (ctxt != NULL) {
504
47.7M
        __xmlRaiseError(schannel,
505
47.7M
                    (ctxt->sax) ? ctxt->sax->warning : NULL,
506
47.7M
                    ctxt->userData,
507
47.7M
                    ctxt, NULL, XML_FROM_PARSER, error,
508
47.7M
                    XML_ERR_WARNING, NULL, 0,
509
47.7M
        (const char *) str1, (const char *) str2, NULL, 0, 0,
510
47.7M
        msg, (const char *) str1, (const char *) str2);
511
47.7M
    } else {
512
0
        __xmlRaiseError(schannel, NULL, NULL,
513
0
                    ctxt, NULL, XML_FROM_PARSER, error,
514
0
                    XML_ERR_WARNING, NULL, 0,
515
0
        (const char *) str1, (const char *) str2, NULL, 0, 0,
516
0
        msg, (const char *) str1, (const char *) str2);
517
0
    }
518
47.7M
}
519
520
/**
521
 * xmlValidityError:
522
 * @ctxt:  an XML parser context
523
 * @error:  the error number
524
 * @msg:  the error message
525
 * @str1:  extra data
526
 *
527
 * Handle a validity error.
528
 */
529
static void LIBXML_ATTR_FORMAT(3,0)
530
xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
531
              const char *msg, const xmlChar *str1, const xmlChar *str2)
532
11.4M
{
533
11.4M
    xmlStructuredErrorFunc schannel = NULL;
534
535
11.4M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
536
11.4M
        (ctxt->instate == XML_PARSER_EOF))
537
0
  return;
538
11.4M
    if (ctxt != NULL) {
539
11.4M
  ctxt->errNo = error;
540
11.4M
  if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
541
8.24M
      schannel = ctxt->sax->serror;
542
11.4M
    }
543
11.4M
    if (ctxt != NULL) {
544
11.4M
        __xmlRaiseError(schannel,
545
11.4M
                    ctxt->vctxt.error, ctxt->vctxt.userData,
546
11.4M
                    ctxt, NULL, XML_FROM_DTD, error,
547
11.4M
                    XML_ERR_ERROR, NULL, 0, (const char *) str1,
548
11.4M
        (const char *) str2, NULL, 0, 0,
549
11.4M
        msg, (const char *) str1, (const char *) str2);
550
11.4M
  ctxt->valid = 0;
551
11.4M
    } else {
552
0
        __xmlRaiseError(schannel, NULL, NULL,
553
0
                    ctxt, NULL, XML_FROM_DTD, error,
554
0
                    XML_ERR_ERROR, NULL, 0, (const char *) str1,
555
0
        (const char *) str2, NULL, 0, 0,
556
0
        msg, (const char *) str1, (const char *) str2);
557
0
    }
558
11.4M
}
559
560
/**
561
 * xmlFatalErrMsgInt:
562
 * @ctxt:  an XML parser context
563
 * @error:  the error number
564
 * @msg:  the error message
565
 * @val:  an integer value
566
 *
567
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
568
 */
569
static void LIBXML_ATTR_FORMAT(3,0)
570
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
571
                  const char *msg, int val)
572
6.29M
{
573
6.29M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
574
6.29M
        (ctxt->instate == XML_PARSER_EOF))
575
0
  return;
576
6.29M
    if (ctxt != NULL)
577
6.29M
  ctxt->errNo = error;
578
6.29M
    __xmlRaiseError(NULL, NULL, NULL,
579
6.29M
                    ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
580
6.29M
                    NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
581
6.29M
    if (ctxt != NULL) {
582
6.29M
  ctxt->wellFormed = 0;
583
6.29M
  if (ctxt->recovery == 0)
584
443k
      ctxt->disableSAX = 1;
585
6.29M
    }
586
6.29M
}
587
588
/**
589
 * xmlFatalErrMsgStrIntStr:
590
 * @ctxt:  an XML parser context
591
 * @error:  the error number
592
 * @msg:  the error message
593
 * @str1:  a string info
594
 * @val:  an integer value
595
 * @str2:  a string info
596
 *
597
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
598
 */
599
static void LIBXML_ATTR_FORMAT(3,0)
600
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
601
                  const char *msg, const xmlChar *str1, int val,
602
      const xmlChar *str2)
603
1.80M
{
604
1.80M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
605
1.80M
        (ctxt->instate == XML_PARSER_EOF))
606
0
  return;
607
1.80M
    if (ctxt != NULL)
608
1.80M
  ctxt->errNo = error;
609
1.80M
    __xmlRaiseError(NULL, NULL, NULL,
610
1.80M
                    ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
611
1.80M
                    NULL, 0, (const char *) str1, (const char *) str2,
612
1.80M
        NULL, val, 0, msg, str1, val, str2);
613
1.80M
    if (ctxt != NULL) {
614
1.80M
  ctxt->wellFormed = 0;
615
1.80M
  if (ctxt->recovery == 0)
616
535k
      ctxt->disableSAX = 1;
617
1.80M
    }
618
1.80M
}
619
620
/**
621
 * xmlFatalErrMsgStr:
622
 * @ctxt:  an XML parser context
623
 * @error:  the error number
624
 * @msg:  the error message
625
 * @val:  a string value
626
 *
627
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
628
 */
629
static void LIBXML_ATTR_FORMAT(3,0)
630
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
631
                  const char *msg, const xmlChar * val)
632
210M
{
633
210M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
634
210M
        (ctxt->instate == XML_PARSER_EOF))
635
71
  return;
636
210M
    if (ctxt != NULL)
637
210M
  ctxt->errNo = error;
638
210M
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
639
210M
                    XML_FROM_PARSER, error, XML_ERR_FATAL,
640
210M
                    NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
641
210M
                    val);
642
210M
    if (ctxt != NULL) {
643
210M
  ctxt->wellFormed = 0;
644
210M
  if (ctxt->recovery == 0)
645
79.1M
      ctxt->disableSAX = 1;
646
210M
    }
647
210M
}
648
649
/**
650
 * xmlErrMsgStr:
651
 * @ctxt:  an XML parser context
652
 * @error:  the error number
653
 * @msg:  the error message
654
 * @val:  a string value
655
 *
656
 * Handle a non fatal parser error
657
 */
658
static void LIBXML_ATTR_FORMAT(3,0)
659
xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
660
                  const char *msg, const xmlChar * val)
661
613k
{
662
613k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
663
613k
        (ctxt->instate == XML_PARSER_EOF))
664
0
  return;
665
613k
    if (ctxt != NULL)
666
613k
  ctxt->errNo = error;
667
613k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
668
613k
                    XML_FROM_PARSER, error, XML_ERR_ERROR,
669
613k
                    NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
670
613k
                    val);
671
613k
}
672
673
/**
674
 * xmlNsErr:
675
 * @ctxt:  an XML parser context
676
 * @error:  the error number
677
 * @msg:  the message
678
 * @info1:  extra information string
679
 * @info2:  extra information string
680
 *
681
 * Handle a namespace error, i.e. violating namespace well-formedness constraints
682
 */
683
static void LIBXML_ATTR_FORMAT(3,0)
684
xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
685
         const char *msg,
686
         const xmlChar * info1, const xmlChar * info2,
687
         const xmlChar * info3)
688
1.46M
{
689
1.46M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
690
1.46M
        (ctxt->instate == XML_PARSER_EOF))
691
298
  return;
692
1.46M
    if (ctxt != NULL)
693
1.46M
  ctxt->errNo = error;
694
1.46M
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
695
1.46M
                    XML_ERR_ERROR, NULL, 0, (const char *) info1,
696
1.46M
                    (const char *) info2, (const char *) info3, 0, 0, msg,
697
1.46M
                    info1, info2, info3);
698
1.46M
    if (ctxt != NULL)
699
1.46M
  ctxt->nsWellFormed = 0;
700
1.46M
}
701
702
/**
703
 * xmlNsWarn:
704
 * @ctxt:  an XML parser context
705
 * @error:  the error number
706
 * @msg:  the message
707
 * @info1:  extra information string
708
 * @info2:  extra information string
709
 *
710
 * Handle a namespace warning
711
 */
712
static void LIBXML_ATTR_FORMAT(3,0)
713
xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
714
         const char *msg,
715
         const xmlChar * info1, const xmlChar * info2,
716
         const xmlChar * info3)
717
124k
{
718
124k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
719
124k
        (ctxt->instate == XML_PARSER_EOF))
720
0
  return;
721
124k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
722
124k
                    XML_ERR_WARNING, NULL, 0, (const char *) info1,
723
124k
                    (const char *) info2, (const char *) info3, 0, 0, msg,
724
124k
                    info1, info2, info3);
725
124k
}
726
727
static void
728
1.71G
xmlSaturatedAdd(unsigned long *dst, unsigned long val) {
729
1.71G
    if (val > ULONG_MAX - *dst)
730
0
        *dst = ULONG_MAX;
731
1.71G
    else
732
1.71G
        *dst += val;
733
1.71G
}
734
735
static void
736
521M
xmlSaturatedAddSizeT(unsigned long *dst, unsigned long val) {
737
521M
    if (val > ULONG_MAX - *dst)
738
0
        *dst = ULONG_MAX;
739
521M
    else
740
521M
        *dst += val;
741
521M
}
742
743
/**
744
 * xmlParserEntityCheck:
745
 * @ctxt:  parser context
746
 * @extra:  sum of unexpanded entity sizes
747
 *
748
 * Check for non-linear entity expansion behaviour.
749
 *
750
 * In some cases like xmlStringDecodeEntities, this function is called
751
 * for each, possibly nested entity and its unexpanded content length.
752
 *
753
 * In other cases like xmlParseReference, it's only called for each
754
 * top-level entity with its unexpanded content length plus the sum of
755
 * the unexpanded content lengths (plus fixed cost) of all nested
756
 * entities.
757
 *
758
 * Summing the unexpanded lengths also adds the length of the reference.
759
 * This is by design. Taking the length of the entity name into account
760
 * discourages attacks that try to waste CPU time with abusively long
761
 * entity names. See test/recurse/lol6.xml for example. Each call also
762
 * adds some fixed cost XML_ENT_FIXED_COST to discourage attacks with
763
 * short entities.
764
 *
765
 * Returns 1 on error, 0 on success.
766
 */
767
static int
768
xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long extra)
769
513M
{
770
513M
    unsigned long consumed;
771
513M
    xmlParserInputPtr input = ctxt->input;
772
513M
    xmlEntityPtr entity = input->entity;
773
774
    /*
775
     * Compute total consumed bytes so far, including input streams of
776
     * external entities.
777
     */
778
513M
    consumed = input->parentConsumed;
779
513M
    if ((entity == NULL) ||
780
513M
        ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
781
351M
         ((entity->flags & XML_ENT_PARSED) == 0))) {
782
162M
        xmlSaturatedAdd(&consumed, input->consumed);
783
162M
        xmlSaturatedAddSizeT(&consumed, input->cur - input->base);
784
162M
    }
785
513M
    xmlSaturatedAdd(&consumed, ctxt->sizeentities);
786
787
    /*
788
     * Add extra cost and some fixed cost.
789
     */
790
513M
    xmlSaturatedAdd(&ctxt->sizeentcopy, extra);
791
513M
    xmlSaturatedAdd(&ctxt->sizeentcopy, XML_ENT_FIXED_COST);
792
793
    /*
794
     * It's important to always use saturation arithmetic when tracking
795
     * entity sizes to make the size checks reliable. If "sizeentcopy"
796
     * overflows, we have to abort.
797
     */
798
513M
    if ((ctxt->sizeentcopy > XML_MAX_TEXT_LENGTH) &&
799
513M
        ((ctxt->sizeentcopy >= ULONG_MAX) ||
800
6.62k
         (ctxt->sizeentcopy / XML_PARSER_NON_LINEAR > consumed))) {
801
6.62k
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_LOOP,
802
6.62k
                       "Maximum entity amplification factor exceeded");
803
6.62k
        xmlHaltParser(ctxt);
804
6.62k
        return(1);
805
6.62k
    }
806
807
513M
    return(0);
808
513M
}
809
810
/************************************************************************
811
 *                  *
812
 *    Library wide options          *
813
 *                  *
814
 ************************************************************************/
815
816
/**
817
  * xmlHasFeature:
818
  * @feature: the feature to be examined
819
  *
820
  * Examines if the library has been compiled with a given feature.
821
  *
822
  * Returns a non-zero value if the feature exists, otherwise zero.
823
  * Returns zero (0) if the feature does not exist or an
824
  * unknown feature is requested, non-zero otherwise.
825
  */
826
int
827
xmlHasFeature(xmlFeature feature)
828
0
{
829
0
    switch (feature) {
830
0
  case XML_WITH_THREAD:
831
0
#ifdef LIBXML_THREAD_ENABLED
832
0
      return(1);
833
#else
834
      return(0);
835
#endif
836
0
        case XML_WITH_TREE:
837
0
#ifdef LIBXML_TREE_ENABLED
838
0
            return(1);
839
#else
840
            return(0);
841
#endif
842
0
        case XML_WITH_OUTPUT:
843
0
#ifdef LIBXML_OUTPUT_ENABLED
844
0
            return(1);
845
#else
846
            return(0);
847
#endif
848
0
        case XML_WITH_PUSH:
849
0
#ifdef LIBXML_PUSH_ENABLED
850
0
            return(1);
851
#else
852
            return(0);
853
#endif
854
0
        case XML_WITH_READER:
855
0
#ifdef LIBXML_READER_ENABLED
856
0
            return(1);
857
#else
858
            return(0);
859
#endif
860
0
        case XML_WITH_PATTERN:
861
0
#ifdef LIBXML_PATTERN_ENABLED
862
0
            return(1);
863
#else
864
            return(0);
865
#endif
866
0
        case XML_WITH_WRITER:
867
0
#ifdef LIBXML_WRITER_ENABLED
868
0
            return(1);
869
#else
870
            return(0);
871
#endif
872
0
        case XML_WITH_SAX1:
873
0
#ifdef LIBXML_SAX1_ENABLED
874
0
            return(1);
875
#else
876
            return(0);
877
#endif
878
0
        case XML_WITH_FTP:
879
#ifdef LIBXML_FTP_ENABLED
880
            return(1);
881
#else
882
0
            return(0);
883
0
#endif
884
0
        case XML_WITH_HTTP:
885
#ifdef LIBXML_HTTP_ENABLED
886
            return(1);
887
#else
888
0
            return(0);
889
0
#endif
890
0
        case XML_WITH_VALID:
891
0
#ifdef LIBXML_VALID_ENABLED
892
0
            return(1);
893
#else
894
            return(0);
895
#endif
896
0
        case XML_WITH_HTML:
897
0
#ifdef LIBXML_HTML_ENABLED
898
0
            return(1);
899
#else
900
            return(0);
901
#endif
902
0
        case XML_WITH_LEGACY:
903
#ifdef LIBXML_LEGACY_ENABLED
904
            return(1);
905
#else
906
0
            return(0);
907
0
#endif
908
0
        case XML_WITH_C14N:
909
0
#ifdef LIBXML_C14N_ENABLED
910
0
            return(1);
911
#else
912
            return(0);
913
#endif
914
0
        case XML_WITH_CATALOG:
915
0
#ifdef LIBXML_CATALOG_ENABLED
916
0
            return(1);
917
#else
918
            return(0);
919
#endif
920
0
        case XML_WITH_XPATH:
921
0
#ifdef LIBXML_XPATH_ENABLED
922
0
            return(1);
923
#else
924
            return(0);
925
#endif
926
0
        case XML_WITH_XPTR:
927
0
#ifdef LIBXML_XPTR_ENABLED
928
0
            return(1);
929
#else
930
            return(0);
931
#endif
932
0
        case XML_WITH_XINCLUDE:
933
0
#ifdef LIBXML_XINCLUDE_ENABLED
934
0
            return(1);
935
#else
936
            return(0);
937
#endif
938
0
        case XML_WITH_ICONV:
939
0
#ifdef LIBXML_ICONV_ENABLED
940
0
            return(1);
941
#else
942
            return(0);
943
#endif
944
0
        case XML_WITH_ISO8859X:
945
0
#ifdef LIBXML_ISO8859X_ENABLED
946
0
            return(1);
947
#else
948
            return(0);
949
#endif
950
0
        case XML_WITH_UNICODE:
951
0
#ifdef LIBXML_UNICODE_ENABLED
952
0
            return(1);
953
#else
954
            return(0);
955
#endif
956
0
        case XML_WITH_REGEXP:
957
0
#ifdef LIBXML_REGEXP_ENABLED
958
0
            return(1);
959
#else
960
            return(0);
961
#endif
962
0
        case XML_WITH_AUTOMATA:
963
0
#ifdef LIBXML_AUTOMATA_ENABLED
964
0
            return(1);
965
#else
966
            return(0);
967
#endif
968
0
        case XML_WITH_EXPR:
969
#ifdef LIBXML_EXPR_ENABLED
970
            return(1);
971
#else
972
0
            return(0);
973
0
#endif
974
0
        case XML_WITH_SCHEMAS:
975
0
#ifdef LIBXML_SCHEMAS_ENABLED
976
0
            return(1);
977
#else
978
            return(0);
979
#endif
980
0
        case XML_WITH_SCHEMATRON:
981
0
#ifdef LIBXML_SCHEMATRON_ENABLED
982
0
            return(1);
983
#else
984
            return(0);
985
#endif
986
0
        case XML_WITH_MODULES:
987
0
#ifdef LIBXML_MODULES_ENABLED
988
0
            return(1);
989
#else
990
            return(0);
991
#endif
992
0
        case XML_WITH_DEBUG:
993
#ifdef LIBXML_DEBUG_ENABLED
994
            return(1);
995
#else
996
0
            return(0);
997
0
#endif
998
0
        case XML_WITH_DEBUG_MEM:
999
#ifdef DEBUG_MEMORY_LOCATION
1000
            return(1);
1001
#else
1002
0
            return(0);
1003
0
#endif
1004
0
        case XML_WITH_DEBUG_RUN:
1005
0
            return(0);
1006
0
        case XML_WITH_ZLIB:
1007
0
#ifdef LIBXML_ZLIB_ENABLED
1008
0
            return(1);
1009
#else
1010
            return(0);
1011
#endif
1012
0
        case XML_WITH_LZMA:
1013
0
#ifdef LIBXML_LZMA_ENABLED
1014
0
            return(1);
1015
#else
1016
            return(0);
1017
#endif
1018
0
        case XML_WITH_ICU:
1019
#ifdef LIBXML_ICU_ENABLED
1020
            return(1);
1021
#else
1022
0
            return(0);
1023
0
#endif
1024
0
        default:
1025
0
      break;
1026
0
     }
1027
0
     return(0);
1028
0
}
1029
1030
/************************************************************************
1031
 *                  *
1032
 *    SAX2 defaulted attributes handling      *
1033
 *                  *
1034
 ************************************************************************/
1035
1036
/**
1037
 * xmlDetectSAX2:
1038
 * @ctxt:  an XML parser context
1039
 *
1040
 * Do the SAX2 detection and specific initialization
1041
 */
1042
static void
1043
2.20M
xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1044
2.20M
    xmlSAXHandlerPtr sax;
1045
1046
    /* Avoid unused variable warning if features are disabled. */
1047
2.20M
    (void) sax;
1048
1049
2.20M
    if (ctxt == NULL) return;
1050
2.20M
    sax = ctxt->sax;
1051
2.20M
#ifdef LIBXML_SAX1_ENABLED
1052
2.20M
    if ((sax) &&  (sax->initialized == XML_SAX2_MAGIC) &&
1053
2.20M
        ((sax->startElementNs != NULL) ||
1054
1.43M
         (sax->endElementNs != NULL) ||
1055
1.43M
         ((sax->startElement == NULL) && (sax->endElement == NULL))))
1056
1.43M
        ctxt->sax2 = 1;
1057
#else
1058
    ctxt->sax2 = 1;
1059
#endif /* LIBXML_SAX1_ENABLED */
1060
1061
2.20M
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1062
2.20M
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1063
2.20M
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
1064
2.20M
    if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1065
2.20M
    (ctxt->str_xml_ns == NULL)) {
1066
0
        xmlErrMemory(ctxt, NULL);
1067
0
    }
1068
2.20M
}
1069
1070
typedef struct _xmlDefAttrs xmlDefAttrs;
1071
typedef xmlDefAttrs *xmlDefAttrsPtr;
1072
struct _xmlDefAttrs {
1073
    int nbAttrs;  /* number of defaulted attributes on that element */
1074
    int maxAttrs;       /* the size of the array */
1075
#if __STDC_VERSION__ >= 199901L
1076
    /* Using a C99 flexible array member avoids UBSan errors. */
1077
    const xmlChar *values[]; /* array of localname/prefix/values/external */
1078
#else
1079
    const xmlChar *values[5];
1080
#endif
1081
};
1082
1083
/**
1084
 * xmlAttrNormalizeSpace:
1085
 * @src: the source string
1086
 * @dst: the target string
1087
 *
1088
 * Normalize the space in non CDATA attribute values:
1089
 * If the attribute type is not CDATA, then the XML processor MUST further
1090
 * process the normalized attribute value by discarding any leading and
1091
 * trailing space (#x20) characters, and by replacing sequences of space
1092
 * (#x20) characters by a single space (#x20) character.
1093
 * Note that the size of dst need to be at least src, and if one doesn't need
1094
 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1095
 * passing src as dst is just fine.
1096
 *
1097
 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1098
 *         is needed.
1099
 */
1100
static xmlChar *
1101
xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1102
597k
{
1103
597k
    if ((src == NULL) || (dst == NULL))
1104
0
        return(NULL);
1105
1106
1.04M
    while (*src == 0x20) src++;
1107
9.86M
    while (*src != 0) {
1108
9.26M
  if (*src == 0x20) {
1109
3.94M
      while (*src == 0x20) src++;
1110
565k
      if (*src != 0)
1111
483k
    *dst++ = 0x20;
1112
8.69M
  } else {
1113
8.69M
      *dst++ = *src++;
1114
8.69M
  }
1115
9.26M
    }
1116
597k
    *dst = 0;
1117
597k
    if (dst == src)
1118
443k
       return(NULL);
1119
154k
    return(dst);
1120
597k
}
1121
1122
/**
1123
 * xmlAttrNormalizeSpace2:
1124
 * @src: the source string
1125
 *
1126
 * Normalize the space in non CDATA attribute values, a slightly more complex
1127
 * front end to avoid allocation problems when running on attribute values
1128
 * coming from the input.
1129
 *
1130
 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1131
 *         is needed.
1132
 */
1133
static const xmlChar *
1134
xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
1135
224k
{
1136
224k
    int i;
1137
224k
    int remove_head = 0;
1138
224k
    int need_realloc = 0;
1139
224k
    const xmlChar *cur;
1140
1141
224k
    if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1142
0
        return(NULL);
1143
224k
    i = *len;
1144
224k
    if (i <= 0)
1145
17.0k
        return(NULL);
1146
1147
207k
    cur = src;
1148
281k
    while (*cur == 0x20) {
1149
73.3k
        cur++;
1150
73.3k
  remove_head++;
1151
73.3k
    }
1152
3.93M
    while (*cur != 0) {
1153
3.77M
  if (*cur == 0x20) {
1154
176k
      cur++;
1155
176k
      if ((*cur == 0x20) || (*cur == 0)) {
1156
43.1k
          need_realloc = 1;
1157
43.1k
    break;
1158
43.1k
      }
1159
176k
  } else
1160
3.59M
      cur++;
1161
3.77M
    }
1162
207k
    if (need_realloc) {
1163
43.1k
        xmlChar *ret;
1164
1165
43.1k
  ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1166
43.1k
  if (ret == NULL) {
1167
0
      xmlErrMemory(ctxt, NULL);
1168
0
      return(NULL);
1169
0
  }
1170
43.1k
  xmlAttrNormalizeSpace(ret, ret);
1171
43.1k
  *len = strlen((const char *)ret);
1172
43.1k
        return(ret);
1173
164k
    } else if (remove_head) {
1174
8.51k
        *len -= remove_head;
1175
8.51k
        memmove(src, src + remove_head, 1 + *len);
1176
8.51k
  return(src);
1177
8.51k
    }
1178
156k
    return(NULL);
1179
207k
}
1180
1181
/**
1182
 * xmlAddDefAttrs:
1183
 * @ctxt:  an XML parser context
1184
 * @fullname:  the element fullname
1185
 * @fullattr:  the attribute fullname
1186
 * @value:  the attribute value
1187
 *
1188
 * Add a defaulted attribute for an element
1189
 */
1190
static void
1191
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1192
               const xmlChar *fullname,
1193
               const xmlChar *fullattr,
1194
954k
               const xmlChar *value) {
1195
954k
    xmlDefAttrsPtr defaults;
1196
954k
    int len;
1197
954k
    const xmlChar *name;
1198
954k
    const xmlChar *prefix;
1199
1200
    /*
1201
     * Allows to detect attribute redefinitions
1202
     */
1203
954k
    if (ctxt->attsSpecial != NULL) {
1204
905k
        if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1205
225k
      return;
1206
905k
    }
1207
1208
728k
    if (ctxt->attsDefault == NULL) {
1209
81.8k
        ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1210
81.8k
  if (ctxt->attsDefault == NULL)
1211
0
      goto mem_error;
1212
81.8k
    }
1213
1214
    /*
1215
     * split the element name into prefix:localname , the string found
1216
     * are within the DTD and then not associated to namespace names.
1217
     */
1218
728k
    name = xmlSplitQName3(fullname, &len);
1219
728k
    if (name == NULL) {
1220
703k
        name = xmlDictLookup(ctxt->dict, fullname, -1);
1221
703k
  prefix = NULL;
1222
703k
    } else {
1223
25.2k
        name = xmlDictLookup(ctxt->dict, name, -1);
1224
25.2k
  prefix = xmlDictLookup(ctxt->dict, fullname, len);
1225
25.2k
    }
1226
1227
    /*
1228
     * make sure there is some storage
1229
     */
1230
728k
    defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1231
728k
    if (defaults == NULL) {
1232
401k
        defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
1233
401k
                     (4 * 5) * sizeof(const xmlChar *));
1234
401k
  if (defaults == NULL)
1235
0
      goto mem_error;
1236
401k
  defaults->nbAttrs = 0;
1237
401k
  defaults->maxAttrs = 4;
1238
401k
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1239
401k
                          defaults, NULL) < 0) {
1240
0
      xmlFree(defaults);
1241
0
      goto mem_error;
1242
0
  }
1243
401k
    } else if (defaults->nbAttrs >= defaults->maxAttrs) {
1244
6.71k
        xmlDefAttrsPtr temp;
1245
1246
6.71k
        temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
1247
6.71k
           (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1248
6.71k
  if (temp == NULL)
1249
0
      goto mem_error;
1250
6.71k
  defaults = temp;
1251
6.71k
  defaults->maxAttrs *= 2;
1252
6.71k
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1253
6.71k
                          defaults, NULL) < 0) {
1254
0
      xmlFree(defaults);
1255
0
      goto mem_error;
1256
0
  }
1257
6.71k
    }
1258
1259
    /*
1260
     * Split the element name into prefix:localname , the string found
1261
     * are within the DTD and hen not associated to namespace names.
1262
     */
1263
728k
    name = xmlSplitQName3(fullattr, &len);
1264
728k
    if (name == NULL) {
1265
614k
        name = xmlDictLookup(ctxt->dict, fullattr, -1);
1266
614k
  prefix = NULL;
1267
614k
    } else {
1268
113k
        name = xmlDictLookup(ctxt->dict, name, -1);
1269
113k
  prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1270
113k
    }
1271
1272
728k
    defaults->values[5 * defaults->nbAttrs] = name;
1273
728k
    defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1274
    /* intern the string and precompute the end */
1275
728k
    len = xmlStrlen(value);
1276
728k
    value = xmlDictLookup(ctxt->dict, value, len);
1277
728k
    if (value == NULL)
1278
0
        goto mem_error;
1279
728k
    defaults->values[5 * defaults->nbAttrs + 2] = value;
1280
728k
    defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1281
728k
    if (ctxt->external)
1282
410k
        defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1283
318k
    else
1284
318k
        defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1285
728k
    defaults->nbAttrs++;
1286
1287
728k
    return;
1288
1289
0
mem_error:
1290
0
    xmlErrMemory(ctxt, NULL);
1291
0
    return;
1292
728k
}
1293
1294
/**
1295
 * xmlAddSpecialAttr:
1296
 * @ctxt:  an XML parser context
1297
 * @fullname:  the element fullname
1298
 * @fullattr:  the attribute fullname
1299
 * @type:  the attribute type
1300
 *
1301
 * Register this attribute type
1302
 */
1303
static void
1304
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1305
      const xmlChar *fullname,
1306
      const xmlChar *fullattr,
1307
      int type)
1308
11.3M
{
1309
11.3M
    if (ctxt->attsSpecial == NULL) {
1310
136k
        ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1311
136k
  if (ctxt->attsSpecial == NULL)
1312
0
      goto mem_error;
1313
136k
    }
1314
1315
11.3M
    if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1316
1.00M
        return;
1317
1318
10.3M
    xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1319
10.3M
                     (void *) (ptrdiff_t) type);
1320
10.3M
    return;
1321
1322
0
mem_error:
1323
0
    xmlErrMemory(ctxt, NULL);
1324
0
    return;
1325
11.3M
}
1326
1327
/**
1328
 * xmlCleanSpecialAttrCallback:
1329
 *
1330
 * Removes CDATA attributes from the special attribute table
1331
 */
1332
static void
1333
xmlCleanSpecialAttrCallback(void *payload, void *data,
1334
                            const xmlChar *fullname, const xmlChar *fullattr,
1335
8.32M
                            const xmlChar *unused ATTRIBUTE_UNUSED) {
1336
8.32M
    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1337
1338
8.32M
    if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) {
1339
3.20M
        xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1340
3.20M
    }
1341
8.32M
}
1342
1343
/**
1344
 * xmlCleanSpecialAttr:
1345
 * @ctxt:  an XML parser context
1346
 *
1347
 * Trim the list of attributes defined to remove all those of type
1348
 * CDATA as they are not special. This call should be done when finishing
1349
 * to parse the DTD and before starting to parse the document root.
1350
 */
1351
static void
1352
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1353
519k
{
1354
519k
    if (ctxt->attsSpecial == NULL)
1355
408k
        return;
1356
1357
110k
    xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1358
1359
110k
    if (xmlHashSize(ctxt->attsSpecial) == 0) {
1360
17.8k
        xmlHashFree(ctxt->attsSpecial, NULL);
1361
17.8k
        ctxt->attsSpecial = NULL;
1362
17.8k
    }
1363
110k
    return;
1364
519k
}
1365
1366
/**
1367
 * xmlCheckLanguageID:
1368
 * @lang:  pointer to the string value
1369
 *
1370
 * Checks that the value conforms to the LanguageID production:
1371
 *
1372
 * NOTE: this is somewhat deprecated, those productions were removed from
1373
 *       the XML Second edition.
1374
 *
1375
 * [33] LanguageID ::= Langcode ('-' Subcode)*
1376
 * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
1377
 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1378
 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1379
 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1380
 * [38] Subcode ::= ([a-z] | [A-Z])+
1381
 *
1382
 * The current REC references the successor of RFC 1766, currently RFC 5646
1383
 *
1384
 * http://www.rfc-editor.org/rfc/rfc5646.txt
1385
 * langtag       = language
1386
 *                 ["-" script]
1387
 *                 ["-" region]
1388
 *                 *("-" variant)
1389
 *                 *("-" extension)
1390
 *                 ["-" privateuse]
1391
 * language      = 2*3ALPHA            ; shortest ISO 639 code
1392
 *                 ["-" extlang]       ; sometimes followed by
1393
 *                                     ; extended language subtags
1394
 *               / 4ALPHA              ; or reserved for future use
1395
 *               / 5*8ALPHA            ; or registered language subtag
1396
 *
1397
 * extlang       = 3ALPHA              ; selected ISO 639 codes
1398
 *                 *2("-" 3ALPHA)      ; permanently reserved
1399
 *
1400
 * script        = 4ALPHA              ; ISO 15924 code
1401
 *
1402
 * region        = 2ALPHA              ; ISO 3166-1 code
1403
 *               / 3DIGIT              ; UN M.49 code
1404
 *
1405
 * variant       = 5*8alphanum         ; registered variants
1406
 *               / (DIGIT 3alphanum)
1407
 *
1408
 * extension     = singleton 1*("-" (2*8alphanum))
1409
 *
1410
 *                                     ; Single alphanumerics
1411
 *                                     ; "x" reserved for private use
1412
 * singleton     = DIGIT               ; 0 - 9
1413
 *               / %x41-57             ; A - W
1414
 *               / %x59-5A             ; Y - Z
1415
 *               / %x61-77             ; a - w
1416
 *               / %x79-7A             ; y - z
1417
 *
1418
 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1419
 * The parser below doesn't try to cope with extension or privateuse
1420
 * that could be added but that's not interoperable anyway
1421
 *
1422
 * Returns 1 if correct 0 otherwise
1423
 **/
1424
int
1425
xmlCheckLanguageID(const xmlChar * lang)
1426
95.9k
{
1427
95.9k
    const xmlChar *cur = lang, *nxt;
1428
1429
95.9k
    if (cur == NULL)
1430
1.22k
        return (0);
1431
94.6k
    if (((cur[0] == 'i') && (cur[1] == '-')) ||
1432
94.6k
        ((cur[0] == 'I') && (cur[1] == '-')) ||
1433
94.6k
        ((cur[0] == 'x') && (cur[1] == '-')) ||
1434
94.6k
        ((cur[0] == 'X') && (cur[1] == '-'))) {
1435
        /*
1436
         * Still allow IANA code and user code which were coming
1437
         * from the previous version of the XML-1.0 specification
1438
         * it's deprecated but we should not fail
1439
         */
1440
4.11k
        cur += 2;
1441
26.4k
        while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1442
26.4k
               ((cur[0] >= 'a') && (cur[0] <= 'z')))
1443
22.3k
            cur++;
1444
4.11k
        return(cur[0] == 0);
1445
4.11k
    }
1446
90.5k
    nxt = cur;
1447
341k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1448
341k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1449
251k
           nxt++;
1450
90.5k
    if (nxt - cur >= 4) {
1451
        /*
1452
         * Reserved
1453
         */
1454
8.92k
        if ((nxt - cur > 8) || (nxt[0] != 0))
1455
5.15k
            return(0);
1456
3.77k
        return(1);
1457
8.92k
    }
1458
81.6k
    if (nxt - cur < 2)
1459
9.29k
        return(0);
1460
    /* we got an ISO 639 code */
1461
72.3k
    if (nxt[0] == 0)
1462
33.6k
        return(1);
1463
38.7k
    if (nxt[0] != '-')
1464
2.19k
        return(0);
1465
1466
36.5k
    nxt++;
1467
36.5k
    cur = nxt;
1468
    /* now we can have extlang or script or region or variant */
1469
36.5k
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1470
1.53k
        goto region_m49;
1471
1472
146k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1473
146k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1474
111k
           nxt++;
1475
34.9k
    if (nxt - cur == 4)
1476
8.82k
        goto script;
1477
26.1k
    if (nxt - cur == 2)
1478
4.79k
        goto region;
1479
21.3k
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1480
1.24k
        goto variant;
1481
20.1k
    if (nxt - cur != 3)
1482
4.56k
        return(0);
1483
    /* we parsed an extlang */
1484
15.5k
    if (nxt[0] == 0)
1485
1.33k
        return(1);
1486
14.2k
    if (nxt[0] != '-')
1487
1.19k
        return(0);
1488
1489
13.0k
    nxt++;
1490
13.0k
    cur = nxt;
1491
    /* now we can have script or region or variant */
1492
13.0k
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1493
1.29k
        goto region_m49;
1494
1495
51.7k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1496
51.7k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1497
39.9k
           nxt++;
1498
11.7k
    if (nxt - cur == 2)
1499
3.29k
        goto region;
1500
8.43k
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1501
617
        goto variant;
1502
7.82k
    if (nxt - cur != 4)
1503
4.78k
        return(0);
1504
    /* we parsed a script */
1505
11.8k
script:
1506
11.8k
    if (nxt[0] == 0)
1507
2.04k
        return(1);
1508
9.81k
    if (nxt[0] != '-')
1509
1.41k
        return(0);
1510
1511
8.39k
    nxt++;
1512
8.39k
    cur = nxt;
1513
    /* now we can have region or variant */
1514
8.39k
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1515
1.33k
        goto region_m49;
1516
1517
48.3k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1518
48.3k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1519
41.3k
           nxt++;
1520
1521
7.06k
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1522
812
        goto variant;
1523
6.24k
    if (nxt - cur != 2)
1524
2.40k
        return(0);
1525
    /* we parsed a region */
1526
13.5k
region:
1527
13.5k
    if (nxt[0] == 0)
1528
3.60k
        return(1);
1529
9.97k
    if (nxt[0] != '-')
1530
5.44k
        return(0);
1531
1532
4.52k
    nxt++;
1533
4.52k
    cur = nxt;
1534
    /* now we can just have a variant */
1535
25.6k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1536
25.6k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1537
21.0k
           nxt++;
1538
1539
4.52k
    if ((nxt - cur < 5) || (nxt - cur > 8))
1540
3.21k
        return(0);
1541
1542
    /* we parsed a variant */
1543
3.97k
variant:
1544
3.97k
    if (nxt[0] == 0)
1545
710
        return(1);
1546
3.26k
    if (nxt[0] != '-')
1547
2.85k
        return(0);
1548
    /* extensions and private use subtags not checked */
1549
412
    return (1);
1550
1551
4.16k
region_m49:
1552
4.16k
    if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1553
4.16k
        ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1554
1.63k
        nxt += 3;
1555
1.63k
        goto region;
1556
1.63k
    }
1557
2.52k
    return(0);
1558
4.16k
}
1559
1560
/************************************************************************
1561
 *                  *
1562
 *    Parser stacks related functions and macros    *
1563
 *                  *
1564
 ************************************************************************/
1565
1566
static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1567
                                            const xmlChar ** str);
1568
1569
#ifdef SAX2
1570
/**
1571
 * nsPush:
1572
 * @ctxt:  an XML parser context
1573
 * @prefix:  the namespace prefix or NULL
1574
 * @URL:  the namespace name
1575
 *
1576
 * Pushes a new parser namespace on top of the ns stack
1577
 *
1578
 * Returns -1 in case of error, -2 if the namespace should be discarded
1579
 *     and the index in the stack otherwise.
1580
 */
1581
static int
1582
nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1583
945k
{
1584
945k
    if (ctxt->options & XML_PARSE_NSCLEAN) {
1585
304k
        int i;
1586
2.52M
  for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
1587
2.44M
      if (ctxt->nsTab[i] == prefix) {
1588
    /* in scope */
1589
226k
          if (ctxt->nsTab[i + 1] == URL)
1590
86.2k
        return(-2);
1591
    /* out of scope keep it */
1592
139k
    break;
1593
226k
      }
1594
2.44M
  }
1595
304k
    }
1596
859k
    if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1597
90.1k
  ctxt->nsMax = 10;
1598
90.1k
  ctxt->nsNr = 0;
1599
90.1k
  ctxt->nsTab = (const xmlChar **)
1600
90.1k
                xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1601
90.1k
  if (ctxt->nsTab == NULL) {
1602
0
      xmlErrMemory(ctxt, NULL);
1603
0
      ctxt->nsMax = 0;
1604
0
            return (-1);
1605
0
  }
1606
769k
    } else if (ctxt->nsNr >= ctxt->nsMax) {
1607
15.8k
        const xmlChar ** tmp;
1608
15.8k
        ctxt->nsMax *= 2;
1609
15.8k
        tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1610
15.8k
            ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1611
15.8k
        if (tmp == NULL) {
1612
0
            xmlErrMemory(ctxt, NULL);
1613
0
      ctxt->nsMax /= 2;
1614
0
            return (-1);
1615
0
        }
1616
15.8k
  ctxt->nsTab = tmp;
1617
15.8k
    }
1618
859k
    ctxt->nsTab[ctxt->nsNr++] = prefix;
1619
859k
    ctxt->nsTab[ctxt->nsNr++] = URL;
1620
859k
    return (ctxt->nsNr);
1621
859k
}
1622
/**
1623
 * nsPop:
1624
 * @ctxt: an XML parser context
1625
 * @nr:  the number to pop
1626
 *
1627
 * Pops the top @nr parser prefix/namespace from the ns stack
1628
 *
1629
 * Returns the number of namespaces removed
1630
 */
1631
static int
1632
nsPop(xmlParserCtxtPtr ctxt, int nr)
1633
237k
{
1634
237k
    int i;
1635
1636
237k
    if (ctxt->nsTab == NULL) return(0);
1637
237k
    if (ctxt->nsNr < nr) {
1638
0
        xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1639
0
        nr = ctxt->nsNr;
1640
0
    }
1641
237k
    if (ctxt->nsNr <= 0)
1642
0
        return (0);
1643
1644
970k
    for (i = 0;i < nr;i++) {
1645
733k
         ctxt->nsNr--;
1646
733k
   ctxt->nsTab[ctxt->nsNr] = NULL;
1647
733k
    }
1648
237k
    return(nr);
1649
237k
}
1650
#endif
1651
1652
static int
1653
213k
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1654
213k
    const xmlChar **atts;
1655
213k
    int *attallocs;
1656
213k
    int maxatts;
1657
1658
213k
    if (nr + 5 > ctxt->maxatts) {
1659
213k
  maxatts = ctxt->maxatts == 0 ? 55 : (nr + 5) * 2;
1660
213k
  atts = (const xmlChar **) xmlMalloc(
1661
213k
             maxatts * sizeof(const xmlChar *));
1662
213k
  if (atts == NULL) goto mem_error;
1663
213k
  attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1664
213k
                               (maxatts / 5) * sizeof(int));
1665
213k
  if (attallocs == NULL) {
1666
0
            xmlFree(atts);
1667
0
            goto mem_error;
1668
0
        }
1669
213k
        if (ctxt->maxatts > 0)
1670
1.95k
            memcpy(atts, ctxt->atts, ctxt->maxatts * sizeof(const xmlChar *));
1671
213k
        xmlFree(ctxt->atts);
1672
213k
  ctxt->atts = atts;
1673
213k
  ctxt->attallocs = attallocs;
1674
213k
  ctxt->maxatts = maxatts;
1675
213k
    }
1676
213k
    return(ctxt->maxatts);
1677
0
mem_error:
1678
0
    xmlErrMemory(ctxt, NULL);
1679
0
    return(-1);
1680
213k
}
1681
1682
/**
1683
 * inputPush:
1684
 * @ctxt:  an XML parser context
1685
 * @value:  the parser input
1686
 *
1687
 * Pushes a new parser input on top of the input stack
1688
 *
1689
 * Returns -1 in case of error, the index in the stack otherwise
1690
 */
1691
int
1692
inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1693
353M
{
1694
353M
    if ((ctxt == NULL) || (value == NULL))
1695
0
        return(-1);
1696
353M
    if (ctxt->inputNr >= ctxt->inputMax) {
1697
11.7k
        size_t newSize = ctxt->inputMax * 2;
1698
11.7k
        xmlParserInputPtr *tmp;
1699
1700
11.7k
        tmp = (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1701
11.7k
                                               newSize * sizeof(*tmp));
1702
11.7k
        if (tmp == NULL) {
1703
0
            xmlErrMemory(ctxt, NULL);
1704
0
            return (-1);
1705
0
        }
1706
11.7k
        ctxt->inputTab = tmp;
1707
11.7k
        ctxt->inputMax = newSize;
1708
11.7k
    }
1709
353M
    ctxt->inputTab[ctxt->inputNr] = value;
1710
353M
    ctxt->input = value;
1711
353M
    return (ctxt->inputNr++);
1712
353M
}
1713
/**
1714
 * inputPop:
1715
 * @ctxt: an XML parser context
1716
 *
1717
 * Pops the top parser input from the input stack
1718
 *
1719
 * Returns the input just removed
1720
 */
1721
xmlParserInputPtr
1722
inputPop(xmlParserCtxtPtr ctxt)
1723
359M
{
1724
359M
    xmlParserInputPtr ret;
1725
1726
359M
    if (ctxt == NULL)
1727
0
        return(NULL);
1728
359M
    if (ctxt->inputNr <= 0)
1729
6.07M
        return (NULL);
1730
353M
    ctxt->inputNr--;
1731
353M
    if (ctxt->inputNr > 0)
1732
351M
        ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1733
1.63M
    else
1734
1.63M
        ctxt->input = NULL;
1735
353M
    ret = ctxt->inputTab[ctxt->inputNr];
1736
353M
    ctxt->inputTab[ctxt->inputNr] = NULL;
1737
353M
    return (ret);
1738
359M
}
1739
/**
1740
 * nodePush:
1741
 * @ctxt:  an XML parser context
1742
 * @value:  the element node
1743
 *
1744
 * Pushes a new element node on top of the node stack
1745
 *
1746
 * Returns -1 in case of error, the index in the stack otherwise
1747
 */
1748
int
1749
nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1750
60.7M
{
1751
60.7M
    if (ctxt == NULL) return(0);
1752
60.7M
    if (ctxt->nodeNr >= ctxt->nodeMax) {
1753
48.7k
        xmlNodePtr *tmp;
1754
1755
48.7k
  tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1756
48.7k
                                      ctxt->nodeMax * 2 *
1757
48.7k
                                      sizeof(ctxt->nodeTab[0]));
1758
48.7k
        if (tmp == NULL) {
1759
0
            xmlErrMemory(ctxt, NULL);
1760
0
            return (-1);
1761
0
        }
1762
48.7k
        ctxt->nodeTab = tmp;
1763
48.7k
  ctxt->nodeMax *= 2;
1764
48.7k
    }
1765
60.7M
    if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1766
60.7M
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
1767
287
  xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1768
287
     "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1769
287
        xmlParserMaxDepth);
1770
287
  xmlHaltParser(ctxt);
1771
287
  return(-1);
1772
287
    }
1773
60.7M
    ctxt->nodeTab[ctxt->nodeNr] = value;
1774
60.7M
    ctxt->node = value;
1775
60.7M
    return (ctxt->nodeNr++);
1776
60.7M
}
1777
1778
/**
1779
 * nodePop:
1780
 * @ctxt: an XML parser context
1781
 *
1782
 * Pops the top element node from the node stack
1783
 *
1784
 * Returns the node just removed
1785
 */
1786
xmlNodePtr
1787
nodePop(xmlParserCtxtPtr ctxt)
1788
59.0M
{
1789
59.0M
    xmlNodePtr ret;
1790
1791
59.0M
    if (ctxt == NULL) return(NULL);
1792
59.0M
    if (ctxt->nodeNr <= 0)
1793
416k
        return (NULL);
1794
58.5M
    ctxt->nodeNr--;
1795
58.5M
    if (ctxt->nodeNr > 0)
1796
57.7M
        ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1797
799k
    else
1798
799k
        ctxt->node = NULL;
1799
58.5M
    ret = ctxt->nodeTab[ctxt->nodeNr];
1800
58.5M
    ctxt->nodeTab[ctxt->nodeNr] = NULL;
1801
58.5M
    return (ret);
1802
59.0M
}
1803
1804
/**
1805
 * nameNsPush:
1806
 * @ctxt:  an XML parser context
1807
 * @value:  the element name
1808
 * @prefix:  the element prefix
1809
 * @URI:  the element namespace name
1810
 * @line:  the current line number for error messages
1811
 * @nsNr:  the number of namespaces pushed on the namespace table
1812
 *
1813
 * Pushes a new element name/prefix/URL on top of the name stack
1814
 *
1815
 * Returns -1 in case of error, the index in the stack otherwise
1816
 */
1817
static int
1818
nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1819
           const xmlChar *prefix, const xmlChar *URI, int line, int nsNr)
1820
48.4M
{
1821
48.4M
    xmlStartTag *tag;
1822
1823
48.4M
    if (ctxt->nameNr >= ctxt->nameMax) {
1824
83.1k
        const xmlChar * *tmp;
1825
83.1k
        xmlStartTag *tmp2;
1826
83.1k
        ctxt->nameMax *= 2;
1827
83.1k
        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1828
83.1k
                                    ctxt->nameMax *
1829
83.1k
                                    sizeof(ctxt->nameTab[0]));
1830
83.1k
        if (tmp == NULL) {
1831
0
      ctxt->nameMax /= 2;
1832
0
      goto mem_error;
1833
0
        }
1834
83.1k
  ctxt->nameTab = tmp;
1835
83.1k
        tmp2 = (xmlStartTag *) xmlRealloc((void * *)ctxt->pushTab,
1836
83.1k
                                    ctxt->nameMax *
1837
83.1k
                                    sizeof(ctxt->pushTab[0]));
1838
83.1k
        if (tmp2 == NULL) {
1839
0
      ctxt->nameMax /= 2;
1840
0
      goto mem_error;
1841
0
        }
1842
83.1k
  ctxt->pushTab = tmp2;
1843
48.3M
    } else if (ctxt->pushTab == NULL) {
1844
744k
        ctxt->pushTab = (xmlStartTag *) xmlMalloc(ctxt->nameMax *
1845
744k
                                            sizeof(ctxt->pushTab[0]));
1846
744k
        if (ctxt->pushTab == NULL)
1847
0
            goto mem_error;
1848
744k
    }
1849
48.4M
    ctxt->nameTab[ctxt->nameNr] = value;
1850
48.4M
    ctxt->name = value;
1851
48.4M
    tag = &ctxt->pushTab[ctxt->nameNr];
1852
48.4M
    tag->prefix = prefix;
1853
48.4M
    tag->URI = URI;
1854
48.4M
    tag->line = line;
1855
48.4M
    tag->nsNr = nsNr;
1856
48.4M
    return (ctxt->nameNr++);
1857
0
mem_error:
1858
0
    xmlErrMemory(ctxt, NULL);
1859
0
    return (-1);
1860
48.4M
}
1861
#ifdef LIBXML_PUSH_ENABLED
1862
/**
1863
 * nameNsPop:
1864
 * @ctxt: an XML parser context
1865
 *
1866
 * Pops the top element/prefix/URI name from the name stack
1867
 *
1868
 * Returns the name just removed
1869
 */
1870
static const xmlChar *
1871
nameNsPop(xmlParserCtxtPtr ctxt)
1872
9.59M
{
1873
9.59M
    const xmlChar *ret;
1874
1875
9.59M
    if (ctxt->nameNr <= 0)
1876
0
        return (NULL);
1877
9.59M
    ctxt->nameNr--;
1878
9.59M
    if (ctxt->nameNr > 0)
1879
9.53M
        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1880
59.5k
    else
1881
59.5k
        ctxt->name = NULL;
1882
9.59M
    ret = ctxt->nameTab[ctxt->nameNr];
1883
9.59M
    ctxt->nameTab[ctxt->nameNr] = NULL;
1884
9.59M
    return (ret);
1885
9.59M
}
1886
#endif /* LIBXML_PUSH_ENABLED */
1887
1888
/**
1889
 * namePush:
1890
 * @ctxt:  an XML parser context
1891
 * @value:  the element name
1892
 *
1893
 * Pushes a new element name on top of the name stack
1894
 *
1895
 * Returns -1 in case of error, the index in the stack otherwise
1896
 */
1897
int
1898
namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1899
0
{
1900
0
    if (ctxt == NULL) return (-1);
1901
1902
0
    if (ctxt->nameNr >= ctxt->nameMax) {
1903
0
        const xmlChar * *tmp;
1904
0
        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1905
0
                                    ctxt->nameMax * 2 *
1906
0
                                    sizeof(ctxt->nameTab[0]));
1907
0
        if (tmp == NULL) {
1908
0
      goto mem_error;
1909
0
        }
1910
0
  ctxt->nameTab = tmp;
1911
0
        ctxt->nameMax *= 2;
1912
0
    }
1913
0
    ctxt->nameTab[ctxt->nameNr] = value;
1914
0
    ctxt->name = value;
1915
0
    return (ctxt->nameNr++);
1916
0
mem_error:
1917
0
    xmlErrMemory(ctxt, NULL);
1918
0
    return (-1);
1919
0
}
1920
/**
1921
 * namePop:
1922
 * @ctxt: an XML parser context
1923
 *
1924
 * Pops the top element name from the name stack
1925
 *
1926
 * Returns the name just removed
1927
 */
1928
const xmlChar *
1929
namePop(xmlParserCtxtPtr ctxt)
1930
35.6M
{
1931
35.6M
    const xmlChar *ret;
1932
1933
35.6M
    if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1934
0
        return (NULL);
1935
35.6M
    ctxt->nameNr--;
1936
35.6M
    if (ctxt->nameNr > 0)
1937
35.3M
        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1938
300k
    else
1939
300k
        ctxt->name = NULL;
1940
35.6M
    ret = ctxt->nameTab[ctxt->nameNr];
1941
35.6M
    ctxt->nameTab[ctxt->nameNr] = NULL;
1942
35.6M
    return (ret);
1943
35.6M
}
1944
1945
68.0M
/*
 * spacePush:
 * Pushes @val on the space stack (spaceTab) and points ctxt->space at
 * the new top entry. The table is doubled when full.
 *
 * Returns -1 on allocation failure, the index of the new entry otherwise.
 */
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
    if (ctxt->spaceNr >= ctxt->spaceMax) {
        int oldMax = ctxt->spaceMax;
        int *grown;

        ctxt->spaceMax = oldMax * 2;
        grown = (int *) xmlRealloc(ctxt->spaceTab,
                                   ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
        if (grown == NULL) {
            xmlErrMemory(ctxt, NULL);
            /* The table did not grow: put the old capacity back. */
            ctxt->spaceMax = oldMax;
            return(-1);
        }
        ctxt->spaceTab = grown;
    }

    ctxt->spaceTab[ctxt->spaceNr] = val;
    ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
    return(ctxt->spaceNr++);
}
1963
1964
66.3M
/*
 * spacePop:
 * Pops the top value off the space stack. ctxt->space is moved to the
 * entry below, or to slot 0 when the stack becomes empty, and the
 * vacated slot is overwritten with -1.
 *
 * Returns the popped value, or 0 if the stack was already empty.
 */
static int spacePop(xmlParserCtxtPtr ctxt) {
    int idx;
    int popped;

    if (ctxt->spaceNr <= 0)
        return(0);

    idx = --ctxt->spaceNr;
    ctxt->space = &ctxt->spaceTab[(idx > 0) ? (idx - 1) : 0];

    popped = ctxt->spaceTab[idx];
    ctxt->spaceTab[idx] = -1;
    return(popped);
}
1976
1977
/*
 * Macros for accessing the content. These should be used only by the
 * parser and must not be exported.
 *
 * Dirty macros, i.e. one often needs to make assumptions about the
 * context to use them:
 *
 *   CUR_PTR returns the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location!
 *   CUR     returns the current xmlChar value, i.e. an 8-bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values, otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypassing any token
 *           extraction that may have been done.
 *   NXT(n)  returns the n'th next xmlChar. Like CUR, it should be used only
 *           to compare against ASCII-based substrings.
 *   SKIP(n) skips n xmlChars, and must also be used only to skip ASCII
 *           strings without newlines within the parser.
 *   NEXT1   skips 1 xmlChar, and must also be used only to skip 1
 *           non-newline ASCII char within the parser.
 *
 * Clean macros, not dependent on an ASCII context, expecting UTF-8 encoding:
 *
 *   NEXT    skips to the next character; this does the proper decoding
 *           in UTF-8 mode. It also pops unfinished entities on the fly.
 *   NEXTL(l) skips the current Unicode character, which is l xmlChars long.
 *   CUR_CHAR(l) returns the current Unicode character (int) and sets l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same, but operates on a string instead of the context.
 *   COPY_BUF  copies the current Unicode char to the target buffer and
 *            increments the index.
 *   GROW, SHRINK  handling of input buffers.
 */
2011
2012
3.89G
/*
 * Raw accessors on the current input of the parser context named `ctxt`
 * in the enclosing scope. RAW and CUR both read the byte at the current
 * position; NXT(val) reads the byte `val` positions ahead. None of them
 * performs a bounds check.
 */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) (ctxt->input->cur[(val)])
#define CUR_PTR (ctxt->input->cur)
#define BASE_PTR (ctxt->input->base)

/*
 * CMPn(s, c1, ..., cn): true when the first n bytes at `s` equal c1..cn.
 * Comparison short-circuits at the first mismatch. `s` is evaluated once
 * per byte compared, so it must not have side effects. Every parameter is
 * parenthesized so that expression arguments (e.g. `p + 1`) are converted
 * as a whole.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )
2035
2036
1.40G
/*
 * SKIP(val): advance the current input position and column by `val`
 * bytes without looking at them (no newline tracking), then ask for more
 * input if the new position holds a 0 byte. `val` is evaluated twice.
 */
#define SKIP(val) do {							\
    ctxt->input->cur += (val),ctxt->input->col+=(val);			\
    if (*ctxt->input->cur == 0)						\
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
  } while (0)

/*
 * SKIPL(val): advance the current input position by `val` bytes one at a
 * time, updating line/col for each byte ('\n' starts a new line at
 * column 1), then ask for more input if the new position holds a 0 byte.
 * `val` is re-evaluated on every iteration and is parenthesized so that
 * expression arguments compare as a whole against the loop counter.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == 0)						\
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
  } while (0)
2053
2054
1.35G
#define SHRINK if ((ctxt->progressive == 0) &&       \
2055
1.35G
       (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
2056
1.35G
       (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
2057
1.35G
  xmlSHRINK (ctxt);
2058
2059
16.2M
static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
2060
    /* Don't shrink memory buffers. */
2061
16.2M
    if ((ctxt->input->buf) &&
2062
16.2M
        ((ctxt->input->buf->encoder) || (ctxt->input->buf->readcallback)))
2063
17.2k
        xmlParserInputShrink(ctxt->input);
2064
16.2M
    if (*ctxt->input->cur == 0)
2065
274k
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2066
16.2M
}
2067
2068
4.01G
#define GROW if ((ctxt->progressive == 0) &&       \
2069
4.01G
     (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
2070
4.01G
  xmlGROW (ctxt);
2071
2072
822M
static void xmlGROW (xmlParserCtxtPtr ctxt) {
2073
822M
    ptrdiff_t curEnd = ctxt->input->end - ctxt->input->cur;
2074
822M
    ptrdiff_t curBase = ctxt->input->cur - ctxt->input->base;
2075
2076
822M
    if (((curEnd > XML_MAX_LOOKUP_LIMIT) ||
2077
822M
         (curBase > XML_MAX_LOOKUP_LIMIT)) &&
2078
822M
         ((ctxt->input->buf) &&
2079
0
          (ctxt->input->buf->readcallback != NULL)) &&
2080
822M
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
2081
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
2082
0
        xmlHaltParser(ctxt);
2083
0
  return;
2084
0
    }
2085
822M
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2086
822M
    if ((ctxt->input->cur > ctxt->input->end) ||
2087
822M
        (ctxt->input->cur < ctxt->input->base)) {
2088
0
        xmlHaltParser(ctxt);
2089
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "cur index out of bound");
2090
0
  return;
2091
0
    }
2092
822M
    if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0))
2093
17.4M
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2094
822M
}
2095
2096
949M
/* Skip blanks at the current position; see xmlSkipBlankChars(). */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* Advance to the next character through xmlNextChar(). */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: advance column and position by one byte (no newline tracking)
 * and request more input if the new position holds a 0 byte.
 * Expands to a braced block, not a do/while(0) statement.
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL(l): account for one character in line/col ('\n' starts a new
 * line at column 1) and advance the position by `l` bytes.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);				\
  } while (0)

/* Current character of the input / of string `s`; `l` receives its length. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l,b,i,v): append character `v` to buffer `b` at index `i`.
 * When `l` is 1 the value is stored as a single byte, otherwise it is
 * encoded by xmlCopyCharMultiByte(); `i` is advanced by the number of
 * bytes written. Expands to an if/else without braces or a trailing
 * semicolon. All parameters are parenthesized so expression arguments
 * are evaluated as a whole.
 */
#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (v);					\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
2120
2121
/**
2122
 * xmlSkipBlankChars:
2123
 * @ctxt:  the XML parser context
2124
 *
2125
 * skip all blanks character found at that point in the input streams.
2126
 * It pops up finished entities in the process if allowable at that point.
2127
 *
2128
 * Returns the number of space chars skipped
2129
 */
2130
2131
int
2132
949M
xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2133
949M
    int res = 0;
2134
2135
    /*
2136
     * It's Okay to use CUR/NEXT here since all the blanks are on
2137
     * the ASCII range.
2138
     */
2139
949M
    if (((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) ||
2140
949M
        (ctxt->instate == XML_PARSER_START)) {
2141
306M
  const xmlChar *cur;
2142
  /*
2143
   * if we are in the document content, go really fast
2144
   */
2145
306M
  cur = ctxt->input->cur;
2146
306M
  while (IS_BLANK_CH(*cur)) {
2147
97.9M
      if (*cur == '\n') {
2148
3.96M
    ctxt->input->line++; ctxt->input->col = 1;
2149
93.9M
      } else {
2150
93.9M
    ctxt->input->col++;
2151
93.9M
      }
2152
97.9M
      cur++;
2153
97.9M
      if (res < INT_MAX)
2154
97.9M
    res++;
2155
97.9M
      if (*cur == 0) {
2156
156k
    ctxt->input->cur = cur;
2157
156k
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2158
156k
    cur = ctxt->input->cur;
2159
156k
      }
2160
97.9M
  }
2161
306M
  ctxt->input->cur = cur;
2162
642M
    } else {
2163
642M
        int expandPE = ((ctxt->external != 0) || (ctxt->inputNr != 1));
2164
2165
2.12G
  while (ctxt->instate != XML_PARSER_EOF) {
2166
2.12G
            if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2167
715M
    NEXT;
2168
1.41G
      } else if (CUR == '%') {
2169
                /*
2170
                 * Need to handle support of entities branching here
2171
                 */
2172
421M
          if ((expandPE == 0) || (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2173
3.73M
                    break;
2174
417M
          xmlParsePEReference(ctxt);
2175
990M
            } else if (CUR == 0) {
2176
351M
                unsigned long consumed;
2177
351M
                xmlEntityPtr ent;
2178
2179
351M
                if (ctxt->inputNr <= 1)
2180
109k
                    break;
2181
2182
351M
                consumed = ctxt->input->consumed;
2183
351M
                xmlSaturatedAddSizeT(&consumed,
2184
351M
                                     ctxt->input->cur - ctxt->input->base);
2185
2186
                /*
2187
                 * Add to sizeentities when parsing an external entity
2188
                 * for the first time.
2189
                 */
2190
351M
                ent = ctxt->input->entity;
2191
351M
                if ((ent->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
2192
351M
                    ((ent->flags & XML_ENT_PARSED) == 0)) {
2193
19.0k
                    ent->flags |= XML_ENT_PARSED;
2194
2195
19.0k
                    xmlSaturatedAdd(&ctxt->sizeentities, consumed);
2196
19.0k
                }
2197
2198
351M
                xmlParserEntityCheck(ctxt, consumed);
2199
2200
351M
                xmlPopInput(ctxt);
2201
639M
            } else {
2202
639M
                break;
2203
639M
            }
2204
2205
            /*
2206
             * Also increase the counter when entering or exiting a PERef.
2207
             * The spec says: "When a parameter-entity reference is recognized
2208
             * in the DTD and included, its replacement text MUST be enlarged
2209
             * by the attachment of one leading and one following space (#x20)
2210
             * character."
2211
             */
2212
1.48G
      if (res < INT_MAX)
2213
1.48G
    res++;
2214
1.48G
        }
2215
642M
    }
2216
949M
    return(res);
2217
949M
}
2218
2219
/************************************************************************
2220
 *                  *
2221
 *    Commodity functions to handle entities      *
2222
 *                  *
2223
 ************************************************************************/
2224
2225
/**
2226
 * xmlPopInput:
2227
 * @ctxt:  an XML parser context
2228
 *
2229
 * xmlPopInput: the current input pointed by ctxt->input came to an end
2230
 *          pop it and return the next char.
2231
 *
2232
 * Returns the current xmlChar in the parser context
2233
 */
2234
xmlChar
2235
351M
xmlPopInput(xmlParserCtxtPtr ctxt) {
2236
351M
    xmlParserInputPtr input;
2237
2238
351M
    if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2239
351M
    if (xmlParserDebugEntities)
2240
0
  xmlGenericError(xmlGenericErrorContext,
2241
0
    "Popping input %d\n", ctxt->inputNr);
2242
351M
    if ((ctxt->inputNr > 1) && (ctxt->inSubset == 0) &&
2243
351M
        (ctxt->instate != XML_PARSER_EOF))
2244
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2245
0
                    "Unfinished entity outside the DTD");
2246
351M
    input = inputPop(ctxt);
2247
351M
    if (input->entity != NULL)
2248
351M
        input->entity->flags &= ~XML_ENT_EXPANDING;
2249
351M
    xmlFreeInputStream(input);
2250
351M
    if (*ctxt->input->cur == 0)
2251
165M
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2252
351M
    return(CUR);
2253
351M
}
2254
2255
/**
2256
 * xmlPushInput:
2257
 * @ctxt:  an XML parser context
2258
 * @input:  an XML parser input fragment (entity, XML fragment ...).
2259
 *
2260
 * xmlPushInput: switch to a new input stream which is stacked on top
2261
 *               of the previous one(s).
2262
 * Returns -1 in case of error or the index in the input stack
2263
 */
2264
int
2265
351M
xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2266
351M
    int ret;
2267
351M
    if (input == NULL) return(-1);
2268
2269
351M
    if (xmlParserDebugEntities) {
2270
0
  if ((ctxt->input != NULL) && (ctxt->input->filename))
2271
0
      xmlGenericError(xmlGenericErrorContext,
2272
0
        "%s(%d): ", ctxt->input->filename,
2273
0
        ctxt->input->line);
2274
0
  xmlGenericError(xmlGenericErrorContext,
2275
0
    "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2276
0
    }
2277
351M
    if (((ctxt->inputNr > 40) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2278
351M
        (ctxt->inputNr > 100)) {
2279
0
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2280
0
        while (ctxt->inputNr > 1)
2281
0
            xmlFreeInputStream(inputPop(ctxt));
2282
0
  return(-1);
2283
0
    }
2284
351M
    ret = inputPush(ctxt, input);
2285
351M
    if (ctxt->instate == XML_PARSER_EOF)
2286
0
        return(-1);
2287
351M
    GROW;
2288
351M
    return(ret);
2289
351M
}
2290
2291
/**
2292
 * xmlParseCharRef:
2293
 * @ctxt:  an XML parser context
2294
 *
2295
 * DEPRECATED: Internal function, don't use.
2296
 *
2297
 * Parse a numeric character reference. Always consumes '&'.
2298
 *
2299
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2300
 *                  '&#x' [0-9a-fA-F]+ ';'
2301
 *
2302
 * [ WFC: Legal Character ]
2303
 * Characters referred to using character references must match the
2304
 * production for Char.
2305
 *
2306
 * Returns the value parsed (as an int), 0 in case of error
2307
 */
2308
int
2309
2.43M
xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2310
2.43M
    int val = 0;
2311
2.43M
    int count = 0;
2312
2313
    /*
2314
     * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2315
     */
2316
2.43M
    if ((RAW == '&') && (NXT(1) == '#') &&
2317
2.43M
        (NXT(2) == 'x')) {
2318
1.24M
  SKIP(3);
2319
1.24M
  GROW;
2320
3.44M
  while (RAW != ';') { /* loop blocked by count */
2321
2.31M
      if (count++ > 20) {
2322
70.4k
    count = 0;
2323
70.4k
    GROW;
2324
70.4k
                if (ctxt->instate == XML_PARSER_EOF)
2325
0
                    return(0);
2326
70.4k
      }
2327
2.31M
      if ((RAW >= '0') && (RAW <= '9'))
2328
1.13M
          val = val * 16 + (CUR - '0');
2329
1.17M
      else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2330
922k
          val = val * 16 + (CUR - 'a') + 10;
2331
250k
      else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2332
140k
          val = val * 16 + (CUR - 'A') + 10;
2333
109k
      else {
2334
109k
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2335
109k
    val = 0;
2336
109k
    break;
2337
109k
      }
2338
2.20M
      if (val > 0x110000)
2339
778k
          val = 0x110000;
2340
2341
2.20M
      NEXT;
2342
2.20M
      count++;
2343
2.20M
  }
2344
1.24M
  if (RAW == ';') {
2345
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2346
1.13M
      ctxt->input->col++;
2347
1.13M
      ctxt->input->cur++;
2348
1.13M
  }
2349
1.24M
    } else if  ((RAW == '&') && (NXT(1) == '#')) {
2350
1.19M
  SKIP(2);
2351
1.19M
  GROW;
2352
4.78M
  while (RAW != ';') { /* loop blocked by count */
2353
3.73M
      if (count++ > 20) {
2354
74.3k
    count = 0;
2355
74.3k
    GROW;
2356
74.3k
                if (ctxt->instate == XML_PARSER_EOF)
2357
0
                    return(0);
2358
74.3k
      }
2359
3.73M
      if ((RAW >= '0') && (RAW <= '9'))
2360
3.59M
          val = val * 10 + (CUR - '0');
2361
146k
      else {
2362
146k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2363
146k
    val = 0;
2364
146k
    break;
2365
146k
      }
2366
3.59M
      if (val > 0x110000)
2367
809k
          val = 0x110000;
2368
2369
3.59M
      NEXT;
2370
3.59M
      count++;
2371
3.59M
  }
2372
1.19M
  if (RAW == ';') {
2373
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2374
1.04M
      ctxt->input->col++;
2375
1.04M
      ctxt->input->cur++;
2376
1.04M
  }
2377
1.19M
    } else {
2378
0
        if (RAW == '&')
2379
0
            SKIP(1);
2380
0
        xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2381
0
    }
2382
2383
    /*
2384
     * [ WFC: Legal Character ]
2385
     * Characters referred to using character references must match the
2386
     * production for Char.
2387
     */
2388
2.43M
    if (val >= 0x110000) {
2389
4.95k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2390
4.95k
                "xmlParseCharRef: character reference out of bounds\n",
2391
4.95k
          val);
2392
2.43M
    } else if (IS_CHAR(val)) {
2393
2.14M
        return(val);
2394
2.14M
    } else {
2395
285k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2396
285k
                          "xmlParseCharRef: invalid xmlChar value %d\n",
2397
285k
                    val);
2398
285k
    }
2399
290k
    return(0);
2400
2.43M
}
2401
2402
/**
2403
 * xmlParseStringCharRef:
2404
 * @ctxt:  an XML parser context
2405
 * @str:  a pointer to an index in the string
2406
 *
2407
 * parse Reference declarations, variant parsing from a string rather
2408
 * than an an input flow.
2409
 *
2410
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2411
 *                  '&#x' [0-9a-fA-F]+ ';'
2412
 *
2413
 * [ WFC: Legal Character ]
2414
 * Characters referred to using character references must match the
2415
 * production for Char.
2416
 *
2417
 * Returns the value parsed (as an int), 0 in case of error, str will be
2418
 *         updated to the current value of the index
2419
 */
2420
static int
2421
1.21M
xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2422
1.21M
    const xmlChar *ptr;
2423
1.21M
    xmlChar cur;
2424
1.21M
    int val = 0;
2425
2426
1.21M
    if ((str == NULL) || (*str == NULL)) return(0);
2427
1.21M
    ptr = *str;
2428
1.21M
    cur = *ptr;
2429
1.21M
    if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2430
110k
  ptr += 3;
2431
110k
  cur = *ptr;
2432
347k
  while (cur != ';') { /* Non input consuming loop */
2433
241k
      if ((cur >= '0') && (cur <= '9'))
2434
124k
          val = val * 16 + (cur - '0');
2435
116k
      else if ((cur >= 'a') && (cur <= 'f'))
2436
15.8k
          val = val * 16 + (cur - 'a') + 10;
2437
100k
      else if ((cur >= 'A') && (cur <= 'F'))
2438
97.0k
          val = val * 16 + (cur - 'A') + 10;
2439
3.29k
      else {
2440
3.29k
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2441
3.29k
    val = 0;
2442
3.29k
    break;
2443
3.29k
      }
2444
237k
      if (val > 0x110000)
2445
47.5k
          val = 0x110000;
2446
2447
237k
      ptr++;
2448
237k
      cur = *ptr;
2449
237k
  }
2450
110k
  if (cur == ';')
2451
106k
      ptr++;
2452
1.10M
    } else if  ((cur == '&') && (ptr[1] == '#')){
2453
1.10M
  ptr += 2;
2454
1.10M
  cur = *ptr;
2455
3.68M
  while (cur != ';') { /* Non input consuming loops */
2456
2.59M
      if ((cur >= '0') && (cur <= '9'))
2457
2.58M
          val = val * 10 + (cur - '0');
2458
5.57k
      else {
2459
5.57k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2460
5.57k
    val = 0;
2461
5.57k
    break;
2462
5.57k
      }
2463
2.58M
      if (val > 0x110000)
2464
49.4k
          val = 0x110000;
2465
2466
2.58M
      ptr++;
2467
2.58M
      cur = *ptr;
2468
2.58M
  }
2469
1.10M
  if (cur == ';')
2470
1.09M
      ptr++;
2471
1.10M
    } else {
2472
0
  xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2473
0
  return(0);
2474
0
    }
2475
1.21M
    *str = ptr;
2476
2477
    /*
2478
     * [ WFC: Legal Character ]
2479
     * Characters referred to using character references must match the
2480
     * production for Char.
2481
     */
2482
1.21M
    if (val >= 0x110000) {
2483
1.14k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2484
1.14k
                "xmlParseStringCharRef: character reference out of bounds\n",
2485
1.14k
                val);
2486
1.21M
    } else if (IS_CHAR(val)) {
2487
1.20M
        return(val);
2488
1.20M
    } else {
2489
11.8k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2490
11.8k
        "xmlParseStringCharRef: invalid xmlChar value %d\n",
2491
11.8k
        val);
2492
11.8k
    }
2493
12.9k
    return(0);
2494
1.21M
}
2495
2496
/**
2497
 * xmlParserHandlePEReference:
2498
 * @ctxt:  the parser context
2499
 *
2500
 * [69] PEReference ::= '%' Name ';'
2501
 *
2502
 * [ WFC: No Recursion ]
2503
 * A parsed entity must not contain a recursive
2504
 * reference to itself, either directly or indirectly.
2505
 *
2506
 * [ WFC: Entity Declared ]
2507
 * In a document without any DTD, a document with only an internal DTD
2508
 * subset which contains no parameter entity references, or a document
2509
 * with "standalone='yes'", ...  ... The declaration of a parameter
2510
 * entity must precede any reference to it...
2511
 *
2512
 * [ VC: Entity Declared ]
2513
 * In a document with an external subset or external parameter entities
2514
 * with "standalone='no'", ...  ... The declaration of a parameter entity
2515
 * must precede any reference to it...
2516
 *
2517
 * [ WFC: In DTD ]
2518
 * Parameter-entity references may only appear in the DTD.
2519
 * NOTE: misleading but this is handled.
2520
 *
2521
 * A PEReference may have been detected in the current input stream
2522
 * the handling is done accordingly to
2523
 *      http://www.w3.org/TR/REC-xml#entproc
2524
 * i.e.
2525
 *   - Included in literal in entity values
2526
 *   - Included as Parameter Entity reference within DTDs
2527
 */
2528
void
2529
0
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2530
0
    switch(ctxt->instate) {
2531
0
  case XML_PARSER_CDATA_SECTION:
2532
0
      return;
2533
0
        case XML_PARSER_COMMENT:
2534
0
      return;
2535
0
  case XML_PARSER_START_TAG:
2536
0
      return;
2537
0
  case XML_PARSER_END_TAG:
2538
0
      return;
2539
0
        case XML_PARSER_EOF:
2540
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2541
0
      return;
2542
0
        case XML_PARSER_PROLOG:
2543
0
  case XML_PARSER_START:
2544
0
  case XML_PARSER_MISC:
2545
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2546
0
      return;
2547
0
  case XML_PARSER_ENTITY_DECL:
2548
0
        case XML_PARSER_CONTENT:
2549
0
        case XML_PARSER_ATTRIBUTE_VALUE:
2550
0
        case XML_PARSER_PI:
2551
0
  case XML_PARSER_SYSTEM_LITERAL:
2552
0
  case XML_PARSER_PUBLIC_LITERAL:
2553
      /* we just ignore it there */
2554
0
      return;
2555
0
        case XML_PARSER_EPILOG:
2556
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2557
0
      return;
2558
0
  case XML_PARSER_ENTITY_VALUE:
2559
      /*
2560
       * NOTE: in the case of entity values, we don't do the
2561
       *       substitution here since we need the literal
2562
       *       entity value to be able to save the internal
2563
       *       subset of the document.
2564
       *       This will be handled by xmlStringDecodeEntities
2565
       */
2566
0
      return;
2567
0
        case XML_PARSER_DTD:
2568
      /*
2569
       * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2570
       * In the internal DTD subset, parameter-entity references
2571
       * can occur only where markup declarations can occur, not
2572
       * within markup declarations.
2573
       * In that case this is handled in xmlParseMarkupDecl
2574
       */
2575
0
      if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2576
0
    return;
2577
0
      if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2578
0
    return;
2579
0
            break;
2580
0
        case XML_PARSER_IGNORE:
2581
0
            return;
2582
0
    }
2583
2584
0
    xmlParsePEReference(ctxt);
2585
0
}
2586
2587
/*
 * Macro used to grow the current buffer to twice its size plus n bytes.
 * buffer##_size is expected to be a size_t
 * mem_error: is expected to handle memory allocation failures; it is
 * jumped to when the new size wraps around or the reallocation fails,
 * in which case buffer and buffer##_size are left unchanged.
 * n is parenthesized so that expression arguments are added as a whole.
 */
#define growBuffer(buffer, n) {						\
    xmlChar *tmp;							\
    size_t new_size = buffer##_size * 2 + (n);                          \
    if (new_size < buffer##_size) goto mem_error;                       \
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);                     \
    if (tmp == NULL) goto mem_error;					\
    buffer = tmp;							\
    buffer##_size = new_size;                                           \
}
2601
2602
/**
2603
 * xmlStringDecodeEntitiesInt:
2604
 * @ctxt:  the parser context
2605
 * @str:  the input string
2606
 * @len: the string length
2607
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2608
 * @end:  an end marker xmlChar, 0 if none
2609
 * @end2:  an end marker xmlChar, 0 if none
2610
 * @end3:  an end marker xmlChar, 0 if none
2611
 * @check:  whether to perform entity checks
2612
 */
2613
static xmlChar *
2614
xmlStringDecodeEntitiesInt(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2615
               int what, xmlChar end, xmlChar  end2, xmlChar end3,
2616
160M
                           int check) {
2617
160M
    xmlChar *buffer = NULL;
2618
160M
    size_t buffer_size = 0;
2619
160M
    size_t nbchars = 0;
2620
2621
160M
    xmlChar *current = NULL;
2622
160M
    xmlChar *rep = NULL;
2623
160M
    const xmlChar *last;
2624
160M
    xmlEntityPtr ent;
2625
160M
    int c,l;
2626
2627
160M
    if (str == NULL)
2628
83.7k
        return(NULL);
2629
160M
    last = str + len;
2630
2631
160M
    if (((ctxt->depth > 40) &&
2632
160M
         ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2633
160M
  (ctxt->depth > 100)) {
2634
14
  xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_LOOP,
2635
14
                       "Maximum entity nesting depth exceeded");
2636
14
  return(NULL);
2637
14
    }
2638
2639
    /*
2640
     * allocate a translation buffer.
2641
     */
2642
160M
    buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2643
160M
    buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
2644
160M
    if (buffer == NULL) goto mem_error;
2645
2646
    /*
2647
     * OK loop until we reach one of the ending char or a size limit.
2648
     * we are operating on already parsed values.
2649
     */
2650
160M
    if (str < last)
2651
159M
  c = CUR_SCHAR(str, l);
2652
1.00M
    else
2653
1.00M
        c = 0;
2654
19.0G
    while ((c != 0) && (c != end) && /* non input consuming loop */
2655
19.0G
           (c != end2) && (c != end3) &&
2656
19.0G
           (ctxt->instate != XML_PARSER_EOF)) {
2657
2658
18.9G
  if (c == 0) break;
2659
18.9G
        if ((c == '&') && (str[1] == '#')) {
2660
1.21M
      int val = xmlParseStringCharRef(ctxt, &str);
2661
1.21M
      if (val == 0)
2662
12.9k
                goto int_error;
2663
1.20M
      COPY_BUF(0,buffer,nbchars,val);
2664
1.20M
      if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2665
754
          growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2666
754
      }
2667
18.9G
  } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2668
8.45G
      if (xmlParserDebugEntities)
2669
0
    xmlGenericError(xmlGenericErrorContext,
2670
0
      "String decoding Entity Reference: %.30s\n",
2671
0
      str);
2672
8.45G
      ent = xmlParseStringEntityRef(ctxt, &str);
2673
8.45G
      if ((ent != NULL) &&
2674
8.45G
    (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2675
539k
    if (ent->content != NULL) {
2676
539k
        COPY_BUF(0,buffer,nbchars,ent->content[0]);
2677
539k
        if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2678
1.67k
      growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2679
1.67k
        }
2680
539k
    } else {
2681
0
        xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2682
0
          "predefined entity has no content\n");
2683
0
                    goto int_error;
2684
0
    }
2685
8.45G
      } else if ((ent != NULL) && (ent->content != NULL)) {
2686
148M
          if ((check) && (xmlParserEntityCheck(ctxt, ent->length)))
2687
2.32k
                    goto int_error;
2688
2689
148M
                if (ent->flags & XML_ENT_EXPANDING) {
2690
1.32k
              xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2691
1.32k
                    xmlHaltParser(ctxt);
2692
1.32k
                    ent->content[0] = 0;
2693
1.32k
                    goto int_error;
2694
1.32k
                }
2695
2696
148M
                ent->flags |= XML_ENT_EXPANDING;
2697
148M
    ctxt->depth++;
2698
148M
    rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
2699
148M
                        ent->length, what, 0, 0, 0, check);
2700
148M
    ctxt->depth--;
2701
148M
                ent->flags &= ~XML_ENT_EXPANDING;
2702
2703
148M
    if (rep == NULL) {
2704
31.8k
                    ent->content[0] = 0;
2705
31.8k
                    goto int_error;
2706
31.8k
                }
2707
2708
148M
                current = rep;
2709
50.7G
                while (*current != 0) { /* non input consuming loop */
2710
50.5G
                    buffer[nbchars++] = *current++;
2711
50.5G
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2712
23.4M
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2713
23.4M
                    }
2714
50.5G
                }
2715
148M
                xmlFree(rep);
2716
148M
                rep = NULL;
2717
8.30G
      } else if (ent != NULL) {
2718
33.2M
    int i = xmlStrlen(ent->name);
2719
33.2M
    const xmlChar *cur = ent->name;
2720
2721
33.2M
    buffer[nbchars++] = '&';
2722
33.2M
    if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
2723
5.39M
        growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
2724
5.39M
    }
2725
2.22G
    for (;i > 0;i--)
2726
2.19G
        buffer[nbchars++] = *cur++;
2727
33.2M
    buffer[nbchars++] = ';';
2728
33.2M
      }
2729
10.4G
  } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2730
3.05M
      if (xmlParserDebugEntities)
2731
0
    xmlGenericError(xmlGenericErrorContext,
2732
0
      "String decoding PE Reference: %.30s\n", str);
2733
3.05M
      ent = xmlParseStringPEReference(ctxt, &str);
2734
3.05M
      if (ent != NULL) {
2735
2.87M
                if (ent->content == NULL) {
2736
        /*
2737
         * Note: external parsed entities will not be loaded,
2738
         * it is not required for a non-validating parser to
2739
         * complete external PEReferences coming from the
2740
         * internal subset
2741
         */
2742
11.9k
        if (((ctxt->options & XML_PARSE_NOENT) != 0) ||
2743
11.9k
      ((ctxt->options & XML_PARSE_DTDVALID) != 0) ||
2744
11.9k
      (ctxt->validate != 0)) {
2745
11.1k
      xmlLoadEntityContent(ctxt, ent);
2746
11.1k
        } else {
2747
863
      xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
2748
863
      "not validating will not read content for PE entity %s\n",
2749
863
                          ent->name, NULL);
2750
863
        }
2751
11.9k
    }
2752
2753
2.87M
          if ((check) && (xmlParserEntityCheck(ctxt, ent->length)))
2754
271
                    goto int_error;
2755
2756
2.87M
                if (ent->flags & XML_ENT_EXPANDING) {
2757
793
              xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2758
793
                    xmlHaltParser(ctxt);
2759
793
                    if (ent->content != NULL)
2760
278
                        ent->content[0] = 0;
2761
793
                    goto int_error;
2762
793
                }
2763
2764
2.87M
                ent->flags |= XML_ENT_EXPANDING;
2765
2.87M
    ctxt->depth++;
2766
2.87M
    rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
2767
2.87M
                        ent->length, what, 0, 0, 0, check);
2768
2.87M
    ctxt->depth--;
2769
2.87M
                ent->flags &= ~XML_ENT_EXPANDING;
2770
2771
2.87M
    if (rep == NULL) {
2772
6.22k
                    if (ent->content != NULL)
2773
412
                        ent->content[0] = 0;
2774
6.22k
                    goto int_error;
2775
6.22k
                }
2776
2.86M
                current = rep;
2777
4.26G
                while (*current != 0) { /* non input consuming loop */
2778
4.25G
                    buffer[nbchars++] = *current++;
2779
4.25G
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2780
689k
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2781
689k
                    }
2782
4.25G
                }
2783
2.86M
                xmlFree(rep);
2784
2.86M
                rep = NULL;
2785
2.86M
      }
2786
10.4G
  } else {
2787
10.4G
      COPY_BUF(l,buffer,nbchars,c);
2788
10.4G
      str += l;
2789
10.4G
      if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2790
4.39M
          growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2791
4.39M
      }
2792
10.4G
  }
2793
18.9G
  if (str < last)
2794
18.7G
      c = CUR_SCHAR(str, l);
2795
159M
  else
2796
159M
      c = 0;
2797
18.9G
    }
2798
160M
    buffer[nbchars] = 0;
2799
160M
    return(buffer);
2800
2801
0
mem_error:
2802
0
    xmlErrMemory(ctxt, NULL);
2803
55.7k
int_error:
2804
55.7k
    if (rep != NULL)
2805
0
        xmlFree(rep);
2806
55.7k
    if (buffer != NULL)
2807
55.7k
        xmlFree(buffer);
2808
55.7k
    return(NULL);
2809
0
}
2810
2811
/**
2812
 * xmlStringLenDecodeEntities:
2813
 * @ctxt:  the parser context
2814
 * @str:  the input string
2815
 * @len: the string length
2816
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2817
 * @end:  an end marker xmlChar, 0 if none
2818
 * @end2:  an end marker xmlChar, 0 if none
2819
 * @end3:  an end marker xmlChar, 0 if none
2820
 *
2821
 * DEPRECATED: Internal function, don't use.
2822
 *
2823
 * Takes a entity string content and process to do the adequate substitutions.
2824
 *
2825
 * [67] Reference ::= EntityRef | CharRef
2826
 *
2827
 * [69] PEReference ::= '%' Name ';'
2828
 *
2829
 * Returns A newly allocated string with the substitution done. The caller
2830
 *      must deallocate it !
2831
 */
2832
xmlChar *
2833
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2834
                           int what, xmlChar end, xmlChar  end2,
2835
45.2k
                           xmlChar end3) {
2836
45.2k
    if ((ctxt == NULL) || (str == NULL) || (len < 0))
2837
0
        return(NULL);
2838
45.2k
    return(xmlStringDecodeEntitiesInt(ctxt, str, len, what,
2839
45.2k
                                      end, end2, end3, 0));
2840
45.2k
}
2841
2842
/**
2843
 * xmlStringDecodeEntities:
2844
 * @ctxt:  the parser context
2845
 * @str:  the input string
2846
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2847
 * @end:  an end marker xmlChar, 0 if none
2848
 * @end2:  an end marker xmlChar, 0 if none
2849
 * @end3:  an end marker xmlChar, 0 if none
2850
 *
2851
 * DEPRECATED: Internal function, don't use.
2852
 *
2853
 * Takes a entity string content and process to do the adequate substitutions.
2854
 *
2855
 * [67] Reference ::= EntityRef | CharRef
2856
 *
2857
 * [69] PEReference ::= '%' Name ';'
2858
 *
2859
 * Returns A newly allocated string with the substitution done. The caller
2860
 *      must deallocate it !
2861
 */
2862
xmlChar *
2863
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2864
1.27M
            xmlChar end, xmlChar  end2, xmlChar end3) {
2865
1.27M
    if ((ctxt == NULL) || (str == NULL)) return(NULL);
2866
1.27M
    return(xmlStringDecodeEntitiesInt(ctxt, str, xmlStrlen(str), what,
2867
1.27M
                                      end, end2, end3, 0));
2868
1.27M
}
2869
2870
/************************************************************************
2871
 *                  *
2872
 *    Commodity functions, cleanup needed ?     *
2873
 *                  *
2874
 ************************************************************************/
2875
2876
/**
2877
 * areBlanks:
2878
 * @ctxt:  an XML parser context
2879
 * @str:  a xmlChar *
2880
 * @len:  the size of @str
2881
 * @blank_chars: we know the chars are blanks
2882
 *
2883
 * Is this a sequence of blank chars that one can ignore ?
2884
 *
2885
 * Returns 1 if ignorable 0 otherwise.
2886
 */
2887
2888
static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2889
30.4M
                     int blank_chars) {
2890
30.4M
    int i, ret;
2891
30.4M
    xmlNodePtr lastChild;
2892
2893
    /*
2894
     * Don't spend time trying to differentiate them, the same callback is
2895
     * used !
2896
     */
2897
30.4M
    if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2898
800k
  return(0);
2899
2900
    /*
2901
     * Check for xml:space value.
2902
     */
2903
29.6M
    if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2904
29.6M
        (*(ctxt->space) == -2))
2905
6.16M
  return(0);
2906
2907
    /*
2908
     * Check that the string is made of blanks
2909
     */
2910
23.4M
    if (blank_chars == 0) {
2911
95.8M
  for (i = 0;i < len;i++)
2912
82.5M
      if (!(IS_BLANK_CH(str[i]))) return(0);
2913
15.1M
    }
2914
2915
    /*
2916
     * Look if the element is mixed content in the DTD if available
2917
     */
2918
21.5M
    if (ctxt->node == NULL) return(0);
2919
21.3M
    if (ctxt->myDoc != NULL) {
2920
21.3M
  ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2921
21.3M
        if (ret == 0) return(1);
2922
17.1M
        if (ret == 1) return(0);
2923
17.1M
    }
2924
2925
    /*
2926
     * Otherwise, heuristic :-\
2927
     */
2928
16.9M
    if ((RAW != '<') && (RAW != 0xD)) return(0);
2929
16.8M
    if ((ctxt->node->children == NULL) &&
2930
16.8M
  (RAW == '<') && (NXT(1) == '/')) return(0);
2931
2932
16.8M
    lastChild = xmlGetLastChild(ctxt->node);
2933
16.8M
    if (lastChild == NULL) {
2934
1.43M
        if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2935
1.43M
            (ctxt->node->content != NULL)) return(0);
2936
15.4M
    } else if (xmlNodeIsText(lastChild))
2937
81.8k
        return(0);
2938
15.3M
    else if ((ctxt->node->children != NULL) &&
2939
15.3M
             (xmlNodeIsText(ctxt->node->children)))
2940
83.5k
        return(0);
2941
16.7M
    return(1);
2942
16.8M
}
2943
2944
/************************************************************************
2945
 *                  *
2946
 *    Extra stuff for namespace support     *
2947
 *  Relates to http://www.w3.org/TR/WD-xml-names      *
2948
 *                  *
2949
 ************************************************************************/
2950
2951
/**
2952
 * xmlSplitQName:
2953
 * @ctxt:  an XML parser context
2954
 * @name:  an XML parser context
2955
 * @prefix:  a xmlChar **
2956
 *
2957
 * parse an UTF8 encoded XML qualified name string
2958
 *
2959
 * [NS 5] QName ::= (Prefix ':')? LocalPart
2960
 *
2961
 * [NS 6] Prefix ::= NCName
2962
 *
2963
 * [NS 7] LocalPart ::= NCName
2964
 *
2965
 * Returns the local part, and prefix is updated
2966
 *   to get the Prefix if any.
2967
 */
2968
2969
xmlChar *
2970
75.5M
xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2971
75.5M
    xmlChar buf[XML_MAX_NAMELEN + 5];
2972
75.5M
    xmlChar *buffer = NULL;
2973
75.5M
    int len = 0;
2974
75.5M
    int max = XML_MAX_NAMELEN;
2975
75.5M
    xmlChar *ret = NULL;
2976
75.5M
    const xmlChar *cur = name;
2977
75.5M
    int c;
2978
2979
75.5M
    if (prefix == NULL) return(NULL);
2980
75.5M
    *prefix = NULL;
2981
2982
75.5M
    if (cur == NULL) return(NULL);
2983
2984
#ifndef XML_XML_NAMESPACE
2985
    /* xml: prefix is not really a namespace */
2986
    if ((cur[0] == 'x') && (cur[1] == 'm') &&
2987
        (cur[2] == 'l') && (cur[3] == ':'))
2988
  return(xmlStrdup(name));
2989
#endif
2990
2991
    /* nasty but well=formed */
2992
75.5M
    if (cur[0] == ':')
2993
31.0k
  return(xmlStrdup(name));
2994
2995
75.5M
    c = *cur++;
2996
304M
    while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2997
229M
  buf[len++] = c;
2998
229M
  c = *cur++;
2999
229M
    }
3000
75.5M
    if (len >= max) {
3001
  /*
3002
   * Okay someone managed to make a huge name, so he's ready to pay
3003
   * for the processing speed.
3004
   */
3005
29.5k
  max = len * 2;
3006
3007
29.5k
  buffer = (xmlChar *) xmlMallocAtomic(max);
3008
29.5k
  if (buffer == NULL) {
3009
0
      xmlErrMemory(ctxt, NULL);
3010
0
      return(NULL);
3011
0
  }
3012
29.5k
  memcpy(buffer, buf, len);
3013
70.7M
  while ((c != 0) && (c != ':')) { /* tested bigname.xml */
3014
70.6M
      if (len + 10 > max) {
3015
66.1k
          xmlChar *tmp;
3016
3017
66.1k
    max *= 2;
3018
66.1k
    tmp = (xmlChar *) xmlRealloc(buffer, max);
3019
66.1k
    if (tmp == NULL) {
3020
0
        xmlFree(buffer);
3021
0
        xmlErrMemory(ctxt, NULL);
3022
0
        return(NULL);
3023
0
    }
3024
66.1k
    buffer = tmp;
3025
66.1k
      }
3026
70.6M
      buffer[len++] = c;
3027
70.6M
      c = *cur++;
3028
70.6M
  }
3029
29.5k
  buffer[len] = 0;
3030
29.5k
    }
3031
3032
75.5M
    if ((c == ':') && (*cur == 0)) {
3033
23.6k
        if (buffer != NULL)
3034
419
      xmlFree(buffer);
3035
23.6k
  *prefix = NULL;
3036
23.6k
  return(xmlStrdup(name));
3037
23.6k
    }
3038
3039
75.4M
    if (buffer == NULL)
3040
75.4M
  ret = xmlStrndup(buf, len);
3041
29.1k
    else {
3042
29.1k
  ret = buffer;
3043
29.1k
  buffer = NULL;
3044
29.1k
  max = XML_MAX_NAMELEN;
3045
29.1k
    }
3046
3047
3048
75.4M
    if (c == ':') {
3049
2.61M
  c = *cur;
3050
2.61M
        *prefix = ret;
3051
2.61M
  if (c == 0) {
3052
0
      return(xmlStrndup(BAD_CAST "", 0));
3053
0
  }
3054
2.61M
  len = 0;
3055
3056
  /*
3057
   * Check that the first character is proper to start
3058
   * a new name
3059
   */
3060
2.61M
  if (!(((c >= 0x61) && (c <= 0x7A)) ||
3061
2.61M
        ((c >= 0x41) && (c <= 0x5A)) ||
3062
2.61M
        (c == '_') || (c == ':'))) {
3063
16.0k
      int l;
3064
16.0k
      int first = CUR_SCHAR(cur, l);
3065
3066
16.0k
      if (!IS_LETTER(first) && (first != '_')) {
3067
5.52k
    xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3068
5.52k
          "Name %s is not XML Namespace compliant\n",
3069
5.52k
          name);
3070
5.52k
      }
3071
16.0k
  }
3072
2.61M
  cur++;
3073
3074
17.2M
  while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3075
14.6M
      buf[len++] = c;
3076
14.6M
      c = *cur++;
3077
14.6M
  }
3078
2.61M
  if (len >= max) {
3079
      /*
3080
       * Okay someone managed to make a huge name, so he's ready to pay
3081
       * for the processing speed.
3082
       */
3083
13.4k
      max = len * 2;
3084
3085
13.4k
      buffer = (xmlChar *) xmlMallocAtomic(max);
3086
13.4k
      if (buffer == NULL) {
3087
0
          xmlErrMemory(ctxt, NULL);
3088
0
    return(NULL);
3089
0
      }
3090
13.4k
      memcpy(buffer, buf, len);
3091
35.2M
      while (c != 0) { /* tested bigname2.xml */
3092
35.2M
    if (len + 10 > max) {
3093
23.4k
        xmlChar *tmp;
3094
3095
23.4k
        max *= 2;
3096
23.4k
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3097
23.4k
        if (tmp == NULL) {
3098
0
      xmlErrMemory(ctxt, NULL);
3099
0
      xmlFree(buffer);
3100
0
      return(NULL);
3101
0
        }
3102
23.4k
        buffer = tmp;
3103
23.4k
    }
3104
35.2M
    buffer[len++] = c;
3105
35.2M
    c = *cur++;
3106
35.2M
      }
3107
13.4k
      buffer[len] = 0;
3108
13.4k
  }
3109
3110
2.61M
  if (buffer == NULL)
3111
2.59M
      ret = xmlStrndup(buf, len);
3112
13.4k
  else {
3113
13.4k
      ret = buffer;
3114
13.4k
  }
3115
2.61M
    }
3116
3117
75.4M
    return(ret);
3118
75.4M
}
3119
3120
/************************************************************************
3121
 *                  *
3122
 *      The parser itself       *
3123
 *  Relates to http://www.w3.org/TR/REC-xml       *
3124
 *                  *
3125
 ************************************************************************/
3126
3127
/************************************************************************
3128
 *                  *
3129
 *  Routines to parse Name, NCName and NmToken      *
3130
 *                  *
3131
 ************************************************************************/
3132
#ifdef DEBUG
/*
 * Call counters, only compiled in DEBUG builds. Each one is incremented
 * on entry to the parsing routine it is named after.
 */
static unsigned long nbParseName = 0;
static unsigned long nbParseNameComplex = 0;
static unsigned long nbParseNCName = 0;
static unsigned long nbParseNCNameComplex = 0;
static unsigned long nbParseNmToken = 0;
static unsigned long nbParseStringName = 0;
#endif
3140
3141
/*
3142
 * The two following functions are related to the change of accepted
3143
 * characters for Name and NmToken in the Revision 5 of XML-1.0
3144
 * They correspond to the modified production [4] and the new production [4a]
3145
 * changes in that revision. Also note that the macros used for the
3146
 * productions Letter, Digit, CombiningChar and Extender are not needed
3147
 * anymore.
3148
 * We still keep compatibility to pre-revision5 parsing semantic if the
3149
 * new XML_PARSE_OLD10 option is given to the parser.
3150
 */
3151
static int
3152
228M
xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3153
228M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3154
        /*
3155
   * Use the new checks of production [4] [4a] amd [5] of the
3156
   * Update 5 of XML-1.0
3157
   */
3158
160M
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3159
160M
      (((c >= 'a') && (c <= 'z')) ||
3160
160M
       ((c >= 'A') && (c <= 'Z')) ||
3161
160M
       (c == '_') || (c == ':') ||
3162
160M
       ((c >= 0xC0) && (c <= 0xD6)) ||
3163
160M
       ((c >= 0xD8) && (c <= 0xF6)) ||
3164
160M
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3165
160M
       ((c >= 0x370) && (c <= 0x37D)) ||
3166
160M
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3167
160M
       ((c >= 0x200C) && (c <= 0x200D)) ||
3168
160M
       ((c >= 0x2070) && (c <= 0x218F)) ||
3169
160M
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3170
160M
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3171
160M
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3172
160M
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3173
160M
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3174
159M
      return(1);
3175
160M
    } else {
3176
68.1M
        if (IS_LETTER(c) || (c == '_') || (c == ':'))
3177
67.8M
      return(1);
3178
68.1M
    }
3179
975k
    return(0);
3180
228M
}
3181
3182
static int
3183
13.8G
xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3184
13.8G
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3185
        /*
3186
   * Use the new checks of production [4] [4a] amd [5] of the
3187
   * Update 5 of XML-1.0
3188
   */
3189
9.80G
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3190
9.80G
      (((c >= 'a') && (c <= 'z')) ||
3191
9.80G
       ((c >= 'A') && (c <= 'Z')) ||
3192
9.80G
       ((c >= '0') && (c <= '9')) || /* !start */
3193
9.80G
       (c == '_') || (c == ':') ||
3194
9.80G
       (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3195
9.80G
       ((c >= 0xC0) && (c <= 0xD6)) ||
3196
9.80G
       ((c >= 0xD8) && (c <= 0xF6)) ||
3197
9.80G
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3198
9.80G
       ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3199
9.80G
       ((c >= 0x370) && (c <= 0x37D)) ||
3200
9.80G
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3201
9.80G
       ((c >= 0x200C) && (c <= 0x200D)) ||
3202
9.80G
       ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3203
9.80G
       ((c >= 0x2070) && (c <= 0x218F)) ||
3204
9.80G
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3205
9.80G
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3206
9.80G
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3207
9.80G
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3208
9.80G
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3209
9.63G
       return(1);
3210
9.80G
    } else {
3211
4.04G
        if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3212
4.04G
            (c == '.') || (c == '-') ||
3213
4.04G
      (c == '_') || (c == ':') ||
3214
4.04G
      (IS_COMBINING(c)) ||
3215
4.04G
      (IS_EXTENDER(c)))
3216
3.97G
      return(1);
3217
4.04G
    }
3218
232M
    return(0);
3219
13.8G
}
3220
3221
static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
3222
                                          int *len, int *alloc, int normalize);
3223
3224
static const xmlChar *
3225
7.03M
xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3226
7.03M
    int len = 0, l;
3227
7.03M
    int c;
3228
7.03M
    int count = 0;
3229
7.03M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3230
3.22M
                    XML_MAX_TEXT_LENGTH :
3231
7.03M
                    XML_MAX_NAME_LENGTH;
3232
3233
#ifdef DEBUG
3234
    nbParseNameComplex++;
3235
#endif
3236
3237
    /*
3238
     * Handler for more complex cases
3239
     */
3240
7.03M
    GROW;
3241
7.03M
    if (ctxt->instate == XML_PARSER_EOF)
3242
138
        return(NULL);
3243
7.03M
    c = CUR_CHAR(l);
3244
7.03M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3245
        /*
3246
   * Use the new checks of production [4] [4a] amd [5] of the
3247
   * Update 5 of XML-1.0
3248
   */
3249
4.58M
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3250
4.58M
      (!(((c >= 'a') && (c <= 'z')) ||
3251
4.46M
         ((c >= 'A') && (c <= 'Z')) ||
3252
4.46M
         (c == '_') || (c == ':') ||
3253
4.46M
         ((c >= 0xC0) && (c <= 0xD6)) ||
3254
4.46M
         ((c >= 0xD8) && (c <= 0xF6)) ||
3255
4.46M
         ((c >= 0xF8) && (c <= 0x2FF)) ||
3256
4.46M
         ((c >= 0x370) && (c <= 0x37D)) ||
3257
4.46M
         ((c >= 0x37F) && (c <= 0x1FFF)) ||
3258
4.46M
         ((c >= 0x200C) && (c <= 0x200D)) ||
3259
4.46M
         ((c >= 0x2070) && (c <= 0x218F)) ||
3260
4.46M
         ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3261
4.46M
         ((c >= 0x3001) && (c <= 0xD7FF)) ||
3262
4.46M
         ((c >= 0xF900) && (c <= 0xFDCF)) ||
3263
4.46M
         ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3264
4.46M
         ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3265
2.03M
      return(NULL);
3266
2.03M
  }
3267
2.54M
  len += l;
3268
2.54M
  NEXTL(l);
3269
2.54M
  c = CUR_CHAR(l);
3270
197M
  while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3271
197M
         (((c >= 'a') && (c <= 'z')) ||
3272
197M
          ((c >= 'A') && (c <= 'Z')) ||
3273
197M
          ((c >= '0') && (c <= '9')) || /* !start */
3274
197M
          (c == '_') || (c == ':') ||
3275
197M
          (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3276
197M
          ((c >= 0xC0) && (c <= 0xD6)) ||
3277
197M
          ((c >= 0xD8) && (c <= 0xF6)) ||
3278
197M
          ((c >= 0xF8) && (c <= 0x2FF)) ||
3279
197M
          ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3280
197M
          ((c >= 0x370) && (c <= 0x37D)) ||
3281
197M
          ((c >= 0x37F) && (c <= 0x1FFF)) ||
3282
197M
          ((c >= 0x200C) && (c <= 0x200D)) ||
3283
197M
          ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3284
197M
          ((c >= 0x2070) && (c <= 0x218F)) ||
3285
197M
          ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3286
197M
          ((c >= 0x3001) && (c <= 0xD7FF)) ||
3287
197M
          ((c >= 0xF900) && (c <= 0xFDCF)) ||
3288
197M
          ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3289
197M
          ((c >= 0x10000) && (c <= 0xEFFFF))
3290
197M
    )) {
3291
194M
      if (count++ > XML_PARSER_CHUNK_SIZE) {
3292
1.75M
    count = 0;
3293
1.75M
    GROW;
3294
1.75M
                if (ctxt->instate == XML_PARSER_EOF)
3295
0
                    return(NULL);
3296
1.75M
      }
3297
194M
            if (len <= INT_MAX - l)
3298
194M
          len += l;
3299
194M
      NEXTL(l);
3300
194M
      c = CUR_CHAR(l);
3301
194M
  }
3302
2.54M
    } else {
3303
2.45M
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3304
2.45M
      (!IS_LETTER(c) && (c != '_') &&
3305
2.34M
       (c != ':'))) {
3306
937k
      return(NULL);
3307
937k
  }
3308
1.51M
  len += l;
3309
1.51M
  NEXTL(l);
3310
1.51M
  c = CUR_CHAR(l);
3311
3312
102M
  while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3313
102M
         ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3314
102M
    (c == '.') || (c == '-') ||
3315
102M
    (c == '_') || (c == ':') ||
3316
102M
    (IS_COMBINING(c)) ||
3317
102M
    (IS_EXTENDER(c)))) {
3318
100M
      if (count++ > XML_PARSER_CHUNK_SIZE) {
3319
910k
    count = 0;
3320
910k
    GROW;
3321
910k
                if (ctxt->instate == XML_PARSER_EOF)
3322
0
                    return(NULL);
3323
910k
      }
3324
100M
            if (len <= INT_MAX - l)
3325
100M
          len += l;
3326
100M
      NEXTL(l);
3327
100M
      c = CUR_CHAR(l);
3328
100M
  }
3329
1.51M
    }
3330
4.05M
    if (len > maxLength) {
3331
305
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3332
305
        return(NULL);
3333
305
    }
3334
4.05M
    if (ctxt->input->cur - ctxt->input->base < len) {
3335
        /*
3336
         * There were a couple of bugs where PERefs lead to to a change
3337
         * of the buffer. Check the buffer size to avoid passing an invalid
3338
         * pointer to xmlDictLookup.
3339
         */
3340
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3341
0
                    "unexpected change of input buffer");
3342
0
        return (NULL);
3343
0
    }
3344
4.05M
    if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3345
3.52k
        return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3346
4.05M
    return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3347
4.05M
}
3348
3349
/**
3350
 * xmlParseName:
3351
 * @ctxt:  an XML parser context
3352
 *
3353
 * DEPRECATED: Internal function, don't use.
3354
 *
3355
 * parse an XML name.
3356
 *
3357
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3358
 *                  CombiningChar | Extender
3359
 *
3360
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3361
 *
3362
 * [6] Names ::= Name (#x20 Name)*
3363
 *
3364
 * Returns the Name parsed or NULL
3365
 */
3366
3367
const xmlChar *
3368
583M
xmlParseName(xmlParserCtxtPtr ctxt) {
3369
583M
    const xmlChar *in;
3370
583M
    const xmlChar *ret;
3371
583M
    size_t count = 0;
3372
583M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3373
218M
                       XML_MAX_TEXT_LENGTH :
3374
583M
                       XML_MAX_NAME_LENGTH;
3375
3376
583M
    GROW;
3377
3378
#ifdef DEBUG
3379
    nbParseName++;
3380
#endif
3381
3382
    /*
3383
     * Accelerator for simple ASCII names
3384
     */
3385
583M
    in = ctxt->input->cur;
3386
583M
    if (((*in >= 0x61) && (*in <= 0x7A)) ||
3387
583M
  ((*in >= 0x41) && (*in <= 0x5A)) ||
3388
583M
  (*in == '_') || (*in == ':')) {
3389
579M
  in++;
3390
2.62G
  while (((*in >= 0x61) && (*in <= 0x7A)) ||
3391
2.62G
         ((*in >= 0x41) && (*in <= 0x5A)) ||
3392
2.62G
         ((*in >= 0x30) && (*in <= 0x39)) ||
3393
2.62G
         (*in == '_') || (*in == '-') ||
3394
2.62G
         (*in == ':') || (*in == '.'))
3395
2.04G
      in++;
3396
579M
  if ((*in > 0) && (*in < 0x80)) {
3397
576M
      count = in - ctxt->input->cur;
3398
576M
            if (count > maxLength) {
3399
301
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3400
301
                return(NULL);
3401
301
            }
3402
576M
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3403
576M
      ctxt->input->cur = in;
3404
576M
      ctxt->input->col += count;
3405
576M
      if (ret == NULL)
3406
0
          xmlErrMemory(ctxt, NULL);
3407
576M
      return(ret);
3408
576M
  }
3409
579M
    }
3410
    /* accelerator for special cases */
3411
7.03M
    return(xmlParseNameComplex(ctxt));
3412
583M
}
3413
3414
static const xmlChar *
3415
1.31M
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3416
1.31M
    int len = 0, l;
3417
1.31M
    int c;
3418
1.31M
    int count = 0;
3419
1.31M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3420
486k
                    XML_MAX_TEXT_LENGTH :
3421
1.31M
                    XML_MAX_NAME_LENGTH;
3422
1.31M
    size_t startPosition = 0;
3423
3424
#ifdef DEBUG
3425
    nbParseNCNameComplex++;
3426
#endif
3427
3428
    /*
3429
     * Handler for more complex cases
3430
     */
3431
1.31M
    GROW;
3432
1.31M
    startPosition = CUR_PTR - BASE_PTR;
3433
1.31M
    c = CUR_CHAR(l);
3434
1.31M
    if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3435
1.31M
  (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3436
1.06M
  return(NULL);
3437
1.06M
    }
3438
3439
22.5M
    while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3440
22.5M
     (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3441
22.2M
  if (count++ > XML_PARSER_CHUNK_SIZE) {
3442
202k
      count = 0;
3443
202k
      GROW;
3444
202k
            if (ctxt->instate == XML_PARSER_EOF)
3445
0
                return(NULL);
3446
202k
  }
3447
22.2M
        if (len <= INT_MAX - l)
3448
22.2M
      len += l;
3449
22.2M
  NEXTL(l);
3450
22.2M
  c = CUR_CHAR(l);
3451
22.2M
  if (c == 0) {
3452
47.5k
      count = 0;
3453
      /*
3454
       * when shrinking to extend the buffer we really need to preserve
3455
       * the part of the name we already parsed. Hence rolling back
3456
       * by current length.
3457
       */
3458
47.5k
      ctxt->input->cur -= l;
3459
47.5k
      GROW;
3460
47.5k
            if (ctxt->instate == XML_PARSER_EOF)
3461
0
                return(NULL);
3462
47.5k
      ctxt->input->cur += l;
3463
47.5k
      c = CUR_CHAR(l);
3464
47.5k
  }
3465
22.2M
    }
3466
256k
    if (len > maxLength) {
3467
304
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3468
304
        return(NULL);
3469
304
    }
3470
255k
    return(xmlDictLookup(ctxt->dict, (BASE_PTR + startPosition), len));
3471
256k
}
3472
3473
/**
3474
 * xmlParseNCName:
3475
 * @ctxt:  an XML parser context
3476
 * @len:  length of the string parsed
3477
 *
3478
 * parse an XML name.
3479
 *
3480
 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3481
 *                      CombiningChar | Extender
3482
 *
3483
 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3484
 *
3485
 * Returns the Name parsed or NULL
3486
 */
3487
3488
static const xmlChar *
3489
89.0M
xmlParseNCName(xmlParserCtxtPtr ctxt) {
3490
89.0M
    const xmlChar *in, *e;
3491
89.0M
    const xmlChar *ret;
3492
89.0M
    size_t count = 0;
3493
89.0M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3494
19.1M
                       XML_MAX_TEXT_LENGTH :
3495
89.0M
                       XML_MAX_NAME_LENGTH;
3496
3497
#ifdef DEBUG
3498
    nbParseNCName++;
3499
#endif
3500
3501
    /*
3502
     * Accelerator for simple ASCII names
3503
     */
3504
89.0M
    in = ctxt->input->cur;
3505
89.0M
    e = ctxt->input->end;
3506
89.0M
    if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3507
89.0M
   ((*in >= 0x41) && (*in <= 0x5A)) ||
3508
89.0M
   (*in == '_')) && (in < e)) {
3509
87.9M
  in++;
3510
275M
  while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3511
275M
          ((*in >= 0x41) && (*in <= 0x5A)) ||
3512
275M
          ((*in >= 0x30) && (*in <= 0x39)) ||
3513
275M
          (*in == '_') || (*in == '-') ||
3514
275M
          (*in == '.')) && (in < e))
3515
187M
      in++;
3516
87.9M
  if (in >= e)
3517
13.1k
      goto complex;
3518
87.8M
  if ((*in > 0) && (*in < 0x80)) {
3519
87.7M
      count = in - ctxt->input->cur;
3520
87.7M
            if (count > maxLength) {
3521
235
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3522
235
                return(NULL);
3523
235
            }
3524
87.7M
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3525
87.7M
      ctxt->input->cur = in;
3526
87.7M
      ctxt->input->col += count;
3527
87.7M
      if (ret == NULL) {
3528
0
          xmlErrMemory(ctxt, NULL);
3529
0
      }
3530
87.7M
      return(ret);
3531
87.7M
  }
3532
87.8M
    }
3533
1.31M
complex:
3534
1.31M
    return(xmlParseNCNameComplex(ctxt));
3535
89.0M
}
3536
3537
/**
3538
 * xmlParseNameAndCompare:
3539
 * @ctxt:  an XML parser context
3540
 *
3541
 * parse an XML name and compares for match
3542
 * (specialized for endtag parsing)
3543
 *
3544
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3545
 * and the name for mismatch
3546
 */
3547
3548
static const xmlChar *
3549
29.3M
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3550
29.3M
    register const xmlChar *cmp = other;
3551
29.3M
    register const xmlChar *in;
3552
29.3M
    const xmlChar *ret;
3553
3554
29.3M
    GROW;
3555
29.3M
    if (ctxt->instate == XML_PARSER_EOF)
3556
0
        return(NULL);
3557
3558
29.3M
    in = ctxt->input->cur;
3559
146M
    while (*in != 0 && *in == *cmp) {
3560
117M
  ++in;
3561
117M
  ++cmp;
3562
117M
    }
3563
29.3M
    if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3564
  /* success */
3565
28.8M
  ctxt->input->col += in - ctxt->input->cur;
3566
28.8M
  ctxt->input->cur = in;
3567
28.8M
  return (const xmlChar*) 1;
3568
28.8M
    }
3569
    /* failure (or end of input buffer), check with full function */
3570
571k
    ret = xmlParseName (ctxt);
3571
    /* strings coming from the dictionary direct compare possible */
3572
571k
    if (ret == other) {
3573
32.8k
  return (const xmlChar*) 1;
3574
32.8k
    }
3575
538k
    return ret;
3576
571k
}
3577
3578
/**
3579
 * xmlParseStringName:
3580
 * @ctxt:  an XML parser context
3581
 * @str:  a pointer to the string pointer (IN/OUT)
3582
 *
3583
 * parse an XML name.
3584
 *
3585
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3586
 *                  CombiningChar | Extender
3587
 *
3588
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3589
 *
3590
 * [6] Names ::= Name (#x20 Name)*
3591
 *
3592
 * Returns the Name parsed or NULL. The @str pointer
3593
 * is updated to the current location in the string.
3594
 */
3595
3596
static xmlChar *
3597
227M
xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3598
227M
    xmlChar buf[XML_MAX_NAMELEN + 5];
3599
227M
    const xmlChar *cur = *str;
3600
227M
    int len = 0, l;
3601
227M
    int c;
3602
227M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3603
76.0M
                    XML_MAX_TEXT_LENGTH :
3604
227M
                    XML_MAX_NAME_LENGTH;
3605
3606
#ifdef DEBUG
3607
    nbParseStringName++;
3608
#endif
3609
3610
227M
    c = CUR_SCHAR(cur, l);
3611
227M
    if (!xmlIsNameStartChar(ctxt, c)) {
3612
60.6k
  return(NULL);
3613
60.6k
    }
3614
3615
227M
    COPY_BUF(l,buf,len,c);
3616
227M
    cur += l;
3617
227M
    c = CUR_SCHAR(cur, l);
3618
4.91G
    while (xmlIsNameChar(ctxt, c)) {
3619
4.73G
  COPY_BUF(l,buf,len,c);
3620
4.73G
  cur += l;
3621
4.73G
  c = CUR_SCHAR(cur, l);
3622
4.73G
  if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3623
      /*
3624
       * Okay someone managed to make a huge name, so he's ready to pay
3625
       * for the processing speed.
3626
       */
3627
38.7M
      xmlChar *buffer;
3628
38.7M
      int max = len * 2;
3629
3630
38.7M
      buffer = (xmlChar *) xmlMallocAtomic(max);
3631
38.7M
      if (buffer == NULL) {
3632
0
          xmlErrMemory(ctxt, NULL);
3633
0
    return(NULL);
3634
0
      }
3635
38.7M
      memcpy(buffer, buf, len);
3636
8.84G
      while (xmlIsNameChar(ctxt, c)) {
3637
8.80G
    if (len + 10 > max) {
3638
38.7M
        xmlChar *tmp;
3639
3640
38.7M
        max *= 2;
3641
38.7M
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3642
38.7M
        if (tmp == NULL) {
3643
0
      xmlErrMemory(ctxt, NULL);
3644
0
      xmlFree(buffer);
3645
0
      return(NULL);
3646
0
        }
3647
38.7M
        buffer = tmp;
3648
38.7M
    }
3649
8.80G
    COPY_BUF(l,buffer,len,c);
3650
8.80G
    cur += l;
3651
8.80G
    c = CUR_SCHAR(cur, l);
3652
8.80G
                if (len > maxLength) {
3653
38
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3654
38
                    xmlFree(buffer);
3655
38
                    return(NULL);
3656
38
                }
3657
8.80G
      }
3658
38.7M
      buffer[len] = 0;
3659
38.7M
      *str = cur;
3660
38.7M
      return(buffer);
3661
38.7M
  }
3662
4.73G
    }
3663
188M
    if (len > maxLength) {
3664
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3665
0
        return(NULL);
3666
0
    }
3667
188M
    *str = cur;
3668
188M
    return(xmlStrndup(buf, len));
3669
188M
}
3670
3671
/**
3672
 * xmlParseNmtoken:
3673
 * @ctxt:  an XML parser context
3674
 *
3675
 * DEPRECATED: Internal function, don't use.
3676
 *
3677
 * parse an XML Nmtoken.
3678
 *
3679
 * [7] Nmtoken ::= (NameChar)+
3680
 *
3681
 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3682
 *
3683
 * Returns the Nmtoken parsed or NULL
3684
 */
3685
3686
xmlChar *
3687
4.94M
xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3688
4.94M
    xmlChar buf[XML_MAX_NAMELEN + 5];
3689
4.94M
    int len = 0, l;
3690
4.94M
    int c;
3691
4.94M
    int count = 0;
3692
4.94M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3693
1.57M
                    XML_MAX_TEXT_LENGTH :
3694
4.94M
                    XML_MAX_NAME_LENGTH;
3695
3696
#ifdef DEBUG
3697
    nbParseNmToken++;
3698
#endif
3699
3700
4.94M
    GROW;
3701
4.94M
    if (ctxt->instate == XML_PARSER_EOF)
3702
12
        return(NULL);
3703
4.94M
    c = CUR_CHAR(l);
3704
3705
29.5M
    while (xmlIsNameChar(ctxt, c)) {
3706
24.5M
  if (count++ > XML_PARSER_CHUNK_SIZE) {
3707
0
      count = 0;
3708
0
      GROW;
3709
0
  }
3710
24.5M
  COPY_BUF(l,buf,len,c);
3711
24.5M
  NEXTL(l);
3712
24.5M
  c = CUR_CHAR(l);
3713
24.5M
  if (c == 0) {
3714
3.14k
      count = 0;
3715
3.14k
      GROW;
3716
3.14k
      if (ctxt->instate == XML_PARSER_EOF)
3717
0
    return(NULL);
3718
3.14k
            c = CUR_CHAR(l);
3719
3.14k
  }
3720
24.5M
  if (len >= XML_MAX_NAMELEN) {
3721
      /*
3722
       * Okay someone managed to make a huge token, so he's ready to pay
3723
       * for the processing speed.
3724
       */
3725
6.76k
      xmlChar *buffer;
3726
6.76k
      int max = len * 2;
3727
3728
6.76k
      buffer = (xmlChar *) xmlMallocAtomic(max);
3729
6.76k
      if (buffer == NULL) {
3730
0
          xmlErrMemory(ctxt, NULL);
3731
0
    return(NULL);
3732
0
      }
3733
6.76k
      memcpy(buffer, buf, len);
3734
25.8M
      while (xmlIsNameChar(ctxt, c)) {
3735
25.8M
    if (count++ > XML_PARSER_CHUNK_SIZE) {
3736
257k
        count = 0;
3737
257k
        GROW;
3738
257k
                    if (ctxt->instate == XML_PARSER_EOF) {
3739
0
                        xmlFree(buffer);
3740
0
                        return(NULL);
3741
0
                    }
3742
257k
    }
3743
25.8M
    if (len + 10 > max) {
3744
19.2k
        xmlChar *tmp;
3745
3746
19.2k
        max *= 2;
3747
19.2k
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3748
19.2k
        if (tmp == NULL) {
3749
0
      xmlErrMemory(ctxt, NULL);
3750
0
      xmlFree(buffer);
3751
0
      return(NULL);
3752
0
        }
3753
19.2k
        buffer = tmp;
3754
19.2k
    }
3755
25.8M
    COPY_BUF(l,buffer,len,c);
3756
25.8M
    NEXTL(l);
3757
25.8M
    c = CUR_CHAR(l);
3758
25.8M
                if (len > maxLength) {
3759
198
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3760
198
                    xmlFree(buffer);
3761
198
                    return(NULL);
3762
198
                }
3763
25.8M
      }
3764
6.56k
      buffer[len] = 0;
3765
6.56k
      return(buffer);
3766
6.76k
  }
3767
24.5M
    }
3768
4.93M
    if (len == 0)
3769
44.7k
        return(NULL);
3770
4.89M
    if (len > maxLength) {
3771
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3772
0
        return(NULL);
3773
0
    }
3774
4.89M
    return(xmlStrndup(buf, len));
3775
4.89M
}
3776
3777
/**
3778
 * xmlParseEntityValue:
3779
 * @ctxt:  an XML parser context
3780
 * @orig:  if non-NULL store a copy of the original entity value
3781
 *
3782
 * DEPRECATED: Internal function, don't use.
3783
 *
3784
 * parse a value for ENTITY declarations
3785
 *
3786
 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3787
 *                 "'" ([^%&'] | PEReference | Reference)* "'"
3788
 *
3789
 * Returns the EntityValue parsed with reference substituted or NULL
3790
 */
3791
3792
xmlChar *
3793
4.38M
xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3794
4.38M
    xmlChar *buf = NULL;
3795
4.38M
    int len = 0;
3796
4.38M
    int size = XML_PARSER_BUFFER_SIZE;
3797
4.38M
    int c, l;
3798
4.38M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3799
1.32M
                    XML_MAX_HUGE_LENGTH :
3800
4.38M
                    XML_MAX_TEXT_LENGTH;
3801
4.38M
    xmlChar stop;
3802
4.38M
    xmlChar *ret = NULL;
3803
4.38M
    const xmlChar *cur = NULL;
3804
4.38M
    xmlParserInputPtr input;
3805
3806
4.38M
    if (RAW == '"') stop = '"';
3807
819k
    else if (RAW == '\'') stop = '\'';
3808
0
    else {
3809
0
  xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
3810
0
  return(NULL);
3811
0
    }
3812
4.38M
    buf = (xmlChar *) xmlMallocAtomic(size);
3813
4.38M
    if (buf == NULL) {
3814
0
  xmlErrMemory(ctxt, NULL);
3815
0
  return(NULL);
3816
0
    }
3817
3818
    /*
3819
     * The content of the entity definition is copied in a buffer.
3820
     */
3821
3822
4.38M
    ctxt->instate = XML_PARSER_ENTITY_VALUE;
3823
4.38M
    input = ctxt->input;
3824
4.38M
    GROW;
3825
4.38M
    if (ctxt->instate == XML_PARSER_EOF)
3826
0
        goto error;
3827
4.38M
    NEXT;
3828
4.38M
    c = CUR_CHAR(l);
3829
    /*
3830
     * NOTE: 4.4.5 Included in Literal
3831
     * When a parameter entity reference appears in a literal entity
3832
     * value, ... a single or double quote character in the replacement
3833
     * text is always treated as a normal data character and will not
3834
     * terminate the literal.
3835
     * In practice it means we stop the loop only when back at parsing
3836
     * the initial entity and the quote is found
3837
     */
3838
458M
    while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3839
458M
      (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
3840
454M
  if (len + 5 >= size) {
3841
983k
      xmlChar *tmp;
3842
3843
983k
      size *= 2;
3844
983k
      tmp = (xmlChar *) xmlRealloc(buf, size);
3845
983k
      if (tmp == NULL) {
3846
0
    xmlErrMemory(ctxt, NULL);
3847
0
                goto error;
3848
0
      }
3849
983k
      buf = tmp;
3850
983k
  }
3851
454M
  COPY_BUF(l,buf,len,c);
3852
454M
  NEXTL(l);
3853
3854
454M
  GROW;
3855
454M
  c = CUR_CHAR(l);
3856
454M
  if (c == 0) {
3857
3.81k
      GROW;
3858
3.81k
      c = CUR_CHAR(l);
3859
3.81k
  }
3860
3861
454M
        if (len > maxLength) {
3862
0
            xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
3863
0
                           "entity value too long\n");
3864
0
            goto error;
3865
0
        }
3866
454M
    }
3867
4.38M
    buf[len] = 0;
3868
4.38M
    if (ctxt->instate == XML_PARSER_EOF)
3869
0
        goto error;
3870
4.38M
    if (c != stop) {
3871
5.94k
        xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3872
5.94k
        goto error;
3873
5.94k
    }
3874
4.37M
    NEXT;
3875
3876
    /*
3877
     * Raise problem w.r.t. '&' and '%' being used in non-entities
3878
     * reference constructs. Note Charref will be handled in
3879
     * xmlStringDecodeEntities()
3880
     */
3881
4.37M
    cur = buf;
3882
218M
    while (*cur != 0) { /* non input consuming */
3883
214M
  if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3884
4.29M
      xmlChar *name;
3885
4.29M
      xmlChar tmp = *cur;
3886
4.29M
            int nameOk = 0;
3887
3888
4.29M
      cur++;
3889
4.29M
      name = xmlParseStringName(ctxt, &cur);
3890
4.29M
            if (name != NULL) {
3891
4.28M
                nameOk = 1;
3892
4.28M
                xmlFree(name);
3893
4.28M
            }
3894
4.29M
            if ((nameOk == 0) || (*cur != ';')) {
3895
25.1k
    xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3896
25.1k
      "EntityValue: '%c' forbidden except for entities references\n",
3897
25.1k
                            tmp);
3898
25.1k
                goto error;
3899
25.1k
      }
3900
4.26M
      if ((tmp == '%') && (ctxt->inSubset == 1) &&
3901
4.26M
    (ctxt->inputNr == 1)) {
3902
5.76k
    xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3903
5.76k
                goto error;
3904
5.76k
      }
3905
4.26M
      if (*cur == 0)
3906
0
          break;
3907
4.26M
  }
3908
214M
  cur++;
3909
214M
    }
3910
3911
    /*
3912
     * Then PEReference entities are substituted.
3913
     *
3914
     * NOTE: 4.4.7 Bypassed
3915
     * When a general entity reference appears in the EntityValue in
3916
     * an entity declaration, it is bypassed and left as is.
3917
     * so XML_SUBSTITUTE_REF is not set here.
3918
     */
3919
4.34M
    ++ctxt->depth;
3920
4.34M
    ret = xmlStringDecodeEntitiesInt(ctxt, buf, len, XML_SUBSTITUTE_PEREF,
3921
4.34M
                                     0, 0, 0, /* check */ 1);
3922
4.34M
    --ctxt->depth;
3923
3924
4.34M
    if (orig != NULL) {
3925
4.34M
        *orig = buf;
3926
4.34M
        buf = NULL;
3927
4.34M
    }
3928
3929
4.38M
error:
3930
4.38M
    if (buf != NULL)
3931
36.8k
        xmlFree(buf);
3932
4.38M
    return(ret);
3933
4.34M
}
3934
3935
/**
3936
 * xmlParseAttValueComplex:
3937
 * @ctxt:  an XML parser context
3938
 * @len:   the resulting attribute len
3939
 * @normalize:  whether to apply the inner normalization
3940
 *
3941
 * parse a value for an attribute, this is the fallback function
3942
 * of xmlParseAttValue() when the attribute parsing requires handling
3943
 * of non-ASCII characters, or normalization compaction.
3944
 *
3945
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3946
 */
3947
static xmlChar *
3948
2.39M
xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
3949
2.39M
    xmlChar limit = 0;
3950
2.39M
    xmlChar *buf = NULL;
3951
2.39M
    xmlChar *rep = NULL;
3952
2.39M
    size_t len = 0;
3953
2.39M
    size_t buf_size = 0;
3954
2.39M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3955
860k
                       XML_MAX_HUGE_LENGTH :
3956
2.39M
                       XML_MAX_TEXT_LENGTH;
3957
2.39M
    int c, l, in_space = 0;
3958
2.39M
    xmlChar *current = NULL;
3959
2.39M
    xmlEntityPtr ent;
3960
3961
2.39M
    if (NXT(0) == '"') {
3962
1.45M
  ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3963
1.45M
  limit = '"';
3964
1.45M
        NEXT;
3965
1.45M
    } else if (NXT(0) == '\'') {
3966
941k
  limit = '\'';
3967
941k
  ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3968
941k
        NEXT;
3969
941k
    } else {
3970
0
  xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3971
0
  return(NULL);
3972
0
    }
3973
3974
    /*
3975
     * allocate a translation buffer.
3976
     */
3977
2.39M
    buf_size = XML_PARSER_BUFFER_SIZE;
3978
2.39M
    buf = (xmlChar *) xmlMallocAtomic(buf_size);
3979
2.39M
    if (buf == NULL) goto mem_error;
3980
3981
    /*
3982
     * OK loop until we reach one of the ending char or a size limit.
3983
     */
3984
2.39M
    c = CUR_CHAR(l);
3985
73.8M
    while (((NXT(0) != limit) && /* checked */
3986
73.8M
            (IS_CHAR(c)) && (c != '<')) &&
3987
73.8M
            (ctxt->instate != XML_PARSER_EOF)) {
3988
71.4M
  if (c == '&') {
3989
9.56M
      in_space = 0;
3990
9.56M
      if (NXT(1) == '#') {
3991
1.49M
    int val = xmlParseCharRef(ctxt);
3992
3993
1.49M
    if (val == '&') {
3994
9.26k
        if (ctxt->replaceEntities) {
3995
4.14k
      if (len + 10 > buf_size) {
3996
474
          growBuffer(buf, 10);
3997
474
      }
3998
4.14k
      buf[len++] = '&';
3999
5.11k
        } else {
4000
      /*
4001
       * The reparsing will be done in xmlStringGetNodeList()
4002
       * called by the attribute() function in SAX.c
4003
       */
4004
5.11k
      if (len + 10 > buf_size) {
4005
532
          growBuffer(buf, 10);
4006
532
      }
4007
5.11k
      buf[len++] = '&';
4008
5.11k
      buf[len++] = '#';
4009
5.11k
      buf[len++] = '3';
4010
5.11k
      buf[len++] = '8';
4011
5.11k
      buf[len++] = ';';
4012
5.11k
        }
4013
1.48M
    } else if (val != 0) {
4014
1.32M
        if (len + 10 > buf_size) {
4015
8.49k
      growBuffer(buf, 10);
4016
8.49k
        }
4017
1.32M
        len += xmlCopyChar(0, &buf[len], val);
4018
1.32M
    }
4019
8.06M
      } else {
4020
8.06M
    ent = xmlParseEntityRef(ctxt);
4021
8.06M
    if ((ent != NULL) &&
4022
8.06M
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
4023
374k
        if (len + 10 > buf_size) {
4024
814
      growBuffer(buf, 10);
4025
814
        }
4026
374k
        if ((ctxt->replaceEntities == 0) &&
4027
374k
            (ent->content[0] == '&')) {
4028
128k
      buf[len++] = '&';
4029
128k
      buf[len++] = '#';
4030
128k
      buf[len++] = '3';
4031
128k
      buf[len++] = '8';
4032
128k
      buf[len++] = ';';
4033
246k
        } else {
4034
246k
      buf[len++] = ent->content[0];
4035
246k
        }
4036
7.69M
    } else if ((ent != NULL) &&
4037
7.69M
               (ctxt->replaceEntities != 0)) {
4038
3.52M
        if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
4039
3.52M
                        if (xmlParserEntityCheck(ctxt, ent->length))
4040
23
                            goto error;
4041
4042
3.52M
      ++ctxt->depth;
4043
3.52M
      rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
4044
3.52M
                                ent->length, XML_SUBSTITUTE_REF, 0, 0, 0,
4045
3.52M
                                /* check */ 1);
4046
3.52M
      --ctxt->depth;
4047
3.52M
      if (rep != NULL) {
4048
3.44M
          current = rep;
4049
928M
          while (*current != 0) { /* non input consuming */
4050
925M
                                if ((*current == 0xD) || (*current == 0xA) ||
4051
925M
                                    (*current == 0x9)) {
4052
2.18M
                                    buf[len++] = 0x20;
4053
2.18M
                                    current++;
4054
2.18M
                                } else
4055
923M
                                    buf[len++] = *current++;
4056
925M
        if (len + 10 > buf_size) {
4057
62.2k
            growBuffer(buf, 10);
4058
62.2k
        }
4059
925M
          }
4060
3.44M
          xmlFree(rep);
4061
3.44M
          rep = NULL;
4062
3.44M
      }
4063
3.52M
        } else {
4064
0
      if (len + 10 > buf_size) {
4065
0
          growBuffer(buf, 10);
4066
0
      }
4067
0
      if (ent->content != NULL)
4068
0
          buf[len++] = ent->content[0];
4069
0
        }
4070
4.16M
    } else if (ent != NULL) {
4071
3.43M
        int i = xmlStrlen(ent->name);
4072
3.43M
        const xmlChar *cur = ent->name;
4073
4074
        /*
4075
                     * We also check for recursion and amplification
4076
                     * when entities are not substituted. They're
4077
                     * often expanded later.
4078
         */
4079
3.43M
        if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
4080
3.43M
      (ent->content != NULL)) {
4081
3.34M
                        if ((ent->flags & XML_ENT_CHECKED) == 0) {
4082
40.4k
                            unsigned long oldCopy = ctxt->sizeentcopy;
4083
4084
40.4k
                            ctxt->sizeentcopy = ent->length;
4085
4086
40.4k
                            ++ctxt->depth;
4087
40.4k
                            rep = xmlStringDecodeEntitiesInt(ctxt,
4088
40.4k
                                    ent->content, ent->length,
4089
40.4k
                                    XML_SUBSTITUTE_REF, 0, 0, 0,
4090
40.4k
                                    /* check */ 1);
4091
40.4k
                            --ctxt->depth;
4092
4093
                            /*
4094
                             * If we're parsing DTD content, the entity
4095
                             * might reference other entities which
4096
                             * weren't defined yet, so the check isn't
4097
                             * reliable.
4098
                             */
4099
40.4k
                            if (ctxt->inSubset == 0) {
4100
36.0k
                                ent->flags |= XML_ENT_CHECKED;
4101
36.0k
                                ent->expandedSize = ctxt->sizeentcopy;
4102
36.0k
                            }
4103
4104
40.4k
                            if (rep != NULL) {
4105
39.0k
                                xmlFree(rep);
4106
39.0k
                                rep = NULL;
4107
39.0k
                            } else {
4108
1.42k
                                ent->content[0] = 0;
4109
1.42k
                            }
4110
4111
40.4k
                            if (xmlParserEntityCheck(ctxt, oldCopy))
4112
885
                                goto error;
4113
3.29M
                        } else {
4114
3.29M
                            if (xmlParserEntityCheck(ctxt, ent->expandedSize))
4115
147
                                goto error;
4116
3.29M
                        }
4117
3.34M
        }
4118
4119
        /*
4120
         * Just output the reference
4121
         */
4122
3.43M
        buf[len++] = '&';
4123
3.44M
        while (len + i + 10 > buf_size) {
4124
23.5k
      growBuffer(buf, i + 10);
4125
23.5k
        }
4126
10.7M
        for (;i > 0;i--)
4127
7.27M
      buf[len++] = *cur++;
4128
3.43M
        buf[len++] = ';';
4129
3.43M
    }
4130
8.06M
      }
4131
61.8M
  } else {
4132
61.8M
      if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
4133
9.30M
          if ((len != 0) || (!normalize)) {
4134
9.03M
        if ((!normalize) || (!in_space)) {
4135
8.88M
      COPY_BUF(l,buf,len,0x20);
4136
8.89M
      while (len + 10 > buf_size) {
4137
31.6k
          growBuffer(buf, 10);
4138
31.6k
      }
4139
8.88M
        }
4140
9.03M
        in_space = 1;
4141
9.03M
    }
4142
52.5M
      } else {
4143
52.5M
          in_space = 0;
4144
52.5M
    COPY_BUF(l,buf,len,c);
4145
52.5M
    if (len + 10 > buf_size) {
4146
158k
        growBuffer(buf, 10);
4147
158k
    }
4148
52.5M
      }
4149
61.8M
      NEXTL(l);
4150
61.8M
  }
4151
71.4M
  GROW;
4152
71.4M
  c = CUR_CHAR(l);
4153
71.4M
        if (len > maxLength) {
4154
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4155
0
                           "AttValue length too long\n");
4156
0
            goto mem_error;
4157
0
        }
4158
71.4M
    }
4159
2.39M
    if (ctxt->instate == XML_PARSER_EOF)
4160
2.78k
        goto error;
4161
4162
2.39M
    if ((in_space) && (normalize)) {
4163
184k
        while ((len > 0) && (buf[len - 1] == 0x20)) len--;
4164
83.5k
    }
4165
2.39M
    buf[len] = 0;
4166
2.39M
    if (RAW == '<') {
4167
539k
  xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4168
1.85M
    } else if (RAW != limit) {
4169
345k
  if ((c != 0) && (!IS_CHAR(c))) {
4170
159k
      xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4171
159k
         "invalid character in attribute value\n");
4172
186k
  } else {
4173
186k
      xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4174
186k
         "AttValue: ' expected\n");
4175
186k
        }
4176
345k
    } else
4177
1.50M
  NEXT;
4178
4179
2.39M
    if (attlen != NULL) *attlen = len;
4180
2.39M
    return(buf);
4181
4182
0
mem_error:
4183
0
    xmlErrMemory(ctxt, NULL);
4184
3.83k
error:
4185
3.83k
    if (buf != NULL)
4186
3.83k
        xmlFree(buf);
4187
3.83k
    if (rep != NULL)
4188
0
        xmlFree(rep);
4189
3.83k
    return(NULL);
4190
0
}
4191
4192
/**
4193
 * xmlParseAttValue:
4194
 * @ctxt:  an XML parser context
4195
 *
4196
 * DEPRECATED: Internal function, don't use.
4197
 *
4198
 * parse a value for an attribute
4199
 * Note: the parser won't do substitution of entities here, this
4200
 * will be handled later in xmlStringGetNodeList
4201
 *
4202
 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4203
 *                   "'" ([^<&'] | Reference)* "'"
4204
 *
4205
 * 3.3.3 Attribute-Value Normalization:
4206
 * Before the value of an attribute is passed to the application or
4207
 * checked for validity, the XML processor must normalize it as follows:
4208
 * - a character reference is processed by appending the referenced
4209
 *   character to the attribute value
4210
 * - an entity reference is processed by recursively processing the
4211
 *   replacement text of the entity
4212
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4213
 *   appending #x20 to the normalized value, except that only a single
4214
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
4215
 *   parsed entity or the literal entity value of an internal parsed entity
4216
 * - other characters are processed by appending them to the normalized value
4217
 * If the declared value is not CDATA, then the XML processor must further
4218
 * process the normalized attribute value by discarding any leading and
4219
 * trailing space (#x20) characters, and by replacing sequences of space
4220
 * (#x20) characters by a single space (#x20) character.
4221
 * All attributes for which no declaration has been read should be treated
4222
 * by a non-validating parser as if declared CDATA.
4223
 *
4224
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4225
 */
4226
4227
4228
xmlChar *
4229
35.5M
xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4230
35.5M
    if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4231
35.5M
    return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
4232
35.5M
}
4233
4234
/**
4235
 * xmlParseSystemLiteral:
4236
 * @ctxt:  an XML parser context
4237
 *
4238
 * DEPRECATED: Internal function, don't use.
4239
 *
4240
 * parse an XML Literal
4241
 *
4242
 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4243
 *
4244
 * Returns the SystemLiteral parsed or NULL
4245
 */
4246
4247
xmlChar *
4248
3.74M
xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4249
3.74M
    xmlChar *buf = NULL;
4250
3.74M
    int len = 0;
4251
3.74M
    int size = XML_PARSER_BUFFER_SIZE;
4252
3.74M
    int cur, l;
4253
3.74M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4254
933k
                    XML_MAX_TEXT_LENGTH :
4255
3.74M
                    XML_MAX_NAME_LENGTH;
4256
3.74M
    xmlChar stop;
4257
3.74M
    int state = ctxt->instate;
4258
3.74M
    int count = 0;
4259
4260
3.74M
    SHRINK;
4261
3.74M
    if (RAW == '"') {
4262
578k
        NEXT;
4263
578k
  stop = '"';
4264
3.16M
    } else if (RAW == '\'') {
4265
3.03M
        NEXT;
4266
3.03M
  stop = '\'';
4267
3.03M
    } else {
4268
128k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4269
128k
  return(NULL);
4270
128k
    }
4271
4272
3.61M
    buf = (xmlChar *) xmlMallocAtomic(size);
4273
3.61M
    if (buf == NULL) {
4274
0
        xmlErrMemory(ctxt, NULL);
4275
0
  return(NULL);
4276
0
    }
4277
3.61M
    ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4278
3.61M
    cur = CUR_CHAR(l);
4279
39.5M
    while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4280
35.8M
  if (len + 5 >= size) {
4281
38.6k
      xmlChar *tmp;
4282
4283
38.6k
      size *= 2;
4284
38.6k
      tmp = (xmlChar *) xmlRealloc(buf, size);
4285
38.6k
      if (tmp == NULL) {
4286
0
          xmlFree(buf);
4287
0
    xmlErrMemory(ctxt, NULL);
4288
0
    ctxt->instate = (xmlParserInputState) state;
4289
0
    return(NULL);
4290
0
      }
4291
38.6k
      buf = tmp;
4292
38.6k
  }
4293
35.8M
  count++;
4294
35.8M
  if (count > 50) {
4295
458k
      SHRINK;
4296
458k
      GROW;
4297
458k
      count = 0;
4298
458k
            if (ctxt->instate == XML_PARSER_EOF) {
4299
0
          xmlFree(buf);
4300
0
    return(NULL);
4301
0
            }
4302
458k
  }
4303
35.8M
  COPY_BUF(l,buf,len,cur);
4304
35.8M
  NEXTL(l);
4305
35.8M
  cur = CUR_CHAR(l);
4306
35.8M
  if (cur == 0) {
4307
7.02k
      GROW;
4308
7.02k
      SHRINK;
4309
7.02k
      cur = CUR_CHAR(l);
4310
7.02k
  }
4311
35.8M
        if (len > maxLength) {
4312
199
            xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4313
199
            xmlFree(buf);
4314
199
            ctxt->instate = (xmlParserInputState) state;
4315
199
            return(NULL);
4316
199
        }
4317
35.8M
    }
4318
3.61M
    buf[len] = 0;
4319
3.61M
    ctxt->instate = (xmlParserInputState) state;
4320
3.61M
    if (!IS_CHAR(cur)) {
4321
10.5k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4322
3.60M
    } else {
4323
3.60M
  NEXT;
4324
3.60M
    }
4325
3.61M
    return(buf);
4326
3.61M
}
4327
4328
/**
4329
 * xmlParsePubidLiteral:
4330
 * @ctxt:  an XML parser context
4331
 *
4332
 * DEPRECATED: Internal function, don't use.
4333
 *
4334
 * parse an XML public literal
4335
 *
4336
 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4337
 *
4338
 * Returns the PubidLiteral parsed or NULL.
4339
 */
4340
4341
xmlChar *
4342
1.61M
xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4343
1.61M
    xmlChar *buf = NULL;
4344
1.61M
    int len = 0;
4345
1.61M
    int size = XML_PARSER_BUFFER_SIZE;
4346
1.61M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4347
542k
                    XML_MAX_TEXT_LENGTH :
4348
1.61M
                    XML_MAX_NAME_LENGTH;
4349
1.61M
    xmlChar cur;
4350
1.61M
    xmlChar stop;
4351
1.61M
    int count = 0;
4352
1.61M
    xmlParserInputState oldstate = ctxt->instate;
4353
4354
1.61M
    SHRINK;
4355
1.61M
    if (RAW == '"') {
4356
112k
        NEXT;
4357
112k
  stop = '"';
4358
1.50M
    } else if (RAW == '\'') {
4359
1.48M
        NEXT;
4360
1.48M
  stop = '\'';
4361
1.48M
    } else {
4362
20.8k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4363
20.8k
  return(NULL);
4364
20.8k
    }
4365
1.59M
    buf = (xmlChar *) xmlMallocAtomic(size);
4366
1.59M
    if (buf == NULL) {
4367
0
  xmlErrMemory(ctxt, NULL);
4368
0
  return(NULL);
4369
0
    }
4370
1.59M
    ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
4371
1.59M
    cur = CUR;
4372
16.6M
    while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
4373
15.0M
  if (len + 1 >= size) {
4374
24.6k
      xmlChar *tmp;
4375
4376
24.6k
      size *= 2;
4377
24.6k
      tmp = (xmlChar *) xmlRealloc(buf, size);
4378
24.6k
      if (tmp == NULL) {
4379
0
    xmlErrMemory(ctxt, NULL);
4380
0
    xmlFree(buf);
4381
0
    return(NULL);
4382
0
      }
4383
24.6k
      buf = tmp;
4384
24.6k
  }
4385
15.0M
  buf[len++] = cur;
4386
15.0M
  count++;
4387
15.0M
  if (count > 50) {
4388
190k
      SHRINK;
4389
190k
      GROW;
4390
190k
      count = 0;
4391
190k
            if (ctxt->instate == XML_PARSER_EOF) {
4392
0
    xmlFree(buf);
4393
0
    return(NULL);
4394
0
            }
4395
190k
  }
4396
15.0M
  NEXT;
4397
15.0M
  cur = CUR;
4398
15.0M
  if (cur == 0) {
4399
2.58k
      GROW;
4400
2.58k
      SHRINK;
4401
2.58k
      cur = CUR;
4402
2.58k
  }
4403
15.0M
        if (len > maxLength) {
4404
15
            xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4405
15
            xmlFree(buf);
4406
15
            return(NULL);
4407
15
        }
4408
15.0M
    }
4409
1.59M
    buf[len] = 0;
4410
1.59M
    if (cur != stop) {
4411
47.8k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4412
1.54M
    } else {
4413
1.54M
  NEXT;
4414
1.54M
    }
4415
1.59M
    ctxt->instate = oldstate;
4416
1.59M
    return(buf);
4417
1.59M
}
4418
4419
static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt);
4420
4421
/*
 * Lookup table for the inner scanning loop of xmlParseCharData().
 *
 * The table is indexed by an input byte. A non-zero entry means the byte is
 * plain ASCII character data that the fast path can step over; a zero entry
 * stops the scan. Zero entries are: every control byte except TAB (0x09),
 * '&' (0x26), '<' (0x3C), ']' (0x5D) and every byte >= 0x80. LF and CR are
 * zero here because xmlParseCharData() handles line ends itself.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x08 */ 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* TAB only */
    /* 0x10 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x18 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20 */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* no '&' */
    /* 0x28 */ 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30 */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    /* 0x38 */ 0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* no '<' */
    /* 0x40 */ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    /* 0x48 */ 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50 */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    /* 0x58 */ 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* no ']' */
    /* 0x60 */ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    /* 0x68 */ 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    /* 0x78 */ 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    /* 0x80 and above: non-ASCII bytes always stop the scan */
    /* 0x80 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x88 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x90 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x98 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xA0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xA8 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xB0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xB8 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xC0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xC8 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xD0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xD8 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xE0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xE8 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xF0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xF8 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
4458
4459
/**
4460
 * xmlParseCharData:
4461
 * @ctxt:  an XML parser context
4462
 * @cdata:  unused
4463
 *
4464
 * DEPRECATED: Internal function, don't use.
4465
 *
4466
 * Parse character data. Always makes progress if the first char isn't
4467
 * '<' or '&'.
4468
 *
4469
 * if we are within a CDATA section ']]>' marks an end of section.
4470
 *
4471
 * The right angle bracket (>) may be represented using the string "&gt;",
4472
 * and must, for compatibility, be escaped using "&gt;" or a character
4473
 * reference when it appears in the string "]]>" in content, when that
4474
 * string is not marking the end of a CDATA section.
4475
 *
4476
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4477
 */
4478
4479
void
4480
101M
xmlParseCharData(xmlParserCtxtPtr ctxt, ATTRIBUTE_UNUSED int cdata) {
4481
101M
    const xmlChar *in;
4482
101M
    int nbchar = 0;
4483
101M
    int line = ctxt->input->line;
4484
101M
    int col = ctxt->input->col;
4485
101M
    int ccol;
4486
4487
101M
    SHRINK;
4488
101M
    GROW;
4489
    /*
4490
     * Accelerated common case where input don't need to be
4491
     * modified before passing it to the handler.
4492
     */
4493
101M
    in = ctxt->input->cur;
4494
132M
    do {
4495
161M
get_more_space:
4496
200M
        while (*in == 0x20) { in++; ctxt->input->col++; }
4497
161M
        if (*in == 0xA) {
4498
30.6M
            do {
4499
30.6M
                ctxt->input->line++; ctxt->input->col = 1;
4500
30.6M
                in++;
4501
30.6M
            } while (*in == 0xA);
4502
29.3M
            goto get_more_space;
4503
29.3M
        }
4504
132M
        if (*in == '<') {
4505
22.5M
            nbchar = in - ctxt->input->cur;
4506
22.5M
            if (nbchar > 0) {
4507
22.5M
                const xmlChar *tmp = ctxt->input->cur;
4508
22.5M
                ctxt->input->cur = in;
4509
4510
22.5M
                if ((ctxt->sax != NULL) &&
4511
22.5M
                    (ctxt->sax->ignorableWhitespace !=
4512
22.5M
                     ctxt->sax->characters)) {
4513
9.33M
                    if (areBlanks(ctxt, tmp, nbchar, 1)) {
4514
7.80M
                        if (ctxt->sax->ignorableWhitespace != NULL)
4515
7.80M
                            ctxt->sax->ignorableWhitespace(ctxt->userData,
4516
7.80M
                                                   tmp, nbchar);
4517
7.80M
                    } else {
4518
1.52M
                        if (ctxt->sax->characters != NULL)
4519
1.52M
                            ctxt->sax->characters(ctxt->userData,
4520
1.52M
                                                  tmp, nbchar);
4521
1.52M
                        if (*ctxt->space == -1)
4522
460k
                            *ctxt->space = -2;
4523
1.52M
                    }
4524
13.2M
                } else if ((ctxt->sax != NULL) &&
4525
13.2M
                           (ctxt->sax->characters != NULL)) {
4526
13.2M
                    ctxt->sax->characters(ctxt->userData,
4527
13.2M
                                          tmp, nbchar);
4528
13.2M
                }
4529
22.5M
            }
4530
22.5M
            return;
4531
22.5M
        }
4532
4533
137M
get_more:
4534
137M
        ccol = ctxt->input->col;
4535
1.71G
        while (test_char_data[*in]) {
4536
1.57G
            in++;
4537
1.57G
            ccol++;
4538
1.57G
        }
4539
137M
        ctxt->input->col = ccol;
4540
137M
        if (*in == 0xA) {
4541
25.4M
            do {
4542
25.4M
                ctxt->input->line++; ctxt->input->col = 1;
4543
25.4M
                in++;
4544
25.4M
            } while (*in == 0xA);
4545
24.9M
            goto get_more;
4546
24.9M
        }
4547
112M
        if (*in == ']') {
4548
2.47M
            if ((in[1] == ']') && (in[2] == '>')) {
4549
38.3k
                xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4550
38.3k
                ctxt->input->cur = in + 1;
4551
38.3k
                return;
4552
38.3k
            }
4553
2.43M
            in++;
4554
2.43M
            ctxt->input->col++;
4555
2.43M
            goto get_more;
4556
2.47M
        }
4557
109M
        nbchar = in - ctxt->input->cur;
4558
109M
        if (nbchar > 0) {
4559
73.8M
            if ((ctxt->sax != NULL) &&
4560
73.8M
                (ctxt->sax->ignorableWhitespace !=
4561
73.8M
                 ctxt->sax->characters) &&
4562
73.8M
                (IS_BLANK_CH(*ctxt->input->cur))) {
4563
19.4M
                const xmlChar *tmp = ctxt->input->cur;
4564
19.4M
                ctxt->input->cur = in;
4565
4566
19.4M
                if (areBlanks(ctxt, tmp, nbchar, 0)) {
4567
13.1M
                    if (ctxt->sax->ignorableWhitespace != NULL)
4568
13.1M
                        ctxt->sax->ignorableWhitespace(ctxt->userData,
4569
13.1M
                                                       tmp, nbchar);
4570
13.1M
                } else {
4571
6.36M
                    if (ctxt->sax->characters != NULL)
4572
6.36M
                        ctxt->sax->characters(ctxt->userData,
4573
6.36M
                                              tmp, nbchar);
4574
6.36M
                    if (*ctxt->space == -1)
4575
1.80M
                        *ctxt->space = -2;
4576
6.36M
                }
4577
19.4M
                line = ctxt->input->line;
4578
19.4M
                col = ctxt->input->col;
4579
54.3M
            } else if (ctxt->sax != NULL) {
4580
54.3M
                if (ctxt->sax->characters != NULL)
4581
54.3M
                    ctxt->sax->characters(ctxt->userData,
4582
54.3M
                                          ctxt->input->cur, nbchar);
4583
54.3M
                line = ctxt->input->line;
4584
54.3M
                col = ctxt->input->col;
4585
54.3M
            }
4586
73.8M
        }
4587
109M
        ctxt->input->cur = in;
4588
109M
        if (*in == 0xD) {
4589
30.9M
            in++;
4590
30.9M
            if (*in == 0xA) {
4591
30.8M
                ctxt->input->cur = in;
4592
30.8M
                in++;
4593
30.8M
                ctxt->input->line++; ctxt->input->col = 1;
4594
30.8M
                continue; /* while */
4595
30.8M
            }
4596
64.0k
            in--;
4597
64.0k
        }
4598
79.0M
        if (*in == '<') {
4599
66.6M
            return;
4600
66.6M
        }
4601
12.4M
        if (*in == '&') {
4602
4.93M
            return;
4603
4.93M
        }
4604
7.48M
        SHRINK;
4605
7.48M
        GROW;
4606
7.48M
        if (ctxt->instate == XML_PARSER_EOF)
4607
0
            return;
4608
7.48M
        in = ctxt->input->cur;
4609
38.3M
    } while (((*in >= 0x20) && (*in <= 0x7F)) ||
4610
38.3M
             (*in == 0x09) || (*in == 0x0a));
4611
7.50M
    ctxt->input->line = line;
4612
7.50M
    ctxt->input->col = col;
4613
7.50M
    xmlParseCharDataComplex(ctxt);
4614
7.50M
}
4615
4616
/**
4617
 * xmlParseCharDataComplex:
4618
 * @ctxt:  an XML parser context
4619
 * @cdata:  int indicating whether we are within a CDATA section
4620
 *
4621
 * Always makes progress if the first char isn't '<' or '&'.
4622
 *
4623
 * parse a CharData section.this is the fallback function
4624
 * of xmlParseCharData() when the parsing requires handling
4625
 * of non-ASCII characters.
4626
 */
4627
static void
4628
7.50M
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt) {
4629
7.50M
    xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4630
7.50M
    int nbchar = 0;
4631
7.50M
    int cur, l;
4632
7.50M
    int count = 0;
4633
4634
7.50M
    SHRINK;
4635
7.50M
    GROW;
4636
7.50M
    cur = CUR_CHAR(l);
4637
96.3M
    while ((cur != '<') && /* checked */
4638
96.3M
           (cur != '&') &&
4639
96.3M
     (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
4640
88.8M
  if ((cur == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4641
17.6k
      xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4642
17.6k
  }
4643
88.8M
  COPY_BUF(l,buf,nbchar,cur);
4644
  /* move current position before possible calling of ctxt->sax->characters */
4645
88.8M
  NEXTL(l);
4646
88.8M
  cur = CUR_CHAR(l);
4647
88.8M
  if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4648
249k
      buf[nbchar] = 0;
4649
4650
      /*
4651
       * OK the segment is to be consumed as chars.
4652
       */
4653
249k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4654
222k
    if (areBlanks(ctxt, buf, nbchar, 0)) {
4655
6.80k
        if (ctxt->sax->ignorableWhitespace != NULL)
4656
6.80k
      ctxt->sax->ignorableWhitespace(ctxt->userData,
4657
6.80k
                                     buf, nbchar);
4658
215k
    } else {
4659
215k
        if (ctxt->sax->characters != NULL)
4660
215k
      ctxt->sax->characters(ctxt->userData, buf, nbchar);
4661
215k
        if ((ctxt->sax->characters !=
4662
215k
             ctxt->sax->ignorableWhitespace) &&
4663
215k
      (*ctxt->space == -1))
4664
4.13k
      *ctxt->space = -2;
4665
215k
    }
4666
222k
      }
4667
249k
      nbchar = 0;
4668
            /* something really bad happened in the SAX callback */
4669
249k
            if (ctxt->instate != XML_PARSER_CONTENT)
4670
0
                return;
4671
249k
  }
4672
88.8M
  count++;
4673
88.8M
  if (count > 50) {
4674
1.41M
      SHRINK;
4675
1.41M
      GROW;
4676
1.41M
      count = 0;
4677
1.41M
            if (ctxt->instate == XML_PARSER_EOF)
4678
0
    return;
4679
1.41M
  }
4680
88.8M
    }
4681
7.50M
    if (nbchar != 0) {
4682
1.56M
        buf[nbchar] = 0;
4683
  /*
4684
   * OK the segment is to be consumed as chars.
4685
   */
4686
1.56M
  if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4687
1.36M
      if (areBlanks(ctxt, buf, nbchar, 0)) {
4688
3.72k
    if (ctxt->sax->ignorableWhitespace != NULL)
4689
3.72k
        ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4690
1.36M
      } else {
4691
1.36M
    if (ctxt->sax->characters != NULL)
4692
1.36M
        ctxt->sax->characters(ctxt->userData, buf, nbchar);
4693
1.36M
    if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4694
1.36M
        (*ctxt->space == -1))
4695
230k
        *ctxt->space = -2;
4696
1.36M
      }
4697
1.36M
  }
4698
1.56M
    }
4699
7.50M
    if ((ctxt->input->cur < ctxt->input->end) && (!IS_CHAR(cur))) {
4700
  /* Generate the error and skip the offending character */
4701
5.93M
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4702
5.93M
                          "PCDATA invalid Char value %d\n",
4703
5.93M
                    cur ? cur : CUR);
4704
5.93M
  NEXT;
4705
5.93M
    }
4706
7.50M
}
4707
4708
/**
4709
 * xmlParseExternalID:
4710
 * @ctxt:  an XML parser context
4711
 * @publicID:  a xmlChar** receiving PubidLiteral
4712
 * @strict: indicate whether we should restrict parsing to only
4713
 *          production [75], see NOTE below
4714
 *
4715
 * DEPRECATED: Internal function, don't use.
4716
 *
4717
 * Parse an External ID or a Public ID
4718
 *
4719
 * NOTE: Productions [75] and [83] interact badly since [75] can generate
4720
 *       'PUBLIC' S PubidLiteral S SystemLiteral
4721
 *
4722
 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4723
 *                   | 'PUBLIC' S PubidLiteral S SystemLiteral
4724
 *
4725
 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4726
 *
4727
 * Returns the function returns SystemLiteral and in the second
4728
 *                case publicID receives PubidLiteral, is strict is off
4729
 *                it is possible to return NULL and have publicID set.
4730
 */
4731
4732
xmlChar *
4733
4.18M
xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4734
4.18M
    xmlChar *URI = NULL;
4735
4736
4.18M
    SHRINK;
4737
4738
4.18M
    *publicID = NULL;
4739
4.18M
    if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4740
2.18M
        SKIP(6);
4741
2.18M
  if (SKIP_BLANKS == 0) {
4742
9.27k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4743
9.27k
                     "Space required after 'SYSTEM'\n");
4744
9.27k
  }
4745
2.18M
  URI = xmlParseSystemLiteral(ctxt);
4746
2.18M
  if (URI == NULL) {
4747
12.7k
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4748
12.7k
        }
4749
2.18M
    } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4750
1.61M
        SKIP(6);
4751
1.61M
  if (SKIP_BLANKS == 0) {
4752
20.7k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4753
20.7k
        "Space required after 'PUBLIC'\n");
4754
20.7k
  }
4755
1.61M
  *publicID = xmlParsePubidLiteral(ctxt);
4756
1.61M
  if (*publicID == NULL) {
4757
20.8k
      xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4758
20.8k
  }
4759
1.61M
  if (strict) {
4760
      /*
4761
       * We don't handle [83] so "S SystemLiteral" is required.
4762
       */
4763
1.55M
      if (SKIP_BLANKS == 0) {
4764
43.4k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4765
43.4k
      "Space required after the Public Identifier\n");
4766
43.4k
      }
4767
1.55M
  } else {
4768
      /*
4769
       * We handle [83] so we return immediately, if
4770
       * "S SystemLiteral" is not detected. We skip blanks if no
4771
             * system literal was found, but this is harmless since we must
4772
             * be at the end of a NotationDecl.
4773
       */
4774
60.7k
      if (SKIP_BLANKS == 0) return(NULL);
4775
5.17k
      if ((CUR != '\'') && (CUR != '"')) return(NULL);
4776
5.17k
  }
4777
1.55M
  URI = xmlParseSystemLiteral(ctxt);
4778
1.55M
  if (URI == NULL) {
4779
115k
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4780
115k
        }
4781
1.55M
    }
4782
4.12M
    return(URI);
4783
4.18M
}
4784
4785
/**
4786
 * xmlParseCommentComplex:
4787
 * @ctxt:  an XML parser context
4788
 * @buf:  the already parsed part of the buffer
4789
 * @len:  number of bytes in the buffer
4790
 * @size:  allocated size of the buffer
4791
 *
4792
 * Skip an XML (SGML) comment <!-- .... -->
4793
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4794
 *  must not occur within comments. "
4795
 * This is the slow routine in case the accelerator for ascii didn't work
4796
 *
4797
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4798
 */
4799
static void
4800
xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4801
24.7M
                       size_t len, size_t size) {
4802
24.7M
    int q, ql;
4803
24.7M
    int r, rl;
4804
24.7M
    int cur, l;
4805
24.7M
    size_t count = 0;
4806
24.7M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4807
9.12M
                       XML_MAX_HUGE_LENGTH :
4808
24.7M
                       XML_MAX_TEXT_LENGTH;
4809
24.7M
    int inputid;
4810
4811
24.7M
    inputid = ctxt->input->id;
4812
4813
24.7M
    if (buf == NULL) {
4814
2.85M
        len = 0;
4815
2.85M
  size = XML_PARSER_BUFFER_SIZE;
4816
2.85M
  buf = (xmlChar *) xmlMallocAtomic(size);
4817
2.85M
  if (buf == NULL) {
4818
0
      xmlErrMemory(ctxt, NULL);
4819
0
      return;
4820
0
  }
4821
2.85M
    }
4822
24.7M
    GROW; /* Assure there's enough input data */
4823
24.7M
    q = CUR_CHAR(ql);
4824
24.7M
    if (q == 0)
4825
11.8M
        goto not_terminated;
4826
12.8M
    if (!IS_CHAR(q)) {
4827
15.2k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4828
15.2k
                          "xmlParseComment: invalid xmlChar value %d\n",
4829
15.2k
                    q);
4830
15.2k
  xmlFree (buf);
4831
15.2k
  return;
4832
15.2k
    }
4833
12.8M
    NEXTL(ql);
4834
12.8M
    r = CUR_CHAR(rl);
4835
12.8M
    if (r == 0)
4836
1.82M
        goto not_terminated;
4837
10.9M
    if (!IS_CHAR(r)) {
4838
2.12k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4839
2.12k
                          "xmlParseComment: invalid xmlChar value %d\n",
4840
2.12k
                    r);
4841
2.12k
  xmlFree (buf);
4842
2.12k
  return;
4843
2.12k
    }
4844
10.9M
    NEXTL(rl);
4845
10.9M
    cur = CUR_CHAR(l);
4846
10.9M
    if (cur == 0)
4847
2.53M
        goto not_terminated;
4848
619M
    while (IS_CHAR(cur) && /* checked */
4849
619M
           ((cur != '>') ||
4850
615M
      (r != '-') || (q != '-'))) {
4851
611M
  if ((r == '-') && (q == '-')) {
4852
70.7M
      xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4853
70.7M
  }
4854
611M
  if (len + 5 >= size) {
4855
1.15M
      xmlChar *new_buf;
4856
1.15M
            size_t new_size;
4857
4858
1.15M
      new_size = size * 2;
4859
1.15M
      new_buf = (xmlChar *) xmlRealloc(buf, new_size);
4860
1.15M
      if (new_buf == NULL) {
4861
0
    xmlFree (buf);
4862
0
    xmlErrMemory(ctxt, NULL);
4863
0
    return;
4864
0
      }
4865
1.15M
      buf = new_buf;
4866
1.15M
            size = new_size;
4867
1.15M
  }
4868
611M
  COPY_BUF(ql,buf,len,q);
4869
611M
  q = r;
4870
611M
  ql = rl;
4871
611M
  r = cur;
4872
611M
  rl = l;
4873
4874
611M
  count++;
4875
611M
  if (count > 50) {
4876
10.1M
      SHRINK;
4877
10.1M
      GROW;
4878
10.1M
      count = 0;
4879
10.1M
            if (ctxt->instate == XML_PARSER_EOF) {
4880
0
    xmlFree(buf);
4881
0
    return;
4882
0
            }
4883
10.1M
  }
4884
611M
  NEXTL(l);
4885
611M
  cur = CUR_CHAR(l);
4886
611M
  if (cur == 0) {
4887
4.27M
      SHRINK;
4888
4.27M
      GROW;
4889
4.27M
      cur = CUR_CHAR(l);
4890
4.27M
  }
4891
4892
611M
        if (len > maxLength) {
4893
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4894
0
                         "Comment too big found", NULL);
4895
0
            xmlFree (buf);
4896
0
            return;
4897
0
        }
4898
611M
    }
4899
8.45M
    buf[len] = 0;
4900
8.45M
    if (cur == 0) {
4901
4.27M
  xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4902
4.27M
                       "Comment not terminated \n<!--%.50s\n", buf);
4903
4.27M
    } else if (!IS_CHAR(cur)) {
4904
7.85k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4905
7.85k
                          "xmlParseComment: invalid xmlChar value %d\n",
4906
7.85k
                    cur);
4907
4.16M
    } else {
4908
4.16M
  if (inputid != ctxt->input->id) {
4909
0
      xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4910
0
               "Comment doesn't start and stop in the same"
4911
0
                           " entity\n");
4912
0
  }
4913
4.16M
        NEXT;
4914
4.16M
  if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4915
4.16M
      (!ctxt->disableSAX))
4916
4.06M
      ctxt->sax->comment(ctxt->userData, buf);
4917
4.16M
    }
4918
8.45M
    xmlFree(buf);
4919
8.45M
    return;
4920
16.2M
not_terminated:
4921
16.2M
    xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4922
16.2M
       "Comment not terminated\n", NULL);
4923
16.2M
    xmlFree(buf);
4924
16.2M
    return;
4925
8.45M
}
4926
4927
/**
4928
 * xmlParseComment:
4929
 * @ctxt:  an XML parser context
4930
 *
4931
 * DEPRECATED: Internal function, don't use.
4932
 *
4933
 * Parse an XML (SGML) comment. Always consumes '<!'.
4934
 *
4935
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4936
 *  must not occur within comments. "
4937
 *
4938
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4939
 */
4940
void
4941
422M
xmlParseComment(xmlParserCtxtPtr ctxt) {
4942
422M
    xmlChar *buf = NULL;
4943
422M
    size_t size = XML_PARSER_BUFFER_SIZE;
4944
422M
    size_t len = 0;
4945
422M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4946
145M
                       XML_MAX_HUGE_LENGTH :
4947
422M
                       XML_MAX_TEXT_LENGTH;
4948
422M
    xmlParserInputState state;
4949
422M
    const xmlChar *in;
4950
422M
    size_t nbchar = 0;
4951
422M
    int ccol;
4952
422M
    int inputid;
4953
4954
    /*
4955
     * Check that there is a comment right here.
4956
     */
4957
422M
    if ((RAW != '<') || (NXT(1) != '!'))
4958
0
        return;
4959
422M
    SKIP(2);
4960
422M
    if ((RAW != '-') || (NXT(1) != '-'))
4961
545
        return;
4962
422M
    state = ctxt->instate;
4963
422M
    ctxt->instate = XML_PARSER_COMMENT;
4964
422M
    inputid = ctxt->input->id;
4965
422M
    SKIP(2);
4966
422M
    SHRINK;
4967
422M
    GROW;
4968
4969
    /*
4970
     * Accelerated common case where input don't need to be
4971
     * modified before passing it to the handler.
4972
     */
4973
422M
    in = ctxt->input->cur;
4974
422M
    do {
4975
422M
  if (*in == 0xA) {
4976
929k
      do {
4977
929k
    ctxt->input->line++; ctxt->input->col = 1;
4978
929k
    in++;
4979
929k
      } while (*in == 0xA);
4980
915k
  }
4981
619M
get_more:
4982
619M
        ccol = ctxt->input->col;
4983
2.45G
  while (((*in > '-') && (*in <= 0x7F)) ||
4984
2.45G
         ((*in >= 0x20) && (*in < '-')) ||
4985
2.45G
         (*in == 0x09)) {
4986
1.83G
        in++;
4987
1.83G
        ccol++;
4988
1.83G
  }
4989
619M
  ctxt->input->col = ccol;
4990
619M
  if (*in == 0xA) {
4991
11.3M
      do {
4992
11.3M
    ctxt->input->line++; ctxt->input->col = 1;
4993
11.3M
    in++;
4994
11.3M
      } while (*in == 0xA);
4995
10.9M
      goto get_more;
4996
10.9M
  }
4997
608M
  nbchar = in - ctxt->input->cur;
4998
  /*
4999
   * save current set of data
5000
   */
5001
608M
  if (nbchar > 0) {
5002
238M
      if ((ctxt->sax != NULL) &&
5003
238M
    (ctxt->sax->comment != NULL)) {
5004
238M
    if (buf == NULL) {
5005
70.5M
        if ((*in == '-') && (in[1] == '-'))
5006
47.4M
            size = nbchar + 1;
5007
23.0M
        else
5008
23.0M
            size = XML_PARSER_BUFFER_SIZE + nbchar;
5009
70.5M
        buf = (xmlChar *) xmlMallocAtomic(size);
5010
70.5M
        if (buf == NULL) {
5011
0
            xmlErrMemory(ctxt, NULL);
5012
0
      ctxt->instate = state;
5013
0
      return;
5014
0
        }
5015
70.5M
        len = 0;
5016
168M
    } else if (len + nbchar + 1 >= size) {
5017
4.27M
        xmlChar *new_buf;
5018
4.27M
        size  += len + nbchar + XML_PARSER_BUFFER_SIZE;
5019
4.27M
        new_buf = (xmlChar *) xmlRealloc(buf, size);
5020
4.27M
        if (new_buf == NULL) {
5021
0
            xmlFree (buf);
5022
0
      xmlErrMemory(ctxt, NULL);
5023
0
      ctxt->instate = state;
5024
0
      return;
5025
0
        }
5026
4.27M
        buf = new_buf;
5027
4.27M
    }
5028
238M
    memcpy(&buf[len], ctxt->input->cur, nbchar);
5029
238M
    len += nbchar;
5030
238M
    buf[len] = 0;
5031
238M
      }
5032
238M
  }
5033
608M
        if (len > maxLength) {
5034
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5035
0
                         "Comment too big found", NULL);
5036
0
            xmlFree (buf);
5037
0
            return;
5038
0
        }
5039
608M
  ctxt->input->cur = in;
5040
608M
  if (*in == 0xA) {
5041
0
      in++;
5042
0
      ctxt->input->line++; ctxt->input->col = 1;
5043
0
  }
5044
608M
  if (*in == 0xD) {
5045
9.14M
      in++;
5046
9.14M
      if (*in == 0xA) {
5047
9.14M
    ctxt->input->cur = in;
5048
9.14M
    in++;
5049
9.14M
    ctxt->input->line++; ctxt->input->col = 1;
5050
9.14M
    goto get_more;
5051
9.14M
      }
5052
4.87k
      in--;
5053
4.87k
  }
5054
599M
  SHRINK;
5055
599M
  GROW;
5056
599M
        if (ctxt->instate == XML_PARSER_EOF) {
5057
0
            xmlFree(buf);
5058
0
            return;
5059
0
        }
5060
599M
  in = ctxt->input->cur;
5061
599M
  if (*in == '-') {
5062
574M
      if (in[1] == '-') {
5063
542M
          if (in[2] == '>') {
5064
397M
        if (ctxt->input->id != inputid) {
5065
0
      xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5066
0
                     "comment doesn't start and stop in the"
5067
0
                                       " same entity\n");
5068
0
        }
5069
397M
        SKIP(3);
5070
397M
        if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5071
397M
            (!ctxt->disableSAX)) {
5072
273M
      if (buf != NULL)
5073
23.7M
          ctxt->sax->comment(ctxt->userData, buf);
5074
249M
      else
5075
249M
          ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5076
273M
        }
5077
397M
        if (buf != NULL)
5078
48.6M
            xmlFree(buf);
5079
397M
        if (ctxt->instate != XML_PARSER_EOF)
5080
397M
      ctxt->instate = state;
5081
397M
        return;
5082
397M
    }
5083
145M
    if (buf != NULL) {
5084
138M
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5085
138M
                          "Double hyphen within comment: "
5086
138M
                                      "<!--%.50s\n",
5087
138M
              buf);
5088
138M
    } else
5089
6.19M
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5090
6.19M
                          "Double hyphen within comment\n", NULL);
5091
145M
                if (ctxt->instate == XML_PARSER_EOF) {
5092
0
                    xmlFree(buf);
5093
0
                    return;
5094
0
                }
5095
145M
    in++;
5096
145M
    ctxt->input->col++;
5097
145M
      }
5098
177M
      in++;
5099
177M
      ctxt->input->col++;
5100
177M
      goto get_more;
5101
574M
  }
5102
599M
    } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
5103
24.7M
    xmlParseCommentComplex(ctxt, buf, len, size);
5104
24.7M
    ctxt->instate = state;
5105
24.7M
    return;
5106
422M
}
5107
5108
5109
/**
5110
 * xmlParsePITarget:
5111
 * @ctxt:  an XML parser context
5112
 *
5113
 * DEPRECATED: Internal function, don't use.
5114
 *
5115
 * parse the name of a PI
5116
 *
5117
 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5118
 *
5119
 * Returns the PITarget name or NULL
5120
 */
5121
5122
const xmlChar *
5123
3.39M
xmlParsePITarget(xmlParserCtxtPtr ctxt) {
5124
3.39M
    const xmlChar *name;
5125
5126
3.39M
    name = xmlParseName(ctxt);
5127
3.39M
    if ((name != NULL) &&
5128
3.39M
        ((name[0] == 'x') || (name[0] == 'X')) &&
5129
3.39M
        ((name[1] == 'm') || (name[1] == 'M')) &&
5130
3.39M
        ((name[2] == 'l') || (name[2] == 'L'))) {
5131
1.87M
  int i;
5132
1.87M
  if ((name[0] == 'x') && (name[1] == 'm') &&
5133
1.87M
      (name[2] == 'l') && (name[3] == 0)) {
5134
1.18M
      xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5135
1.18M
     "XML declaration allowed only at the start of the document\n");
5136
1.18M
      return(name);
5137
1.18M
  } else if (name[3] == 0) {
5138
516k
      xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5139
516k
      return(name);
5140
516k
  }
5141
497k
  for (i = 0;;i++) {
5142
497k
      if (xmlW3CPIs[i] == NULL) break;
5143
341k
      if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5144
26.4k
          return(name);
5145
341k
  }
5146
156k
  xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5147
156k
          "xmlParsePITarget: invalid name prefix 'xml'\n",
5148
156k
          NULL, NULL);
5149
156k
    }
5150
1.66M
    if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5151
59.2k
  xmlNsErr(ctxt, XML_NS_ERR_COLON,
5152
59.2k
     "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5153
59.2k
    }
5154
1.66M
    return(name);
5155
3.39M
}
5156
5157
#ifdef LIBXML_CATALOG_ENABLED
5158
/**
5159
 * xmlParseCatalogPI:
5160
 * @ctxt:  an XML parser context
5161
 * @catalog:  the PI value string
5162
 *
5163
 * parse an XML Catalog Processing Instruction.
5164
 *
5165
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
5166
 *
5167
 * Occurs only if allowed by the user and if happening in the Misc
5168
 * part of the document before any doctype information
5169
 * This will add the given catalog to the parsing context in order
5170
 * to be used if there is a resolution need further down in the document
5171
 */
5172
5173
static void
5174
13.7k
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
5175
13.7k
    xmlChar *URL = NULL;
5176
13.7k
    const xmlChar *tmp, *base;
5177
13.7k
    xmlChar marker;
5178
5179
13.7k
    tmp = catalog;
5180
13.7k
    while (IS_BLANK_CH(*tmp)) tmp++;
5181
13.7k
    if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
5182
2.24k
  goto error;
5183
11.5k
    tmp += 7;
5184
143k
    while (IS_BLANK_CH(*tmp)) tmp++;
5185
11.5k
    if (*tmp != '=') {
5186
2.71k
  return;
5187
2.71k
    }
5188
8.78k
    tmp++;
5189
275k
    while (IS_BLANK_CH(*tmp)) tmp++;
5190
8.78k
    marker = *tmp;
5191
8.78k
    if ((marker != '\'') && (marker != '"'))
5192
2.18k
  goto error;
5193
6.59k
    tmp++;
5194
6.59k
    base = tmp;
5195
399k
    while ((*tmp != 0) && (*tmp != marker)) tmp++;
5196
6.59k
    if (*tmp == 0)
5197
1.82k
  goto error;
5198
4.77k
    URL = xmlStrndup(base, tmp - base);
5199
4.77k
    tmp++;
5200
40.8k
    while (IS_BLANK_CH(*tmp)) tmp++;
5201
4.77k
    if (*tmp != 0)
5202
1.86k
  goto error;
5203
5204
2.90k
    if (URL != NULL) {
5205
2.90k
  ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
5206
2.90k
  xmlFree(URL);
5207
2.90k
    }
5208
2.90k
    return;
5209
5210
8.13k
error:
5211
8.13k
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
5212
8.13k
            "Catalog PI syntax error: %s\n",
5213
8.13k
      catalog, NULL);
5214
8.13k
    if (URL != NULL)
5215
1.86k
  xmlFree(URL);
5216
8.13k
}
5217
#endif
5218
5219
/**
5220
 * xmlParsePI:
5221
 * @ctxt:  an XML parser context
5222
 *
5223
 * DEPRECATED: Internal function, don't use.
5224
 *
5225
 * parse an XML Processing Instruction.
5226
 *
5227
 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5228
 *
5229
 * The processing is transferred to SAX once parsed.
5230
 */
5231
5232
void
5233
3.39M
xmlParsePI(xmlParserCtxtPtr ctxt) {
5234
3.39M
    xmlChar *buf = NULL;
5235
3.39M
    size_t len = 0;
5236
3.39M
    size_t size = XML_PARSER_BUFFER_SIZE;
5237
3.39M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5238
1.30M
                       XML_MAX_HUGE_LENGTH :
5239
3.39M
                       XML_MAX_TEXT_LENGTH;
5240
3.39M
    int cur, l;
5241
3.39M
    const xmlChar *target;
5242
3.39M
    xmlParserInputState state;
5243
3.39M
    int count = 0;
5244
5245
3.39M
    if ((RAW == '<') && (NXT(1) == '?')) {
5246
3.39M
  int inputid = ctxt->input->id;
5247
3.39M
  state = ctxt->instate;
5248
3.39M
        ctxt->instate = XML_PARSER_PI;
5249
  /*
5250
   * this is a Processing Instruction.
5251
   */
5252
3.39M
  SKIP(2);
5253
3.39M
  SHRINK;
5254
5255
  /*
5256
   * Parse the target name and check for special support like
5257
   * namespace.
5258
   */
5259
3.39M
        target = xmlParsePITarget(ctxt);
5260
3.39M
  if (target != NULL) {
5261
3.35M
      if ((RAW == '?') && (NXT(1) == '>')) {
5262
469k
    if (inputid != ctxt->input->id) {
5263
0
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5264
0
                             "PI declaration doesn't start and stop in"
5265
0
                                   " the same entity\n");
5266
0
    }
5267
469k
    SKIP(2);
5268
5269
    /*
5270
     * SAX: PI detected.
5271
     */
5272
469k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5273
469k
        (ctxt->sax->processingInstruction != NULL))
5274
384k
        ctxt->sax->processingInstruction(ctxt->userData,
5275
384k
                                         target, NULL);
5276
469k
    if (ctxt->instate != XML_PARSER_EOF)
5277
469k
        ctxt->instate = state;
5278
469k
    return;
5279
469k
      }
5280
2.88M
      buf = (xmlChar *) xmlMallocAtomic(size);
5281
2.88M
      if (buf == NULL) {
5282
0
    xmlErrMemory(ctxt, NULL);
5283
0
    ctxt->instate = state;
5284
0
    return;
5285
0
      }
5286
2.88M
      if (SKIP_BLANKS == 0) {
5287
145k
    xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5288
145k
        "ParsePI: PI %s space expected\n", target);
5289
145k
      }
5290
2.88M
      cur = CUR_CHAR(l);
5291
93.5M
      while (IS_CHAR(cur) && /* checked */
5292
93.5M
       ((cur != '?') || (NXT(1) != '>'))) {
5293
90.7M
    if (len + 5 >= size) {
5294
68.9k
        xmlChar *tmp;
5295
68.9k
                    size_t new_size = size * 2;
5296
68.9k
        tmp = (xmlChar *) xmlRealloc(buf, new_size);
5297
68.9k
        if (tmp == NULL) {
5298
0
      xmlErrMemory(ctxt, NULL);
5299
0
      xmlFree(buf);
5300
0
      ctxt->instate = state;
5301
0
      return;
5302
0
        }
5303
68.9k
        buf = tmp;
5304
68.9k
                    size = new_size;
5305
68.9k
    }
5306
90.7M
    count++;
5307
90.7M
    if (count > 50) {
5308
1.37M
        SHRINK;
5309
1.37M
        GROW;
5310
1.37M
                    if (ctxt->instate == XML_PARSER_EOF) {
5311
0
                        xmlFree(buf);
5312
0
                        return;
5313
0
                    }
5314
1.37M
        count = 0;
5315
1.37M
    }
5316
90.7M
    COPY_BUF(l,buf,len,cur);
5317
90.7M
    NEXTL(l);
5318
90.7M
    cur = CUR_CHAR(l);
5319
90.7M
    if (cur == 0) {
5320
2.23M
        SHRINK;
5321
2.23M
        GROW;
5322
2.23M
        cur = CUR_CHAR(l);
5323
2.23M
    }
5324
90.7M
                if (len > maxLength) {
5325
0
                    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5326
0
                                      "PI %s too big found", target);
5327
0
                    xmlFree(buf);
5328
0
                    ctxt->instate = state;
5329
0
                    return;
5330
0
                }
5331
90.7M
      }
5332
2.88M
      buf[len] = 0;
5333
2.88M
      if (cur != '?') {
5334
2.26M
    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5335
2.26M
          "ParsePI: PI %s never end ...\n", target);
5336
2.26M
      } else {
5337
617k
    if (inputid != ctxt->input->id) {
5338
49
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5339
49
                             "PI declaration doesn't start and stop in"
5340
49
                                   " the same entity\n");
5341
49
    }
5342
617k
    SKIP(2);
5343
5344
617k
#ifdef LIBXML_CATALOG_ENABLED
5345
617k
    if (((state == XML_PARSER_MISC) ||
5346
617k
               (state == XML_PARSER_START)) &&
5347
617k
        (xmlStrEqual(target, XML_CATALOG_PI))) {
5348
13.7k
        xmlCatalogAllow allow = xmlCatalogGetDefaults();
5349
13.7k
        if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5350
13.7k
      (allow == XML_CATA_ALLOW_ALL))
5351
13.7k
      xmlParseCatalogPI(ctxt, buf);
5352
13.7k
    }
5353
617k
#endif
5354
5355
5356
    /*
5357
     * SAX: PI detected.
5358
     */
5359
617k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5360
617k
        (ctxt->sax->processingInstruction != NULL))
5361
487k
        ctxt->sax->processingInstruction(ctxt->userData,
5362
487k
                                         target, buf);
5363
617k
      }
5364
2.88M
      xmlFree(buf);
5365
2.88M
  } else {
5366
39.8k
      xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5367
39.8k
  }
5368
2.92M
  if (ctxt->instate != XML_PARSER_EOF)
5369
2.92M
      ctxt->instate = state;
5370
2.92M
    }
5371
3.39M
}
5372
5373
/**
5374
 * xmlParseNotationDecl:
5375
 * @ctxt:  an XML parser context
5376
 *
5377
 * DEPRECATED: Internal function, don't use.
5378
 *
5379
 * Parse a notation declaration. Always consumes '<!'.
5380
 *
5381
 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID |  PublicID) S? '>'
5382
 *
5383
 * Hence there is actually 3 choices:
5384
 *     'PUBLIC' S PubidLiteral
5385
 *     'PUBLIC' S PubidLiteral S SystemLiteral
5386
 * and 'SYSTEM' S SystemLiteral
5387
 *
5388
 * See the NOTE on xmlParseExternalID().
5389
 */
5390
5391
void
5392
875k
xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5393
875k
    const xmlChar *name;
5394
875k
    xmlChar *Pubid;
5395
875k
    xmlChar *Systemid;
5396
5397
875k
    if ((CUR != '<') || (NXT(1) != '!'))
5398
0
        return;
5399
875k
    SKIP(2);
5400
5401
875k
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5402
874k
  int inputid = ctxt->input->id;
5403
874k
  SHRINK;
5404
874k
  SKIP(8);
5405
874k
  if (SKIP_BLANKS == 0) {
5406
2.82k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5407
2.82k
         "Space required after '<!NOTATION'\n");
5408
2.82k
      return;
5409
2.82k
  }
5410
5411
871k
        name = xmlParseName(ctxt);
5412
871k
  if (name == NULL) {
5413
6.45k
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5414
6.45k
      return;
5415
6.45k
  }
5416
865k
  if (xmlStrchr(name, ':') != NULL) {
5417
3.43k
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5418
3.43k
         "colons are forbidden from notation names '%s'\n",
5419
3.43k
         name, NULL, NULL);
5420
3.43k
  }
5421
865k
  if (SKIP_BLANKS == 0) {
5422
4.23k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5423
4.23k
         "Space required after the NOTATION name'\n");
5424
4.23k
      return;
5425
4.23k
  }
5426
5427
  /*
5428
   * Parse the IDs.
5429
   */
5430
860k
  Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5431
860k
  SKIP_BLANKS;
5432
5433
860k
  if (RAW == '>') {
5434
828k
      if (inputid != ctxt->input->id) {
5435
150
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5436
150
                         "Notation declaration doesn't start and stop"
5437
150
                               " in the same entity\n");
5438
150
      }
5439
828k
      NEXT;
5440
828k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5441
828k
    (ctxt->sax->notationDecl != NULL))
5442
818k
    ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5443
828k
  } else {
5444
32.4k
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5445
32.4k
  }
5446
860k
  if (Systemid != NULL) xmlFree(Systemid);
5447
860k
  if (Pubid != NULL) xmlFree(Pubid);
5448
860k
    }
5449
875k
}
5450
5451
/**
5452
 * xmlParseEntityDecl:
5453
 * @ctxt:  an XML parser context
5454
 *
5455
 * DEPRECATED: Internal function, don't use.
5456
 *
5457
 * Parse an entity declaration. Always consumes '<!'.
5458
 *
5459
 * [70] EntityDecl ::= GEDecl | PEDecl
5460
 *
5461
 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5462
 *
5463
 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5464
 *
5465
 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5466
 *
5467
 * [74] PEDef ::= EntityValue | ExternalID
5468
 *
5469
 * [76] NDataDecl ::= S 'NDATA' S Name
5470
 *
5471
 * [ VC: Notation Declared ]
5472
 * The Name must match the declared name of a notation.
5473
 */
5474
5475
void
5476
7.09M
xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5477
7.09M
    const xmlChar *name = NULL;
5478
7.09M
    xmlChar *value = NULL;
5479
7.09M
    xmlChar *URI = NULL, *literal = NULL;
5480
7.09M
    const xmlChar *ndata = NULL;
5481
7.09M
    int isParameter = 0;
5482
7.09M
    xmlChar *orig = NULL;
5483
5484
7.09M
    if ((CUR != '<') || (NXT(1) != '!'))
5485
0
        return;
5486
7.09M
    SKIP(2);
5487
5488
    /* GROW; done in the caller */
5489
7.09M
    if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5490
7.09M
  int inputid = ctxt->input->id;
5491
7.09M
  SHRINK;
5492
7.09M
  SKIP(6);
5493
7.09M
  if (SKIP_BLANKS == 0) {
5494
34.3k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5495
34.3k
         "Space required after '<!ENTITY'\n");
5496
34.3k
  }
5497
5498
7.09M
  if (RAW == '%') {
5499
2.75M
      NEXT;
5500
2.75M
      if (SKIP_BLANKS == 0) {
5501
1.05k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5502
1.05k
             "Space required after '%%'\n");
5503
1.05k
      }
5504
2.75M
      isParameter = 1;
5505
2.75M
  }
5506
5507
7.09M
        name = xmlParseName(ctxt);
5508
7.09M
  if (name == NULL) {
5509
52.4k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5510
52.4k
                     "xmlParseEntityDecl: no name\n");
5511
52.4k
            return;
5512
52.4k
  }
5513
7.04M
  if (xmlStrchr(name, ':') != NULL) {
5514
14.8k
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5515
14.8k
         "colons are forbidden from entities names '%s'\n",
5516
14.8k
         name, NULL, NULL);
5517
14.8k
  }
5518
7.04M
  if (SKIP_BLANKS == 0) {
5519
19.0k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5520
19.0k
         "Space required after the entity name\n");
5521
19.0k
  }
5522
5523
7.04M
  ctxt->instate = XML_PARSER_ENTITY_DECL;
5524
  /*
5525
   * handle the various case of definitions...
5526
   */
5527
7.04M
  if (isParameter) {
5528
2.75M
      if ((RAW == '"') || (RAW == '\'')) {
5529
2.69M
          value = xmlParseEntityValue(ctxt, &orig);
5530
2.69M
    if (value) {
5531
2.67M
        if ((ctxt->sax != NULL) &&
5532
2.67M
      (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5533
2.61M
      ctxt->sax->entityDecl(ctxt->userData, name,
5534
2.61M
                        XML_INTERNAL_PARAMETER_ENTITY,
5535
2.61M
            NULL, NULL, value);
5536
2.67M
    }
5537
2.69M
      } else {
5538
58.8k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5539
58.8k
    if ((URI == NULL) && (literal == NULL)) {
5540
3.61k
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5541
3.61k
    }
5542
58.8k
    if (URI) {
5543
54.8k
        xmlURIPtr uri;
5544
5545
54.8k
        uri = xmlParseURI((const char *) URI);
5546
54.8k
        if (uri == NULL) {
5547
1.83k
            xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5548
1.83k
             "Invalid URI: %s\n", URI);
5549
      /*
5550
       * This really ought to be a well formedness error
5551
       * but the XML Core WG decided otherwise c.f. issue
5552
       * E26 of the XML erratas.
5553
       */
5554
53.0k
        } else {
5555
53.0k
      if (uri->fragment != NULL) {
5556
          /*
5557
           * Okay this is foolish to block those but not
5558
           * invalid URIs.
5559
           */
5560
183
          xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5561
52.8k
      } else {
5562
52.8k
          if ((ctxt->sax != NULL) &&
5563
52.8k
        (!ctxt->disableSAX) &&
5564
52.8k
        (ctxt->sax->entityDecl != NULL))
5565
51.5k
        ctxt->sax->entityDecl(ctxt->userData, name,
5566
51.5k
              XML_EXTERNAL_PARAMETER_ENTITY,
5567
51.5k
              literal, URI, NULL);
5568
52.8k
      }
5569
53.0k
      xmlFreeURI(uri);
5570
53.0k
        }
5571
54.8k
    }
5572
58.8k
      }
5573
4.28M
  } else {
5574
4.28M
      if ((RAW == '"') || (RAW == '\'')) {
5575
1.68M
          value = xmlParseEntityValue(ctxt, &orig);
5576
1.68M
    if ((ctxt->sax != NULL) &&
5577
1.68M
        (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5578
1.58M
        ctxt->sax->entityDecl(ctxt->userData, name,
5579
1.58M
        XML_INTERNAL_GENERAL_ENTITY,
5580
1.58M
        NULL, NULL, value);
5581
    /*
5582
     * For expat compatibility in SAX mode.
5583
     */
5584
1.68M
    if ((ctxt->myDoc == NULL) ||
5585
1.68M
        (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5586
24.0k
        if (ctxt->myDoc == NULL) {
5587
2.43k
      ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5588
2.43k
      if (ctxt->myDoc == NULL) {
5589
0
          xmlErrMemory(ctxt, "New Doc failed");
5590
0
          return;
5591
0
      }
5592
2.43k
      ctxt->myDoc->properties = XML_DOC_INTERNAL;
5593
2.43k
        }
5594
24.0k
        if (ctxt->myDoc->intSubset == NULL)
5595
2.43k
      ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5596
2.43k
              BAD_CAST "fake", NULL, NULL);
5597
5598
24.0k
        xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5599
24.0k
                    NULL, NULL, value);
5600
24.0k
    }
5601
2.59M
      } else {
5602
2.59M
          URI = xmlParseExternalID(ctxt, &literal, 1);
5603
2.59M
    if ((URI == NULL) && (literal == NULL)) {
5604
41.9k
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5605
41.9k
    }
5606
2.59M
    if (URI) {
5607
2.46M
        xmlURIPtr uri;
5608
5609
2.46M
        uri = xmlParseURI((const char *)URI);
5610
2.46M
        if (uri == NULL) {
5611
42.8k
            xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5612
42.8k
             "Invalid URI: %s\n", URI);
5613
      /*
5614
       * This really ought to be a well formedness error
5615
       * but the XML Core WG decided otherwise c.f. issue
5616
       * E26 of the XML erratas.
5617
       */
5618
2.42M
        } else {
5619
2.42M
      if (uri->fragment != NULL) {
5620
          /*
5621
           * Okay this is foolish to block those but not
5622
           * invalid URIs.
5623
           */
5624
4.90k
          xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5625
4.90k
      }
5626
2.42M
      xmlFreeURI(uri);
5627
2.42M
        }
5628
2.46M
    }
5629
2.59M
    if ((RAW != '>') && (SKIP_BLANKS == 0)) {
5630
89.3k
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5631
89.3k
           "Space required before 'NDATA'\n");
5632
89.3k
    }
5633
2.59M
    if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5634
1.51M
        SKIP(5);
5635
1.51M
        if (SKIP_BLANKS == 0) {
5636
9.65k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5637
9.65k
               "Space required after 'NDATA'\n");
5638
9.65k
        }
5639
1.51M
        ndata = xmlParseName(ctxt);
5640
1.51M
        if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5641
1.51M
            (ctxt->sax->unparsedEntityDecl != NULL))
5642
1.09M
      ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5643
1.09M
            literal, URI, ndata);
5644
1.51M
    } else {
5645
1.08M
        if ((ctxt->sax != NULL) &&
5646
1.08M
            (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5647
659k
      ctxt->sax->entityDecl(ctxt->userData, name,
5648
659k
            XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5649
659k
            literal, URI, NULL);
5650
        /*
5651
         * For expat compatibility in SAX mode.
5652
         * assuming the entity replacement was asked for
5653
         */
5654
1.08M
        if ((ctxt->replaceEntities != 0) &&
5655
1.08M
      ((ctxt->myDoc == NULL) ||
5656
466k
      (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5657
3.19k
      if (ctxt->myDoc == NULL) {
5658
1.66k
          ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5659
1.66k
          if (ctxt->myDoc == NULL) {
5660
0
              xmlErrMemory(ctxt, "New Doc failed");
5661
0
        return;
5662
0
          }
5663
1.66k
          ctxt->myDoc->properties = XML_DOC_INTERNAL;
5664
1.66k
      }
5665
5666
3.19k
      if (ctxt->myDoc->intSubset == NULL)
5667
1.66k
          ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5668
1.66k
            BAD_CAST "fake", NULL, NULL);
5669
3.19k
      xmlSAX2EntityDecl(ctxt, name,
5670
3.19k
                  XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5671
3.19k
                  literal, URI, NULL);
5672
3.19k
        }
5673
1.08M
    }
5674
2.59M
      }
5675
4.28M
  }
5676
7.04M
  if (ctxt->instate == XML_PARSER_EOF)
5677
1.06k
      goto done;
5678
7.04M
  SKIP_BLANKS;
5679
7.04M
  if (RAW != '>') {
5680
34.6k
      xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5681
34.6k
              "xmlParseEntityDecl: entity %s not terminated\n", name);
5682
34.6k
      xmlHaltParser(ctxt);
5683
7.00M
  } else {
5684
7.00M
      if (inputid != ctxt->input->id) {
5685
1.57k
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5686
1.57k
                         "Entity declaration doesn't start and stop in"
5687
1.57k
                               " the same entity\n");
5688
1.57k
      }
5689
7.00M
      NEXT;
5690
7.00M
  }
5691
7.04M
  if (orig != NULL) {
5692
      /*
5693
       * Ugly mechanism to save the raw entity value.
5694
       */
5695
4.34M
      xmlEntityPtr cur = NULL;
5696
5697
4.34M
      if (isParameter) {
5698
2.67M
          if ((ctxt->sax != NULL) &&
5699
2.67M
        (ctxt->sax->getParameterEntity != NULL))
5700
2.67M
        cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5701
2.67M
      } else {
5702
1.67M
          if ((ctxt->sax != NULL) &&
5703
1.67M
        (ctxt->sax->getEntity != NULL))
5704
1.67M
        cur = ctxt->sax->getEntity(ctxt->userData, name);
5705
1.67M
    if ((cur == NULL) && (ctxt->userData==ctxt)) {
5706
72.7k
        cur = xmlSAX2GetEntity(ctxt, name);
5707
72.7k
    }
5708
1.67M
      }
5709
4.34M
            if ((cur != NULL) && (cur->orig == NULL)) {
5710
4.06M
    cur->orig = orig;
5711
4.06M
                orig = NULL;
5712
4.06M
      }
5713
4.34M
  }
5714
5715
7.04M
done:
5716
7.04M
  if (value != NULL) xmlFree(value);
5717
7.04M
  if (URI != NULL) xmlFree(URI);
5718
7.04M
  if (literal != NULL) xmlFree(literal);
5719
7.04M
        if (orig != NULL) xmlFree(orig);
5720
7.04M
    }
5721
7.09M
}
5722
5723
/**
5724
 * xmlParseDefaultDecl:
5725
 * @ctxt:  an XML parser context
5726
 * @value:  Receive a possible fixed default value for the attribute
5727
 *
5728
 * DEPRECATED: Internal function, don't use.
5729
 *
5730
 * Parse an attribute default declaration
5731
 *
5732
 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5733
 *
5734
 * [ VC: Required Attribute ]
5735
 * if the default declaration is the keyword #REQUIRED, then the
5736
 * attribute must be specified for all elements of the type in the
5737
 * attribute-list declaration.
5738
 *
5739
 * [ VC: Attribute Default Legal ]
5740
 * The declared default value must meet the lexical constraints of
5741
 * the declared attribute type c.f. xmlValidateAttributeDecl()
5742
 *
5743
 * [ VC: Fixed Attribute Default ]
5744
 * if an attribute has a default value declared with the #FIXED
5745
 * keyword, instances of that attribute must match the default value.
5746
 *
5747
 * [ WFC: No < in Attribute Values ]
5748
 * handled in xmlParseAttValue()
5749
 *
5750
 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5751
 *          or XML_ATTRIBUTE_FIXED.
5752
 */
5753
5754
int
5755
18.1M
xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5756
18.1M
    int val;
5757
18.1M
    xmlChar *ret;
5758
5759
18.1M
    *value = NULL;
5760
18.1M
    if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5761
1.22M
  SKIP(9);
5762
1.22M
  return(XML_ATTRIBUTE_REQUIRED);
5763
1.22M
    }
5764
16.9M
    if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5765
15.3M
  SKIP(8);
5766
15.3M
  return(XML_ATTRIBUTE_IMPLIED);
5767
15.3M
    }
5768
1.57M
    val = XML_ATTRIBUTE_NONE;
5769
1.57M
    if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5770
725k
  SKIP(6);
5771
725k
  val = XML_ATTRIBUTE_FIXED;
5772
725k
  if (SKIP_BLANKS == 0) {
5773
436
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5774
436
         "Space required after '#FIXED'\n");
5775
436
  }
5776
725k
    }
5777
1.57M
    ret = xmlParseAttValue(ctxt);
5778
1.57M
    ctxt->instate = XML_PARSER_DTD;
5779
1.57M
    if (ret == NULL) {
5780
16.0k
  xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5781
16.0k
           "Attribute default value declaration error\n");
5782
16.0k
    } else
5783
1.56M
        *value = ret;
5784
1.57M
    return(val);
5785
16.9M
}
5786
5787
/**
5788
 * xmlParseNotationType:
5789
 * @ctxt:  an XML parser context
5790
 *
5791
 * DEPRECATED: Internal function, don't use.
5792
 *
5793
 * parse an Notation attribute type.
5794
 *
5795
 * Note: the leading 'NOTATION' S part has already being parsed...
5796
 *
5797
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5798
 *
5799
 * [ VC: Notation Attributes ]
5800
 * Values of this type must match one of the notation names included
5801
 * in the declaration; all notation names in the declaration must be declared.
5802
 *
5803
 * Returns: the notation attribute tree built while parsing
5804
 */
5805
5806
xmlEnumerationPtr
5807
54.0k
xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5808
54.0k
    const xmlChar *name;
5809
54.0k
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5810
5811
54.0k
    if (RAW != '(') {
5812
1.31k
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5813
1.31k
  return(NULL);
5814
1.31k
    }
5815
52.7k
    SHRINK;
5816
64.9k
    do {
5817
64.9k
        NEXT;
5818
64.9k
  SKIP_BLANKS;
5819
64.9k
        name = xmlParseName(ctxt);
5820
64.9k
  if (name == NULL) {
5821
1.30k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5822
1.30k
         "Name expected in NOTATION declaration\n");
5823
1.30k
            xmlFreeEnumeration(ret);
5824
1.30k
      return(NULL);
5825
1.30k
  }
5826
63.6k
  tmp = ret;
5827
93.1k
  while (tmp != NULL) {
5828
32.6k
      if (xmlStrEqual(name, tmp->name)) {
5829
3.12k
    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5830
3.12k
    "standalone: attribute notation value token %s duplicated\n",
5831
3.12k
         name, NULL);
5832
3.12k
    if (!xmlDictOwns(ctxt->dict, name))
5833
0
        xmlFree((xmlChar *) name);
5834
3.12k
    break;
5835
3.12k
      }
5836
29.4k
      tmp = tmp->next;
5837
29.4k
  }
5838
63.6k
  if (tmp == NULL) {
5839
60.5k
      cur = xmlCreateEnumeration(name);
5840
60.5k
      if (cur == NULL) {
5841
0
                xmlFreeEnumeration(ret);
5842
0
                return(NULL);
5843
0
            }
5844
60.5k
      if (last == NULL) ret = last = cur;
5845
8.41k
      else {
5846
8.41k
    last->next = cur;
5847
8.41k
    last = cur;
5848
8.41k
      }
5849
60.5k
  }
5850
63.6k
  SKIP_BLANKS;
5851
63.6k
    } while (RAW == '|');
5852
51.4k
    if (RAW != ')') {
5853
4.00k
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5854
4.00k
        xmlFreeEnumeration(ret);
5855
4.00k
  return(NULL);
5856
4.00k
    }
5857
47.4k
    NEXT;
5858
47.4k
    return(ret);
5859
51.4k
}
5860
5861
/**
5862
 * xmlParseEnumerationType:
5863
 * @ctxt:  an XML parser context
5864
 *
5865
 * DEPRECATED: Internal function, don't use.
5866
 *
5867
 * parse an Enumeration attribute type.
5868
 *
5869
 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5870
 *
5871
 * [ VC: Enumeration ]
5872
 * Values of this type must match one of the Nmtoken tokens in
5873
 * the declaration
5874
 *
5875
 * Returns: the enumeration attribute tree built while parsing
5876
 */
5877
5878
xmlEnumerationPtr
5879
1.59M
xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5880
1.59M
    xmlChar *name;
5881
1.59M
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5882
5883
1.59M
    if (RAW != '(') {
5884
25.0k
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5885
25.0k
  return(NULL);
5886
25.0k
    }
5887
1.57M
    SHRINK;
5888
4.88M
    do {
5889
4.88M
        NEXT;
5890
4.88M
  SKIP_BLANKS;
5891
4.88M
        name = xmlParseNmtoken(ctxt);
5892
4.88M
  if (name == NULL) {
5893
2.57k
      xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5894
2.57k
      return(ret);
5895
2.57k
  }
5896
4.87M
  tmp = ret;
5897
12.9M
  while (tmp != NULL) {
5898
8.05M
      if (xmlStrEqual(name, tmp->name)) {
5899
5.23k
    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5900
5.23k
    "standalone: attribute enumeration value token %s duplicated\n",
5901
5.23k
         name, NULL);
5902
5.23k
    if (!xmlDictOwns(ctxt->dict, name))
5903
5.23k
        xmlFree(name);
5904
5.23k
    break;
5905
5.23k
      }
5906
8.04M
      tmp = tmp->next;
5907
8.04M
  }
5908
4.87M
  if (tmp == NULL) {
5909
4.87M
      cur = xmlCreateEnumeration(name);
5910
4.87M
      if (!xmlDictOwns(ctxt->dict, name))
5911
4.87M
    xmlFree(name);
5912
4.87M
      if (cur == NULL) {
5913
0
                xmlFreeEnumeration(ret);
5914
0
                return(NULL);
5915
0
            }
5916
4.87M
      if (last == NULL) ret = last = cur;
5917
3.29M
      else {
5918
3.29M
    last->next = cur;
5919
3.29M
    last = cur;
5920
3.29M
      }
5921
4.87M
  }
5922
4.87M
  SKIP_BLANKS;
5923
4.87M
    } while (RAW == '|');
5924
1.57M
    if (RAW != ')') {
5925
6.78k
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5926
6.78k
  return(ret);
5927
6.78k
    }
5928
1.56M
    NEXT;
5929
1.56M
    return(ret);
5930
1.57M
}
5931
5932
/**
5933
 * xmlParseEnumeratedType:
5934
 * @ctxt:  an XML parser context
5935
 * @tree:  the enumeration tree built while parsing
5936
 *
5937
 * DEPRECATED: Internal function, don't use.
5938
 *
5939
 * parse an Enumerated attribute type.
5940
 *
5941
 * [57] EnumeratedType ::= NotationType | Enumeration
5942
 *
5943
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5944
 *
5945
 *
5946
 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5947
 */
5948
5949
int
5950
1.65M
xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5951
1.65M
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5952
56.0k
  SKIP(8);
5953
56.0k
  if (SKIP_BLANKS == 0) {
5954
1.95k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5955
1.95k
         "Space required after 'NOTATION'\n");
5956
1.95k
      return(0);
5957
1.95k
  }
5958
54.0k
  *tree = xmlParseNotationType(ctxt);
5959
54.0k
  if (*tree == NULL) return(0);
5960
47.4k
  return(XML_ATTRIBUTE_NOTATION);
5961
54.0k
    }
5962
1.59M
    *tree = xmlParseEnumerationType(ctxt);
5963
1.59M
    if (*tree == NULL) return(0);
5964
1.57M
    return(XML_ATTRIBUTE_ENUMERATION);
5965
1.59M
}
5966
5967
/**
5968
 * xmlParseAttributeType:
5969
 * @ctxt:  an XML parser context
5970
 * @tree:  the enumeration tree built while parsing
5971
 *
5972
 * DEPRECATED: Internal function, don't use.
5973
 *
5974
 * parse the Attribute list def for an element
5975
 *
5976
 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5977
 *
5978
 * [55] StringType ::= 'CDATA'
5979
 *
5980
 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5981
 *                        'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5982
 *
5983
 * Validity constraints for attribute values syntax are checked in
5984
 * xmlValidateAttributeValue()
5985
 *
5986
 * [ VC: ID ]
5987
 * Values of type ID must match the Name production. A name must not
5988
 * appear more than once in an XML document as a value of this type;
5989
 * i.e., ID values must uniquely identify the elements which bear them.
5990
 *
5991
 * [ VC: One ID per Element Type ]
5992
 * No element type may have more than one ID attribute specified.
5993
 *
5994
 * [ VC: ID Attribute Default ]
5995
 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5996
 *
5997
 * [ VC: IDREF ]
5998
 * Values of type IDREF must match the Name production, and values
5999
 * of type IDREFS must match Names; each IDREF Name must match the value
6000
 * of an ID attribute on some element in the XML document; i.e. IDREF
6001
 * values must match the value of some ID attribute.
6002
 *
6003
 * [ VC: Entity Name ]
6004
 * Values of type ENTITY must match the Name production, values
6005
 * of type ENTITIES must match Names; each Entity Name must match the
6006
 * name of an unparsed entity declared in the DTD.
6007
 *
6008
 * [ VC: Name Token ]
6009
 * Values of type NMTOKEN must match the Nmtoken production; values
6010
 * of type NMTOKENS must match Nmtokens.
6011
 *
6012
 * Returns the attribute type
6013
 */
6014
int
6015
18.1M
xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
6016
18.1M
    SHRINK;
6017
18.1M
    if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
6018
6.91M
  SKIP(5);
6019
6.91M
  return(XML_ATTRIBUTE_CDATA);
6020
11.2M
     } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
6021
165k
  SKIP(6);
6022
165k
  return(XML_ATTRIBUTE_IDREFS);
6023
11.1M
     } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
6024
272k
  SKIP(5);
6025
272k
  return(XML_ATTRIBUTE_IDREF);
6026
10.8M
     } else if ((RAW == 'I') && (NXT(1) == 'D')) {
6027
4.76M
        SKIP(2);
6028
4.76M
  return(XML_ATTRIBUTE_ID);
6029
6.07M
     } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
6030
53.4k
  SKIP(6);
6031
53.4k
  return(XML_ATTRIBUTE_ENTITY);
6032
6.02M
     } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
6033
362k
  SKIP(8);
6034
362k
  return(XML_ATTRIBUTE_ENTITIES);
6035
5.65M
     } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
6036
664k
  SKIP(8);
6037
664k
  return(XML_ATTRIBUTE_NMTOKENS);
6038
4.99M
     } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
6039
3.33M
  SKIP(7);
6040
3.33M
  return(XML_ATTRIBUTE_NMTOKEN);
6041
3.33M
     }
6042
1.65M
     return(xmlParseEnumeratedType(ctxt, tree));
6043
18.1M
}
6044
6045
/**
6046
 * xmlParseAttributeListDecl:
6047
 * @ctxt:  an XML parser context
6048
 *
6049
 * DEPRECATED: Internal function, don't use.
6050
 *
6051
 * Parse an attribute list declaration for an element. Always consumes '<!'.
6052
 *
6053
 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
6054
 *
6055
 * [53] AttDef ::= S Name S AttType S DefaultDecl
6056
 *
6057
 */
6058
void
6059
6.52M
xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
6060
6.52M
    const xmlChar *elemName;
6061
6.52M
    const xmlChar *attrName;
6062
6.52M
    xmlEnumerationPtr tree;
6063
6064
6.52M
    if ((CUR != '<') || (NXT(1) != '!'))
6065
0
        return;
6066
6.52M
    SKIP(2);
6067
6068
6.52M
    if (CMP7(CUR_PTR, 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
6069
6.51M
  int inputid = ctxt->input->id;
6070
6071
6.51M
  SKIP(7);
6072
6.51M
  if (SKIP_BLANKS == 0) {
6073
30.9k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6074
30.9k
                     "Space required after '<!ATTLIST'\n");
6075
30.9k
  }
6076
6.51M
        elemName = xmlParseName(ctxt);
6077
6.51M
  if (elemName == NULL) {
6078
5.52k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6079
5.52k
         "ATTLIST: no name for Element\n");
6080
5.52k
      return;
6081
5.52k
  }
6082
6.51M
  SKIP_BLANKS;
6083
6.51M
  GROW;
6084
24.5M
  while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
6085
18.2M
      int type;
6086
18.2M
      int def;
6087
18.2M
      xmlChar *defaultValue = NULL;
6088
6089
18.2M
      GROW;
6090
18.2M
            tree = NULL;
6091
18.2M
      attrName = xmlParseName(ctxt);
6092
18.2M
      if (attrName == NULL) {
6093
23.3k
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6094
23.3k
             "ATTLIST: no name for Attribute\n");
6095
23.3k
    break;
6096
23.3k
      }
6097
18.2M
      GROW;
6098
18.2M
      if (SKIP_BLANKS == 0) {
6099
18.7k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6100
18.7k
            "Space required after the attribute name\n");
6101
18.7k
    break;
6102
18.7k
      }
6103
6104
18.1M
      type = xmlParseAttributeType(ctxt, &tree);
6105
18.1M
      if (type <= 0) {
6106
35.1k
          break;
6107
35.1k
      }
6108
6109
18.1M
      GROW;
6110
18.1M
      if (SKIP_BLANKS == 0) {
6111
17.8k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6112
17.8k
             "Space required after the attribute type\n");
6113
17.8k
          if (tree != NULL)
6114
9.95k
        xmlFreeEnumeration(tree);
6115
17.8k
    break;
6116
17.8k
      }
6117
6118
18.1M
      def = xmlParseDefaultDecl(ctxt, &defaultValue);
6119
18.1M
      if (def <= 0) {
6120
0
                if (defaultValue != NULL)
6121
0
        xmlFree(defaultValue);
6122
0
          if (tree != NULL)
6123
0
        xmlFreeEnumeration(tree);
6124
0
          break;
6125
0
      }
6126
18.1M
      if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6127
554k
          xmlAttrNormalizeSpace(defaultValue, defaultValue);
6128
6129
18.1M
      GROW;
6130
18.1M
            if (RAW != '>') {
6131
16.6M
    if (SKIP_BLANKS == 0) {
6132
128k
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6133
128k
      "Space required after the attribute default value\n");
6134
128k
        if (defaultValue != NULL)
6135
113k
      xmlFree(defaultValue);
6136
128k
        if (tree != NULL)
6137
16.2k
      xmlFreeEnumeration(tree);
6138
128k
        break;
6139
128k
    }
6140
16.6M
      }
6141
18.0M
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6142
18.0M
    (ctxt->sax->attributeDecl != NULL))
6143
17.1M
    ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6144
17.1M
                          type, def, defaultValue, tree);
6145
904k
      else if (tree != NULL)
6146
92.7k
    xmlFreeEnumeration(tree);
6147
6148
18.0M
      if ((ctxt->sax2) && (defaultValue != NULL) &&
6149
18.0M
          (def != XML_ATTRIBUTE_IMPLIED) &&
6150
18.0M
    (def != XML_ATTRIBUTE_REQUIRED)) {
6151
954k
    xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6152
954k
      }
6153
18.0M
      if (ctxt->sax2) {
6154
11.3M
    xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6155
11.3M
      }
6156
18.0M
      if (defaultValue != NULL)
6157
1.45M
          xmlFree(defaultValue);
6158
18.0M
      GROW;
6159
18.0M
  }
6160
6.51M
  if (RAW == '>') {
6161
6.31M
      if (inputid != ctxt->input->id) {
6162
28.9k
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6163
28.9k
                               "Attribute list declaration doesn't start and"
6164
28.9k
                               " stop in the same entity\n");
6165
28.9k
      }
6166
6.31M
      NEXT;
6167
6.31M
  }
6168
6.51M
    }
6169
6.52M
}
6170
6171
/**
6172
 * xmlParseElementMixedContentDecl:
6173
 * @ctxt:  an XML parser context
6174
 * @inputchk:  the input used for the current entity, needed for boundary checks
6175
 *
6176
 * DEPRECATED: Internal function, don't use.
6177
 *
6178
 * parse the declaration for a Mixed Element content
6179
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6180
 *
6181
 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6182
 *                '(' S? '#PCDATA' S? ')'
6183
 *
6184
 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6185
 *
6186
 * [ VC: No Duplicate Types ]
6187
 * The same name must not appear more than once in a single
6188
 * mixed-content declaration.
6189
 *
6190
 * returns: the list of the xmlElementContentPtr describing the element choices
6191
 */
6192
xmlElementContentPtr
6193
2.57M
xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6194
2.57M
    xmlElementContentPtr ret = NULL, cur = NULL, n;
6195
2.57M
    const xmlChar *elem = NULL;
6196
6197
2.57M
    GROW;
6198
2.57M
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6199
2.57M
  SKIP(7);
6200
2.57M
  SKIP_BLANKS;
6201
2.57M
  SHRINK;
6202
2.57M
  if (RAW == ')') {
6203
1.44M
      if (ctxt->input->id != inputchk) {
6204
72
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6205
72
                               "Element content declaration doesn't start and"
6206
72
                               " stop in the same entity\n");
6207
72
      }
6208
1.44M
      NEXT;
6209
1.44M
      ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6210
1.44M
      if (ret == NULL)
6211
0
          return(NULL);
6212
1.44M
      if (RAW == '*') {
6213
401
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6214
401
    NEXT;
6215
401
      }
6216
1.44M
      return(ret);
6217
1.44M
  }
6218
1.12M
  if ((RAW == '(') || (RAW == '|')) {
6219
1.12M
      ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6220
1.12M
      if (ret == NULL) return(NULL);
6221
1.12M
  }
6222
12.3M
  while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
6223
11.2M
      NEXT;
6224
11.2M
      if (elem == NULL) {
6225
1.12M
          ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6226
1.12M
    if (ret == NULL) {
6227
0
        xmlFreeDocElementContent(ctxt->myDoc, cur);
6228
0
                    return(NULL);
6229
0
                }
6230
1.12M
    ret->c1 = cur;
6231
1.12M
    if (cur != NULL)
6232
1.12M
        cur->parent = ret;
6233
1.12M
    cur = ret;
6234
10.1M
      } else {
6235
10.1M
          n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6236
10.1M
    if (n == NULL) {
6237
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6238
0
                    return(NULL);
6239
0
                }
6240
10.1M
    n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6241
10.1M
    if (n->c1 != NULL)
6242
10.1M
        n->c1->parent = n;
6243
10.1M
          cur->c2 = n;
6244
10.1M
    if (n != NULL)
6245
10.1M
        n->parent = cur;
6246
10.1M
    cur = n;
6247
10.1M
      }
6248
11.2M
      SKIP_BLANKS;
6249
11.2M
      elem = xmlParseName(ctxt);
6250
11.2M
      if (elem == NULL) {
6251
862
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6252
862
      "xmlParseElementMixedContentDecl : Name expected\n");
6253
862
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6254
862
    return(NULL);
6255
862
      }
6256
11.2M
      SKIP_BLANKS;
6257
11.2M
      GROW;
6258
11.2M
  }
6259
1.12M
  if ((RAW == ')') && (NXT(1) == '*')) {
6260
1.11M
      if (elem != NULL) {
6261
1.11M
    cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6262
1.11M
                                   XML_ELEMENT_CONTENT_ELEMENT);
6263
1.11M
    if (cur->c2 != NULL)
6264
1.11M
        cur->c2->parent = cur;
6265
1.11M
            }
6266
1.11M
            if (ret != NULL)
6267
1.11M
                ret->ocur = XML_ELEMENT_CONTENT_MULT;
6268
1.11M
      if (ctxt->input->id != inputchk) {
6269
841
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6270
841
                               "Element content declaration doesn't start and"
6271
841
                               " stop in the same entity\n");
6272
841
      }
6273
1.11M
      SKIP(2);
6274
1.11M
  } else {
6275
5.35k
      xmlFreeDocElementContent(ctxt->myDoc, ret);
6276
5.35k
      xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6277
5.35k
      return(NULL);
6278
5.35k
  }
6279
6280
1.12M
    } else {
6281
0
  xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6282
0
    }
6283
1.11M
    return(ret);
6284
2.57M
}
6285
6286
/**
6287
 * xmlParseElementChildrenContentDeclPriv:
6288
 * @ctxt:  an XML parser context
6289
 * @inputchk:  the input used for the current entity, needed for boundary checks
6290
 * @depth: the level of recursion
6291
 *
6292
 * parse the declaration for a Mixed Element content
6293
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6294
 *
6295
 *
6296
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6297
 *
6298
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6299
 *
6300
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6301
 *
6302
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6303
 *
6304
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6305
 * TODO Parameter-entity replacement text must be properly nested
6306
 *  with parenthesized groups. That is to say, if either of the
6307
 *  opening or closing parentheses in a choice, seq, or Mixed
6308
 *  construct is contained in the replacement text for a parameter
6309
 *  entity, both must be contained in the same replacement text. For
6310
 *  interoperability, if a parameter-entity reference appears in a
6311
 *  choice, seq, or Mixed construct, its replacement text should not
6312
 *  be empty, and neither the first nor last non-blank character of
6313
 *  the replacement text should be a connector (| or ,).
6314
 *
6315
 * Returns the tree of xmlElementContentPtr describing the element
6316
 *          hierarchy.
6317
 */
6318
static xmlElementContentPtr
6319
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6320
3.42M
                                       int depth) {
6321
3.42M
    xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6322
3.42M
    const xmlChar *elem;
6323
3.42M
    xmlChar type = 0;
6324
6325
3.42M
    if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6326
3.42M
        (depth >  2048)) {
6327
123
        xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6328
123
"xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6329
123
                          depth);
6330
123
  return(NULL);
6331
123
    }
6332
3.42M
    SKIP_BLANKS;
6333
3.42M
    GROW;
6334
3.42M
    if (RAW == '(') {
6335
286k
  int inputid = ctxt->input->id;
6336
6337
        /* Recurse on first child */
6338
286k
  NEXT;
6339
286k
  SKIP_BLANKS;
6340
286k
        cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6341
286k
                                                           depth + 1);
6342
286k
        if (cur == NULL)
6343
141k
            return(NULL);
6344
144k
  SKIP_BLANKS;
6345
144k
  GROW;
6346
3.13M
    } else {
6347
3.13M
  elem = xmlParseName(ctxt);
6348
3.13M
  if (elem == NULL) {
6349
20.3k
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6350
20.3k
      return(NULL);
6351
20.3k
  }
6352
3.11M
        cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6353
3.11M
  if (cur == NULL) {
6354
0
      xmlErrMemory(ctxt, NULL);
6355
0
      return(NULL);
6356
0
  }
6357
3.11M
  GROW;
6358
3.11M
  if (RAW == '?') {
6359
263k
      cur->ocur = XML_ELEMENT_CONTENT_OPT;
6360
263k
      NEXT;
6361
2.85M
  } else if (RAW == '*') {
6362
248k
      cur->ocur = XML_ELEMENT_CONTENT_MULT;
6363
248k
      NEXT;
6364
2.60M
  } else if (RAW == '+') {
6365
576k
      cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6366
576k
      NEXT;
6367
2.02M
  } else {
6368
2.02M
      cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6369
2.02M
  }
6370
3.11M
  GROW;
6371
3.11M
    }
6372
3.25M
    SKIP_BLANKS;
6373
3.25M
    SHRINK;
6374
14.3M
    while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
6375
        /*
6376
   * Each loop we parse one separator and one element.
6377
   */
6378
11.1M
        if (RAW == ',') {
6379
2.84M
      if (type == 0) type = CUR;
6380
6381
      /*
6382
       * Detect "Name | Name , Name" error
6383
       */
6384
1.73M
      else if (type != CUR) {
6385
181
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6386
181
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6387
181
                      type);
6388
181
    if ((last != NULL) && (last != ret))
6389
181
        xmlFreeDocElementContent(ctxt->myDoc, last);
6390
181
    if (ret != NULL)
6391
181
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6392
181
    return(NULL);
6393
181
      }
6394
2.84M
      NEXT;
6395
6396
2.84M
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6397
2.84M
      if (op == NULL) {
6398
0
    if ((last != NULL) && (last != ret))
6399
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6400
0
          xmlFreeDocElementContent(ctxt->myDoc, ret);
6401
0
    return(NULL);
6402
0
      }
6403
2.84M
      if (last == NULL) {
6404
1.11M
    op->c1 = ret;
6405
1.11M
    if (ret != NULL)
6406
1.11M
        ret->parent = op;
6407
1.11M
    ret = cur = op;
6408
1.73M
      } else {
6409
1.73M
          cur->c2 = op;
6410
1.73M
    if (op != NULL)
6411
1.73M
        op->parent = cur;
6412
1.73M
    op->c1 = last;
6413
1.73M
    if (last != NULL)
6414
1.73M
        last->parent = op;
6415
1.73M
    cur =op;
6416
1.73M
    last = NULL;
6417
1.73M
      }
6418
8.27M
  } else if (RAW == '|') {
6419
8.24M
      if (type == 0) type = CUR;
6420
6421
      /*
6422
       * Detect "Name , Name | Name" error
6423
       */
6424
7.28M
      else if (type != CUR) {
6425
160
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6426
160
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6427
160
          type);
6428
160
    if ((last != NULL) && (last != ret))
6429
160
        xmlFreeDocElementContent(ctxt->myDoc, last);
6430
160
    if (ret != NULL)
6431
160
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6432
160
    return(NULL);
6433
160
      }
6434
8.24M
      NEXT;
6435
6436
8.24M
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6437
8.24M
      if (op == NULL) {
6438
0
    if ((last != NULL) && (last != ret))
6439
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6440
0
    if (ret != NULL)
6441
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6442
0
    return(NULL);
6443
0
      }
6444
8.24M
      if (last == NULL) {
6445
964k
    op->c1 = ret;
6446
964k
    if (ret != NULL)
6447
964k
        ret->parent = op;
6448
964k
    ret = cur = op;
6449
7.28M
      } else {
6450
7.28M
          cur->c2 = op;
6451
7.28M
    if (op != NULL)
6452
7.28M
        op->parent = cur;
6453
7.28M
    op->c1 = last;
6454
7.28M
    if (last != NULL)
6455
7.28M
        last->parent = op;
6456
7.28M
    cur =op;
6457
7.28M
    last = NULL;
6458
7.28M
      }
6459
8.24M
  } else {
6460
27.6k
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6461
27.6k
      if ((last != NULL) && (last != ret))
6462
6.89k
          xmlFreeDocElementContent(ctxt->myDoc, last);
6463
27.6k
      if (ret != NULL)
6464
27.6k
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6465
27.6k
      return(NULL);
6466
27.6k
  }
6467
11.0M
  GROW;
6468
11.0M
  SKIP_BLANKS;
6469
11.0M
  GROW;
6470
11.0M
  if (RAW == '(') {
6471
489k
      int inputid = ctxt->input->id;
6472
      /* Recurse on second child */
6473
489k
      NEXT;
6474
489k
      SKIP_BLANKS;
6475
489k
      last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6476
489k
                                                          depth + 1);
6477
489k
            if (last == NULL) {
6478
3.22k
    if (ret != NULL)
6479
3.22k
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6480
3.22k
    return(NULL);
6481
3.22k
            }
6482
486k
      SKIP_BLANKS;
6483
10.6M
  } else {
6484
10.6M
      elem = xmlParseName(ctxt);
6485
10.6M
      if (elem == NULL) {
6486
3.96k
    xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6487
3.96k
    if (ret != NULL)
6488
3.96k
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6489
3.96k
    return(NULL);
6490
3.96k
      }
6491
10.6M
      last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6492
10.6M
      if (last == NULL) {
6493
0
    if (ret != NULL)
6494
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6495
0
    return(NULL);
6496
0
      }
6497
10.6M
      if (RAW == '?') {
6498
1.04M
    last->ocur = XML_ELEMENT_CONTENT_OPT;
6499
1.04M
    NEXT;
6500
9.55M
      } else if (RAW == '*') {
6501
685k
    last->ocur = XML_ELEMENT_CONTENT_MULT;
6502
685k
    NEXT;
6503
8.87M
      } else if (RAW == '+') {
6504
170k
    last->ocur = XML_ELEMENT_CONTENT_PLUS;
6505
170k
    NEXT;
6506
8.70M
      } else {
6507
8.70M
    last->ocur = XML_ELEMENT_CONTENT_ONCE;
6508
8.70M
      }
6509
10.6M
  }
6510
11.0M
  SKIP_BLANKS;
6511
11.0M
  GROW;
6512
11.0M
    }
6513
3.22M
    if ((cur != NULL) && (last != NULL)) {
6514
2.06M
        cur->c2 = last;
6515
2.06M
  if (last != NULL)
6516
2.06M
      last->parent = cur;
6517
2.06M
    }
6518
3.22M
    if (ctxt->input->id != inputchk) {
6519
4.11k
  xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6520
4.11k
                       "Element content declaration doesn't start and stop in"
6521
4.11k
                       " the same entity\n");
6522
4.11k
    }
6523
3.22M
    NEXT;
6524
3.22M
    if (RAW == '?') {
6525
87.2k
  if (ret != NULL) {
6526
87.2k
      if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6527
87.2k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6528
1.38k
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6529
85.8k
      else
6530
85.8k
          ret->ocur = XML_ELEMENT_CONTENT_OPT;
6531
87.2k
  }
6532
87.2k
  NEXT;
6533
3.13M
    } else if (RAW == '*') {
6534
656k
  if (ret != NULL) {
6535
656k
      ret->ocur = XML_ELEMENT_CONTENT_MULT;
6536
656k
      cur = ret;
6537
      /*
6538
       * Some normalization:
6539
       * (a | b* | c?)* == (a | b | c)*
6540
       */
6541
5.32M
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6542
4.66M
    if ((cur->c1 != NULL) &&
6543
4.66M
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6544
4.66M
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6545
69.6k
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6546
4.66M
    if ((cur->c2 != NULL) &&
6547
4.66M
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6548
4.66M
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6549
11.4k
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6550
4.66M
    cur = cur->c2;
6551
4.66M
      }
6552
656k
  }
6553
656k
  NEXT;
6554
2.48M
    } else if (RAW == '+') {
6555
494k
  if (ret != NULL) {
6556
494k
      int found = 0;
6557
6558
494k
      if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6559
494k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6560
393
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6561
494k
      else
6562
494k
          ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6563
      /*
6564
       * Some normalization:
6565
       * (a | b*)+ == (a | b)*
6566
       * (a | b?)+ == (a | b)*
6567
       */
6568
852k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6569
357k
    if ((cur->c1 != NULL) &&
6570
357k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6571
357k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6572
642
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6573
642
        found = 1;
6574
642
    }
6575
357k
    if ((cur->c2 != NULL) &&
6576
357k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6577
357k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6578
329
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6579
329
        found = 1;
6580
329
    }
6581
357k
    cur = cur->c2;
6582
357k
      }
6583
494k
      if (found)
6584
783
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6585
494k
  }
6586
494k
  NEXT;
6587
494k
    }
6588
3.22M
    return(ret);
6589
3.25M
}
6590
6591
/**
6592
 * xmlParseElementChildrenContentDecl:
6593
 * @ctxt:  an XML parser context
6594
 * @inputchk:  the input used for the current entity, needed for boundary checks
6595
 *
6596
 * DEPRECATED: Internal function, don't use.
6597
 *
6598
 * parse the declaration for a Mixed Element content
6599
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6600
 *
6601
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6602
 *
6603
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6604
 *
6605
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6606
 *
6607
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6608
 *
6609
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6610
 * TODO Parameter-entity replacement text must be properly nested
6611
 *  with parenthesized groups. That is to say, if either of the
6612
 *  opening or closing parentheses in a choice, seq, or Mixed
6613
 *  construct is contained in the replacement text for a parameter
6614
 *  entity, both must be contained in the same replacement text. For
6615
 *  interoperability, if a parameter-entity reference appears in a
6616
 *  choice, seq, or Mixed construct, its replacement text should not
6617
 *  be empty, and neither the first nor last non-blank character of
6618
 *  the replacement text should be a connector (| or ,).
6619
 *
6620
 * Returns the tree of xmlElementContentPtr describing the element
6621
 *          hierarchy.
6622
 */
6623
xmlElementContentPtr
6624
0
xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6625
    /* stub left for API/ABI compat */
6626
0
    return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6627
0
}
6628
6629
/**
6630
 * xmlParseElementContentDecl:
6631
 * @ctxt:  an XML parser context
6632
 * @name:  the name of the element being defined.
6633
 * @result:  the Element Content pointer will be stored here if any
6634
 *
6635
 * DEPRECATED: Internal function, don't use.
6636
 *
6637
 * parse the declaration for an Element content either Mixed or Children,
6638
 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6639
 *
6640
 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6641
 *
6642
 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6643
 */
6644
6645
int
6646
xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6647
5.21M
                           xmlElementContentPtr *result) {
6648
6649
5.21M
    xmlElementContentPtr tree = NULL;
6650
5.21M
    int inputid = ctxt->input->id;
6651
5.21M
    int res;
6652
6653
5.21M
    *result = NULL;
6654
6655
5.21M
    if (RAW != '(') {
6656
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6657
0
    "xmlParseElementContentDecl : %s '(' expected\n", name);
6658
0
  return(-1);
6659
0
    }
6660
5.21M
    NEXT;
6661
5.21M
    GROW;
6662
5.21M
    if (ctxt->instate == XML_PARSER_EOF)
6663
0
        return(-1);
6664
5.21M
    SKIP_BLANKS;
6665
5.21M
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6666
2.57M
        tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6667
2.57M
  res = XML_ELEMENT_TYPE_MIXED;
6668
2.64M
    } else {
6669
2.64M
        tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6670
2.64M
  res = XML_ELEMENT_TYPE_ELEMENT;
6671
2.64M
    }
6672
5.21M
    SKIP_BLANKS;
6673
5.21M
    *result = tree;
6674
5.21M
    return(res);
6675
5.21M
}
6676
6677
/**
6678
 * xmlParseElementDecl:
6679
 * @ctxt:  an XML parser context
6680
 *
6681
 * DEPRECATED: Internal function, don't use.
6682
 *
6683
 * Parse an element declaration. Always consumes '<!'.
6684
 *
6685
 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6686
 *
6687
 * [ VC: Unique Element Type Declaration ]
6688
 * No element type may be declared more than once
6689
 *
6690
 * Returns the type of the element, or -1 in case of error
6691
 */
6692
int
6693
6.59M
xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6694
6.59M
    const xmlChar *name;
6695
6.59M
    int ret = -1;
6696
6.59M
    xmlElementContentPtr content  = NULL;
6697
6698
6.59M
    if ((CUR != '<') || (NXT(1) != '!'))
6699
0
        return(ret);
6700
6.59M
    SKIP(2);
6701
6702
    /* GROW; done in the caller */
6703
6.59M
    if (CMP7(CUR_PTR, 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6704
6.59M
  int inputid = ctxt->input->id;
6705
6706
6.59M
  SKIP(7);
6707
6.59M
  if (SKIP_BLANKS == 0) {
6708
9.73k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6709
9.73k
               "Space required after 'ELEMENT'\n");
6710
9.73k
      return(-1);
6711
9.73k
  }
6712
6.58M
        name = xmlParseName(ctxt);
6713
6.58M
  if (name == NULL) {
6714
3.01k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6715
3.01k
         "xmlParseElementDecl: no name for Element\n");
6716
3.01k
      return(-1);
6717
3.01k
  }
6718
6.57M
  if (SKIP_BLANKS == 0) {
6719
11.0k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6720
11.0k
         "Space required after the element name\n");
6721
11.0k
  }
6722
6.57M
  if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6723
1.28M
      SKIP(5);
6724
      /*
6725
       * Element must always be empty.
6726
       */
6727
1.28M
      ret = XML_ELEMENT_TYPE_EMPTY;
6728
5.28M
  } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6729
5.28M
             (NXT(2) == 'Y')) {
6730
59.5k
      SKIP(3);
6731
      /*
6732
       * Element is a generic container.
6733
       */
6734
59.5k
      ret = XML_ELEMENT_TYPE_ANY;
6735
5.23M
  } else if (RAW == '(') {
6736
5.21M
      ret = xmlParseElementContentDecl(ctxt, name, &content);
6737
5.21M
  } else {
6738
      /*
6739
       * [ WFC: PEs in Internal Subset ] error handling.
6740
       */
6741
13.0k
      if ((RAW == '%') && (ctxt->external == 0) &&
6742
13.0k
          (ctxt->inputNr == 1)) {
6743
539
    xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
6744
539
    "PEReference: forbidden within markup decl in internal subset\n");
6745
12.4k
      } else {
6746
12.4k
    xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6747
12.4k
          "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6748
12.4k
            }
6749
13.0k
      return(-1);
6750
13.0k
  }
6751
6752
6.56M
  SKIP_BLANKS;
6753
6754
6.56M
  if (RAW != '>') {
6755
37.0k
      xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6756
37.0k
      if (content != NULL) {
6757
9.33k
    xmlFreeDocElementContent(ctxt->myDoc, content);
6758
9.33k
      }
6759
6.52M
  } else {
6760
6.52M
      if (inputid != ctxt->input->id) {
6761
3.60k
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6762
3.60k
                               "Element declaration doesn't start and stop in"
6763
3.60k
                               " the same entity\n");
6764
3.60k
      }
6765
6766
6.52M
      NEXT;
6767
6.52M
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6768
6.52M
    (ctxt->sax->elementDecl != NULL)) {
6769
5.92M
    if (content != NULL)
6770
4.83M
        content->parent = NULL;
6771
5.92M
          ctxt->sax->elementDecl(ctxt->userData, name, ret,
6772
5.92M
                           content);
6773
5.92M
    if ((content != NULL) && (content->parent == NULL)) {
6774
        /*
6775
         * this is a trick: if xmlAddElementDecl is called,
6776
         * instead of copying the full tree it is plugged directly
6777
         * if called from the parser. Avoid duplicating the
6778
         * interfaces or change the API/ABI
6779
         */
6780
258k
        xmlFreeDocElementContent(ctxt->myDoc, content);
6781
258k
    }
6782
5.92M
      } else if (content != NULL) {
6783
313k
    xmlFreeDocElementContent(ctxt->myDoc, content);
6784
313k
      }
6785
6.52M
  }
6786
6.56M
    }
6787
6.57M
    return(ret);
6788
6.59M
}
6789
6790
/**
6791
 * xmlParseConditionalSections:
6792
 * @ctxt:  an XML parser context
6793
 *
6794
 * Parse a conditional section. Always consumes '<!['.
6795
 *
 * INCLUDE sections are tracked on a small heap-allocated stack (inputIds)
 * holding the id of the input in which each '<![' was seen. The matching
 * ']]>' pops the entry and reports XML_ERR_ENTITY_BOUNDARY when the
 * current input id differs. IGNORE sections are skipped in place while
 * counting nested '<![' ... ']]>' pairs. The function returns once no
 * INCLUDE section remains open, when the parser state is XML_PARSER_EOF,
 * or after an error; the stack is freed on every path.
 *
6796
 * [61] conditionalSect ::= includeSect | ignoreSect
6797
 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6798
 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6799
 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6800
 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6801
 */
6802
6803
static void
6804
61.0k
xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6805
61.0k
    /* Stack of input ids, one entry per currently open INCLUDE section */
    int *inputIds = NULL;
6806
61.0k
    /* Allocated capacity of inputIds, in entries */
    size_t inputIdsSize = 0;
6807
61.0k
    /* Number of currently open INCLUDE sections */
    size_t depth = 0;
6808
6809
731k
    while (ctxt->instate != XML_PARSER_EOF) {
6810
731k
        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6811
224k
            /* Input id at the opening '<![', compared again further down */
            int id = ctxt->input->id;
6812
6813
224k
            SKIP(3);
6814
224k
            SKIP_BLANKS;
6815
6816
224k
            if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6817
181k
                SKIP(7);
6818
181k
                SKIP_BLANKS;
6819
181k
                if (RAW != '[') {
6820
328
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6821
328
                    xmlHaltParser(ctxt);
6822
328
                    goto error;
6823
328
                }
6824
181k
                if (ctxt->input->id != id) {
6825
59
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6826
59
                                   "All markup of the conditional section is"
6827
59
                                   " not in the same entity\n");
6828
59
                }
6829
181k
                NEXT;
6830
6831
181k
                /* Grow the stack: 4 entries at first, doubled afterwards */
                if (inputIdsSize <= depth) {
6832
26.2k
                    int *tmp;
6833
6834
26.2k
                    inputIdsSize = (inputIdsSize == 0 ? 4 : inputIdsSize * 2);
6835
26.2k
                    tmp = (int *) xmlRealloc(inputIds,
6836
26.2k
                            inputIdsSize * sizeof(int));
6837
26.2k
                    if (tmp == NULL) {
6838
0
                        xmlErrMemory(ctxt, NULL);
6839
0
                        goto error;
6840
0
                    }
6841
26.2k
                    inputIds = tmp;
6842
26.2k
                }
6843
181k
                inputIds[depth] = id;
6844
181k
                depth++;
6845
181k
            } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6846
39.9k
                /* Number of nested '<![' seen inside the ignored content */
                size_t ignoreDepth = 0;
6847
6848
39.9k
                SKIP(6);
6849
39.9k
                SKIP_BLANKS;
6850
39.9k
                if (RAW != '[') {
6851
190
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6852
190
                    xmlHaltParser(ctxt);
6853
190
                    goto error;
6854
190
                }
6855
39.7k
                if (ctxt->input->id != id) {
6856
30
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6857
30
                                   "All markup of the conditional section is"
6858
30
                                   " not in the same entity\n");
6859
30
                }
6860
39.7k
                NEXT;
6861
6862
27.9M
                /*
                 * Skip the ignored content. The loop stops on the ']]>'
                 * that closes this section (ignoreDepth == 0) without
                 * consuming it, or when RAW is 0.
                 */
                while (RAW != 0) {
6863
27.9M
                    if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6864
325k
                        SKIP(3);
6865
325k
                        ignoreDepth++;
6866
                        /* Check for integer overflow */
6867
325k
                        if (ignoreDepth == 0) {
6868
0
                            xmlErrMemory(ctxt, NULL);
6869
0
                            goto error;
6870
0
                        }
6871
27.6M
                    } else if ((RAW == ']') && (NXT(1) == ']') &&
6872
27.6M
                               (NXT(2) == '>')) {
6873
47.3k
                        if (ignoreDepth == 0)
6874
36.6k
                            break;
6875
10.6k
                        SKIP(3);
6876
10.6k
                        ignoreDepth--;
6877
27.5M
                    } else {
6878
27.5M
                        NEXT;
6879
27.5M
                    }
6880
27.9M
                }
6881
6882
39.7k
    /* RAW is 0 here only if the closing ']]>' was never found */
    if (RAW == 0) {
6883
3.08k
        xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6884
3.08k
                    goto error;
6885
3.08k
    }
6886
36.6k
                if (ctxt->input->id != id) {
6887
10
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6888
10
                                   "All markup of the conditional section is"
6889
10
                                   " not in the same entity\n");
6890
10
                }
6891
36.6k
                /* Consume the closing ']]>' of the IGNORE section */
                SKIP(3);
6892
36.6k
            } else {
6893
2.63k
                xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6894
2.63k
                xmlHaltParser(ctxt);
6895
2.63k
                goto error;
6896
2.63k
            }
6897
507k
        } else if ((depth > 0) &&
6898
507k
                   (RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6899
33.3k
            /* End of the innermost open INCLUDE section: pop its input id */
            depth--;
6900
33.3k
            if (ctxt->input->id != inputIds[depth]) {
6901
896
                xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6902
896
                               "All markup of the conditional section is not"
6903
896
                               " in the same entity\n");
6904
896
            }
6905
33.3k
            SKIP(3);
6906
473k
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
6907
468k
            xmlParseMarkupDecl(ctxt);
6908
468k
        } else {
6909
4.77k
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6910
4.77k
            xmlHaltParser(ctxt);
6911
4.77k
            goto error;
6912
4.77k
        }
6913
6914
720k
        /* Stop as soon as no INCLUDE section remains open */
        if (depth == 0)
6915
49.6k
            break;
6916
6917
670k
        SKIP_BLANKS;
6918
670k
        GROW;
6919
670k
    }
6920
6921
61.0k
/* Common exit, also reached on normal completion: release the stack */
error:
6922
61.0k
    xmlFree(inputIds);
6923
61.0k
}
6924
6925
/**
6926
 * xmlParseMarkupDecl:
6927
 * @ctxt:  an XML parser context
6928
 *
6929
 * DEPRECATED: Internal function, don't use.
6930
 *
6931
 * Parse markup declarations. Always consumes '<!' or '<?'.
6932
 *
 * The declaration kind is selected from the characters following '<!':
 * 'EL' -> xmlParseElementDecl, 'EN' -> xmlParseEntityDecl,
 * 'A' -> xmlParseAttributeListDecl, 'N' -> xmlParseNotationDecl,
 * '-' -> xmlParseComment. '<?' is handed to xmlParsePI. Anything else
 * after '<!' only has the two characters '<!' skipped. Unless the parser
 * state is XML_PARSER_EOF afterwards, it is reset to XML_PARSER_DTD.
 *
6933
 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6934
 *                     NotationDecl | PI | Comment
6935
 *
6936
 * [ VC: Proper Declaration/PE Nesting ]
6937
 * Parameter-entity replacement text must be properly nested with
6938
 * markup declarations. That is to say, if either the first character
6939
 * or the last character of a markup declaration (markupdecl above) is
6940
 * contained in the replacement text for a parameter-entity reference,
6941
 * both must be contained in the same replacement text.
6942
 *
6943
 * [ WFC: PEs in Internal Subset ]
6944
 * In the internal DTD subset, parameter-entity references can occur
6945
 * only where markup declarations can occur, not within markup declarations.
6946
 * (This does not apply to references that occur in external parameter
6947
 * entities or to the external subset.)
6948
 */
6949
void
6950
445M
xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6951
445M
    GROW;
6952
445M
    if (CUR == '<') {
6953
445M
        if (NXT(1) == '!') {
6954
442M
      /* Dispatch on the first character of the declaration keyword */
      switch (NXT(2)) {
6955
13.6M
          case 'E':
6956
13.6M
        /* 'E' is ambiguous: the next character picks ELEMENT or ENTITY */
        if (NXT(3) == 'L')
6957
6.59M
      xmlParseElementDecl(ctxt);
6958
7.09M
        else if (NXT(3) == 'N')
6959
7.09M
      xmlParseEntityDecl(ctxt);
6960
1.75k
                    else
6961
1.75k
                        SKIP(2);
6962
13.6M
        break;
6963
6.52M
          case 'A':
6964
6.52M
        xmlParseAttributeListDecl(ctxt);
6965
6.52M
        break;
6966
875k
          case 'N':
6967
875k
        xmlParseNotationDecl(ctxt);
6968
875k
        break;
6969
420M
          case '-':
6970
420M
        xmlParseComment(ctxt);
6971
420M
        break;
6972
918k
    default:
6973
        /* there is an error but it will be detected later */
6974
918k
                    SKIP(2);
6975
918k
        break;
6976
442M
      }
6977
442M
  } else if (NXT(1) == '?') {
6978
2.88M
      xmlParsePI(ctxt);
6979
2.88M
  }
6980
445M
    }
6981
6982
    /*
6983
     * detect requirement to exit there and act accordingly
6984
     * and avoid having instate overridden later on
6985
     */
6986
445M
    if (ctxt->instate == XML_PARSER_EOF)
6987
36.1k
        return;
6988
6989
445M
    ctxt->instate = XML_PARSER_DTD;
6990
445M
}
6991
6992
/**
6993
 * xmlParseTextDecl:
6994
 * @ctxt:  an XML parser context
6995
 *
6996
 * DEPRECATED: Internal function, don't use.
6997
 *
6998
 * Parse the text declaration (XML declaration header) of an external
 * entity.
 *
 * Reports XML_ERR_XMLDECL_NOT_STARTED and returns if the input does not
 * start with '<?xml' followed by a blank. While parsing, the parser state
 * is temporarily set to XML_PARSER_START and restored before returning,
 * except on the early return taken when the state is XML_PARSER_EOF.
 * The parsed version string, or a copy of XML_DEFAULT_VERSION when none
 * is present, is stored in ctxt->input->version.
6999
 *
7000
 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
7001
 */
7002
7003
void
7004
30.9k
xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
7005
30.9k
    xmlChar *version;
7006
30.9k
    const xmlChar *encoding;
7007
30.9k
    /* Parser state on entry, restored on the normal exit paths */
    int oldstate;
7008
7009
    /*
7010
     * We know that '<?xml' is here.
7011
     */
7012
30.9k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
7013
30.3k
  SKIP(5);
7014
30.3k
    } else {
7015
514
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
7016
514
  return;
7017
514
    }
7018
7019
    /* Avoid expansion of parameter entities when skipping blanks. */
7020
30.3k
    oldstate = ctxt->instate;
7021
30.3k
    ctxt->instate = XML_PARSER_START;
7022
7023
30.3k
    if (SKIP_BLANKS == 0) {
7024
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7025
0
           "Space needed after '<?xml'\n");
7026
0
    }
7027
7028
    /*
7029
     * We may have the VersionInfo here.
7030
     */
7031
30.3k
    version = xmlParseVersionInfo(ctxt);
7032
30.3k
    if (version == NULL)
7033
3.95k
  version = xmlCharStrdup(XML_DEFAULT_VERSION);
7034
26.4k
    else {
7035
26.4k
  if (SKIP_BLANKS == 0) {
7036
1.61k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7037
1.61k
               "Space needed here\n");
7038
1.61k
  }
7039
26.4k
    }
7040
30.3k
    ctxt->input->version = version;
7041
7042
    /*
7043
     * We must have the encoding declaration
7044
     */
7045
30.3k
    encoding = xmlParseEncodingDecl(ctxt);
7046
30.3k
    /* Parser state is EOF: return without restoring oldstate */
    if (ctxt->instate == XML_PARSER_EOF)
7047
0
        return;
7048
30.3k
    if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7049
  /*
7050
   * The XML REC instructs us to stop parsing right here
7051
   */
7052
460
        ctxt->instate = oldstate;
7053
460
        return;
7054
460
    }
7055
29.9k
    /* A missing encoding is only reported if no error was recorded yet */
    if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
7056
7.67k
  xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
7057
7.67k
           "Missing encoding in text declaration\n");
7058
7.67k
    }
7059
7060
29.9k
    SKIP_BLANKS;
7061
29.9k
    if ((RAW == '?') && (NXT(1) == '>')) {
7062
21.1k
        SKIP(2);
7063
21.1k
    } else if (RAW == '>') {
7064
        /* Deprecated old WD ... */
7065
239
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7066
239
  NEXT;
7067
8.50k
    } else {
7068
8.50k
        int c;
7069
7070
8.50k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7071
628k
        /* Recover by skipping up to and including the next '>' */
        while ((c = CUR) != 0) {
7072
625k
            NEXT;
7073
625k
            if (c == '>')
7074
5.05k
                break;
7075
625k
        }
7076
8.50k
    }
7077
7078
29.9k
    ctxt->instate = oldstate;
7079
29.9k
}
7080
7081
/**
7082
 * xmlParseExternalSubset:
7083
 * @ctxt:  an XML parser context
7084
 * @ExternalID: the external identifier
7085
 * @SystemID: the system identifier (or URL)
7086
 *
7087
 * Parse markup declarations from an external subset.
 *
 * If no encoding is set on the context and at least four bytes of input
 * are available, the encoding is first detected from those bytes. An
 * optional text declaration is then parsed; an unsupported encoding halts
 * the parser. A document is created when ctxt->myDoc is NULL, and an
 * internal subset is created from @ExternalID and @SystemID when the
 * document has none. Declarations and conditional sections are then
 * parsed until the input is exhausted or the parser state becomes
 * XML_PARSER_EOF; any other content is reported as
 * XML_ERR_EXT_SUBSET_NOT_FINISHED and halts the parser.
7088
 *
7089
 * [30] extSubset ::= textDecl? extSubsetDecl
7090
 *
7091
 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
7092
 */
7093
void
7094
xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
7095
129k
                       const xmlChar *SystemID) {
7096
129k
    xmlDetectSAX2(ctxt);
7097
129k
    GROW;
7098
7099
129k
    /* Guess the encoding from the first four bytes when none is known */
    if ((ctxt->encoding == NULL) &&
7100
129k
        (ctxt->input->end - ctxt->input->cur >= 4)) {
7101
129k
        xmlChar start[4];
7102
129k
  xmlCharEncoding enc;
7103
7104
129k
  start[0] = RAW;
7105
129k
  start[1] = NXT(1);
7106
129k
  start[2] = NXT(2);
7107
129k
  start[3] = NXT(3);
7108
129k
  enc = xmlDetectCharEncoding(start, 4);
7109
129k
  if (enc != XML_CHAR_ENCODING_NONE)
7110
21.4k
      xmlSwitchEncoding(ctxt, enc);
7111
129k
    }
7112
7113
129k
    if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
7114
19.8k
  xmlParseTextDecl(ctxt);
7115
19.8k
  if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7116
      /*
7117
       * The XML REC instructs us to stop parsing right here
7118
       */
7119
317
      xmlHaltParser(ctxt);
7120
317
      return;
7121
317
  }
7122
19.8k
    }
7123
129k
    /* Make sure a document exists to attach the subset to */
    if (ctxt->myDoc == NULL) {
7124
0
        ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
7125
0
  if (ctxt->myDoc == NULL) {
7126
0
      xmlErrMemory(ctxt, "New Doc failed");
7127
0
      return;
7128
0
  }
7129
0
  ctxt->myDoc->properties = XML_DOC_INTERNAL;
7130
0
    }
7131
129k
    if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
7132
0
        xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
7133
7134
129k
    ctxt->instate = XML_PARSER_DTD;
7135
129k
    ctxt->external = 1;
7136
129k
    SKIP_BLANKS;
7137
92.4M
    /* Main loop: one conditional section or markup declaration per pass */
    while ((ctxt->instate != XML_PARSER_EOF) && (RAW != 0)) {
7138
92.3M
  GROW;
7139
92.3M
        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7140
61.0k
            xmlParseConditionalSections(ctxt);
7141
92.2M
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
7142
92.2M
            xmlParseMarkupDecl(ctxt);
7143
92.2M
        } else {
7144
36.4k
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7145
36.4k
            xmlHaltParser(ctxt);
7146
36.4k
            return;
7147
36.4k
        }
7148
92.2M
        SKIP_BLANKS;
7149
92.2M
    }
7150
7151
92.9k
    /* Input left over after the loop means the subset was not finished */
    if (RAW != 0) {
7152
0
  xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7153
0
    }
7154
7155
92.9k
}
7156
7157
/**
7158
 * xmlParseReference:
7159
 * @ctxt:  an XML parser context
7160
 *
7161
 * DEPRECATED: Internal function, don't use.
7162
 *
7163
 * Parse and handle entity references in content. Depending on the SAX
7164
 * interface, this may end up in a call to characters() if this is a
7165
 * CharRef or a predefined entity, if there is no reference() callback,
7166
 * or if the parser was asked to switch to that mode.
7167
 *
 * Outline of the processing below:
 *  - a CharRef is delivered through characters() or, when the context
 *    charset is not UTF-8 and the value is above 0xFF, through
 *    reference();
 *  - a predefined entity has its content delivered through characters();
 *  - any other entity is parsed on first reference (external parsed
 *    entities only with XML_PARSE_NOENT or XML_PARSE_DTDVALID) and the
 *    resulting node list is stored in ent->children;
 *  - finally either the reference() callback is invoked or, when
 *    entities are being replaced and ctxt->node is set, the entity
 *    content is added under ctxt->node.
 *
7168
 * Always consumes '&'.
7169
 *
7170
 * [67] Reference ::= EntityRef | CharRef
7171
 */
7172
void
7173
23.0M
xmlParseReference(xmlParserCtxtPtr ctxt) {
7174
23.0M
    xmlEntityPtr ent;
7175
23.0M
    xmlChar *val;
7176
23.0M
    /* Non-zero if the entity was already marked XML_ENT_PARSED on entry */
    int was_checked;
7177
23.0M
    /* Node list built by the first parse of the entity content */
    xmlNodePtr list = NULL;
7178
23.0M
    xmlParserErrors ret = XML_ERR_OK;
7179
7180
7181
23.0M
    if (RAW != '&')
7182
0
        return;
7183
7184
    /*
7185
     * Simple case of a CharRef
7186
     */
7187
23.0M
    if (NXT(1) == '#') {
7188
937k
  int i = 0;
7189
937k
  xmlChar out[16];
7190
937k
  /*
   * Read before the xmlParseCharRef() call; used below to choose
   * between the "#x%X" and "#%d" forms.
   */
  int hex = NXT(2);
7191
937k
  int value = xmlParseCharRef(ctxt);
7192
7193
937k
  if (value == 0)
7194
123k
      return;
7195
814k
  if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
7196
      /*
7197
       * So we are using non-UTF-8 buffers
7198
       * Check that the char fit on 8bits, if not
7199
       * generate a CharRef.
7200
       */
7201
605k
      if (value <= 0xFF) {
7202
582k
    out[0] = value;
7203
582k
    out[1] = 0;
7204
582k
    if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7205
582k
        (!ctxt->disableSAX))
7206
450k
        ctxt->sax->characters(ctxt->userData, out, 1);
7207
582k
      } else {
7208
23.1k
    if ((hex == 'x') || (hex == 'X'))
7209
5.05k
        snprintf((char *)out, sizeof(out), "#x%X", value);
7210
18.0k
    else
7211
18.0k
        snprintf((char *)out, sizeof(out), "#%d", value);
7212
23.1k
    if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7213
23.1k
        (!ctxt->disableSAX))
7214
15.3k
        ctxt->sax->reference(ctxt->userData, out);
7215
23.1k
      }
7216
605k
  } else {
7217
      /*
7218
       * Just encode the value in UTF-8
7219
       */
7220
209k
      COPY_BUF(0 ,out, i, value);
7221
209k
      out[i] = 0;
7222
209k
      if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7223
209k
    (!ctxt->disableSAX))
7224
184k
    ctxt->sax->characters(ctxt->userData, out, i);
7225
209k
  }
7226
814k
  return;
7227
937k
    }
7228
7229
    /*
7230
     * We are seeing an entity reference
7231
     */
7232
22.1M
    ent = xmlParseEntityRef(ctxt);
7233
22.1M
    if (ent == NULL) return;
7234
20.8M
    /* Nothing more is done once the document is flagged not well-formed */
    if (!ctxt->wellFormed)
7235
12.8M
  return;
7236
8.07M
    was_checked = ent->flags & XML_ENT_PARSED;
7237
7238
    /* special case of predefined entities */
7239
8.07M
    if ((ent->name == NULL) ||
7240
8.07M
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7241
750k
  val = ent->content;
7242
750k
  if (val == NULL) return;
7243
  /*
7244
   * inline the entity.
7245
   */
7246
750k
  if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7247
750k
      (!ctxt->disableSAX))
7248
750k
      ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7249
750k
  return;
7250
750k
    }
7251
7252
    /*
7253
     * The first reference to the entity triggers a parsing phase
7254
     * where the ent->children is filled with the result from
7255
     * the parsing.
7256
     * Note: external parsed entities will not be loaded, it is not
7257
     * required for a non-validating parser, unless the parsing option
7258
     * of validating, or substituting entities were given. Doing so is
7259
     * far more secure as the parser will only process data coming from
7260
     * the document entity by default.
7261
     */
7262
7.32M
    if (((ent->flags & XML_ENT_PARSED) == 0) &&
7263
7.32M
        ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
7264
349k
         (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
7265
339k
  /* Saved so it can be passed to xmlParserEntityCheck() afterwards */
  unsigned long oldsizeentcopy = ctxt->sizeentcopy;
7266
7267
  /*
7268
   * This is a bit hackish but this seems the best
7269
   * way to make sure both SAX and DOM entity support
7270
   * behaves okay.
7271
   */
7272
339k
  void *user_data;
7273
339k
  if (ctxt->userData == ctxt)
7274
339k
      user_data = NULL;
7275
0
  else
7276
0
      user_data = ctxt->userData;
7277
7278
        /* Avoid overflow as much as possible */
7279
339k
        ctxt->sizeentcopy = 0;
7280
7281
339k
        /* The entity is already being expanded: reference loop */
        if (ent->flags & XML_ENT_EXPANDING) {
7282
1.42k
            xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7283
1.42k
            xmlHaltParser(ctxt);
7284
1.42k
            return;
7285
1.42k
        }
7286
7287
338k
        ent->flags |= XML_ENT_EXPANDING;
7288
7289
  /*
7290
   * Check that this entity is well formed
7291
   * 4.3.2: An internal general parsed entity is well-formed
7292
   * if its replacement text matches the production labeled
7293
   * content.
7294
   */
7295
338k
  if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7296
210k
      ctxt->depth++;
7297
210k
      ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
7298
210k
                                                user_data, &list);
7299
210k
      ctxt->depth--;
7300
7301
210k
  } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7302
128k
      ctxt->depth++;
7303
128k
      ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
7304
128k
                                     user_data, ctxt->depth, ent->URI,
7305
128k
             ent->ExternalID, &list);
7306
128k
      ctxt->depth--;
7307
128k
  } else {
7308
0
      ret = XML_ERR_ENTITY_PE_INTERNAL;
7309
0
      xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7310
0
       "invalid entity type found\n", NULL);
7311
0
  }
7312
7313
338k
        /* Expansion finished: record the result on the entity */
        ent->flags &= ~XML_ENT_EXPANDING;
7314
338k
        ent->flags |= XML_ENT_PARSED | XML_ENT_CHECKED;
7315
338k
        ent->expandedSize = ctxt->sizeentcopy;
7316
338k
  if (ret == XML_ERR_ENTITY_LOOP) {
7317
16.3k
            xmlHaltParser(ctxt);
7318
16.3k
      xmlFreeNodeList(list);
7319
16.3k
      return;
7320
16.3k
  }
7321
322k
  if (xmlParserEntityCheck(ctxt, oldsizeentcopy)) {
7322
12
      xmlFreeNodeList(list);
7323
12
      return;
7324
12
  }
7325
7326
322k
  if ((ret == XML_ERR_OK) && (list != NULL)) {
7327
192k
            ent->children = list;
7328
            /*
7329
             * Prune it directly in the generated document
7330
             * except for single text nodes.
7331
             */
7332
192k
            if ((ctxt->replaceEntities == 0) ||
7333
192k
                (ctxt->parseMode == XML_PARSE_READER) ||
7334
192k
                ((list->type == XML_TEXT_NODE) &&
7335
176k
                 (list->next == NULL))) {
7336
176k
                /* The entity owns the list: reparent every node to it */
                ent->owner = 1;
7337
2.31M
                while (list != NULL) {
7338
2.14M
                    list->parent = (xmlNodePtr) ent;
7339
2.14M
                    if (list->doc != ent->doc)
7340
0
                        xmlSetTreeDoc(list, ent->doc);
7341
2.14M
                    if (list->next == NULL)
7342
176k
                        ent->last = list;
7343
2.14M
                    list = list->next;
7344
2.14M
                }
7345
176k
                list = NULL;
7346
176k
            } else {
7347
15.5k
                /* Not owned by the entity: parent is the current node */
                ent->owner = 0;
7348
3.81M
                while (list != NULL) {
7349
3.80M
                    list->parent = (xmlNodePtr) ctxt->node;
7350
3.80M
                    list->doc = ctxt->myDoc;
7351
3.80M
                    if (list->next == NULL)
7352
15.5k
                        ent->last = list;
7353
3.80M
                    list = list->next;
7354
3.80M
                }
7355
15.5k
                list = ent->children;
7356
#ifdef LIBXML_LEGACY_ENABLED
7357
                if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7358
                    xmlAddEntityReference(ent, list, NULL);
7359
#endif /* LIBXML_LEGACY_ENABLED */
7360
15.5k
            }
7361
192k
  } else if ((ret != XML_ERR_OK) &&
7362
130k
       (ret != XML_WAR_UNDECLARED_ENTITY)) {
7363
75.7k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7364
75.7k
         "Entity '%s' failed to parse\n", ent->name);
7365
75.7k
            /* Truncate the stored content of the entity that failed */
            if (ent->content != NULL)
7366
25.7k
                ent->content[0] = 0;
7367
75.7k
  } else if (list != NULL) {
7368
0
      xmlFreeNodeList(list);
7369
0
      list = NULL;
7370
0
  }
7371
7372
        /* Prevent entity from being parsed and expanded twice (Bug 760367). */
7373
322k
        was_checked = 0;
7374
322k
    }
7375
7376
    /*
7377
     * Now that the entity content has been gathered
7378
     * provide it to the application, this can take different forms based
7379
     * on the parsing modes.
7380
     */
7381
7.30M
    if (ent->children == NULL) {
7382
  /*
7383
   * Probably running in SAX mode and the callbacks don't
7384
   * build the entity content. So unless we already went
7385
   * through parsing for first checking go through the entity
7386
   * content to generate callbacks associated to the entity
7387
   */
7388
1.79M
  if (was_checked != 0) {
7389
1.65M
      void *user_data;
7390
      /*
7391
       * This is a bit hackish but this seems the best
7392
       * way to make sure both SAX and DOM entity support
7393
       * behaves okay.
7394
       */
7395
1.65M
      if (ctxt->userData == ctxt)
7396
1.65M
    user_data = NULL;
7397
0
      else
7398
0
    user_data = ctxt->userData;
7399
7400
1.65M
      if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7401
121k
    ctxt->depth++;
7402
121k
    ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7403
121k
           ent->content, user_data, NULL);
7404
121k
    ctxt->depth--;
7405
1.53M
      } else if (ent->etype ==
7406
1.53M
           XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7407
1.53M
          unsigned long oldsizeentities = ctxt->sizeentities;
7408
7409
1.53M
    ctxt->depth++;
7410
1.53M
    ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7411
1.53M
         ctxt->sax, user_data, ctxt->depth,
7412
1.53M
         ent->URI, ent->ExternalID, NULL);
7413
1.53M
    ctxt->depth--;
7414
7415
                /* Undo the change to sizeentities */
7416
1.53M
                ctxt->sizeentities = oldsizeentities;
7417
1.53M
      } else {
7418
0
    ret = XML_ERR_ENTITY_PE_INTERNAL;
7419
0
    xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7420
0
           "invalid entity type found\n", NULL);
7421
0
      }
7422
1.65M
      if (ret == XML_ERR_ENTITY_LOOP) {
7423
0
    xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7424
0
    return;
7425
0
      }
7426
1.65M
            if (xmlParserEntityCheck(ctxt, 0))
7427
0
                return;
7428
1.65M
  }
7429
1.79M
  if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7430
1.79M
      (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7431
      /*
7432
       * Entity reference callback comes second, it's somewhat
7433
       * superfluous but a compatibility to historical behaviour
7434
       */
7435
243k
      ctxt->sax->reference(ctxt->userData, ent->name);
7436
243k
  }
7437
1.79M
  return;
7438
1.79M
    }
7439
7440
    /*
7441
     * We also check for amplification if entities aren't substituted.
7442
     * They might be expanded later.
7443
     */
7444
5.51M
    if ((was_checked != 0) &&
7445
5.51M
        (xmlParserEntityCheck(ctxt, ent->expandedSize)))
7446
771
        return;
7447
7448
    /*
7449
     * Entities are not being replaced: report the reference through
     * the SAX reference() callback and stop here.
7450
     */
7451
5.51M
    if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7452
5.51M
  (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7453
  /*
7454
   * Create a node.
7455
   */
7456
2.50M
  ctxt->sax->reference(ctxt->userData, ent->name);
7457
2.50M
  return;
7458
2.50M
    }
7459
7460
3.00M
    if (ctxt->replaceEntities)  {
7461
  /*
7462
   * There is a problem on the handling of _private for entities
7463
   * (bug 155816): Should we copy the content of the field from
7464
   * the entity (possibly overwriting some value set by the user
7465
   * when a copy is created), should we leave it alone, or should
7466
   * we try to take care of different situations?  The problem
7467
   * is exacerbated by the usage of this field by the xmlReader.
7468
   * To fix this bug, we look at _private on the created node
7469
   * and, if it's NULL, we copy in whatever was in the entity.
7470
   * If it's not NULL we leave it alone.  This is somewhat of a
7471
   * hack - maybe we should have further tests to determine
7472
   * what to do.
7473
   */
7474
3.00M
  if (ctxt->node != NULL) {
7475
      /*
7476
       * Seems we are generating the DOM content, do
7477
       * a simple tree copy for all references except the first
7478
       * In the first occurrence list contains the replacement.
7479
       */
7480
3.00M
      if (((list == NULL) && (ent->owner == 0)) ||
7481
3.00M
    (ctxt->parseMode == XML_PARSE_READER)) {
7482
904k
    xmlNodePtr nw = NULL, cur, firstChild = NULL;
7483
7484
    /*
7485
     * when operating on a reader, the entities definitions
7486
     * are always owning the entities subtree.
7487
    if (ctxt->parseMode == XML_PARSE_READER)
7488
        ent->owner = 1;
7489
     */
7490
7491
904k
    /* Add a copy of each entity child under the current node */
    cur = ent->children;
7492
2.75M
    while (cur != NULL) {
7493
2.75M
        nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7494
2.75M
        if (nw != NULL) {
7495
2.75M
      if (nw->_private == NULL)
7496
2.75M
          nw->_private = cur->_private;
7497
2.75M
      if (firstChild == NULL){
7498
904k
          firstChild = nw;
7499
904k
      }
7500
2.75M
      nw = xmlAddChild(ctxt->node, nw);
7501
2.75M
        }
7502
2.75M
        if (cur == ent->last) {
7503
      /*
7504
       * needed to detect some strange empty
7505
       * node cases in the reader tests
7506
       */
7507
904k
      if ((ctxt->parseMode == XML_PARSE_READER) &&
7508
904k
          (nw != NULL) &&
7509
904k
          (nw->type == XML_ELEMENT_NODE) &&
7510
904k
          (nw->children == NULL))
7511
2.31k
          nw->extra = 1;
7512
7513
904k
      break;
7514
904k
        }
7515
1.84M
        cur = cur->next;
7516
1.84M
    }
7517
#ifdef LIBXML_LEGACY_ENABLED
7518
    if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7519
      xmlAddEntityReference(ent, firstChild, nw);
7520
#endif /* LIBXML_LEGACY_ENABLED */
7521
2.09M
      } else if ((list == NULL) || (ctxt->inputNr > 0)) {
7522
2.09M
    xmlNodePtr nw = NULL, cur, next, last,
7523
2.09M
         firstChild = NULL;
7524
7525
    /*
7526
     * Copy the entity child list and make it the new
7527
     * entity child list. The goal is to make sure any
7528
     * ID or REF referenced will be the one from the
7529
     * document content and not the entity copy.
7530
     */
7531
2.09M
    /* Detach the child list from the entity before walking it */
    cur = ent->children;
7532
2.09M
    ent->children = NULL;
7533
2.09M
    last = ent->last;
7534
2.09M
    ent->last = NULL;
7535
16.6M
    while (cur != NULL) {
7536
16.6M
        next = cur->next;
7537
16.6M
        cur->next = NULL;
7538
16.6M
        cur->parent = NULL;
7539
16.6M
        nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7540
16.6M
        if (nw != NULL) {
7541
16.6M
      if (nw->_private == NULL)
7542
16.6M
          nw->_private = cur->_private;
7543
16.6M
      if (firstChild == NULL){
7544
2.09M
          firstChild = cur;
7545
2.09M
      }
7546
16.6M
      /* The copy goes back to the entity ... */
      xmlAddChild((xmlNodePtr) ent, nw);
7547
16.6M
        }
7548
16.6M
        /* ... and the original node is added under the current node */
        xmlAddChild(ctxt->node, cur);
7549
16.6M
        if (cur == last)
7550
2.09M
      break;
7551
14.5M
        cur = next;
7552
14.5M
    }
7553
2.09M
    if (ent->owner == 0)
7554
15.5k
        ent->owner = 1;
7555
#ifdef LIBXML_LEGACY_ENABLED
7556
    if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7557
      xmlAddEntityReference(ent, firstChild, nw);
7558
#endif /* LIBXML_LEGACY_ENABLED */
7559
2.09M
      } else {
7560
0
    const xmlChar *nbktext;
7561
7562
    /*
7563
     * the name change is to avoid coalescing of the
7564
     * node with a possible previous text one which
7565
     * would make ent->children a dangling pointer
7566
     */
7567
0
    nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7568
0
          -1);
7569
0
    if (ent->children->type == XML_TEXT_NODE)
7570
0
        ent->children->name = nbktext;
7571
0
    if ((ent->last != ent->children) &&
7572
0
        (ent->last->type == XML_TEXT_NODE))
7573
0
        ent->last->name = nbktext;
7574
0
    xmlAddChildList(ctxt->node, ent->children);
7575
0
      }
7576
7577
      /*
7578
       * This is to avoid a nasty side effect, see
7579
       * characters() in SAX.c
7580
       */
7581
3.00M
      ctxt->nodemem = 0;
7582
3.00M
      ctxt->nodelen = 0;
7583
3.00M
      return;
7584
3.00M
  }
7585
3.00M
    }
7586
3.00M
}
7587
7588
/**
7589
 * xmlParseEntityRef:
7590
 * @ctxt:  an XML parser context
7591
 *
7592
 * DEPRECATED: Internal function, don't use.
7593
 *
7594
 * Parse an entitiy reference. Always consumes '&'.
7595
 *
7596
 * [68] EntityRef ::= '&' Name ';'
7597
 *
7598
 * [ WFC: Entity Declared ]
7599
 * In a document without any DTD, a document with only an internal DTD
7600
 * subset which contains no parameter entity references, or a document
7601
 * with "standalone='yes'", the Name given in the entity reference
7602
 * must match that in an entity declaration, except that well-formed
7603
 * documents need not declare any of the following entities: amp, lt,
7604
 * gt, apos, quot.  The declaration of a parameter entity must precede
7605
 * any reference to it.  Similarly, the declaration of a general entity
7606
 * must precede any reference to it which appears in a default value in an
7607
 * attribute-list declaration. Note that if entities are declared in the
7608
 * external subset or in external parameter entities, a non-validating
7609
 * processor is not obligated to read and process their declarations;
7610
 * for such documents, the rule that an entity must be declared is a
7611
 * well-formedness constraint only if standalone='yes'.
7612
 *
7613
 * [ WFC: Parsed Entity ]
7614
 * An entity reference must not contain the name of an unparsed entity
7615
 *
7616
 * Returns the xmlEntityPtr if found, or NULL otherwise.
7617
 */
7618
xmlEntityPtr
7619
30.1M
xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7620
30.1M
    const xmlChar *name;
7621
30.1M
    xmlEntityPtr ent = NULL;
7622
7623
30.1M
    GROW;
7624
30.1M
    if (ctxt->instate == XML_PARSER_EOF)
7625
0
        return(NULL);
7626
7627
30.1M
    if (RAW != '&')
7628
0
        return(NULL);
7629
30.1M
    NEXT;
7630
30.1M
    name = xmlParseName(ctxt);
7631
30.1M
    if (name == NULL) {
7632
579k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7633
579k
           "xmlParseEntityRef: no name\n");
7634
579k
        return(NULL);
7635
579k
    }
7636
29.5M
    if (RAW != ';') {
7637
232k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7638
232k
  return(NULL);
7639
232k
    }
7640
29.3M
    NEXT;
7641
7642
    /*
7643
     * Predefined entities override any extra definition
7644
     */
7645
29.3M
    if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7646
19.4M
        ent = xmlGetPredefinedEntity(name);
7647
19.4M
        if (ent != NULL)
7648
1.53M
            return(ent);
7649
19.4M
    }
7650
7651
    /*
7652
     * Ask first SAX for entity resolution, otherwise try the
7653
     * entities which may have stored in the parser context.
7654
     */
7655
27.8M
    if (ctxt->sax != NULL) {
7656
27.8M
  if (ctxt->sax->getEntity != NULL)
7657
27.8M
      ent = ctxt->sax->getEntity(ctxt->userData, name);
7658
27.8M
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7659
27.8M
      (ctxt->options & XML_PARSE_OLDSAX))
7660
35.7k
      ent = xmlGetPredefinedEntity(name);
7661
27.8M
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7662
27.8M
      (ctxt->userData==ctxt)) {
7663
104k
      ent = xmlSAX2GetEntity(ctxt, name);
7664
104k
  }
7665
27.8M
    }
7666
27.8M
    if (ctxt->instate == XML_PARSER_EOF)
7667
0
  return(NULL);
7668
    /*
7669
     * [ WFC: Entity Declared ]
7670
     * In a document without any DTD, a document with only an
7671
     * internal DTD subset which contains no parameter entity
7672
     * references, or a document with "standalone='yes'", the
7673
     * Name given in the entity reference must match that in an
7674
     * entity declaration, except that well-formed documents
7675
     * need not declare any of the following entities: amp, lt,
7676
     * gt, apos, quot.
7677
     * The declaration of a parameter entity must precede any
7678
     * reference to it.
7679
     * Similarly, the declaration of a general entity must
7680
     * precede any reference to it which appears in a default
7681
     * value in an attribute-list declaration. Note that if
7682
     * entities are declared in the external subset or in
7683
     * external parameter entities, a non-validating processor
7684
     * is not obligated to read and process their declarations;
7685
     * for such documents, the rule that an entity must be
7686
     * declared is a well-formedness constraint only if
7687
     * standalone='yes'.
7688
     */
7689
27.8M
    if (ent == NULL) {
7690
1.13M
  if ((ctxt->standalone == 1) ||
7691
1.13M
      ((ctxt->hasExternalSubset == 0) &&
7692
1.10M
       (ctxt->hasPErefs == 0))) {
7693
668k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7694
668k
         "Entity '%s' not defined\n", name);
7695
668k
  } else {
7696
470k
      xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7697
470k
         "Entity '%s' not defined\n", name);
7698
470k
      if ((ctxt->inSubset == 0) &&
7699
470k
    (ctxt->sax != NULL) &&
7700
470k
    (ctxt->sax->reference != NULL)) {
7701
457k
    ctxt->sax->reference(ctxt->userData, name);
7702
457k
      }
7703
470k
  }
7704
1.13M
  ctxt->valid = 0;
7705
1.13M
    }
7706
7707
    /*
7708
     * [ WFC: Parsed Entity ]
7709
     * An entity reference must not contain the name of an
7710
     * unparsed entity
7711
     */
7712
26.6M
    else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7713
3.12k
  xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7714
3.12k
     "Entity reference to unparsed entity %s\n", name);
7715
3.12k
    }
7716
7717
    /*
7718
     * [ WFC: No External Entity References ]
7719
     * Attribute values cannot contain direct or indirect
7720
     * entity references to external entities.
7721
     */
7722
26.6M
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7723
26.6M
       (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7724
61.8k
  xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7725
61.8k
       "Attribute references external entity '%s'\n", name);
7726
61.8k
    }
7727
    /*
7728
     * [ WFC: No < in Attribute Values ]
7729
     * The replacement text of any entity referred to directly or
7730
     * indirectly in an attribute value (other than "&lt;") must
7731
     * not contain a <.
7732
     */
7733
26.6M
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7734
26.6M
       (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7735
6.89M
  if ((ent->flags & XML_ENT_CHECKED_LT) == 0) {
7736
91.3k
            if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7737
3.43k
                ent->flags |= XML_ENT_CONTAINS_LT;
7738
91.3k
            ent->flags |= XML_ENT_CHECKED_LT;
7739
91.3k
        }
7740
6.89M
        if (ent->flags & XML_ENT_CONTAINS_LT)
7741
18.0k
            xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7742
18.0k
                    "'<' in entity '%s' is not allowed in attributes "
7743
18.0k
                    "values\n", name);
7744
6.89M
    }
7745
7746
    /*
7747
     * Internal check, no parameter entities here ...
7748
     */
7749
19.7M
    else {
7750
19.7M
  switch (ent->etype) {
7751
0
      case XML_INTERNAL_PARAMETER_ENTITY:
7752
0
      case XML_EXTERNAL_PARAMETER_ENTITY:
7753
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7754
0
       "Attempt to reference the parameter entity '%s'\n",
7755
0
            name);
7756
0
      break;
7757
19.7M
      default:
7758
19.7M
      break;
7759
19.7M
  }
7760
19.7M
    }
7761
7762
    /*
7763
     * [ WFC: No Recursion ]
7764
     * A parsed entity must not contain a recursive reference
7765
     * to itself, either directly or indirectly.
7766
     * Done somewhere else
7767
     */
7768
27.8M
    return(ent);
7769
27.8M
}
7770
7771
/**
7772
 * xmlParseStringEntityRef:
7773
 * @ctxt:  an XML parser context
7774
 * @str:  a pointer to an index in the string
7775
 *
7776
 * parse ENTITY references declarations, but this version parses it from
7777
 * a string value.
7778
 *
7779
 * [68] EntityRef ::= '&' Name ';'
7780
 *
7781
 * [ WFC: Entity Declared ]
7782
 * In a document without any DTD, a document with only an internal DTD
7783
 * subset which contains no parameter entity references, or a document
7784
 * with "standalone='yes'", the Name given in the entity reference
7785
 * must match that in an entity declaration, except that well-formed
7786
 * documents need not declare any of the following entities: amp, lt,
7787
 * gt, apos, quot.  The declaration of a parameter entity must precede
7788
 * any reference to it.  Similarly, the declaration of a general entity
7789
 * must precede any reference to it which appears in a default value in an
7790
 * attribute-list declaration. Note that if entities are declared in the
7791
 * external subset or in external parameter entities, a non-validating
7792
 * processor is not obligated to read and process their declarations;
7793
 * for such documents, the rule that an entity must be declared is a
7794
 * well-formedness constraint only if standalone='yes'.
7795
 *
7796
 * [ WFC: Parsed Entity ]
7797
 * An entity reference must not contain the name of an unparsed entity
7798
 *
7799
 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7800
 * is updated to the current location in the string.
7801
 */
7802
static xmlEntityPtr
7803
8.45G
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7804
8.45G
    xmlChar *name;
7805
8.45G
    const xmlChar *ptr;
7806
8.45G
    xmlChar cur;
7807
8.45G
    xmlEntityPtr ent = NULL;
7808
7809
8.45G
    if ((str == NULL) || (*str == NULL))
7810
0
        return(NULL);
7811
8.45G
    ptr = *str;
7812
8.45G
    cur = *ptr;
7813
8.45G
    if (cur != '&')
7814
8.23G
  return(NULL);
7815
7816
220M
    ptr++;
7817
220M
    name = xmlParseStringName(ctxt, &ptr);
7818
220M
    if (name == NULL) {
7819
3.96k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7820
3.96k
           "xmlParseStringEntityRef: no name\n");
7821
3.96k
  *str = ptr;
7822
3.96k
  return(NULL);
7823
3.96k
    }
7824
220M
    if (*ptr != ';') {
7825
132k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7826
132k
        xmlFree(name);
7827
132k
  *str = ptr;
7828
132k
  return(NULL);
7829
132k
    }
7830
220M
    ptr++;
7831
7832
7833
    /*
7834
     * Predefined entities override any extra definition
7835
     */
7836
220M
    if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7837
140M
        ent = xmlGetPredefinedEntity(name);
7838
140M
        if (ent != NULL) {
7839
280k
            xmlFree(name);
7840
280k
            *str = ptr;
7841
280k
            return(ent);
7842
280k
        }
7843
140M
    }
7844
7845
    /*
7846
     * Ask first SAX for entity resolution, otherwise try the
7847
     * entities which may have stored in the parser context.
7848
     */
7849
219M
    if (ctxt->sax != NULL) {
7850
219M
  if (ctxt->sax->getEntity != NULL)
7851
219M
      ent = ctxt->sax->getEntity(ctxt->userData, name);
7852
219M
  if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7853
16.8M
      ent = xmlGetPredefinedEntity(name);
7854
219M
  if ((ent == NULL) && (ctxt->userData==ctxt)) {
7855
37.5M
      ent = xmlSAX2GetEntity(ctxt, name);
7856
37.5M
  }
7857
219M
    }
7858
219M
    if (ctxt->instate == XML_PARSER_EOF) {
7859
0
  xmlFree(name);
7860
0
  return(NULL);
7861
0
    }
7862
7863
    /*
7864
     * [ WFC: Entity Declared ]
7865
     * In a document without any DTD, a document with only an
7866
     * internal DTD subset which contains no parameter entity
7867
     * references, or a document with "standalone='yes'", the
7868
     * Name given in the entity reference must match that in an
7869
     * entity declaration, except that well-formed documents
7870
     * need not declare any of the following entities: amp, lt,
7871
     * gt, apos, quot.
7872
     * The declaration of a parameter entity must precede any
7873
     * reference to it.
7874
     * Similarly, the declaration of a general entity must
7875
     * precede any reference to it which appears in a default
7876
     * value in an attribute-list declaration. Note that if
7877
     * entities are declared in the external subset or in
7878
     * external parameter entities, a non-validating processor
7879
     * is not obligated to read and process their declarations;
7880
     * for such documents, the rule that an entity must be
7881
     * declared is a well-formedness constraint only if
7882
     * standalone='yes'.
7883
     */
7884
219M
    if (ent == NULL) {
7885
37.5M
  if ((ctxt->standalone == 1) ||
7886
37.5M
      ((ctxt->hasExternalSubset == 0) &&
7887
37.5M
       (ctxt->hasPErefs == 0))) {
7888
37.4M
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7889
37.4M
         "Entity '%s' not defined\n", name);
7890
37.4M
  } else {
7891
98.4k
      xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7892
98.4k
        "Entity '%s' not defined\n",
7893
98.4k
        name);
7894
98.4k
  }
7895
  /* TODO ? check regressions ctxt->valid = 0; */
7896
37.5M
    }
7897
7898
    /*
7899
     * [ WFC: Parsed Entity ]
7900
     * An entity reference must not contain the name of an
7901
     * unparsed entity
7902
     */
7903
182M
    else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7904
1.29k
  xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7905
1.29k
     "Entity reference to unparsed entity %s\n", name);
7906
1.29k
    }
7907
7908
    /*
7909
     * [ WFC: No External Entity References ]
7910
     * Attribute values cannot contain direct or indirect
7911
     * entity references to external entities.
7912
     */
7913
182M
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7914
182M
       (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7915
87.2k
  xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7916
87.2k
   "Attribute references external entity '%s'\n", name);
7917
87.2k
    }
7918
    /*
7919
     * [ WFC: No < in Attribute Values ]
7920
     * The replacement text of any entity referred to directly or
7921
     * indirectly in an attribute value (other than "&lt;") must
7922
     * not contain a <.
7923
     */
7924
182M
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7925
182M
       (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7926
178M
  if ((ent->flags & XML_ENT_CHECKED_LT) == 0) {
7927
53.1k
            if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7928
1.17k
                ent->flags |= XML_ENT_CONTAINS_LT;
7929
53.1k
            ent->flags |= XML_ENT_CHECKED_LT;
7930
53.1k
        }
7931
178M
        if (ent->flags & XML_ENT_CONTAINS_LT)
7932
2.20M
            xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7933
2.20M
                    "'<' in entity '%s' is not allowed in attributes "
7934
2.20M
                    "values\n", name);
7935
178M
    }
7936
7937
    /*
7938
     * Internal check, no parameter entities here ...
7939
     */
7940
3.58M
    else {
7941
3.58M
  switch (ent->etype) {
7942
0
      case XML_INTERNAL_PARAMETER_ENTITY:
7943
0
      case XML_EXTERNAL_PARAMETER_ENTITY:
7944
0
    xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7945
0
       "Attempt to reference the parameter entity '%s'\n",
7946
0
          name);
7947
0
      break;
7948
3.58M
      default:
7949
3.58M
      break;
7950
3.58M
  }
7951
3.58M
    }
7952
7953
    /*
7954
     * [ WFC: No Recursion ]
7955
     * A parsed entity must not contain a recursive reference
7956
     * to itself, either directly or indirectly.
7957
     * Done somewhere else
7958
     */
7959
7960
219M
    xmlFree(name);
7961
219M
    *str = ptr;
7962
219M
    return(ent);
7963
219M
}
7964
7965
/**
7966
 * xmlParsePEReference:
7967
 * @ctxt:  an XML parser context
7968
 *
7969
 * DEPRECATED: Internal function, don't use.
7970
 *
7971
 * Parse a parameter entity reference. Always consumes '%'.
7972
 *
7973
 * The entity content is handled directly by pushing it's content as
7974
 * a new input stream.
7975
 *
7976
 * [69] PEReference ::= '%' Name ';'
7977
 *
7978
 * [ WFC: No Recursion ]
7979
 * A parsed entity must not contain a recursive
7980
 * reference to itself, either directly or indirectly.
7981
 *
7982
 * [ WFC: Entity Declared ]
7983
 * In a document without any DTD, a document with only an internal DTD
7984
 * subset which contains no parameter entity references, or a document
7985
 * with "standalone='yes'", ...  ... The declaration of a parameter
7986
 * entity must precede any reference to it...
7987
 *
7988
 * [ VC: Entity Declared ]
7989
 * In a document with an external subset or external parameter entities
7990
 * with "standalone='no'", ...  ... The declaration of a parameter entity
7991
 * must precede any reference to it...
7992
 *
7993
 * [ WFC: In DTD ]
7994
 * Parameter-entity references may only appear in the DTD.
7995
 * NOTE: misleading but this is handled.
7996
 */
7997
void
7998
xmlParsePEReference(xmlParserCtxtPtr ctxt)
7999
418M
{
8000
418M
    const xmlChar *name;
8001
418M
    xmlEntityPtr entity = NULL;
8002
418M
    xmlParserInputPtr input;
8003
8004
418M
    if (RAW != '%')
8005
0
        return;
8006
418M
    NEXT;
8007
418M
    name = xmlParseName(ctxt);
8008
418M
    if (name == NULL) {
8009
1.08M
  xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
8010
1.08M
  return;
8011
1.08M
    }
8012
417M
    if (xmlParserDebugEntities)
8013
0
  xmlGenericError(xmlGenericErrorContext,
8014
0
    "PEReference: %s\n", name);
8015
417M
    if (RAW != ';') {
8016
7.06M
  xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
8017
7.06M
        return;
8018
7.06M
    }
8019
8020
410M
    NEXT;
8021
8022
    /*
8023
     * Request the entity from SAX
8024
     */
8025
410M
    if ((ctxt->sax != NULL) &&
8026
410M
  (ctxt->sax->getParameterEntity != NULL))
8027
410M
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8028
410M
    if (ctxt->instate == XML_PARSER_EOF)
8029
0
  return;
8030
410M
    if (entity == NULL) {
8031
  /*
8032
   * [ WFC: Entity Declared ]
8033
   * In a document without any DTD, a document with only an
8034
   * internal DTD subset which contains no parameter entity
8035
   * references, or a document with "standalone='yes'", ...
8036
   * ... The declaration of a parameter entity must precede
8037
   * any reference to it...
8038
   */
8039
58.8M
  if ((ctxt->standalone == 1) ||
8040
58.8M
      ((ctxt->hasExternalSubset == 0) &&
8041
58.8M
       (ctxt->hasPErefs == 0))) {
8042
5.00k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8043
5.00k
            "PEReference: %%%s; not found\n",
8044
5.00k
            name);
8045
58.8M
  } else {
8046
      /*
8047
       * [ VC: Entity Declared ]
8048
       * In a document with an external subset or external
8049
       * parameter entities with "standalone='no'", ...
8050
       * ... The declaration of a parameter entity must
8051
       * precede any reference to it...
8052
       */
8053
58.8M
            if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
8054
11.4M
                xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
8055
11.4M
                                 "PEReference: %%%s; not found\n",
8056
11.4M
                                 name, NULL);
8057
11.4M
            } else
8058
47.4M
                xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8059
47.4M
                              "PEReference: %%%s; not found\n",
8060
47.4M
                              name, NULL);
8061
58.8M
            ctxt->valid = 0;
8062
58.8M
  }
8063
351M
    } else {
8064
  /*
8065
   * Internal checking in case the entity quest barfed
8066
   */
8067
351M
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8068
351M
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8069
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8070
0
      "Internal: %%%s; is not a parameter entity\n",
8071
0
        name, NULL);
8072
351M
  } else {
8073
351M
            xmlChar start[4];
8074
351M
            xmlCharEncoding enc;
8075
351M
            unsigned long parentConsumed;
8076
351M
            xmlEntityPtr oldEnt;
8077
8078
351M
      if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
8079
351M
          ((ctxt->options & XML_PARSE_NOENT) == 0) &&
8080
351M
    ((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
8081
351M
    ((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
8082
351M
    ((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
8083
351M
    (ctxt->replaceEntities == 0) &&
8084
351M
    (ctxt->validate == 0))
8085
301
    return;
8086
8087
351M
            if (entity->flags & XML_ENT_EXPANDING) {
8088
1.05k
                xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
8089
1.05k
                xmlHaltParser(ctxt);
8090
1.05k
                return;
8091
1.05k
            }
8092
8093
            /* Must be computed from old input before pushing new input. */
8094
351M
            parentConsumed = ctxt->input->parentConsumed;
8095
351M
            oldEnt = ctxt->input->entity;
8096
351M
            if ((oldEnt == NULL) ||
8097
351M
                ((oldEnt->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
8098
346M
                 ((oldEnt->flags & XML_ENT_PARSED) == 0))) {
8099
7.48M
                xmlSaturatedAdd(&parentConsumed, ctxt->input->consumed);
8100
7.48M
                xmlSaturatedAddSizeT(&parentConsumed,
8101
7.48M
                                     ctxt->input->cur - ctxt->input->base);
8102
7.48M
            }
8103
8104
351M
      input = xmlNewEntityInputStream(ctxt, entity);
8105
351M
      if (xmlPushInput(ctxt, input) < 0) {
8106
6.39k
                xmlFreeInputStream(input);
8107
6.39k
    return;
8108
6.39k
            }
8109
8110
351M
            entity->flags |= XML_ENT_EXPANDING;
8111
8112
351M
            input->parentConsumed = parentConsumed;
8113
8114
351M
      if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
8115
                /*
8116
                 * Get the 4 first bytes and decode the charset
8117
                 * if enc != XML_CHAR_ENCODING_NONE
8118
                 * plug some encoding conversion routines.
8119
                 * Note that, since we may have some non-UTF8
8120
                 * encoding (like UTF16, bug 135229), the 'length'
8121
                 * is not known, but we can calculate based upon
8122
                 * the amount of data in the buffer.
8123
                 */
8124
30.9k
                GROW
8125
30.9k
                if (ctxt->instate == XML_PARSER_EOF)
8126
0
                    return;
8127
30.9k
                if ((ctxt->input->end - ctxt->input->cur)>=4) {
8128
30.8k
                    start[0] = RAW;
8129
30.8k
                    start[1] = NXT(1);
8130
30.8k
                    start[2] = NXT(2);
8131
30.8k
                    start[3] = NXT(3);
8132
30.8k
                    enc = xmlDetectCharEncoding(start, 4);
8133
30.8k
                    if (enc != XML_CHAR_ENCODING_NONE) {
8134
3.82k
                        xmlSwitchEncoding(ctxt, enc);
8135
3.82k
                    }
8136
30.8k
                }
8137
8138
30.9k
                if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
8139
30.9k
                    (IS_BLANK_CH(NXT(5)))) {
8140
2.93k
                    xmlParseTextDecl(ctxt);
8141
2.93k
                }
8142
30.9k
            }
8143
351M
  }
8144
351M
    }
8145
410M
    ctxt->hasPErefs = 1;
8146
410M
}
8147
8148
/**
8149
 * xmlLoadEntityContent:
8150
 * @ctxt:  an XML parser context
8151
 * @entity: an unloaded system entity
8152
 *
8153
 * Load the original content of the given system entity from the
8154
 * ExternalID/SystemID given. This is to be used for Included in Literal
8155
 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
8156
 *
8157
 * Returns 0 in case of success and -1 in case of failure
8158
 */
8159
static int
8160
11.1k
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
8161
11.1k
    xmlParserInputPtr input;
8162
11.1k
    xmlBufferPtr buf;
8163
11.1k
    int l, c;
8164
11.1k
    int count = 0;
8165
8166
11.1k
    if ((ctxt == NULL) || (entity == NULL) ||
8167
11.1k
        ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
8168
11.1k
   (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
8169
11.1k
  (entity->content != NULL)) {
8170
0
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8171
0
              "xmlLoadEntityContent parameter error");
8172
0
        return(-1);
8173
0
    }
8174
8175
11.1k
    if (xmlParserDebugEntities)
8176
0
  xmlGenericError(xmlGenericErrorContext,
8177
0
    "Reading %s entity content input\n", entity->name);
8178
8179
11.1k
    buf = xmlBufferCreate();
8180
11.1k
    if (buf == NULL) {
8181
0
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8182
0
              "xmlLoadEntityContent parameter error");
8183
0
        return(-1);
8184
0
    }
8185
11.1k
    xmlBufferSetAllocationScheme(buf, XML_BUFFER_ALLOC_DOUBLEIT);
8186
8187
11.1k
    input = xmlNewEntityInputStream(ctxt, entity);
8188
11.1k
    if (input == NULL) {
8189
1.94k
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8190
1.94k
              "xmlLoadEntityContent input error");
8191
1.94k
  xmlBufferFree(buf);
8192
1.94k
        return(-1);
8193
1.94k
    }
8194
8195
    /*
8196
     * Push the entity as the current input, read char by char
8197
     * saving to the buffer until the end of the entity or an error
8198
     */
8199
9.19k
    if (xmlPushInput(ctxt, input) < 0) {
8200
0
        xmlBufferFree(buf);
8201
0
  xmlFreeInputStream(input);
8202
0
  return(-1);
8203
0
    }
8204
8205
9.19k
    GROW;
8206
9.19k
    c = CUR_CHAR(l);
8207
16.2M
    while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
8208
16.2M
           (IS_CHAR(c))) {
8209
16.2M
        xmlBufferAdd(buf, ctxt->input->cur, l);
8210
16.2M
  if (count++ > XML_PARSER_CHUNK_SIZE) {
8211
155k
      count = 0;
8212
155k
      GROW;
8213
155k
            if (ctxt->instate == XML_PARSER_EOF) {
8214
0
                xmlBufferFree(buf);
8215
0
                return(-1);
8216
0
            }
8217
155k
  }
8218
16.2M
  NEXTL(l);
8219
16.2M
  c = CUR_CHAR(l);
8220
16.2M
  if (c == 0) {
8221
7.92k
      count = 0;
8222
7.92k
      GROW;
8223
7.92k
            if (ctxt->instate == XML_PARSER_EOF) {
8224
0
                xmlBufferFree(buf);
8225
0
                return(-1);
8226
0
            }
8227
7.92k
      c = CUR_CHAR(l);
8228
7.92k
  }
8229
16.2M
    }
8230
8231
9.19k
    if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
8232
5.67k
        xmlSaturatedAdd(&ctxt->sizeentities, ctxt->input->consumed);
8233
5.67k
        xmlPopInput(ctxt);
8234
5.67k
    } else if (!IS_CHAR(c)) {
8235
3.52k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8236
3.52k
                          "xmlLoadEntityContent: invalid char value %d\n",
8237
3.52k
                    c);
8238
3.52k
  xmlBufferFree(buf);
8239
3.52k
  return(-1);
8240
3.52k
    }
8241
5.67k
    entity->content = buf->content;
8242
5.67k
    entity->length = buf->use;
8243
5.67k
    buf->content = NULL;
8244
5.67k
    xmlBufferFree(buf);
8245
8246
5.67k
    return(0);
8247
9.19k
}
8248
8249
/**
8250
 * xmlParseStringPEReference:
8251
 * @ctxt:  an XML parser context
8252
 * @str:  a pointer to an index in the string
8253
 *
8254
 * parse PEReference declarations
8255
 *
8256
 * [69] PEReference ::= '%' Name ';'
8257
 *
8258
 * [ WFC: No Recursion ]
8259
 * A parsed entity must not contain a recursive
8260
 * reference to itself, either directly or indirectly.
8261
 *
8262
 * [ WFC: Entity Declared ]
8263
 * In a document without any DTD, a document with only an internal DTD
8264
 * subset which contains no parameter entity references, or a document
8265
 * with "standalone='yes'", ...  ... The declaration of a parameter
8266
 * entity must precede any reference to it...
8267
 *
8268
 * [ VC: Entity Declared ]
8269
 * In a document with an external subset or external parameter entities
8270
 * with "standalone='no'", ...  ... The declaration of a parameter entity
8271
 * must precede any reference to it...
8272
 *
8273
 * [ WFC: In DTD ]
8274
 * Parameter-entity references may only appear in the DTD.
8275
 * NOTE: misleading but this is handled.
8276
 *
8277
 * Returns the parameter entity (xmlEntityPtr), or NULL on error.
8278
 *         str is updated to the current value of the index
8279
 */
8280
static xmlEntityPtr
8281
3.05M
xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8282
3.05M
    const xmlChar *ptr;
8283
3.05M
    xmlChar cur;
8284
3.05M
    xmlChar *name;
8285
3.05M
    xmlEntityPtr entity = NULL;
8286
8287
3.05M
    if ((str == NULL) || (*str == NULL)) return(NULL);
8288
3.05M
    ptr = *str;
8289
3.05M
    cur = *ptr;
8290
3.05M
    if (cur != '%')
8291
0
        return(NULL);
8292
3.05M
    ptr++;
8293
3.05M
    name = xmlParseStringName(ctxt, &ptr);
8294
3.05M
    if (name == NULL) {
8295
49.6k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8296
49.6k
           "xmlParseStringPEReference: no name\n");
8297
49.6k
  *str = ptr;
8298
49.6k
  return(NULL);
8299
49.6k
    }
8300
3.01M
    cur = *ptr;
8301
3.01M
    if (cur != ';') {
8302
4.05k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8303
4.05k
  xmlFree(name);
8304
4.05k
  *str = ptr;
8305
4.05k
  return(NULL);
8306
4.05k
    }
8307
3.00M
    ptr++;
8308
8309
    /*
8310
     * Request the entity from SAX
8311
     */
8312
3.00M
    if ((ctxt->sax != NULL) &&
8313
3.00M
  (ctxt->sax->getParameterEntity != NULL))
8314
3.00M
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8315
3.00M
    if (ctxt->instate == XML_PARSER_EOF) {
8316
0
  xmlFree(name);
8317
0
  *str = ptr;
8318
0
  return(NULL);
8319
0
    }
8320
3.00M
    if (entity == NULL) {
8321
  /*
8322
   * [ WFC: Entity Declared ]
8323
   * In a document without any DTD, a document with only an
8324
   * internal DTD subset which contains no parameter entity
8325
   * references, or a document with "standalone='yes'", ...
8326
   * ... The declaration of a parameter entity must precede
8327
   * any reference to it...
8328
   */
8329
131k
  if ((ctxt->standalone == 1) ||
8330
131k
      ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8331
4.08k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8332
4.08k
     "PEReference: %%%s; not found\n", name);
8333
127k
  } else {
8334
      /*
8335
       * [ VC: Entity Declared ]
8336
       * In a document with an external subset or external
8337
       * parameter entities with "standalone='no'", ...
8338
       * ... The declaration of a parameter entity must
8339
       * precede any reference to it...
8340
       */
8341
127k
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8342
127k
        "PEReference: %%%s; not found\n",
8343
127k
        name, NULL);
8344
127k
      ctxt->valid = 0;
8345
127k
  }
8346
2.87M
    } else {
8347
  /*
8348
   * Internal checking in case the entity quest barfed
8349
   */
8350
2.87M
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8351
2.87M
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8352
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8353
0
        "%%%s; is not a parameter entity\n",
8354
0
        name, NULL);
8355
0
  }
8356
2.87M
    }
8357
3.00M
    ctxt->hasPErefs = 1;
8358
3.00M
    xmlFree(name);
8359
3.00M
    *str = ptr;
8360
3.00M
    return(entity);
8361
3.00M
}
8362
8363
/**
8364
 * xmlParseDocTypeDecl:
8365
 * @ctxt:  an XML parser context
8366
 *
8367
 * DEPRECATED: Internal function, don't use.
8368
 *
8369
 * parse a DOCTYPE declaration
8370
 *
8371
 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8372
 *                      ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8373
 *
8374
 * [ VC: Root Element Type ]
8375
 * The Name in the document type declaration must match the element
8376
 * type of the root element.
8377
 */
8378
8379
void
8380
670k
xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
8381
670k
    const xmlChar *name = NULL;
8382
670k
    xmlChar *ExternalID = NULL;
8383
670k
    xmlChar *URI = NULL;
8384
8385
    /*
8386
     * We know that '<!DOCTYPE' has been detected.
8387
     */
8388
670k
    SKIP(9);
8389
8390
670k
    SKIP_BLANKS;
8391
8392
    /*
8393
     * Parse the DOCTYPE name.
8394
     */
8395
670k
    name = xmlParseName(ctxt);
8396
670k
    if (name == NULL) {
8397
2.87k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8398
2.87k
           "xmlParseDocTypeDecl : no DOCTYPE name !\n");
8399
2.87k
    }
8400
670k
    ctxt->intSubName = name;
8401
8402
670k
    SKIP_BLANKS;
8403
8404
    /*
8405
     * Check for SystemID and ExternalID
8406
     */
8407
670k
    URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8408
8409
670k
    if ((URI != NULL) || (ExternalID != NULL)) {
8410
307k
        ctxt->hasExternalSubset = 1;
8411
307k
    }
8412
670k
    ctxt->extSubURI = URI;
8413
670k
    ctxt->extSubSystem = ExternalID;
8414
8415
670k
    SKIP_BLANKS;
8416
8417
    /*
8418
     * Create and update the internal subset.
8419
     */
8420
670k
    if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8421
670k
  (!ctxt->disableSAX))
8422
650k
  ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8423
670k
    if (ctxt->instate == XML_PARSER_EOF)
8424
0
  return;
8425
8426
    /*
8427
     * Is there any internal subset declarations ?
8428
     * they are handled separately in xmlParseInternalSubset()
8429
     */
8430
670k
    if (RAW == '[')
8431
459k
  return;
8432
8433
    /*
8434
     * We should be at the end of the DOCTYPE declaration.
8435
     */
8436
210k
    if (RAW != '>') {
8437
33.4k
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8438
33.4k
    }
8439
210k
    NEXT;
8440
210k
}
8441
8442
/**
8443
 * xmlParseInternalSubset:
8444
 * @ctxt:  an XML parser context
8445
 *
8446
 * parse the internal subset declaration
8447
 *
8448
 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8449
 */
8450
8451
static void
8452
463k
xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8453
    /*
8454
     * Is there any DTD definition ?
8455
     */
8456
463k
    if (RAW == '[') {
8457
463k
        int baseInputNr = ctxt->inputNr;
8458
463k
        ctxt->instate = XML_PARSER_DTD;
8459
463k
        NEXT;
8460
  /*
8461
   * Parse the succession of Markup declarations and
8462
   * PEReferences.
8463
   * Subsequence (markupdecl | PEReference | S)*
8464
   */
8465
463k
  SKIP_BLANKS;
8466
354M
  while (((RAW != ']') || (ctxt->inputNr > baseInputNr)) &&
8467
354M
               (ctxt->instate != XML_PARSER_EOF)) {
8468
8469
            /*
8470
             * Conditional sections are allowed from external entities included
8471
             * by PE References in the internal subset.
8472
             */
8473
354M
            if ((ctxt->inputNr > 1) && (ctxt->input->filename != NULL) &&
8474
354M
                (RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
8475
0
                xmlParseConditionalSections(ctxt);
8476
354M
            } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
8477
353M
          xmlParseMarkupDecl(ctxt);
8478
353M
            } else if (RAW == '%') {
8479
970k
          xmlParsePEReference(ctxt);
8480
970k
            } else {
8481
87.1k
    xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8482
87.1k
                        "xmlParseInternalSubset: error detected in"
8483
87.1k
                        " Markup declaration\n");
8484
87.1k
                xmlHaltParser(ctxt);
8485
87.1k
                return;
8486
87.1k
            }
8487
354M
      SKIP_BLANKS;
8488
354M
  }
8489
376k
  if (RAW == ']') {
8490
348k
      NEXT;
8491
348k
      SKIP_BLANKS;
8492
348k
  }
8493
376k
    }
8494
8495
    /*
8496
     * We should be at the end of the DOCTYPE declaration.
8497
     */
8498
376k
    if (RAW != '>') {
8499
30.7k
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8500
30.7k
  return;
8501
30.7k
    }
8502
345k
    NEXT;
8503
345k
}
8504
8505
#ifdef LIBXML_SAX1_ENABLED
8506
/**
8507
 * xmlParseAttribute:
8508
 * @ctxt:  an XML parser context
8509
 * @value:  a xmlChar ** used to store the value of the attribute
8510
 *
8511
 * DEPRECATED: Internal function, don't use.
8512
 *
8513
 * parse an attribute
8514
 *
8515
 * [41] Attribute ::= Name Eq AttValue
8516
 *
8517
 * [ WFC: No External Entity References ]
8518
 * Attribute values cannot contain direct or indirect entity references
8519
 * to external entities.
8520
 *
8521
 * [ WFC: No < in Attribute Values ]
8522
 * The replacement text of any entity referred to directly or indirectly in
8523
 * an attribute value (other than "&lt;") must not contain a <.
8524
 *
8525
 * [ VC: Attribute Value Type ]
8526
 * The attribute must have been declared; the value must be of the type
8527
 * declared for it.
8528
 *
8529
 * [25] Eq ::= S? '=' S?
8530
 *
8531
 * With namespace:
8532
 *
8533
 * [NS 11] Attribute ::= QName Eq AttValue
8534
 *
8535
 * Also the case QName == xmlns:??? is handled independently as a namespace
8536
 * definition.
8537
 *
8538
 * Returns the attribute name, and the value in *value.
8539
 */
8540
8541
const xmlChar *
8542
35.1M
xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8543
35.1M
    const xmlChar *name;
8544
35.1M
    xmlChar *val;
8545
8546
35.1M
    *value = NULL;
8547
35.1M
    GROW;
8548
35.1M
    name = xmlParseName(ctxt);
8549
35.1M
    if (name == NULL) {
8550
803k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8551
803k
                 "error parsing attribute name\n");
8552
803k
        return(NULL);
8553
803k
    }
8554
8555
    /*
8556
     * read the value
8557
     */
8558
34.3M
    SKIP_BLANKS;
8559
34.3M
    if (RAW == '=') {
8560
33.9M
        NEXT;
8561
33.9M
  SKIP_BLANKS;
8562
33.9M
  val = xmlParseAttValue(ctxt);
8563
33.9M
  ctxt->instate = XML_PARSER_CONTENT;
8564
33.9M
    } else {
8565
372k
  xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8566
372k
         "Specification mandates value for attribute %s\n", name);
8567
372k
  return(name);
8568
372k
    }
8569
8570
    /*
8571
     * Check that xml:lang conforms to the specification
8572
     * No more registered as an error, just generate a warning now
8573
     * since this was deprecated in XML second edition
8574
     */
8575
33.9M
    if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8576
59.8k
  if (!xmlCheckLanguageID(val)) {
8577
30.8k
      xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8578
30.8k
              "Malformed value for xml:lang : %s\n",
8579
30.8k
        val, NULL);
8580
30.8k
  }
8581
59.8k
    }
8582
8583
    /*
8584
     * Check that xml:space conforms to the specification
8585
     */
8586
33.9M
    if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8587
4.22k
  if (xmlStrEqual(val, BAD_CAST "default"))
8588
374
      *(ctxt->space) = 0;
8589
3.85k
  else if (xmlStrEqual(val, BAD_CAST "preserve"))
8590
1.37k
      *(ctxt->space) = 1;
8591
2.47k
  else {
8592
2.47k
    xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8593
2.47k
"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8594
2.47k
                                 val, NULL);
8595
2.47k
  }
8596
4.22k
    }
8597
8598
33.9M
    *value = val;
8599
33.9M
    return(name);
8600
34.3M
}
8601
8602
/**
8603
 * xmlParseStartTag:
8604
 * @ctxt:  an XML parser context
8605
 *
8606
 * DEPRECATED: Internal function, don't use.
8607
 *
8608
 * Parse a start tag. Always consumes '<'.
8609
 *
8610
 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8611
 *
8612
 * [ WFC: Unique Att Spec ]
8613
 * No attribute name may appear more than once in the same start-tag or
8614
 * empty-element tag.
8615
 *
8616
 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8617
 *
8618
 * [ WFC: Unique Att Spec ]
8619
 * No attribute name may appear more than once in the same start-tag or
8620
 * empty-element tag.
8621
 *
8622
 * With namespace:
8623
 *
8624
 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8625
 *
8626
 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8627
 *
8628
 * Returns the element name parsed
8629
 */
8630
8631
const xmlChar *
8632
29.0M
xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8633
29.0M
    const xmlChar *name;
8634
29.0M
    const xmlChar *attname;
8635
29.0M
    xmlChar *attvalue;
8636
29.0M
    const xmlChar **atts = ctxt->atts;
8637
29.0M
    int nbatts = 0;
8638
29.0M
    int maxatts = ctxt->maxatts;
8639
29.0M
    int i;
8640
8641
29.0M
    if (RAW != '<') return(NULL);
8642
29.0M
    NEXT1;
8643
8644
29.0M
    name = xmlParseName(ctxt);
8645
29.0M
    if (name == NULL) {
8646
297k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8647
297k
       "xmlParseStartTag: invalid element name\n");
8648
297k
        return(NULL);
8649
297k
    }
8650
8651
    /*
8652
     * Now parse the attributes, it ends up with the ending
8653
     *
8654
     * (S Attribute)* S?
8655
     */
8656
28.7M
    SKIP_BLANKS;
8657
28.7M
    GROW;
8658
8659
43.7M
    while (((RAW != '>') &&
8660
43.7M
     ((RAW != '/') || (NXT(1) != '>')) &&
8661
43.7M
     (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
8662
35.1M
  attname = xmlParseAttribute(ctxt, &attvalue);
8663
35.1M
        if (attname == NULL) {
8664
803k
      xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8665
803k
         "xmlParseStartTag: problem parsing attributes\n");
8666
803k
      break;
8667
803k
  }
8668
34.3M
        if (attvalue != NULL) {
8669
      /*
8670
       * [ WFC: Unique Att Spec ]
8671
       * No attribute name may appear more than once in the same
8672
       * start-tag or empty-element tag.
8673
       */
8674
48.3M
      for (i = 0; i < nbatts;i += 2) {
8675
14.4M
          if (xmlStrEqual(atts[i], attname)) {
8676
37.3k
        xmlErrAttributeDup(ctxt, NULL, attname);
8677
37.3k
        xmlFree(attvalue);
8678
37.3k
        goto failed;
8679
37.3k
    }
8680
14.4M
      }
8681
      /*
8682
       * Add the pair to atts
8683
       */
8684
33.8M
      if (atts == NULL) {
8685
170k
          maxatts = 22; /* allow for 10 attrs by default */
8686
170k
          atts = (const xmlChar **)
8687
170k
           xmlMalloc(maxatts * sizeof(xmlChar *));
8688
170k
    if (atts == NULL) {
8689
0
        xmlErrMemory(ctxt, NULL);
8690
0
        if (attvalue != NULL)
8691
0
      xmlFree(attvalue);
8692
0
        goto failed;
8693
0
    }
8694
170k
    ctxt->atts = atts;
8695
170k
    ctxt->maxatts = maxatts;
8696
33.6M
      } else if (nbatts + 4 > maxatts) {
8697
497
          const xmlChar **n;
8698
8699
497
          maxatts *= 2;
8700
497
          n = (const xmlChar **) xmlRealloc((void *) atts,
8701
497
               maxatts * sizeof(const xmlChar *));
8702
497
    if (n == NULL) {
8703
0
        xmlErrMemory(ctxt, NULL);
8704
0
        if (attvalue != NULL)
8705
0
      xmlFree(attvalue);
8706
0
        goto failed;
8707
0
    }
8708
497
    atts = n;
8709
497
    ctxt->atts = atts;
8710
497
    ctxt->maxatts = maxatts;
8711
497
      }
8712
33.8M
      atts[nbatts++] = attname;
8713
33.8M
      atts[nbatts++] = attvalue;
8714
33.8M
      atts[nbatts] = NULL;
8715
33.8M
      atts[nbatts + 1] = NULL;
8716
33.8M
  } else {
8717
414k
      if (attvalue != NULL)
8718
0
    xmlFree(attvalue);
8719
414k
  }
8720
8721
34.3M
failed:
8722
8723
34.3M
  GROW
8724
34.3M
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8725
19.3M
      break;
8726
14.9M
  if (SKIP_BLANKS == 0) {
8727
800k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8728
800k
         "attributes construct error\n");
8729
800k
  }
8730
14.9M
  SHRINK;
8731
14.9M
        GROW;
8732
14.9M
    }
8733
8734
    /*
8735
     * SAX: Start of Element !
8736
     */
8737
28.7M
    if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8738
28.7M
  (!ctxt->disableSAX)) {
8739
26.6M
  if (nbatts > 0)
8740
18.2M
      ctxt->sax->startElement(ctxt->userData, name, atts);
8741
8.45M
  else
8742
8.45M
      ctxt->sax->startElement(ctxt->userData, name, NULL);
8743
26.6M
    }
8744
8745
28.7M
    if (atts != NULL) {
8746
        /* Free only the content strings */
8747
61.8M
        for (i = 1;i < nbatts;i+=2)
8748
33.8M
      if (atts[i] != NULL)
8749
33.8M
         xmlFree((xmlChar *) atts[i]);
8750
27.9M
    }
8751
28.7M
    return(name);
8752
28.7M
}
8753
8754
/**
8755
 * xmlParseEndTag1:
8756
 * @ctxt:  an XML parser context
8757
 * @line:  line of the start tag
8758
 * @nsNr:  number of namespaces on the start tag
8759
 *
8760
 * Parse an end tag. Always consumes '</'.
8761
 *
8762
 * [42] ETag ::= '</' Name S? '>'
8763
 *
8764
 * With namespace
8765
 *
8766
 * [NS 9] ETag ::= '</' QName S? '>'
8767
 */
8768
8769
static void
8770
12.9M
xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8771
12.9M
    const xmlChar *name;
8772
8773
12.9M
    GROW;
8774
12.9M
    if ((RAW != '<') || (NXT(1) != '/')) {
8775
0
  xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8776
0
           "xmlParseEndTag: '</' not found\n");
8777
0
  return;
8778
0
    }
8779
12.9M
    SKIP(2);
8780
8781
12.9M
    name = xmlParseNameAndCompare(ctxt,ctxt->name);
8782
8783
    /*
8784
     * We should definitely be at the ending "S? '>'" part
8785
     */
8786
12.9M
    GROW;
8787
12.9M
    SKIP_BLANKS;
8788
12.9M
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8789
102k
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8790
102k
    } else
8791
12.8M
  NEXT1;
8792
8793
    /*
8794
     * [ WFC: Element Type Match ]
8795
     * The Name in an element's end-tag must match the element type in the
8796
     * start-tag.
8797
     *
8798
     */
8799
12.9M
    if (name != (xmlChar*)1) {
8800
298k
        if (name == NULL) name = BAD_CAST "unparsable";
8801
298k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8802
298k
         "Opening and ending tag mismatch: %s line %d and %s\n",
8803
298k
                    ctxt->name, line, name);
8804
298k
    }
8805
8806
    /*
8807
     * SAX: End of Tag
8808
     */
8809
12.9M
    if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8810
12.9M
  (!ctxt->disableSAX))
8811
12.3M
        ctxt->sax->endElement(ctxt->userData, ctxt->name);
8812
8813
12.9M
    namePop(ctxt);
8814
12.9M
    spacePop(ctxt);
8815
12.9M
    return;
8816
12.9M
}
8817
8818
/**
8819
 * xmlParseEndTag:
8820
 * @ctxt:  an XML parser context
8821
 *
8822
 * DEPRECATED: Internal function, don't use.
8823
 *
8824
 * parse an end of tag
8825
 *
8826
 * [42] ETag ::= '</' Name S? '>'
8827
 *
8828
 * With namespace
8829
 *
8830
 * [NS 9] ETag ::= '</' QName S? '>'
8831
 */
8832
8833
void
8834
0
xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8835
0
    xmlParseEndTag1(ctxt, 0);
8836
0
}
8837
#endif /* LIBXML_SAX1_ENABLED */
8838
8839
/************************************************************************
8840
 *                  *
8841
 *          SAX 2 specific operations       *
8842
 *                  *
8843
 ************************************************************************/
8844
8845
/*
8846
 * xmlGetNamespace:
8847
 * @ctxt:  an XML parser context
8848
 * @prefix:  the prefix to lookup
8849
 *
8850
 * Lookup the namespace name for the @prefix (which ca be NULL)
8851
 * The prefix must come from the @ctxt->dict dictionary
8852
 *
8853
 * Returns the namespace name or NULL if not bound
8854
 */
8855
static const xmlChar *
8856
39.9M
xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8857
39.9M
    int i;
8858
8859
39.9M
    if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
8860
74.6M
    for (i = ctxt->nsNr - 2;i >= 0;i-=2)
8861
37.6M
        if (ctxt->nsTab[i] == prefix) {
8862
2.38M
      if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8863
57.7k
          return(NULL);
8864
2.33M
      return(ctxt->nsTab[i + 1]);
8865
2.38M
  }
8866
36.9M
    return(NULL);
8867
39.3M
}
8868
8869
/**
8870
 * xmlParseQName:
8871
 * @ctxt:  an XML parser context
8872
 * @prefix:  pointer to store the prefix part
8873
 *
8874
 * parse an XML Namespace QName
8875
 *
8876
 * [6]  QName  ::= (Prefix ':')? LocalPart
8877
 * [7]  Prefix  ::= NCName
8878
 * [8]  LocalPart  ::= NCName
8879
 *
8880
 * Returns the Name parsed or NULL
8881
 */
8882
8883
static const xmlChar *
8884
85.7M
xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8885
85.7M
    const xmlChar *l, *p;
8886
8887
85.7M
    GROW;
8888
8889
85.7M
    l = xmlParseNCName(ctxt);
8890
85.7M
    if (l == NULL) {
8891
999k
        if (CUR == ':') {
8892
32.9k
      l = xmlParseName(ctxt);
8893
32.9k
      if (l != NULL) {
8894
32.9k
          xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8895
32.9k
             "Failed to parse QName '%s'\n", l, NULL, NULL);
8896
32.9k
    *prefix = NULL;
8897
32.9k
    return(l);
8898
32.9k
      }
8899
32.9k
  }
8900
966k
        return(NULL);
8901
999k
    }
8902
84.7M
    if (CUR == ':') {
8903
3.29M
        NEXT;
8904
3.29M
  p = l;
8905
3.29M
  l = xmlParseNCName(ctxt);
8906
3.29M
  if (l == NULL) {
8907
61.9k
      xmlChar *tmp;
8908
8909
61.9k
            if (ctxt->instate == XML_PARSER_EOF)
8910
0
                return(NULL);
8911
61.9k
            xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8912
61.9k
               "Failed to parse QName '%s:'\n", p, NULL, NULL);
8913
61.9k
      l = xmlParseNmtoken(ctxt);
8914
61.9k
      if (l == NULL) {
8915
42.3k
                if (ctxt->instate == XML_PARSER_EOF)
8916
0
                    return(NULL);
8917
42.3k
    tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8918
42.3k
            } else {
8919
19.5k
    tmp = xmlBuildQName(l, p, NULL, 0);
8920
19.5k
    xmlFree((char *)l);
8921
19.5k
      }
8922
61.9k
      p = xmlDictLookup(ctxt->dict, tmp, -1);
8923
61.9k
      if (tmp != NULL) xmlFree(tmp);
8924
61.9k
      *prefix = NULL;
8925
61.9k
      return(p);
8926
61.9k
  }
8927
3.22M
  if (CUR == ':') {
8928
80.2k
      xmlChar *tmp;
8929
8930
80.2k
            xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8931
80.2k
               "Failed to parse QName '%s:%s:'\n", p, l, NULL);
8932
80.2k
      NEXT;
8933
80.2k
      tmp = (xmlChar *) xmlParseName(ctxt);
8934
80.2k
      if (tmp != NULL) {
8935
61.0k
          tmp = xmlBuildQName(tmp, l, NULL, 0);
8936
61.0k
    l = xmlDictLookup(ctxt->dict, tmp, -1);
8937
61.0k
    if (tmp != NULL) xmlFree(tmp);
8938
61.0k
    *prefix = p;
8939
61.0k
    return(l);
8940
61.0k
      }
8941
19.1k
            if (ctxt->instate == XML_PARSER_EOF)
8942
0
                return(NULL);
8943
19.1k
      tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8944
19.1k
      l = xmlDictLookup(ctxt->dict, tmp, -1);
8945
19.1k
      if (tmp != NULL) xmlFree(tmp);
8946
19.1k
      *prefix = p;
8947
19.1k
      return(l);
8948
19.1k
  }
8949
3.14M
  *prefix = p;
8950
3.14M
    } else
8951
81.4M
        *prefix = NULL;
8952
84.6M
    return(l);
8953
84.7M
}
8954
8955
/**
8956
 * xmlParseQNameAndCompare:
8957
 * @ctxt:  an XML parser context
8958
 * @name:  the localname
8959
 * @prefix:  the prefix, if any.
8960
 *
8961
 * parse an XML name and compares for match
8962
 * (specialized for endtag parsing)
8963
 *
8964
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8965
 * and the name for mismatch
8966
 */
8967
8968
static const xmlChar *
8969
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8970
630k
                        xmlChar const *prefix) {
8971
630k
    const xmlChar *cmp;
8972
630k
    const xmlChar *in;
8973
630k
    const xmlChar *ret;
8974
630k
    const xmlChar *prefix2;
8975
8976
630k
    if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8977
8978
630k
    GROW;
8979
630k
    in = ctxt->input->cur;
8980
8981
630k
    cmp = prefix;
8982
2.20M
    while (*in != 0 && *in == *cmp) {
8983
1.57M
  ++in;
8984
1.57M
  ++cmp;
8985
1.57M
    }
8986
630k
    if ((*cmp == 0) && (*in == ':')) {
8987
574k
        in++;
8988
574k
  cmp = name;
8989
4.60M
  while (*in != 0 && *in == *cmp) {
8990
4.03M
      ++in;
8991
4.03M
      ++cmp;
8992
4.03M
  }
8993
574k
  if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8994
      /* success */
8995
470k
            ctxt->input->col += in - ctxt->input->cur;
8996
470k
      ctxt->input->cur = in;
8997
470k
      return((const xmlChar*) 1);
8998
470k
  }
8999
574k
    }
9000
    /*
9001
     * all strings coms from the dictionary, equality can be done directly
9002
     */
9003
160k
    ret = xmlParseQName (ctxt, &prefix2);
9004
160k
    if ((ret == name) && (prefix == prefix2))
9005
2.54k
  return((const xmlChar*) 1);
9006
157k
    return ret;
9007
160k
}
9008
9009
/**
9010
 * xmlParseAttValueInternal:
9011
 * @ctxt:  an XML parser context
9012
 * @len:  attribute len result
9013
 * @alloc:  whether the attribute was reallocated as a new string
9014
 * @normalize:  if 1 then further non-CDATA normalization must be done
9015
 *
9016
 * parse a value for an attribute.
9017
 * NOTE: if no normalization is needed, the routine will return pointers
9018
 *       directly from the data buffer.
9019
 *
9020
 * 3.3.3 Attribute-Value Normalization:
9021
 * Before the value of an attribute is passed to the application or
9022
 * checked for validity, the XML processor must normalize it as follows:
9023
 * - a character reference is processed by appending the referenced
9024
 *   character to the attribute value
9025
 * - an entity reference is processed by recursively processing the
9026
 *   replacement text of the entity
9027
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
9028
 *   appending #x20 to the normalized value, except that only a single
9029
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
9030
 *   parsed entity or the literal entity value of an internal parsed entity
9031
 * - other characters are processed by appending them to the normalized value
9032
 * If the declared value is not CDATA, then the XML processor must further
9033
 * process the normalized attribute value by discarding any leading and
9034
 * trailing space (#x20) characters, and by replacing sequences of space
9035
 * (#x20) characters by a single space (#x20) character.
9036
 * All attributes for which no declaration has been read should be treated
9037
 * by a non-validating parser as if declared CDATA.
9038
 *
9039
 * Returns the AttValue parsed or NULL. The value has to be freed by the
9040
 *     caller if it was copied, this can be detected by val[*len] == 0.
9041
 */
9042
9043
#define GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end) \
9044
34.8k
    const xmlChar *oldbase = ctxt->input->base;\
9045
34.8k
    GROW;\
9046
34.8k
    if (ctxt->instate == XML_PARSER_EOF)\
9047
34.8k
        return(NULL);\
9048
34.8k
    if (oldbase != ctxt->input->base) {\
9049
0
        ptrdiff_t delta = ctxt->input->base - oldbase;\
9050
0
        start = start + delta;\
9051
0
        in = in + delta;\
9052
0
    }\
9053
34.8k
    end = ctxt->input->end;
9054
9055
static xmlChar *
9056
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
9057
                         int normalize)
9058
81.3M
{
9059
81.3M
    xmlChar limit = 0;
9060
81.3M
    const xmlChar *in = NULL, *start, *end, *last;
9061
81.3M
    xmlChar *ret = NULL;
9062
81.3M
    int line, col;
9063
81.3M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9064
27.2M
                    XML_MAX_HUGE_LENGTH :
9065
81.3M
                    XML_MAX_TEXT_LENGTH;
9066
9067
81.3M
    GROW;
9068
81.3M
    in = (xmlChar *) CUR_PTR;
9069
81.3M
    line = ctxt->input->line;
9070
81.3M
    col = ctxt->input->col;
9071
81.3M
    if (*in != '"' && *in != '\'') {
9072
116k
        xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
9073
116k
        return (NULL);
9074
116k
    }
9075
81.2M
    ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
9076
9077
    /*
9078
     * try to handle in this routine the most common case where no
9079
     * allocation of a new string is required and where content is
9080
     * pure ASCII.
9081
     */
9082
81.2M
    limit = *in++;
9083
81.2M
    col++;
9084
81.2M
    end = ctxt->input->end;
9085
81.2M
    start = in;
9086
81.2M
    if (in >= end) {
9087
1.84k
        GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9088
1.84k
    }
9089
81.2M
    if (normalize) {
9090
        /*
9091
   * Skip any leading spaces
9092
   */
9093
3.48M
  while ((in < end) && (*in != limit) &&
9094
3.48M
         ((*in == 0x20) || (*in == 0x9) ||
9095
3.46M
          (*in == 0xA) || (*in == 0xD))) {
9096
445k
      if (*in == 0xA) {
9097
162k
          line++; col = 1;
9098
282k
      } else {
9099
282k
          col++;
9100
282k
      }
9101
445k
      in++;
9102
445k
      start = in;
9103
445k
      if (in >= end) {
9104
448
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9105
448
                if ((in - start) > maxLength) {
9106
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9107
0
                                   "AttValue length too long\n");
9108
0
                    return(NULL);
9109
0
                }
9110
448
      }
9111
445k
  }
9112
29.6M
  while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9113
29.6M
         (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9114
26.6M
      col++;
9115
26.6M
      if ((*in++ == 0x20) && (*in == 0x20)) break;
9116
26.6M
      if (in >= end) {
9117
693
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9118
693
                if ((in - start) > maxLength) {
9119
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9120
0
                                   "AttValue length too long\n");
9121
0
                    return(NULL);
9122
0
                }
9123
693
      }
9124
26.6M
  }
9125
3.03M
  last = in;
9126
  /*
9127
   * skip the trailing blanks
9128
   */
9129
3.06M
  while ((last[-1] == 0x20) && (last > start)) last--;
9130
3.20M
  while ((in < end) && (*in != limit) &&
9131
3.20M
         ((*in == 0x20) || (*in == 0x9) ||
9132
388k
          (*in == 0xA) || (*in == 0xD))) {
9133
165k
      if (*in == 0xA) {
9134
82.5k
          line++, col = 1;
9135
82.6k
      } else {
9136
82.6k
          col++;
9137
82.6k
      }
9138
165k
      in++;
9139
165k
      if (in >= end) {
9140
450
    const xmlChar *oldbase = ctxt->input->base;
9141
450
    GROW;
9142
450
                if (ctxt->instate == XML_PARSER_EOF)
9143
0
                    return(NULL);
9144
450
    if (oldbase != ctxt->input->base) {
9145
0
        ptrdiff_t delta = ctxt->input->base - oldbase;
9146
0
        start = start + delta;
9147
0
        in = in + delta;
9148
0
        last = last + delta;
9149
0
    }
9150
450
    end = ctxt->input->end;
9151
450
                if ((in - start) > maxLength) {
9152
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9153
0
                                   "AttValue length too long\n");
9154
0
                    return(NULL);
9155
0
                }
9156
450
      }
9157
165k
  }
9158
3.03M
        if ((in - start) > maxLength) {
9159
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9160
0
                           "AttValue length too long\n");
9161
0
            return(NULL);
9162
0
        }
9163
3.03M
  if (*in != limit) goto need_complex;
9164
78.2M
    } else {
9165
802M
  while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9166
802M
         (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9167
724M
      in++;
9168
724M
      col++;
9169
724M
      if (in >= end) {
9170
31.8k
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9171
31.8k
                if ((in - start) > maxLength) {
9172
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9173
0
                                   "AttValue length too long\n");
9174
0
                    return(NULL);
9175
0
                }
9176
31.8k
      }
9177
724M
  }
9178
78.2M
  last = in;
9179
78.2M
        if ((in - start) > maxLength) {
9180
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9181
0
                           "AttValue length too long\n");
9182
0
            return(NULL);
9183
0
        }
9184
78.2M
  if (*in != limit) goto need_complex;
9185
78.2M
    }
9186
78.8M
    in++;
9187
78.8M
    col++;
9188
78.8M
    if (len != NULL) {
9189
44.4M
        if (alloc) *alloc = 0;
9190
44.4M
        *len = last - start;
9191
44.4M
        ret = (xmlChar *) start;
9192
44.4M
    } else {
9193
34.3M
        if (alloc) *alloc = 1;
9194
34.3M
        ret = xmlStrndup(start, last - start);
9195
34.3M
    }
9196
78.8M
    CUR_PTR = in;
9197
78.8M
    ctxt->input->line = line;
9198
78.8M
    ctxt->input->col = col;
9199
78.8M
    return ret;
9200
2.39M
need_complex:
9201
2.39M
    if (alloc) *alloc = 1;
9202
2.39M
    return xmlParseAttValueComplex(ctxt, len, normalize);
9203
81.2M
}
9204
9205
/**
9206
 * xmlParseAttribute2:
9207
 * @ctxt:  an XML parser context
9208
 * @pref:  the element prefix
9209
 * @elem:  the element name
9210
 * @prefix:  a xmlChar ** used to store the value of the attribute prefix
9211
 * @value:  a xmlChar ** used to store the value of the attribute
9212
 * @len:  an int * to save the length of the attribute value
9213
 * @alloc:  an int * to indicate if the attribute value was allocated
9214
 *
9215
 * Parse an attribute in the new SAX2 framework.
9216
 *
9217
 * Returns the attribute name, with the value stored in *value.
 * Returns NULL if the attribute name or the attribute value cannot be
 * parsed. If no '=' follows the name, an error is raised and the name is
 * returned with *value left set to NULL.
9218
 */
9219
9220
static const xmlChar *
9221
xmlParseAttribute2(xmlParserCtxtPtr ctxt,
9222
                   const xmlChar * pref, const xmlChar * elem,
9223
                   const xmlChar ** prefix, xmlChar ** value,
9224
                   int *len, int *alloc)
9225
46.5M
{
9226
46.5M
    const xmlChar *name;
9227
46.5M
    xmlChar *val, *internal_val = NULL;
9228
46.5M
    int normalize = 0;
9229
9230
46.5M
    *value = NULL;
9231
46.5M
    GROW;
9232
46.5M
    name = xmlParseQName(ctxt, prefix);
9233
46.5M
    if (name == NULL) {
9234
454k
        xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9235
454k
                       "error parsing attribute name\n");
9236
454k
        return (NULL);
9237
454k
    }
9238
9239
    /*
9240
     * Get the type if needed: a non-zero entry in ctxt->attsSpecial for
     * this element/attribute pair turns on value normalization.
9241
     */
9242
46.1M
    if (ctxt->attsSpecial != NULL) {
9243
5.90M
        int type;
9244
9245
5.90M
        type = (int) (ptrdiff_t) xmlHashQLookup2(ctxt->attsSpecial,
9246
5.90M
                                                 pref, elem, *prefix, name);
9247
5.90M
        if (type != 0)
9248
3.04M
            normalize = 1;
9249
5.90M
    }
9250
9251
    /*
9252
     * Read the value
9253
     */
9254
46.1M
    SKIP_BLANKS;
9255
46.1M
    if (RAW == '=') {
9256
45.8M
        NEXT;
9257
45.8M
        SKIP_BLANKS;
9258
45.8M
        val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9259
45.8M
        if (val == NULL)
9260
61.4k
            return (NULL);
9261
45.7M
  if (normalize) {
9262
      /*
9263
       * Sometimes a second normalisation pass for spaces is needed,
9264
       * but that only happens if charrefs or entity references
9265
       * have been used in the attribute value, i.e. the attribute
9266
       * value has already been extracted into an allocated string.
9267
       */
9268
3.03M
      if (*alloc) {
9269
224k
          const xmlChar *val2;
9270
9271
224k
          val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
9272
224k
    if ((val2 != NULL) && (val2 != val)) {
9273
43.1k
        xmlFree(val);
9274
43.1k
        val = (xmlChar *) val2;
9275
43.1k
    }
9276
224k
      }
9277
3.03M
  }
9278
45.7M
        ctxt->instate = XML_PARSER_CONTENT;
9279
45.7M
    } else {
9280
313k
        xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
9281
313k
                          "Specification mandates value for attribute %s\n",
9282
313k
                          name);
9283
313k
        return (name);
9284
313k
    }
9285
9286
45.7M
    if (*prefix == ctxt->str_xml) {
9287
        /*
9288
         * Check that xml:lang conforms to the specification.
9289
         * No longer registered as an error: only a warning is generated,
         * and only in pedantic mode,
9290
         * since this was deprecated in XML second edition
9291
         */
9292
211k
        if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
9293
36.0k
            internal_val = xmlStrndup(val, *len);
9294
36.0k
            if (!xmlCheckLanguageID(internal_val)) {
9295
19.0k
                xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9296
19.0k
                              "Malformed value for xml:lang : %s\n",
9297
19.0k
                              internal_val, NULL);
9298
19.0k
            }
9299
36.0k
        }
9300
9301
        /*
9302
         * Check that xml:space conforms to the specification and record
         * the setting in *(ctxt->space): 0 for "default", 1 for "preserve".
9303
         */
9304
211k
        if (xmlStrEqual(name, BAD_CAST "space")) {
9305
5.59k
            internal_val = xmlStrndup(val, *len);
9306
5.59k
            if (xmlStrEqual(internal_val, BAD_CAST "default"))
9307
426
                *(ctxt->space) = 0;
9308
5.17k
            else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9309
1.93k
                *(ctxt->space) = 1;
9310
3.24k
            else {
9311
3.24k
                xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9312
3.24k
                              "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9313
3.24k
                              internal_val, NULL);
9314
3.24k
            }
9315
5.59k
        }
        /* internal_val is only a temporary copy used for the checks above */
9316
211k
        if (internal_val) {
9317
41.6k
            xmlFree(internal_val);
9318
41.6k
        }
9319
211k
    }
9320
9321
45.7M
    *value = val;
9322
45.7M
    return (name);
9323
46.1M
}
9324
/**
9325
 * xmlParseStartTag2:
9326
 * @ctxt:  an XML parser context
 * @pref:  a xmlChar ** set to the element prefix on success
 * @URI:  a xmlChar ** set on success to the result of xmlGetNamespace()
 *        for the element prefix
 * @tlen:  an int * set to the number of input bytes read for the element
 *         QName
9327
 *
9328
 * Parse a start tag. Always consumes '<'.
9329
 *
9330
 * This routine is called when running SAX2 parsing
9331
 *
9332
 * [40] STag ::= '<' Name (S Attribute)* S? '>'
9333
 *
9334
 * [ WFC: Unique Att Spec ]
9335
 * No attribute name may appear more than once in the same start-tag or
9336
 * empty-element tag.
9337
 *
9338
 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9339
 *
9340
 * [ WFC: Unique Att Spec ]
9341
 * No attribute name may appear more than once in the same start-tag or
9342
 * empty-element tag.
9343
 *
9344
 * With namespace:
9345
 *
9346
 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9347
 *
9348
 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9349
 *
9350
 * Returns the local name of the element parsed, or NULL in case of error
9351
 */
9352
9353
static const xmlChar *
9354
xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
9355
38.9M
                  const xmlChar **URI, int *tlen) {
9356
38.9M
    const xmlChar *localname;
9357
38.9M
    const xmlChar *prefix;
9358
38.9M
    const xmlChar *attname;
9359
38.9M
    const xmlChar *aprefix;
9360
38.9M
    const xmlChar *nsname;
9361
38.9M
    xmlChar *attvalue;
9362
38.9M
    const xmlChar **atts = ctxt->atts;
9363
38.9M
    int maxatts = ctxt->maxatts;
9364
38.9M
    int nratts, nbatts, nbdef, inputid;
9365
38.9M
    int i, j, nbNs, attval;
9366
38.9M
    unsigned long cur;
9367
38.9M
    int nsNr = ctxt->nsNr;
9368
9369
38.9M
    if (RAW != '<') return(NULL);
9370
38.9M
    NEXT1;
9371
9372
    /*
9373
     * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
9374
     *       point since the attribute values may be stored as pointers to
9375
     *       the buffer and calling SHRINK would destroy them !
9376
     *       The Shrinking is only possible once the full set of attribute
9377
     *       callbacks have been done.
9378
     */
9379
38.9M
    SHRINK;
9380
38.9M
    cur = ctxt->input->cur - ctxt->input->base;
9381
38.9M
    inputid = ctxt->input->id;
9382
38.9M
    nbatts = 0;
9383
38.9M
    nratts = 0;
9384
38.9M
    nbdef = 0;
9385
38.9M
    nbNs = 0;
9386
38.9M
    attval = 0;
9387
    /* Forget any namespaces added during an earlier parse of this element. */
9388
38.9M
    ctxt->nsNr = nsNr;
9389
9390
38.9M
    localname = xmlParseQName(ctxt, &prefix);
9391
38.9M
    if (localname == NULL) {
9392
503k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9393
503k
           "StartTag: invalid element name\n");
9394
503k
        return(NULL);
9395
503k
    }
9396
38.4M
    *tlen = ctxt->input->cur - ctxt->input->base - cur;
9397
9398
    /*
9399
     * Now parse the attributes, it ends up with the ending
9400
     *
9401
     * (S Attribute)* S?
9402
     */
9403
38.4M
    SKIP_BLANKS;
9404
38.4M
    GROW;
9405
9406
57.7M
    while (((RAW != '>') &&
9407
57.7M
     ((RAW != '/') || (NXT(1) != '>')) &&
9408
57.7M
     (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
9409
46.5M
  int len = -1, alloc = 0;
9410
9411
46.5M
  attname = xmlParseAttribute2(ctxt, prefix, localname,
9412
46.5M
                               &aprefix, &attvalue, &len, &alloc);
9413
46.5M
        if (attname == NULL) {
9414
515k
      xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9415
515k
           "xmlParseStartTag: problem parsing attributes\n");
9416
515k
      break;
9417
515k
  }
9418
46.0M
        if (attvalue == NULL)
9419
313k
            goto next_attr;
9420
45.7M
  if (len < 0) len = xmlStrlen(attvalue);
9421
9422
45.7M
        if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
            /* Default namespace declaration: xmlns="..." */
9423
425k
            const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9424
425k
            xmlURIPtr uri;
9425
9426
425k
            if (URL == NULL) {
9427
0
                xmlErrMemory(ctxt, "dictionary allocation failure");
9428
0
                if ((attvalue != NULL) && (alloc != 0))
9429
0
                    xmlFree(attvalue);
9430
0
                localname = NULL;
9431
0
                goto done;
9432
0
            }
9433
425k
            if (*URL != 0) {
9434
400k
                uri = xmlParseURI((const char *) URL);
9435
400k
                if (uri == NULL) {
9436
126k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
9437
126k
                             "xmlns: '%s' is not a valid URI\n",
9438
126k
                                       URL, NULL, NULL);
9439
273k
                } else {
9440
273k
                    if (uri->scheme == NULL) {
9441
101k
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9442
101k
                                  "xmlns: URI %s is not absolute\n",
9443
101k
                                  URL, NULL, NULL);
9444
101k
                    }
9445
273k
                    xmlFreeURI(uri);
9446
273k
                }
9447
400k
                if (URL == ctxt->str_xml_ns) {
9448
0
                    if (attname != ctxt->str_xml) {
9449
0
                        xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9450
0
                     "xml namespace URI cannot be the default namespace\n",
9451
0
                                 NULL, NULL, NULL);
9452
0
                    }
9453
0
                    goto next_attr;
9454
0
                }
9455
400k
                if ((len == 29) &&
9456
400k
                    (xmlStrEqual(URL,
9457
2.69k
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9458
464
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9459
464
                         "reuse of the xmlns namespace name is forbidden\n",
9460
464
                             NULL, NULL, NULL);
9461
464
                    goto next_attr;
9462
464
                }
9463
400k
            }
9464
            /*
9465
             * Check that the default namespace was not already declared
             * on this tag (nsTab holds prefix/URI pairs, hence 2 * j)
9466
             */
9467
452k
            for (j = 1;j <= nbNs;j++)
9468
43.7k
                if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9469
16.6k
                    break;
9470
425k
            if (j <= nbNs)
9471
16.6k
                xmlErrAttributeDup(ctxt, NULL, attname);
9472
408k
            else
9473
408k
                if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
9474
9475
45.3M
        } else if (aprefix == ctxt->str_xmlns) {
            /* Prefixed namespace declaration: xmlns:attname="..." */
9476
548k
            const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9477
548k
            xmlURIPtr uri;
9478
9479
548k
            if (attname == ctxt->str_xml) {
9480
16.9k
                if (URL != ctxt->str_xml_ns) {
9481
16.9k
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9482
16.9k
                             "xml namespace prefix mapped to wrong URI\n",
9483
16.9k
                             NULL, NULL, NULL);
9484
16.9k
                }
9485
                /*
9486
                 * Do not keep a namespace definition node
9487
                 */
9488
16.9k
                goto next_attr;
9489
16.9k
            }
9490
531k
            if (URL == ctxt->str_xml_ns) {
9491
0
                if (attname != ctxt->str_xml) {
9492
0
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9493
0
                             "xml namespace URI mapped to wrong prefix\n",
9494
0
                             NULL, NULL, NULL);
9495
0
                }
9496
0
                goto next_attr;
9497
0
            }
9498
531k
            if (attname == ctxt->str_xmlns) {
9499
661
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9500
661
                         "redefinition of the xmlns prefix is forbidden\n",
9501
661
                         NULL, NULL, NULL);
9502
661
                goto next_attr;
9503
661
            }
9504
530k
            if ((len == 29) &&
9505
530k
                (xmlStrEqual(URL,
9506
7.76k
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9507
354
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9508
354
                         "reuse of the xmlns namespace name is forbidden\n",
9509
354
                         NULL, NULL, NULL);
9510
354
                goto next_attr;
9511
354
            }
9512
530k
            if ((URL == NULL) || (URL[0] == 0)) {
9513
1.90k
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9514
1.90k
                         "xmlns:%s: Empty XML namespace is not allowed\n",
9515
1.90k
                              attname, NULL, NULL);
9516
1.90k
                goto next_attr;
9517
528k
            } else {
9518
528k
                uri = xmlParseURI((const char *) URL);
9519
528k
                if (uri == NULL) {
9520
138k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
9521
138k
                         "xmlns:%s: '%s' is not a valid URI\n",
9522
138k
                                       attname, URL, NULL);
9523
390k
                } else {
9524
390k
                    if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9525
23.7k
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9526
23.7k
                                  "xmlns:%s: URI %s is not absolute\n",
9527
23.7k
                                  attname, URL, NULL);
9528
23.7k
                    }
9529
390k
                    xmlFreeURI(uri);
9530
390k
                }
9531
528k
            }
9532
9533
            /*
9534
             * Check that this prefix was not already declared on this tag
9535
             */
9536
976k
            for (j = 1;j <= nbNs;j++)
9537
470k
                if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9538
23.3k
                    break;
9539
528k
            if (j <= nbNs)
9540
23.3k
                xmlErrAttributeDup(ctxt, aprefix, attname);
9541
505k
            else
9542
505k
                if (nsPush(ctxt, attname, URL) > 0) nbNs++;
9543
9544
44.7M
        } else {
9545
            /*
9546
             * Add the attribute to atts. Each attribute occupies five
             * consecutive slots: name, prefix, namespace URI, value start
             * and value end.
9547
             */
9548
44.7M
            if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9549
204k
                if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9550
0
                    goto next_attr;
9551
0
                }
9552
204k
                maxatts = ctxt->maxatts;
9553
204k
                atts = ctxt->atts;
9554
204k
            }
9555
44.7M
            ctxt->attallocs[nratts++] = alloc;
9556
44.7M
            atts[nbatts++] = attname;
9557
44.7M
            atts[nbatts++] = aprefix;
9558
            /*
9559
             * The namespace URI field is used temporarily to point at the
9560
             * base of the current input buffer for non-alloced attributes.
9561
             * When the input buffer is reallocated, all the pointers become
9562
             * invalid, but they can be reconstructed later.
9563
             */
9564
44.7M
            if (alloc)
9565
1.03M
                atts[nbatts++] = NULL;
9566
43.7M
            else
9567
43.7M
                atts[nbatts++] = ctxt->input->base;
9568
44.7M
            atts[nbatts++] = attvalue;
9569
44.7M
            attvalue += len;
9570
44.7M
            atts[nbatts++] = attvalue;
9571
            /*
9572
             * Tag if some deallocation is needed
9573
             */
9574
44.7M
            if (alloc != 0) attval = 1;
9575
44.7M
            attvalue = NULL; /* moved into atts */
9576
44.7M
        }
9577
9578
46.0M
next_attr:
        /* Free an allocated value that was not handed over to atts */
9579
46.0M
        if ((attvalue != NULL) && (alloc != 0)) {
9580
264k
            xmlFree(attvalue);
9581
264k
            attvalue = NULL;
9582
264k
        }
9583
9584
46.0M
  GROW
9585
46.0M
        if (ctxt->instate == XML_PARSER_EOF)
9586
0
            break;
9587
46.0M
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9588
25.9M
      break;
9589
20.1M
  if (SKIP_BLANKS == 0) {
9590
848k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9591
848k
         "attributes construct error\n");
9592
848k
      break;
9593
848k
  }
9594
19.2M
        GROW;
9595
19.2M
    }
9596
9597
38.4M
    if (ctxt->input->id != inputid) {
9598
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9599
0
                    "Unexpected change of input\n");
9600
0
        localname = NULL;
9601
0
        goto done;
9602
0
    }
9603
9604
    /* Reconstruct attribute value pointers. */
9605
83.2M
    for (i = 0, j = 0; j < nratts; i += 5, j++) {
9606
44.7M
        if (atts[i+2] != NULL) {
9607
            /*
9608
             * Arithmetic on dangling pointers is technically undefined
9609
             * behavior, but well...
9610
             */
9611
43.7M
            const xmlChar *old = atts[i+2];
9612
43.7M
            atts[i+2]  = NULL;    /* Reset repurposed namespace URI */
9613
43.7M
            atts[i+3] = ctxt->input->base + (atts[i+3] - old);  /* value */
9614
43.7M
            atts[i+4] = ctxt->input->base + (atts[i+4] - old);  /* valuend */
9615
43.7M
        }
9616
44.7M
    }
9617
9618
    /*
9619
     * Attribute defaulting: add the defaults registered for this element
     * in ctxt->attsDefault that were not specified in the tag
9620
     */
9621
38.4M
    if (ctxt->attsDefault != NULL) {
9622
10.0M
        xmlDefAttrsPtr defaults;
9623
9624
10.0M
  defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9625
10.0M
  if (defaults != NULL) {
9626
2.04M
      for (i = 0;i < defaults->nbAttrs;i++) {
9627
1.38M
          attname = defaults->values[5 * i];
9628
1.38M
    aprefix = defaults->values[5 * i + 1];
9629
9630
                /*
9631
     * Special work for defaulted namespace declarations
9632
     */
9633
1.38M
    if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9634
        /*
9635
         * check that it's not a defined namespace
9636
         */
9637
49.3k
        for (j = 1;j <= nbNs;j++)
9638
18.4k
            if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9639
6.51k
          break;
9640
37.4k
              if (j <= nbNs) continue;
9641
9642
30.9k
        nsname = xmlGetNamespace(ctxt, NULL);
9643
30.9k
        if (nsname != defaults->values[5 * i + 2]) {
9644
15.0k
      if (nsPush(ctxt, NULL,
9645
15.0k
                 defaults->values[5 * i + 2]) > 0)
9646
14.6k
          nbNs++;
9647
15.0k
        }
9648
1.34M
    } else if (aprefix == ctxt->str_xmlns) {
9649
        /*
9650
         * check that it's not a defined namespace
9651
         */
9652
67.4k
        for (j = 1;j <= nbNs;j++)
9653
25.5k
            if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9654
9.65k
          break;
9655
51.5k
              if (j <= nbNs) continue;
9656
9657
41.9k
        nsname = xmlGetNamespace(ctxt, attname);
9658
41.9k
        if (nsname != defaults->values[5 * i + 2]) {
9659
15.1k
      if (nsPush(ctxt, attname,
9660
15.1k
                 defaults->values[5 * i + 2]) > 0)
9661
15.1k
          nbNs++;
9662
15.1k
        }
9663
1.29M
    } else {
9664
        /*
9665
         * check that it's not a defined attribute
9666
         */
9667
3.44M
        for (j = 0;j < nbatts;j+=5) {
9668
2.18M
      if ((attname == atts[j]) && (aprefix == atts[j+1]))
9669
30.5k
          break;
9670
2.18M
        }
9671
1.29M
        if (j < nbatts) continue;
9672
9673
1.26M
        if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9674
8.52k
      if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9675
0
                            localname = NULL;
9676
0
                            goto done;
9677
0
      }
9678
8.52k
      maxatts = ctxt->maxatts;
9679
8.52k
      atts = ctxt->atts;
9680
8.52k
        }
9681
1.26M
        atts[nbatts++] = attname;
9682
1.26M
        atts[nbatts++] = aprefix;
9683
1.26M
        if (aprefix == NULL)
9684
999k
      atts[nbatts++] = NULL;
9685
265k
        else
9686
265k
            atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
9687
1.26M
        atts[nbatts++] = defaults->values[5 * i + 2];
9688
1.26M
        atts[nbatts++] = defaults->values[5 * i + 3];
9689
1.26M
        if ((ctxt->standalone == 1) &&
9690
1.26M
            (defaults->values[5 * i + 4] != NULL)) {
9691
73
      xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9692
73
    "standalone: attribute %s on %s defaulted from external subset\n",
9693
73
                                   attname, localname);
9694
73
        }
9695
1.26M
        nbdef++;
9696
1.26M
    }
9697
1.38M
      }
9698
659k
  }
9699
10.0M
    }
9700
9701
    /*
9702
     * Attribute checks
9703
     */
9704
84.5M
    for (i = 0; i < nbatts;i += 5) {
9705
        /*
9706
  * The default namespace does not apply to attribute names.
9707
  */
9708
46.0M
  if (atts[i + 1] != NULL) {
9709
1.16M
      nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9710
1.16M
      if (nsname == NULL) {
9711
265k
    xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9712
265k
        "Namespace prefix %s for %s on %s is not defined\n",
9713
265k
        atts[i + 1], atts[i], localname);
9714
265k
      }
9715
1.16M
      atts[i + 2] = nsname;
9716
1.16M
  } else
9717
44.8M
      nsname = NULL;
9718
  /*
9719
   * [ WFC: Unique Att Spec ]
9720
   * No attribute name may appear more than once in the same
9721
   * start-tag or empty-element tag.
9722
   * As extended by the Namespace in XML REC.
9723
   */
9724
67.2M
        for (j = 0; j < i;j += 5) {
9725
21.2M
      if (atts[i] == atts[j]) {
9726
114k
          if (atts[i+1] == atts[j+1]) {
9727
53.7k
        xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9728
53.7k
        break;
9729
53.7k
    }
9730
60.3k
    if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9731
4.04k
        xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9732
4.04k
           "Namespaced Attribute %s in '%s' redefined\n",
9733
4.04k
           atts[i], nsname, NULL);
9734
4.04k
        break;
9735
4.04k
    }
9736
60.3k
      }
9737
21.2M
  }
9738
46.0M
    }
9739
9740
38.4M
    nsname = xmlGetNamespace(ctxt, prefix);
9741
38.4M
    if ((prefix != NULL) && (nsname == NULL)) {
9742
656k
  xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9743
656k
           "Namespace prefix %s on %s is not defined\n",
9744
656k
     prefix, localname, NULL);
9745
656k
    }
9746
38.4M
    *pref = prefix;
9747
38.4M
    *URI = nsname;
9748
9749
    /*
9750
     * SAX: Start of Element !
9751
     */
9752
38.4M
    if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9753
38.4M
  (!ctxt->disableSAX)) {
9754
33.5M
  if (nbNs > 0)
9755
434k
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9756
434k
        nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9757
434k
        nbatts / 5, nbdef, atts);
9758
33.1M
  else
9759
33.1M
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9760
33.1M
                    nsname, 0, NULL, nbatts / 5, nbdef, atts);
9761
33.5M
    }
9762
9763
38.4M
done:
9764
    /*
9765
     * Free up attribute allocated strings if needed
9766
     */
9767
38.4M
    if (attval != 0) {
9768
2.14M
  for (i = 3,j = 0; j < nratts;i += 5,j++)
9769
1.20M
      if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9770
1.03M
          xmlFree((xmlChar *) atts[i]);
9771
940k
    }
9772
9773
38.4M
    return(localname);
9774
38.4M
}
9775
9776
/**
9777
 * xmlParseEndTag2:
9778
 * @ctxt:  an XML parser context
9779
 * @tag:  the start tag information; its prefix, URI, line and nsNr
9780
 *        fields are read here
9781
 *
9782
 * Parse an end tag. Always consumes '</'.
9783
 *
9784
 * [42] ETag ::= '</' Name S? '>'
9785
 *
9786
 * With namespace
9787
 *
9788
 * [NS 9] ETag ::= '</' QName S? '>'
9789
 */
9790
9791
static void
9792
17.0M
xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlStartTag *tag) {
9793
17.0M
    const xmlChar *name;
9794
9795
17.0M
    GROW;
9796
17.0M
    if ((RAW != '<') || (NXT(1) != '/')) {
9797
0
  xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9798
0
  return;
9799
0
    }
9800
17.0M
    SKIP(2);
9801
9802
    /* Compare the name in the input against the currently open element */
17.0M
    if (tag->prefix == NULL)
9803
16.3M
        name = xmlParseNameAndCompare(ctxt, ctxt->name);
9804
630k
    else
9805
630k
        name = xmlParseQNameAndCompare(ctxt, ctxt->name, tag->prefix);
9806
9807
    /*
9808
     * We should definitely be at the ending "S? '>'" part
9809
     */
9810
17.0M
    GROW;
9811
17.0M
    if (ctxt->instate == XML_PARSER_EOF)
9812
0
        return;
9813
17.0M
    SKIP_BLANKS;
9814
17.0M
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9815
146k
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9816
146k
    } else
9817
16.8M
  NEXT1;
9818
9819
    /*
9820
     * [ WFC: Element Type Match ]
9821
     * The Name in an element's end-tag must match the element type in the
9822
     * start-tag.
9823
     *
     * Any result other than the sentinel (xmlChar*)1 is reported as a
     * mismatch; a NULL name is shown as "unparsable".
9824
     */
9825
17.0M
    if (name != (xmlChar*)1) {
9826
397k
        if (name == NULL) name = BAD_CAST "unparsable";
9827
397k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9828
397k
         "Opening and ending tag mismatch: %s line %d and %s\n",
9829
397k
                    ctxt->name, tag->line, name);
9830
397k
    }
9831
9832
    /*
9833
     * SAX: End of Tag
9834
     */
9835
17.0M
    if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9836
17.0M
  (!ctxt->disableSAX))
9837
14.9M
  ctxt->sax->endElementNs(ctxt->userData, ctxt->name, tag->prefix,
9838
14.9M
                                tag->URI);
9839
9840
17.0M
    spacePop(ctxt);
9841
17.0M
    if (tag->nsNr != 0)
9842
50.4k
  nsPop(ctxt, tag->nsNr);
9843
17.0M
}
9844
9845
/**
9846
 * xmlParseCDSect:
9847
 * @ctxt:  an XML parser context
9848
 *
9849
 * DEPRECATED: Internal function, don't use.
9850
 *
9851
 * Parse escaped pure raw content. Always consumes '<!['.
9852
 *
9853
 * [18] CDSect ::= CDStart CData CDEnd
9854
 *
9855
 * [19] CDStart ::= '<![CDATA['
9856
 *
9857
 * [20] CData ::= (Char* - (Char* ']]>' Char*))
9858
 *
9859
 * [21] CDEnd ::= ']]>'
 *
 * The content is delivered through the SAX cdataBlock callback, or
 * through the characters callback if cdataBlock is not set.
9860
 */
9861
void
9862
141k
xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9863
141k
    xmlChar *buf = NULL;
9864
141k
    int len = 0;
9865
141k
    int size = XML_PARSER_BUFFER_SIZE;
9866
141k
    int r, rl;
9867
141k
    int s, sl;
9868
141k
    int cur, l;
9869
141k
    int count = 0;
9870
141k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9871
45.4k
                    XML_MAX_HUGE_LENGTH :
9872
141k
                    XML_MAX_TEXT_LENGTH;
9873
9874
141k
    if ((CUR != '<') || (NXT(1) != '!') || (NXT(2) != '['))
9875
0
        return;
9876
141k
    SKIP(3);
9877
9878
141k
    if (!CMP6(CUR_PTR, 'C', 'D', 'A', 'T', 'A', '['))
9879
0
        return;
9880
141k
    SKIP(6);
9881
9882
141k
    ctxt->instate = XML_PARSER_CDATA_SECTION;
    /*
     * r, s and cur form a three character sliding window over the input
     * (with rl, sl and l the matching lengths passed to NEXTL). A character
     * is copied to buf only once it has moved into r, so the closing ']]>'
     * is never copied.
     */
9883
141k
    r = CUR_CHAR(rl);
9884
141k
    if (!IS_CHAR(r)) {
9885
2.97k
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9886
2.97k
        goto out;
9887
2.97k
    }
9888
138k
    NEXTL(rl);
9889
138k
    s = CUR_CHAR(sl);
9890
138k
    if (!IS_CHAR(s)) {
9891
3.02k
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9892
3.02k
        goto out;
9893
3.02k
    }
9894
135k
    NEXTL(sl);
9895
135k
    cur = CUR_CHAR(l);
9896
135k
    buf = (xmlChar *) xmlMallocAtomic(size);
9897
135k
    if (buf == NULL) {
9898
0
  xmlErrMemory(ctxt, NULL);
9899
0
        goto out;
9900
0
    }
9901
64.6M
    while (IS_CHAR(cur) &&
9902
64.6M
           ((r != ']') || (s != ']') || (cur != '>'))) {
        /* Double the buffer when fewer than 5 free bytes would remain */
9903
64.5M
  if (len + 5 >= size) {
9904
96.1k
      xmlChar *tmp;
9905
9906
96.1k
      tmp = (xmlChar *) xmlRealloc(buf, size * 2);
9907
96.1k
      if (tmp == NULL) {
9908
0
    xmlErrMemory(ctxt, NULL);
9909
0
                goto out;
9910
0
      }
9911
96.1k
      buf = tmp;
9912
96.1k
      size *= 2;
9913
96.1k
  }
9914
64.5M
  COPY_BUF(rl,buf,len,r);
9915
64.5M
  r = s;
9916
64.5M
  rl = sl;
9917
64.5M
  s = cur;
9918
64.5M
  sl = l;
9919
64.5M
  count++;
        /* Run SHRINK and GROW once more than 50 characters were handled */
9920
64.5M
  if (count > 50) {
9921
1.21M
      SHRINK;
9922
1.21M
      GROW;
9923
1.21M
            if (ctxt->instate == XML_PARSER_EOF) {
9924
0
                goto out;
9925
0
            }
9926
1.21M
      count = 0;
9927
1.21M
  }
9928
64.5M
  NEXTL(l);
9929
64.5M
  cur = CUR_CHAR(l);
9930
64.5M
        if (len > maxLength) {
9931
0
            xmlFatalErrMsg(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9932
0
                           "CData section too big found\n");
9933
0
            goto out;
9934
0
        }
9935
64.5M
    }
9936
135k
    buf[len] = 0;
    /* The loop also stops on a non-Char; only '>' means ']]>' was found */
9937
135k
    if (cur != '>') {
9938
13.3k
  xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9939
13.3k
                       "CData section not finished\n%.50s\n", buf);
9940
13.3k
        goto out;
9941
13.3k
    }
9942
122k
    NEXTL(l);
9943
9944
    /*
9945
     * OK the buffer is to be consumed as cdata.
9946
     */
9947
122k
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9948
82.6k
  if (ctxt->sax->cdataBlock != NULL)
9949
51.0k
      ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9950
31.5k
  else if (ctxt->sax->characters != NULL)
9951
31.5k
      ctxt->sax->characters(ctxt->userData, buf, len);
9952
82.6k
    }
9953
9954
141k
out:
    /* Common exit: restore the content state unless parsing has stopped */
9955
141k
    if (ctxt->instate != XML_PARSER_EOF)
9956
141k
        ctxt->instate = XML_PARSER_CONTENT;
9957
141k
    xmlFree(buf);
9958
141k
}
9959
9960
/**
9961
 * xmlParseContentInternal:
9962
 * @ctxt:  an XML parser context
9963
 *
9964
 * Parse a content sequence. Stops at EOF or '</'. Leaves checking of
9965
 * unexpected EOF to the caller.
 *
 * An end tag is only left unconsumed when no element opened within this
 * call is still open; end tags of elements opened here are parsed.
9966
 */
9967
9968
static void
9969
710k
xmlParseContentInternal(xmlParserCtxtPtr ctxt) {
    /* Depth of the element name stack on entry */
9970
710k
    int nameNr = ctxt->nameNr;
9971
9972
710k
    GROW;
9973
95.9M
    while ((RAW != 0) &&
9974
95.9M
     (ctxt->instate != XML_PARSER_EOF)) {
9975
95.2M
  const xmlChar *cur = ctxt->input->cur;
9976
9977
  /*
9978
   * First case : a Processing Instruction.
9979
   */
9980
95.2M
  if ((*cur == '<') && (cur[1] == '?')) {
9981
150k
      xmlParsePI(ctxt);
9982
150k
  }
9983
9984
  /*
9985
   * Second case : a CDSection
9986
   */
9987
  /* 2.6.0 test was *cur not RAW */
9988
95.1M
  else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9989
141k
      xmlParseCDSect(ctxt);
9990
141k
  }
9991
9992
  /*
9993
   * Third case :  a comment
9994
   */
9995
94.9M
  else if ((*cur == '<') && (NXT(1) == '!') &&
9996
94.9M
     (NXT(2) == '-') && (NXT(3) == '-')) {
9997
531k
      xmlParseComment(ctxt);
9998
531k
      ctxt->instate = XML_PARSER_CONTENT;
9999
531k
  }
10000
10001
  /*
10002
   * Fourth case :  a sub-element.
10003
   */
10004
94.4M
  else if (*cur == '<') {
10005
42.1M
            if (NXT(1) == '/') {
                /*
                 * An end tag with the name stack back at (or below) its
                 * entry depth does not close an element opened here:
                 * stop and leave it in the input.
                 */
10006
12.7M
                if (ctxt->nameNr <= nameNr)
10007
79.2k
                    break;
10008
12.7M
          xmlParseElementEnd(ctxt);
10009
29.3M
            } else {
10010
29.3M
          xmlParseElementStart(ctxt);
10011
29.3M
            }
10012
42.1M
  }
10013
10014
  /*
10015
   * Fifth case : a reference. If it has not been resolved,
10016
   *    parsing returns its Name, create the node
10017
   */
10018
10019
52.3M
  else if (*cur == '&') {
10020
10.2M
      xmlParseReference(ctxt);
10021
10.2M
  }
10022
10023
  /*
10024
   * Last case, text. Note that References are handled directly.
10025
   */
10026
42.0M
  else {
10027
42.0M
      xmlParseCharData(ctxt, 0);
10028
42.0M
  }
10029
10030
95.2M
  GROW;
10031
95.2M
  SHRINK;
10032
95.2M
    }
10033
710k
}
10034
10035
/**
10036
 * xmlParseContent:
10037
 * @ctxt:  an XML parser context
10038
 *
10039
 * Parse a content sequence. Stops at EOF or '</'.
10040
 *
10041
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
 *
 * Raises XML_ERR_TAG_NOT_FINISHED if an element opened while parsing the
 * content is still open when parsing stops.
10042
 */
10043
10044
void
10045
529k
xmlParseContent(xmlParserCtxtPtr ctxt) {
    /* Depth of the element name stack on entry */
10046
529k
    int nameNr = ctxt->nameNr;
10047
10048
529k
    xmlParseContentInternal(ctxt);
10049
10050
    /* A deeper name stack means an element opened above was never closed */
529k
    if ((ctxt->instate != XML_PARSER_EOF) && (ctxt->nameNr > nameNr)) {
10051
9.42k
        const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
10052
9.42k
        int line = ctxt->pushTab[ctxt->nameNr - 1].line;
10053
9.42k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
10054
9.42k
                "Premature end of data in tag %s line %d\n",
10055
9.42k
    name, line, NULL);
10056
9.42k
    }
10057
529k
}
10058
10059
/**
10060
 * xmlParseElement:
10061
 * @ctxt:  an XML parser context
10062
 *
10063
 * DEPRECATED: Internal function, don't use.
10064
 *
10065
 * Parse an XML element: its start tag, its content and its end tag.
10066
 *
10067
 * [39] element ::= EmptyElemTag | STag content ETag
10068
 *
10069
 * [ WFC: Element Type Match ]
10070
 * The Name in an element's end-tag must match the element type in the
10071
 * start-tag.
10072
 *
10073
 */
10074
10075
void
10076
266k
xmlParseElement(xmlParserCtxtPtr ctxt) {
    /*
     * xmlParseElementStart returns 0 only if an opening tag was parsed;
     * on error or for an empty element there is nothing left to do.
     */
10077
266k
    if (xmlParseElementStart(ctxt) != 0)
10078
85.2k
        return;
10079
10080
181k
    xmlParseContentInternal(ctxt);
10081
181k
    if (ctxt->instate == XML_PARSER_EOF)
10082
957
  return;
10083
10084
    /* The input ended before the end tag of this element was seen */
180k
    if (CUR == 0) {
10085
103k
        const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
10086
103k
        int line = ctxt->pushTab[ctxt->nameNr - 1].line;
10087
103k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
10088
103k
                "Premature end of data in tag %s line %d\n",
10089
103k
    name, line, NULL);
10090
103k
        return;
10091
103k
    }
10092
10093
76.4k
    xmlParseElementEnd(ctxt);
10094
76.4k
}
10095
10096
/**
10097
 * xmlParseElementStart:
10098
 * @ctxt:  an XML parser context
10099
 *
10100
 * Parse the start of an XML element. Returns -1 in case of error, 0 if an
10101
 * opening tag was parsed, 1 if an empty element was parsed.
10102
 *
10103
 * Always consumes '<'.
10104
 */
10105
static int
10106
29.5M
xmlParseElementStart(xmlParserCtxtPtr ctxt) {
10107
29.5M
    const xmlChar *name;
10108
29.5M
    const xmlChar *prefix = NULL;
10109
29.5M
    const xmlChar *URI = NULL;
10110
29.5M
    xmlParserNodeInfo node_info;
10111
29.5M
    int line, tlen = 0;
10112
29.5M
    xmlNodePtr ret;
10113
29.5M
    int nsNr = ctxt->nsNr;
10114
10115
29.5M
    if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
10116
29.5M
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
10117
148
  xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
10118
148
     "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
10119
148
        xmlParserMaxDepth);
10120
148
  xmlHaltParser(ctxt);
10121
148
  return(-1);
10122
148
    }
10123
10124
    /* Capture start position */
10125
29.5M
    if (ctxt->record_info) {
10126
0
        node_info.begin_pos = ctxt->input->consumed +
10127
0
                          (CUR_PTR - ctxt->input->base);
10128
0
  node_info.begin_line = ctxt->input->line;
10129
0
    }
10130
10131
29.5M
    if (ctxt->spaceNr == 0)
10132
0
  spacePush(ctxt, -1);
10133
29.5M
    else if (*ctxt->space == -2)
10134
4.05M
  spacePush(ctxt, -1);
10135
25.5M
    else
10136
25.5M
  spacePush(ctxt, *ctxt->space);
10137
10138
29.5M
    line = ctxt->input->line;
10139
29.5M
#ifdef LIBXML_SAX1_ENABLED
10140
29.5M
    if (ctxt->sax2)
10141
17.3M
#endif /* LIBXML_SAX1_ENABLED */
10142
17.3M
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
10143
12.2M
#ifdef LIBXML_SAX1_ENABLED
10144
12.2M
    else
10145
12.2M
  name = xmlParseStartTag(ctxt);
10146
29.5M
#endif /* LIBXML_SAX1_ENABLED */
10147
29.5M
    if (ctxt->instate == XML_PARSER_EOF)
10148
1.03k
  return(-1);
10149
29.5M
    if (name == NULL) {
10150
764k
  spacePop(ctxt);
10151
764k
        return(-1);
10152
764k
    }
10153
28.8M
    nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
10154
28.8M
    ret = ctxt->node;
10155
10156
28.8M
#ifdef LIBXML_VALID_ENABLED
10157
    /*
10158
     * [ VC: Root Element Type ]
10159
     * The Name in the document type declaration must match the element
10160
     * type of the root element.
10161
     */
10162
28.8M
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10163
28.8M
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
10164
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
10165
28.8M
#endif /* LIBXML_VALID_ENABLED */
10166
10167
    /*
10168
     * Check for an Empty Element.
10169
     */
10170
28.8M
    if ((RAW == '/') && (NXT(1) == '>')) {
10171
14.2M
        SKIP(2);
10172
14.2M
  if (ctxt->sax2) {
10173
8.33M
      if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
10174
8.33M
    (!ctxt->disableSAX))
10175
5.90M
    ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
10176
8.33M
#ifdef LIBXML_SAX1_ENABLED
10177
8.33M
  } else {
10178
5.90M
      if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
10179
5.90M
    (!ctxt->disableSAX))
10180
4.59M
    ctxt->sax->endElement(ctxt->userData, name);
10181
5.90M
#endif /* LIBXML_SAX1_ENABLED */
10182
5.90M
  }
10183
14.2M
  namePop(ctxt);
10184
14.2M
  spacePop(ctxt);
10185
14.2M
  if (nsNr != ctxt->nsNr)
10186
27.2k
      nsPop(ctxt, ctxt->nsNr - nsNr);
10187
14.2M
  if ( ret != NULL && ctxt->record_info ) {
10188
0
     node_info.end_pos = ctxt->input->consumed +
10189
0
            (CUR_PTR - ctxt->input->base);
10190
0
     node_info.end_line = ctxt->input->line;
10191
0
     node_info.node = ret;
10192
0
     xmlParserAddNodeInfo(ctxt, &node_info);
10193
0
  }
10194
14.2M
  return(1);
10195
14.2M
    }
10196
14.5M
    if (RAW == '>') {
10197
13.5M
        NEXT1;
10198
13.5M
    } else {
10199
997k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
10200
997k
         "Couldn't find end of Start Tag %s line %d\n",
10201
997k
                    name, line, NULL);
10202
10203
  /*
10204
   * end of parsing of this node.
10205
   */
10206
997k
  nodePop(ctxt);
10207
997k
  namePop(ctxt);
10208
997k
  spacePop(ctxt);
10209
997k
  if (nsNr != ctxt->nsNr)
10210
136k
      nsPop(ctxt, ctxt->nsNr - nsNr);
10211
10212
  /*
10213
   * Capture end position and add node
10214
   */
10215
997k
  if ( ret != NULL && ctxt->record_info ) {
10216
0
     node_info.end_pos = ctxt->input->consumed +
10217
0
            (CUR_PTR - ctxt->input->base);
10218
0
     node_info.end_line = ctxt->input->line;
10219
0
     node_info.node = ret;
10220
0
     xmlParserAddNodeInfo(ctxt, &node_info);
10221
0
  }
10222
997k
  return(-1);
10223
997k
    }
10224
10225
13.5M
    return(0);
10226
14.5M
}
10227
10228
/**
10229
 * xmlParseElementEnd:
10230
 * @ctxt:  an XML parser context
10231
 *
10232
 * Parse the end of an XML element. Always consumes '</'.
10233
 */
10234
static void
10235
12.7M
xmlParseElementEnd(xmlParserCtxtPtr ctxt) {
10236
12.7M
    xmlParserNodeInfo node_info;
10237
12.7M
    xmlNodePtr ret = ctxt->node;
10238
10239
12.7M
    if (ctxt->nameNr <= 0) {
10240
0
        if ((RAW == '<') && (NXT(1) == '/'))
10241
0
            SKIP(2);
10242
0
        return;
10243
0
    }
10244
10245
    /*
10246
     * parse the end of tag: '</' should be here.
10247
     */
10248
12.7M
    if (ctxt->sax2) {
10249
7.43M
  xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
10250
7.43M
  namePop(ctxt);
10251
7.43M
    }
10252
5.35M
#ifdef LIBXML_SAX1_ENABLED
10253
5.35M
    else
10254
5.35M
  xmlParseEndTag1(ctxt, 0);
10255
12.7M
#endif /* LIBXML_SAX1_ENABLED */
10256
10257
    /*
10258
     * Capture end position and add node
10259
     */
10260
12.7M
    if ( ret != NULL && ctxt->record_info ) {
10261
0
       node_info.end_pos = ctxt->input->consumed +
10262
0
                          (CUR_PTR - ctxt->input->base);
10263
0
       node_info.end_line = ctxt->input->line;
10264
0
       node_info.node = ret;
10265
0
       xmlParserAddNodeInfo(ctxt, &node_info);
10266
0
    }
10267
12.7M
}
10268
10269
/**
10270
 * xmlParseVersionNum:
10271
 * @ctxt:  an XML parser context
10272
 *
10273
 * DEPRECATED: Internal function, don't use.
10274
 *
10275
 * parse the XML version value.
10276
 *
10277
 * [26] VersionNum ::= '1.' [0-9]+
10278
 *
10279
 * In practice allow [0-9].[0-9]+ at that level
10280
 *
10281
 * Returns the string giving the XML version number, or NULL
10282
 */
10283
xmlChar *
10284
510k
xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10285
510k
    xmlChar *buf = NULL;
10286
510k
    int len = 0;
10287
510k
    int size = 10;
10288
510k
    xmlChar cur;
10289
10290
510k
    buf = (xmlChar *) xmlMallocAtomic(size);
10291
510k
    if (buf == NULL) {
10292
0
  xmlErrMemory(ctxt, NULL);
10293
0
  return(NULL);
10294
0
    }
10295
510k
    cur = CUR;
10296
510k
    if (!((cur >= '0') && (cur <= '9'))) {
10297
3.97k
  xmlFree(buf);
10298
3.97k
  return(NULL);
10299
3.97k
    }
10300
506k
    buf[len++] = cur;
10301
506k
    NEXT;
10302
506k
    cur=CUR;
10303
506k
    if (cur != '.') {
10304
4.52k
  xmlFree(buf);
10305
4.52k
  return(NULL);
10306
4.52k
    }
10307
501k
    buf[len++] = cur;
10308
501k
    NEXT;
10309
501k
    cur=CUR;
10310
2.47M
    while ((cur >= '0') && (cur <= '9')) {
10311
1.97M
  if (len + 1 >= size) {
10312
4.18k
      xmlChar *tmp;
10313
10314
4.18k
      size *= 2;
10315
4.18k
      tmp = (xmlChar *) xmlRealloc(buf, size);
10316
4.18k
      if (tmp == NULL) {
10317
0
          xmlFree(buf);
10318
0
    xmlErrMemory(ctxt, NULL);
10319
0
    return(NULL);
10320
0
      }
10321
4.18k
      buf = tmp;
10322
4.18k
  }
10323
1.97M
  buf[len++] = cur;
10324
1.97M
  NEXT;
10325
1.97M
  cur=CUR;
10326
1.97M
    }
10327
501k
    buf[len] = 0;
10328
501k
    return(buf);
10329
501k
}
10330
10331
/**
10332
 * xmlParseVersionInfo:
10333
 * @ctxt:  an XML parser context
10334
 *
10335
 * DEPRECATED: Internal function, don't use.
10336
 *
10337
 * parse the XML version.
10338
 *
10339
 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
10340
 *
10341
 * [25] Eq ::= S? '=' S?
10342
 *
10343
 * Returns the version string, e.g. "1.0"
10344
 */
10345
10346
xmlChar *
10347
578k
xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10348
578k
    xmlChar *version = NULL;
10349
10350
578k
    if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
10351
521k
  SKIP(7);
10352
521k
  SKIP_BLANKS;
10353
521k
  if (RAW != '=') {
10354
5.27k
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10355
5.27k
      return(NULL);
10356
5.27k
        }
10357
516k
  NEXT;
10358
516k
  SKIP_BLANKS;
10359
516k
  if (RAW == '"') {
10360
455k
      NEXT;
10361
455k
      version = xmlParseVersionNum(ctxt);
10362
455k
      if (RAW != '"') {
10363
15.8k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10364
15.8k
      } else
10365
439k
          NEXT;
10366
455k
  } else if (RAW == '\''){
10367
55.0k
      NEXT;
10368
55.0k
      version = xmlParseVersionNum(ctxt);
10369
55.0k
      if (RAW != '\'') {
10370
1.91k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10371
1.91k
      } else
10372
53.1k
          NEXT;
10373
55.0k
  } else {
10374
6.04k
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10375
6.04k
  }
10376
516k
    }
10377
573k
    return(version);
10378
578k
}
10379
10380
/**
10381
 * xmlParseEncName:
10382
 * @ctxt:  an XML parser context
10383
 *
10384
 * DEPRECATED: Internal function, don't use.
10385
 *
10386
 * parse the XML encoding name
10387
 *
10388
 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10389
 *
10390
 * Returns the encoding name value or NULL
10391
 */
10392
xmlChar *
10393
229k
xmlParseEncName(xmlParserCtxtPtr ctxt) {
10394
229k
    xmlChar *buf = NULL;
10395
229k
    int len = 0;
10396
229k
    int size = 10;
10397
229k
    xmlChar cur;
10398
10399
229k
    cur = CUR;
10400
229k
    if (((cur >= 'a') && (cur <= 'z')) ||
10401
229k
        ((cur >= 'A') && (cur <= 'Z'))) {
10402
228k
  buf = (xmlChar *) xmlMallocAtomic(size);
10403
228k
  if (buf == NULL) {
10404
0
      xmlErrMemory(ctxt, NULL);
10405
0
      return(NULL);
10406
0
  }
10407
10408
228k
  buf[len++] = cur;
10409
228k
  NEXT;
10410
228k
  cur = CUR;
10411
4.17M
  while (((cur >= 'a') && (cur <= 'z')) ||
10412
4.17M
         ((cur >= 'A') && (cur <= 'Z')) ||
10413
4.17M
         ((cur >= '0') && (cur <= '9')) ||
10414
4.17M
         (cur == '.') || (cur == '_') ||
10415
4.17M
         (cur == '-')) {
10416
3.94M
      if (len + 1 >= size) {
10417
92.7k
          xmlChar *tmp;
10418
10419
92.7k
    size *= 2;
10420
92.7k
    tmp = (xmlChar *) xmlRealloc(buf, size);
10421
92.7k
    if (tmp == NULL) {
10422
0
        xmlErrMemory(ctxt, NULL);
10423
0
        xmlFree(buf);
10424
0
        return(NULL);
10425
0
    }
10426
92.7k
    buf = tmp;
10427
92.7k
      }
10428
3.94M
      buf[len++] = cur;
10429
3.94M
      NEXT;
10430
3.94M
      cur = CUR;
10431
3.94M
      if (cur == 0) {
10432
1.15k
          SHRINK;
10433
1.15k
    GROW;
10434
1.15k
    cur = CUR;
10435
1.15k
      }
10436
3.94M
        }
10437
228k
  buf[len] = 0;
10438
228k
    } else {
10439
970
  xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10440
970
    }
10441
229k
    return(buf);
10442
229k
}
10443
10444
/**
10445
 * xmlParseEncodingDecl:
10446
 * @ctxt:  an XML parser context
10447
 *
10448
 * DEPRECATED: Internal function, don't use.
10449
 *
10450
 * parse the XML encoding declaration
10451
 *
10452
 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' |  "'" EncName "'")
10453
 *
10454
 * this setups the conversion filters.
10455
 *
10456
 * Returns the encoding value or NULL
10457
 */
10458
10459
const xmlChar *
10460
383k
xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10461
383k
    xmlChar *encoding = NULL;
10462
10463
383k
    SKIP_BLANKS;
10464
383k
    if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
10465
231k
  SKIP(8);
10466
231k
  SKIP_BLANKS;
10467
231k
  if (RAW != '=') {
10468
1.12k
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10469
1.12k
      return(NULL);
10470
1.12k
        }
10471
230k
  NEXT;
10472
230k
  SKIP_BLANKS;
10473
230k
  if (RAW == '"') {
10474
194k
      NEXT;
10475
194k
      encoding = xmlParseEncName(ctxt);
10476
194k
      if (RAW != '"') {
10477
7.59k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10478
7.59k
    xmlFree((xmlChar *) encoding);
10479
7.59k
    return(NULL);
10480
7.59k
      } else
10481
187k
          NEXT;
10482
194k
  } else if (RAW == '\''){
10483
34.1k
      NEXT;
10484
34.1k
      encoding = xmlParseEncName(ctxt);
10485
34.1k
      if (RAW != '\'') {
10486
658
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10487
658
    xmlFree((xmlChar *) encoding);
10488
658
    return(NULL);
10489
658
      } else
10490
33.5k
          NEXT;
10491
34.1k
  } else {
10492
1.54k
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10493
1.54k
  }
10494
10495
        /*
10496
         * Non standard parsing, allowing the user to ignore encoding
10497
         */
10498
222k
        if (ctxt->options & XML_PARSE_IGNORE_ENC) {
10499
71.2k
      xmlFree((xmlChar *) encoding);
10500
71.2k
            return(NULL);
10501
71.2k
  }
10502
10503
  /*
10504
   * UTF-16 encoding switch has already taken place at this stage,
10505
   * more over the little-endian/big-endian selection is already done
10506
   */
10507
151k
        if ((encoding != NULL) &&
10508
151k
      ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10509
149k
       (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
10510
      /*
10511
       * If no encoding was passed to the parser, that we are
10512
       * using UTF-16 and no decoder is present i.e. the
10513
       * document is apparently UTF-8 compatible, then raise an
10514
       * encoding mismatch fatal error
10515
       */
10516
4.14k
      if ((ctxt->encoding == NULL) &&
10517
4.14k
          (ctxt->input->buf != NULL) &&
10518
4.14k
          (ctxt->input->buf->encoder == NULL)) {
10519
4.12k
    xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10520
4.12k
      "Document labelled UTF-16 but has UTF-8 content\n");
10521
4.12k
      }
10522
4.14k
      if (ctxt->encoding != NULL)
10523
15
    xmlFree((xmlChar *) ctxt->encoding);
10524
4.14k
      ctxt->encoding = encoding;
10525
4.14k
  }
10526
  /*
10527
   * UTF-8 encoding is handled natively
10528
   */
10529
146k
        else if ((encoding != NULL) &&
10530
146k
      ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10531
145k
       (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
10532
65.1k
      if (ctxt->encoding != NULL)
10533
38
    xmlFree((xmlChar *) ctxt->encoding);
10534
65.1k
      ctxt->encoding = encoding;
10535
65.1k
  }
10536
81.8k
  else if (encoding != NULL) {
10537
80.5k
      xmlCharEncodingHandlerPtr handler;
10538
10539
80.5k
      if (ctxt->input->encoding != NULL)
10540
0
    xmlFree((xmlChar *) ctxt->input->encoding);
10541
80.5k
      ctxt->input->encoding = encoding;
10542
10543
80.5k
            handler = xmlFindCharEncodingHandler((const char *) encoding);
10544
80.5k
      if (handler != NULL) {
10545
79.3k
    if (xmlSwitchToEncoding(ctxt, handler) < 0) {
10546
        /* failed to convert */
10547
290
        ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
10548
290
        return(NULL);
10549
290
    }
10550
79.3k
      } else {
10551
1.25k
    xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
10552
1.25k
      "Unsupported encoding %s\n", encoding);
10553
1.25k
    return(NULL);
10554
1.25k
      }
10555
80.5k
  }
10556
151k
    }
10557
301k
    return(encoding);
10558
383k
}
10559
10560
/**
10561
 * xmlParseSDDecl:
10562
 * @ctxt:  an XML parser context
10563
 *
10564
 * DEPRECATED: Internal function, don't use.
10565
 *
10566
 * parse the XML standalone declaration
10567
 *
10568
 * [32] SDDecl ::= S 'standalone' Eq
10569
 *                 (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10570
 *
10571
 * [ VC: Standalone Document Declaration ]
10572
 * TODO The standalone document declaration must have the value "no"
10573
 * if any external markup declarations contain declarations of:
10574
 *  - attributes with default values, if elements to which these
10575
 *    attributes apply appear in the document without specifications
10576
 *    of values for these attributes, or
10577
 *  - entities (other than amp, lt, gt, apos, quot), if references
10578
 *    to those entities appear in the document, or
10579
 *  - attributes with values subject to normalization, where the
10580
 *    attribute appears in the document with a value which will change
10581
 *    as a result of normalization, or
10582
 *  - element types with element content, if white space occurs directly
10583
 *    within any instance of those types.
10584
 *
10585
 * Returns:
10586
 *   1 if standalone="yes"
10587
 *   0 if standalone="no"
10588
 *  -2 if standalone attribute is missing or invalid
10589
 *    (A standalone value of -2 means that the XML declaration was found,
10590
 *     but no value was specified for the standalone attribute).
10591
 */
10592
10593
int
10594
307k
xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10595
307k
    int standalone = -2;
10596
10597
307k
    SKIP_BLANKS;
10598
307k
    if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10599
65.9k
  SKIP(10);
10600
65.9k
        SKIP_BLANKS;
10601
65.9k
  if (RAW != '=') {
10602
455
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10603
455
      return(standalone);
10604
455
        }
10605
65.5k
  NEXT;
10606
65.5k
  SKIP_BLANKS;
10607
65.5k
        if (RAW == '\''){
10608
35.0k
      NEXT;
10609
35.0k
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10610
29.6k
          standalone = 0;
10611
29.6k
                SKIP(2);
10612
29.6k
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10613
5.36k
                 (NXT(2) == 's')) {
10614
4.96k
          standalone = 1;
10615
4.96k
    SKIP(3);
10616
4.96k
            } else {
10617
403
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10618
403
      }
10619
35.0k
      if (RAW != '\'') {
10620
1.04k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10621
1.04k
      } else
10622
34.0k
          NEXT;
10623
35.0k
  } else if (RAW == '"'){
10624
30.0k
      NEXT;
10625
30.0k
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10626
14.1k
          standalone = 0;
10627
14.1k
    SKIP(2);
10628
15.8k
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10629
15.8k
                 (NXT(2) == 's')) {
10630
14.5k
          standalone = 1;
10631
14.5k
                SKIP(3);
10632
14.5k
            } else {
10633
1.26k
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10634
1.26k
      }
10635
30.0k
      if (RAW != '"') {
10636
1.62k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10637
1.62k
      } else
10638
28.3k
          NEXT;
10639
30.0k
  } else {
10640
481
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10641
481
        }
10642
65.5k
    }
10643
306k
    return(standalone);
10644
307k
}
10645
10646
/**
10647
 * xmlParseXMLDecl:
10648
 * @ctxt:  an XML parser context
10649
 *
10650
 * DEPRECATED: Internal function, don't use.
10651
 *
10652
 * parse an XML declaration header
10653
 *
10654
 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10655
 */
10656
10657
void
10658
548k
xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10659
548k
    xmlChar *version;
10660
10661
    /*
10662
     * This value for standalone indicates that the document has an
10663
     * XML declaration but it does not have a standalone attribute.
10664
     * It will be overwritten later if a standalone attribute is found.
10665
     */
10666
548k
    ctxt->input->standalone = -2;
10667
10668
    /*
10669
     * We know that '<?xml' is here.
10670
     */
10671
548k
    SKIP(5);
10672
10673
548k
    if (!IS_BLANK_CH(RAW)) {
10674
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10675
0
                 "Blank needed after '<?xml'\n");
10676
0
    }
10677
548k
    SKIP_BLANKS;
10678
10679
    /*
10680
     * We must have the VersionInfo here.
10681
     */
10682
548k
    version = xmlParseVersionInfo(ctxt);
10683
548k
    if (version == NULL) {
10684
73.3k
  xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10685
475k
    } else {
10686
475k
  if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10687
      /*
10688
       * Changed here for XML-1.0 5th edition
10689
       */
10690
8.41k
      if (ctxt->options & XML_PARSE_OLD10) {
10691
2.37k
    xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10692
2.37k
                "Unsupported version '%s'\n",
10693
2.37k
                version);
10694
6.03k
      } else {
10695
6.03k
          if ((version[0] == '1') && ((version[1] == '.'))) {
10696
5.17k
        xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10697
5.17k
                      "Unsupported version '%s'\n",
10698
5.17k
          version, NULL);
10699
5.17k
    } else {
10700
859
        xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10701
859
              "Unsupported version '%s'\n",
10702
859
              version);
10703
859
    }
10704
6.03k
      }
10705
8.41k
  }
10706
475k
  if (ctxt->version != NULL)
10707
0
      xmlFree((void *) ctxt->version);
10708
475k
  ctxt->version = version;
10709
475k
    }
10710
10711
    /*
10712
     * We may have the encoding declaration
10713
     */
10714
548k
    if (!IS_BLANK_CH(RAW)) {
10715
281k
        if ((RAW == '?') && (NXT(1) == '>')) {
10716
195k
      SKIP(2);
10717
195k
      return;
10718
195k
  }
10719
86.7k
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10720
86.7k
    }
10721
353k
    xmlParseEncodingDecl(ctxt);
10722
353k
    if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10723
353k
         (ctxt->instate == XML_PARSER_EOF)) {
10724
  /*
10725
   * The XML REC instructs us to stop parsing right here
10726
   */
10727
1.08k
        return;
10728
1.08k
    }
10729
10730
    /*
10731
     * We may have the standalone status.
10732
     */
10733
352k
    if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10734
47.9k
        if ((RAW == '?') && (NXT(1) == '>')) {
10735
45.1k
      SKIP(2);
10736
45.1k
      return;
10737
45.1k
  }
10738
2.79k
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10739
2.79k
    }
10740
10741
    /*
10742
     * We can grow the input buffer freely at that point
10743
     */
10744
307k
    GROW;
10745
10746
307k
    SKIP_BLANKS;
10747
307k
    ctxt->input->standalone = xmlParseSDDecl(ctxt);
10748
10749
307k
    SKIP_BLANKS;
10750
307k
    if ((RAW == '?') && (NXT(1) == '>')) {
10751
181k
        SKIP(2);
10752
181k
    } else if (RAW == '>') {
10753
        /* Deprecated old WD ... */
10754
1.41k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10755
1.41k
  NEXT;
10756
124k
    } else {
10757
124k
        int c;
10758
10759
124k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10760
5.09M
        while ((c = CUR) != 0) {
10761
5.08M
            NEXT;
10762
5.08M
            if (c == '>')
10763
113k
                break;
10764
5.08M
        }
10765
124k
    }
10766
307k
}
10767
10768
/**
10769
 * xmlParseMisc:
10770
 * @ctxt:  an XML parser context
10771
 *
10772
 * DEPRECATED: Internal function, don't use.
10773
 *
10774
 * parse an XML Misc* optional field.
10775
 *
10776
 * [27] Misc ::= Comment | PI |  S
10777
 */
10778
10779
void
10780
798k
xmlParseMisc(xmlParserCtxtPtr ctxt) {
10781
924k
    while (ctxt->instate != XML_PARSER_EOF) {
10782
924k
        SKIP_BLANKS;
10783
924k
        GROW;
10784
924k
        if ((RAW == '<') && (NXT(1) == '?')) {
10785
80.0k
      xmlParsePI(ctxt);
10786
844k
        } else if (CMP4(CUR_PTR, '<', '!', '-', '-')) {
10787
46.7k
      xmlParseComment(ctxt);
10788
798k
        } else {
10789
798k
            break;
10790
798k
        }
10791
924k
    }
10792
798k
}
10793
10794
/**
10795
 * xmlParseDocument:
10796
 * @ctxt:  an XML parser context
10797
 *
10798
 * parse an XML document (and build a tree if using the standard SAX
10799
 * interface).
10800
 *
10801
 * [1] document ::= prolog element Misc*
10802
 *
10803
 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10804
 *
10805
 * Returns 0, -1 in case of error. the parser context is augmented
10806
 *                as a result of the parsing.
10807
 */
10808
10809
int
10810
369k
xmlParseDocument(xmlParserCtxtPtr ctxt) {
10811
369k
    xmlChar start[4];
10812
369k
    xmlCharEncoding enc;
10813
10814
369k
    xmlInitParser();
10815
10816
369k
    if ((ctxt == NULL) || (ctxt->input == NULL))
10817
0
        return(-1);
10818
10819
369k
    GROW;
10820
10821
    /*
10822
     * SAX: detecting the level.
10823
     */
10824
369k
    xmlDetectSAX2(ctxt);
10825
10826
    /*
10827
     * SAX: beginning of the document processing.
10828
     */
10829
369k
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10830
369k
        ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10831
369k
    if (ctxt->instate == XML_PARSER_EOF)
10832
0
  return(-1);
10833
10834
369k
    if ((ctxt->encoding == NULL) &&
10835
369k
        ((ctxt->input->end - ctxt->input->cur) >= 4)) {
10836
  /*
10837
   * Get the 4 first bytes and decode the charset
10838
   * if enc != XML_CHAR_ENCODING_NONE
10839
   * plug some encoding conversion routines.
10840
   */
10841
359k
  start[0] = RAW;
10842
359k
  start[1] = NXT(1);
10843
359k
  start[2] = NXT(2);
10844
359k
  start[3] = NXT(3);
10845
359k
  enc = xmlDetectCharEncoding(&start[0], 4);
10846
359k
  if (enc != XML_CHAR_ENCODING_NONE) {
10847
196k
      xmlSwitchEncoding(ctxt, enc);
10848
196k
  }
10849
359k
    }
10850
10851
10852
369k
    if (CUR == 0) {
10853
2.71k
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10854
2.71k
  return(-1);
10855
2.71k
    }
10856
10857
    /*
10858
     * Check for the XMLDecl in the Prolog.
10859
     * do not GROW here to avoid the detected encoder to decode more
10860
     * than just the first line, unless the amount of data is really
10861
     * too small to hold "<?xml version="1.0" encoding="foo"
10862
     */
10863
366k
    if ((ctxt->input->end - ctxt->input->cur) < 35) {
10864
29.3k
       GROW;
10865
29.3k
    }
10866
366k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10867
10868
  /*
10869
   * Note that we will switch encoding on the fly.
10870
   */
10871
184k
  xmlParseXMLDecl(ctxt);
10872
184k
  if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10873
184k
      (ctxt->instate == XML_PARSER_EOF)) {
10874
      /*
10875
       * The XML REC instructs us to stop parsing right here
10876
       */
10877
400
      return(-1);
10878
400
  }
10879
183k
  ctxt->standalone = ctxt->input->standalone;
10880
183k
  SKIP_BLANKS;
10881
183k
    } else {
10882
182k
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10883
182k
    }
10884
366k
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10885
347k
        ctxt->sax->startDocument(ctxt->userData);
10886
366k
    if (ctxt->instate == XML_PARSER_EOF)
10887
0
  return(-1);
10888
366k
    if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10889
366k
        (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10890
0
  ctxt->myDoc->compression = ctxt->input->buf->compressed;
10891
0
    }
10892
10893
    /*
10894
     * The Misc part of the Prolog
10895
     */
10896
366k
    xmlParseMisc(ctxt);
10897
10898
    /*
10899
     * Then possibly doc type declaration(s) and more Misc
10900
     * (doctypedecl Misc*)?
10901
     */
10902
366k
    GROW;
10903
366k
    if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10904
10905
231k
  ctxt->inSubset = 1;
10906
231k
  xmlParseDocTypeDecl(ctxt);
10907
231k
  if (RAW == '[') {
10908
166k
      ctxt->instate = XML_PARSER_DTD;
10909
166k
      xmlParseInternalSubset(ctxt);
10910
166k
      if (ctxt->instate == XML_PARSER_EOF)
10911
47.0k
    return(-1);
10912
166k
  }
10913
10914
  /*
10915
   * Create and update the external subset.
10916
   */
10917
184k
  ctxt->inSubset = 2;
10918
184k
  if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10919
184k
      (!ctxt->disableSAX))
10920
172k
      ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10921
172k
                                ctxt->extSubSystem, ctxt->extSubURI);
10922
184k
  if (ctxt->instate == XML_PARSER_EOF)
10923
18.8k
      return(-1);
10924
165k
  ctxt->inSubset = 0;
10925
10926
165k
        xmlCleanSpecialAttr(ctxt);
10927
10928
165k
  ctxt->instate = XML_PARSER_PROLOG;
10929
165k
  xmlParseMisc(ctxt);
10930
165k
    }
10931
10932
    /*
10933
     * Time to start parsing the tree itself
10934
     */
10935
300k
    GROW;
10936
300k
    if (RAW != '<') {
10937
34.1k
  xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10938
34.1k
           "Start tag expected, '<' not found\n");
10939
266k
    } else {
10940
266k
  ctxt->instate = XML_PARSER_CONTENT;
10941
266k
  xmlParseElement(ctxt);
10942
266k
  ctxt->instate = XML_PARSER_EPILOG;
10943
10944
10945
  /*
10946
   * The Misc part at the end
10947
   */
10948
266k
  xmlParseMisc(ctxt);
10949
10950
266k
  if (RAW != 0) {
10951
73.8k
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10952
73.8k
  }
10953
266k
  ctxt->instate = XML_PARSER_EOF;
10954
266k
    }
10955
10956
    /*
10957
     * SAX: end of the document processing.
10958
     */
10959
300k
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10960
300k
        ctxt->sax->endDocument(ctxt->userData);
10961
10962
    /*
10963
     * Remove locally kept entity definitions if the tree was not built
10964
     */
10965
300k
    if ((ctxt->myDoc != NULL) &&
10966
300k
  (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10967
1.81k
  xmlFreeDoc(ctxt->myDoc);
10968
1.81k
  ctxt->myDoc = NULL;
10969
1.81k
    }
10970
10971
300k
    if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10972
30.9k
        ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10973
30.9k
  if (ctxt->valid)
10974
17.0k
      ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10975
30.9k
  if (ctxt->nsWellFormed)
10976
29.4k
      ctxt->myDoc->properties |= XML_DOC_NSVALID;
10977
30.9k
  if (ctxt->options & XML_PARSE_OLD10)
10978
6.84k
      ctxt->myDoc->properties |= XML_DOC_OLD10;
10979
30.9k
    }
10980
300k
    if (! ctxt->wellFormed) {
10981
269k
  ctxt->valid = 0;
10982
269k
  return(-1);
10983
269k
    }
10984
31.0k
    return(0);
10985
300k
}
10986
10987
/**
10988
 * xmlParseExtParsedEnt:
10989
 * @ctxt:  an XML parser context
10990
 *
10991
 * parse a general parsed entity
10992
 * An external general parsed entity is well-formed if it matches the
10993
 * production labeled extParsedEnt.
10994
 *
10995
 * [78] extParsedEnt ::= TextDecl? content
10996
 *
10997
 * Returns 0, -1 in case of error. the parser context is augmented
10998
 *                as a result of the parsing.
10999
 */
11000
11001
int
11002
0
xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
11003
0
    xmlChar start[4];
11004
0
    xmlCharEncoding enc;
11005
11006
0
    if ((ctxt == NULL) || (ctxt->input == NULL))
11007
0
        return(-1);
11008
11009
0
    xmlDetectSAX2(ctxt);
11010
11011
0
    GROW;
11012
11013
    /*
11014
     * SAX: beginning of the document processing.
11015
     */
11016
0
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11017
0
        ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
11018
11019
    /*
11020
     * Get the 4 first bytes and decode the charset
11021
     * if enc != XML_CHAR_ENCODING_NONE
11022
     * plug some encoding conversion routines.
11023
     */
11024
0
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11025
0
  start[0] = RAW;
11026
0
  start[1] = NXT(1);
11027
0
  start[2] = NXT(2);
11028
0
  start[3] = NXT(3);
11029
0
  enc = xmlDetectCharEncoding(start, 4);
11030
0
  if (enc != XML_CHAR_ENCODING_NONE) {
11031
0
      xmlSwitchEncoding(ctxt, enc);
11032
0
  }
11033
0
    }
11034
11035
11036
0
    if (CUR == 0) {
11037
0
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11038
0
    }
11039
11040
    /*
11041
     * Check for the XMLDecl in the Prolog.
11042
     */
11043
0
    GROW;
11044
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
11045
11046
  /*
11047
   * Note that we will switch encoding on the fly.
11048
   */
11049
0
  xmlParseXMLDecl(ctxt);
11050
0
  if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11051
      /*
11052
       * The XML REC instructs us to stop parsing right here
11053
       */
11054
0
      return(-1);
11055
0
  }
11056
0
  SKIP_BLANKS;
11057
0
    } else {
11058
0
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11059
0
    }
11060
0
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
11061
0
        ctxt->sax->startDocument(ctxt->userData);
11062
0
    if (ctxt->instate == XML_PARSER_EOF)
11063
0
  return(-1);
11064
11065
    /*
11066
     * Doing validity checking on chunk doesn't make sense
11067
     */
11068
0
    ctxt->instate = XML_PARSER_CONTENT;
11069
0
    ctxt->validate = 0;
11070
0
    ctxt->loadsubset = 0;
11071
0
    ctxt->depth = 0;
11072
11073
0
    xmlParseContent(ctxt);
11074
0
    if (ctxt->instate == XML_PARSER_EOF)
11075
0
  return(-1);
11076
11077
0
    if ((RAW == '<') && (NXT(1) == '/')) {
11078
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11079
0
    } else if (RAW != 0) {
11080
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
11081
0
    }
11082
11083
    /*
11084
     * SAX: end of the document processing.
11085
     */
11086
0
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11087
0
        ctxt->sax->endDocument(ctxt->userData);
11088
11089
0
    if (! ctxt->wellFormed) return(-1);
11090
0
    return(0);
11091
0
}
11092
11093
#ifdef LIBXML_PUSH_ENABLED
11094
/************************************************************************
11095
 *                  *
11096
 *    Progressive parsing interfaces        *
11097
 *                  *
11098
 ************************************************************************/
11099
11100
/**
11101
 * xmlParseLookupChar:
11102
 * @ctxt:  an XML parser context
11103
 * @c:  character
11104
 *
11105
 * Check whether the input buffer contains a character.
11106
 */
11107
static int
11108
25.7M
xmlParseLookupChar(xmlParserCtxtPtr ctxt, int c) {
11109
25.7M
    const xmlChar *cur;
11110
11111
25.7M
    if (ctxt->checkIndex == 0) {
11112
24.9M
        cur = ctxt->input->cur + 1;
11113
24.9M
    } else {
11114
849k
        cur = ctxt->input->cur + ctxt->checkIndex;
11115
849k
    }
11116
11117
25.7M
    if (memchr(cur, c, ctxt->input->end - cur) == NULL) {
11118
881k
        ctxt->checkIndex = ctxt->input->end - ctxt->input->cur;
11119
881k
        return(0);
11120
24.8M
    } else {
11121
24.8M
        ctxt->checkIndex = 0;
11122
24.8M
        return(1);
11123
24.8M
    }
11124
25.7M
}
11125
11126
/**
11127
 * xmlParseLookupString:
11128
 * @ctxt:  an XML parser context
11129
 * @startDelta: delta to apply at the start
11130
 * @str:  string
11131
 * @strLen:  length of string
11132
 *
11133
 * Check whether the input buffer contains a string.
11134
 */
11135
static const xmlChar *
11136
xmlParseLookupString(xmlParserCtxtPtr ctxt, size_t startDelta,
11137
4.11M
                     const char *str, size_t strLen) {
11138
4.11M
    const xmlChar *cur, *term;
11139
11140
4.11M
    if (ctxt->checkIndex == 0) {
11141
2.44M
        cur = ctxt->input->cur + startDelta;
11142
2.44M
    } else {
11143
1.67M
        cur = ctxt->input->cur + ctxt->checkIndex;
11144
1.67M
    }
11145
11146
4.11M
    term = BAD_CAST strstr((const char *) cur, str);
11147
4.11M
    if (term == NULL) {
11148
2.23M
        const xmlChar *end = ctxt->input->end;
11149
11150
        /* Rescan (strLen - 1) characters. */
11151
2.23M
        if ((size_t) (end - cur) < strLen)
11152
51.4k
            end = cur;
11153
2.18M
        else
11154
2.18M
            end -= strLen - 1;
11155
2.23M
        ctxt->checkIndex = end - ctxt->input->cur;
11156
2.23M
    } else {
11157
1.87M
        ctxt->checkIndex = 0;
11158
1.87M
    }
11159
11160
4.11M
    return(term);
11161
4.11M
}
11162
11163
/**
11164
 * xmlParseLookupCharData:
11165
 * @ctxt:  an XML parser context
11166
 *
11167
 * Check whether the input buffer contains terminated char data.
11168
 */
11169
static int
11170
45.0M
xmlParseLookupCharData(xmlParserCtxtPtr ctxt) {
11171
45.0M
    const xmlChar *cur = ctxt->input->cur + ctxt->checkIndex;
11172
45.0M
    const xmlChar *end = ctxt->input->end;
11173
11174
814M
    while (cur < end) {
11175
809M
        if ((*cur == '<') || (*cur == '&')) {
11176
39.8M
            ctxt->checkIndex = 0;
11177
39.8M
            return(1);
11178
39.8M
        }
11179
769M
        cur++;
11180
769M
    }
11181
11182
5.13M
    ctxt->checkIndex = cur - ctxt->input->cur;
11183
5.13M
    return(0);
11184
45.0M
}
11185
11186
/**
11187
 * xmlParseLookupGt:
11188
 * @ctxt:  an XML parser context
11189
 *
11190
 * Check whether there's enough data in the input buffer to finish parsing
11191
 * a start tag. This has to take quotes into account.
11192
 */
11193
static int
11194
41.6M
xmlParseLookupGt(xmlParserCtxtPtr ctxt) {
11195
41.6M
    const xmlChar *cur;
11196
41.6M
    const xmlChar *end = ctxt->input->end;
11197
41.6M
    int state = ctxt->endCheckState;
11198
11199
41.6M
    if (ctxt->checkIndex == 0)
11200
34.2M
        cur = ctxt->input->cur + 1;
11201
7.37M
    else
11202
7.37M
        cur = ctxt->input->cur + ctxt->checkIndex;
11203
11204
1.77G
    while (cur < end) {
11205
1.76G
        if (state) {
11206
929M
            if (*cur == state)
11207
60.1M
                state = 0;
11208
929M
        } else if (*cur == '\'' || *cur == '"') {
11209
60.2M
            state = *cur;
11210
776M
        } else if (*cur == '>') {
11211
34.1M
            ctxt->checkIndex = 0;
11212
34.1M
            ctxt->endCheckState = 0;
11213
34.1M
            return(1);
11214
34.1M
        }
11215
1.73G
        cur++;
11216
1.73G
    }
11217
11218
7.54M
    ctxt->checkIndex = cur - ctxt->input->cur;
11219
7.54M
    ctxt->endCheckState = state;
11220
7.54M
    return(0);
11221
41.6M
}
11222
11223
/**
11224
 * xmlParseLookupInternalSubset:
11225
 * @ctxt:  an XML parser context
11226
 *
11227
 * Check whether there's enough data in the input buffer to finish parsing
11228
 * the internal subset.
11229
 */
11230
static int
11231
2.29M
xmlParseLookupInternalSubset(xmlParserCtxtPtr ctxt) {
11232
    /*
11233
     * Sorry, but progressive parsing of the internal subset is not
11234
     * supported. We first check that the full content of the internal
11235
     * subset is available and parsing is launched only at that point.
11236
     * Internal subset ends with "']' S? '>'" in an unescaped section and
11237
     * not in a ']]>' sequence which are conditional sections.
11238
     */
11239
2.29M
    const xmlChar *cur, *start;
11240
2.29M
    const xmlChar *end = ctxt->input->end;
11241
2.29M
    int state = ctxt->endCheckState;
11242
11243
2.29M
    if (ctxt->checkIndex == 0) {
11244
298k
        cur = ctxt->input->cur + 1;
11245
2.00M
    } else {
11246
2.00M
        cur = ctxt->input->cur + ctxt->checkIndex;
11247
2.00M
    }
11248
2.29M
    start = cur;
11249
11250
436M
    while (cur < end) {
11251
434M
        if (state == '-') {
11252
30.1M
            if ((*cur == '-') &&
11253
30.1M
                (cur[1] == '-') &&
11254
30.1M
                (cur[2] == '>')) {
11255
444k
                state = 0;
11256
444k
                cur += 3;
11257
444k
                start = cur;
11258
444k
                continue;
11259
444k
            }
11260
30.1M
        }
11261
403M
        else if (state == ']') {
11262
300k
            if (*cur == '>') {
11263
250k
                ctxt->checkIndex = 0;
11264
250k
                ctxt->endCheckState = 0;
11265
250k
                return(1);
11266
250k
            }
11267
50.4k
            if (IS_BLANK_CH(*cur)) {
11268
29.5k
                state = ' ';
11269
29.5k
            } else if (*cur != ']') {
11270
10.2k
                state = 0;
11271
10.2k
                start = cur;
11272
10.2k
                continue;
11273
10.2k
            }
11274
50.4k
        }
11275
403M
        else if (state == ' ') {
11276
99.8k
            if (*cur == '>') {
11277
2.10k
                ctxt->checkIndex = 0;
11278
2.10k
                ctxt->endCheckState = 0;
11279
2.10k
                return(1);
11280
2.10k
            }
11281
97.7k
            if (!IS_BLANK_CH(*cur)) {
11282
27.0k
                state = 0;
11283
27.0k
                start = cur;
11284
27.0k
                continue;
11285
27.0k
            }
11286
97.7k
        }
11287
403M
        else if (state != 0) {
11288
267M
            if (*cur == state) {
11289
2.62M
                state = 0;
11290
2.62M
                start = cur + 1;
11291
2.62M
            }
11292
267M
        }
11293
135M
        else if (*cur == '<') {
11294
3.44M
            if ((cur[1] == '!') &&
11295
3.44M
                (cur[2] == '-') &&
11296
3.44M
                (cur[3] == '-')) {
11297
448k
                state = '-';
11298
448k
                cur += 4;
11299
                /* Don't treat <!--> as comment */
11300
448k
                start = cur;
11301
448k
                continue;
11302
448k
            }
11303
3.44M
        }
11304
132M
        else if ((*cur == '"') || (*cur == '\'') || (*cur == ']')) {
11305
2.92M
            state = *cur;
11306
2.92M
        }
11307
11308
432M
        cur++;
11309
432M
    }
11310
11311
    /*
11312
     * Rescan the three last characters to detect "<!--" and "-->"
11313
     * split across chunks.
11314
     */
11315
2.04M
    if ((state == 0) || (state == '-')) {
11316
734k
        if (cur - start < 3)
11317
65.2k
            cur = start;
11318
668k
        else
11319
668k
            cur -= 3;
11320
734k
    }
11321
2.04M
    ctxt->checkIndex = cur - ctxt->input->cur;
11322
2.04M
    ctxt->endCheckState = state;
11323
2.04M
    return(0);
11324
2.29M
}
11325
11326
/**
11327
 * xmlCheckCdataPush:
11328
 * @cur: pointer to the block of characters
11329
 * @len: length of the block in bytes
11330
 * @complete: 1 if complete CDATA block is passed in, 0 if partial block
11331
 *
11332
 * Check that the block of characters is okay as SCdata content [20]
11333
 *
11334
 * Returns the number of bytes to pass if okay, a negative index where an
11335
 *         UTF-8 error occurred otherwise
11336
 */
11337
static int
11338
1.23M
xmlCheckCdataPush(const xmlChar *utf, int len, int complete) {
11339
1.23M
    int ix;
11340
1.23M
    unsigned char c;
11341
1.23M
    int codepoint;
11342
11343
1.23M
    if ((utf == NULL) || (len <= 0))
11344
20.5k
        return(0);
11345
11346
69.5M
    for (ix = 0; ix < len;) {      /* string is 0-terminated */
11347
69.2M
        c = utf[ix];
11348
69.2M
        if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
11349
64.9M
      if (c >= 0x20)
11350
56.9M
    ix++;
11351
7.97M
      else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11352
7.74M
          ix++;
11353
225k
      else
11354
225k
          return(-ix);
11355
64.9M
  } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
11356
1.48M
      if (ix + 2 > len) return(complete ? -ix : ix);
11357
1.46M
      if ((utf[ix+1] & 0xc0 ) != 0x80)
11358
373k
          return(-ix);
11359
1.09M
      codepoint = (utf[ix] & 0x1f) << 6;
11360
1.09M
      codepoint |= utf[ix+1] & 0x3f;
11361
1.09M
      if (!xmlIsCharQ(codepoint))
11362
12.3k
          return(-ix);
11363
1.08M
      ix += 2;
11364
2.80M
  } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
11365
1.26M
      if (ix + 3 > len) return(complete ? -ix : ix);
11366
1.24M
      if (((utf[ix+1] & 0xc0) != 0x80) ||
11367
1.24M
          ((utf[ix+2] & 0xc0) != 0x80))
11368
51.4k
        return(-ix);
11369
1.19M
      codepoint = (utf[ix] & 0xf) << 12;
11370
1.19M
      codepoint |= (utf[ix+1] & 0x3f) << 6;
11371
1.19M
      codepoint |= utf[ix+2] & 0x3f;
11372
1.19M
      if (!xmlIsCharQ(codepoint))
11373
16.9k
          return(-ix);
11374
1.18M
      ix += 3;
11375
1.54M
  } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
11376
1.43M
      if (ix + 4 > len) return(complete ? -ix : ix);
11377
1.42M
      if (((utf[ix+1] & 0xc0) != 0x80) ||
11378
1.42M
          ((utf[ix+2] & 0xc0) != 0x80) ||
11379
1.42M
    ((utf[ix+3] & 0xc0) != 0x80))
11380
60.4k
        return(-ix);
11381
1.36M
      codepoint = (utf[ix] & 0x7) << 18;
11382
1.36M
      codepoint |= (utf[ix+1] & 0x3f) << 12;
11383
1.36M
      codepoint |= (utf[ix+2] & 0x3f) << 6;
11384
1.36M
      codepoint |= utf[ix+3] & 0x3f;
11385
1.36M
      if (!xmlIsCharQ(codepoint))
11386
16.6k
          return(-ix);
11387
1.34M
      ix += 4;
11388
1.34M
  } else       /* unknown encoding */
11389
109k
      return(-ix);
11390
69.2M
      }
11391
299k
      return(ix);
11392
1.21M
}
11393
11394
/**
11395
 * xmlParseTryOrFinish:
11396
 * @ctxt:  an XML parser context
11397
 * @terminate:  last chunk indicator
11398
 *
11399
 * Try to progress on parsing
11400
 *
11401
 * Returns zero if no parsing was possible
11402
 */
11403
static int
11404
20.3M
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11405
20.3M
    int ret = 0;
11406
20.3M
    int avail, tlen;
11407
20.3M
    xmlChar cur, next;
11408
11409
20.3M
    if (ctxt->input == NULL)
11410
0
        return(0);
11411
11412
#ifdef DEBUG_PUSH
11413
    switch (ctxt->instate) {
11414
  case XML_PARSER_EOF:
11415
      xmlGenericError(xmlGenericErrorContext,
11416
        "PP: try EOF\n"); break;
11417
  case XML_PARSER_START:
11418
      xmlGenericError(xmlGenericErrorContext,
11419
        "PP: try START\n"); break;
11420
  case XML_PARSER_MISC:
11421
      xmlGenericError(xmlGenericErrorContext,
11422
        "PP: try MISC\n");break;
11423
  case XML_PARSER_COMMENT:
11424
      xmlGenericError(xmlGenericErrorContext,
11425
        "PP: try COMMENT\n");break;
11426
  case XML_PARSER_PROLOG:
11427
      xmlGenericError(xmlGenericErrorContext,
11428
        "PP: try PROLOG\n");break;
11429
  case XML_PARSER_START_TAG:
11430
      xmlGenericError(xmlGenericErrorContext,
11431
        "PP: try START_TAG\n");break;
11432
  case XML_PARSER_CONTENT:
11433
      xmlGenericError(xmlGenericErrorContext,
11434
        "PP: try CONTENT\n");break;
11435
  case XML_PARSER_CDATA_SECTION:
11436
      xmlGenericError(xmlGenericErrorContext,
11437
        "PP: try CDATA_SECTION\n");break;
11438
  case XML_PARSER_END_TAG:
11439
      xmlGenericError(xmlGenericErrorContext,
11440
        "PP: try END_TAG\n");break;
11441
  case XML_PARSER_ENTITY_DECL:
11442
      xmlGenericError(xmlGenericErrorContext,
11443
        "PP: try ENTITY_DECL\n");break;
11444
  case XML_PARSER_ENTITY_VALUE:
11445
      xmlGenericError(xmlGenericErrorContext,
11446
        "PP: try ENTITY_VALUE\n");break;
11447
  case XML_PARSER_ATTRIBUTE_VALUE:
11448
      xmlGenericError(xmlGenericErrorContext,
11449
        "PP: try ATTRIBUTE_VALUE\n");break;
11450
  case XML_PARSER_DTD:
11451
      xmlGenericError(xmlGenericErrorContext,
11452
        "PP: try DTD\n");break;
11453
  case XML_PARSER_EPILOG:
11454
      xmlGenericError(xmlGenericErrorContext,
11455
        "PP: try EPILOG\n");break;
11456
  case XML_PARSER_PI:
11457
      xmlGenericError(xmlGenericErrorContext,
11458
        "PP: try PI\n");break;
11459
        case XML_PARSER_IGNORE:
11460
            xmlGenericError(xmlGenericErrorContext,
11461
        "PP: try IGNORE\n");break;
11462
    }
11463
#endif
11464
11465
20.3M
    if ((ctxt->input != NULL) &&
11466
20.3M
        (ctxt->input->cur - ctxt->input->base > 4096)) {
11467
420k
        xmlParserInputShrink(ctxt->input);
11468
420k
    }
11469
11470
208M
    while (ctxt->instate != XML_PARSER_EOF) {
11471
208M
  if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11472
215k
      return(0);
11473
11474
207M
  if (ctxt->input == NULL) break;
11475
207M
  if (ctxt->input->buf == NULL)
11476
0
      avail = ctxt->input->length -
11477
0
              (ctxt->input->cur - ctxt->input->base);
11478
207M
  else {
11479
      /*
11480
       * If we are operating on converted input, try to flush
11481
       * remaining chars to avoid them stalling in the non-converted
11482
       * buffer. But do not do this in document start where
11483
       * encoding="..." may not have been read and we work on a
11484
       * guessed encoding.
11485
       */
11486
207M
      if ((ctxt->instate != XML_PARSER_START) &&
11487
207M
          (ctxt->input->buf->raw != NULL) &&
11488
207M
    (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
11489
310k
                size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11490
310k
                                                 ctxt->input);
11491
310k
    size_t current = ctxt->input->cur - ctxt->input->base;
11492
11493
310k
    xmlParserInputBufferPush(ctxt->input->buf, 0, "");
11494
310k
                xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11495
310k
                                      base, current);
11496
310k
      }
11497
207M
      avail = xmlBufUse(ctxt->input->buf->buffer) -
11498
207M
        (ctxt->input->cur - ctxt->input->base);
11499
207M
  }
11500
207M
        if (avail < 1)
11501
941k
      goto done;
11502
206M
        switch (ctxt->instate) {
11503
0
            case XML_PARSER_EOF:
11504
          /*
11505
     * Document parsing is done !
11506
     */
11507
0
          goto done;
11508
1.77M
            case XML_PARSER_START:
11509
1.77M
    if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11510
571k
        xmlChar start[4];
11511
571k
        xmlCharEncoding enc;
11512
11513
        /*
11514
         * Very first chars read from the document flow.
11515
         */
11516
571k
        if (avail < 4)
11517
48.8k
      goto done;
11518
11519
        /*
11520
         * Get the 4 first bytes and decode the charset
11521
         * if enc != XML_CHAR_ENCODING_NONE
11522
         * plug some encoding conversion routines,
11523
         * else xmlSwitchEncoding will set to (default)
11524
         * UTF8.
11525
         */
11526
522k
        start[0] = RAW;
11527
522k
        start[1] = NXT(1);
11528
522k
        start[2] = NXT(2);
11529
522k
        start[3] = NXT(3);
11530
522k
        enc = xmlDetectCharEncoding(start, 4);
11531
522k
        xmlSwitchEncoding(ctxt, enc);
11532
522k
        break;
11533
571k
    }
11534
11535
1.20M
    if (avail < 2)
11536
481
        goto done;
11537
1.20M
    cur = ctxt->input->cur[0];
11538
1.20M
    next = ctxt->input->cur[1];
11539
1.20M
    if (cur == 0) {
11540
3.90k
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11541
3.90k
      ctxt->sax->setDocumentLocator(ctxt->userData,
11542
3.90k
                  &xmlDefaultSAXLocator);
11543
3.90k
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11544
3.90k
        xmlHaltParser(ctxt);
11545
#ifdef DEBUG_PUSH
11546
        xmlGenericError(xmlGenericErrorContext,
11547
          "PP: entering EOF\n");
11548
#endif
11549
3.90k
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11550
3.90k
      ctxt->sax->endDocument(ctxt->userData);
11551
3.90k
        goto done;
11552
3.90k
    }
11553
1.19M
          if ((cur == '<') && (next == '?')) {
11554
        /* PI or XML decl */
11555
886k
        if (avail < 5) goto done;
11556
886k
        if ((!terminate) &&
11557
886k
                        (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11558
489k
      goto done;
11559
396k
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11560
396k
      ctxt->sax->setDocumentLocator(ctxt->userData,
11561
396k
                  &xmlDefaultSAXLocator);
11562
396k
        if ((ctxt->input->cur[2] == 'x') &&
11563
396k
      (ctxt->input->cur[3] == 'm') &&
11564
396k
      (ctxt->input->cur[4] == 'l') &&
11565
396k
      (IS_BLANK_CH(ctxt->input->cur[5]))) {
11566
364k
      ret += 5;
11567
#ifdef DEBUG_PUSH
11568
      xmlGenericError(xmlGenericErrorContext,
11569
        "PP: Parsing XML Decl\n");
11570
#endif
11571
364k
      xmlParseXMLDecl(ctxt);
11572
364k
      if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11573
          /*
11574
           * The XML REC instructs us to stop parsing right
11575
           * here
11576
           */
11577
687
          xmlHaltParser(ctxt);
11578
687
          return(0);
11579
687
      }
11580
363k
      ctxt->standalone = ctxt->input->standalone;
11581
363k
      if ((ctxt->encoding == NULL) &&
11582
363k
          (ctxt->input->encoding != NULL))
11583
50.0k
          ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11584
363k
      if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11585
363k
          (!ctxt->disableSAX))
11586
329k
          ctxt->sax->startDocument(ctxt->userData);
11587
363k
      ctxt->instate = XML_PARSER_MISC;
11588
#ifdef DEBUG_PUSH
11589
      xmlGenericError(xmlGenericErrorContext,
11590
        "PP: entering MISC\n");
11591
#endif
11592
363k
        } else {
11593
32.1k
      ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11594
32.1k
      if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11595
32.1k
          (!ctxt->disableSAX))
11596
32.1k
          ctxt->sax->startDocument(ctxt->userData);
11597
32.1k
      ctxt->instate = XML_PARSER_MISC;
11598
#ifdef DEBUG_PUSH
11599
      xmlGenericError(xmlGenericErrorContext,
11600
        "PP: entering MISC\n");
11601
#endif
11602
32.1k
        }
11603
396k
    } else {
11604
312k
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11605
312k
      ctxt->sax->setDocumentLocator(ctxt->userData,
11606
312k
                  &xmlDefaultSAXLocator);
11607
312k
        ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11608
312k
        if (ctxt->version == NULL) {
11609
0
            xmlErrMemory(ctxt, NULL);
11610
0
      break;
11611
0
        }
11612
312k
        if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11613
312k
            (!ctxt->disableSAX))
11614
312k
      ctxt->sax->startDocument(ctxt->userData);
11615
312k
        ctxt->instate = XML_PARSER_MISC;
11616
#ifdef DEBUG_PUSH
11617
        xmlGenericError(xmlGenericErrorContext,
11618
          "PP: entering MISC\n");
11619
#endif
11620
312k
    }
11621
708k
    break;
11622
45.8M
            case XML_PARSER_START_TAG: {
11623
45.8M
          const xmlChar *name;
11624
45.8M
    const xmlChar *prefix = NULL;
11625
45.8M
    const xmlChar *URI = NULL;
11626
45.8M
                int line = ctxt->input->line;
11627
45.8M
    int nsNr = ctxt->nsNr;
11628
11629
45.8M
    if ((avail < 2) && (ctxt->inputNr == 1))
11630
0
        goto done;
11631
45.8M
    cur = ctxt->input->cur[0];
11632
45.8M
          if (cur != '<') {
11633
30.4k
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11634
30.4k
        xmlHaltParser(ctxt);
11635
30.4k
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11636
30.4k
      ctxt->sax->endDocument(ctxt->userData);
11637
30.4k
        goto done;
11638
30.4k
    }
11639
45.7M
    if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11640
7.29M
                    goto done;
11641
38.4M
    if (ctxt->spaceNr == 0)
11642
157k
        spacePush(ctxt, -1);
11643
38.3M
    else if (*ctxt->space == -2)
11644
3.33M
        spacePush(ctxt, -1);
11645
34.9M
    else
11646
34.9M
        spacePush(ctxt, *ctxt->space);
11647
38.4M
#ifdef LIBXML_SAX1_ENABLED
11648
38.4M
    if (ctxt->sax2)
11649
21.6M
#endif /* LIBXML_SAX1_ENABLED */
11650
21.6M
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
11651
16.8M
#ifdef LIBXML_SAX1_ENABLED
11652
16.8M
    else
11653
16.8M
        name = xmlParseStartTag(ctxt);
11654
38.4M
#endif /* LIBXML_SAX1_ENABLED */
11655
38.4M
    if (ctxt->instate == XML_PARSER_EOF)
11656
1.75k
        goto done;
11657
38.4M
    if (name == NULL) {
11658
36.8k
        spacePop(ctxt);
11659
36.8k
        xmlHaltParser(ctxt);
11660
36.8k
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11661
36.8k
      ctxt->sax->endDocument(ctxt->userData);
11662
36.8k
        goto done;
11663
36.8k
    }
11664
38.4M
#ifdef LIBXML_VALID_ENABLED
11665
    /*
11666
     * [ VC: Root Element Type ]
11667
     * The Name in the document type declaration must match
11668
     * the element type of the root element.
11669
     */
11670
38.4M
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11671
38.4M
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
11672
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11673
38.4M
#endif /* LIBXML_VALID_ENABLED */
11674
11675
    /*
11676
     * Check for an Empty Element.
11677
     */
11678
38.4M
    if ((RAW == '/') && (NXT(1) == '>')) {
11679
18.8M
        SKIP(2);
11680
11681
18.8M
        if (ctxt->sax2) {
11682
10.6M
      if ((ctxt->sax != NULL) &&
11683
10.6M
          (ctxt->sax->endElementNs != NULL) &&
11684
10.6M
          (!ctxt->disableSAX))
11685
10.6M
          ctxt->sax->endElementNs(ctxt->userData, name,
11686
10.6M
                                  prefix, URI);
11687
10.6M
      if (ctxt->nsNr - nsNr > 0)
11688
23.4k
          nsPop(ctxt, ctxt->nsNr - nsNr);
11689
10.6M
#ifdef LIBXML_SAX1_ENABLED
11690
10.6M
        } else {
11691
8.14M
      if ((ctxt->sax != NULL) &&
11692
8.14M
          (ctxt->sax->endElement != NULL) &&
11693
8.14M
          (!ctxt->disableSAX))
11694
8.14M
          ctxt->sax->endElement(ctxt->userData, name);
11695
8.14M
#endif /* LIBXML_SAX1_ENABLED */
11696
8.14M
        }
11697
18.8M
        if (ctxt->instate == XML_PARSER_EOF)
11698
0
      goto done;
11699
18.8M
        spacePop(ctxt);
11700
18.8M
        if (ctxt->nameNr == 0) {
11701
17.5k
      ctxt->instate = XML_PARSER_EPILOG;
11702
18.8M
        } else {
11703
18.8M
      ctxt->instate = XML_PARSER_CONTENT;
11704
18.8M
        }
11705
18.8M
        break;
11706
18.8M
    }
11707
19.6M
    if (RAW == '>') {
11708
18.2M
        NEXT;
11709
18.2M
    } else {
11710
1.40M
        xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11711
1.40M
           "Couldn't find end of Start Tag %s\n",
11712
1.40M
           name);
11713
1.40M
        nodePop(ctxt);
11714
1.40M
        spacePop(ctxt);
11715
1.40M
    }
11716
19.6M
                nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
11717
11718
19.6M
    ctxt->instate = XML_PARSER_CONTENT;
11719
19.6M
                break;
11720
38.4M
      }
11721
135M
            case XML_PARSER_CONTENT: {
11722
135M
    if ((avail < 2) && (ctxt->inputNr == 1))
11723
565k
        goto done;
11724
135M
    cur = ctxt->input->cur[0];
11725
135M
    next = ctxt->input->cur[1];
11726
11727
135M
    if ((cur == '<') && (next == '/')) {
11728
17.2M
        ctxt->instate = XML_PARSER_END_TAG;
11729
17.2M
        break;
11730
117M
          } else if ((cur == '<') && (next == '?')) {
11731
390k
        if ((!terminate) &&
11732
390k
            (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11733
238k
      goto done;
11734
151k
        xmlParsePI(ctxt);
11735
151k
        ctxt->instate = XML_PARSER_CONTENT;
11736
117M
    } else if ((cur == '<') && (next != '!')) {
11737
38.0M
        ctxt->instate = XML_PARSER_START_TAG;
11738
38.0M
        break;
11739
79.5M
    } else if ((cur == '<') && (next == '!') &&
11740
79.5M
               (ctxt->input->cur[2] == '-') &&
11741
79.5M
         (ctxt->input->cur[3] == '-')) {
11742
1.02M
        if ((!terminate) &&
11743
1.02M
            (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11744
426k
      goto done;
11745
594k
        xmlParseComment(ctxt);
11746
594k
        ctxt->instate = XML_PARSER_CONTENT;
11747
78.5M
    } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11748
78.5M
        (ctxt->input->cur[2] == '[') &&
11749
78.5M
        (ctxt->input->cur[3] == 'C') &&
11750
78.5M
        (ctxt->input->cur[4] == 'D') &&
11751
78.5M
        (ctxt->input->cur[5] == 'A') &&
11752
78.5M
        (ctxt->input->cur[6] == 'T') &&
11753
78.5M
        (ctxt->input->cur[7] == 'A') &&
11754
78.5M
        (ctxt->input->cur[8] == '[')) {
11755
145k
        SKIP(9);
11756
145k
        ctxt->instate = XML_PARSER_CDATA_SECTION;
11757
145k
        break;
11758
78.3M
    } else if ((cur == '<') && (next == '!') &&
11759
78.3M
               (avail < 9)) {
11760
31.2k
        goto done;
11761
78.3M
    } else if (cur == '<') {
11762
478k
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11763
478k
                    "detected an error in element content\n");
11764
478k
                    SKIP(1);
11765
77.8M
    } else if (cur == '&') {
11766
13.1M
        if ((!terminate) && (!xmlParseLookupChar(ctxt, ';')))
11767
343k
      goto done;
11768
12.7M
        xmlParseReference(ctxt);
11769
64.7M
    } else {
11770
        /* TODO Avoid the extra copy, handle directly !!! */
11771
        /*
11772
         * Goal of the following test is:
11773
         *  - minimize calls to the SAX 'character' callback
11774
         *    when they are mergeable
11775
         *  - handle a problem for isBlank when we only parse
11776
         *    a sequence of blank chars and the next one is
11777
         *    not available to check against '<' presence.
11778
         *  - tries to homogenize the differences in SAX
11779
         *    callbacks between the push and pull versions
11780
         *    of the parser.
11781
         */
11782
64.7M
        if ((ctxt->inputNr == 1) &&
11783
64.7M
            (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11784
46.0M
      if ((!terminate) && (!xmlParseLookupCharData(ctxt)))
11785
5.13M
          goto done;
11786
46.0M
                    }
11787
59.6M
                    ctxt->checkIndex = 0;
11788
59.6M
        xmlParseCharData(ctxt, 0);
11789
59.6M
    }
11790
73.6M
    break;
11791
135M
      }
11792
73.6M
            case XML_PARSER_END_TAG:
11793
17.7M
    if (avail < 2)
11794
0
        goto done;
11795
17.7M
    if ((!terminate) && (!xmlParseLookupChar(ctxt, '>')))
11796
538k
        goto done;
11797
17.2M
    if (ctxt->sax2) {
11798
9.59M
              xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
11799
9.59M
        nameNsPop(ctxt);
11800
9.59M
    }
11801
7.63M
#ifdef LIBXML_SAX1_ENABLED
11802
7.63M
      else
11803
7.63M
        xmlParseEndTag1(ctxt, 0);
11804
17.2M
#endif /* LIBXML_SAX1_ENABLED */
11805
17.2M
    if (ctxt->instate == XML_PARSER_EOF) {
11806
        /* Nothing */
11807
17.2M
    } else if (ctxt->nameNr == 0) {
11808
94.7k
        ctxt->instate = XML_PARSER_EPILOG;
11809
17.1M
    } else {
11810
17.1M
        ctxt->instate = XML_PARSER_CONTENT;
11811
17.1M
    }
11812
17.2M
    break;
11813
1.66M
            case XML_PARSER_CDATA_SECTION: {
11814
          /*
11815
     * The Push mode need to have the SAX callback for
11816
     * cdataBlock merge back contiguous callbacks.
11817
     */
11818
1.66M
    const xmlChar *term;
11819
11820
1.66M
                if (terminate) {
11821
                    /*
11822
                     * Don't call xmlParseLookupString. If 'terminate'
11823
                     * is set, checkIndex is invalid.
11824
                     */
11825
27.2k
                    term = BAD_CAST strstr((const char *) ctxt->input->cur,
11826
27.2k
                                           "]]>");
11827
1.63M
                } else {
11828
1.63M
        term = xmlParseLookupString(ctxt, 0, "]]>", 3);
11829
1.63M
                }
11830
11831
1.66M
    if (term == NULL) {
11832
946k
        int tmp, size;
11833
11834
946k
                    if (terminate) {
11835
                        /* Unfinished CDATA section */
11836
6.89k
                        size = ctxt->input->end - ctxt->input->cur;
11837
939k
                    } else {
11838
939k
                        if (avail < XML_PARSER_BIG_BUFFER_SIZE + 2)
11839
427k
                            goto done;
11840
511k
                        ctxt->checkIndex = 0;
11841
                        /* XXX: Why don't we pass the full buffer? */
11842
511k
                        size = XML_PARSER_BIG_BUFFER_SIZE;
11843
511k
                    }
11844
518k
                    tmp = xmlCheckCdataPush(ctxt->input->cur, size, 0);
11845
518k
                    if (tmp <= 0) {
11846
320k
                        tmp = -tmp;
11847
320k
                        ctxt->input->cur += tmp;
11848
320k
                        goto encoding_error;
11849
320k
                    }
11850
197k
                    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11851
197k
                        if (ctxt->sax->cdataBlock != NULL)
11852
111k
                            ctxt->sax->cdataBlock(ctxt->userData,
11853
111k
                                                  ctxt->input->cur, tmp);
11854
86.4k
                        else if (ctxt->sax->characters != NULL)
11855
86.4k
                            ctxt->sax->characters(ctxt->userData,
11856
86.4k
                                                  ctxt->input->cur, tmp);
11857
197k
                    }
11858
197k
                    if (ctxt->instate == XML_PARSER_EOF)
11859
0
                        goto done;
11860
197k
                    SKIPL(tmp);
11861
715k
    } else {
11862
715k
                    int base = term - CUR_PTR;
11863
715k
        int tmp;
11864
11865
715k
        tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1);
11866
715k
        if ((tmp < 0) || (tmp != base)) {
11867
587k
      tmp = -tmp;
11868
587k
      ctxt->input->cur += tmp;
11869
587k
      goto encoding_error;
11870
587k
        }
11871
127k
        if ((ctxt->sax != NULL) && (base == 0) &&
11872
127k
            (ctxt->sax->cdataBlock != NULL) &&
11873
127k
            (!ctxt->disableSAX)) {
11874
      /*
11875
       * Special case to provide identical behaviour
11876
       * between pull and push parsers on empty CDATA
11877
       * sections
11878
       */
11879
11.4k
       if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11880
11.4k
           (!strncmp((const char *)&ctxt->input->cur[-9],
11881
11.4k
                     "<![CDATA[", 9)))
11882
11.3k
           ctxt->sax->cdataBlock(ctxt->userData,
11883
11.3k
                                 BAD_CAST "", 0);
11884
116k
        } else if ((ctxt->sax != NULL) && (base > 0) &&
11885
116k
      (!ctxt->disableSAX)) {
11886
107k
      if (ctxt->sax->cdataBlock != NULL)
11887
67.1k
          ctxt->sax->cdataBlock(ctxt->userData,
11888
67.1k
              ctxt->input->cur, base);
11889
39.9k
      else if (ctxt->sax->characters != NULL)
11890
39.9k
          ctxt->sax->characters(ctxt->userData,
11891
39.9k
              ctxt->input->cur, base);
11892
107k
        }
11893
127k
        if (ctxt->instate == XML_PARSER_EOF)
11894
0
      goto done;
11895
127k
        SKIPL(base + 3);
11896
127k
        ctxt->instate = XML_PARSER_CONTENT;
11897
#ifdef DEBUG_PUSH
11898
        xmlGenericError(xmlGenericErrorContext,
11899
          "PP: entering CONTENT\n");
11900
#endif
11901
127k
    }
11902
325k
    break;
11903
1.66M
      }
11904
1.15M
            case XML_PARSER_MISC:
11905
1.58M
            case XML_PARSER_PROLOG:
11906
1.70M
            case XML_PARSER_EPILOG:
11907
1.70M
    SKIP_BLANKS;
11908
1.70M
    if (ctxt->input->buf == NULL)
11909
0
        avail = ctxt->input->length -
11910
0
                (ctxt->input->cur - ctxt->input->base);
11911
1.70M
    else
11912
1.70M
        avail = xmlBufUse(ctxt->input->buf->buffer) -
11913
1.70M
                (ctxt->input->cur - ctxt->input->base);
11914
1.70M
    if (avail < 2)
11915
97.5k
        goto done;
11916
1.60M
    cur = ctxt->input->cur[0];
11917
1.60M
    next = ctxt->input->cur[1];
11918
1.60M
          if ((cur == '<') && (next == '?')) {
11919
170k
        if ((!terminate) &&
11920
170k
                        (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11921
46.2k
      goto done;
11922
#ifdef DEBUG_PUSH
11923
        xmlGenericError(xmlGenericErrorContext,
11924
          "PP: Parsing PI\n");
11925
#endif
11926
124k
        xmlParsePI(ctxt);
11927
124k
        if (ctxt->instate == XML_PARSER_EOF)
11928
0
      goto done;
11929
1.43M
    } else if ((cur == '<') && (next == '!') &&
11930
1.43M
        (ctxt->input->cur[2] == '-') &&
11931
1.43M
        (ctxt->input->cur[3] == '-')) {
11932
176k
        if ((!terminate) &&
11933
176k
                        (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11934
94.9k
      goto done;
11935
#ifdef DEBUG_PUSH
11936
        xmlGenericError(xmlGenericErrorContext,
11937
          "PP: Parsing Comment\n");
11938
#endif
11939
81.6k
        xmlParseComment(ctxt);
11940
81.6k
        if (ctxt->instate == XML_PARSER_EOF)
11941
0
      goto done;
11942
1.25M
    } else if ((ctxt->instate == XML_PARSER_MISC) &&
11943
1.25M
                    (cur == '<') && (next == '!') &&
11944
1.25M
        (ctxt->input->cur[2] == 'D') &&
11945
1.25M
        (ctxt->input->cur[3] == 'O') &&
11946
1.25M
        (ctxt->input->cur[4] == 'C') &&
11947
1.25M
        (ctxt->input->cur[5] == 'T') &&
11948
1.25M
        (ctxt->input->cur[6] == 'Y') &&
11949
1.25M
        (ctxt->input->cur[7] == 'P') &&
11950
1.25M
        (ctxt->input->cur[8] == 'E')) {
11951
685k
        if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11952
247k
                        goto done;
11953
#ifdef DEBUG_PUSH
11954
        xmlGenericError(xmlGenericErrorContext,
11955
          "PP: Parsing internal subset\n");
11956
#endif
11957
438k
        ctxt->inSubset = 1;
11958
438k
        xmlParseDocTypeDecl(ctxt);
11959
438k
        if (ctxt->instate == XML_PARSER_EOF)
11960
0
      goto done;
11961
438k
        if (RAW == '[') {
11962
314k
      ctxt->instate = XML_PARSER_DTD;
11963
#ifdef DEBUG_PUSH
11964
      xmlGenericError(xmlGenericErrorContext,
11965
        "PP: entering DTD\n");
11966
#endif
11967
314k
        } else {
11968
      /*
11969
       * Create and update the external subset.
11970
       */
11971
124k
      ctxt->inSubset = 2;
11972
124k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11973
124k
          (ctxt->sax->externalSubset != NULL))
11974
119k
          ctxt->sax->externalSubset(ctxt->userData,
11975
119k
            ctxt->intSubName, ctxt->extSubSystem,
11976
119k
            ctxt->extSubURI);
11977
124k
      ctxt->inSubset = 0;
11978
124k
      xmlCleanSpecialAttr(ctxt);
11979
124k
      ctxt->instate = XML_PARSER_PROLOG;
11980
#ifdef DEBUG_PUSH
11981
      xmlGenericError(xmlGenericErrorContext,
11982
        "PP: entering PROLOG\n");
11983
#endif
11984
124k
        }
11985
571k
    } else if ((cur == '<') && (next == '!') &&
11986
571k
               (avail <
11987
50.5k
                            (ctxt->instate == XML_PARSER_MISC ? 9 : 4))) {
11988
45.8k
        goto done;
11989
525k
    } else if (ctxt->instate == XML_PARSER_EPILOG) {
11990
15.1k
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11991
15.1k
        xmlHaltParser(ctxt);
11992
#ifdef DEBUG_PUSH
11993
        xmlGenericError(xmlGenericErrorContext,
11994
          "PP: entering EOF\n");
11995
#endif
11996
15.1k
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11997
15.1k
      ctxt->sax->endDocument(ctxt->userData);
11998
15.1k
        goto done;
11999
510k
                } else {
12000
510k
        ctxt->instate = XML_PARSER_START_TAG;
12001
#ifdef DEBUG_PUSH
12002
        xmlGenericError(xmlGenericErrorContext,
12003
          "PP: entering START_TAG\n");
12004
#endif
12005
510k
    }
12006
1.15M
    break;
12007
2.34M
            case XML_PARSER_DTD: {
12008
2.34M
                if ((!terminate) && (!xmlParseLookupInternalSubset(ctxt)))
12009
2.04M
                    goto done;
12010
297k
    xmlParseInternalSubset(ctxt);
12011
297k
    if (ctxt->instate == XML_PARSER_EOF)
12012
67.1k
        goto done;
12013
229k
    ctxt->inSubset = 2;
12014
229k
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
12015
229k
        (ctxt->sax->externalSubset != NULL))
12016
222k
        ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
12017
222k
          ctxt->extSubSystem, ctxt->extSubURI);
12018
229k
    ctxt->inSubset = 0;
12019
229k
    xmlCleanSpecialAttr(ctxt);
12020
229k
    if (ctxt->instate == XML_PARSER_EOF)
12021
12.3k
        goto done;
12022
217k
    ctxt->instate = XML_PARSER_PROLOG;
12023
#ifdef DEBUG_PUSH
12024
    xmlGenericError(xmlGenericErrorContext,
12025
      "PP: entering PROLOG\n");
12026
#endif
12027
217k
                break;
12028
229k
      }
12029
0
            case XML_PARSER_COMMENT:
12030
0
    xmlGenericError(xmlGenericErrorContext,
12031
0
      "PP: internal error, state == COMMENT\n");
12032
0
    ctxt->instate = XML_PARSER_CONTENT;
12033
#ifdef DEBUG_PUSH
12034
    xmlGenericError(xmlGenericErrorContext,
12035
      "PP: entering CONTENT\n");
12036
#endif
12037
0
    break;
12038
0
            case XML_PARSER_IGNORE:
12039
0
    xmlGenericError(xmlGenericErrorContext,
12040
0
      "PP: internal error, state == IGNORE");
12041
0
          ctxt->instate = XML_PARSER_DTD;
12042
#ifdef DEBUG_PUSH
12043
    xmlGenericError(xmlGenericErrorContext,
12044
      "PP: entering DTD\n");
12045
#endif
12046
0
          break;
12047
0
            case XML_PARSER_PI:
12048
0
    xmlGenericError(xmlGenericErrorContext,
12049
0
      "PP: internal error, state == PI\n");
12050
0
    ctxt->instate = XML_PARSER_CONTENT;
12051
#ifdef DEBUG_PUSH
12052
    xmlGenericError(xmlGenericErrorContext,
12053
      "PP: entering CONTENT\n");
12054
#endif
12055
0
    break;
12056
0
            case XML_PARSER_ENTITY_DECL:
12057
0
    xmlGenericError(xmlGenericErrorContext,
12058
0
      "PP: internal error, state == ENTITY_DECL\n");
12059
0
    ctxt->instate = XML_PARSER_DTD;
12060
#ifdef DEBUG_PUSH
12061
    xmlGenericError(xmlGenericErrorContext,
12062
      "PP: entering DTD\n");
12063
#endif
12064
0
    break;
12065
0
            case XML_PARSER_ENTITY_VALUE:
12066
0
    xmlGenericError(xmlGenericErrorContext,
12067
0
      "PP: internal error, state == ENTITY_VALUE\n");
12068
0
    ctxt->instate = XML_PARSER_CONTENT;
12069
#ifdef DEBUG_PUSH
12070
    xmlGenericError(xmlGenericErrorContext,
12071
      "PP: entering DTD\n");
12072
#endif
12073
0
    break;
12074
0
            case XML_PARSER_ATTRIBUTE_VALUE:
12075
0
    xmlGenericError(xmlGenericErrorContext,
12076
0
      "PP: internal error, state == ATTRIBUTE_VALUE\n");
12077
0
    ctxt->instate = XML_PARSER_START_TAG;
12078
#ifdef DEBUG_PUSH
12079
    xmlGenericError(xmlGenericErrorContext,
12080
      "PP: entering START_TAG\n");
12081
#endif
12082
0
    break;
12083
0
            case XML_PARSER_SYSTEM_LITERAL:
12084
0
    xmlGenericError(xmlGenericErrorContext,
12085
0
      "PP: internal error, state == SYSTEM_LITERAL\n");
12086
0
    ctxt->instate = XML_PARSER_START_TAG;
12087
#ifdef DEBUG_PUSH
12088
    xmlGenericError(xmlGenericErrorContext,
12089
      "PP: entering START_TAG\n");
12090
#endif
12091
0
    break;
12092
0
            case XML_PARSER_PUBLIC_LITERAL:
12093
0
    xmlGenericError(xmlGenericErrorContext,
12094
0
      "PP: internal error, state == PUBLIC_LITERAL\n");
12095
0
    ctxt->instate = XML_PARSER_START_TAG;
12096
#ifdef DEBUG_PUSH
12097
    xmlGenericError(xmlGenericErrorContext,
12098
      "PP: entering START_TAG\n");
12099
#endif
12100
0
    break;
12101
206M
  }
12102
206M
    }
12103
19.2M
done:
12104
#ifdef DEBUG_PUSH
12105
    xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
12106
#endif
12107
19.2M
    return(ret);
12108
907k
encoding_error:
12109
907k
    {
12110
907k
        char buffer[150];
12111
12112
907k
  snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12113
907k
      ctxt->input->cur[0], ctxt->input->cur[1],
12114
907k
      ctxt->input->cur[2], ctxt->input->cur[3]);
12115
907k
  __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12116
907k
         "Input is not proper UTF-8, indicate encoding !\n%s",
12117
907k
         BAD_CAST buffer, NULL);
12118
907k
    }
12119
907k
    return(0);
12120
20.3M
}
12121
12122
/**
12123
 * xmlParseChunk:
12124
 * @ctxt:  an XML parser context
12125
 * @chunk:  an char array
12126
 * @size:  the size in byte of the chunk
12127
 * @terminate:  last chunk indicator
12128
 *
12129
 * Parse a Chunk of memory
12130
 *
12131
 * Returns zero if no error, the xmlParserErrors otherwise.
12132
 */
12133
int
12134
xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
12135
27.2M
              int terminate) {
12136
27.2M
    int end_in_lf = 0;
12137
27.2M
    int remain = 0;
12138
12139
27.2M
    if (ctxt == NULL)
12140
0
        return(XML_ERR_INTERNAL_ERROR);
12141
27.2M
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12142
7.01M
        return(ctxt->errNo);
12143
20.2M
    if (ctxt->instate == XML_PARSER_EOF)
12144
2.01k
        return(-1);
12145
20.2M
    if (ctxt->input == NULL)
12146
0
        return(-1);
12147
12148
20.2M
    ctxt->progressive = 1;
12149
20.2M
    if (ctxt->instate == XML_PARSER_START)
12150
1.17M
        xmlDetectSAX2(ctxt);
12151
20.2M
    if ((size > 0) && (chunk != NULL) && (!terminate) &&
12152
20.2M
        (chunk[size - 1] == '\r')) {
12153
155k
  end_in_lf = 1;
12154
155k
  size--;
12155
155k
    }
12156
12157
20.3M
xmldecl_done:
12158
12159
20.3M
    if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12160
20.3M
        (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF))  {
12161
19.8M
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12162
19.8M
  size_t cur = ctxt->input->cur - ctxt->input->base;
12163
19.8M
  int res;
12164
12165
        /*
12166
         * Specific handling if we autodetected an encoding, we should not
12167
         * push more than the first line ... which depend on the encoding
12168
         * And only push the rest once the final encoding was detected
12169
         */
12170
19.8M
        if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
12171
19.8M
            (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
12172
124k
            unsigned int len = 45;
12173
12174
124k
            if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12175
124k
                               BAD_CAST "UTF-16")) ||
12176
124k
                (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12177
54.1k
                               BAD_CAST "UTF16")))
12178
70.2k
                len = 90;
12179
54.1k
            else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12180
54.1k
                                    BAD_CAST "UCS-4")) ||
12181
54.1k
                     (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12182
52.7k
                                    BAD_CAST "UCS4")))
12183
1.38k
                len = 180;
12184
12185
124k
            if (ctxt->input->buf->rawconsumed < len)
12186
9.54k
                len -= ctxt->input->buf->rawconsumed;
12187
12188
            /*
12189
             * Change size for reading the initial declaration only
12190
             * if size is greater than len. Otherwise, memmove in xmlBufferAdd
12191
             * will blindly copy extra bytes from memory.
12192
             */
12193
124k
            if ((unsigned int) size > len) {
12194
85.2k
                remain = size - len;
12195
85.2k
                size = len;
12196
85.2k
            } else {
12197
39.0k
                remain = 0;
12198
39.0k
            }
12199
124k
        }
12200
19.8M
  res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12201
19.8M
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12202
19.8M
  if (res < 0) {
12203
2.85k
      ctxt->errNo = XML_PARSER_EOF;
12204
2.85k
      xmlHaltParser(ctxt);
12205
2.85k
      return (XML_PARSER_EOF);
12206
2.85k
  }
12207
#ifdef DEBUG_PUSH
12208
  xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12209
#endif
12210
12211
19.8M
    } else if (ctxt->instate != XML_PARSER_EOF) {
12212
497k
  if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
12213
497k
      xmlParserInputBufferPtr in = ctxt->input->buf;
12214
497k
      if ((in->encoder != NULL) && (in->buffer != NULL) &&
12215
497k
        (in->raw != NULL)) {
12216
38.1k
    int nbchars;
12217
38.1k
    size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
12218
38.1k
    size_t current = ctxt->input->cur - ctxt->input->base;
12219
12220
38.1k
    nbchars = xmlCharEncInput(in, terminate);
12221
38.1k
    xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
12222
38.1k
    if (nbchars < 0) {
12223
        /* TODO 2.6.0 */
12224
4.09k
        xmlGenericError(xmlGenericErrorContext,
12225
4.09k
            "xmlParseChunk: encoder error\n");
12226
4.09k
                    xmlHaltParser(ctxt);
12227
4.09k
        return(XML_ERR_INVALID_ENCODING);
12228
4.09k
    }
12229
38.1k
      }
12230
497k
  }
12231
497k
    }
12232
12233
20.3M
    if (remain != 0) {
12234
84.1k
        xmlParseTryOrFinish(ctxt, 0);
12235
20.2M
    } else {
12236
20.2M
        xmlParseTryOrFinish(ctxt, terminate);
12237
20.2M
    }
12238
20.3M
    if (ctxt->instate == XML_PARSER_EOF)
12239
170k
        return(ctxt->errNo);
12240
12241
20.1M
    if ((ctxt->input != NULL) &&
12242
20.1M
         (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
12243
20.1M
         ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
12244
20.1M
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
12245
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
12246
0
        xmlHaltParser(ctxt);
12247
0
    }
12248
20.1M
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12249
219k
        return(ctxt->errNo);
12250
12251
19.9M
    if (remain != 0) {
12252
83.0k
        chunk += size;
12253
83.0k
        size = remain;
12254
83.0k
        remain = 0;
12255
83.0k
        goto xmldecl_done;
12256
83.0k
    }
12257
19.8M
    if ((end_in_lf == 1) && (ctxt->input != NULL) &&
12258
19.8M
        (ctxt->input->buf != NULL)) {
12259
153k
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
12260
153k
           ctxt->input);
12261
153k
  size_t current = ctxt->input->cur - ctxt->input->base;
12262
12263
153k
  xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
12264
12265
153k
  xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
12266
153k
            base, current);
12267
153k
    }
12268
19.8M
    if (terminate) {
12269
  /*
12270
   * Check for termination
12271
   */
12272
242k
  int cur_avail = 0;
12273
12274
242k
  if (ctxt->input != NULL) {
12275
242k
      if (ctxt->input->buf == NULL)
12276
0
    cur_avail = ctxt->input->length -
12277
0
          (ctxt->input->cur - ctxt->input->base);
12278
242k
      else
12279
242k
    cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
12280
242k
                    (ctxt->input->cur - ctxt->input->base);
12281
242k
  }
12282
12283
242k
  if ((ctxt->instate != XML_PARSER_EOF) &&
12284
242k
      (ctxt->instate != XML_PARSER_EPILOG)) {
12285
161k
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12286
161k
  }
12287
242k
  if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) {
12288
2.42k
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12289
2.42k
  }
12290
242k
  if (ctxt->instate != XML_PARSER_EOF) {
12291
242k
      if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
12292
242k
    ctxt->sax->endDocument(ctxt->userData);
12293
242k
  }
12294
242k
  ctxt->instate = XML_PARSER_EOF;
12295
242k
    }
12296
19.8M
    if (ctxt->wellFormed == 0)
12297
9.20M
  return((xmlParserErrors) ctxt->errNo);
12298
10.6M
    else
12299
10.6M
        return(0);
12300
19.8M
}
12301
12302
/************************************************************************
12303
 *                  *
12304
 *    I/O front end functions to the parser     *
12305
 *                  *
12306
 ************************************************************************/
12307
12308
/**
12309
 * xmlCreatePushParserCtxt:
12310
 * @sax:  a SAX handler
12311
 * @user_data:  The user data returned on SAX callbacks
12312
 * @chunk:  a pointer to an array of chars
12313
 * @size:  number of chars in the array
12314
 * @filename:  an optional file name or URI
12315
 *
12316
 * Create a parser context for using the XML parser in push mode.
12317
 * If @buffer and @size are non-NULL, the data is used to detect
12318
 * the encoding.  The remaining characters will be parsed so they
12319
 * don't need to be fed in again through xmlParseChunk.
12320
 * To allow content encoding detection, @size should be >= 4
12321
 * The value of @filename is used for fetching external entities
12322
 * and error/warning reports.
12323
 *
12324
 * Returns the new parser context or NULL
12325
 */
12326
12327
xmlParserCtxtPtr
12328
xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12329
740k
                        const char *chunk, int size, const char *filename) {
12330
740k
    xmlParserCtxtPtr ctxt;
12331
740k
    xmlParserInputPtr inputStream;
12332
740k
    xmlParserInputBufferPtr buf;
12333
740k
    xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12334
12335
    /*
12336
     * plug some encoding conversion routines
12337
     */
12338
740k
    if ((chunk != NULL) && (size >= 4))
12339
359k
  enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12340
12341
740k
    buf = xmlAllocParserInputBuffer(enc);
12342
740k
    if (buf == NULL) return(NULL);
12343
12344
740k
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
12345
740k
    if (ctxt == NULL) {
12346
0
        xmlErrMemory(NULL, "creating parser: out of memory\n");
12347
0
  xmlFreeParserInputBuffer(buf);
12348
0
  return(NULL);
12349
0
    }
12350
740k
    ctxt->dictNames = 1;
12351
740k
    if (filename == NULL) {
12352
370k
  ctxt->directory = NULL;
12353
370k
    } else {
12354
370k
        ctxt->directory = xmlParserGetDirectory(filename);
12355
370k
    }
12356
12357
740k
    inputStream = xmlNewInputStream(ctxt);
12358
740k
    if (inputStream == NULL) {
12359
0
  xmlFreeParserCtxt(ctxt);
12360
0
  xmlFreeParserInputBuffer(buf);
12361
0
  return(NULL);
12362
0
    }
12363
12364
740k
    if (filename == NULL)
12365
370k
  inputStream->filename = NULL;
12366
370k
    else {
12367
370k
  inputStream->filename = (char *)
12368
370k
      xmlCanonicPath((const xmlChar *) filename);
12369
370k
  if (inputStream->filename == NULL) {
12370
0
            xmlFreeInputStream(inputStream);
12371
0
      xmlFreeParserCtxt(ctxt);
12372
0
      xmlFreeParserInputBuffer(buf);
12373
0
      return(NULL);
12374
0
  }
12375
370k
    }
12376
740k
    inputStream->buf = buf;
12377
740k
    xmlBufResetInput(inputStream->buf->buffer, inputStream);
12378
740k
    inputPush(ctxt, inputStream);
12379
12380
    /*
12381
     * If the caller didn't provide an initial 'chunk' for determining
12382
     * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12383
     * that it can be automatically determined later
12384
     */
12385
740k
    ctxt->charset = XML_CHAR_ENCODING_NONE;
12386
12387
740k
    if ((size != 0) && (chunk != NULL) &&
12388
740k
        (ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
12389
359k
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12390
359k
  size_t cur = ctxt->input->cur - ctxt->input->base;
12391
12392
359k
  xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12393
12394
359k
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12395
#ifdef DEBUG_PUSH
12396
  xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12397
#endif
12398
359k
    }
12399
12400
740k
    if (enc != XML_CHAR_ENCODING_NONE) {
12401
196k
        xmlSwitchEncoding(ctxt, enc);
12402
196k
    }
12403
12404
740k
    return(ctxt);
12405
740k
}
12406
#endif /* LIBXML_PUSH_ENABLED */
12407
12408
/**
12409
 * xmlHaltParser:
12410
 * @ctxt:  an XML parser context
12411
 *
12412
 * Blocks further parser processing don't override error
12413
 * for internal use
12414
 */
12415
static void
12416
660k
xmlHaltParser(xmlParserCtxtPtr ctxt) {
12417
660k
    if (ctxt == NULL)
12418
0
        return;
12419
660k
    ctxt->instate = XML_PARSER_EOF;
12420
660k
    ctxt->disableSAX = 1;
12421
744k
    while (ctxt->inputNr > 1)
12422
84.5k
        xmlFreeInputStream(inputPop(ctxt));
12423
660k
    if (ctxt->input != NULL) {
12424
        /*
12425
   * in case there was a specific allocation deallocate before
12426
   * overriding base
12427
   */
12428
660k
        if (ctxt->input->free != NULL) {
12429
0
      ctxt->input->free((xmlChar *) ctxt->input->base);
12430
0
      ctxt->input->free = NULL;
12431
0
  }
12432
660k
        if (ctxt->input->buf != NULL) {
12433
587k
            xmlFreeParserInputBuffer(ctxt->input->buf);
12434
587k
            ctxt->input->buf = NULL;
12435
587k
        }
12436
660k
  ctxt->input->cur = BAD_CAST"";
12437
660k
        ctxt->input->length = 0;
12438
660k
  ctxt->input->base = ctxt->input->cur;
12439
660k
        ctxt->input->end = ctxt->input->cur;
12440
660k
    }
12441
660k
}
12442
12443
/**
12444
 * xmlStopParser:
12445
 * @ctxt:  an XML parser context
12446
 *
12447
 * Blocks further parser processing
12448
 */
12449
void
12450
371k
xmlStopParser(xmlParserCtxtPtr ctxt) {
12451
371k
    if (ctxt == NULL)
12452
0
        return;
12453
371k
    xmlHaltParser(ctxt);
12454
371k
    ctxt->errNo = XML_ERR_USER_STOP;
12455
371k
}
12456
12457
/**
12458
 * xmlCreateIOParserCtxt:
12459
 * @sax:  a SAX handler
12460
 * @user_data:  The user data returned on SAX callbacks
12461
 * @ioread:  an I/O read function
12462
 * @ioclose:  an I/O close function
12463
 * @ioctx:  an I/O handler
12464
 * @enc:  the charset encoding if known
12465
 *
12466
 * Create a parser context for using the XML parser with an existing
12467
 * I/O stream
12468
 *
12469
 * Returns the new parser context or NULL
12470
 */
12471
xmlParserCtxtPtr
12472
xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12473
  xmlInputReadCallback   ioread, xmlInputCloseCallback  ioclose,
12474
0
  void *ioctx, xmlCharEncoding enc) {
12475
0
    xmlParserCtxtPtr ctxt;
12476
0
    xmlParserInputPtr inputStream;
12477
0
    xmlParserInputBufferPtr buf;
12478
12479
0
    if (ioread == NULL) return(NULL);
12480
12481
0
    buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
12482
0
    if (buf == NULL) {
12483
0
        if (ioclose != NULL)
12484
0
            ioclose(ioctx);
12485
0
        return (NULL);
12486
0
    }
12487
12488
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
12489
0
    if (ctxt == NULL) {
12490
0
  xmlFreeParserInputBuffer(buf);
12491
0
  return(NULL);
12492
0
    }
12493
12494
0
    inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12495
0
    if (inputStream == NULL) {
12496
0
  xmlFreeParserCtxt(ctxt);
12497
0
  return(NULL);
12498
0
    }
12499
0
    inputPush(ctxt, inputStream);
12500
12501
0
    return(ctxt);
12502
0
}
12503
12504
#ifdef LIBXML_VALID_ENABLED
12505
/************************************************************************
12506
 *                  *
12507
 *    Front ends when parsing a DTD       *
12508
 *                  *
12509
 ************************************************************************/
12510
12511
/**
12512
 * xmlIOParseDTD:
12513
 * @sax:  the SAX handler block or NULL
12514
 * @input:  an Input Buffer
12515
 * @enc:  the charset encoding if known
12516
 *
12517
 * Load and parse a DTD
12518
 *
12519
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12520
 * @input will be freed by the function in any case.
12521
 */
12522
12523
xmlDtdPtr
12524
xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12525
0
        xmlCharEncoding enc) {
12526
0
    xmlDtdPtr ret = NULL;
12527
0
    xmlParserCtxtPtr ctxt;
12528
0
    xmlParserInputPtr pinput = NULL;
12529
0
    xmlChar start[4];
12530
12531
0
    if (input == NULL)
12532
0
  return(NULL);
12533
12534
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12535
0
    if (ctxt == NULL) {
12536
0
        xmlFreeParserInputBuffer(input);
12537
0
  return(NULL);
12538
0
    }
12539
12540
    /* We are loading a DTD */
12541
0
    ctxt->options |= XML_PARSE_DTDLOAD;
12542
12543
0
    xmlDetectSAX2(ctxt);
12544
12545
    /*
12546
     * generate a parser input from the I/O handler
12547
     */
12548
12549
0
    pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12550
0
    if (pinput == NULL) {
12551
0
        xmlFreeParserInputBuffer(input);
12552
0
  xmlFreeParserCtxt(ctxt);
12553
0
  return(NULL);
12554
0
    }
12555
12556
    /*
12557
     * plug some encoding conversion routines here.
12558
     */
12559
0
    if (xmlPushInput(ctxt, pinput) < 0) {
12560
0
  xmlFreeParserCtxt(ctxt);
12561
0
  return(NULL);
12562
0
    }
12563
0
    if (enc != XML_CHAR_ENCODING_NONE) {
12564
0
        xmlSwitchEncoding(ctxt, enc);
12565
0
    }
12566
12567
0
    pinput->filename = NULL;
12568
0
    pinput->line = 1;
12569
0
    pinput->col = 1;
12570
0
    pinput->base = ctxt->input->cur;
12571
0
    pinput->cur = ctxt->input->cur;
12572
0
    pinput->free = NULL;
12573
12574
    /*
12575
     * let's parse that entity knowing it's an external subset.
12576
     */
12577
0
    ctxt->inSubset = 2;
12578
0
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12579
0
    if (ctxt->myDoc == NULL) {
12580
0
  xmlErrMemory(ctxt, "New Doc failed");
12581
0
  return(NULL);
12582
0
    }
12583
0
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
12584
0
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12585
0
                                 BAD_CAST "none", BAD_CAST "none");
12586
12587
0
    if ((enc == XML_CHAR_ENCODING_NONE) &&
12588
0
        ((ctxt->input->end - ctxt->input->cur) >= 4)) {
12589
  /*
12590
   * Get the 4 first bytes and decode the charset
12591
   * if enc != XML_CHAR_ENCODING_NONE
12592
   * plug some encoding conversion routines.
12593
   */
12594
0
  start[0] = RAW;
12595
0
  start[1] = NXT(1);
12596
0
  start[2] = NXT(2);
12597
0
  start[3] = NXT(3);
12598
0
  enc = xmlDetectCharEncoding(start, 4);
12599
0
  if (enc != XML_CHAR_ENCODING_NONE) {
12600
0
      xmlSwitchEncoding(ctxt, enc);
12601
0
  }
12602
0
    }
12603
12604
0
    xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12605
12606
0
    if (ctxt->myDoc != NULL) {
12607
0
  if (ctxt->wellFormed) {
12608
0
      ret = ctxt->myDoc->extSubset;
12609
0
      ctxt->myDoc->extSubset = NULL;
12610
0
      if (ret != NULL) {
12611
0
    xmlNodePtr tmp;
12612
12613
0
    ret->doc = NULL;
12614
0
    tmp = ret->children;
12615
0
    while (tmp != NULL) {
12616
0
        tmp->doc = NULL;
12617
0
        tmp = tmp->next;
12618
0
    }
12619
0
      }
12620
0
  } else {
12621
0
      ret = NULL;
12622
0
  }
12623
0
        xmlFreeDoc(ctxt->myDoc);
12624
0
        ctxt->myDoc = NULL;
12625
0
    }
12626
0
    xmlFreeParserCtxt(ctxt);
12627
12628
0
    return(ret);
12629
0
}
12630
12631
/**
12632
 * xmlSAXParseDTD:
12633
 * @sax:  the SAX handler block
12634
 * @ExternalID:  a NAME* containing the External ID of the DTD
12635
 * @SystemID:  a NAME* containing the URL to the DTD
12636
 *
12637
 * DEPRECATED: Don't use.
12638
 *
12639
 * Load and parse an external subset.
12640
 *
12641
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12642
 */
12643
12644
xmlDtdPtr
12645
xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12646
0
                          const xmlChar *SystemID) {
12647
0
    xmlDtdPtr ret = NULL;
12648
0
    xmlParserCtxtPtr ctxt;
12649
0
    xmlParserInputPtr input = NULL;
12650
0
    xmlCharEncoding enc;
12651
0
    xmlChar* systemIdCanonic;
12652
12653
0
    if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12654
12655
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12656
0
    if (ctxt == NULL) {
12657
0
  return(NULL);
12658
0
    }
12659
12660
    /* We are loading a DTD */
12661
0
    ctxt->options |= XML_PARSE_DTDLOAD;
12662
12663
    /*
12664
     * Canonicalise the system ID
12665
     */
12666
0
    systemIdCanonic = xmlCanonicPath(SystemID);
12667
0
    if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
12668
0
  xmlFreeParserCtxt(ctxt);
12669
0
  return(NULL);
12670
0
    }
12671
12672
    /*
12673
     * Ask the Entity resolver to load the damn thing
12674
     */
12675
12676
0
    if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
12677
0
  input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12678
0
                                   systemIdCanonic);
12679
0
    if (input == NULL) {
12680
0
  xmlFreeParserCtxt(ctxt);
12681
0
  if (systemIdCanonic != NULL)
12682
0
      xmlFree(systemIdCanonic);
12683
0
  return(NULL);
12684
0
    }
12685
12686
    /*
12687
     * plug some encoding conversion routines here.
12688
     */
12689
0
    if (xmlPushInput(ctxt, input) < 0) {
12690
0
  xmlFreeParserCtxt(ctxt);
12691
0
  if (systemIdCanonic != NULL)
12692
0
      xmlFree(systemIdCanonic);
12693
0
  return(NULL);
12694
0
    }
12695
0
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12696
0
  enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12697
0
  xmlSwitchEncoding(ctxt, enc);
12698
0
    }
12699
12700
0
    if (input->filename == NULL)
12701
0
  input->filename = (char *) systemIdCanonic;
12702
0
    else
12703
0
  xmlFree(systemIdCanonic);
12704
0
    input->line = 1;
12705
0
    input->col = 1;
12706
0
    input->base = ctxt->input->cur;
12707
0
    input->cur = ctxt->input->cur;
12708
0
    input->free = NULL;
12709
12710
    /*
12711
     * let's parse that entity knowing it's an external subset.
12712
     */
12713
0
    ctxt->inSubset = 2;
12714
0
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12715
0
    if (ctxt->myDoc == NULL) {
12716
0
  xmlErrMemory(ctxt, "New Doc failed");
12717
0
  xmlFreeParserCtxt(ctxt);
12718
0
  return(NULL);
12719
0
    }
12720
0
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
12721
0
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12722
0
                                 ExternalID, SystemID);
12723
0
    xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12724
12725
0
    if (ctxt->myDoc != NULL) {
12726
0
  if (ctxt->wellFormed) {
12727
0
      ret = ctxt->myDoc->extSubset;
12728
0
      ctxt->myDoc->extSubset = NULL;
12729
0
      if (ret != NULL) {
12730
0
    xmlNodePtr tmp;
12731
12732
0
    ret->doc = NULL;
12733
0
    tmp = ret->children;
12734
0
    while (tmp != NULL) {
12735
0
        tmp->doc = NULL;
12736
0
        tmp = tmp->next;
12737
0
    }
12738
0
      }
12739
0
  } else {
12740
0
      ret = NULL;
12741
0
  }
12742
0
        xmlFreeDoc(ctxt->myDoc);
12743
0
        ctxt->myDoc = NULL;
12744
0
    }
12745
0
    xmlFreeParserCtxt(ctxt);
12746
12747
0
    return(ret);
12748
0
}
12749
12750
12751
/**
12752
 * xmlParseDTD:
12753
 * @ExternalID:  a NAME* containing the External ID of the DTD
12754
 * @SystemID:  a NAME* containing the URL to the DTD
12755
 *
12756
 * Load and parse an external subset.
12757
 *
12758
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12759
 */
12760
12761
xmlDtdPtr
12762
0
xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12763
0
    return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12764
0
}
12765
#endif /* LIBXML_VALID_ENABLED */
12766
12767
/************************************************************************
12768
 *                  *
12769
 *    Front ends when parsing an Entity     *
12770
 *                  *
12771
 ************************************************************************/
12772
12773
/**
12774
 * xmlParseCtxtExternalEntity:
12775
 * @ctx:  the existing parsing context
12776
 * @URL:  the URL for the entity to load
12777
 * @ID:  the System ID for the entity to load
12778
 * @lst:  the return value for the set of parsed nodes
12779
 *
12780
 * Parse an external general entity within an existing parsing context
12781
 * An external general parsed entity is well-formed if it matches the
12782
 * production labeled extParsedEnt.
12783
 *
12784
 * [78] extParsedEnt ::= TextDecl? content
12785
 *
12786
 * Returns 0 if the entity is well formed, -1 in case of args problem and
12787
 *    the parser error code otherwise
12788
 */
12789
12790
int
12791
xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
12792
0
                 const xmlChar *ID, xmlNodePtr *lst) {
12793
0
    void *userData;
12794
12795
0
    if (ctx == NULL) return(-1);
12796
    /*
12797
     * If the user provided their own SAX callbacks, then reuse the
12798
     * userData callback field, otherwise the expected setup in a
12799
     * DOM builder is to have userData == ctxt
12800
     */
12801
0
    if (ctx->userData == ctx)
12802
0
        userData = NULL;
12803
0
    else
12804
0
        userData = ctx->userData;
12805
0
    return xmlParseExternalEntityPrivate(ctx->myDoc, ctx, ctx->sax,
12806
0
                                         userData, ctx->depth + 1,
12807
0
                                         URL, ID, lst);
12808
0
}
12809
12810
/**
12811
 * xmlParseExternalEntityPrivate:
12812
 * @doc:  the document the chunk pertains to
12813
 * @oldctxt:  the previous parser context if available
12814
 * @sax:  the SAX handler block (possibly NULL)
12815
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12816
 * @depth:  Used for loop detection, use 0
12817
 * @URL:  the URL for the entity to load
12818
 * @ID:  the System ID for the entity to load
12819
 * @list:  the return value for the set of parsed nodes
12820
 *
12821
 * Private version of xmlParseExternalEntity()
12822
 *
12823
 * Returns 0 if the entity is well formed, -1 in case of args problem and
12824
 *    the parser error code otherwise
12825
 */
12826
12827
static xmlParserErrors
12828
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
12829
                xmlSAXHandlerPtr sax,
12830
          void *user_data, int depth, const xmlChar *URL,
12831
1.66M
          const xmlChar *ID, xmlNodePtr *list) {
12832
1.66M
    xmlParserCtxtPtr ctxt;
12833
1.66M
    xmlDocPtr newDoc;
12834
1.66M
    xmlNodePtr newRoot;
12835
1.66M
    xmlParserErrors ret = XML_ERR_OK;
12836
1.66M
    xmlChar start[4];
12837
1.66M
    xmlCharEncoding enc;
12838
12839
1.66M
    if (((depth > 40) &&
12840
1.66M
  ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12841
1.66M
  (depth > 100)) {
12842
0
  xmlFatalErrMsg(oldctxt, XML_ERR_ENTITY_LOOP,
12843
0
                       "Maximum entity nesting depth exceeded");
12844
0
        return(XML_ERR_ENTITY_LOOP);
12845
0
    }
12846
12847
1.66M
    if (list != NULL)
12848
128k
        *list = NULL;
12849
1.66M
    if ((URL == NULL) && (ID == NULL))
12850
1.18k
  return(XML_ERR_INTERNAL_ERROR);
12851
1.66M
    if (doc == NULL)
12852
0
  return(XML_ERR_INTERNAL_ERROR);
12853
12854
1.66M
    ctxt = xmlCreateEntityParserCtxtInternal(sax, user_data, URL, ID, NULL,
12855
1.66M
                                             oldctxt);
12856
1.66M
    if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
12857
267k
    if (oldctxt != NULL) {
12858
267k
        ctxt->nbErrors = oldctxt->nbErrors;
12859
267k
        ctxt->nbWarnings = oldctxt->nbWarnings;
12860
267k
    }
12861
267k
    xmlDetectSAX2(ctxt);
12862
12863
267k
    newDoc = xmlNewDoc(BAD_CAST "1.0");
12864
267k
    if (newDoc == NULL) {
12865
0
  xmlFreeParserCtxt(ctxt);
12866
0
  return(XML_ERR_INTERNAL_ERROR);
12867
0
    }
12868
267k
    newDoc->properties = XML_DOC_INTERNAL;
12869
267k
    if (doc) {
12870
267k
        newDoc->intSubset = doc->intSubset;
12871
267k
        newDoc->extSubset = doc->extSubset;
12872
267k
        if (doc->dict) {
12873
186k
            newDoc->dict = doc->dict;
12874
186k
            xmlDictReference(newDoc->dict);
12875
186k
        }
12876
267k
        if (doc->URL != NULL) {
12877
160k
            newDoc->URL = xmlStrdup(doc->URL);
12878
160k
        }
12879
267k
    }
12880
267k
    newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12881
267k
    if (newRoot == NULL) {
12882
0
  if (sax != NULL)
12883
0
  xmlFreeParserCtxt(ctxt);
12884
0
  newDoc->intSubset = NULL;
12885
0
  newDoc->extSubset = NULL;
12886
0
        xmlFreeDoc(newDoc);
12887
0
  return(XML_ERR_INTERNAL_ERROR);
12888
0
    }
12889
267k
    xmlAddChild((xmlNodePtr) newDoc, newRoot);
12890
267k
    nodePush(ctxt, newDoc->children);
12891
267k
    if (doc == NULL) {
12892
0
        ctxt->myDoc = newDoc;
12893
267k
    } else {
12894
267k
        ctxt->myDoc = doc;
12895
267k
        newRoot->doc = doc;
12896
267k
    }
12897
12898
    /*
12899
     * Get the 4 first bytes and decode the charset
12900
     * if enc != XML_CHAR_ENCODING_NONE
12901
     * plug some encoding conversion routines.
12902
     */
12903
267k
    GROW;
12904
267k
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12905
256k
  start[0] = RAW;
12906
256k
  start[1] = NXT(1);
12907
256k
  start[2] = NXT(2);
12908
256k
  start[3] = NXT(3);
12909
256k
  enc = xmlDetectCharEncoding(start, 4);
12910
256k
  if (enc != XML_CHAR_ENCODING_NONE) {
12911
18.0k
      xmlSwitchEncoding(ctxt, enc);
12912
18.0k
  }
12913
256k
    }
12914
12915
    /*
12916
     * Parse a possible text declaration first
12917
     */
12918
267k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
12919
8.08k
  xmlParseTextDecl(ctxt);
12920
        /*
12921
         * An XML-1.0 document can't reference an entity not XML-1.0
12922
         */
12923
8.08k
        if ((xmlStrEqual(oldctxt->version, BAD_CAST "1.0")) &&
12924
8.08k
            (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
12925
218
            xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
12926
218
                           "Version mismatch between document and entity\n");
12927
218
        }
12928
8.08k
    }
12929
12930
267k
    ctxt->instate = XML_PARSER_CONTENT;
12931
267k
    ctxt->depth = depth;
12932
267k
    if (oldctxt != NULL) {
12933
267k
  ctxt->_private = oldctxt->_private;
12934
267k
  ctxt->loadsubset = oldctxt->loadsubset;
12935
267k
  ctxt->validate = oldctxt->validate;
12936
267k
  ctxt->valid = oldctxt->valid;
12937
267k
  ctxt->replaceEntities = oldctxt->replaceEntities;
12938
267k
        if (oldctxt->validate) {
12939
104k
            ctxt->vctxt.error = oldctxt->vctxt.error;
12940
104k
            ctxt->vctxt.warning = oldctxt->vctxt.warning;
12941
104k
            ctxt->vctxt.userData = oldctxt->vctxt.userData;
12942
104k
            ctxt->vctxt.flags = oldctxt->vctxt.flags;
12943
104k
        }
12944
267k
  ctxt->external = oldctxt->external;
12945
267k
        if (ctxt->dict) xmlDictFree(ctxt->dict);
12946
267k
        ctxt->dict = oldctxt->dict;
12947
267k
        ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12948
267k
        ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12949
267k
        ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12950
267k
        ctxt->dictNames = oldctxt->dictNames;
12951
267k
        ctxt->attsDefault = oldctxt->attsDefault;
12952
267k
        ctxt->attsSpecial = oldctxt->attsSpecial;
12953
267k
        ctxt->linenumbers = oldctxt->linenumbers;
12954
267k
  ctxt->record_info = oldctxt->record_info;
12955
267k
  ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
12956
267k
  ctxt->node_seq.length = oldctxt->node_seq.length;
12957
267k
  ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
12958
267k
    } else {
12959
  /*
12960
   * Doing validity checking on chunk without context
12961
   * doesn't make sense
12962
   */
12963
0
  ctxt->_private = NULL;
12964
0
  ctxt->validate = 0;
12965
0
  ctxt->external = 2;
12966
0
  ctxt->loadsubset = 0;
12967
0
    }
12968
12969
267k
    xmlParseContent(ctxt);
12970
12971
267k
    if ((RAW == '<') && (NXT(1) == '/')) {
12972
2.24k
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12973
265k
    } else if (RAW != 0) {
12974
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12975
0
    }
12976
267k
    if (ctxt->node != newDoc->children) {
12977
12.5k
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12978
12.5k
    }
12979
12980
267k
    if (!ctxt->wellFormed) {
12981
50.7k
  ret = (xmlParserErrors)ctxt->errNo;
12982
50.7k
        if (oldctxt != NULL) {
12983
50.7k
            oldctxt->errNo = ctxt->errNo;
12984
50.7k
            oldctxt->wellFormed = 0;
12985
50.7k
            xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12986
50.7k
        }
12987
216k
    } else {
12988
216k
  if (list != NULL) {
12989
28.5k
      xmlNodePtr cur;
12990
12991
      /*
12992
       * Return the newly created nodeset after unlinking it from
12993
       * they pseudo parent.
12994
       */
12995
28.5k
      cur = newDoc->children->children;
12996
28.5k
      *list = cur;
12997
5.70M
      while (cur != NULL) {
12998
5.67M
    cur->parent = NULL;
12999
5.67M
    cur = cur->next;
13000
5.67M
      }
13001
28.5k
            newDoc->children->children = NULL;
13002
28.5k
  }
13003
216k
  ret = XML_ERR_OK;
13004
216k
    }
13005
13006
    /*
13007
     * Also record the size of the entity parsed
13008
     */
13009
267k
    if (ctxt->input != NULL && oldctxt != NULL) {
13010
267k
        unsigned long consumed = ctxt->input->consumed;
13011
13012
267k
        xmlSaturatedAddSizeT(&consumed, ctxt->input->cur - ctxt->input->base);
13013
13014
267k
        xmlSaturatedAdd(&oldctxt->sizeentities, consumed);
13015
267k
        xmlSaturatedAdd(&oldctxt->sizeentities, ctxt->sizeentities);
13016
13017
267k
        xmlSaturatedAdd(&oldctxt->sizeentcopy, consumed);
13018
267k
        xmlSaturatedAdd(&oldctxt->sizeentcopy, ctxt->sizeentcopy);
13019
267k
    }
13020
13021
267k
    if (oldctxt != NULL) {
13022
267k
        ctxt->dict = NULL;
13023
267k
        ctxt->attsDefault = NULL;
13024
267k
        ctxt->attsSpecial = NULL;
13025
267k
        oldctxt->nbErrors = ctxt->nbErrors;
13026
267k
        oldctxt->nbWarnings = ctxt->nbWarnings;
13027
267k
        oldctxt->validate = ctxt->validate;
13028
267k
        oldctxt->valid = ctxt->valid;
13029
267k
        oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
13030
267k
        oldctxt->node_seq.length = ctxt->node_seq.length;
13031
267k
        oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
13032
267k
    }
13033
267k
    ctxt->node_seq.maximum = 0;
13034
267k
    ctxt->node_seq.length = 0;
13035
267k
    ctxt->node_seq.buffer = NULL;
13036
267k
    xmlFreeParserCtxt(ctxt);
13037
267k
    newDoc->intSubset = NULL;
13038
267k
    newDoc->extSubset = NULL;
13039
267k
    xmlFreeDoc(newDoc);
13040
13041
267k
    return(ret);
13042
267k
}
13043
13044
#ifdef LIBXML_SAX1_ENABLED
13045
/**
13046
 * xmlParseExternalEntity:
13047
 * @doc:  the document the chunk pertains to
13048
 * @sax:  the SAX handler block (possibly NULL)
13049
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13050
 * @depth:  Used for loop detection, use 0
13051
 * @URL:  the URL for the entity to load
13052
 * @ID:  the System ID for the entity to load
13053
 * @lst:  the return value for the set of parsed nodes
13054
 *
13055
 * Parse an external general entity
13056
 * An external general parsed entity is well-formed if it matches the
13057
 * production labeled extParsedEnt.
13058
 *
13059
 * [78] extParsedEnt ::= TextDecl? content
13060
 *
13061
 * Returns 0 if the entity is well formed, -1 in case of args problem and
13062
 *    the parser error code otherwise
13063
 */
13064
13065
int
13066
xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
13067
0
    int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
13068
0
    return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
13069
0
                           ID, lst));
13070
0
}
13071
13072
/**
13073
 * xmlParseBalancedChunkMemory:
13074
 * @doc:  the document the chunk pertains to (must not be NULL)
13075
 * @sax:  the SAX handler block (possibly NULL)
13076
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13077
 * @depth:  Used for loop detection, use 0
13078
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13079
 * @lst:  the return value for the set of parsed nodes
13080
 *
13081
 * Parse a well-balanced chunk of an XML document
13082
 * called by the parser
13083
 * The allowed sequence for the Well Balanced Chunk is the one defined by
13084
 * the content production in the XML grammar:
13085
 *
13086
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13087
 *
13088
 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13089
 *    the parser error code otherwise
13090
 */
13091
13092
int
13093
xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13094
0
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
13095
0
    return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
13096
0
                                                depth, string, lst, 0 );
13097
0
}
13098
#endif /* LIBXML_SAX1_ENABLED */
13099
13100
/**
13101
 * xmlParseBalancedChunkMemoryInternal:
13102
 * @oldctxt:  the existing parsing context
13103
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13104
 * @user_data:  the user data field for the parser context
13105
 * @lst:  the return value for the set of parsed nodes
13106
 *
13107
 *
13108
 * Parse a well-balanced chunk of an XML document
13109
 * called by the parser
13110
 * The allowed sequence for the Well Balanced Chunk is the one defined by
13111
 * the content production in the XML grammar:
13112
 *
13113
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13114
 *
13115
 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13116
 * error code otherwise
13117
 *
13118
 * In case recover is set to 1, the nodelist will not be empty even if
13119
 * the parsed chunk is not well balanced.
13120
 */
13121
static xmlParserErrors
13122
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
13123
332k
  const xmlChar *string, void *user_data, xmlNodePtr *lst) {
13124
332k
    xmlParserCtxtPtr ctxt;
13125
332k
    xmlDocPtr newDoc = NULL;
13126
332k
    xmlNodePtr newRoot;
13127
332k
    xmlSAXHandlerPtr oldsax = NULL;
13128
332k
    xmlNodePtr content = NULL;
13129
332k
    xmlNodePtr last = NULL;
13130
332k
    int size;
13131
332k
    xmlParserErrors ret = XML_ERR_OK;
13132
332k
#ifdef SAX2
13133
332k
    int i;
13134
332k
#endif
13135
13136
332k
    if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13137
332k
        (oldctxt->depth >  100)) {
13138
93
  xmlFatalErrMsg(oldctxt, XML_ERR_ENTITY_LOOP,
13139
93
                       "Maximum entity nesting depth exceeded");
13140
93
  return(XML_ERR_ENTITY_LOOP);
13141
93
    }
13142
13143
13144
332k
    if (lst != NULL)
13145
210k
        *lst = NULL;
13146
332k
    if (string == NULL)
13147
172
        return(XML_ERR_INTERNAL_ERROR);
13148
13149
331k
    size = xmlStrlen(string);
13150
13151
331k
    ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13152
331k
    if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13153
261k
    ctxt->nbErrors = oldctxt->nbErrors;
13154
261k
    ctxt->nbWarnings = oldctxt->nbWarnings;
13155
261k
    if (user_data != NULL)
13156
0
  ctxt->userData = user_data;
13157
261k
    else
13158
261k
  ctxt->userData = ctxt;
13159
261k
    if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13160
261k
    ctxt->dict = oldctxt->dict;
13161
261k
    ctxt->input_id = oldctxt->input_id;
13162
261k
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13163
261k
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13164
261k
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13165
13166
261k
#ifdef SAX2
13167
    /* propagate namespaces down the entity */
13168
262k
    for (i = 0;i < oldctxt->nsNr;i += 2) {
13169
1.47k
        nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
13170
1.47k
    }
13171
261k
#endif
13172
13173
261k
    oldsax = ctxt->sax;
13174
261k
    ctxt->sax = oldctxt->sax;
13175
261k
    xmlDetectSAX2(ctxt);
13176
261k
    ctxt->replaceEntities = oldctxt->replaceEntities;
13177
261k
    ctxt->options = oldctxt->options;
13178
13179
261k
    ctxt->_private = oldctxt->_private;
13180
261k
    if (oldctxt->myDoc == NULL) {
13181
0
  newDoc = xmlNewDoc(BAD_CAST "1.0");
13182
0
  if (newDoc == NULL) {
13183
0
      ctxt->sax = oldsax;
13184
0
      ctxt->dict = NULL;
13185
0
      xmlFreeParserCtxt(ctxt);
13186
0
      return(XML_ERR_INTERNAL_ERROR);
13187
0
  }
13188
0
  newDoc->properties = XML_DOC_INTERNAL;
13189
0
  newDoc->dict = ctxt->dict;
13190
0
  xmlDictReference(newDoc->dict);
13191
0
  ctxt->myDoc = newDoc;
13192
261k
    } else {
13193
261k
  ctxt->myDoc = oldctxt->myDoc;
13194
261k
        content = ctxt->myDoc->children;
13195
261k
  last = ctxt->myDoc->last;
13196
261k
    }
13197
261k
    newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
13198
261k
    if (newRoot == NULL) {
13199
0
  ctxt->sax = oldsax;
13200
0
  ctxt->dict = NULL;
13201
0
  xmlFreeParserCtxt(ctxt);
13202
0
  if (newDoc != NULL) {
13203
0
      xmlFreeDoc(newDoc);
13204
0
  }
13205
0
  return(XML_ERR_INTERNAL_ERROR);
13206
0
    }
13207
261k
    ctxt->myDoc->children = NULL;
13208
261k
    ctxt->myDoc->last = NULL;
13209
261k
    xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
13210
261k
    nodePush(ctxt, ctxt->myDoc->children);
13211
261k
    ctxt->instate = XML_PARSER_CONTENT;
13212
261k
    ctxt->depth = oldctxt->depth;
13213
13214
261k
    ctxt->validate = 0;
13215
261k
    ctxt->loadsubset = oldctxt->loadsubset;
13216
261k
    if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
13217
  /*
13218
   * ID/IDREF registration will be done in xmlValidateElement below
13219
   */
13220
215k
  ctxt->loadsubset |= XML_SKIP_IDS;
13221
215k
    }
13222
261k
    ctxt->dictNames = oldctxt->dictNames;
13223
261k
    ctxt->attsDefault = oldctxt->attsDefault;
13224
261k
    ctxt->attsSpecial = oldctxt->attsSpecial;
13225
13226
261k
    xmlParseContent(ctxt);
13227
261k
    if ((RAW == '<') && (NXT(1) == '/')) {
13228
554
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13229
260k
    } else if (RAW != 0) {
13230
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13231
0
    }
13232
261k
    if (ctxt->node != ctxt->myDoc->children) {
13233
2.73k
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13234
2.73k
    }
13235
13236
261k
    if (!ctxt->wellFormed) {
13237
39.8k
  ret = (xmlParserErrors)ctxt->errNo;
13238
39.8k
        oldctxt->errNo = ctxt->errNo;
13239
39.8k
        oldctxt->wellFormed = 0;
13240
39.8k
        xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13241
221k
    } else {
13242
221k
        ret = XML_ERR_OK;
13243
221k
    }
13244
13245
261k
    if ((lst != NULL) && (ret == XML_ERR_OK)) {
13246
169k
  xmlNodePtr cur;
13247
13248
  /*
13249
   * Return the newly created nodeset after unlinking it from
13250
   * they pseudo parent.
13251
   */
13252
169k
  cur = ctxt->myDoc->children->children;
13253
169k
  *lst = cur;
13254
441k
  while (cur != NULL) {
13255
271k
#ifdef LIBXML_VALID_ENABLED
13256
271k
      if ((oldctxt->validate) && (oldctxt->wellFormed) &&
13257
271k
    (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
13258
271k
    (cur->type == XML_ELEMENT_NODE)) {
13259
30.1k
    oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
13260
30.1k
      oldctxt->myDoc, cur);
13261
30.1k
      }
13262
271k
#endif /* LIBXML_VALID_ENABLED */
13263
271k
      cur->parent = NULL;
13264
271k
      cur = cur->next;
13265
271k
  }
13266
169k
  ctxt->myDoc->children->children = NULL;
13267
169k
    }
13268
261k
    if (ctxt->myDoc != NULL) {
13269
261k
  xmlFreeNode(ctxt->myDoc->children);
13270
261k
        ctxt->myDoc->children = content;
13271
261k
        ctxt->myDoc->last = last;
13272
261k
    }
13273
13274
    /*
13275
     * Also record the size of the entity parsed
13276
     */
13277
261k
    if (ctxt->input != NULL && oldctxt != NULL) {
13278
261k
        unsigned long consumed = ctxt->input->consumed;
13279
13280
261k
        xmlSaturatedAddSizeT(&consumed, ctxt->input->cur - ctxt->input->base);
13281
13282
261k
        xmlSaturatedAdd(&oldctxt->sizeentcopy, consumed);
13283
261k
        xmlSaturatedAdd(&oldctxt->sizeentcopy, ctxt->sizeentcopy);
13284
261k
    }
13285
13286
261k
    oldctxt->nbErrors = ctxt->nbErrors;
13287
261k
    oldctxt->nbWarnings = ctxt->nbWarnings;
13288
261k
    ctxt->sax = oldsax;
13289
261k
    ctxt->dict = NULL;
13290
261k
    ctxt->attsDefault = NULL;
13291
261k
    ctxt->attsSpecial = NULL;
13292
261k
    xmlFreeParserCtxt(ctxt);
13293
261k
    if (newDoc != NULL) {
13294
0
  xmlFreeDoc(newDoc);
13295
0
    }
13296
13297
261k
    return(ret);
13298
261k
}
13299
13300
/**
13301
 * xmlParseInNodeContext:
13302
 * @node:  the context node
13303
 * @data:  the input string
13304
 * @datalen:  the input string length in bytes
13305
 * @options:  a combination of xmlParserOption
13306
 * @lst:  the return value for the set of parsed nodes
13307
 *
13308
 * Parse a well-balanced chunk of an XML document
13309
 * within the context (DTD, namespaces, etc ...) of the given node.
13310
 *
13311
 * The allowed sequence for the data is a Well Balanced Chunk defined by
13312
 * the content production in the XML grammar:
13313
 *
13314
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13315
 *
13316
 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13317
 * error code otherwise
13318
 */
13319
xmlParserErrors
13320
xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13321
0
                      int options, xmlNodePtr *lst) {
13322
0
#ifdef SAX2
13323
0
    xmlParserCtxtPtr ctxt;
13324
0
    xmlDocPtr doc = NULL;
13325
0
    xmlNodePtr fake, cur;
13326
0
    int nsnr = 0;
13327
13328
0
    xmlParserErrors ret = XML_ERR_OK;
13329
13330
    /*
13331
     * check all input parameters, grab the document
13332
     */
13333
0
    if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13334
0
        return(XML_ERR_INTERNAL_ERROR);
13335
0
    switch (node->type) {
13336
0
        case XML_ELEMENT_NODE:
13337
0
        case XML_ATTRIBUTE_NODE:
13338
0
        case XML_TEXT_NODE:
13339
0
        case XML_CDATA_SECTION_NODE:
13340
0
        case XML_ENTITY_REF_NODE:
13341
0
        case XML_PI_NODE:
13342
0
        case XML_COMMENT_NODE:
13343
0
        case XML_DOCUMENT_NODE:
13344
0
        case XML_HTML_DOCUMENT_NODE:
13345
0
      break;
13346
0
  default:
13347
0
      return(XML_ERR_INTERNAL_ERROR);
13348
13349
0
    }
13350
0
    while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13351
0
           (node->type != XML_DOCUMENT_NODE) &&
13352
0
     (node->type != XML_HTML_DOCUMENT_NODE))
13353
0
  node = node->parent;
13354
0
    if (node == NULL)
13355
0
  return(XML_ERR_INTERNAL_ERROR);
13356
0
    if (node->type == XML_ELEMENT_NODE)
13357
0
  doc = node->doc;
13358
0
    else
13359
0
        doc = (xmlDocPtr) node;
13360
0
    if (doc == NULL)
13361
0
  return(XML_ERR_INTERNAL_ERROR);
13362
13363
    /*
13364
     * allocate a context and set-up everything not related to the
13365
     * node position in the tree
13366
     */
13367
0
    if (doc->type == XML_DOCUMENT_NODE)
13368
0
  ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13369
0
#ifdef LIBXML_HTML_ENABLED
13370
0
    else if (doc->type == XML_HTML_DOCUMENT_NODE) {
13371
0
  ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
13372
        /*
13373
         * When parsing in context, it makes no sense to add implied
13374
         * elements like html/body/etc...
13375
         */
13376
0
        options |= HTML_PARSE_NOIMPLIED;
13377
0
    }
13378
0
#endif
13379
0
    else
13380
0
        return(XML_ERR_INTERNAL_ERROR);
13381
13382
0
    if (ctxt == NULL)
13383
0
        return(XML_ERR_NO_MEMORY);
13384
13385
    /*
13386
     * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13387
     * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13388
     * we must wait until the last moment to free the original one.
13389
     */
13390
0
    if (doc->dict != NULL) {
13391
0
        if (ctxt->dict != NULL)
13392
0
      xmlDictFree(ctxt->dict);
13393
0
  ctxt->dict = doc->dict;
13394
0
    } else
13395
0
        options |= XML_PARSE_NODICT;
13396
13397
0
    if (doc->encoding != NULL) {
13398
0
        xmlCharEncodingHandlerPtr hdlr;
13399
13400
0
        if (ctxt->encoding != NULL)
13401
0
      xmlFree((xmlChar *) ctxt->encoding);
13402
0
        ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13403
13404
0
        hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding);
13405
0
        if (hdlr != NULL) {
13406
0
            xmlSwitchToEncoding(ctxt, hdlr);
13407
0
  } else {
13408
0
            return(XML_ERR_UNSUPPORTED_ENCODING);
13409
0
        }
13410
0
    }
13411
13412
0
    xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13413
0
    xmlDetectSAX2(ctxt);
13414
0
    ctxt->myDoc = doc;
13415
    /* parsing in context, i.e. as within existing content */
13416
0
    ctxt->input_id = 2;
13417
0
    ctxt->instate = XML_PARSER_CONTENT;
13418
13419
0
    fake = xmlNewDocComment(node->doc, NULL);
13420
0
    if (fake == NULL) {
13421
0
        xmlFreeParserCtxt(ctxt);
13422
0
  return(XML_ERR_NO_MEMORY);
13423
0
    }
13424
0
    xmlAddChild(node, fake);
13425
13426
0
    if (node->type == XML_ELEMENT_NODE) {
13427
0
  nodePush(ctxt, node);
13428
  /*
13429
   * initialize the SAX2 namespaces stack
13430
   */
13431
0
  cur = node;
13432
0
  while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13433
0
      xmlNsPtr ns = cur->nsDef;
13434
0
      const xmlChar *iprefix, *ihref;
13435
13436
0
      while (ns != NULL) {
13437
0
    if (ctxt->dict) {
13438
0
        iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13439
0
        ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13440
0
    } else {
13441
0
        iprefix = ns->prefix;
13442
0
        ihref = ns->href;
13443
0
    }
13444
13445
0
          if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13446
0
        nsPush(ctxt, iprefix, ihref);
13447
0
        nsnr++;
13448
0
    }
13449
0
    ns = ns->next;
13450
0
      }
13451
0
      cur = cur->parent;
13452
0
  }
13453
0
    }
13454
13455
0
    if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13456
  /*
13457
   * ID/IDREF registration will be done in xmlValidateElement below
13458
   */
13459
0
  ctxt->loadsubset |= XML_SKIP_IDS;
13460
0
    }
13461
13462
0
#ifdef LIBXML_HTML_ENABLED
13463
0
    if (doc->type == XML_HTML_DOCUMENT_NODE)
13464
0
        __htmlParseContent(ctxt);
13465
0
    else
13466
0
#endif
13467
0
  xmlParseContent(ctxt);
13468
13469
0
    nsPop(ctxt, nsnr);
13470
0
    if ((RAW == '<') && (NXT(1) == '/')) {
13471
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13472
0
    } else if (RAW != 0) {
13473
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13474
0
    }
13475
0
    if ((ctxt->node != NULL) && (ctxt->node != node)) {
13476
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13477
0
  ctxt->wellFormed = 0;
13478
0
    }
13479
13480
0
    if (!ctxt->wellFormed) {
13481
0
        if (ctxt->errNo == 0)
13482
0
      ret = XML_ERR_INTERNAL_ERROR;
13483
0
  else
13484
0
      ret = (xmlParserErrors)ctxt->errNo;
13485
0
    } else {
13486
0
        ret = XML_ERR_OK;
13487
0
    }
13488
13489
    /*
13490
     * Return the newly created nodeset after unlinking it from
13491
     * the pseudo sibling.
13492
     */
13493
13494
0
    cur = fake->next;
13495
0
    fake->next = NULL;
13496
0
    node->last = fake;
13497
13498
0
    if (cur != NULL) {
13499
0
  cur->prev = NULL;
13500
0
    }
13501
13502
0
    *lst = cur;
13503
13504
0
    while (cur != NULL) {
13505
0
  cur->parent = NULL;
13506
0
  cur = cur->next;
13507
0
    }
13508
13509
0
    xmlUnlinkNode(fake);
13510
0
    xmlFreeNode(fake);
13511
13512
13513
0
    if (ret != XML_ERR_OK) {
13514
0
        xmlFreeNodeList(*lst);
13515
0
  *lst = NULL;
13516
0
    }
13517
13518
0
    if (doc->dict != NULL)
13519
0
        ctxt->dict = NULL;
13520
0
    xmlFreeParserCtxt(ctxt);
13521
13522
0
    return(ret);
13523
#else /* !SAX2 */
13524
    return(XML_ERR_INTERNAL_ERROR);
13525
#endif
13526
0
}
13527
13528
#ifdef LIBXML_SAX1_ENABLED
13529
/**
13530
 * xmlParseBalancedChunkMemoryRecover:
13531
 * @doc:  the document the chunk pertains to (must not be NULL)
13532
 * @sax:  the SAX handler block (possibly NULL)
13533
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13534
 * @depth:  Used for loop detection, use 0
13535
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13536
 * @lst:  the return value for the set of parsed nodes
13537
 * @recover: return nodes even if the data is broken (use 0)
13538
 *
13539
 *
13540
 * Parse a well-balanced chunk of an XML document
13541
 * called by the parser
13542
 * The allowed sequence for the Well Balanced Chunk is the one defined by
13543
 * the content production in the XML grammar:
13544
 *
13545
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13546
 *
13547
 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13548
 *    the parser error code otherwise
13549
 *
13550
 * In case recover is set to 1, the nodelist will not be empty even if
13551
 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13552
 * some extent.
13553
 */
13554
int
13555
xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13556
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
13557
0
     int recover) {
13558
0
    xmlParserCtxtPtr ctxt;
13559
0
    xmlDocPtr newDoc;
13560
0
    xmlSAXHandlerPtr oldsax = NULL;
13561
0
    xmlNodePtr content, newRoot;
13562
0
    int size;
13563
0
    int ret = 0;
13564
13565
0
    if (depth > 40) {
13566
0
  return(XML_ERR_ENTITY_LOOP);
13567
0
    }
13568
13569
13570
0
    if (lst != NULL)
13571
0
        *lst = NULL;
13572
0
    if (string == NULL)
13573
0
        return(-1);
13574
13575
0
    size = xmlStrlen(string);
13576
13577
0
    ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13578
0
    if (ctxt == NULL) return(-1);
13579
0
    ctxt->userData = ctxt;
13580
0
    if (sax != NULL) {
13581
0
  oldsax = ctxt->sax;
13582
0
        ctxt->sax = sax;
13583
0
  if (user_data != NULL)
13584
0
      ctxt->userData = user_data;
13585
0
    }
13586
0
    newDoc = xmlNewDoc(BAD_CAST "1.0");
13587
0
    if (newDoc == NULL) {
13588
0
  xmlFreeParserCtxt(ctxt);
13589
0
  return(-1);
13590
0
    }
13591
0
    newDoc->properties = XML_DOC_INTERNAL;
13592
0
    if ((doc != NULL) && (doc->dict != NULL)) {
13593
0
        xmlDictFree(ctxt->dict);
13594
0
  ctxt->dict = doc->dict;
13595
0
  xmlDictReference(ctxt->dict);
13596
0
  ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13597
0
  ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13598
0
  ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13599
0
  ctxt->dictNames = 1;
13600
0
    } else {
13601
0
  xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
13602
0
    }
13603
    /* doc == NULL is only supported for historic reasons */
13604
0
    if (doc != NULL) {
13605
0
  newDoc->intSubset = doc->intSubset;
13606
0
  newDoc->extSubset = doc->extSubset;
13607
0
    }
13608
0
    newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13609
0
    if (newRoot == NULL) {
13610
0
  if (sax != NULL)
13611
0
      ctxt->sax = oldsax;
13612
0
  xmlFreeParserCtxt(ctxt);
13613
0
  newDoc->intSubset = NULL;
13614
0
  newDoc->extSubset = NULL;
13615
0
        xmlFreeDoc(newDoc);
13616
0
  return(-1);
13617
0
    }
13618
0
    xmlAddChild((xmlNodePtr) newDoc, newRoot);
13619
0
    nodePush(ctxt, newRoot);
13620
    /* doc == NULL is only supported for historic reasons */
13621
0
    if (doc == NULL) {
13622
0
  ctxt->myDoc = newDoc;
13623
0
    } else {
13624
0
  ctxt->myDoc = newDoc;
13625
0
  newDoc->children->doc = doc;
13626
  /* Ensure that doc has XML spec namespace */
13627
0
  xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13628
0
  newDoc->oldNs = doc->oldNs;
13629
0
    }
13630
0
    ctxt->instate = XML_PARSER_CONTENT;
13631
0
    ctxt->input_id = 2;
13632
0
    ctxt->depth = depth;
13633
13634
    /*
13635
     * Doing validity checking on chunk doesn't make sense
13636
     */
13637
0
    ctxt->validate = 0;
13638
0
    ctxt->loadsubset = 0;
13639
0
    xmlDetectSAX2(ctxt);
13640
13641
0
    if ( doc != NULL ){
13642
0
        content = doc->children;
13643
0
        doc->children = NULL;
13644
0
        xmlParseContent(ctxt);
13645
0
        doc->children = content;
13646
0
    }
13647
0
    else {
13648
0
        xmlParseContent(ctxt);
13649
0
    }
13650
0
    if ((RAW == '<') && (NXT(1) == '/')) {
13651
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13652
0
    } else if (RAW != 0) {
13653
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13654
0
    }
13655
0
    if (ctxt->node != newDoc->children) {
13656
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13657
0
    }
13658
13659
0
    if (!ctxt->wellFormed) {
13660
0
        if (ctxt->errNo == 0)
13661
0
      ret = 1;
13662
0
  else
13663
0
      ret = ctxt->errNo;
13664
0
    } else {
13665
0
      ret = 0;
13666
0
    }
13667
13668
0
    if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13669
0
  xmlNodePtr cur;
13670
13671
  /*
13672
   * Return the newly created nodeset after unlinking it from
13673
   * they pseudo parent.
13674
   */
13675
0
  cur = newDoc->children->children;
13676
0
  *lst = cur;
13677
0
  while (cur != NULL) {
13678
0
      xmlSetTreeDoc(cur, doc);
13679
0
      cur->parent = NULL;
13680
0
      cur = cur->next;
13681
0
  }
13682
0
  newDoc->children->children = NULL;
13683
0
    }
13684
13685
0
    if (sax != NULL)
13686
0
  ctxt->sax = oldsax;
13687
0
    xmlFreeParserCtxt(ctxt);
13688
0
    newDoc->intSubset = NULL;
13689
0
    newDoc->extSubset = NULL;
13690
    /* This leaks the namespace list if doc == NULL */
13691
0
    newDoc->oldNs = NULL;
13692
0
    xmlFreeDoc(newDoc);
13693
13694
0
    return(ret);
13695
0
}
13696
13697
/**
13698
 * xmlSAXParseEntity:
13699
 * @sax:  the SAX handler block
13700
 * @filename:  the filename
13701
 *
13702
 * DEPRECATED: Don't use.
13703
 *
13704
 * parse an XML external entity out of context and build a tree.
13705
 * It use the given SAX function block to handle the parsing callback.
13706
 * If sax is NULL, fallback to the default DOM tree building routines.
13707
 *
13708
 * [78] extParsedEnt ::= TextDecl? content
13709
 *
13710
 * This correspond to a "Well Balanced" chunk
13711
 *
13712
 * Returns the resulting document tree
13713
 */
13714
13715
xmlDocPtr
13716
0
xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13717
0
    xmlDocPtr ret;
13718
0
    xmlParserCtxtPtr ctxt;
13719
13720
0
    ctxt = xmlCreateFileParserCtxt(filename);
13721
0
    if (ctxt == NULL) {
13722
0
  return(NULL);
13723
0
    }
13724
0
    if (sax != NULL) {
13725
0
  if (ctxt->sax != NULL)
13726
0
      xmlFree(ctxt->sax);
13727
0
        ctxt->sax = sax;
13728
0
        ctxt->userData = NULL;
13729
0
    }
13730
13731
0
    xmlParseExtParsedEnt(ctxt);
13732
13733
0
    if (ctxt->wellFormed)
13734
0
  ret = ctxt->myDoc;
13735
0
    else {
13736
0
        ret = NULL;
13737
0
        xmlFreeDoc(ctxt->myDoc);
13738
0
        ctxt->myDoc = NULL;
13739
0
    }
13740
0
    if (sax != NULL)
13741
0
        ctxt->sax = NULL;
13742
0
    xmlFreeParserCtxt(ctxt);
13743
13744
0
    return(ret);
13745
0
}
13746
13747
/**
13748
 * xmlParseEntity:
13749
 * @filename:  the filename
13750
 *
13751
 * parse an XML external entity out of context and build a tree.
13752
 *
13753
 * [78] extParsedEnt ::= TextDecl? content
13754
 *
13755
 * This correspond to a "Well Balanced" chunk
13756
 *
13757
 * Returns the resulting document tree
13758
 */
13759
13760
xmlDocPtr
13761
0
xmlParseEntity(const char *filename) {
13762
0
    return(xmlSAXParseEntity(NULL, filename));
13763
0
}
13764
#endif /* LIBXML_SAX1_ENABLED */
13765
13766
/**
13767
 * xmlCreateEntityParserCtxtInternal:
13768
 * @URL:  the entity URL
13769
 * @ID:  the entity PUBLIC ID
13770
 * @base:  a possible base for the target URI
13771
 * @pctx:  parser context used to set options on new context
13772
 *
13773
 * Create a parser context for an external entity
13774
 * Automatic support for ZLIB/Compress compressed document is provided
13775
 * by default if found at compile-time.
13776
 *
13777
 * Returns the new parser context or NULL
13778
 */
13779
static xmlParserCtxtPtr
13780
xmlCreateEntityParserCtxtInternal(xmlSAXHandlerPtr sax, void *userData,
13781
        const xmlChar *URL, const xmlChar *ID, const xmlChar *base,
13782
1.66M
        xmlParserCtxtPtr pctx) {
13783
1.66M
    xmlParserCtxtPtr ctxt;
13784
1.66M
    xmlParserInputPtr inputStream;
13785
1.66M
    char *directory = NULL;
13786
1.66M
    xmlChar *uri;
13787
13788
1.66M
    ctxt = xmlNewSAXParserCtxt(sax, userData);
13789
1.66M
    if (ctxt == NULL) {
13790
0
  return(NULL);
13791
0
    }
13792
13793
1.66M
    if (pctx != NULL) {
13794
1.66M
        ctxt->options = pctx->options;
13795
1.66M
        ctxt->_private = pctx->_private;
13796
1.66M
  ctxt->input_id = pctx->input_id;
13797
1.66M
    }
13798
13799
    /* Don't read from stdin. */
13800
1.66M
    if (xmlStrcmp(URL, BAD_CAST "-") == 0)
13801
413
        URL = BAD_CAST "./-";
13802
13803
1.66M
    uri = xmlBuildURI(URL, base);
13804
13805
1.66M
    if (uri == NULL) {
13806
27.4k
  inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
13807
27.4k
  if (inputStream == NULL) {
13808
26.7k
      xmlFreeParserCtxt(ctxt);
13809
26.7k
      return(NULL);
13810
26.7k
  }
13811
13812
639
  inputPush(ctxt, inputStream);
13813
13814
639
  if ((ctxt->directory == NULL) && (directory == NULL))
13815
639
      directory = xmlParserGetDirectory((char *)URL);
13816
639
  if ((ctxt->directory == NULL) && (directory != NULL))
13817
639
      ctxt->directory = directory;
13818
1.63M
    } else {
13819
1.63M
  inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
13820
1.63M
  if (inputStream == NULL) {
13821
1.36M
      xmlFree(uri);
13822
1.36M
      xmlFreeParserCtxt(ctxt);
13823
1.36M
      return(NULL);
13824
1.36M
  }
13825
13826
267k
  inputPush(ctxt, inputStream);
13827
13828
267k
  if ((ctxt->directory == NULL) && (directory == NULL))
13829
267k
      directory = xmlParserGetDirectory((char *)uri);
13830
267k
  if ((ctxt->directory == NULL) && (directory != NULL))
13831
267k
      ctxt->directory = directory;
13832
267k
  xmlFree(uri);
13833
267k
    }
13834
267k
    return(ctxt);
13835
1.66M
}
13836
13837
/**
13838
 * xmlCreateEntityParserCtxt:
13839
 * @URL:  the entity URL
13840
 * @ID:  the entity PUBLIC ID
13841
 * @base:  a possible base for the target URI
13842
 *
13843
 * Create a parser context for an external entity
13844
 * Automatic support for ZLIB/Compress compressed document is provided
13845
 * by default if found at compile-time.
13846
 *
13847
 * Returns the new parser context or NULL
13848
 */
13849
xmlParserCtxtPtr
13850
xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
13851
0
                    const xmlChar *base) {
13852
0
    return xmlCreateEntityParserCtxtInternal(NULL, NULL, URL, ID, base, NULL);
13853
13854
0
}
13855
13856
/************************************************************************
13857
 *                  *
13858
 *    Front ends when parsing from a file     *
13859
 *                  *
13860
 ************************************************************************/
13861
13862
/**
13863
 * xmlCreateURLParserCtxt:
13864
 * @filename:  the filename or URL
13865
 * @options:  a combination of xmlParserOption
13866
 *
13867
 * Create a parser context for a file or URL content.
13868
 * Automatic support for ZLIB/Compress compressed document is provided
13869
 * by default if found at compile-time and for file accesses
13870
 *
13871
 * Returns the new parser context or NULL
13872
 */
13873
xmlParserCtxtPtr
13874
xmlCreateURLParserCtxt(const char *filename, int options)
13875
0
{
13876
0
    xmlParserCtxtPtr ctxt;
13877
0
    xmlParserInputPtr inputStream;
13878
0
    char *directory = NULL;
13879
13880
0
    ctxt = xmlNewParserCtxt();
13881
0
    if (ctxt == NULL) {
13882
0
  xmlErrMemory(NULL, "cannot allocate parser context");
13883
0
  return(NULL);
13884
0
    }
13885
13886
0
    if (options)
13887
0
  xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13888
0
    ctxt->linenumbers = 1;
13889
13890
0
    inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
13891
0
    if (inputStream == NULL) {
13892
0
  xmlFreeParserCtxt(ctxt);
13893
0
  return(NULL);
13894
0
    }
13895
13896
0
    inputPush(ctxt, inputStream);
13897
0
    if ((ctxt->directory == NULL) && (directory == NULL))
13898
0
        directory = xmlParserGetDirectory(filename);
13899
0
    if ((ctxt->directory == NULL) && (directory != NULL))
13900
0
        ctxt->directory = directory;
13901
13902
0
    return(ctxt);
13903
0
}
13904
13905
/**
13906
 * xmlCreateFileParserCtxt:
13907
 * @filename:  the filename
13908
 *
13909
 * Create a parser context for a file content.
13910
 * Automatic support for ZLIB/Compress compressed document is provided
13911
 * by default if found at compile-time.
13912
 *
13913
 * Returns the new parser context or NULL
13914
 */
13915
xmlParserCtxtPtr
13916
xmlCreateFileParserCtxt(const char *filename)
13917
0
{
13918
0
    return(xmlCreateURLParserCtxt(filename, 0));
13919
0
}
13920
13921
#ifdef LIBXML_SAX1_ENABLED
13922
/**
13923
 * xmlSAXParseFileWithData:
13924
 * @sax:  the SAX handler block
13925
 * @filename:  the filename
13926
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
13927
 *             documents
13928
 * @data:  the userdata
13929
 *
13930
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
13931
 *
13932
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13933
 * compressed document is provided by default if found at compile-time.
13934
 * It use the given SAX function block to handle the parsing callback.
13935
 * If sax is NULL, fallback to the default DOM tree building routines.
13936
 *
13937
 * User data (void *) is stored within the parser context in the
13938
 * context's _private member, so it is available nearly everywhere in libxml
13939
 *
13940
 * Returns the resulting document tree
13941
 */
13942
13943
xmlDocPtr
13944
xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
13945
0
                        int recovery, void *data) {
13946
0
    xmlDocPtr ret;
13947
0
    xmlParserCtxtPtr ctxt;
13948
13949
0
    xmlInitParser();
13950
13951
0
    ctxt = xmlCreateFileParserCtxt(filename);
13952
0
    if (ctxt == NULL) {
13953
0
  return(NULL);
13954
0
    }
13955
0
    if (sax != NULL) {
13956
0
  if (ctxt->sax != NULL)
13957
0
      xmlFree(ctxt->sax);
13958
0
        ctxt->sax = sax;
13959
0
    }
13960
0
    xmlDetectSAX2(ctxt);
13961
0
    if (data!=NULL) {
13962
0
  ctxt->_private = data;
13963
0
    }
13964
13965
0
    if (ctxt->directory == NULL)
13966
0
        ctxt->directory = xmlParserGetDirectory(filename);
13967
13968
0
    ctxt->recovery = recovery;
13969
13970
0
    xmlParseDocument(ctxt);
13971
13972
0
    if ((ctxt->wellFormed) || recovery) {
13973
0
        ret = ctxt->myDoc;
13974
0
  if ((ret != NULL) && (ctxt->input->buf != NULL)) {
13975
0
      if (ctxt->input->buf->compressed > 0)
13976
0
    ret->compression = 9;
13977
0
      else
13978
0
    ret->compression = ctxt->input->buf->compressed;
13979
0
  }
13980
0
    }
13981
0
    else {
13982
0
       ret = NULL;
13983
0
       xmlFreeDoc(ctxt->myDoc);
13984
0
       ctxt->myDoc = NULL;
13985
0
    }
13986
0
    if (sax != NULL)
13987
0
        ctxt->sax = NULL;
13988
0
    xmlFreeParserCtxt(ctxt);
13989
13990
0
    return(ret);
13991
0
}
13992
13993
/**
13994
 * xmlSAXParseFile:
13995
 * @sax:  the SAX handler block
13996
 * @filename:  the filename
13997
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
13998
 *             documents
13999
 *
14000
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
14001
 *
14002
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14003
 * compressed document is provided by default if found at compile-time.
14004
 * It use the given SAX function block to handle the parsing callback.
14005
 * If sax is NULL, fallback to the default DOM tree building routines.
14006
 *
14007
 * Returns the resulting document tree
14008
 */
14009
14010
xmlDocPtr
14011
xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
14012
0
                          int recovery) {
14013
0
    return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
14014
0
}
14015
14016
/**
14017
 * xmlRecoverDoc:
14018
 * @cur:  a pointer to an array of xmlChar
14019
 *
14020
 * DEPRECATED: Use xmlReadDoc with XML_PARSE_RECOVER.
14021
 *
14022
 * parse an XML in-memory document and build a tree.
14023
 * In the case the document is not Well Formed, a attempt to build a
14024
 * tree is tried anyway
14025
 *
14026
 * Returns the resulting document tree or NULL in case of failure
14027
 */
14028
14029
xmlDocPtr
14030
0
xmlRecoverDoc(const xmlChar *cur) {
14031
0
    return(xmlSAXParseDoc(NULL, cur, 1));
14032
0
}
14033
14034
/**
14035
 * xmlParseFile:
14036
 * @filename:  the filename
14037
 *
14038
 * DEPRECATED: Use xmlReadFile.
14039
 *
14040
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14041
 * compressed document is provided by default if found at compile-time.
14042
 *
14043
 * Returns the resulting document tree if the file was wellformed,
14044
 * NULL otherwise.
14045
 */
14046
14047
xmlDocPtr
14048
0
xmlParseFile(const char *filename) {
14049
0
    return(xmlSAXParseFile(NULL, filename, 0));
14050
0
}
14051
14052
/**
14053
 * xmlRecoverFile:
14054
 * @filename:  the filename
14055
 *
14056
 * DEPRECATED: Use xmlReadFile with XML_PARSE_RECOVER.
14057
 *
14058
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14059
 * compressed document is provided by default if found at compile-time.
14060
 * In the case the document is not Well Formed, it attempts to build
14061
 * a tree anyway
14062
 *
14063
 * Returns the resulting document tree or NULL in case of failure
14064
 */
14065
14066
xmlDocPtr
14067
0
xmlRecoverFile(const char *filename) {
14068
0
    return(xmlSAXParseFile(NULL, filename, 1));
14069
0
}
14070
14071
14072
/**
14073
 * xmlSetupParserForBuffer:
14074
 * @ctxt:  an XML parser context
14075
 * @buffer:  a xmlChar * buffer
14076
 * @filename:  a file name
14077
 *
14078
 * DEPRECATED: Don't use.
14079
 *
14080
 * Setup the parser context to parse a new buffer; Clears any prior
14081
 * contents from the parser context. The buffer parameter must not be
14082
 * NULL, but the filename parameter can be
14083
 */
14084
void
14085
xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
14086
                             const char* filename)
14087
0
{
14088
0
    xmlParserInputPtr input;
14089
14090
0
    if ((ctxt == NULL) || (buffer == NULL))
14091
0
        return;
14092
14093
0
    input = xmlNewInputStream(ctxt);
14094
0
    if (input == NULL) {
14095
0
        xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
14096
0
        xmlClearParserCtxt(ctxt);
14097
0
        return;
14098
0
    }
14099
14100
0
    xmlClearParserCtxt(ctxt);
14101
0
    if (filename != NULL)
14102
0
        input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
14103
0
    input->base = buffer;
14104
0
    input->cur = buffer;
14105
0
    input->end = &buffer[xmlStrlen(buffer)];
14106
0
    inputPush(ctxt, input);
14107
0
}
14108
14109
/**
14110
 * xmlSAXUserParseFile:
14111
 * @sax:  a SAX handler
14112
 * @user_data:  The user data returned on SAX callbacks
14113
 * @filename:  a file name
14114
 *
14115
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
14116
 *
14117
 * parse an XML file and call the given SAX handler routines.
14118
 * Automatic support for ZLIB/Compress compressed document is provided
14119
 *
14120
 * Returns 0 in case of success or a error number otherwise
14121
 */
14122
int
14123
xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
14124
0
                    const char *filename) {
14125
0
    int ret = 0;
14126
0
    xmlParserCtxtPtr ctxt;
14127
14128
0
    ctxt = xmlCreateFileParserCtxt(filename);
14129
0
    if (ctxt == NULL) return -1;
14130
0
    if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14131
0
  xmlFree(ctxt->sax);
14132
0
    ctxt->sax = sax;
14133
0
    xmlDetectSAX2(ctxt);
14134
14135
0
    if (user_data != NULL)
14136
0
  ctxt->userData = user_data;
14137
14138
0
    xmlParseDocument(ctxt);
14139
14140
0
    if (ctxt->wellFormed)
14141
0
  ret = 0;
14142
0
    else {
14143
0
        if (ctxt->errNo != 0)
14144
0
      ret = ctxt->errNo;
14145
0
  else
14146
0
      ret = -1;
14147
0
    }
14148
0
    if (sax != NULL)
14149
0
  ctxt->sax = NULL;
14150
0
    if (ctxt->myDoc != NULL) {
14151
0
        xmlFreeDoc(ctxt->myDoc);
14152
0
  ctxt->myDoc = NULL;
14153
0
    }
14154
0
    xmlFreeParserCtxt(ctxt);
14155
14156
0
    return ret;
14157
0
}
14158
#endif /* LIBXML_SAX1_ENABLED */
14159
14160
/************************************************************************
14161
 *                  *
14162
 *    Front ends when parsing from memory     *
14163
 *                  *
14164
 ************************************************************************/
14165
14166
/**
14167
 * xmlCreateMemoryParserCtxt:
14168
 * @buffer:  a pointer to a char array
14169
 * @size:  the size of the array
14170
 *
14171
 * Create a parser context for an XML in-memory document.
14172
 *
14173
 * Returns the new parser context or NULL
14174
 */
14175
xmlParserCtxtPtr
14176
702k
xmlCreateMemoryParserCtxt(const char *buffer, int size) {
14177
702k
    xmlParserCtxtPtr ctxt;
14178
702k
    xmlParserInputPtr input;
14179
702k
    xmlParserInputBufferPtr buf;
14180
14181
702k
    if (buffer == NULL)
14182
0
  return(NULL);
14183
702k
    if (size <= 0)
14184
71.6k
  return(NULL);
14185
14186
630k
    ctxt = xmlNewParserCtxt();
14187
630k
    if (ctxt == NULL)
14188
0
  return(NULL);
14189
14190
630k
    buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14191
630k
    if (buf == NULL) {
14192
0
  xmlFreeParserCtxt(ctxt);
14193
0
  return(NULL);
14194
0
    }
14195
14196
630k
    input = xmlNewInputStream(ctxt);
14197
630k
    if (input == NULL) {
14198
0
  xmlFreeParserInputBuffer(buf);
14199
0
  xmlFreeParserCtxt(ctxt);
14200
0
  return(NULL);
14201
0
    }
14202
14203
630k
    input->filename = NULL;
14204
630k
    input->buf = buf;
14205
630k
    xmlBufResetInput(input->buf->buffer, input);
14206
14207
630k
    inputPush(ctxt, input);
14208
630k
    return(ctxt);
14209
630k
}
14210
14211
#ifdef LIBXML_SAX1_ENABLED
14212
/**
14213
 * xmlSAXParseMemoryWithData:
14214
 * @sax:  the SAX handler block
14215
 * @buffer:  an pointer to a char array
14216
 * @size:  the size of the array
14217
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14218
 *             documents
14219
 * @data:  the userdata
14220
 *
14221
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14222
 *
14223
 * parse an XML in-memory block and use the given SAX function block
14224
 * to handle the parsing callback. If sax is NULL, fallback to the default
14225
 * DOM tree building routines.
14226
 *
14227
 * User data (void *) is stored within the parser context in the
14228
 * context's _private member, so it is available nearly everywhere in libxml
14229
 *
14230
 * Returns the resulting document tree
14231
 */
14232
14233
xmlDocPtr
14234
xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14235
0
            int size, int recovery, void *data) {
14236
0
    xmlDocPtr ret;
14237
0
    xmlParserCtxtPtr ctxt;
14238
14239
0
    xmlInitParser();
14240
14241
0
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14242
0
    if (ctxt == NULL) return(NULL);
14243
0
    if (sax != NULL) {
14244
0
  if (ctxt->sax != NULL)
14245
0
      xmlFree(ctxt->sax);
14246
0
        ctxt->sax = sax;
14247
0
    }
14248
0
    xmlDetectSAX2(ctxt);
14249
0
    if (data!=NULL) {
14250
0
  ctxt->_private=data;
14251
0
    }
14252
14253
0
    ctxt->recovery = recovery;
14254
14255
0
    xmlParseDocument(ctxt);
14256
14257
0
    if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14258
0
    else {
14259
0
       ret = NULL;
14260
0
       xmlFreeDoc(ctxt->myDoc);
14261
0
       ctxt->myDoc = NULL;
14262
0
    }
14263
0
    if (sax != NULL)
14264
0
  ctxt->sax = NULL;
14265
0
    xmlFreeParserCtxt(ctxt);
14266
14267
0
    return(ret);
14268
0
}
14269
14270
/**
14271
 * xmlSAXParseMemory:
14272
 * @sax:  the SAX handler block
14273
 * @buffer:  an pointer to a char array
14274
 * @size:  the size of the array
14275
 * @recovery:  work in recovery mode, i.e. tries to read not Well Formed
14276
 *             documents
14277
 *
14278
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14279
 *
14280
 * parse an XML in-memory block and use the given SAX function block
14281
 * to handle the parsing callback. If sax is NULL, fallback to the default
14282
 * DOM tree building routines.
14283
 *
14284
 * Returns the resulting document tree
14285
 */
14286
xmlDocPtr
14287
xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14288
0
            int size, int recovery) {
14289
0
    return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
14290
0
}
14291
14292
/**
14293
 * xmlParseMemory:
14294
 * @buffer:  an pointer to a char array
14295
 * @size:  the size of the array
14296
 *
14297
 * DEPRECATED: Use xmlReadMemory.
14298
 *
14299
 * parse an XML in-memory block and build a tree.
14300
 *
14301
 * Returns the resulting document tree
14302
 */
14303
14304
0
xmlDocPtr xmlParseMemory(const char *buffer, int size) {
14305
0
   return(xmlSAXParseMemory(NULL, buffer, size, 0));
14306
0
}
14307
14308
/**
14309
 * xmlRecoverMemory:
14310
 * @buffer:  an pointer to a char array
14311
 * @size:  the size of the array
14312
 *
14313
 * DEPRECATED: Use xmlReadMemory with XML_PARSE_RECOVER.
14314
 *
14315
 * parse an XML in-memory block and build a tree.
14316
 * In the case the document is not Well Formed, an attempt to
14317
 * build a tree is tried anyway
14318
 *
14319
 * Returns the resulting document tree or NULL in case of error
14320
 */
14321
14322
0
xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
14323
0
   return(xmlSAXParseMemory(NULL, buffer, size, 1));
14324
0
}
14325
14326
/**
14327
 * xmlSAXUserParseMemory:
14328
 * @sax:  a SAX handler
14329
 * @user_data:  The user data returned on SAX callbacks
14330
 * @buffer:  an in-memory XML document input
14331
 * @size:  the length of the XML document in bytes
14332
 *
14333
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14334
 *
14335
 * parse an XML in-memory buffer and call the given SAX handler routines.
14336
 *
14337
 * Returns 0 in case of success or a error number otherwise
14338
 */
14339
int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
14340
0
        const char *buffer, int size) {
14341
0
    int ret = 0;
14342
0
    xmlParserCtxtPtr ctxt;
14343
14344
0
    xmlInitParser();
14345
14346
0
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14347
0
    if (ctxt == NULL) return -1;
14348
0
    if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14349
0
        xmlFree(ctxt->sax);
14350
0
    ctxt->sax = sax;
14351
0
    xmlDetectSAX2(ctxt);
14352
14353
0
    if (user_data != NULL)
14354
0
  ctxt->userData = user_data;
14355
14356
0
    xmlParseDocument(ctxt);
14357
14358
0
    if (ctxt->wellFormed)
14359
0
  ret = 0;
14360
0
    else {
14361
0
        if (ctxt->errNo != 0)
14362
0
      ret = ctxt->errNo;
14363
0
  else
14364
0
      ret = -1;
14365
0
    }
14366
0
    if (sax != NULL)
14367
0
        ctxt->sax = NULL;
14368
0
    if (ctxt->myDoc != NULL) {
14369
0
        xmlFreeDoc(ctxt->myDoc);
14370
0
  ctxt->myDoc = NULL;
14371
0
    }
14372
0
    xmlFreeParserCtxt(ctxt);
14373
14374
0
    return ret;
14375
0
}
14376
#endif /* LIBXML_SAX1_ENABLED */
14377
14378
/**
14379
 * xmlCreateDocParserCtxt:
14380
 * @cur:  a pointer to an array of xmlChar
14381
 *
14382
 * Creates a parser context for an XML in-memory document.
14383
 *
14384
 * Returns the new parser context or NULL
14385
 */
14386
xmlParserCtxtPtr
14387
0
xmlCreateDocParserCtxt(const xmlChar *cur) {
14388
0
    int len;
14389
14390
0
    if (cur == NULL)
14391
0
  return(NULL);
14392
0
    len = xmlStrlen(cur);
14393
0
    return(xmlCreateMemoryParserCtxt((const char *)cur, len));
14394
0
}
14395
14396
#ifdef LIBXML_SAX1_ENABLED
14397
/**
14398
 * xmlSAXParseDoc:
14399
 * @sax:  the SAX handler block
14400
 * @cur:  a pointer to an array of xmlChar
14401
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14402
 *             documents
14403
 *
14404
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadDoc.
14405
 *
14406
 * parse an XML in-memory document and build a tree.
14407
 * It use the given SAX function block to handle the parsing callback.
14408
 * If sax is NULL, fallback to the default DOM tree building routines.
14409
 *
14410
 * Returns the resulting document tree
14411
 */
14412
14413
xmlDocPtr
14414
0
xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
14415
0
    xmlDocPtr ret;
14416
0
    xmlParserCtxtPtr ctxt;
14417
0
    xmlSAXHandlerPtr oldsax = NULL;
14418
14419
0
    if (cur == NULL) return(NULL);
14420
14421
14422
0
    ctxt = xmlCreateDocParserCtxt(cur);
14423
0
    if (ctxt == NULL) return(NULL);
14424
0
    if (sax != NULL) {
14425
0
        oldsax = ctxt->sax;
14426
0
        ctxt->sax = sax;
14427
0
        ctxt->userData = NULL;
14428
0
    }
14429
0
    xmlDetectSAX2(ctxt);
14430
14431
0
    xmlParseDocument(ctxt);
14432
0
    if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14433
0
    else {
14434
0
       ret = NULL;
14435
0
       xmlFreeDoc(ctxt->myDoc);
14436
0
       ctxt->myDoc = NULL;
14437
0
    }
14438
0
    if (sax != NULL)
14439
0
  ctxt->sax = oldsax;
14440
0
    xmlFreeParserCtxt(ctxt);
14441
14442
0
    return(ret);
14443
0
}
14444
14445
/**
14446
 * xmlParseDoc:
14447
 * @cur:  a pointer to an array of xmlChar
14448
 *
14449
 * DEPRECATED: Use xmlReadDoc.
14450
 *
14451
 * parse an XML in-memory document and build a tree.
14452
 *
14453
 * Returns the resulting document tree
14454
 */
14455
14456
xmlDocPtr
14457
0
xmlParseDoc(const xmlChar *cur) {
14458
0
    return(xmlSAXParseDoc(NULL, cur, 0));
14459
0
}
14460
#endif /* LIBXML_SAX1_ENABLED */
14461
14462
#ifdef LIBXML_LEGACY_ENABLED
14463
/************************************************************************
14464
 *                  *
14465
 *  Specific function to keep track of entities references    *
14466
 *  and used by the XSLT debugger         *
14467
 *                  *
14468
 ************************************************************************/
14469
14470
static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14471
14472
/**
14473
 * xmlAddEntityReference:
14474
 * @ent : A valid entity
14475
 * @firstNode : A valid first node for children of entity
14476
 * @lastNode : A valid last node of children entity
14477
 *
14478
 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14479
 */
14480
static void
14481
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14482
                      xmlNodePtr lastNode)
14483
{
14484
    if (xmlEntityRefFunc != NULL) {
14485
        (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14486
    }
14487
}
14488
14489
14490
/**
14491
 * xmlSetEntityReferenceFunc:
14492
 * @func: A valid function
14493
 *
14494
 * Set the function to call call back when a xml reference has been made
14495
 */
14496
void
14497
xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14498
{
14499
    xmlEntityRefFunc = func;
14500
}
14501
#endif /* LIBXML_LEGACY_ENABLED */
14502
14503
/************************************************************************
14504
 *                  *
14505
 *        Miscellaneous       *
14506
 *                  *
14507
 ************************************************************************/
14508
14509
/*
 * Non-zero once xmlInitParser() has completed the library initialization;
 * put back to 0 by xmlCleanupParser().
 */
static int xmlParserInitialized = 0;
14510
14511
/**
14512
 * xmlInitParser:
14513
 *
14514
 * Initialization function for the XML parser.
14515
 * This is not reentrant. Call once before processing in case of
14516
 * use in multithreaded programs.
14517
 */
14518
14519
void
14520
10.8G
xmlInitParser(void) {
14521
    /*
14522
     * Note that the initialization code must not make memory allocations.
14523
     */
14524
10.8G
    if (xmlParserInitialized != 0)
14525
10.8G
  return;
14526
14527
3.74k
#ifdef LIBXML_THREAD_ENABLED
14528
3.74k
    __xmlGlobalInitMutexLock();
14529
3.74k
    if (xmlParserInitialized == 0) {
14530
3.73k
#endif
14531
#if defined(_WIN32) && (!defined(LIBXML_STATIC) || defined(LIBXML_STATIC_FOR_DLL))
14532
        if (xmlFree == free)
14533
            atexit(xmlCleanupParser);
14534
#endif
14535
14536
3.73k
  xmlInitThreadsInternal();
14537
3.73k
  xmlInitGlobalsInternal();
14538
3.73k
  xmlInitMemoryInternal();
14539
3.73k
        __xmlInitializeDict();
14540
3.73k
  xmlInitEncodingInternal();
14541
3.73k
  xmlRegisterDefaultInputCallbacks();
14542
3.73k
#ifdef LIBXML_OUTPUT_ENABLED
14543
3.73k
  xmlRegisterDefaultOutputCallbacks();
14544
3.73k
#endif /* LIBXML_OUTPUT_ENABLED */
14545
3.73k
#if defined(LIBXML_XPATH_ENABLED) || defined(LIBXML_SCHEMAS_ENABLED)
14546
3.73k
  xmlInitXPathInternal();
14547
3.73k
#endif
14548
3.73k
  xmlParserInitialized = 1;
14549
3.73k
#ifdef LIBXML_THREAD_ENABLED
14550
3.73k
    }
14551
3.74k
    __xmlGlobalInitMutexUnlock();
14552
3.74k
#endif
14553
3.74k
}
14554
14555
/**
14556
 * xmlCleanupParser:
14557
 *
14558
 * This function name is somewhat misleading. It does not clean up
14559
 * parser state, it cleans up memory allocated by the library itself.
14560
 * It is a cleanup function for the XML library. It tries to reclaim all
14561
 * related global memory allocated for the library processing.
14562
 * It doesn't deallocate any document related memory. One should
14563
 * call xmlCleanupParser() only when the process has finished using
14564
 * the library and all XML/HTML documents built with it.
14565
 * See also xmlInitParser() which has the opposite function of preparing
14566
 * the library for operations.
14567
 *
14568
 * WARNING: if your application is multithreaded or has plugin support
14569
 *          calling this may crash the application if another thread or
14570
 *          a plugin is still using libxml2. It's sometimes very hard to
14571
 *          guess if libxml2 is in use in the application, some libraries
14572
 *          or plugins may use it without notice. In case of doubt abstain
14573
 *          from calling this function or do it just before calling exit()
14574
 *          to avoid leak reports from valgrind !
14575
 */
14576
14577
void
14578
0
xmlCleanupParser(void) {
14579
0
    if (!xmlParserInitialized)
14580
0
  return;
14581
14582
0
    xmlCleanupCharEncodingHandlers();
14583
0
#ifdef LIBXML_CATALOG_ENABLED
14584
0
    xmlCatalogCleanup();
14585
0
#endif
14586
0
    xmlCleanupDictInternal();
14587
0
    xmlCleanupInputCallbacks();
14588
0
#ifdef LIBXML_OUTPUT_ENABLED
14589
0
    xmlCleanupOutputCallbacks();
14590
0
#endif
14591
0
#ifdef LIBXML_SCHEMAS_ENABLED
14592
0
    xmlSchemaCleanupTypes();
14593
0
    xmlRelaxNGCleanupTypes();
14594
0
#endif
14595
0
    xmlCleanupGlobalsInternal();
14596
0
    xmlCleanupThreadsInternal();
14597
0
    xmlCleanupMemoryInternal();
14598
0
    xmlParserInitialized = 0;
14599
0
}
14600
14601
#if defined(HAVE_ATTRIBUTE_DESTRUCTOR) && !defined(LIBXML_STATIC) && \
    !defined(_WIN32)
/*
 * xmlDestructor:
 *
 * Function tagged with ATTRIBUTE_DESTRUCTOR which runs the library
 * cleanup, but only while xmlFree still points to the C library free().
 */
static void
ATTRIBUTE_DESTRUCTOR
xmlDestructor(void)
{
    /*
     * Calling custom deallocation functions in a destructor can cause
     * problems, for example with Nokogiri.
     */
    if (xmlFree != free)
        return;

    xmlCleanupParser();
}
#endif
14614
14615
/************************************************************************
14616
 *                  *
14617
 *  New set (2.6.0) of simpler and more flexible APIs   *
14618
 *                  *
14619
 ************************************************************************/
14620
14621
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named "dict" must be visible where the
 * macro is expanded; a NULL "dict" means the string is always freed.
 * A NULL @str is ignored. Note that @str is evaluated more than once.
 *
 * The body is wrapped in do { } while (0) so that the macro behaves
 * like a single statement: use it as "DICT_FREE(s);", which is also
 * safe as the unbraced body of an if/else.
 */
#define DICT_FREE(str)						\
    do {							\
	if ((str) && ((!dict) ||				\
	    (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
	    xmlFree((char *)(str));				\
    } while (0)
14632
14633
/**
14634
 * xmlCtxtReset:
14635
 * @ctxt: an XML parser context
14636
 *
14637
 * Reset a parser context
14638
 */
14639
void
14640
xmlCtxtReset(xmlParserCtxtPtr ctxt)
14641
0
{
14642
0
    xmlParserInputPtr input;
14643
0
    xmlDictPtr dict;
14644
14645
0
    if (ctxt == NULL)
14646
0
        return;
14647
14648
0
    dict = ctxt->dict;
14649
14650
0
    while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14651
0
        xmlFreeInputStream(input);
14652
0
    }
14653
0
    ctxt->inputNr = 0;
14654
0
    ctxt->input = NULL;
14655
14656
0
    ctxt->spaceNr = 0;
14657
0
    if (ctxt->spaceTab != NULL) {
14658
0
  ctxt->spaceTab[0] = -1;
14659
0
  ctxt->space = &ctxt->spaceTab[0];
14660
0
    } else {
14661
0
        ctxt->space = NULL;
14662
0
    }
14663
14664
14665
0
    ctxt->nodeNr = 0;
14666
0
    ctxt->node = NULL;
14667
14668
0
    ctxt->nameNr = 0;
14669
0
    ctxt->name = NULL;
14670
14671
0
    ctxt->nsNr = 0;
14672
14673
0
    DICT_FREE(ctxt->version);
14674
0
    ctxt->version = NULL;
14675
0
    DICT_FREE(ctxt->encoding);
14676
0
    ctxt->encoding = NULL;
14677
0
    DICT_FREE(ctxt->directory);
14678
0
    ctxt->directory = NULL;
14679
0
    DICT_FREE(ctxt->extSubURI);
14680
0
    ctxt->extSubURI = NULL;
14681
0
    DICT_FREE(ctxt->extSubSystem);
14682
0
    ctxt->extSubSystem = NULL;
14683
0
    if (ctxt->myDoc != NULL)
14684
0
        xmlFreeDoc(ctxt->myDoc);
14685
0
    ctxt->myDoc = NULL;
14686
14687
0
    ctxt->standalone = -1;
14688
0
    ctxt->hasExternalSubset = 0;
14689
0
    ctxt->hasPErefs = 0;
14690
0
    ctxt->html = 0;
14691
0
    ctxt->external = 0;
14692
0
    ctxt->instate = XML_PARSER_START;
14693
0
    ctxt->token = 0;
14694
14695
0
    ctxt->wellFormed = 1;
14696
0
    ctxt->nsWellFormed = 1;
14697
0
    ctxt->disableSAX = 0;
14698
0
    ctxt->valid = 1;
14699
#if 0
14700
    ctxt->vctxt.userData = ctxt;
14701
    ctxt->vctxt.error = xmlParserValidityError;
14702
    ctxt->vctxt.warning = xmlParserValidityWarning;
14703
#endif
14704
0
    ctxt->record_info = 0;
14705
0
    ctxt->checkIndex = 0;
14706
0
    ctxt->endCheckState = 0;
14707
0
    ctxt->inSubset = 0;
14708
0
    ctxt->errNo = XML_ERR_OK;
14709
0
    ctxt->depth = 0;
14710
0
    ctxt->charset = XML_CHAR_ENCODING_UTF8;
14711
0
    ctxt->catalogs = NULL;
14712
0
    ctxt->sizeentities = 0;
14713
0
    ctxt->sizeentcopy = 0;
14714
0
    xmlInitNodeInfoSeq(&ctxt->node_seq);
14715
14716
0
    if (ctxt->attsDefault != NULL) {
14717
0
        xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
14718
0
        ctxt->attsDefault = NULL;
14719
0
    }
14720
0
    if (ctxt->attsSpecial != NULL) {
14721
0
        xmlHashFree(ctxt->attsSpecial, NULL);
14722
0
        ctxt->attsSpecial = NULL;
14723
0
    }
14724
14725
0
#ifdef LIBXML_CATALOG_ENABLED
14726
0
    if (ctxt->catalogs != NULL)
14727
0
  xmlCatalogFreeLocal(ctxt->catalogs);
14728
0
#endif
14729
0
    ctxt->nbErrors = 0;
14730
0
    ctxt->nbWarnings = 0;
14731
0
    if (ctxt->lastError.code != XML_ERR_OK)
14732
0
        xmlResetError(&ctxt->lastError);
14733
0
}
14734
14735
/**
14736
 * xmlCtxtResetPush:
14737
 * @ctxt: an XML parser context
14738
 * @chunk:  a pointer to an array of chars
14739
 * @size:  number of chars in the array
14740
 * @filename:  an optional file name or URI
14741
 * @encoding:  the document encoding, or NULL
14742
 *
14743
 * Reset a push parser context
14744
 *
14745
 * Returns 0 in case of success and 1 in case of error
14746
 */
14747
int
14748
xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14749
                 int size, const char *filename, const char *encoding)
14750
0
{
14751
0
    xmlParserInputPtr inputStream;
14752
0
    xmlParserInputBufferPtr buf;
14753
0
    xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14754
14755
0
    if (ctxt == NULL)
14756
0
        return(1);
14757
14758
0
    if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14759
0
        enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14760
14761
0
    buf = xmlAllocParserInputBuffer(enc);
14762
0
    if (buf == NULL)
14763
0
        return(1);
14764
14765
0
    if (ctxt == NULL) {
14766
0
        xmlFreeParserInputBuffer(buf);
14767
0
        return(1);
14768
0
    }
14769
14770
0
    xmlCtxtReset(ctxt);
14771
14772
0
    if (filename == NULL) {
14773
0
        ctxt->directory = NULL;
14774
0
    } else {
14775
0
        ctxt->directory = xmlParserGetDirectory(filename);
14776
0
    }
14777
14778
0
    inputStream = xmlNewInputStream(ctxt);
14779
0
    if (inputStream == NULL) {
14780
0
        xmlFreeParserInputBuffer(buf);
14781
0
        return(1);
14782
0
    }
14783
14784
0
    if (filename == NULL)
14785
0
        inputStream->filename = NULL;
14786
0
    else
14787
0
        inputStream->filename = (char *)
14788
0
            xmlCanonicPath((const xmlChar *) filename);
14789
0
    inputStream->buf = buf;
14790
0
    xmlBufResetInput(buf->buffer, inputStream);
14791
14792
0
    inputPush(ctxt, inputStream);
14793
14794
0
    if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14795
0
        (ctxt->input->buf != NULL)) {
14796
0
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
14797
0
        size_t cur = ctxt->input->cur - ctxt->input->base;
14798
14799
0
        xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14800
14801
0
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
14802
#ifdef DEBUG_PUSH
14803
        xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14804
#endif
14805
0
    }
14806
14807
0
    if (encoding != NULL) {
14808
0
        xmlCharEncodingHandlerPtr hdlr;
14809
14810
0
        if (ctxt->encoding != NULL)
14811
0
      xmlFree((xmlChar *) ctxt->encoding);
14812
0
        ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14813
14814
0
        hdlr = xmlFindCharEncodingHandler(encoding);
14815
0
        if (hdlr != NULL) {
14816
0
            xmlSwitchToEncoding(ctxt, hdlr);
14817
0
  } else {
14818
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14819
0
            "Unsupported encoding %s\n", BAD_CAST encoding);
14820
0
        }
14821
0
    } else if (enc != XML_CHAR_ENCODING_NONE) {
14822
0
        xmlSwitchEncoding(ctxt, enc);
14823
0
    }
14824
14825
0
    return(0);
14826
0
}
14827
14828
14829
/**
14830
 * xmlCtxtUseOptionsInternal:
14831
 * @ctxt: an XML parser context
14832
 * @options:  a combination of xmlParserOption
14833
 * @encoding:  the user provided encoding to use
14834
 *
14835
 * Applies the options to the parser context
14836
 *
14837
 * Returns 0 in case of success, the set of unknown or unimplemented options
14838
 *         in case of error.
14839
 */
14840
static int
14841
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
14842
1.11M
{
14843
1.11M
    if (ctxt == NULL)
14844
0
        return(-1);
14845
1.11M
    if (encoding != NULL) {
14846
0
        if (ctxt->encoding != NULL)
14847
0
      xmlFree((xmlChar *) ctxt->encoding);
14848
0
        ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14849
0
    }
14850
1.11M
    if (options & XML_PARSE_RECOVER) {
14851
584k
        ctxt->recovery = 1;
14852
584k
        options -= XML_PARSE_RECOVER;
14853
584k
  ctxt->options |= XML_PARSE_RECOVER;
14854
584k
    } else
14855
526k
        ctxt->recovery = 0;
14856
1.11M
    if (options & XML_PARSE_DTDLOAD) {
14857
775k
        ctxt->loadsubset = XML_DETECT_IDS;
14858
775k
        options -= XML_PARSE_DTDLOAD;
14859
775k
  ctxt->options |= XML_PARSE_DTDLOAD;
14860
775k
    } else
14861
334k
        ctxt->loadsubset = 0;
14862
1.11M
    if (options & XML_PARSE_DTDATTR) {
14863
441k
        ctxt->loadsubset |= XML_COMPLETE_ATTRS;
14864
441k
        options -= XML_PARSE_DTDATTR;
14865
441k
  ctxt->options |= XML_PARSE_DTDATTR;
14866
441k
    }
14867
1.11M
    if (options & XML_PARSE_NOENT) {
14868
668k
        ctxt->replaceEntities = 1;
14869
        /* ctxt->loadsubset |= XML_DETECT_IDS; */
14870
668k
        options -= XML_PARSE_NOENT;
14871
668k
  ctxt->options |= XML_PARSE_NOENT;
14872
668k
    } else
14873
442k
        ctxt->replaceEntities = 0;
14874
1.11M
    if (options & XML_PARSE_PEDANTIC) {
14875
219k
        ctxt->pedantic = 1;
14876
219k
        options -= XML_PARSE_PEDANTIC;
14877
219k
  ctxt->options |= XML_PARSE_PEDANTIC;
14878
219k
    } else
14879
891k
        ctxt->pedantic = 0;
14880
1.11M
    if (options & XML_PARSE_NOBLANKS) {
14881
375k
        ctxt->keepBlanks = 0;
14882
375k
        ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
14883
375k
        options -= XML_PARSE_NOBLANKS;
14884
375k
  ctxt->options |= XML_PARSE_NOBLANKS;
14885
375k
    } else
14886
735k
        ctxt->keepBlanks = 1;
14887
1.11M
    if (options & XML_PARSE_DTDVALID) {
14888
523k
        ctxt->validate = 1;
14889
523k
        if (options & XML_PARSE_NOWARNING)
14890
325k
            ctxt->vctxt.warning = NULL;
14891
523k
        if (options & XML_PARSE_NOERROR)
14892
354k
            ctxt->vctxt.error = NULL;
14893
523k
        options -= XML_PARSE_DTDVALID;
14894
523k
  ctxt->options |= XML_PARSE_DTDVALID;
14895
523k
    } else
14896
586k
        ctxt->validate = 0;
14897
1.11M
    if (options & XML_PARSE_NOWARNING) {
14898
435k
        ctxt->sax->warning = NULL;
14899
435k
        options -= XML_PARSE_NOWARNING;
14900
435k
    }
14901
1.11M
    if (options & XML_PARSE_NOERROR) {
14902
469k
        ctxt->sax->error = NULL;
14903
469k
        ctxt->sax->fatalError = NULL;
14904
469k
        options -= XML_PARSE_NOERROR;
14905
469k
    }
14906
1.11M
#ifdef LIBXML_SAX1_ENABLED
14907
1.11M
    if (options & XML_PARSE_SAX1) {
14908
407k
        ctxt->sax->startElement = xmlSAX2StartElement;
14909
407k
        ctxt->sax->endElement = xmlSAX2EndElement;
14910
407k
        ctxt->sax->startElementNs = NULL;
14911
407k
        ctxt->sax->endElementNs = NULL;
14912
407k
        ctxt->sax->initialized = 1;
14913
407k
        options -= XML_PARSE_SAX1;
14914
407k
  ctxt->options |= XML_PARSE_SAX1;
14915
407k
    }
14916
1.11M
#endif /* LIBXML_SAX1_ENABLED */
14917
1.11M
    if (options & XML_PARSE_NODICT) {
14918
376k
        ctxt->dictNames = 0;
14919
376k
        options -= XML_PARSE_NODICT;
14920
376k
  ctxt->options |= XML_PARSE_NODICT;
14921
733k
    } else {
14922
733k
        ctxt->dictNames = 1;
14923
733k
    }
14924
1.11M
    if (options & XML_PARSE_NOCDATA) {
14925
404k
        ctxt->sax->cdataBlock = NULL;
14926
404k
        options -= XML_PARSE_NOCDATA;
14927
404k
  ctxt->options |= XML_PARSE_NOCDATA;
14928
404k
    }
14929
1.11M
    if (options & XML_PARSE_NSCLEAN) {
14930
491k
  ctxt->options |= XML_PARSE_NSCLEAN;
14931
491k
        options -= XML_PARSE_NSCLEAN;
14932
491k
    }
14933
1.11M
    if (options & XML_PARSE_NONET) {
14934
373k
  ctxt->options |= XML_PARSE_NONET;
14935
373k
        options -= XML_PARSE_NONET;
14936
373k
    }
14937
1.11M
    if (options & XML_PARSE_COMPACT) {
14938
607k
  ctxt->options |= XML_PARSE_COMPACT;
14939
607k
        options -= XML_PARSE_COMPACT;
14940
607k
    }
14941
1.11M
    if (options & XML_PARSE_OLD10) {
14942
343k
  ctxt->options |= XML_PARSE_OLD10;
14943
343k
        options -= XML_PARSE_OLD10;
14944
343k
    }
14945
1.11M
    if (options & XML_PARSE_NOBASEFIX) {
14946
398k
  ctxt->options |= XML_PARSE_NOBASEFIX;
14947
398k
        options -= XML_PARSE_NOBASEFIX;
14948
398k
    }
14949
1.11M
    if (options & XML_PARSE_HUGE) {
14950
340k
  ctxt->options |= XML_PARSE_HUGE;
14951
340k
        options -= XML_PARSE_HUGE;
14952
340k
        if (ctxt->dict != NULL)
14953
340k
            xmlDictSetLimit(ctxt->dict, 0);
14954
340k
    }
14955
1.11M
    if (options & XML_PARSE_OLDSAX) {
14956
369k
  ctxt->options |= XML_PARSE_OLDSAX;
14957
369k
        options -= XML_PARSE_OLDSAX;
14958
369k
    }
14959
1.11M
    if (options & XML_PARSE_IGNORE_ENC) {
14960
461k
  ctxt->options |= XML_PARSE_IGNORE_ENC;
14961
461k
        options -= XML_PARSE_IGNORE_ENC;
14962
461k
    }
14963
1.11M
    if (options & XML_PARSE_BIG_LINES) {
14964
422k
  ctxt->options |= XML_PARSE_BIG_LINES;
14965
422k
        options -= XML_PARSE_BIG_LINES;
14966
422k
    }
14967
1.11M
    ctxt->linenumbers = 1;
14968
1.11M
    return (options);
14969
1.11M
}
14970
14971
/**
14972
 * xmlCtxtUseOptions:
14973
 * @ctxt: an XML parser context
14974
 * @options:  a combination of xmlParserOption
14975
 *
14976
 * Applies the options to the parser context
14977
 *
14978
 * Returns 0 in case of success, the set of unknown or unimplemented options
14979
 *         in case of error.
14980
 */
14981
int
14982
xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
14983
740k
{
14984
740k
   return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
14985
740k
}
14986
14987
/**
14988
 * xmlDoRead:
14989
 * @ctxt:  an XML parser context
14990
 * @URL:  the base URL to use for the document
14991
 * @encoding:  the document encoding, or NULL
14992
 * @options:  a combination of xmlParserOption
14993
 * @reuse:  keep the context for reuse
14994
 *
14995
 * Common front-end for the xmlRead functions
14996
 *
14997
 * Returns the resulting document tree or NULL
14998
 */
14999
static xmlDocPtr
15000
xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
15001
          int options, int reuse)
15002
369k
{
15003
369k
    xmlDocPtr ret;
15004
15005
369k
    xmlCtxtUseOptionsInternal(ctxt, options, encoding);
15006
369k
    if (encoding != NULL) {
15007
0
        xmlCharEncodingHandlerPtr hdlr;
15008
15009
0
  hdlr = xmlFindCharEncodingHandler(encoding);
15010
0
  if (hdlr != NULL)
15011
0
      xmlSwitchToEncoding(ctxt, hdlr);
15012
0
    }
15013
369k
    if ((URL != NULL) && (ctxt->input != NULL) &&
15014
369k
        (ctxt->input->filename == NULL))
15015
369k
        ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
15016
369k
    xmlParseDocument(ctxt);
15017
369k
    if ((ctxt->wellFormed) || ctxt->recovery)
15018
214k
        ret = ctxt->myDoc;
15019
154k
    else {
15020
154k
        ret = NULL;
15021
154k
  if (ctxt->myDoc != NULL) {
15022
136k
      xmlFreeDoc(ctxt->myDoc);
15023
136k
  }
15024
154k
    }
15025
369k
    ctxt->myDoc = NULL;
15026
369k
    if (!reuse) {
15027
369k
  xmlFreeParserCtxt(ctxt);
15028
369k
    }
15029
15030
369k
    return (ret);
15031
369k
}
15032
15033
/**
15034
 * xmlReadDoc:
15035
 * @cur:  a pointer to a zero terminated string
15036
 * @URL:  the base URL to use for the document
15037
 * @encoding:  the document encoding, or NULL
15038
 * @options:  a combination of xmlParserOption
15039
 *
15040
 * parse an XML in-memory document and build a tree.
15041
 *
15042
 * Returns the resulting document tree
15043
 */
15044
xmlDocPtr
15045
xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
15046
0
{
15047
0
    xmlParserCtxtPtr ctxt;
15048
15049
0
    if (cur == NULL)
15050
0
        return (NULL);
15051
0
    xmlInitParser();
15052
15053
0
    ctxt = xmlCreateDocParserCtxt(cur);
15054
0
    if (ctxt == NULL)
15055
0
        return (NULL);
15056
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15057
0
}
15058
15059
/**
15060
 * xmlReadFile:
15061
 * @filename:  a file or URL
15062
 * @encoding:  the document encoding, or NULL
15063
 * @options:  a combination of xmlParserOption
15064
 *
15065
 * parse an XML file from the filesystem or the network.
15066
 *
15067
 * Returns the resulting document tree
15068
 */
15069
xmlDocPtr
15070
xmlReadFile(const char *filename, const char *encoding, int options)
15071
0
{
15072
0
    xmlParserCtxtPtr ctxt;
15073
15074
0
    xmlInitParser();
15075
0
    ctxt = xmlCreateURLParserCtxt(filename, options);
15076
0
    if (ctxt == NULL)
15077
0
        return (NULL);
15078
0
    return (xmlDoRead(ctxt, NULL, encoding, options, 0));
15079
0
}
15080
15081
/**
15082
 * xmlReadMemory:
15083
 * @buffer:  a pointer to a char array
15084
 * @size:  the size of the array
15085
 * @URL:  the base URL to use for the document
15086
 * @encoding:  the document encoding, or NULL
15087
 * @options:  a combination of xmlParserOption
15088
 *
15089
 * parse an XML in-memory document and build a tree.
15090
 *
15091
 * Returns the resulting document tree
15092
 */
15093
xmlDocPtr
15094
xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
15095
370k
{
15096
370k
    xmlParserCtxtPtr ctxt;
15097
15098
370k
    xmlInitParser();
15099
370k
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
15100
370k
    if (ctxt == NULL)
15101
979
        return (NULL);
15102
369k
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15103
370k
}
15104
15105
/**
15106
 * xmlReadFd:
15107
 * @fd:  an open file descriptor
15108
 * @URL:  the base URL to use for the document
15109
 * @encoding:  the document encoding, or NULL
15110
 * @options:  a combination of xmlParserOption
15111
 *
15112
 * parse an XML from a file descriptor and build a tree.
15113
 * NOTE that the file descriptor will not be closed when the
15114
 *      reader is closed or reset.
15115
 *
15116
 * Returns the resulting document tree
15117
 */
15118
xmlDocPtr
15119
xmlReadFd(int fd, const char *URL, const char *encoding, int options)
15120
0
{
15121
0
    xmlParserCtxtPtr ctxt;
15122
0
    xmlParserInputBufferPtr input;
15123
0
    xmlParserInputPtr stream;
15124
15125
0
    if (fd < 0)
15126
0
        return (NULL);
15127
0
    xmlInitParser();
15128
15129
0
    input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15130
0
    if (input == NULL)
15131
0
        return (NULL);
15132
0
    input->closecallback = NULL;
15133
0
    ctxt = xmlNewParserCtxt();
15134
0
    if (ctxt == NULL) {
15135
0
        xmlFreeParserInputBuffer(input);
15136
0
        return (NULL);
15137
0
    }
15138
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15139
0
    if (stream == NULL) {
15140
0
        xmlFreeParserInputBuffer(input);
15141
0
  xmlFreeParserCtxt(ctxt);
15142
0
        return (NULL);
15143
0
    }
15144
0
    inputPush(ctxt, stream);
15145
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15146
0
}
15147
15148
/**
15149
 * xmlReadIO:
15150
 * @ioread:  an I/O read function
15151
 * @ioclose:  an I/O close function
15152
 * @ioctx:  an I/O handler
15153
 * @URL:  the base URL to use for the document
15154
 * @encoding:  the document encoding, or NULL
15155
 * @options:  a combination of xmlParserOption
15156
 *
15157
 * parse an XML document from I/O functions and source and build a tree.
15158
 *
15159
 * Returns the resulting document tree
15160
 */
15161
xmlDocPtr
15162
xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
15163
          void *ioctx, const char *URL, const char *encoding, int options)
15164
0
{
15165
0
    xmlParserCtxtPtr ctxt;
15166
0
    xmlParserInputBufferPtr input;
15167
0
    xmlParserInputPtr stream;
15168
15169
0
    if (ioread == NULL)
15170
0
        return (NULL);
15171
0
    xmlInitParser();
15172
15173
0
    input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15174
0
                                         XML_CHAR_ENCODING_NONE);
15175
0
    if (input == NULL) {
15176
0
        if (ioclose != NULL)
15177
0
            ioclose(ioctx);
15178
0
        return (NULL);
15179
0
    }
15180
0
    ctxt = xmlNewParserCtxt();
15181
0
    if (ctxt == NULL) {
15182
0
        xmlFreeParserInputBuffer(input);
15183
0
        return (NULL);
15184
0
    }
15185
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15186
0
    if (stream == NULL) {
15187
0
        xmlFreeParserInputBuffer(input);
15188
0
  xmlFreeParserCtxt(ctxt);
15189
0
        return (NULL);
15190
0
    }
15191
0
    inputPush(ctxt, stream);
15192
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15193
0
}
15194
15195
/**
15196
 * xmlCtxtReadDoc:
15197
 * @ctxt:  an XML parser context
15198
 * @cur:  a pointer to a zero terminated string
15199
 * @URL:  the base URL to use for the document
15200
 * @encoding:  the document encoding, or NULL
15201
 * @options:  a combination of xmlParserOption
15202
 *
15203
 * parse an XML in-memory document and build a tree.
15204
 * This reuses the existing @ctxt parser context
15205
 *
15206
 * Returns the resulting document tree
15207
 */
15208
xmlDocPtr
15209
xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
15210
               const char *URL, const char *encoding, int options)
15211
0
{
15212
0
    if (cur == NULL)
15213
0
        return (NULL);
15214
0
    return (xmlCtxtReadMemory(ctxt, (const char *) cur, xmlStrlen(cur), URL,
15215
0
                              encoding, options));
15216
0
}
15217
15218
/**
15219
 * xmlCtxtReadFile:
15220
 * @ctxt:  an XML parser context
15221
 * @filename:  a file or URL
15222
 * @encoding:  the document encoding, or NULL
15223
 * @options:  a combination of xmlParserOption
15224
 *
15225
 * parse an XML file from the filesystem or the network.
15226
 * This reuses the existing @ctxt parser context
15227
 *
15228
 * Returns the resulting document tree
15229
 */
15230
xmlDocPtr
15231
xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15232
                const char *encoding, int options)
15233
0
{
15234
0
    xmlParserInputPtr stream;
15235
15236
0
    if (filename == NULL)
15237
0
        return (NULL);
15238
0
    if (ctxt == NULL)
15239
0
        return (NULL);
15240
0
    xmlInitParser();
15241
15242
0
    xmlCtxtReset(ctxt);
15243
15244
0
    stream = xmlLoadExternalEntity(filename, NULL, ctxt);
15245
0
    if (stream == NULL) {
15246
0
        return (NULL);
15247
0
    }
15248
0
    inputPush(ctxt, stream);
15249
0
    return (xmlDoRead(ctxt, NULL, encoding, options, 1));
15250
0
}
15251
15252
/**
15253
 * xmlCtxtReadMemory:
15254
 * @ctxt:  an XML parser context
15255
 * @buffer:  a pointer to a char array
15256
 * @size:  the size of the array
15257
 * @URL:  the base URL to use for the document
15258
 * @encoding:  the document encoding, or NULL
15259
 * @options:  a combination of xmlParserOption
15260
 *
15261
 * parse an XML in-memory document and build a tree.
15262
 * This reuses the existing @ctxt parser context
15263
 *
15264
 * Returns the resulting document tree
15265
 */
15266
xmlDocPtr
15267
xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
15268
                  const char *URL, const char *encoding, int options)
15269
0
{
15270
0
    xmlParserInputBufferPtr input;
15271
0
    xmlParserInputPtr stream;
15272
15273
0
    if (ctxt == NULL)
15274
0
        return (NULL);
15275
0
    if (buffer == NULL)
15276
0
        return (NULL);
15277
0
    xmlInitParser();
15278
15279
0
    xmlCtxtReset(ctxt);
15280
15281
0
    input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
15282
0
    if (input == NULL) {
15283
0
  return(NULL);
15284
0
    }
15285
15286
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15287
0
    if (stream == NULL) {
15288
0
  xmlFreeParserInputBuffer(input);
15289
0
  return(NULL);
15290
0
    }
15291
15292
0
    inputPush(ctxt, stream);
15293
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15294
0
}
15295
15296
/**
15297
 * xmlCtxtReadFd:
15298
 * @ctxt:  an XML parser context
15299
 * @fd:  an open file descriptor
15300
 * @URL:  the base URL to use for the document
15301
 * @encoding:  the document encoding, or NULL
15302
 * @options:  a combination of xmlParserOption
15303
 *
15304
 * parse an XML from a file descriptor and build a tree.
15305
 * This reuses the existing @ctxt parser context
15306
 * NOTE that the file descriptor will not be closed when the
15307
 *      reader is closed or reset.
15308
 *
15309
 * Returns the resulting document tree
15310
 */
15311
xmlDocPtr
15312
xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15313
              const char *URL, const char *encoding, int options)
15314
0
{
15315
0
    xmlParserInputBufferPtr input;
15316
0
    xmlParserInputPtr stream;
15317
15318
0
    if (fd < 0)
15319
0
        return (NULL);
15320
0
    if (ctxt == NULL)
15321
0
        return (NULL);
15322
0
    xmlInitParser();
15323
15324
0
    xmlCtxtReset(ctxt);
15325
15326
15327
0
    input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15328
0
    if (input == NULL)
15329
0
        return (NULL);
15330
0
    input->closecallback = NULL;
15331
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15332
0
    if (stream == NULL) {
15333
0
        xmlFreeParserInputBuffer(input);
15334
0
        return (NULL);
15335
0
    }
15336
0
    inputPush(ctxt, stream);
15337
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15338
0
}
15339
15340
/**
15341
 * xmlCtxtReadIO:
15342
 * @ctxt:  an XML parser context
15343
 * @ioread:  an I/O read function
15344
 * @ioclose:  an I/O close function
15345
 * @ioctx:  an I/O handler
15346
 * @URL:  the base URL to use for the document
15347
 * @encoding:  the document encoding, or NULL
15348
 * @options:  a combination of xmlParserOption
15349
 *
15350
 * parse an XML document from I/O functions and source and build a tree.
15351
 * This reuses the existing @ctxt parser context
15352
 *
15353
 * Returns the resulting document tree
15354
 */
15355
xmlDocPtr
15356
xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15357
              xmlInputCloseCallback ioclose, void *ioctx,
15358
        const char *URL,
15359
              const char *encoding, int options)
15360
0
{
15361
0
    xmlParserInputBufferPtr input;
15362
0
    xmlParserInputPtr stream;
15363
15364
0
    if (ioread == NULL)
15365
0
        return (NULL);
15366
0
    if (ctxt == NULL)
15367
0
        return (NULL);
15368
0
    xmlInitParser();
15369
15370
0
    xmlCtxtReset(ctxt);
15371
15372
0
    input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15373
0
                                         XML_CHAR_ENCODING_NONE);
15374
0
    if (input == NULL) {
15375
0
        if (ioclose != NULL)
15376
0
            ioclose(ioctx);
15377
0
        return (NULL);
15378
0
    }
15379
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15380
0
    if (stream == NULL) {
15381
0
        xmlFreeParserInputBuffer(input);
15382
0
        return (NULL);
15383
0
    }
15384
0
    inputPush(ctxt, stream);
15385
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15386
0
}
15387