Coverage Report

Created: 2024-05-29 15:23

/src/libxml2/parser.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3
 *            implemented on top of the SAX interfaces
4
 *
5
 * References:
6
 *   The XML specification:
7
 *     http://www.w3.org/TR/REC-xml
8
 *   Original 1.0 version:
9
 *     http://www.w3.org/TR/1998/REC-xml-19980210
10
 *   XML second edition working draft
11
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
12
 *
13
 * Okay this is a big file, the parser core is around 7000 lines, then it
14
 * is followed by the progressive parser top routines, then the various
15
 * high level APIs to call the parser and a few miscellaneous functions.
16
 * A number of helper functions and deprecated ones have been moved to
17
 * parserInternals.c to reduce this file size.
18
 * As much as possible the functions are associated with their relative
19
 * production in the XML specification. A few productions defining the
20
 * different ranges of character are actually implemented either in
21
 * parserInternals.h or parserInternals.c
22
 * The DOM tree build is realized from the default SAX callbacks in
23
 * the module SAX.c.
24
 * The routines doing the validation checks are in valid.c and called either
25
 * from the SAX callbacks or as standalone functions using a preparsed
26
 * document.
27
 *
28
 * See Copyright for the status of this software.
29
 *
30
 * daniel@veillard.com
31
 */
32
33
/* To avoid EBCDIC trouble when parsing on zOS */
34
#if defined(__MVS__)
35
#pragma convert("ISO8859-1")
36
#endif
37
38
#define IN_LIBXML
39
#include "libxml.h"
40
41
#if defined(_WIN32)
42
#define XML_DIR_SEP '\\'
43
#else
44
#define XML_DIR_SEP '/'
45
#endif
46
47
#include <stdlib.h>
48
#include <limits.h>
49
#include <string.h>
50
#include <stdarg.h>
51
#include <stddef.h>
52
#include <ctype.h>
53
#include <stdlib.h>
54
#include <libxml/xmlmemory.h>
55
#include <libxml/threads.h>
56
#include <libxml/globals.h>
57
#include <libxml/tree.h>
58
#include <libxml/parser.h>
59
#include <libxml/parserInternals.h>
60
#include <libxml/HTMLparser.h>
61
#include <libxml/valid.h>
62
#include <libxml/entities.h>
63
#include <libxml/xmlerror.h>
64
#include <libxml/encoding.h>
65
#include <libxml/xmlIO.h>
66
#include <libxml/uri.h>
67
#ifdef LIBXML_CATALOG_ENABLED
68
#include <libxml/catalog.h>
69
#endif
70
#ifdef LIBXML_SCHEMAS_ENABLED
71
#include <libxml/xmlschemastypes.h>
72
#include <libxml/relaxng.h>
73
#endif
74
#if defined(LIBXML_XPATH_ENABLED) || defined(LIBXML_SCHEMAS_ENABLED)
75
#include <libxml/xpath.h>
76
#endif
77
78
#include "private/buf.h"
79
#include "private/dict.h"
80
#include "private/enc.h"
81
#include "private/entities.h"
82
#include "private/error.h"
83
#include "private/globals.h"
84
#include "private/html.h"
85
#include "private/io.h"
86
#include "private/memory.h"
87
#include "private/parser.h"
88
#include "private/threads.h"
89
#include "private/xpath.h"
90
91
struct _xmlStartTag {
92
    const xmlChar *prefix;
93
    const xmlChar *URI;
94
    int line;
95
    int nsNr;
96
};
97
98
static xmlParserCtxtPtr
99
xmlCreateEntityParserCtxtInternal(xmlSAXHandlerPtr sax, void *userData,
100
        const xmlChar *URL, const xmlChar *ID, const xmlChar *base,
101
        xmlParserCtxtPtr pctx);
102
103
static void xmlHaltParser(xmlParserCtxtPtr ctxt);
104
105
static int
106
xmlParseElementStart(xmlParserCtxtPtr ctxt);
107
108
static void
109
xmlParseElementEnd(xmlParserCtxtPtr ctxt);
110
111
/************************************************************************
112
 *                  *
113
 *  Arbitrary limits set in the parser. See XML_PARSE_HUGE    *
114
 *                  *
115
 ************************************************************************/
116
117
4.03M
#define XML_MAX_HUGE_LENGTH 1000000000
118
119
#define XML_PARSER_BIG_ENTITY 1000
120
#define XML_PARSER_LOT_ENTITY 5000
121
122
/*
123
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
124
 *    replacement over the size in bytes of the input indicates that you have
125
 *    an exponential behaviour. A value of 10 corresponds to at least 3 entity
126
 *    replacement per byte of input.
127
 */
128
362
#define XML_PARSER_NON_LINEAR 10
129
130
27.8M
#define XML_ENT_FIXED_COST 50
131
132
/**
133
 * xmlParserMaxDepth:
134
 *
135
 * arbitrary depth limit for the XML documents that we allow to
136
 * process. This is not a limitation of the parser but a safety
137
 * boundary feature. It can be disabled with the XML_PARSE_HUGE
138
 * parser option.
139
 */
140
unsigned int xmlParserMaxDepth = 256;
141
142
143
144
#define SAX2 1
145
41.6M
#define XML_PARSER_BIG_BUFFER_SIZE 300
146
3.34G
#define XML_PARSER_BUFFER_SIZE 100
147
330k
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
148
149
/**
150
 * XML_PARSER_CHUNK_SIZE
151
 *
152
 * When calling GROW that's the minimal amount of data
153
 * the parser expects to have received. It is not a hard
154
 * limit but an optimization when reading strings like Names.
155
 * It is not strictly needed as long as the input's available characters
156
 * are followed by 0, which should be provided by the I/O level
157
 */
158
22.0M
#define XML_PARSER_CHUNK_SIZE 100
159
160
/*
161
 * List of XML prefixed PI allowed by W3C specs
162
 */
163
164
static const char* const xmlW3CPIs[] = {
165
    "xml-stylesheet",
166
    "xml-model",
167
    NULL
168
};
169
170
171
/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
172
static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
173
                                              const xmlChar **str);
174
175
static xmlParserErrors
176
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
177
                xmlSAXHandlerPtr sax,
178
          void *user_data, int depth, const xmlChar *URL,
179
          const xmlChar *ID, xmlNodePtr *list);
180
181
static int
182
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
183
                          const char *encoding);
184
#ifdef LIBXML_LEGACY_ENABLED
185
static void
186
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
187
                      xmlNodePtr lastNode);
188
#endif /* LIBXML_LEGACY_ENABLED */
189
190
static xmlParserErrors
191
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
192
          const xmlChar *string, void *user_data, xmlNodePtr *lst);
193
194
static int
195
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
196
197
/************************************************************************
198
 *                  *
199
 *    Some factorized error routines        *
200
 *                  *
201
 ************************************************************************/
202
203
/**
204
 * xmlErrAttributeDup:
205
 * @ctxt:  an XML parser context (may be NULL)
206
 * @prefix:  the attribute prefix, or NULL to report the local name only
207
 * @localname:  the attribute localname
208
 *
209
 * Handle a redefinition of attribute error.
 *
 * The error is raised as XML_ERR_ATTRIBUTE_REDEFINED at the XML_ERR_FATAL
 * level in the XML_FROM_PARSER domain. Nothing is reported when the
 * context has SAX disabled and its state is XML_PARSER_EOF. When a
 * context is given, its errNo is set, it is flagged as not well-formed
 * and, if recovery is off, SAX is disabled.
210
 */
211
static void
212
xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
213
                   const xmlChar * localname)
214
10.6k
{
215
10.6k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
216
10.6k
        (ctxt->instate == XML_PARSER_EOF))
217
18
  return;
218
10.6k
    if (ctxt != NULL)
219
10.6k
  ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
220
221
    /* Use the prefixed message form only when a prefix was supplied. */
10.6k
    if (prefix == NULL)
222
8.55k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
223
8.55k
                        XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
224
8.55k
                        (const char *) localname, NULL, NULL, 0, 0,
225
8.55k
                        "Attribute %s redefined\n", localname);
226
2.05k
    else
227
2.05k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
228
2.05k
                        XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
229
2.05k
                        (const char *) prefix, (const char *) localname,
230
2.05k
                        NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
231
2.05k
                        localname);
232
10.6k
    if (ctxt != NULL) {
233
10.6k
  ctxt->wellFormed = 0;
234
10.6k
  if (ctxt->recovery == 0)
235
4.70k
      ctxt->disableSAX = 1;
236
10.6k
    }
237
10.6k
}
238
239
/**
240
 * xmlFatalErr:
241
 * @ctxt:  an XML parser context (may be NULL)
242
 * @error:  the error number
243
 * @info:  extra information string, or NULL
244
 *
245
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
 *
 * The error number is mapped to a fixed message text; codes without an
 * entry get the text "Unregistered error message". When @info is not
 * NULL it is appended to the message after a colon. Nothing is reported
 * when the context has SAX disabled and its state is XML_PARSER_EOF.
 * When a context is given, its errNo is set, it is flagged as not
 * well-formed and, if recovery is off, SAX is disabled.
246
 */
247
static void
248
xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
249
818k
{
250
818k
    const char *errmsg;
251
252
818k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
253
818k
        (ctxt->instate == XML_PARSER_EOF))
254
6.25k
  return;
255
    /* Select the fixed message text for this error number. */
812k
    switch (error) {
256
23.4k
        case XML_ERR_INVALID_HEX_CHARREF:
257
23.4k
            errmsg = "CharRef: invalid hexadecimal value";
258
23.4k
            break;
259
16.0k
        case XML_ERR_INVALID_DEC_CHARREF:
260
16.0k
            errmsg = "CharRef: invalid decimal value";
261
16.0k
            break;
262
0
        case XML_ERR_INVALID_CHARREF:
263
0
            errmsg = "CharRef: invalid value";
264
0
            break;
265
302k
        case XML_ERR_INTERNAL_ERROR:
266
302k
            errmsg = "internal error";
267
302k
            break;
268
0
        case XML_ERR_PEREF_AT_EOF:
269
0
            errmsg = "PEReference at end of document";
270
0
            break;
271
0
        case XML_ERR_PEREF_IN_PROLOG:
272
0
            errmsg = "PEReference in prolog";
273
0
            break;
274
0
        case XML_ERR_PEREF_IN_EPILOG:
275
0
            errmsg = "PEReference in epilog";
276
0
            break;
277
0
        case XML_ERR_PEREF_NO_NAME:
278
0
            errmsg = "PEReference: no name";
279
0
            break;
280
11.0k
        case XML_ERR_PEREF_SEMICOL_MISSING:
281
11.0k
            errmsg = "PEReference: expecting ';'";
282
11.0k
            break;
283
782
        case XML_ERR_ENTITY_LOOP:
284
782
            errmsg = "Detected an entity reference loop";
285
782
            break;
286
0
        case XML_ERR_ENTITY_NOT_STARTED:
287
0
            errmsg = "EntityValue: \" or ' expected";
288
0
            break;
289
3.13k
        case XML_ERR_ENTITY_PE_INTERNAL:
290
3.13k
            errmsg = "PEReferences forbidden in internal subset";
291
3.13k
            break;
292
816
        case XML_ERR_ENTITY_NOT_FINISHED:
293
816
            /* NOTE(review): same text as XML_ERR_ENTITY_NOT_STARTED -- confirm this is intended */
            errmsg = "EntityValue: \" or ' expected";
294
816
            break;
295
28.4k
        case XML_ERR_ATTRIBUTE_NOT_STARTED:
296
28.4k
            errmsg = "AttValue: \" or ' expected";
297
28.4k
            break;
298
84.2k
        case XML_ERR_LT_IN_ATTRIBUTE:
299
84.2k
            errmsg = "Unescaped '<' not allowed in attributes values";
300
84.2k
            break;
301
7.72k
        case XML_ERR_LITERAL_NOT_STARTED:
302
7.72k
            errmsg = "SystemLiteral \" or ' expected";
303
7.72k
            break;
304
8.26k
        case XML_ERR_LITERAL_NOT_FINISHED:
305
8.26k
            errmsg = "Unfinished System or Public ID \" or ' expected";
306
8.26k
            break;
307
7.83k
        case XML_ERR_MISPLACED_CDATA_END:
308
7.83k
            errmsg = "Sequence ']]>' not allowed in content";
309
7.83k
            break;
310
6.01k
        case XML_ERR_URI_REQUIRED:
311
6.01k
            errmsg = "SYSTEM or PUBLIC, the URI is missing";
312
6.01k
            break;
313
1.78k
        case XML_ERR_PUBID_REQUIRED:
314
1.78k
            errmsg = "PUBLIC, the Public Identifier is missing";
315
1.78k
            break;
316
7.28k
        case XML_ERR_HYPHEN_IN_COMMENT:
317
7.28k
            errmsg = "Comment must not contain '--' (double-hyphen)";
318
7.28k
            break;
319
6.42k
        case XML_ERR_PI_NOT_STARTED:
320
6.42k
            errmsg = "xmlParsePI : no target name";
321
6.42k
            break;
322
1.45k
        case XML_ERR_RESERVED_XML_NAME:
323
1.45k
            errmsg = "Invalid PI name";
324
1.45k
            break;
325
682
        case XML_ERR_NOTATION_NOT_STARTED:
326
682
            errmsg = "NOTATION: Name expected here";
327
682
            break;
328
5.61k
        case XML_ERR_NOTATION_NOT_FINISHED:
329
5.61k
            errmsg = "'>' required to close NOTATION declaration";
330
5.61k
            break;
331
4.43k
        case XML_ERR_VALUE_REQUIRED:
332
4.43k
            errmsg = "Entity value required";
333
4.43k
            break;
334
1.72k
        case XML_ERR_URI_FRAGMENT:
335
1.72k
            errmsg = "Fragment not allowed";
336
1.72k
            break;
337
4.50k
        case XML_ERR_ATTLIST_NOT_STARTED:
338
4.50k
            errmsg = "'(' required to start ATTLIST enumeration";
339
4.50k
            break;
340
492
        case XML_ERR_NMTOKEN_REQUIRED:
341
492
            errmsg = "NmToken expected in ATTLIST enumeration";
342
492
            break;
343
1.37k
        case XML_ERR_ATTLIST_NOT_FINISHED:
344
1.37k
            errmsg = "')' required to finish ATTLIST enumeration";
345
1.37k
            break;
346
1.05k
        case XML_ERR_MIXED_NOT_STARTED:
347
1.05k
            errmsg = "MixedContentDecl : '|' or ')*' expected";
348
1.05k
            break;
349
0
        case XML_ERR_PCDATA_REQUIRED:
350
0
            errmsg = "MixedContentDecl : '#PCDATA' expected";
351
0
            break;
352
2.77k
        case XML_ERR_ELEMCONTENT_NOT_STARTED:
353
2.77k
            errmsg = "ContentDecl : Name or '(' expected";
354
2.77k
            break;
355
4.07k
        case XML_ERR_ELEMCONTENT_NOT_FINISHED:
356
4.07k
            errmsg = "ContentDecl : ',' '|' or ')' expected";
357
4.07k
            break;
358
0
        case XML_ERR_PEREF_IN_INT_SUBSET:
359
0
            errmsg =
360
0
                "PEReference: forbidden within markup decl in internal subset";
361
0
            break;
362
53.6k
        case XML_ERR_GT_REQUIRED:
363
53.6k
            errmsg = "expected '>'";
364
53.6k
            break;
365
78
        case XML_ERR_CONDSEC_INVALID:
366
78
            errmsg = "XML conditional section '[' expected";
367
78
            break;
368
5.14k
        case XML_ERR_EXT_SUBSET_NOT_FINISHED:
369
5.14k
            errmsg = "Content error in the external subset";
370
5.14k
            break;
371
418
        case XML_ERR_CONDSEC_INVALID_KEYWORD:
372
418
            errmsg =
373
418
                "conditional section INCLUDE or IGNORE keyword expected";
374
418
            break;
375
492
        case XML_ERR_CONDSEC_NOT_FINISHED:
376
492
            errmsg = "XML conditional section not closed";
377
492
            break;
378
111
        case XML_ERR_XMLDECL_NOT_STARTED:
379
111
            errmsg = "Text declaration '<?xml' required";
380
111
            break;
381
35.3k
        case XML_ERR_XMLDECL_NOT_FINISHED:
382
35.3k
            errmsg = "parsing XML declaration: '?>' expected";
383
35.3k
            break;
384
0
        case XML_ERR_EXT_ENTITY_STANDALONE:
385
0
            errmsg = "external parsed entities cannot be standalone";
386
0
            break;
387
73.9k
        case XML_ERR_ENTITYREF_SEMICOL_MISSING:
388
73.9k
            errmsg = "EntityRef: expecting ';'";
389
73.9k
            break;
390
3.20k
        case XML_ERR_DOCTYPE_NOT_FINISHED:
391
3.20k
            errmsg = "DOCTYPE improperly terminated";
392
3.20k
            break;
393
0
        case XML_ERR_LTSLASH_REQUIRED:
394
0
            errmsg = "EndTag: '</' not found";
395
0
            break;
396
3.01k
        case XML_ERR_EQUAL_REQUIRED:
397
3.01k
            errmsg = "expected '='";
398
3.01k
            break;
399
9.29k
        case XML_ERR_STRING_NOT_CLOSED:
400
9.29k
            errmsg = "String not closed expecting \" or '";
401
9.29k
            break;
402
1.67k
        case XML_ERR_STRING_NOT_STARTED:
403
1.67k
            errmsg = "String not started expecting ' or \"";
404
1.67k
            break;
405
1.37k
        case XML_ERR_ENCODING_NAME:
406
1.37k
            errmsg = "Invalid XML encoding name";
407
1.37k
            break;
408
330
        case XML_ERR_STANDALONE_VALUE:
409
330
            errmsg = "standalone accepts only 'yes' or 'no'";
410
330
            break;
411
8.82k
        case XML_ERR_DOCUMENT_EMPTY:
412
8.82k
            errmsg = "Document is empty";
413
8.82k
            break;
414
50.2k
        case XML_ERR_DOCUMENT_END:
415
50.2k
            errmsg = "Extra content at the end of the document";
416
50.2k
            break;
417
8.92k
        case XML_ERR_NOT_WELL_BALANCED:
418
8.92k
            errmsg = "chunk is not well balanced";
419
8.92k
            break;
420
0
        case XML_ERR_EXTRA_CONTENT:
421
0
            errmsg = "extra content at the end of well balanced chunk";
422
0
            break;
423
10.7k
        case XML_ERR_VERSION_MISSING:
424
10.7k
            errmsg = "Malformed declaration expecting version";
425
10.7k
            break;
426
88
        case XML_ERR_NAME_TOO_LONG:
427
88
            errmsg = "Name too long";
428
88
            break;
429
#if 0
430
        case:
431
            errmsg = "";
432
            break;
433
#endif
434
856
        default:
435
856
            errmsg = "Unregistered error message";
436
812k
    }
437
812k
    if (ctxt != NULL)
438
812k
  ctxt->errNo = error;
439
    /* Append the extra information to the message only when it was given. */
812k
    if (info == NULL) {
440
509k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
441
509k
                        XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s\n",
442
509k
                        errmsg);
443
509k
    } else {
444
302k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
445
302k
                        XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s: %s\n",
446
302k
                        errmsg, info);
447
302k
    }
448
812k
    if (ctxt != NULL) {
449
812k
  ctxt->wellFormed = 0;
450
812k
  if (ctxt->recovery == 0)
451
183k
      ctxt->disableSAX = 1;
452
812k
    }
453
812k
}
454
455
/**
456
 * xmlFatalErrMsg:
457
 * @ctxt:  an XML parser context (may be NULL)
458
 * @error:  the error number
459
 * @msg:  the error message, passed as the argument of a %s format
460
 *
461
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
 *
 * Nothing is reported when the context has SAX disabled and its state
 * is XML_PARSER_EOF. When a context is given, its errNo is set, it is
 * flagged as not well-formed and, if recovery is off, SAX is disabled.
462
 */
463
static void LIBXML_ATTR_FORMAT(3,0)
464
xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
465
               const char *msg)
466
1.12M
{
467
1.12M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
468
1.12M
        (ctxt->instate == XML_PARSER_EOF))
469
66
  return;
470
1.12M
    if (ctxt != NULL)
471
1.12M
  ctxt->errNo = error;
472
1.12M
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
473
1.12M
                    XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
474
1.12M
    if (ctxt != NULL) {
475
1.12M
  ctxt->wellFormed = 0;
476
1.12M
  if (ctxt->recovery == 0)
477
307k
      ctxt->disableSAX = 1;
478
1.12M
    }
479
1.12M
}
480
481
/**
482
 * xmlWarningMsg:
483
 * @ctxt:  an XML parser context (may be NULL)
484
 * @error:  the error number
485
 * @msg:  the error message, used as the format string
486
 * @str1:  extra data, first format argument
487
 * @str2:  extra data, second format argument
488
 *
489
 * Handle a warning.
 *
 * The warning is raised at the XML_ERR_WARNING level in the
 * XML_FROM_PARSER domain. With a context, it goes through the SAX
 * warning callback with ctxt->userData, and through the structured
 * sax->serror channel when the SAX handler is initialized with
 * XML_SAX2_MAGIC. Nothing is reported when the context has SAX disabled
 * and its state is XML_PARSER_EOF. No field of the context is modified
 * by this function itself.
490
 */
491
static void LIBXML_ATTR_FORMAT(3,0)
492
xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
493
              const char *msg, const xmlChar *str1, const xmlChar *str2)
494
1.70M
{
495
1.70M
    xmlStructuredErrorFunc schannel = NULL;
496
497
1.70M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
498
1.70M
        (ctxt->instate == XML_PARSER_EOF))
499
0
  return;
500
    /* The structured channel is only used with an XML_SAX2_MAGIC handler. */
1.70M
    if ((ctxt != NULL) && (ctxt->sax != NULL) &&
501
1.70M
        (ctxt->sax->initialized == XML_SAX2_MAGIC))
502
978k
        schannel = ctxt->sax->serror;
503
1.70M
    if (ctxt != NULL) {
504
1.70M
        __xmlRaiseError(schannel,
505
1.70M
                    (ctxt->sax) ? ctxt->sax->warning : NULL,
506
1.70M
                    ctxt->userData,
507
1.70M
                    ctxt, NULL, XML_FROM_PARSER, error,
508
1.70M
                    XML_ERR_WARNING, NULL, 0,
509
1.70M
        (const char *) str1, (const char *) str2, NULL, 0, 0,
510
1.70M
        msg, (const char *) str1, (const char *) str2);
511
1.70M
    } else {
512
0
        __xmlRaiseError(schannel, NULL, NULL,
513
0
                    ctxt, NULL, XML_FROM_PARSER, error,
514
0
                    XML_ERR_WARNING, NULL, 0,
515
0
        (const char *) str1, (const char *) str2, NULL, 0, 0,
516
0
        msg, (const char *) str1, (const char *) str2);
517
0
    }
518
1.70M
}
519
520
/**
521
 * xmlValidityError:
522
 * @ctxt:  an XML parser context (may be NULL)
523
 * @error:  the error number
524
 * @msg:  the error message, used as the format string
525
 * @str1:  extra data, first format argument
 * @str2:  extra data, second format argument
526
 *
527
 * Handle a validity error.
 *
 * The error is raised at the XML_ERR_ERROR level in the XML_FROM_DTD
 * domain. With a context, it goes through ctxt->vctxt.error with
 * ctxt->vctxt.userData, and through the structured sax->serror channel
 * when the SAX handler is initialized with XML_SAX2_MAGIC; the context
 * errNo is set and its valid flag is cleared. Nothing is reported when
 * the context has SAX disabled and its state is XML_PARSER_EOF.
528
 */
529
static void LIBXML_ATTR_FORMAT(3,0)
530
xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
531
              const char *msg, const xmlChar *str1, const xmlChar *str2)
532
15.3k
{
533
15.3k
    xmlStructuredErrorFunc schannel = NULL;
534
535
15.3k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
536
15.3k
        (ctxt->instate == XML_PARSER_EOF))
537
0
  return;
538
15.3k
    if (ctxt != NULL) {
539
15.3k
  ctxt->errNo = error;
540
15.3k
  if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
541
8.01k
      schannel = ctxt->sax->serror;
542
15.3k
    }
543
15.3k
    if (ctxt != NULL) {
544
15.3k
        __xmlRaiseError(schannel,
545
15.3k
                    ctxt->vctxt.error, ctxt->vctxt.userData,
546
15.3k
                    ctxt, NULL, XML_FROM_DTD, error,
547
15.3k
                    XML_ERR_ERROR, NULL, 0, (const char *) str1,
548
15.3k
        (const char *) str2, NULL, 0, 0,
549
15.3k
        msg, (const char *) str1, (const char *) str2);
550
15.3k
  ctxt->valid = 0;
551
15.3k
    } else {
552
0
        __xmlRaiseError(schannel, NULL, NULL,
553
0
                    ctxt, NULL, XML_FROM_DTD, error,
554
0
                    XML_ERR_ERROR, NULL, 0, (const char *) str1,
555
0
        (const char *) str2, NULL, 0, 0,
556
0
        msg, (const char *) str1, (const char *) str2);
557
0
    }
558
15.3k
}
559
560
/**
561
 * xmlFatalErrMsgInt:
562
 * @ctxt:  an XML parser context (may be NULL)
563
 * @error:  the error number
564
 * @msg:  the error message, used as the format string
565
 * @val:  an integer value, the single format argument
566
 *
567
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
 *
 * Nothing is reported when the context has SAX disabled and its state
 * is XML_PARSER_EOF. When a context is given, its errNo is set, it is
 * flagged as not well-formed and, if recovery is off, SAX is disabled.
568
 */
569
static void LIBXML_ATTR_FORMAT(3,0)
570
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
571
                  const char *msg, int val)
572
627k
{
573
627k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
574
627k
        (ctxt->instate == XML_PARSER_EOF))
575
0
  return;
576
627k
    if (ctxt != NULL)
577
627k
  ctxt->errNo = error;
578
627k
    __xmlRaiseError(NULL, NULL, NULL,
579
627k
                    ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
580
627k
                    NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
581
627k
    if (ctxt != NULL) {
582
627k
  ctxt->wellFormed = 0;
583
627k
  if (ctxt->recovery == 0)
584
122k
      ctxt->disableSAX = 1;
585
627k
    }
586
627k
}
587
588
/**
589
 * xmlFatalErrMsgStrIntStr:
590
 * @ctxt:  an XML parser context (may be NULL)
591
 * @error:  the error number
592
 * @msg:  the error message, used as the format string
593
 * @str1:  a string info, first format argument
594
 * @val:  an integer value, second format argument
595
 * @str2:  a string info, third format argument
596
 *
597
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
 *
 * Nothing is reported when the context has SAX disabled and its state
 * is XML_PARSER_EOF. When a context is given, its errNo is set, it is
 * flagged as not well-formed and, if recovery is off, SAX is disabled.
598
 */
599
static void LIBXML_ATTR_FORMAT(3,0)
600
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
601
                  const char *msg, const xmlChar *str1, int val,
602
      const xmlChar *str2)
603
387k
{
604
387k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
605
387k
        (ctxt->instate == XML_PARSER_EOF))
606
0
  return;
607
387k
    if (ctxt != NULL)
608
387k
  ctxt->errNo = error;
609
387k
    __xmlRaiseError(NULL, NULL, NULL,
610
387k
                    ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
611
387k
                    NULL, 0, (const char *) str1, (const char *) str2,
612
387k
        NULL, val, 0, msg, str1, val, str2);
613
387k
    if (ctxt != NULL) {
614
387k
  ctxt->wellFormed = 0;
615
387k
  if (ctxt->recovery == 0)
616
137k
      ctxt->disableSAX = 1;
617
387k
    }
618
387k
}
619
620
/**
621
 * xmlFatalErrMsgStr:
622
 * @ctxt:  an XML parser context (may be NULL)
623
 * @error:  the error number
624
 * @msg:  the error message, used as the format string
625
 * @val:  a string value, the single format argument
626
 *
627
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
 *
 * Nothing is reported when the context has SAX disabled and its state
 * is XML_PARSER_EOF. When a context is given, its errNo is set, it is
 * flagged as not well-formed and, if recovery is off, SAX is disabled.
628
 */
629
static void LIBXML_ATTR_FORMAT(3,0)
630
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
631
                  const char *msg, const xmlChar * val)
632
1.23M
{
633
1.23M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
634
1.23M
        (ctxt->instate == XML_PARSER_EOF))
635
28
  return;
636
1.23M
    if (ctxt != NULL)
637
1.23M
  ctxt->errNo = error;
638
1.23M
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
639
1.23M
                    XML_FROM_PARSER, error, XML_ERR_FATAL,
640
1.23M
                    NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
641
1.23M
                    val);
642
1.23M
    if (ctxt != NULL) {
643
1.23M
  ctxt->wellFormed = 0;
644
1.23M
  if (ctxt->recovery == 0)
645
298k
      ctxt->disableSAX = 1;
646
1.23M
    }
647
1.23M
}
648
649
/**
650
 * xmlErrMsgStr:
651
 * @ctxt:  an XML parser context (may be NULL)
652
 * @error:  the error number
653
 * @msg:  the error message, used as the format string
654
 * @val:  a string value, the single format argument
655
 *
656
 * Handle a non fatal parser error
 *
 * The error is raised at the XML_ERR_ERROR level in the XML_FROM_PARSER
 * domain. When a context is given its errNo is set; unlike the fatal
 * variants, wellFormed and disableSAX are not modified here. Nothing is
 * reported when the context has SAX disabled and its state is
 * XML_PARSER_EOF.
657
 */
658
static void LIBXML_ATTR_FORMAT(3,0)
659
xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
660
                  const char *msg, const xmlChar * val)
661
405k
{
662
405k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
663
405k
        (ctxt->instate == XML_PARSER_EOF))
664
0
  return;
665
405k
    if (ctxt != NULL)
666
405k
  ctxt->errNo = error;
667
405k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
668
405k
                    XML_FROM_PARSER, error, XML_ERR_ERROR,
669
405k
                    NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
670
405k
                    val);
671
405k
}
672
673
/**
674
 * xmlNsErr:
675
 * @ctxt:  an XML parser context (may be NULL)
676
 * @error:  the error number
677
 * @msg:  the message, used as the format string
678
 * @info1:  extra information string, first format argument
679
 * @info2:  extra information string, second format argument
 * @info3:  extra information string, third format argument
680
 *
681
 * Handle a namespace error.
 *
 * The error is raised at the XML_ERR_ERROR level in the
 * XML_FROM_NAMESPACE domain. When a context is given, its errNo is set
 * and its nsWellFormed flag is cleared; wellFormed and disableSAX are
 * not modified here. Nothing is reported when the context has SAX
 * disabled and its state is XML_PARSER_EOF.
682
 */
683
static void LIBXML_ATTR_FORMAT(3,0)
684
xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
685
         const char *msg,
686
         const xmlChar * info1, const xmlChar * info2,
687
         const xmlChar * info3)
688
292k
{
689
292k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
690
292k
        (ctxt->instate == XML_PARSER_EOF))
691
113
  return;
692
292k
    if (ctxt != NULL)
693
292k
  ctxt->errNo = error;
694
292k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
695
292k
                    XML_ERR_ERROR, NULL, 0, (const char *) info1,
696
292k
                    (const char *) info2, (const char *) info3, 0, 0, msg,
697
292k
                    info1, info2, info3);
698
292k
    if (ctxt != NULL)
699
292k
  ctxt->nsWellFormed = 0;
700
292k
}
701
702
/**
703
 * xmlNsWarn:
704
 * @ctxt:  an XML parser context (may be NULL)
705
 * @error:  the error number
706
 * @msg:  the message, used as the format string
707
 * @info1:  extra information string, first format argument
708
 * @info2:  extra information string, second format argument
 * @info3:  extra information string, third format argument
709
 *
710
 * Handle a namespace warning.
 *
 * The warning is raised at the XML_ERR_WARNING level in the
 * XML_FROM_NAMESPACE domain. No field of the context is modified by
 * this function itself. Nothing is reported when the context has SAX
 * disabled and its state is XML_PARSER_EOF.
711
 */
712
static void LIBXML_ATTR_FORMAT(3,0)
713
xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
714
         const char *msg,
715
         const xmlChar * info1, const xmlChar * info2,
716
         const xmlChar * info3)
717
3.55k
{
718
3.55k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
719
3.55k
        (ctxt->instate == XML_PARSER_EOF))
720
0
  return;
721
3.55k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
722
3.55k
                    XML_ERR_WARNING, NULL, 0, (const char *) info1,
723
3.55k
                    (const char *) info2, (const char *) info3, 0, 0, msg,
724
3.55k
                    info1, info2, info3);
725
3.55k
}
726
727
/**
 * xmlSaturatedAdd:
 * @dst:  accumulator, updated in place
 * @val:  amount to add
 *
 * Add @val to *@dst, clamping the result to ULONG_MAX instead of
 * wrapping around when the sum would overflow.
 */
static void
728
99.5M
xmlSaturatedAdd(unsigned long *dst, unsigned long val) {
729
99.5M
    if (val > ULONG_MAX - *dst)
730
0
        *dst = ULONG_MAX;
731
99.5M
    else
732
99.5M
        *dst += val;
733
99.5M
}
734
735
/**
 * xmlSaturatedAddSizeT:
 * @dst:  accumulator, updated in place
 * @val:  amount to add
 *
 * Add @val to *@dst, clamping the result to ULONG_MAX instead of
 * wrapping around when the sum would overflow.
 *
 * NOTE(review): @val is declared unsigned long although the name
 * suggests size_t, and the body is the same as xmlSaturatedAdd.
 * Presumably an argument wider than unsigned long would be narrowed at
 * the call before the saturation check -- confirm whether that matters
 * on the supported platforms.
 */
static void
736
28.7M
xmlSaturatedAddSizeT(unsigned long *dst, unsigned long val) {
737
28.7M
    if (val > ULONG_MAX - *dst)
738
0
        *dst = ULONG_MAX;
739
28.7M
    else
740
28.7M
        *dst += val;
741
28.7M
}
742
743
/**
744
 * xmlParserEntityCheck:
745
 * @ctxt:  parser context (dereferenced unconditionally, must not be NULL)
746
 * @extra:  sum of unexpanded entity sizes
747
 *
748
 * Check for non-linear entity expansion behaviour.
749
 *
750
 * In some cases like xmlStringDecodeEntities, this function is called
751
 * for each, possibly nested entity and its unexpanded content length.
752
 *
753
 * In other cases like xmlParseReference, it's only called for each
754
 * top-level entity with its unexpanded content length plus the sum of
755
 * the unexpanded content lengths (plus fixed cost) of all nested
756
 * entities.
757
 *
758
 * Summing the unexpanded lengths also adds the length of the reference.
759
 * This is by design. Taking the length of the entity name into account
760
 * discourages attacks that try to waste CPU time with abusively long
761
 * entity names. See test/recurse/lol6.xml for example. Each call also
762
 * adds some fixed cost XML_ENT_FIXED_COST to discourage attacks with
763
 * short entities.
764
 *
765
 * Returns 1 if the amplification limit is exceeded, in which case a
 * fatal XML_ERR_ENTITY_LOOP error is raised and the parser is halted,
 * 0 otherwise.
766
 */
767
static int
768
xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long extra)
769
27.8M
{
770
27.8M
    unsigned long consumed;
771
27.8M
    xmlParserInputPtr input = ctxt->input;
772
27.8M
    xmlEntityPtr entity = input->entity;
773
774
    /*
775
     * Compute total consumed bytes so far, including input streams of
776
     * external entities.
     *
     * The bytes of the current input are only counted when it has no
     * entity attached, or when that entity is an external parameter
     * entity without the XML_ENT_PARSED flag.
777
     */
778
27.8M
    consumed = input->parentConsumed;
779
27.8M
    if ((entity == NULL) ||
780
27.8M
        ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
781
14.8M
         ((entity->flags & XML_ENT_PARSED) == 0))) {
782
14.8M
        xmlSaturatedAdd(&consumed, input->consumed);
783
14.8M
        xmlSaturatedAddSizeT(&consumed, input->cur - input->base);
784
14.8M
    }
785
27.8M
    xmlSaturatedAdd(&consumed, ctxt->sizeentities);
786
787
    /*
788
     * Add extra cost and some fixed cost.
789
     */
790
27.8M
    xmlSaturatedAdd(&ctxt->sizeentcopy, extra);
791
27.8M
    xmlSaturatedAdd(&ctxt->sizeentcopy, XML_ENT_FIXED_COST);
792
793
    /*
794
     * It's important to always use saturation arithmetic when tracking
795
     * entity sizes to make the size checks reliable. If "sizeentcopy"
796
     * overflows, we have to abort.
     *
     * The limit only applies once sizeentcopy is above
     * XML_MAX_TEXT_LENGTH; it then trips if the counter saturated or if
     * sizeentcopy divided by XML_PARSER_NON_LINEAR is larger than the
     * consumed byte count.
797
     */
798
27.8M
    if ((ctxt->sizeentcopy > XML_MAX_TEXT_LENGTH) &&
799
27.8M
        ((ctxt->sizeentcopy >= ULONG_MAX) ||
800
362
         (ctxt->sizeentcopy / XML_PARSER_NON_LINEAR > consumed))) {
801
362
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_LOOP,
802
362
                       "Maximum entity amplification factor exceeded");
803
362
        xmlHaltParser(ctxt);
804
362
        return(1);
805
362
    }
806
807
27.8M
    return(0);
808
27.8M
}
809
810
/************************************************************************
811
 *                  *
812
 *    Library wide options          *
813
 *                  *
814
 ************************************************************************/
815
816
/**
817
  * xmlHasFeature:
818
  * @feature: the feature to be examined
819
  *
820
  * Examines if the library has been compiled with a given feature.
821
  *
822
  * Returns a non-zero value if the feature exists, otherwise zero.
823
  * Returns zero (0) if the feature does not exist or an unknown
824
  * feature is requested, non-zero otherwise.
825
  */
826
int
827
xmlHasFeature(xmlFeature feature)
828
0
{
829
0
    switch (feature) {
830
0
  case XML_WITH_THREAD:
831
0
#ifdef LIBXML_THREAD_ENABLED
832
0
      return(1);
833
#else
834
      return(0);
835
#endif
836
0
        case XML_WITH_TREE:
837
0
#ifdef LIBXML_TREE_ENABLED
838
0
            return(1);
839
#else
840
            return(0);
841
#endif
842
0
        case XML_WITH_OUTPUT:
843
0
#ifdef LIBXML_OUTPUT_ENABLED
844
0
            return(1);
845
#else
846
            return(0);
847
#endif
848
0
        case XML_WITH_PUSH:
849
0
#ifdef LIBXML_PUSH_ENABLED
850
0
            return(1);
851
#else
852
            return(0);
853
#endif
854
0
        case XML_WITH_READER:
855
0
#ifdef LIBXML_READER_ENABLED
856
0
            return(1);
857
#else
858
            return(0);
859
#endif
860
0
        case XML_WITH_PATTERN:
861
0
#ifdef LIBXML_PATTERN_ENABLED
862
0
            return(1);
863
#else
864
            return(0);
865
#endif
866
0
        case XML_WITH_WRITER:
867
0
#ifdef LIBXML_WRITER_ENABLED
868
0
            return(1);
869
#else
870
            return(0);
871
#endif
872
0
        case XML_WITH_SAX1:
873
0
#ifdef LIBXML_SAX1_ENABLED
874
0
            return(1);
875
#else
876
            return(0);
877
#endif
878
0
        case XML_WITH_FTP:
879
#ifdef LIBXML_FTP_ENABLED
880
            return(1);
881
#else
882
0
            return(0);
883
0
#endif
884
0
        case XML_WITH_HTTP:
885
#ifdef LIBXML_HTTP_ENABLED
886
            return(1);
887
#else
888
0
            return(0);
889
0
#endif
890
0
        case XML_WITH_VALID:
891
0
#ifdef LIBXML_VALID_ENABLED
892
0
            return(1);
893
#else
894
            return(0);
895
#endif
896
0
        case XML_WITH_HTML:
897
0
#ifdef LIBXML_HTML_ENABLED
898
0
            return(1);
899
#else
900
            return(0);
901
#endif
902
0
        case XML_WITH_LEGACY:
903
#ifdef LIBXML_LEGACY_ENABLED
904
            return(1);
905
#else
906
0
            return(0);
907
0
#endif
908
0
        case XML_WITH_C14N:
909
0
#ifdef LIBXML_C14N_ENABLED
910
0
            return(1);
911
#else
912
            return(0);
913
#endif
914
0
        case XML_WITH_CATALOG:
915
0
#ifdef LIBXML_CATALOG_ENABLED
916
0
            return(1);
917
#else
918
            return(0);
919
#endif
920
0
        case XML_WITH_XPATH:
921
0
#ifdef LIBXML_XPATH_ENABLED
922
0
            return(1);
923
#else
924
            return(0);
925
#endif
926
0
        case XML_WITH_XPTR:
927
0
#ifdef LIBXML_XPTR_ENABLED
928
0
            return(1);
929
#else
930
            return(0);
931
#endif
932
0
        case XML_WITH_XINCLUDE:
933
0
#ifdef LIBXML_XINCLUDE_ENABLED
934
0
            return(1);
935
#else
936
            return(0);
937
#endif
938
0
        case XML_WITH_ICONV:
939
0
#ifdef LIBXML_ICONV_ENABLED
940
0
            return(1);
941
#else
942
            return(0);
943
#endif
944
0
        case XML_WITH_ISO8859X:
945
0
#ifdef LIBXML_ISO8859X_ENABLED
946
0
            return(1);
947
#else
948
            return(0);
949
#endif
950
0
        case XML_WITH_UNICODE:
951
0
#ifdef LIBXML_UNICODE_ENABLED
952
0
            return(1);
953
#else
954
            return(0);
955
#endif
956
0
        case XML_WITH_REGEXP:
957
0
#ifdef LIBXML_REGEXP_ENABLED
958
0
            return(1);
959
#else
960
            return(0);
961
#endif
962
0
        case XML_WITH_AUTOMATA:
963
0
#ifdef LIBXML_AUTOMATA_ENABLED
964
0
            return(1);
965
#else
966
            return(0);
967
#endif
968
0
        case XML_WITH_EXPR:
969
#ifdef LIBXML_EXPR_ENABLED
970
            return(1);
971
#else
972
0
            return(0);
973
0
#endif
974
0
        case XML_WITH_SCHEMAS:
975
0
#ifdef LIBXML_SCHEMAS_ENABLED
976
0
            return(1);
977
#else
978
            return(0);
979
#endif
980
0
        case XML_WITH_SCHEMATRON:
981
0
#ifdef LIBXML_SCHEMATRON_ENABLED
982
0
            return(1);
983
#else
984
            return(0);
985
#endif
986
0
        case XML_WITH_MODULES:
987
0
#ifdef LIBXML_MODULES_ENABLED
988
0
            return(1);
989
#else
990
            return(0);
991
#endif
992
0
        case XML_WITH_DEBUG:
993
#ifdef LIBXML_DEBUG_ENABLED
994
            return(1);
995
#else
996
0
            return(0);
997
0
#endif
998
0
        case XML_WITH_DEBUG_MEM:
999
#ifdef DEBUG_MEMORY_LOCATION
1000
            return(1);
1001
#else
1002
0
            return(0);
1003
0
#endif
1004
0
        case XML_WITH_DEBUG_RUN:
1005
0
            return(0);
1006
0
        case XML_WITH_ZLIB:
1007
0
#ifdef LIBXML_ZLIB_ENABLED
1008
0
            return(1);
1009
#else
1010
            return(0);
1011
#endif
1012
0
        case XML_WITH_LZMA:
1013
0
#ifdef LIBXML_LZMA_ENABLED
1014
0
            return(1);
1015
#else
1016
            return(0);
1017
#endif
1018
0
        case XML_WITH_ICU:
1019
#ifdef LIBXML_ICU_ENABLED
1020
            return(1);
1021
#else
1022
0
            return(0);
1023
0
#endif
1024
0
        default:
1025
0
      break;
1026
0
     }
1027
0
     return(0);
1028
0
}
1029
1030
/************************************************************************
1031
 *                  *
1032
 *    SAX2 defaulted attributes handling      *
1033
 *                  *
1034
 ************************************************************************/
1035
1036
/**
1037
 * xmlDetectSAX2:
1038
 * @ctxt:  an XML parser context
1039
 *
1040
 * Do the SAX2 detection and specific initialization
1041
 */
1042
static void
1043
470k
xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1044
470k
    xmlSAXHandlerPtr sax;
1045
1046
    /* Avoid unused variable warning if features are disabled. */
1047
470k
    (void) sax;
1048
1049
470k
    if (ctxt == NULL) return;
1050
470k
    sax = ctxt->sax;
1051
470k
#ifdef LIBXML_SAX1_ENABLED
1052
470k
    if ((sax) &&  (sax->initialized == XML_SAX2_MAGIC) &&
1053
470k
        ((sax->startElementNs != NULL) ||
1054
288k
         (sax->endElementNs != NULL) ||
1055
288k
         ((sax->startElement == NULL) && (sax->endElement == NULL))))
1056
288k
        ctxt->sax2 = 1;
1057
#else
1058
    ctxt->sax2 = 1;
1059
#endif /* LIBXML_SAX1_ENABLED */
1060
1061
470k
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1062
470k
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1063
470k
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
1064
470k
    if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1065
470k
    (ctxt->str_xml_ns == NULL)) {
1066
0
        xmlErrMemory(ctxt, NULL);
1067
0
    }
1068
470k
}
1069
1070
typedef struct _xmlDefAttrs xmlDefAttrs;
1071
typedef xmlDefAttrs *xmlDefAttrsPtr;
1072
struct _xmlDefAttrs {
1073
    int nbAttrs;  /* number of defaulted attributes on that element */
1074
    int maxAttrs;       /* the size of the array */
1075
#if __STDC_VERSION__ >= 199901L
1076
    /* Using a C99 flexible array member avoids UBSan errors. */
1077
    const xmlChar *values[]; /* array of localname/prefix/values/external */
1078
#else
1079
    const xmlChar *values[5];
1080
#endif
1081
};
1082
1083
/**
1084
 * xmlAttrNormalizeSpace:
1085
 * @src: the source string
1086
 * @dst: the target string
1087
 *
1088
 * Normalize the space in non CDATA attribute values:
1089
 * If the attribute type is not CDATA, then the XML processor MUST further
1090
 * process the normalized attribute value by discarding any leading and
1091
 * trailing space (#x20) characters, and by replacing sequences of space
1092
 * (#x20) characters by a single space (#x20) character.
1093
 * Note that the size of dst need to be at least src, and if one doesn't need
1094
 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1095
 * passing src as dst is just fine.
1096
 *
1097
 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1098
 *         is needed.
1099
 */
1100
static xmlChar *
1101
xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1102
102k
{
1103
102k
    if ((src == NULL) || (dst == NULL))
1104
0
        return(NULL);
1105
1106
143k
    while (*src == 0x20) src++;
1107
1.62M
    while (*src != 0) {
1108
1.51M
  if (*src == 0x20) {
1109
490k
      while (*src == 0x20) src++;
1110
134k
      if (*src != 0)
1111
112k
    *dst++ = 0x20;
1112
1.38M
  } else {
1113
1.38M
      *dst++ = *src++;
1114
1.38M
  }
1115
1.51M
    }
1116
102k
    *dst = 0;
1117
102k
    if (dst == src)
1118
76.2k
       return(NULL);
1119
26.4k
    return(dst);
1120
102k
}
1121
1122
/**
1123
 * xmlAttrNormalizeSpace2:
1124
 * @src: the source string
1125
 *
1126
 * Normalize the space in non CDATA attribute values, a slightly more complex
1127
 * front end to avoid allocation problems when running on attribute values
1128
 * coming from the input.
1129
 *
1130
 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1131
 *         is needed.
1132
 */
1133
static const xmlChar *
1134
xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
1135
23.9k
{
1136
23.9k
    int i;
1137
23.9k
    int remove_head = 0;
1138
23.9k
    int need_realloc = 0;
1139
23.9k
    const xmlChar *cur;
1140
1141
23.9k
    if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1142
0
        return(NULL);
1143
23.9k
    i = *len;
1144
23.9k
    if (i <= 0)
1145
728
        return(NULL);
1146
1147
23.2k
    cur = src;
1148
31.7k
    while (*cur == 0x20) {
1149
8.46k
        cur++;
1150
8.46k
  remove_head++;
1151
8.46k
    }
1152
808k
    while (*cur != 0) {
1153
790k
  if (*cur == 0x20) {
1154
53.5k
      cur++;
1155
53.5k
      if ((*cur == 0x20) || (*cur == 0)) {
1156
4.83k
          need_realloc = 1;
1157
4.83k
    break;
1158
4.83k
      }
1159
53.5k
  } else
1160
736k
      cur++;
1161
790k
    }
1162
23.2k
    if (need_realloc) {
1163
4.83k
        xmlChar *ret;
1164
1165
4.83k
  ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1166
4.83k
  if (ret == NULL) {
1167
0
      xmlErrMemory(ctxt, NULL);
1168
0
      return(NULL);
1169
0
  }
1170
4.83k
  xmlAttrNormalizeSpace(ret, ret);
1171
4.83k
  *len = strlen((const char *)ret);
1172
4.83k
        return(ret);
1173
18.4k
    } else if (remove_head) {
1174
608
        *len -= remove_head;
1175
608
        memmove(src, src + remove_head, 1 + *len);
1176
608
  return(src);
1177
608
    }
1178
17.8k
    return(NULL);
1179
23.2k
}
1180
1181
/**
1182
 * xmlAddDefAttrs:
1183
 * @ctxt:  an XML parser context
1184
 * @fullname:  the element fullname
1185
 * @fullattr:  the attribute fullname
1186
 * @value:  the attribute value
1187
 *
1188
 * Add a defaulted attribute for an element
1189
 */
1190
static void
1191
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1192
               const xmlChar *fullname,
1193
               const xmlChar *fullattr,
1194
128k
               const xmlChar *value) {
1195
128k
    xmlDefAttrsPtr defaults;
1196
128k
    int len;
1197
128k
    const xmlChar *name;
1198
128k
    const xmlChar *prefix;
1199
1200
    /*
1201
     * Allows to detect attribute redefinitions
1202
     */
1203
128k
    if (ctxt->attsSpecial != NULL) {
1204
117k
        if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1205
31.5k
      return;
1206
117k
    }
1207
1208
96.8k
    if (ctxt->attsDefault == NULL) {
1209
15.0k
        ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1210
15.0k
  if (ctxt->attsDefault == NULL)
1211
0
      goto mem_error;
1212
15.0k
    }
1213
1214
    /*
1215
     * split the element name into prefix:localname , the string found
1216
     * are within the DTD and then not associated to namespace names.
1217
     */
1218
96.8k
    name = xmlSplitQName3(fullname, &len);
1219
96.8k
    if (name == NULL) {
1220
93.2k
        name = xmlDictLookup(ctxt->dict, fullname, -1);
1221
93.2k
  prefix = NULL;
1222
93.2k
    } else {
1223
3.61k
        name = xmlDictLookup(ctxt->dict, name, -1);
1224
3.61k
  prefix = xmlDictLookup(ctxt->dict, fullname, len);
1225
3.61k
    }
1226
1227
    /*
1228
     * make sure there is some storage
1229
     */
1230
96.8k
    defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1231
96.8k
    if (defaults == NULL) {
1232
56.7k
        defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
1233
56.7k
                     (4 * 5) * sizeof(const xmlChar *));
1234
56.7k
  if (defaults == NULL)
1235
0
      goto mem_error;
1236
56.7k
  defaults->nbAttrs = 0;
1237
56.7k
  defaults->maxAttrs = 4;
1238
56.7k
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1239
56.7k
                          defaults, NULL) < 0) {
1240
0
      xmlFree(defaults);
1241
0
      goto mem_error;
1242
0
  }
1243
56.7k
    } else if (defaults->nbAttrs >= defaults->maxAttrs) {
1244
1.89k
        xmlDefAttrsPtr temp;
1245
1246
1.89k
        temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
1247
1.89k
           (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1248
1.89k
  if (temp == NULL)
1249
0
      goto mem_error;
1250
1.89k
  defaults = temp;
1251
1.89k
  defaults->maxAttrs *= 2;
1252
1.89k
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1253
1.89k
                          defaults, NULL) < 0) {
1254
0
      xmlFree(defaults);
1255
0
      goto mem_error;
1256
0
  }
1257
1.89k
    }
1258
1259
    /*
1260
     * Split the element name into prefix:localname , the string found
1261
     * are within the DTD and hen not associated to namespace names.
1262
     */
1263
96.8k
    name = xmlSplitQName3(fullattr, &len);
1264
96.8k
    if (name == NULL) {
1265
82.6k
        name = xmlDictLookup(ctxt->dict, fullattr, -1);
1266
82.6k
  prefix = NULL;
1267
82.6k
    } else {
1268
14.2k
        name = xmlDictLookup(ctxt->dict, name, -1);
1269
14.2k
  prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1270
14.2k
    }
1271
1272
96.8k
    defaults->values[5 * defaults->nbAttrs] = name;
1273
96.8k
    defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1274
    /* intern the string and precompute the end */
1275
96.8k
    len = xmlStrlen(value);
1276
96.8k
    value = xmlDictLookup(ctxt->dict, value, len);
1277
96.8k
    if (value == NULL)
1278
0
        goto mem_error;
1279
96.8k
    defaults->values[5 * defaults->nbAttrs + 2] = value;
1280
96.8k
    defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1281
96.8k
    if (ctxt->external)
1282
29.8k
        defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1283
67.0k
    else
1284
67.0k
        defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1285
96.8k
    defaults->nbAttrs++;
1286
1287
96.8k
    return;
1288
1289
0
mem_error:
1290
0
    xmlErrMemory(ctxt, NULL);
1291
0
    return;
1292
96.8k
}
1293
1294
/**
1295
 * xmlAddSpecialAttr:
1296
 * @ctxt:  an XML parser context
1297
 * @fullname:  the element fullname
1298
 * @fullattr:  the attribute fullname
1299
 * @type:  the attribute type
1300
 *
1301
 * Register this attribute type
1302
 */
1303
static void
1304
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1305
      const xmlChar *fullname,
1306
      const xmlChar *fullattr,
1307
      int type)
1308
1.49M
{
1309
1.49M
    if (ctxt->attsSpecial == NULL) {
1310
29.7k
        ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1311
29.7k
  if (ctxt->attsSpecial == NULL)
1312
0
      goto mem_error;
1313
29.7k
    }
1314
1315
1.49M
    if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1316
430k
        return;
1317
1318
1.06M
    xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1319
1.06M
                     (void *) (ptrdiff_t) type);
1320
1.06M
    return;
1321
1322
0
mem_error:
1323
0
    xmlErrMemory(ctxt, NULL);
1324
0
    return;
1325
1.49M
}
1326
1327
/**
1328
 * xmlCleanSpecialAttrCallback:
1329
 *
1330
 * Removes CDATA attributes from the special attribute table
1331
 */
1332
static void
1333
xmlCleanSpecialAttrCallback(void *payload, void *data,
1334
                            const xmlChar *fullname, const xmlChar *fullattr,
1335
762k
                            const xmlChar *unused ATTRIBUTE_UNUSED) {
1336
762k
    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1337
1338
762k
    if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) {
1339
281k
        xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1340
281k
    }
1341
762k
}
1342
1343
/**
1344
 * xmlCleanSpecialAttr:
1345
 * @ctxt:  an XML parser context
1346
 *
1347
 * Trim the list of attributes defined to remove all those of type
1348
 * CDATA as they are not special. This call should be done when finishing
1349
 * to parse the DTD and before starting to parse the document root.
1350
 */
1351
static void
1352
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1353
124k
{
1354
124k
    if (ctxt->attsSpecial == NULL)
1355
100k
        return;
1356
1357
24.4k
    xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1358
1359
24.4k
    if (xmlHashSize(ctxt->attsSpecial) == 0) {
1360
5.27k
        xmlHashFree(ctxt->attsSpecial, NULL);
1361
5.27k
        ctxt->attsSpecial = NULL;
1362
5.27k
    }
1363
24.4k
    return;
1364
124k
}
1365
1366
/**
1367
 * xmlCheckLanguageID:
1368
 * @lang:  pointer to the string value
1369
 *
1370
 * Checks that the value conforms to the LanguageID production:
1371
 *
1372
 * NOTE: this is somewhat deprecated, those productions were removed from
1373
 *       the XML Second edition.
1374
 *
1375
 * [33] LanguageID ::= Langcode ('-' Subcode)*
1376
 * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
1377
 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1378
 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1379
 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1380
 * [38] Subcode ::= ([a-z] | [A-Z])+
1381
 *
1382
 * The current REC reference the successors of RFC 1766, currently 5646
1383
 *
1384
 * http://www.rfc-editor.org/rfc/rfc5646.txt
1385
 * langtag       = language
1386
 *                 ["-" script]
1387
 *                 ["-" region]
1388
 *                 *("-" variant)
1389
 *                 *("-" extension)
1390
 *                 ["-" privateuse]
1391
 * language      = 2*3ALPHA            ; shortest ISO 639 code
1392
 *                 ["-" extlang]       ; sometimes followed by
1393
 *                                     ; extended language subtags
1394
 *               / 4ALPHA              ; or reserved for future use
1395
 *               / 5*8ALPHA            ; or registered language subtag
1396
 *
1397
 * extlang       = 3ALPHA              ; selected ISO 639 codes
1398
 *                 *2("-" 3ALPHA)      ; permanently reserved
1399
 *
1400
 * script        = 4ALPHA              ; ISO 15924 code
1401
 *
1402
 * region        = 2ALPHA              ; ISO 3166-1 code
1403
 *               / 3DIGIT              ; UN M.49 code
1404
 *
1405
 * variant       = 5*8alphanum         ; registered variants
1406
 *               / (DIGIT 3alphanum)
1407
 *
1408
 * extension     = singleton 1*("-" (2*8alphanum))
1409
 *
1410
 *                                     ; Single alphanumerics
1411
 *                                     ; "x" reserved for private use
1412
 * singleton     = DIGIT               ; 0 - 9
1413
 *               / %x41-57             ; A - W
1414
 *               / %x59-5A             ; Y - Z
1415
 *               / %x61-77             ; a - w
1416
 *               / %x79-7A             ; y - z
1417
 *
1418
 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1419
 * The parser below doesn't try to cope with extension or privateuse
1420
 * that could be added but that's not interoperable anyway
1421
 *
1422
 * Returns 1 if correct 0 otherwise
1423
 **/
1424
int
1425
xmlCheckLanguageID(const xmlChar * lang)
1426
11.4k
{
1427
11.4k
    const xmlChar *cur = lang, *nxt;
1428
1429
11.4k
    if (cur == NULL)
1430
142
        return (0);
1431
11.3k
    if (((cur[0] == 'i') && (cur[1] == '-')) ||
1432
11.3k
        ((cur[0] == 'I') && (cur[1] == '-')) ||
1433
11.3k
        ((cur[0] == 'x') && (cur[1] == '-')) ||
1434
11.3k
        ((cur[0] == 'X') && (cur[1] == '-'))) {
1435
        /*
1436
         * Still allow IANA code and user code which were coming
1437
         * from the previous version of the XML-1.0 specification
1438
         * it's deprecated but we should not fail
1439
         */
1440
609
        cur += 2;
1441
4.21k
        while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1442
4.21k
               ((cur[0] >= 'a') && (cur[0] <= 'z')))
1443
3.61k
            cur++;
1444
609
        return(cur[0] == 0);
1445
609
    }
1446
10.6k
    nxt = cur;
1447
37.6k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1448
37.6k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1449
26.9k
           nxt++;
1450
10.6k
    if (nxt - cur >= 4) {
1451
        /*
1452
         * Reserved
1453
         */
1454
381
        if ((nxt - cur > 8) || (nxt[0] != 0))
1455
308
            return(0);
1456
73
        return(1);
1457
381
    }
1458
10.3k
    if (nxt - cur < 2)
1459
616
        return(0);
1460
    /* we got an ISO 639 code */
1461
9.69k
    if (nxt[0] == 0)
1462
4.85k
        return(1);
1463
4.84k
    if (nxt[0] != '-')
1464
1.15k
        return(0);
1465
1466
3.68k
    nxt++;
1467
3.68k
    cur = nxt;
1468
    /* now we can have extlang or script or region or variant */
1469
3.68k
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1470
474
        goto region_m49;
1471
1472
17.4k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1473
17.4k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1474
14.2k
           nxt++;
1475
3.21k
    if (nxt - cur == 4)
1476
1.00k
        goto script;
1477
2.21k
    if (nxt - cur == 2)
1478
556
        goto region;
1479
1.65k
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1480
133
        goto variant;
1481
1.52k
    if (nxt - cur != 3)
1482
566
        return(0);
1483
    /* we parsed an extlang */
1484
957
    if (nxt[0] == 0)
1485
102
        return(1);
1486
855
    if (nxt[0] != '-')
1487
130
        return(0);
1488
1489
725
    nxt++;
1490
725
    cur = nxt;
1491
    /* now we can have script or region or variant */
1492
725
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1493
75
        goto region_m49;
1494
1495
5.05k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1496
5.05k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1497
4.40k
           nxt++;
1498
650
    if (nxt - cur == 2)
1499
113
        goto region;
1500
537
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1501
119
        goto variant;
1502
418
    if (nxt - cur != 4)
1503
251
        return(0);
1504
    /* we parsed a script */
1505
1.16k
script:
1506
1.16k
    if (nxt[0] == 0)
1507
119
        return(1);
1508
1.04k
    if (nxt[0] != '-')
1509
120
        return(0);
1510
1511
928
    nxt++;
1512
928
    cur = nxt;
1513
    /* now we can have region or variant */
1514
928
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1515
132
        goto region_m49;
1516
1517
5.19k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1518
5.19k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1519
4.39k
           nxt++;
1520
1521
796
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1522
103
        goto variant;
1523
693
    if (nxt - cur != 2)
1524
572
        return(0);
1525
    /* we parsed a region */
1526
1.17k
region:
1527
1.17k
    if (nxt[0] == 0)
1528
407
        return(1);
1529
764
    if (nxt[0] != '-')
1530
490
        return(0);
1531
1532
274
    nxt++;
1533
274
    cur = nxt;
1534
    /* now we can just have a variant */
1535
3.11k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1536
3.11k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1537
2.84k
           nxt++;
1538
1539
274
    if ((nxt - cur < 5) || (nxt - cur > 8))
1540
165
        return(0);
1541
1542
    /* we parsed a variant */
1543
464
variant:
1544
464
    if (nxt[0] == 0)
1545
94
        return(1);
1546
370
    if (nxt[0] != '-')
1547
323
        return(0);
1548
    /* extensions and private use subtags not checked */
1549
47
    return (1);
1550
1551
681
region_m49:
1552
681
    if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1553
681
        ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1554
381
        nxt += 3;
1555
381
        goto region;
1556
381
    }
1557
300
    return(0);
1558
681
}
1559
1560
/************************************************************************
1561
 *                  *
1562
 *    Parser stacks related functions and macros    *
1563
 *                  *
1564
 ************************************************************************/
1565
1566
static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1567
                                            const xmlChar ** str);
1568
1569
#ifdef SAX2
1570
/**
1571
 * nsPush:
1572
 * @ctxt:  an XML parser context
1573
 * @prefix:  the namespace prefix or NULL
1574
 * @URL:  the namespace name
1575
 *
1576
 * Pushes a new parser namespace on top of the ns stack
1577
 *
1578
 * Returns -1 in case of error, -2 if the namespace should be discarded
1579
 *     and the index in the stack otherwise.
1580
 */
1581
static int
1582
nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1583
79.2k
{
1584
79.2k
    if (ctxt->options & XML_PARSE_NSCLEAN) {
1585
39.3k
        int i;
1586
79.3k
  for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
1587
55.6k
      if (ctxt->nsTab[i] == prefix) {
1588
    /* in scope */
1589
15.7k
          if (ctxt->nsTab[i + 1] == URL)
1590
6.39k
        return(-2);
1591
    /* out of scope keep it */
1592
9.31k
    break;
1593
15.7k
      }
1594
55.6k
  }
1595
39.3k
    }
1596
72.8k
    if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1597
21.1k
  ctxt->nsMax = 10;
1598
21.1k
  ctxt->nsNr = 0;
1599
21.1k
  ctxt->nsTab = (const xmlChar **)
1600
21.1k
                xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1601
21.1k
  if (ctxt->nsTab == NULL) {
1602
0
      xmlErrMemory(ctxt, NULL);
1603
0
      ctxt->nsMax = 0;
1604
0
            return (-1);
1605
0
  }
1606
51.7k
    } else if (ctxt->nsNr >= ctxt->nsMax) {
1607
2.14k
        const xmlChar ** tmp;
1608
2.14k
        ctxt->nsMax *= 2;
1609
2.14k
        tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1610
2.14k
            ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1611
2.14k
        if (tmp == NULL) {
1612
0
            xmlErrMemory(ctxt, NULL);
1613
0
      ctxt->nsMax /= 2;
1614
0
            return (-1);
1615
0
        }
1616
2.14k
  ctxt->nsTab = tmp;
1617
2.14k
    }
1618
72.8k
    ctxt->nsTab[ctxt->nsNr++] = prefix;
1619
72.8k
    ctxt->nsTab[ctxt->nsNr++] = URL;
1620
72.8k
    return (ctxt->nsNr);
1621
72.8k
}
1622
/**
1623
 * nsPop:
1624
 * @ctxt: an XML parser context
1625
 * @nr:  the number to pop
1626
 *
1627
 * Pops the top @nr parser prefix/namespace from the ns stack
1628
 *
1629
 * Returns the number of namespaces removed
1630
 */
1631
static int
1632
nsPop(xmlParserCtxtPtr ctxt, int nr)
1633
28.6k
{
1634
28.6k
    int i;
1635
1636
28.6k
    if (ctxt->nsTab == NULL) return(0);
1637
28.6k
    if (ctxt->nsNr < nr) {
1638
0
        xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1639
0
        nr = ctxt->nsNr;
1640
0
    }
1641
28.6k
    if (ctxt->nsNr <= 0)
1642
0
        return (0);
1643
1644
109k
    for (i = 0;i < nr;i++) {
1645
80.6k
         ctxt->nsNr--;
1646
80.6k
   ctxt->nsTab[ctxt->nsNr] = NULL;
1647
80.6k
    }
1648
28.6k
    return(nr);
1649
28.6k
}
1650
#endif
1651
1652
static int
1653
59.2k
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1654
59.2k
    const xmlChar **atts;
1655
59.2k
    int *attallocs;
1656
59.2k
    int maxatts;
1657
1658
59.2k
    if (nr + 5 > ctxt->maxatts) {
1659
59.2k
  maxatts = ctxt->maxatts == 0 ? 55 : (nr + 5) * 2;
1660
59.2k
  atts = (const xmlChar **) xmlMalloc(
1661
59.2k
             maxatts * sizeof(const xmlChar *));
1662
59.2k
  if (atts == NULL) goto mem_error;
1663
59.2k
  attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1664
59.2k
                               (maxatts / 5) * sizeof(int));
1665
59.2k
  if (attallocs == NULL) {
1666
0
            xmlFree(atts);
1667
0
            goto mem_error;
1668
0
        }
1669
59.2k
        if (ctxt->maxatts > 0)
1670
158
            memcpy(atts, ctxt->atts, ctxt->maxatts * sizeof(const xmlChar *));
1671
59.2k
        xmlFree(ctxt->atts);
1672
59.2k
  ctxt->atts = atts;
1673
59.2k
  ctxt->attallocs = attallocs;
1674
59.2k
  ctxt->maxatts = maxatts;
1675
59.2k
    }
1676
59.2k
    return(ctxt->maxatts);
1677
0
mem_error:
1678
0
    xmlErrMemory(ctxt, NULL);
1679
0
    return(-1);
1680
59.2k
}
1681
1682
/**
1683
 * inputPush:
1684
 * @ctxt:  an XML parser context
1685
 * @value:  the parser input
1686
 *
1687
 * Pushes a new parser input on top of the input stack
1688
 *
1689
 * Returns -1 in case of error, the index in the stack otherwise
1690
 */
1691
int
1692
inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1693
13.3M
{
1694
13.3M
    if ((ctxt == NULL) || (value == NULL))
1695
0
        return(-1);
1696
13.3M
    if (ctxt->inputNr >= ctxt->inputMax) {
1697
1.01k
        size_t newSize = ctxt->inputMax * 2;
1698
1.01k
        xmlParserInputPtr *tmp;
1699
1700
1.01k
        tmp = (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1701
1.01k
                                               newSize * sizeof(*tmp));
1702
1.01k
        if (tmp == NULL) {
1703
0
            xmlErrMemory(ctxt, NULL);
1704
0
            return (-1);
1705
0
        }
1706
1.01k
        ctxt->inputTab = tmp;
1707
1.01k
        ctxt->inputMax = newSize;
1708
1.01k
    }
1709
13.3M
    ctxt->inputTab[ctxt->inputNr] = value;
1710
13.3M
    ctxt->input = value;
1711
13.3M
    return (ctxt->inputNr++);
1712
13.3M
}
1713
/**
1714
 * inputPop:
1715
 * @ctxt: an XML parser context
1716
 *
1717
 * Pops the top parser input from the input stack
1718
 *
1719
 * Returns the input just removed
1720
 */
1721
xmlParserInputPtr
1722
inputPop(xmlParserCtxtPtr ctxt)
1723
14.5M
{
1724
14.5M
    xmlParserInputPtr ret;
1725
1726
14.5M
    if (ctxt == NULL)
1727
0
        return(NULL);
1728
14.5M
    if (ctxt->inputNr <= 0)
1729
1.16M
        return (NULL);
1730
13.3M
    ctxt->inputNr--;
1731
13.3M
    if (ctxt->inputNr > 0)
1732
13.0M
        ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1733
317k
    else
1734
317k
        ctxt->input = NULL;
1735
13.3M
    ret = ctxt->inputTab[ctxt->inputNr];
1736
13.3M
    ctxt->inputTab[ctxt->inputNr] = NULL;
1737
13.3M
    return (ret);
1738
14.5M
}
1739
/**
1740
 * nodePush:
1741
 * @ctxt:  an XML parser context
1742
 * @value:  the element node
1743
 *
1744
 * Pushes a new element node on top of the node stack
1745
 *
1746
 * Returns -1 in case of error, the index in the stack otherwise
1747
 */
1748
int
1749
nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1750
6.21M
{
1751
6.21M
    if (ctxt == NULL) return(0);
1752
6.21M
    if (ctxt->nodeNr >= ctxt->nodeMax) {
1753
7.39k
        xmlNodePtr *tmp;
1754
1755
7.39k
  tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1756
7.39k
                                      ctxt->nodeMax * 2 *
1757
7.39k
                                      sizeof(ctxt->nodeTab[0]));
1758
7.39k
        if (tmp == NULL) {
1759
0
            xmlErrMemory(ctxt, NULL);
1760
0
            return (-1);
1761
0
        }
1762
7.39k
        ctxt->nodeTab = tmp;
1763
7.39k
  ctxt->nodeMax *= 2;
1764
7.39k
    }
1765
6.21M
    if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1766
6.21M
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
1767
0
  xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1768
0
     "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1769
0
        xmlParserMaxDepth);
1770
0
  xmlHaltParser(ctxt);
1771
0
  return(-1);
1772
0
    }
1773
6.21M
    ctxt->nodeTab[ctxt->nodeNr] = value;
1774
6.21M
    ctxt->node = value;
1775
6.21M
    return (ctxt->nodeNr++);
1776
6.21M
}
1777
1778
/**
1779
 * nodePop:
1780
 * @ctxt: an XML parser context
1781
 *
1782
 * Pops the top element node from the node stack
1783
 *
1784
 * Returns the node just removed
1785
 */
1786
xmlNodePtr
1787
nodePop(xmlParserCtxtPtr ctxt)
1788
5.95M
{
1789
5.95M
    xmlNodePtr ret;
1790
1791
5.95M
    if (ctxt == NULL) return(NULL);
1792
5.95M
    if (ctxt->nodeNr <= 0)
1793
89.5k
        return (NULL);
1794
5.86M
    ctxt->nodeNr--;
1795
5.86M
    if (ctxt->nodeNr > 0)
1796
5.66M
        ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1797
199k
    else
1798
199k
        ctxt->node = NULL;
1799
5.86M
    ret = ctxt->nodeTab[ctxt->nodeNr];
1800
5.86M
    ctxt->nodeTab[ctxt->nodeNr] = NULL;
1801
5.86M
    return (ret);
1802
5.95M
}
1803
1804
/**
1805
 * nameNsPush:
1806
 * @ctxt:  an XML parser context
1807
 * @value:  the element name
1808
 * @prefix:  the element prefix
1809
 * @URI:  the element namespace name
1810
 * @line:  the current line number for error messages
1811
 * @nsNr:  the number of namespaces pushed on the namespace table
1812
 *
1813
 * Pushes a new element name/prefix/URL on top of the name stack
1814
 *
1815
 * Returns -1 in case of error, the index in the stack otherwise
1816
 */
1817
static int
1818
nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1819
           const xmlChar *prefix, const xmlChar *URI, int line, int nsNr)
1820
5.62M
{
1821
5.62M
    xmlStartTag *tag;
1822
1823
5.62M
    if (ctxt->nameNr >= ctxt->nameMax) {
1824
17.6k
        const xmlChar * *tmp;
1825
17.6k
        xmlStartTag *tmp2;
1826
17.6k
        ctxt->nameMax *= 2;
1827
17.6k
        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1828
17.6k
                                    ctxt->nameMax *
1829
17.6k
                                    sizeof(ctxt->nameTab[0]));
1830
17.6k
        if (tmp == NULL) {
1831
0
      ctxt->nameMax /= 2;
1832
0
      goto mem_error;
1833
0
        }
1834
17.6k
  ctxt->nameTab = tmp;
1835
17.6k
        tmp2 = (xmlStartTag *) xmlRealloc((void * *)ctxt->pushTab,
1836
17.6k
                                    ctxt->nameMax *
1837
17.6k
                                    sizeof(ctxt->pushTab[0]));
1838
17.6k
        if (tmp2 == NULL) {
1839
0
      ctxt->nameMax /= 2;
1840
0
      goto mem_error;
1841
0
        }
1842
17.6k
  ctxt->pushTab = tmp2;
1843
5.61M
    } else if (ctxt->pushTab == NULL) {
1844
174k
        ctxt->pushTab = (xmlStartTag *) xmlMalloc(ctxt->nameMax *
1845
174k
                                            sizeof(ctxt->pushTab[0]));
1846
174k
        if (ctxt->pushTab == NULL)
1847
0
            goto mem_error;
1848
174k
    }
1849
5.62M
    ctxt->nameTab[ctxt->nameNr] = value;
1850
5.62M
    ctxt->name = value;
1851
5.62M
    tag = &ctxt->pushTab[ctxt->nameNr];
1852
5.62M
    tag->prefix = prefix;
1853
5.62M
    tag->URI = URI;
1854
5.62M
    tag->line = line;
1855
5.62M
    tag->nsNr = nsNr;
1856
5.62M
    return (ctxt->nameNr++);
1857
0
mem_error:
1858
0
    xmlErrMemory(ctxt, NULL);
1859
0
    return (-1);
1860
5.62M
}
1861
#ifdef LIBXML_PUSH_ENABLED
/**
 * nameNsPop:
 * @ctxt: an XML parser context
 *
 * Removes the topmost element name from the name stack and makes the
 * entry below it (if any) the current name.
 *
 * Returns the name just removed, or NULL if the stack is empty
 */
static const xmlChar *
nameNsPop(xmlParserCtxtPtr ctxt)
{
    const xmlChar *popped;
    int top;

    if (ctxt->nameNr <= 0)
        return (NULL);

    top = --ctxt->nameNr;
    ctxt->name = (top > 0) ? ctxt->nameTab[top - 1] : NULL;

    popped = ctxt->nameTab[top];
    ctxt->nameTab[top] = NULL;
    return (popped);
}
#endif /* LIBXML_PUSH_ENABLED */
1887
1888
/**
1889
 * namePush:
1890
 * @ctxt:  an XML parser context
1891
 * @value:  the element name
1892
 *
1893
 * Pushes a new element name on top of the name stack
1894
 *
1895
 * Returns -1 in case of error, the index in the stack otherwise
1896
 */
1897
int
1898
namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1899
0
{
1900
0
    if (ctxt == NULL) return (-1);
1901
1902
0
    if (ctxt->nameNr >= ctxt->nameMax) {
1903
0
        const xmlChar * *tmp;
1904
0
        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1905
0
                                    ctxt->nameMax * 2 *
1906
0
                                    sizeof(ctxt->nameTab[0]));
1907
0
        if (tmp == NULL) {
1908
0
      goto mem_error;
1909
0
        }
1910
0
  ctxt->nameTab = tmp;
1911
0
        ctxt->nameMax *= 2;
1912
0
    }
1913
0
    ctxt->nameTab[ctxt->nameNr] = value;
1914
0
    ctxt->name = value;
1915
0
    return (ctxt->nameNr++);
1916
0
mem_error:
1917
0
    xmlErrMemory(ctxt, NULL);
1918
0
    return (-1);
1919
0
}
1920
/**
1921
 * namePop:
1922
 * @ctxt: an XML parser context
1923
 *
1924
 * Pops the top element name from the name stack
1925
 *
1926
 * Returns the name just removed
1927
 */
1928
const xmlChar *
1929
namePop(xmlParserCtxtPtr ctxt)
1930
4.09M
{
1931
4.09M
    const xmlChar *ret;
1932
1933
4.09M
    if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1934
0
        return (NULL);
1935
4.09M
    ctxt->nameNr--;
1936
4.09M
    if (ctxt->nameNr > 0)
1937
3.88M
        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1938
201k
    else
1939
201k
        ctxt->name = NULL;
1940
4.09M
    ret = ctxt->nameTab[ctxt->nameNr];
1941
4.09M
    ctxt->nameTab[ctxt->nameNr] = NULL;
1942
4.09M
    return (ret);
1943
4.09M
}
1944
1945
7.43M
/*
 * spacePush:
 * @ctxt:  an XML parser context
 * @val:  the value to push on the space stack
 *
 * Pushes @val on top of the space stack, doubling the table when it is
 * full, and points ctxt->space at the new top entry.
 *
 * Returns -1 in case of allocation failure, the index in the stack
 * otherwise
 */
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
    if (ctxt->spaceNr >= ctxt->spaceMax) {
        int *tmp;
        int newMax;

        /* Doubling a signed int past INT_MAX is undefined behavior. */
        if (ctxt->spaceMax > INT_MAX / 2) {
            xmlErrMemory(ctxt, NULL);
            return(-1);
        }
        newMax = ctxt->spaceMax * 2;

        tmp = (int *) xmlRealloc(ctxt->spaceTab,
                                 newMax * sizeof(ctxt->spaceTab[0]));
        if (tmp == NULL) {
            xmlErrMemory(ctxt, NULL);
            return(-1);
        }
        ctxt->spaceTab = tmp;
        /* Capacity is only updated once the table has really grown. */
        ctxt->spaceMax = newMax;
    }
    ctxt->spaceTab[ctxt->spaceNr] = val;
    ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
    return(ctxt->spaceNr++);
}
1963
1964
7.14M
/*
 * spacePop:
 * @ctxt:  an XML parser context
 *
 * Removes the top entry of the space stack. ctxt->space is moved to the
 * entry below it, or to slot 0 when the stack becomes empty, and the
 * vacated slot is reset to -1.
 *
 * Returns the value removed, or 0 if the stack was already empty
 */
static int spacePop(xmlParserCtxtPtr ctxt) {
    int top;
    int popped;

    if (ctxt->spaceNr <= 0)
        return(0);

    top = --ctxt->spaceNr;
    ctxt->space = &ctxt->spaceTab[(top > 0) ? (top - 1) : 0];

    popped = ctxt->spaceTab[top];
    ctxt->spaceTab[top] = -1;
    return(popped);
}
1976
1977
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * Dirty macros, i.e. one often needs to make assumptions about the context
 * to use them
 *
 *   CUR_PTR returns the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypasses any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substrings.
 *   SKIP(n) Skips n xmlChars, and must also be used only to skip ASCII
 *           defined strings without newlines within the parser.
 *   NEXT1   Skips 1 xmlChar, and must also be used only to skip 1
 *           non-newline ASCII defined char within the parser.
 * Clean macros, not dependent on an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skips to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops up unfinished entities on the fly.
 *   NEXTL(l) Skips the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), sets l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operates on a string instead of the context
 *   COPY_BUF  copies the current unicode char to the target buffer,
 *            incrementing the index
 *   GROW, SHRINK  handling of input buffers
 *
 * All of these expand to code that refers to a variable named ctxt in the
 * enclosing scope.
 */

#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur
#define BASE_PTR ctxt->input->base

/*
 * CMPn(s, c1 .. cn): true if the first n bytes at s equal c1 .. cn.
 * Comparison short-circuits on the first mismatch. Arguments are
 * parenthesized so that expressions can be passed safely; s is still
 * evaluated once per compared byte.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )

/* Advance by val bytes on the same line; refill if the buffer ran dry. */
#define SKIP(val) do {							\
    ctxt->input->cur += (val),ctxt->input->col+=(val);			\
    if (*ctxt->input->cur == 0)						\
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
  } while (0)

/* Advance by val bytes one at a time, keeping line/col in sync. */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == 0)						\
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
  } while (0)
2053
2054
63.3M
#define SHRINK if ((ctxt->progressive == 0) &&       \
2055
63.3M
       (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
2056
63.3M
       (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
2057
63.3M
  xmlSHRINK (ctxt);
2058
2059
889k
static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
2060
    /* Don't shrink memory buffers. */
2061
889k
    if ((ctxt->input->buf) &&
2062
889k
        ((ctxt->input->buf->encoder) || (ctxt->input->buf->readcallback)))
2063
4.65k
        xmlParserInputShrink(ctxt->input);
2064
889k
    if (*ctxt->input->cur == 0)
2065
36.2k
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2066
889k
}
2067
2068
245M
#define GROW if ((ctxt->progressive == 0) &&       \
2069
245M
     (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
2070
245M
  xmlGROW (ctxt);
2071
2072
35.9M
static void xmlGROW (xmlParserCtxtPtr ctxt) {
2073
35.9M
    ptrdiff_t curEnd = ctxt->input->end - ctxt->input->cur;
2074
35.9M
    ptrdiff_t curBase = ctxt->input->cur - ctxt->input->base;
2075
2076
35.9M
    if (((curEnd > XML_MAX_LOOKUP_LIMIT) ||
2077
35.9M
         (curBase > XML_MAX_LOOKUP_LIMIT)) &&
2078
35.9M
         ((ctxt->input->buf) &&
2079
0
          (ctxt->input->buf->readcallback != NULL)) &&
2080
35.9M
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
2081
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
2082
0
        xmlHaltParser(ctxt);
2083
0
  return;
2084
0
    }
2085
35.9M
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2086
35.9M
    if ((ctxt->input->cur > ctxt->input->end) ||
2087
35.9M
        (ctxt->input->cur < ctxt->input->base)) {
2088
0
        xmlHaltParser(ctxt);
2089
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "cur index out of bound");
2090
0
  return;
2091
0
    }
2092
35.9M
    if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0))
2093
542k
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2094
35.9M
}
2095
2096
69.9M
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

/* Step over exactly one byte on the current line; refill on a zero byte. */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/* Step over one character that is l bytes long, tracking line/col. */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);						\
  } while (0)

#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * Append character v to buffer b at index i and advance i: a plain byte
 * store when l is 1, xmlCopyCharMultiByte() otherwise.
 * Expands to an if/else statement without trailing semicolon.
 */
#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (v);					\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
2120
2121
/**
2122
 * xmlSkipBlankChars:
2123
 * @ctxt:  the XML parser context
2124
 *
2125
 * skip all blanks character found at that point in the input streams.
2126
 * It pops up finished entities in the process if allowable at that point.
2127
 *
2128
 * Returns the number of space chars skipped
2129
 */
2130
2131
int
2132
69.9M
xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2133
69.9M
    int res = 0;
2134
2135
    /*
2136
     * It's Okay to use CUR/NEXT here since all the blanks are on
2137
     * the ASCII range.
2138
     */
2139
69.9M
    if (((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) ||
2140
69.9M
        (ctxt->instate == XML_PARSER_START)) {
2141
31.5M
  const xmlChar *cur;
2142
  /*
2143
   * if we are in the document content, go really fast
2144
   */
2145
31.5M
  cur = ctxt->input->cur;
2146
31.5M
  while (IS_BLANK_CH(*cur)) {
2147
12.0M
      if (*cur == '\n') {
2148
534k
    ctxt->input->line++; ctxt->input->col = 1;
2149
11.5M
      } else {
2150
11.5M
    ctxt->input->col++;
2151
11.5M
      }
2152
12.0M
      cur++;
2153
12.0M
      if (res < INT_MAX)
2154
12.0M
    res++;
2155
12.0M
      if (*cur == 0) {
2156
39.0k
    ctxt->input->cur = cur;
2157
39.0k
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2158
39.0k
    cur = ctxt->input->cur;
2159
39.0k
      }
2160
12.0M
  }
2161
31.5M
  ctxt->input->cur = cur;
2162
38.4M
    } else {
2163
38.4M
        int expandPE = ((ctxt->external != 0) || (ctxt->inputNr != 1));
2164
2165
144M
  while (ctxt->instate != XML_PARSER_EOF) {
2166
144M
            if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2167
78.0M
    NEXT;
2168
78.0M
      } else if (CUR == '%') {
2169
                /*
2170
                 * Need to handle support of entities branching here
2171
                 */
2172
15.0M
          if ((expandPE == 0) || (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2173
369k
                    break;
2174
14.7M
          xmlParsePEReference(ctxt);
2175
51.1M
            } else if (CUR == 0) {
2176
13.0M
                unsigned long consumed;
2177
13.0M
                xmlEntityPtr ent;
2178
2179
13.0M
                if (ctxt->inputNr <= 1)
2180
19.7k
                    break;
2181
2182
13.0M
                consumed = ctxt->input->consumed;
2183
13.0M
                xmlSaturatedAddSizeT(&consumed,
2184
13.0M
                                     ctxt->input->cur - ctxt->input->base);
2185
2186
                /*
2187
                 * Add to sizeentities when parsing an external entity
2188
                 * for the first time.
2189
                 */
2190
13.0M
                ent = ctxt->input->entity;
2191
13.0M
                if ((ent->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
2192
13.0M
                    ((ent->flags & XML_ENT_PARSED) == 0)) {
2193
7.71k
                    ent->flags |= XML_ENT_PARSED;
2194
2195
7.71k
                    xmlSaturatedAdd(&ctxt->sizeentities, consumed);
2196
7.71k
                }
2197
2198
13.0M
                xmlParserEntityCheck(ctxt, consumed);
2199
2200
13.0M
                xmlPopInput(ctxt);
2201
38.0M
            } else {
2202
38.0M
                break;
2203
38.0M
            }
2204
2205
            /*
2206
             * Also increase the counter when entering or exiting a PERef.
2207
             * The spec says: "When a parameter-entity reference is recognized
2208
             * in the DTD and included, its replacement text MUST be enlarged
2209
             * by the attachment of one leading and one following space (#x20)
2210
             * character."
2211
             */
2212
105M
      if (res < INT_MAX)
2213
105M
    res++;
2214
105M
        }
2215
38.4M
    }
2216
69.9M
    return(res);
2217
69.9M
}
2218
2219
/************************************************************************
2220
 *                  *
2221
 *    Commodity functions to handle entities      *
2222
 *                  *
2223
 ************************************************************************/
2224
2225
/**
2226
 * xmlPopInput:
2227
 * @ctxt:  an XML parser context
2228
 *
2229
 * xmlPopInput: the current input pointed by ctxt->input came to an end
2230
 *          pop it and return the next char.
2231
 *
2232
 * Returns the current xmlChar in the parser context
2233
 */
2234
xmlChar
2235
13.0M
xmlPopInput(xmlParserCtxtPtr ctxt) {
2236
13.0M
    xmlParserInputPtr input;
2237
2238
13.0M
    if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2239
13.0M
    if (xmlParserDebugEntities)
2240
0
  xmlGenericError(xmlGenericErrorContext,
2241
0
    "Popping input %d\n", ctxt->inputNr);
2242
13.0M
    if ((ctxt->inputNr > 1) && (ctxt->inSubset == 0) &&
2243
13.0M
        (ctxt->instate != XML_PARSER_EOF))
2244
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2245
0
                    "Unfinished entity outside the DTD");
2246
13.0M
    input = inputPop(ctxt);
2247
13.0M
    if (input->entity != NULL)
2248
13.0M
        input->entity->flags &= ~XML_ENT_EXPANDING;
2249
13.0M
    xmlFreeInputStream(input);
2250
13.0M
    if (*ctxt->input->cur == 0)
2251
6.05M
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2252
13.0M
    return(CUR);
2253
13.0M
}
2254
2255
/**
2256
 * xmlPushInput:
2257
 * @ctxt:  an XML parser context
2258
 * @input:  an XML parser input fragment (entity, XML fragment ...).
2259
 *
2260
 * xmlPushInput: switch to a new input stream which is stacked on top
2261
 *               of the previous one(s).
2262
 * Returns -1 in case of error or the index in the input stack
2263
 */
2264
int
2265
13.0M
xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2266
13.0M
    int ret;
2267
13.0M
    if (input == NULL) return(-1);
2268
2269
13.0M
    if (xmlParserDebugEntities) {
2270
0
  if ((ctxt->input != NULL) && (ctxt->input->filename))
2271
0
      xmlGenericError(xmlGenericErrorContext,
2272
0
        "%s(%d): ", ctxt->input->filename,
2273
0
        ctxt->input->line);
2274
0
  xmlGenericError(xmlGenericErrorContext,
2275
0
    "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2276
0
    }
2277
13.0M
    if (((ctxt->inputNr > 40) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2278
13.0M
        (ctxt->inputNr > 100)) {
2279
0
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2280
0
        while (ctxt->inputNr > 1)
2281
0
            xmlFreeInputStream(inputPop(ctxt));
2282
0
  return(-1);
2283
0
    }
2284
13.0M
    ret = inputPush(ctxt, input);
2285
13.0M
    if (ctxt->instate == XML_PARSER_EOF)
2286
0
        return(-1);
2287
13.0M
    GROW;
2288
13.0M
    return(ret);
2289
13.0M
}
2290
2291
/**
2292
 * xmlParseCharRef:
2293
 * @ctxt:  an XML parser context
2294
 *
2295
 * DEPRECATED: Internal function, don't use.
2296
 *
2297
 * Parse a numeric character reference. Always consumes '&'.
2298
 *
2299
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2300
 *                  '&#x' [0-9a-fA-F]+ ';'
2301
 *
2302
 * [ WFC: Legal Character ]
2303
 * Characters referred to using character references must match the
2304
 * production for Char.
2305
 *
2306
 * Returns the value parsed (as an int), 0 in case of error
2307
 */
2308
int
2309
347k
xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2310
347k
    int val = 0;
2311
347k
    int count = 0;
2312
2313
    /*
2314
     * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2315
     */
2316
347k
    if ((RAW == '&') && (NXT(1) == '#') &&
2317
347k
        (NXT(2) == 'x')) {
2318
187k
  SKIP(3);
2319
187k
  GROW;
2320
517k
  while (RAW != ';') { /* loop blocked by count */
2321
352k
      if (count++ > 20) {
2322
9.58k
    count = 0;
2323
9.58k
    GROW;
2324
9.58k
                if (ctxt->instate == XML_PARSER_EOF)
2325
0
                    return(0);
2326
9.58k
      }
2327
352k
      if ((RAW >= '0') && (RAW <= '9'))
2328
171k
          val = val * 16 + (CUR - '0');
2329
181k
      else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2330
130k
          val = val * 16 + (CUR - 'a') + 10;
2331
50.5k
      else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2332
28.1k
          val = val * 16 + (CUR - 'A') + 10;
2333
22.3k
      else {
2334
22.3k
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2335
22.3k
    val = 0;
2336
22.3k
    break;
2337
22.3k
      }
2338
330k
      if (val > 0x110000)
2339
108k
          val = 0x110000;
2340
2341
330k
      NEXT;
2342
330k
      count++;
2343
330k
  }
2344
187k
  if (RAW == ';') {
2345
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2346
164k
      ctxt->input->col++;
2347
164k
      ctxt->input->cur++;
2348
164k
  }
2349
187k
    } else if  ((RAW == '&') && (NXT(1) == '#')) {
2350
160k
  SKIP(2);
2351
160k
  GROW;
2352
669k
  while (RAW != ';') { /* loop blocked by count */
2353
523k
      if (count++ > 20) {
2354
11.2k
    count = 0;
2355
11.2k
    GROW;
2356
11.2k
                if (ctxt->instate == XML_PARSER_EOF)
2357
0
                    return(0);
2358
11.2k
      }
2359
523k
      if ((RAW >= '0') && (RAW <= '9'))
2360
509k
          val = val * 10 + (CUR - '0');
2361
14.5k
      else {
2362
14.5k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2363
14.5k
    val = 0;
2364
14.5k
    break;
2365
14.5k
      }
2366
509k
      if (val > 0x110000)
2367
122k
          val = 0x110000;
2368
2369
509k
      NEXT;
2370
509k
      count++;
2371
509k
  }
2372
160k
  if (RAW == ';') {
2373
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2374
145k
      ctxt->input->col++;
2375
145k
      ctxt->input->cur++;
2376
145k
  }
2377
160k
    } else {
2378
0
        if (RAW == '&')
2379
0
            SKIP(1);
2380
0
        xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2381
0
    }
2382
2383
    /*
2384
     * [ WFC: Legal Character ]
2385
     * Characters referred to using character references must match the
2386
     * production for Char.
2387
     */
2388
347k
    if (val >= 0x110000) {
2389
867
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2390
867
                "xmlParseCharRef: character reference out of bounds\n",
2391
867
          val);
2392
346k
    } else if (IS_CHAR(val)) {
2393
307k
        return(val);
2394
307k
    } else {
2395
38.8k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2396
38.8k
                          "xmlParseCharRef: invalid xmlChar value %d\n",
2397
38.8k
                    val);
2398
38.8k
    }
2399
39.6k
    return(0);
2400
347k
}
2401
2402
/**
2403
 * xmlParseStringCharRef:
2404
 * @ctxt:  an XML parser context
2405
 * @str:  a pointer to an index in the string
2406
 *
2407
 * parse Reference declarations, variant parsing from a string rather
2408
 * than an an input flow.
2409
 *
2410
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2411
 *                  '&#x' [0-9a-fA-F]+ ';'
2412
 *
2413
 * [ WFC: Legal Character ]
2414
 * Characters referred to using character references must match the
2415
 * production for Char.
2416
 *
2417
 * Returns the value parsed (as an int), 0 in case of error, str will be
2418
 *         updated to the current value of the index
2419
 */
2420
static int
2421
129k
xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2422
129k
    const xmlChar *ptr;
2423
129k
    xmlChar cur;
2424
129k
    int val = 0;
2425
2426
129k
    if ((str == NULL) || (*str == NULL)) return(0);
2427
129k
    ptr = *str;
2428
129k
    cur = *ptr;
2429
129k
    if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2430
27.7k
  ptr += 3;
2431
27.7k
  cur = *ptr;
2432
66.3k
  while (cur != ';') { /* Non input consuming loop */
2433
39.7k
      if ((cur >= '0') && (cur <= '9'))
2434
12.1k
          val = val * 16 + (cur - '0');
2435
27.6k
      else if ((cur >= 'a') && (cur <= 'f'))
2436
4.04k
          val = val * 16 + (cur - 'a') + 10;
2437
23.5k
      else if ((cur >= 'A') && (cur <= 'F'))
2438
22.4k
          val = val * 16 + (cur - 'A') + 10;
2439
1.07k
      else {
2440
1.07k
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2441
1.07k
    val = 0;
2442
1.07k
    break;
2443
1.07k
      }
2444
38.6k
      if (val > 0x110000)
2445
4.71k
          val = 0x110000;
2446
2447
38.6k
      ptr++;
2448
38.6k
      cur = *ptr;
2449
38.6k
  }
2450
27.7k
  if (cur == ';')
2451
26.6k
      ptr++;
2452
101k
    } else if  ((cur == '&') && (ptr[1] == '#')){
2453
101k
  ptr += 2;
2454
101k
  cur = *ptr;
2455
328k
  while (cur != ';') { /* Non input consuming loops */
2456
227k
      if ((cur >= '0') && (cur <= '9'))
2457
226k
          val = val * 10 + (cur - '0');
2458
1.54k
      else {
2459
1.54k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2460
1.54k
    val = 0;
2461
1.54k
    break;
2462
1.54k
      }
2463
226k
      if (val > 0x110000)
2464
1.18k
          val = 0x110000;
2465
2466
226k
      ptr++;
2467
226k
      cur = *ptr;
2468
226k
  }
2469
101k
  if (cur == ';')
2470
100k
      ptr++;
2471
101k
    } else {
2472
0
  xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2473
0
  return(0);
2474
0
    }
2475
129k
    *str = ptr;
2476
2477
    /*
2478
     * [ WFC: Legal Character ]
2479
     * Characters referred to using character references must match the
2480
     * production for Char.
2481
     */
2482
129k
    if (val >= 0x110000) {
2483
226
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2484
226
                "xmlParseStringCharRef: character reference out of bounds\n",
2485
226
                val);
2486
129k
    } else if (IS_CHAR(val)) {
2487
126k
        return(val);
2488
126k
    } else {
2489
3.38k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2490
3.38k
        "xmlParseStringCharRef: invalid xmlChar value %d\n",
2491
3.38k
        val);
2492
3.38k
    }
2493
3.61k
    return(0);
2494
129k
}
2495
2496
/**
2497
 * xmlParserHandlePEReference:
2498
 * @ctxt:  the parser context
2499
 *
2500
 * [69] PEReference ::= '%' Name ';'
2501
 *
2502
 * [ WFC: No Recursion ]
2503
 * A parsed entity must not contain a recursive
2504
 * reference to itself, either directly or indirectly.
2505
 *
2506
 * [ WFC: Entity Declared ]
2507
 * In a document without any DTD, a document with only an internal DTD
2508
 * subset which contains no parameter entity references, or a document
2509
 * with "standalone='yes'", ...  ... The declaration of a parameter
2510
 * entity must precede any reference to it...
2511
 *
2512
 * [ VC: Entity Declared ]
2513
 * In a document with an external subset or external parameter entities
2514
 * with "standalone='no'", ...  ... The declaration of a parameter entity
2515
 * must precede any reference to it...
2516
 *
2517
 * [ WFC: In DTD ]
2518
 * Parameter-entity references may only appear in the DTD.
2519
 * NOTE: misleading but this is handled.
2520
 *
2521
 * A PEReference may have been detected in the current input stream
2522
 * the handling is done accordingly to
2523
 *      http://www.w3.org/TR/REC-xml#entproc
2524
 * i.e.
2525
 *   - Included in literal in entity values
2526
 *   - Included as Parameter Entity reference within DTDs
2527
 */
2528
void
2529
0
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2530
0
    switch(ctxt->instate) {
2531
0
  case XML_PARSER_CDATA_SECTION:
2532
0
      return;
2533
0
        case XML_PARSER_COMMENT:
2534
0
      return;
2535
0
  case XML_PARSER_START_TAG:
2536
0
      return;
2537
0
  case XML_PARSER_END_TAG:
2538
0
      return;
2539
0
        case XML_PARSER_EOF:
2540
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2541
0
      return;
2542
0
        case XML_PARSER_PROLOG:
2543
0
  case XML_PARSER_START:
2544
0
  case XML_PARSER_MISC:
2545
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2546
0
      return;
2547
0
  case XML_PARSER_ENTITY_DECL:
2548
0
        case XML_PARSER_CONTENT:
2549
0
        case XML_PARSER_ATTRIBUTE_VALUE:
2550
0
        case XML_PARSER_PI:
2551
0
  case XML_PARSER_SYSTEM_LITERAL:
2552
0
  case XML_PARSER_PUBLIC_LITERAL:
2553
      /* we just ignore it there */
2554
0
      return;
2555
0
        case XML_PARSER_EPILOG:
2556
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2557
0
      return;
2558
0
  case XML_PARSER_ENTITY_VALUE:
2559
      /*
2560
       * NOTE: in the case of entity values, we don't do the
2561
       *       substitution here since we need the literal
2562
       *       entity value to be able to save the internal
2563
       *       subset of the document.
2564
       *       This will be handled by xmlStringDecodeEntities
2565
       */
2566
0
      return;
2567
0
        case XML_PARSER_DTD:
2568
      /*
2569
       * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2570
       * In the internal DTD subset, parameter-entity references
2571
       * can occur only where markup declarations can occur, not
2572
       * within markup declarations.
2573
       * In that case this is handled in xmlParseMarkupDecl
2574
       */
2575
0
      if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2576
0
    return;
2577
0
      if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2578
0
    return;
2579
0
            break;
2580
0
        case XML_PARSER_IGNORE:
2581
0
            return;
2582
0
    }
2583
2584
0
    xmlParsePEReference(ctxt);
2585
0
}
2586
2587
/*
 * Macro used to grow the current buffer to twice its size plus n bytes.
 * buffer##_size is expected to be a size_t
 * mem_error: is expected to handle memory allocation failures; it is
 * also the target when the new size computation wraps around.
 * On failure the original buffer is left untouched.
 */
#define growBuffer(buffer, n) {						\
    xmlChar *tmp;							\
    size_t new_size = buffer##_size * 2 + (n);                          \
    if (new_size < buffer##_size) goto mem_error;                       \
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);                     \
    if (tmp == NULL) goto mem_error;					\
    buffer = tmp;							\
    buffer##_size = new_size;                                           \
}
2601
2602
/**
2603
 * xmlStringDecodeEntitiesInt:
2604
 * @ctxt:  the parser context
2605
 * @str:  the input string
2606
 * @len: the string length
2607
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2608
 * @end:  an end marker xmlChar, 0 if none
2609
 * @end2:  an end marker xmlChar, 0 if none
2610
 * @end3:  an end marker xmlChar, 0 if none
2611
 * @check:  whether to perform entity checks
2612
 */
2613
static xmlChar *
2614
xmlStringDecodeEntitiesInt(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2615
               int what, xmlChar end, xmlChar  end2, xmlChar end3,
2616
14.1M
                           int check) {
2617
14.1M
    xmlChar *buffer = NULL;
2618
14.1M
    size_t buffer_size = 0;
2619
14.1M
    size_t nbchars = 0;
2620
2621
14.1M
    xmlChar *current = NULL;
2622
14.1M
    xmlChar *rep = NULL;
2623
14.1M
    const xmlChar *last;
2624
14.1M
    xmlEntityPtr ent;
2625
14.1M
    int c,l;
2626
2627
14.1M
    if (str == NULL)
2628
10.9k
        return(NULL);
2629
14.1M
    last = str + len;
2630
2631
14.1M
    if (((ctxt->depth > 40) &&
2632
14.1M
         ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2633
14.1M
  (ctxt->depth > 100)) {
2634
0
  xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_LOOP,
2635
0
                       "Maximum entity nesting depth exceeded");
2636
0
  return(NULL);
2637
0
    }
2638
2639
    /*
2640
     * allocate a translation buffer.
2641
     */
2642
14.1M
    buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2643
14.1M
    buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
2644
14.1M
    if (buffer == NULL) goto mem_error;
2645
2646
    /*
2647
     * OK loop until we reach one of the ending char or a size limit.
2648
     * we are operating on already parsed values.
2649
     */
2650
14.1M
    if (str < last)
2651
14.0M
  c = CUR_SCHAR(str, l);
2652
87.0k
    else
2653
87.0k
        c = 0;
2654
859M
    while ((c != 0) && (c != end) && /* non input consuming loop */
2655
859M
           (c != end2) && (c != end3) &&
2656
859M
           (ctxt->instate != XML_PARSER_EOF)) {
2657
2658
845M
  if (c == 0) break;
2659
845M
        if ((c == '&') && (str[1] == '#')) {
2660
129k
      int val = xmlParseStringCharRef(ctxt, &str);
2661
129k
      if (val == 0)
2662
3.61k
                goto int_error;
2663
126k
      COPY_BUF(0,buffer,nbchars,val);
2664
126k
      if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2665
156
          growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2666
156
      }
2667
845M
  } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2668
12.7M
      if (xmlParserDebugEntities)
2669
0
    xmlGenericError(xmlGenericErrorContext,
2670
0
      "String decoding Entity Reference: %.30s\n",
2671
0
      str);
2672
12.7M
      ent = xmlParseStringEntityRef(ctxt, &str);
2673
12.7M
      if ((ent != NULL) &&
2674
12.7M
    (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2675
80.3k
    if (ent->content != NULL) {
2676
80.3k
        COPY_BUF(0,buffer,nbchars,ent->content[0]);
2677
80.3k
        if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2678
6.44k
      growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2679
6.44k
        }
2680
80.3k
    } else {
2681
0
        xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2682
0
          "predefined entity has no content\n");
2683
0
                    goto int_error;
2684
0
    }
2685
12.7M
      } else if ((ent != NULL) && (ent->content != NULL)) {
2686
12.5M
          if ((check) && (xmlParserEntityCheck(ctxt, ent->length)))
2687
99
                    goto int_error;
2688
2689
12.5M
                if (ent->flags & XML_ENT_EXPANDING) {
2690
183
              xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2691
183
                    xmlHaltParser(ctxt);
2692
183
                    ent->content[0] = 0;
2693
183
                    goto int_error;
2694
183
                }
2695
2696
12.5M
                ent->flags |= XML_ENT_EXPANDING;
2697
12.5M
    ctxt->depth++;
2698
12.5M
    rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
2699
12.5M
                        ent->length, what, 0, 0, 0, check);
2700
12.5M
    ctxt->depth--;
2701
12.5M
                ent->flags &= ~XML_ENT_EXPANDING;
2702
2703
12.5M
    if (rep == NULL) {
2704
1.75k
                    ent->content[0] = 0;
2705
1.75k
                    goto int_error;
2706
1.75k
                }
2707
2708
12.5M
                current = rep;
2709
2.07G
                while (*current != 0) { /* non input consuming loop */
2710
2.06G
                    buffer[nbchars++] = *current++;
2711
2.06G
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2712
1.32M
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2713
1.32M
                    }
2714
2.06G
                }
2715
12.5M
                xmlFree(rep);
2716
12.5M
                rep = NULL;
2717
12.5M
      } else if (ent != NULL) {
2718
56.9k
    int i = xmlStrlen(ent->name);
2719
56.9k
    const xmlChar *cur = ent->name;
2720
2721
56.9k
    buffer[nbchars++] = '&';
2722
56.9k
    if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
2723
270
        growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
2724
270
    }
2725
122k
    for (;i > 0;i--)
2726
65.4k
        buffer[nbchars++] = *cur++;
2727
56.9k
    buffer[nbchars++] = ';';
2728
56.9k
      }
2729
832M
  } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2730
284k
      if (xmlParserDebugEntities)
2731
0
    xmlGenericError(xmlGenericErrorContext,
2732
0
      "String decoding PE Reference: %.30s\n", str);
2733
284k
      ent = xmlParseStringPEReference(ctxt, &str);
2734
284k
      if (ent != NULL) {
2735
243k
                if (ent->content == NULL) {
2736
        /*
2737
         * Note: external parsed entities will not be loaded,
2738
         * it is not required for a non-validating parser to
2739
         * complete external PEReferences coming from the
2740
         * internal subset
2741
         */
2742
2.46k
        if (((ctxt->options & XML_PARSE_NOENT) != 0) ||
2743
2.46k
      ((ctxt->options & XML_PARSE_DTDVALID) != 0) ||
2744
2.46k
      (ctxt->validate != 0)) {
2745
2.24k
      xmlLoadEntityContent(ctxt, ent);
2746
2.24k
        } else {
2747
219
      xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
2748
219
      "not validating will not read content for PE entity %s\n",
2749
219
                          ent->name, NULL);
2750
219
        }
2751
2.46k
    }
2752
2753
243k
          if ((check) && (xmlParserEntityCheck(ctxt, ent->length)))
2754
48
                    goto int_error;
2755
2756
243k
                if (ent->flags & XML_ENT_EXPANDING) {
2757
88
              xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2758
88
                    xmlHaltParser(ctxt);
2759
88
                    if (ent->content != NULL)
2760
45
                        ent->content[0] = 0;
2761
88
                    goto int_error;
2762
88
                }
2763
2764
243k
                ent->flags |= XML_ENT_EXPANDING;
2765
243k
    ctxt->depth++;
2766
243k
    rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
2767
243k
                        ent->length, what, 0, 0, 0, check);
2768
243k
    ctxt->depth--;
2769
243k
                ent->flags &= ~XML_ENT_EXPANDING;
2770
2771
243k
    if (rep == NULL) {
2772
1.33k
                    if (ent->content != NULL)
2773
66
                        ent->content[0] = 0;
2774
1.33k
                    goto int_error;
2775
1.33k
                }
2776
242k
                current = rep;
2777
433M
                while (*current != 0) { /* non input consuming loop */
2778
433M
                    buffer[nbchars++] = *current++;
2779
433M
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2780
56.5k
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2781
56.5k
                    }
2782
433M
                }
2783
242k
                xmlFree(rep);
2784
242k
                rep = NULL;
2785
242k
      }
2786
832M
  } else {
2787
832M
      COPY_BUF(l,buffer,nbchars,c);
2788
832M
      str += l;
2789
832M
      if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2790
214k
          growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2791
214k
      }
2792
832M
  }
2793
845M
  if (str < last)
2794
831M
      c = CUR_SCHAR(str, l);
2795
14.0M
  else
2796
14.0M
      c = 0;
2797
845M
    }
2798
14.1M
    buffer[nbchars] = 0;
2799
14.1M
    return(buffer);
2800
2801
0
mem_error:
2802
0
    xmlErrMemory(ctxt, NULL);
2803
7.12k
int_error:
2804
7.12k
    if (rep != NULL)
2805
0
        xmlFree(rep);
2806
7.12k
    if (buffer != NULL)
2807
7.12k
        xmlFree(buffer);
2808
7.12k
    return(NULL);
2809
0
}
2810
2811
/**
2812
 * xmlStringLenDecodeEntities:
2813
 * @ctxt:  the parser context
2814
 * @str:  the input string
2815
 * @len: the string length
2816
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2817
 * @end:  an end marker xmlChar, 0 if none
2818
 * @end2:  an end marker xmlChar, 0 if none
2819
 * @end3:  an end marker xmlChar, 0 if none
2820
 *
2821
 * DEPRECATED: Internal function, don't use.
2822
 *
2823
 * Takes a entity string content and process to do the adequate substitutions.
2824
 *
2825
 * [67] Reference ::= EntityRef | CharRef
2826
 *
2827
 * [69] PEReference ::= '%' Name ';'
2828
 *
2829
 * Returns A newly allocated string with the substitution done. The caller
2830
 *      must deallocate it !
2831
 */
2832
xmlChar *
2833
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2834
                           int what, xmlChar end, xmlChar  end2,
2835
7.75k
                           xmlChar end3) {
2836
7.75k
    if ((ctxt == NULL) || (str == NULL) || (len < 0))
2837
0
        return(NULL);
2838
7.75k
    return(xmlStringDecodeEntitiesInt(ctxt, str, len, what,
2839
7.75k
                                      end, end2, end3, 0));
2840
7.75k
}
2841
2842
/**
2843
 * xmlStringDecodeEntities:
2844
 * @ctxt:  the parser context
2845
 * @str:  the input string
2846
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2847
 * @end:  an end marker xmlChar, 0 if none
2848
 * @end2:  an end marker xmlChar, 0 if none
2849
 * @end3:  an end marker xmlChar, 0 if none
2850
 *
2851
 * DEPRECATED: Internal function, don't use.
2852
 *
2853
 * Takes a entity string content and process to do the adequate substitutions.
2854
 *
2855
 * [67] Reference ::= EntityRef | CharRef
2856
 *
2857
 * [69] PEReference ::= '%' Name ';'
2858
 *
2859
 * Returns A newly allocated string with the substitution done. The caller
2860
 *      must deallocate it !
2861
 */
2862
xmlChar *
2863
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2864
194k
            xmlChar end, xmlChar  end2, xmlChar end3) {
2865
194k
    if ((ctxt == NULL) || (str == NULL)) return(NULL);
2866
194k
    return(xmlStringDecodeEntitiesInt(ctxt, str, xmlStrlen(str), what,
2867
194k
                                      end, end2, end3, 0));
2868
194k
}
2869
2870
/************************************************************************
2871
 *                  *
2872
 *    Commodity functions, cleanup needed ?     *
2873
 *                  *
2874
 ************************************************************************/
2875
2876
/**
2877
 * areBlanks:
2878
 * @ctxt:  an XML parser context
2879
 * @str:  a xmlChar *
2880
 * @len:  the size of @str
2881
 * @blank_chars: we know the chars are blanks
2882
 *
2883
 * Is this a sequence of blank chars that one can ignore ?
2884
 *
2885
 * Returns 1 if ignorable 0 otherwise.
2886
 */
2887
2888
static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2889
3.31M
                     int blank_chars) {
2890
3.31M
    int i, ret;
2891
3.31M
    xmlNodePtr lastChild;
2892
2893
    /*
2894
     * Don't spend time trying to differentiate them, the same callback is
2895
     * used !
2896
     */
2897
3.31M
    if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2898
173k
  return(0);
2899
2900
    /*
2901
     * Check for xml:space value.
2902
     */
2903
3.14M
    if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2904
3.14M
        (*(ctxt->space) == -2))
2905
1.37M
  return(0);
2906
2907
    /*
2908
     * Check that the string is made of blanks
2909
     */
2910
1.76M
    if (blank_chars == 0) {
2911
3.44M
  for (i = 0;i < len;i++)
2912
2.97M
      if (!(IS_BLANK_CH(str[i]))) return(0);
2913
706k
    }
2914
2915
    /*
2916
     * Look if the element is mixed content in the DTD if available
2917
     */
2918
1.53M
    if (ctxt->node == NULL) return(0);
2919
1.48M
    if (ctxt->myDoc != NULL) {
2920
1.48M
  ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2921
1.48M
        if (ret == 0) return(1);
2922
1.27M
        if (ret == 1) return(0);
2923
1.27M
    }
2924
2925
    /*
2926
     * Otherwise, heuristic :-\
2927
     */
2928
1.26M
    if ((RAW != '<') && (RAW != 0xD)) return(0);
2929
1.24M
    if ((ctxt->node->children == NULL) &&
2930
1.24M
  (RAW == '<') && (NXT(1) == '/')) return(0);
2931
2932
1.24M
    lastChild = xmlGetLastChild(ctxt->node);
2933
1.24M
    if (lastChild == NULL) {
2934
266k
        if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2935
266k
            (ctxt->node->content != NULL)) return(0);
2936
977k
    } else if (xmlNodeIsText(lastChild))
2937
22.8k
        return(0);
2938
954k
    else if ((ctxt->node->children != NULL) &&
2939
954k
             (xmlNodeIsText(ctxt->node->children)))
2940
15.3k
        return(0);
2941
1.20M
    return(1);
2942
1.24M
}
2943
2944
/************************************************************************
2945
 *                  *
2946
 *    Extra stuff for namespace support     *
2947
 *  Relates to http://www.w3.org/TR/WD-xml-names      *
2948
 *                  *
2949
 ************************************************************************/
2950
2951
/**
2952
 * xmlSplitQName:
2953
 * @ctxt:  an XML parser context
2954
 * @name:  an XML parser context
2955
 * @prefix:  a xmlChar **
2956
 *
2957
 * parse an UTF8 encoded XML qualified name string
2958
 *
2959
 * [NS 5] QName ::= (Prefix ':')? LocalPart
2960
 *
2961
 * [NS 6] Prefix ::= NCName
2962
 *
2963
 * [NS 7] LocalPart ::= NCName
2964
 *
2965
 * Returns the local part, and prefix is updated
2966
 *   to get the Prefix if any.
2967
 */
2968
2969
xmlChar *
2970
6.72M
xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2971
6.72M
    xmlChar buf[XML_MAX_NAMELEN + 5];
2972
6.72M
    xmlChar *buffer = NULL;
2973
6.72M
    int len = 0;
2974
6.72M
    int max = XML_MAX_NAMELEN;
2975
6.72M
    xmlChar *ret = NULL;
2976
6.72M
    const xmlChar *cur = name;
2977
6.72M
    int c;
2978
2979
6.72M
    if (prefix == NULL) return(NULL);
2980
6.72M
    *prefix = NULL;
2981
2982
6.72M
    if (cur == NULL) return(NULL);
2983
2984
#ifndef XML_XML_NAMESPACE
2985
    /* xml: prefix is not really a namespace */
2986
    if ((cur[0] == 'x') && (cur[1] == 'm') &&
2987
        (cur[2] == 'l') && (cur[3] == ':'))
2988
  return(xmlStrdup(name));
2989
#endif
2990
2991
    /* nasty but well=formed */
2992
6.72M
    if (cur[0] == ':')
2993
6.43k
  return(xmlStrdup(name));
2994
2995
6.72M
    c = *cur++;
2996
31.8M
    while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2997
25.1M
  buf[len++] = c;
2998
25.1M
  c = *cur++;
2999
25.1M
    }
3000
6.72M
    if (len >= max) {
3001
  /*
3002
   * Okay someone managed to make a huge name, so he's ready to pay
3003
   * for the processing speed.
3004
   */
3005
2.22k
  max = len * 2;
3006
3007
2.22k
  buffer = (xmlChar *) xmlMallocAtomic(max);
3008
2.22k
  if (buffer == NULL) {
3009
0
      xmlErrMemory(ctxt, NULL);
3010
0
      return(NULL);
3011
0
  }
3012
2.22k
  memcpy(buffer, buf, len);
3013
2.94M
  while ((c != 0) && (c != ':')) { /* tested bigname.xml */
3014
2.94M
      if (len + 10 > max) {
3015
4.14k
          xmlChar *tmp;
3016
3017
4.14k
    max *= 2;
3018
4.14k
    tmp = (xmlChar *) xmlRealloc(buffer, max);
3019
4.14k
    if (tmp == NULL) {
3020
0
        xmlFree(buffer);
3021
0
        xmlErrMemory(ctxt, NULL);
3022
0
        return(NULL);
3023
0
    }
3024
4.14k
    buffer = tmp;
3025
4.14k
      }
3026
2.94M
      buffer[len++] = c;
3027
2.94M
      c = *cur++;
3028
2.94M
  }
3029
2.22k
  buffer[len] = 0;
3030
2.22k
    }
3031
3032
6.72M
    if ((c == ':') && (*cur == 0)) {
3033
5.95k
        if (buffer != NULL)
3034
228
      xmlFree(buffer);
3035
5.95k
  *prefix = NULL;
3036
5.95k
  return(xmlStrdup(name));
3037
5.95k
    }
3038
3039
6.71M
    if (buffer == NULL)
3040
6.71M
  ret = xmlStrndup(buf, len);
3041
1.99k
    else {
3042
1.99k
  ret = buffer;
3043
1.99k
  buffer = NULL;
3044
1.99k
  max = XML_MAX_NAMELEN;
3045
1.99k
    }
3046
3047
3048
6.71M
    if (c == ':') {
3049
456k
  c = *cur;
3050
456k
        *prefix = ret;
3051
456k
  if (c == 0) {
3052
0
      return(xmlStrndup(BAD_CAST "", 0));
3053
0
  }
3054
456k
  len = 0;
3055
3056
  /*
3057
   * Check that the first character is proper to start
3058
   * a new name
3059
   */
3060
456k
  if (!(((c >= 0x61) && (c <= 0x7A)) ||
3061
456k
        ((c >= 0x41) && (c <= 0x5A)) ||
3062
456k
        (c == '_') || (c == ':'))) {
3063
4.96k
      int l;
3064
4.96k
      int first = CUR_SCHAR(cur, l);
3065
3066
4.96k
      if (!IS_LETTER(first) && (first != '_')) {
3067
1.60k
    xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3068
1.60k
          "Name %s is not XML Namespace compliant\n",
3069
1.60k
          name);
3070
1.60k
      }
3071
4.96k
  }
3072
456k
  cur++;
3073
3074
2.85M
  while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3075
2.39M
      buf[len++] = c;
3076
2.39M
      c = *cur++;
3077
2.39M
  }
3078
456k
  if (len >= max) {
3079
      /*
3080
       * Okay someone managed to make a huge name, so he's ready to pay
3081
       * for the processing speed.
3082
       */
3083
1.86k
      max = len * 2;
3084
3085
1.86k
      buffer = (xmlChar *) xmlMallocAtomic(max);
3086
1.86k
      if (buffer == NULL) {
3087
0
          xmlErrMemory(ctxt, NULL);
3088
0
    return(NULL);
3089
0
      }
3090
1.86k
      memcpy(buffer, buf, len);
3091
1.84M
      while (c != 0) { /* tested bigname2.xml */
3092
1.84M
    if (len + 10 > max) {
3093
2.75k
        xmlChar *tmp;
3094
3095
2.75k
        max *= 2;
3096
2.75k
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3097
2.75k
        if (tmp == NULL) {
3098
0
      xmlErrMemory(ctxt, NULL);
3099
0
      xmlFree(buffer);
3100
0
      return(NULL);
3101
0
        }
3102
2.75k
        buffer = tmp;
3103
2.75k
    }
3104
1.84M
    buffer[len++] = c;
3105
1.84M
    c = *cur++;
3106
1.84M
      }
3107
1.86k
      buffer[len] = 0;
3108
1.86k
  }
3109
3110
456k
  if (buffer == NULL)
3111
454k
      ret = xmlStrndup(buf, len);
3112
1.86k
  else {
3113
1.86k
      ret = buffer;
3114
1.86k
  }
3115
456k
    }
3116
3117
6.71M
    return(ret);
3118
6.71M
}
3119
3120
/************************************************************************
3121
 *                  *
3122
 *      The parser itself       *
3123
 *  Relates to http://www.w3.org/TR/REC-xml       *
3124
 *                  *
3125
 ************************************************************************/
3126
3127
/************************************************************************
3128
 *                  *
3129
 *  Routines to parse Name, NCName and NmToken      *
3130
 *                  *
3131
 ************************************************************************/
3132
/*
 * Counters that only exist when DEBUG is defined. The parsing routines
 * visible below bump nbParseName, nbParseNameComplex, nbParseNCName and
 * nbParseNCNameComplex once per call.
 * NOTE(review): nbParseNmToken and nbParseStringName are presumably
 * updated by the NmToken / string name parsers - confirm.
 */
#ifdef DEBUG
3133
static unsigned long nbParseName = 0;
3134
static unsigned long nbParseNmToken = 0;
3135
static unsigned long nbParseNCName = 0;
3136
static unsigned long nbParseNCNameComplex = 0;
3137
static unsigned long nbParseNameComplex = 0;
3138
static unsigned long nbParseStringName = 0;
3139
#endif
3140
3141
/*
3142
 * The two following functions are related to the change of accepted
3143
 * characters for Name and NmToken in the Revision 5 of XML-1.0
3144
 * They correspond to the modified production [4] and the new production [4a]
3145
 * changes in that revision. Also note that the macros used for the
3146
 * productions Letter, Digit, CombiningChar and Extender are not needed
3147
 * anymore.
3148
 * We still keep compatibility to pre-revision5 parsing semantic if the
3149
 * new XML_PARSE_OLD10 option is given to the parser.
3150
 */
3151
static int
3152
13.7M
xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3153
13.7M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3154
        /*
3155
   * Use the new checks of production [4] [4a] amd [5] of the
3156
   * Update 5 of XML-1.0
3157
   */
3158
12.6M
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3159
12.6M
      (((c >= 'a') && (c <= 'z')) ||
3160
12.6M
       ((c >= 'A') && (c <= 'Z')) ||
3161
12.6M
       (c == '_') || (c == ':') ||
3162
12.6M
       ((c >= 0xC0) && (c <= 0xD6)) ||
3163
12.6M
       ((c >= 0xD8) && (c <= 0xF6)) ||
3164
12.6M
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3165
12.6M
       ((c >= 0x370) && (c <= 0x37D)) ||
3166
12.6M
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3167
12.6M
       ((c >= 0x200C) && (c <= 0x200D)) ||
3168
12.6M
       ((c >= 0x2070) && (c <= 0x218F)) ||
3169
12.6M
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3170
12.6M
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3171
12.6M
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3172
12.6M
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3173
12.6M
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3174
12.4M
      return(1);
3175
12.6M
    } else {
3176
1.09M
        if (IS_LETTER(c) || (c == '_') || (c == ':'))
3177
1.02M
      return(1);
3178
1.09M
    }
3179
222k
    return(0);
3180
13.7M
}
3181
3182
static int
3183
280M
xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3184
280M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3185
        /*
3186
   * Use the new checks of production [4] [4a] amd [5] of the
3187
   * Update 5 of XML-1.0
3188
   */
3189
273M
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3190
273M
      (((c >= 'a') && (c <= 'z')) ||
3191
273M
       ((c >= 'A') && (c <= 'Z')) ||
3192
273M
       ((c >= '0') && (c <= '9')) || /* !start */
3193
273M
       (c == '_') || (c == ':') ||
3194
273M
       (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3195
273M
       ((c >= 0xC0) && (c <= 0xD6)) ||
3196
273M
       ((c >= 0xD8) && (c <= 0xF6)) ||
3197
273M
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3198
273M
       ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3199
273M
       ((c >= 0x370) && (c <= 0x37D)) ||
3200
273M
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3201
273M
       ((c >= 0x200C) && (c <= 0x200D)) ||
3202
273M
       ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3203
273M
       ((c >= 0x2070) && (c <= 0x218F)) ||
3204
273M
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3205
273M
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3206
273M
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3207
273M
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3208
273M
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3209
260M
       return(1);
3210
273M
    } else {
3211
7.23M
        if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3212
7.23M
            (c == '.') || (c == '-') ||
3213
7.23M
      (c == '_') || (c == ':') ||
3214
7.23M
      (IS_COMBINING(c)) ||
3215
7.23M
      (IS_EXTENDER(c)))
3216
5.95M
      return(1);
3217
7.23M
    }
3218
14.3M
    return(0);
3219
280M
}
3220
3221
static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
3222
                                          int *len, int *alloc, int normalize);
3223
3224
static const xmlChar *
3225
496k
xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3226
496k
    int len = 0, l;
3227
496k
    int c;
3228
496k
    int count = 0;
3229
496k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3230
216k
                    XML_MAX_TEXT_LENGTH :
3231
496k
                    XML_MAX_NAME_LENGTH;
3232
3233
#ifdef DEBUG
3234
    nbParseNameComplex++;
3235
#endif
3236
3237
    /*
3238
     * Handler for more complex cases
3239
     */
3240
496k
    GROW;
3241
496k
    if (ctxt->instate == XML_PARSER_EOF)
3242
30
        return(NULL);
3243
496k
    c = CUR_CHAR(l);
3244
496k
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3245
        /*
3246
   * Use the new checks of production [4] [4a] amd [5] of the
3247
   * Update 5 of XML-1.0
3248
   */
3249
284k
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3250
284k
      (!(((c >= 'a') && (c <= 'z')) ||
3251
269k
         ((c >= 'A') && (c <= 'Z')) ||
3252
269k
         (c == '_') || (c == ':') ||
3253
269k
         ((c >= 0xC0) && (c <= 0xD6)) ||
3254
269k
         ((c >= 0xD8) && (c <= 0xF6)) ||
3255
269k
         ((c >= 0xF8) && (c <= 0x2FF)) ||
3256
269k
         ((c >= 0x370) && (c <= 0x37D)) ||
3257
269k
         ((c >= 0x37F) && (c <= 0x1FFF)) ||
3258
269k
         ((c >= 0x200C) && (c <= 0x200D)) ||
3259
269k
         ((c >= 0x2070) && (c <= 0x218F)) ||
3260
269k
         ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3261
269k
         ((c >= 0x3001) && (c <= 0xD7FF)) ||
3262
269k
         ((c >= 0xF900) && (c <= 0xFDCF)) ||
3263
269k
         ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3264
269k
         ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3265
189k
      return(NULL);
3266
189k
  }
3267
95.7k
  len += l;
3268
95.7k
  NEXTL(l);
3269
95.7k
  c = CUR_CHAR(l);
3270
2.34M
  while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3271
2.34M
         (((c >= 'a') && (c <= 'z')) ||
3272
2.32M
          ((c >= 'A') && (c <= 'Z')) ||
3273
2.32M
          ((c >= '0') && (c <= '9')) || /* !start */
3274
2.32M
          (c == '_') || (c == ':') ||
3275
2.32M
          (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3276
2.32M
          ((c >= 0xC0) && (c <= 0xD6)) ||
3277
2.32M
          ((c >= 0xD8) && (c <= 0xF6)) ||
3278
2.32M
          ((c >= 0xF8) && (c <= 0x2FF)) ||
3279
2.32M
          ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3280
2.32M
          ((c >= 0x370) && (c <= 0x37D)) ||
3281
2.32M
          ((c >= 0x37F) && (c <= 0x1FFF)) ||
3282
2.32M
          ((c >= 0x200C) && (c <= 0x200D)) ||
3283
2.32M
          ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3284
2.32M
          ((c >= 0x2070) && (c <= 0x218F)) ||
3285
2.32M
          ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3286
2.32M
          ((c >= 0x3001) && (c <= 0xD7FF)) ||
3287
2.32M
          ((c >= 0xF900) && (c <= 0xFDCF)) ||
3288
2.32M
          ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3289
2.32M
          ((c >= 0x10000) && (c <= 0xEFFFF))
3290
2.32M
    )) {
3291
2.24M
      if (count++ > XML_PARSER_CHUNK_SIZE) {
3292
17.2k
    count = 0;
3293
17.2k
    GROW;
3294
17.2k
                if (ctxt->instate == XML_PARSER_EOF)
3295
0
                    return(NULL);
3296
17.2k
      }
3297
2.24M
            if (len <= INT_MAX - l)
3298
2.24M
          len += l;
3299
2.24M
      NEXTL(l);
3300
2.24M
      c = CUR_CHAR(l);
3301
2.24M
  }
3302
211k
    } else {
3303
211k
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3304
211k
      (!IS_LETTER(c) && (c != '_') &&
3305
199k
       (c != ':'))) {
3306
141k
      return(NULL);
3307
141k
  }
3308
69.7k
  len += l;
3309
69.7k
  NEXTL(l);
3310
69.7k
  c = CUR_CHAR(l);
3311
3312
2.10M
  while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3313
2.10M
         ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3314
2.08M
    (c == '.') || (c == '-') ||
3315
2.08M
    (c == '_') || (c == ':') ||
3316
2.08M
    (IS_COMBINING(c)) ||
3317
2.08M
    (IS_EXTENDER(c)))) {
3318
2.03M
      if (count++ > XML_PARSER_CHUNK_SIZE) {
3319
16.2k
    count = 0;
3320
16.2k
    GROW;
3321
16.2k
                if (ctxt->instate == XML_PARSER_EOF)
3322
0
                    return(NULL);
3323
16.2k
      }
3324
2.03M
            if (len <= INT_MAX - l)
3325
2.03M
          len += l;
3326
2.03M
      NEXTL(l);
3327
2.03M
      c = CUR_CHAR(l);
3328
2.03M
  }
3329
69.7k
    }
3330
165k
    if (len > maxLength) {
3331
6
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3332
6
        return(NULL);
3333
6
    }
3334
165k
    if (ctxt->input->cur - ctxt->input->base < len) {
3335
        /*
3336
         * There were a couple of bugs where PERefs lead to to a change
3337
         * of the buffer. Check the buffer size to avoid passing an invalid
3338
         * pointer to xmlDictLookup.
3339
         */
3340
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3341
0
                    "unexpected change of input buffer");
3342
0
        return (NULL);
3343
0
    }
3344
165k
    if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3345
855
        return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3346
164k
    return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3347
165k
}
3348
3349
/**
3350
 * xmlParseName:
3351
 * @ctxt:  an XML parser context
3352
 *
3353
 * DEPRECATED: Internal function, don't use.
3354
 *
3355
 * parse an XML name.
3356
 *
3357
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3358
 *                  CombiningChar | Extender
3359
 *
3360
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3361
 *
3362
 * [6] Names ::= Name (#x20 Name)*
3363
 *
3364
 * Returns the Name parsed or NULL
3365
 */
3366
3367
const xmlChar *
3368
32.9M
xmlParseName(xmlParserCtxtPtr ctxt) {
3369
32.9M
    const xmlChar *in;
3370
32.9M
    const xmlChar *ret;
3371
32.9M
    size_t count = 0;
3372
32.9M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3373
9.61M
                       XML_MAX_TEXT_LENGTH :
3374
32.9M
                       XML_MAX_NAME_LENGTH;
3375
3376
32.9M
    GROW;
3377
3378
#ifdef DEBUG
3379
    nbParseName++;
3380
#endif
3381
3382
    /*
3383
     * Accelerator for simple ASCII names
3384
     */
3385
32.9M
    in = ctxt->input->cur;
3386
32.9M
    if (((*in >= 0x61) && (*in <= 0x7A)) ||
3387
32.9M
  ((*in >= 0x41) && (*in <= 0x5A)) ||
3388
32.9M
  (*in == '_') || (*in == ':')) {
3389
32.6M
  in++;
3390
149M
  while (((*in >= 0x61) && (*in <= 0x7A)) ||
3391
149M
         ((*in >= 0x41) && (*in <= 0x5A)) ||
3392
149M
         ((*in >= 0x30) && (*in <= 0x39)) ||
3393
149M
         (*in == '_') || (*in == '-') ||
3394
149M
         (*in == ':') || (*in == '.'))
3395
116M
      in++;
3396
32.6M
  if ((*in > 0) && (*in < 0x80)) {
3397
32.4M
      count = in - ctxt->input->cur;
3398
32.4M
            if (count > maxLength) {
3399
6
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3400
6
                return(NULL);
3401
6
            }
3402
32.4M
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3403
32.4M
      ctxt->input->cur = in;
3404
32.4M
      ctxt->input->col += count;
3405
32.4M
      if (ret == NULL)
3406
0
          xmlErrMemory(ctxt, NULL);
3407
32.4M
      return(ret);
3408
32.4M
  }
3409
32.6M
    }
3410
    /* accelerator for special cases */
3411
496k
    return(xmlParseNameComplex(ctxt));
3412
32.9M
}
3413
3414
static const xmlChar *
3415
288k
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3416
288k
    int len = 0, l;
3417
288k
    int c;
3418
288k
    int count = 0;
3419
288k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3420
113k
                    XML_MAX_TEXT_LENGTH :
3421
288k
                    XML_MAX_NAME_LENGTH;
3422
288k
    size_t startPosition = 0;
3423
3424
#ifdef DEBUG
3425
    nbParseNCNameComplex++;
3426
#endif
3427
3428
    /*
3429
     * Handler for more complex cases
3430
     */
3431
288k
    GROW;
3432
288k
    startPosition = CUR_PTR - BASE_PTR;
3433
288k
    c = CUR_CHAR(l);
3434
288k
    if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3435
288k
  (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3436
232k
  return(NULL);
3437
232k
    }
3438
3439
1.97M
    while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3440
1.97M
     (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3441
1.91M
  if (count++ > XML_PARSER_CHUNK_SIZE) {
3442
15.2k
      count = 0;
3443
15.2k
      GROW;
3444
15.2k
            if (ctxt->instate == XML_PARSER_EOF)
3445
0
                return(NULL);
3446
15.2k
  }
3447
1.91M
        if (len <= INT_MAX - l)
3448
1.91M
      len += l;
3449
1.91M
  NEXTL(l);
3450
1.91M
  c = CUR_CHAR(l);
3451
1.91M
  if (c == 0) {
3452
5.53k
      count = 0;
3453
      /*
3454
       * when shrinking to extend the buffer we really need to preserve
3455
       * the part of the name we already parsed. Hence rolling back
3456
       * by current length.
3457
       */
3458
5.53k
      ctxt->input->cur -= l;
3459
5.53k
      GROW;
3460
5.53k
            if (ctxt->instate == XML_PARSER_EOF)
3461
0
                return(NULL);
3462
5.53k
      ctxt->input->cur += l;
3463
5.53k
      c = CUR_CHAR(l);
3464
5.53k
  }
3465
1.91M
    }
3466
56.1k
    if (len > maxLength) {
3467
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3468
0
        return(NULL);
3469
0
    }
3470
56.1k
    return(xmlDictLookup(ctxt->dict, (BASE_PTR + startPosition), len));
3471
56.1k
}
3472
3473
/**
3474
 * xmlParseNCName:
3475
 * @ctxt:  an XML parser context
3476
 * @len:  length of the string parsed
3477
 *
3478
 * parse an XML name.
3479
 *
3480
 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3481
 *                      CombiningChar | Extender
3482
 *
3483
 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3484
 *
3485
 * Returns the Name parsed or NULL
3486
 */
3487
3488
static const xmlChar *
3489
10.1M
xmlParseNCName(xmlParserCtxtPtr ctxt) {
3490
10.1M
    const xmlChar *in, *e;
3491
10.1M
    const xmlChar *ret;
3492
10.1M
    size_t count = 0;
3493
10.1M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3494
3.15M
                       XML_MAX_TEXT_LENGTH :
3495
10.1M
                       XML_MAX_NAME_LENGTH;
3496
3497
#ifdef DEBUG
3498
    nbParseNCName++;
3499
#endif
3500
3501
    /*
3502
     * Accelerator for simple ASCII names
3503
     */
3504
10.1M
    in = ctxt->input->cur;
3505
10.1M
    e = ctxt->input->end;
3506
10.1M
    if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3507
10.1M
   ((*in >= 0x41) && (*in <= 0x5A)) ||
3508
10.1M
   (*in == '_')) && (in < e)) {
3509
9.88M
  in++;
3510
35.0M
  while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3511
35.0M
          ((*in >= 0x41) && (*in <= 0x5A)) ||
3512
35.0M
          ((*in >= 0x30) && (*in <= 0x39)) ||
3513
35.0M
          (*in == '_') || (*in == '-') ||
3514
35.0M
          (*in == '.')) && (in < e))
3515
25.1M
      in++;
3516
9.88M
  if (in >= e)
3517
1.26k
      goto complex;
3518
9.88M
  if ((*in > 0) && (*in < 0x80)) {
3519
9.84M
      count = in - ctxt->input->cur;
3520
9.84M
            if (count > maxLength) {
3521
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3522
0
                return(NULL);
3523
0
            }
3524
9.84M
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3525
9.84M
      ctxt->input->cur = in;
3526
9.84M
      ctxt->input->col += count;
3527
9.84M
      if (ret == NULL) {
3528
0
          xmlErrMemory(ctxt, NULL);
3529
0
      }
3530
9.84M
      return(ret);
3531
9.84M
  }
3532
9.88M
    }
3533
288k
complex:
3534
288k
    return(xmlParseNCNameComplex(ctxt));
3535
10.1M
}
3536
3537
/**
3538
 * xmlParseNameAndCompare:
3539
 * @ctxt:  an XML parser context
3540
 *
3541
 * parse an XML name and compares for match
3542
 * (specialized for endtag parsing)
3543
 *
3544
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3545
 * and the name for mismatch
3546
 */
3547
3548
static const xmlChar *
3549
3.43M
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3550
3.43M
    register const xmlChar *cmp = other;
3551
3.43M
    register const xmlChar *in;
3552
3.43M
    const xmlChar *ret;
3553
3554
3.43M
    GROW;
3555
3.43M
    if (ctxt->instate == XML_PARSER_EOF)
3556
0
        return(NULL);
3557
3558
3.43M
    in = ctxt->input->cur;
3559
17.7M
    while (*in != 0 && *in == *cmp) {
3560
14.3M
  ++in;
3561
14.3M
  ++cmp;
3562
14.3M
    }
3563
3.43M
    if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3564
  /* success */
3565
3.28M
  ctxt->input->col += in - ctxt->input->cur;
3566
3.28M
  ctxt->input->cur = in;
3567
3.28M
  return (const xmlChar*) 1;
3568
3.28M
    }
3569
    /* failure (or end of input buffer), check with full function */
3570
156k
    ret = xmlParseName (ctxt);
3571
    /* strings coming from the dictionary direct compare possible */
3572
156k
    if (ret == other) {
3573
6.80k
  return (const xmlChar*) 1;
3574
6.80k
    }
3575
149k
    return ret;
3576
156k
}
3577
3578
/**
3579
 * xmlParseStringName:
3580
 * @ctxt:  an XML parser context
3581
 * @str:  a pointer to the string pointer (IN/OUT)
3582
 *
3583
 * parse an XML name.
3584
 *
3585
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3586
 *                  CombiningChar | Extender
3587
 *
3588
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3589
 *
3590
 * [6] Names ::= Name (#x20 Name)*
3591
 *
3592
 * Returns the Name parsed or NULL. The @str pointer
3593
 * is updated to the current location in the string.
3594
 */
3595
3596
static xmlChar *
3597
13.4M
xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3598
13.4M
    xmlChar buf[XML_MAX_NAMELEN + 5];
3599
13.4M
    const xmlChar *cur = *str;
3600
13.4M
    int len = 0, l;
3601
13.4M
    int c;
3602
13.4M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3603
778k
                    XML_MAX_TEXT_LENGTH :
3604
13.4M
                    XML_MAX_NAME_LENGTH;
3605
3606
#ifdef DEBUG
3607
    nbParseStringName++;
3608
#endif
3609
3610
13.4M
    c = CUR_SCHAR(cur, l);
3611
13.4M
    if (!xmlIsNameStartChar(ctxt, c)) {
3612
12.1k
  return(NULL);
3613
12.1k
    }
3614
3615
13.4M
    COPY_BUF(l,buf,len,c);
3616
13.4M
    cur += l;
3617
13.4M
    c = CUR_SCHAR(cur, l);
3618
125M
    while (xmlIsNameChar(ctxt, c)) {
3619
112M
  COPY_BUF(l,buf,len,c);
3620
112M
  cur += l;
3621
112M
  c = CUR_SCHAR(cur, l);
3622
112M
  if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3623
      /*
3624
       * Okay someone managed to make a huge name, so he's ready to pay
3625
       * for the processing speed.
3626
       */
3627
639k
      xmlChar *buffer;
3628
639k
      int max = len * 2;
3629
3630
639k
      buffer = (xmlChar *) xmlMallocAtomic(max);
3631
639k
      if (buffer == NULL) {
3632
0
          xmlErrMemory(ctxt, NULL);
3633
0
    return(NULL);
3634
0
      }
3635
639k
      memcpy(buffer, buf, len);
3636
146M
      while (xmlIsNameChar(ctxt, c)) {
3637
145M
    if (len + 10 > max) {
3638
641k
        xmlChar *tmp;
3639
3640
641k
        max *= 2;
3641
641k
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3642
641k
        if (tmp == NULL) {
3643
0
      xmlErrMemory(ctxt, NULL);
3644
0
      xmlFree(buffer);
3645
0
      return(NULL);
3646
0
        }
3647
641k
        buffer = tmp;
3648
641k
    }
3649
145M
    COPY_BUF(l,buffer,len,c);
3650
145M
    cur += l;
3651
145M
    c = CUR_SCHAR(cur, l);
3652
145M
                if (len > maxLength) {
3653
0
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3654
0
                    xmlFree(buffer);
3655
0
                    return(NULL);
3656
0
                }
3657
145M
      }
3658
639k
      buffer[len] = 0;
3659
639k
      *str = cur;
3660
639k
      return(buffer);
3661
639k
  }
3662
112M
    }
3663
12.8M
    if (len > maxLength) {
3664
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3665
0
        return(NULL);
3666
0
    }
3667
12.8M
    *str = cur;
3668
12.8M
    return(xmlStrndup(buf, len));
3669
12.8M
}
3670
3671
/**
3672
 * xmlParseNmtoken:
3673
 * @ctxt:  an XML parser context
3674
 *
3675
 * DEPRECATED: Internal function, don't use.
3676
 *
3677
 * parse an XML Nmtoken.
3678
 *
3679
 * [7] Nmtoken ::= (NameChar)+
3680
 *
3681
 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3682
 *
3683
 * Returns the Nmtoken parsed or NULL
3684
 */
3685
3686
xmlChar *
3687
852k
xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3688
852k
    xmlChar buf[XML_MAX_NAMELEN + 5];
3689
852k
    int len = 0, l;
3690
852k
    int c;
3691
852k
    int count = 0;
3692
852k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3693
419k
                    XML_MAX_TEXT_LENGTH :
3694
852k
                    XML_MAX_NAME_LENGTH;
3695
3696
#ifdef DEBUG
3697
    nbParseNmToken++;
3698
#endif
3699
3700
852k
    GROW;
3701
852k
    if (ctxt->instate == XML_PARSER_EOF)
3702
3
        return(NULL);
3703
852k
    c = CUR_CHAR(l);
3704
3705
5.08M
    while (xmlIsNameChar(ctxt, c)) {
3706
4.23M
  if (count++ > XML_PARSER_CHUNK_SIZE) {
3707
0
      count = 0;
3708
0
      GROW;
3709
0
  }
3710
4.23M
  COPY_BUF(l,buf,len,c);
3711
4.23M
  NEXTL(l);
3712
4.23M
  c = CUR_CHAR(l);
3713
4.23M
  if (c == 0) {
3714
865
      count = 0;
3715
865
      GROW;
3716
865
      if (ctxt->instate == XML_PARSER_EOF)
3717
0
    return(NULL);
3718
865
            c = CUR_CHAR(l);
3719
865
  }
3720
4.23M
  if (len >= XML_MAX_NAMELEN) {
3721
      /*
3722
       * Okay someone managed to make a huge token, so he's ready to pay
3723
       * for the processing speed.
3724
       */
3725
738
      xmlChar *buffer;
3726
738
      int max = len * 2;
3727
3728
738
      buffer = (xmlChar *) xmlMallocAtomic(max);
3729
738
      if (buffer == NULL) {
3730
0
          xmlErrMemory(ctxt, NULL);
3731
0
    return(NULL);
3732
0
      }
3733
738
      memcpy(buffer, buf, len);
3734
1.28M
      while (xmlIsNameChar(ctxt, c)) {
3735
1.28M
    if (count++ > XML_PARSER_CHUNK_SIZE) {
3736
12.9k
        count = 0;
3737
12.9k
        GROW;
3738
12.9k
                    if (ctxt->instate == XML_PARSER_EOF) {
3739
0
                        xmlFree(buffer);
3740
0
                        return(NULL);
3741
0
                    }
3742
12.9k
    }
3743
1.28M
    if (len + 10 > max) {
3744
1.34k
        xmlChar *tmp;
3745
3746
1.34k
        max *= 2;
3747
1.34k
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3748
1.34k
        if (tmp == NULL) {
3749
0
      xmlErrMemory(ctxt, NULL);
3750
0
      xmlFree(buffer);
3751
0
      return(NULL);
3752
0
        }
3753
1.34k
        buffer = tmp;
3754
1.34k
    }
3755
1.28M
    COPY_BUF(l,buffer,len,c);
3756
1.28M
    NEXTL(l);
3757
1.28M
    c = CUR_CHAR(l);
3758
1.28M
                if (len > maxLength) {
3759
0
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3760
0
                    xmlFree(buffer);
3761
0
                    return(NULL);
3762
0
                }
3763
1.28M
      }
3764
738
      buffer[len] = 0;
3765
738
      return(buffer);
3766
738
  }
3767
4.23M
    }
3768
852k
    if (len == 0)
3769
5.57k
        return(NULL);
3770
846k
    if (len > maxLength) {
3771
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3772
0
        return(NULL);
3773
0
    }
3774
846k
    return(xmlStrndup(buf, len));
3775
846k
}
3776
3777
/**
3778
 * xmlParseEntityValue:
3779
 * @ctxt:  an XML parser context
3780
 * @orig:  if non-NULL store a copy of the original entity value
3781
 *
3782
 * DEPRECATED: Internal function, don't use.
3783
 *
3784
 * parse a value for ENTITY declarations
3785
 *
3786
 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3787
 *                 "'" ([^%&'] | PEReference | Reference)* "'"
3788
 *
3789
 * Returns the EntityValue parsed with reference substituted or NULL
3790
 */
3791
3792
xmlChar *
3793
542k
xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3794
542k
    xmlChar *buf = NULL;
3795
542k
    int len = 0;
3796
542k
    int size = XML_PARSER_BUFFER_SIZE;
3797
542k
    int c, l;
3798
542k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3799
200k
                    XML_MAX_HUGE_LENGTH :
3800
542k
                    XML_MAX_TEXT_LENGTH;
3801
542k
    xmlChar stop;
3802
542k
    xmlChar *ret = NULL;
3803
542k
    const xmlChar *cur = NULL;
3804
542k
    xmlParserInputPtr input;
3805
3806
542k
    if (RAW == '"') stop = '"';
3807
137k
    else if (RAW == '\'') stop = '\'';
3808
0
    else {
3809
0
  xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
3810
0
  return(NULL);
3811
0
    }
3812
542k
    buf = (xmlChar *) xmlMallocAtomic(size);
3813
542k
    if (buf == NULL) {
3814
0
  xmlErrMemory(ctxt, NULL);
3815
0
  return(NULL);
3816
0
    }
3817
3818
    /*
3819
     * The content of the entity definition is copied in a buffer.
3820
     */
3821
3822
542k
    ctxt->instate = XML_PARSER_ENTITY_VALUE;
3823
542k
    input = ctxt->input;
3824
542k
    GROW;
3825
542k
    if (ctxt->instate == XML_PARSER_EOF)
3826
0
        goto error;
3827
542k
    NEXT;
3828
542k
    c = CUR_CHAR(l);
3829
    /*
3830
     * NOTE: 4.4.5 Included in Literal
3831
     * When a parameter entity reference appears in a literal entity
3832
     * value, ... a single or double quote character in the replacement
3833
     * text is always treated as a normal data character and will not
3834
     * terminate the literal.
3835
     * In practice it means we stop the loop only when back at parsing
3836
     * the initial entity and the quote is found
3837
     */
3838
31.7M
    while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3839
31.7M
      (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
3840
31.2M
  if (len + 5 >= size) {
3841
95.6k
      xmlChar *tmp;
3842
3843
95.6k
      size *= 2;
3844
95.6k
      tmp = (xmlChar *) xmlRealloc(buf, size);
3845
95.6k
      if (tmp == NULL) {
3846
0
    xmlErrMemory(ctxt, NULL);
3847
0
                goto error;
3848
0
      }
3849
95.6k
      buf = tmp;
3850
95.6k
  }
3851
31.2M
  COPY_BUF(l,buf,len,c);
3852
31.2M
  NEXTL(l);
3853
3854
31.2M
  GROW;
3855
31.2M
  c = CUR_CHAR(l);
3856
31.2M
  if (c == 0) {
3857
592
      GROW;
3858
592
      c = CUR_CHAR(l);
3859
592
  }
3860
3861
31.2M
        if (len > maxLength) {
3862
0
            xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
3863
0
                           "entity value too long\n");
3864
0
            goto error;
3865
0
        }
3866
31.2M
    }
3867
542k
    buf[len] = 0;
3868
542k
    if (ctxt->instate == XML_PARSER_EOF)
3869
0
        goto error;
3870
542k
    if (c != stop) {
3871
816
        xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3872
816
        goto error;
3873
816
    }
3874
541k
    NEXT;
3875
3876
    /*
3877
     * Raise problem w.r.t. '&' and '%' being used in non-entities
3878
     * reference constructs. Note Charref will be handled in
3879
     * xmlStringDecodeEntities()
3880
     */
3881
541k
    cur = buf;
3882
25.5M
    while (*cur != 0) { /* non input consuming */
3883
25.0M
  if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3884
371k
      xmlChar *name;
3885
371k
      xmlChar tmp = *cur;
3886
371k
            int nameOk = 0;
3887
3888
371k
      cur++;
3889
371k
      name = xmlParseStringName(ctxt, &cur);
3890
371k
            if (name != NULL) {
3891
370k
                nameOk = 1;
3892
370k
                xmlFree(name);
3893
370k
            }
3894
371k
            if ((nameOk == 0) || (*cur != ';')) {
3895
3.90k
    xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3896
3.90k
      "EntityValue: '%c' forbidden except for entities references\n",
3897
3.90k
                            tmp);
3898
3.90k
                goto error;
3899
3.90k
      }
3900
367k
      if ((tmp == '%') && (ctxt->inSubset == 1) &&
3901
367k
    (ctxt->inputNr == 1)) {
3902
3.13k
    xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3903
3.13k
                goto error;
3904
3.13k
      }
3905
364k
      if (*cur == 0)
3906
0
          break;
3907
364k
  }
3908
25.0M
  cur++;
3909
25.0M
    }
3910
3911
    /*
3912
     * Then PEReference entities are substituted.
3913
     *
3914
     * NOTE: 4.4.7 Bypassed
3915
     * When a general entity reference appears in the EntityValue in
3916
     * an entity declaration, it is bypassed and left as is.
3917
     * so XML_SUBSTITUTE_REF is not set here.
3918
     */
3919
534k
    ++ctxt->depth;
3920
534k
    ret = xmlStringDecodeEntitiesInt(ctxt, buf, len, XML_SUBSTITUTE_PEREF,
3921
534k
                                     0, 0, 0, /* check */ 1);
3922
534k
    --ctxt->depth;
3923
3924
534k
    if (orig != NULL) {
3925
534k
        *orig = buf;
3926
534k
        buf = NULL;
3927
534k
    }
3928
3929
542k
error:
3930
542k
    if (buf != NULL)
3931
7.84k
        xmlFree(buf);
3932
542k
    return(ret);
3933
534k
}
3934
3935
/**
3936
 * xmlParseAttValueComplex:
3937
 * @ctxt:  an XML parser context
3938
 * @len:   the resulting attribute len
3939
 * @normalize:  whether to apply the inner normalization
3940
 *
3941
 * parse a value for an attribute, this is the fallback function
3942
 * of xmlParseAttValue() when the attribute parsing requires handling
3943
 * of non-ASCII characters, or normalization compaction.
3944
 *
3945
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3946
 */
3947
static xmlChar *
3948
359k
xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
3949
359k
    xmlChar limit = 0;
3950
359k
    xmlChar *buf = NULL;
3951
359k
    xmlChar *rep = NULL;
3952
359k
    size_t len = 0;
3953
359k
    size_t buf_size = 0;
3954
359k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3955
156k
                       XML_MAX_HUGE_LENGTH :
3956
359k
                       XML_MAX_TEXT_LENGTH;
3957
359k
    int c, l, in_space = 0;
3958
359k
    xmlChar *current = NULL;
3959
359k
    xmlEntityPtr ent;
3960
3961
359k
    if (NXT(0) == '"') {
3962
244k
  ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3963
244k
  limit = '"';
3964
244k
        NEXT;
3965
244k
    } else if (NXT(0) == '\'') {
3966
115k
  limit = '\'';
3967
115k
  ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3968
115k
        NEXT;
3969
115k
    } else {
3970
0
  xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3971
0
  return(NULL);
3972
0
    }
3973
3974
    /*
3975
     * allocate a translation buffer.
3976
     */
3977
359k
    buf_size = XML_PARSER_BUFFER_SIZE;
3978
359k
    buf = (xmlChar *) xmlMallocAtomic(buf_size);
3979
359k
    if (buf == NULL) goto mem_error;
3980
3981
    /*
3982
     * OK loop until we reach one of the ending char or a size limit.
3983
     */
3984
359k
    c = CUR_CHAR(l);
3985
17.7M
    while (((NXT(0) != limit) && /* checked */
3986
17.7M
            (IS_CHAR(c)) && (c != '<')) &&
3987
17.7M
            (ctxt->instate != XML_PARSER_EOF)) {
3988
17.4M
  if (c == '&') {
3989
1.52M
      in_space = 0;
3990
1.52M
      if (NXT(1) == '#') {
3991
199k
    int val = xmlParseCharRef(ctxt);
3992
3993
199k
    if (val == '&') {
3994
1.65k
        if (ctxt->replaceEntities) {
3995
634
      if (len + 10 > buf_size) {
3996
56
          growBuffer(buf, 10);
3997
56
      }
3998
634
      buf[len++] = '&';
3999
1.02k
        } else {
4000
      /*
4001
       * The reparsing will be done in xmlStringGetNodeList()
4002
       * called by the attribute() function in SAX.c
4003
       */
4004
1.02k
      if (len + 10 > buf_size) {
4005
142
          growBuffer(buf, 10);
4006
142
      }
4007
1.02k
      buf[len++] = '&';
4008
1.02k
      buf[len++] = '#';
4009
1.02k
      buf[len++] = '3';
4010
1.02k
      buf[len++] = '8';
4011
1.02k
      buf[len++] = ';';
4012
1.02k
        }
4013
197k
    } else if (val != 0) {
4014
175k
        if (len + 10 > buf_size) {
4015
1.10k
      growBuffer(buf, 10);
4016
1.10k
        }
4017
175k
        len += xmlCopyChar(0, &buf[len], val);
4018
175k
    }
4019
1.32M
      } else {
4020
1.32M
    ent = xmlParseEntityRef(ctxt);
4021
1.32M
    if ((ent != NULL) &&
4022
1.32M
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
4023
33.8k
        if (len + 10 > buf_size) {
4024
70
      growBuffer(buf, 10);
4025
70
        }
4026
33.8k
        if ((ctxt->replaceEntities == 0) &&
4027
33.8k
            (ent->content[0] == '&')) {
4028
14.9k
      buf[len++] = '&';
4029
14.9k
      buf[len++] = '#';
4030
14.9k
      buf[len++] = '3';
4031
14.9k
      buf[len++] = '8';
4032
14.9k
      buf[len++] = ';';
4033
18.8k
        } else {
4034
18.8k
      buf[len++] = ent->content[0];
4035
18.8k
        }
4036
1.28M
    } else if ((ent != NULL) &&
4037
1.28M
               (ctxt->replaceEntities != 0)) {
4038
598k
        if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
4039
598k
                        if (xmlParserEntityCheck(ctxt, ent->length))
4040
0
                            goto error;
4041
4042
598k
      ++ctxt->depth;
4043
598k
      rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
4044
598k
                                ent->length, XML_SUBSTITUTE_REF, 0, 0, 0,
4045
598k
                                /* check */ 1);
4046
598k
      --ctxt->depth;
4047
598k
      if (rep != NULL) {
4048
587k
          current = rep;
4049
115M
          while (*current != 0) { /* non input consuming */
4050
114M
                                if ((*current == 0xD) || (*current == 0xA) ||
4051
114M
                                    (*current == 0x9)) {
4052
394k
                                    buf[len++] = 0x20;
4053
394k
                                    current++;
4054
394k
                                } else
4055
114M
                                    buf[len++] = *current++;
4056
114M
        if (len + 10 > buf_size) {
4057
18.8k
            growBuffer(buf, 10);
4058
18.8k
        }
4059
114M
          }
4060
587k
          xmlFree(rep);
4061
587k
          rep = NULL;
4062
587k
      }
4063
598k
        } else {
4064
0
      if (len + 10 > buf_size) {
4065
0
          growBuffer(buf, 10);
4066
0
      }
4067
0
      if (ent->content != NULL)
4068
0
          buf[len++] = ent->content[0];
4069
0
        }
4070
691k
    } else if (ent != NULL) {
4071
516k
        int i = xmlStrlen(ent->name);
4072
516k
        const xmlChar *cur = ent->name;
4073
4074
        /*
4075
                     * We also check for recursion and amplification
4076
                     * when entities are not substituted. They're
4077
                     * often expanded later.
4078
         */
4079
516k
        if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
4080
516k
      (ent->content != NULL)) {
4081
479k
                        if ((ent->flags & XML_ENT_CHECKED) == 0) {
4082
8.71k
                            unsigned long oldCopy = ctxt->sizeentcopy;
4083
4084
8.71k
                            ctxt->sizeentcopy = ent->length;
4085
4086
8.71k
                            ++ctxt->depth;
4087
8.71k
                            rep = xmlStringDecodeEntitiesInt(ctxt,
4088
8.71k
                                    ent->content, ent->length,
4089
8.71k
                                    XML_SUBSTITUTE_REF, 0, 0, 0,
4090
8.71k
                                    /* check */ 1);
4091
8.71k
                            --ctxt->depth;
4092
4093
                            /*
4094
                             * If we're parsing DTD content, the entity
4095
                             * might reference other entities which
4096
                             * weren't defined yet, so the check isn't
4097
                             * reliable.
4098
                             */
4099
8.71k
                            if (ctxt->inSubset == 0) {
4100
7.50k
                                ent->flags |= XML_ENT_CHECKED;
4101
7.50k
                                ent->expandedSize = ctxt->sizeentcopy;
4102
7.50k
                            }
4103
4104
8.71k
                            if (rep != NULL) {
4105
8.64k
                                xmlFree(rep);
4106
8.64k
                                rep = NULL;
4107
8.64k
                            } else {
4108
74
                                ent->content[0] = 0;
4109
74
                            }
4110
4111
8.71k
                            if (xmlParserEntityCheck(ctxt, oldCopy))
4112
3
                                goto error;
4113
470k
                        } else {
4114
470k
                            if (xmlParserEntityCheck(ctxt, ent->expandedSize))
4115
2
                                goto error;
4116
470k
                        }
4117
479k
        }
4118
4119
        /*
4120
         * Just output the reference
4121
         */
4122
516k
        buf[len++] = '&';
4123
518k
        while (len + i + 10 > buf_size) {
4124
4.96k
      growBuffer(buf, i + 10);
4125
4.96k
        }
4126
1.24M
        for (;i > 0;i--)
4127
730k
      buf[len++] = *cur++;
4128
516k
        buf[len++] = ';';
4129
516k
    }
4130
1.32M
      }
4131
15.8M
  } else {
4132
15.8M
      if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
4133
2.86M
          if ((len != 0) || (!normalize)) {
4134
2.73M
        if ((!normalize) || (!in_space)) {
4135
2.48M
      COPY_BUF(l,buf,len,0x20);
4136
2.49M
      while (len + 10 > buf_size) {
4137
17.3k
          growBuffer(buf, 10);
4138
17.3k
      }
4139
2.48M
        }
4140
2.73M
        in_space = 1;
4141
2.73M
    }
4142
13.0M
      } else {
4143
13.0M
          in_space = 0;
4144
13.0M
    COPY_BUF(l,buf,len,c);
4145
13.0M
    if (len + 10 > buf_size) {
4146
61.5k
        growBuffer(buf, 10);
4147
61.5k
    }
4148
13.0M
      }
4149
15.8M
      NEXTL(l);
4150
15.8M
  }
4151
17.4M
  GROW;
4152
17.4M
  c = CUR_CHAR(l);
4153
17.4M
        if (len > maxLength) {
4154
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4155
0
                           "AttValue length too long\n");
4156
0
            goto mem_error;
4157
0
        }
4158
17.4M
    }
4159
359k
    if (ctxt->instate == XML_PARSER_EOF)
4160
279
        goto error;
4161
4162
359k
    if ((in_space) && (normalize)) {
4163
19.8k
        while ((len > 0) && (buf[len - 1] == 0x20)) len--;
4164
8.93k
    }
4165
359k
    buf[len] = 0;
4166
359k
    if (RAW == '<') {
4167
84.2k
  xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4168
275k
    } else if (RAW != limit) {
4169
62.2k
  if ((c != 0) && (!IS_CHAR(c))) {
4170
23.3k
      xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4171
23.3k
         "invalid character in attribute value\n");
4172
38.9k
  } else {
4173
38.9k
      xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4174
38.9k
         "AttValue: ' expected\n");
4175
38.9k
        }
4176
62.2k
    } else
4177
213k
  NEXT;
4178
4179
359k
    if (attlen != NULL) *attlen = len;
4180
359k
    return(buf);
4181
4182
0
mem_error:
4183
0
    xmlErrMemory(ctxt, NULL);
4184
284
error:
4185
284
    if (buf != NULL)
4186
284
        xmlFree(buf);
4187
284
    if (rep != NULL)
4188
0
        xmlFree(rep);
4189
284
    return(NULL);
4190
0
}
4191
4192
/**
4193
 * xmlParseAttValue:
4194
 * @ctxt:  an XML parser context
4195
 *
4196
 * DEPRECATED: Internal function, don't use.
4197
 *
4198
 * parse a value for an attribute
4199
 * Note: the parser won't do substitution of entities here, this
4200
 * will be handled later in xmlStringGetNodeList
4201
 *
4202
 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4203
 *                   "'" ([^<&'] | Reference)* "'"
4204
 *
4205
 * 3.3.3 Attribute-Value Normalization:
4206
 * Before the value of an attribute is passed to the application or
4207
 * checked for validity, the XML processor must normalize it as follows:
4208
 * - a character reference is processed by appending the referenced
4209
 *   character to the attribute value
4210
 * - an entity reference is processed by recursively processing the
4211
 *   replacement text of the entity
4212
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4213
 *   appending #x20 to the normalized value, except that only a single
4214
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
4215
 *   parsed entity or the literal entity value of an internal parsed entity
4216
 * - other characters are processed by appending them to the normalized value
4217
 * If the declared value is not CDATA, then the XML processor must further
4218
 * process the normalized attribute value by discarding any leading and
4219
 * trailing space (#x20) characters, and by replacing sequences of space
4220
 * (#x20) characters by a single space (#x20) character.
4221
 * All attributes for which no declaration has been read should be treated
4222
 * by a non-validating parser as if declared CDATA.
4223
 *
4224
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4225
 */
4226
4227
4228
xmlChar *
4229
2.61M
xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4230
2.61M
    if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4231
2.61M
    return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
4232
2.61M
}
4233
4234
/**
4235
 * xmlParseSystemLiteral:
4236
 * @ctxt:  an XML parser context
4237
 *
4238
 * DEPRECATED: Internal function, don't use.
4239
 *
4240
 * parse an XML Literal
4241
 *
4242
 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4243
 *
4244
 * Returns the SystemLiteral parsed or NULL
4245
 */
4246
4247
xmlChar *
4248
179k
xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4249
179k
    xmlChar *buf = NULL;
4250
179k
    int len = 0;
4251
179k
    int size = XML_PARSER_BUFFER_SIZE;
4252
179k
    int cur, l;
4253
179k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4254
74.1k
                    XML_MAX_TEXT_LENGTH :
4255
179k
                    XML_MAX_NAME_LENGTH;
4256
179k
    xmlChar stop;
4257
179k
    int state = ctxt->instate;
4258
179k
    int count = 0;
4259
4260
179k
    SHRINK;
4261
179k
    if (RAW == '"') {
4262
158k
        NEXT;
4263
158k
  stop = '"';
4264
158k
    } else if (RAW == '\'') {
4265
14.5k
        NEXT;
4266
14.5k
  stop = '\'';
4267
14.5k
    } else {
4268
5.95k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4269
5.95k
  return(NULL);
4270
5.95k
    }
4271
4272
173k
    buf = (xmlChar *) xmlMallocAtomic(size);
4273
173k
    if (buf == NULL) {
4274
0
        xmlErrMemory(ctxt, NULL);
4275
0
  return(NULL);
4276
0
    }
4277
173k
    ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4278
173k
    cur = CUR_CHAR(l);
4279
11.1M
    while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4280
11.0M
  if (len + 5 >= size) {
4281
7.20k
      xmlChar *tmp;
4282
4283
7.20k
      size *= 2;
4284
7.20k
      tmp = (xmlChar *) xmlRealloc(buf, size);
4285
7.20k
      if (tmp == NULL) {
4286
0
          xmlFree(buf);
4287
0
    xmlErrMemory(ctxt, NULL);
4288
0
    ctxt->instate = (xmlParserInputState) state;
4289
0
    return(NULL);
4290
0
      }
4291
7.20k
      buf = tmp;
4292
7.20k
  }
4293
11.0M
  count++;
4294
11.0M
  if (count > 50) {
4295
168k
      SHRINK;
4296
168k
      GROW;
4297
168k
      count = 0;
4298
168k
            if (ctxt->instate == XML_PARSER_EOF) {
4299
0
          xmlFree(buf);
4300
0
    return(NULL);
4301
0
            }
4302
168k
  }
4303
11.0M
  COPY_BUF(l,buf,len,cur);
4304
11.0M
  NEXTL(l);
4305
11.0M
  cur = CUR_CHAR(l);
4306
11.0M
  if (cur == 0) {
4307
1.08k
      GROW;
4308
1.08k
      SHRINK;
4309
1.08k
      cur = CUR_CHAR(l);
4310
1.08k
  }
4311
11.0M
        if (len > maxLength) {
4312
63
            xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4313
63
            xmlFree(buf);
4314
63
            ctxt->instate = (xmlParserInputState) state;
4315
63
            return(NULL);
4316
63
        }
4317
11.0M
    }
4318
173k
    buf[len] = 0;
4319
173k
    ctxt->instate = (xmlParserInputState) state;
4320
173k
    if (!IS_CHAR(cur)) {
4321
1.82k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4322
171k
    } else {
4323
171k
  NEXT;
4324
171k
    }
4325
173k
    return(buf);
4326
173k
}
4327
4328
/**
4329
 * xmlParsePubidLiteral:
4330
 * @ctxt:  an XML parser context
4331
 *
4332
 * DEPRECATED: Internal function, don't use.
4333
 *
4334
 * parse an XML public literal
4335
 *
4336
 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4337
 *
4338
 * Returns the PubidLiteral parsed or NULL.
4339
 */
4340
4341
xmlChar *
4342
47.4k
xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4343
47.4k
    xmlChar *buf = NULL;
4344
47.4k
    int len = 0;
4345
47.4k
    int size = XML_PARSER_BUFFER_SIZE;
4346
47.4k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4347
19.1k
                    XML_MAX_TEXT_LENGTH :
4348
47.4k
                    XML_MAX_NAME_LENGTH;
4349
47.4k
    xmlChar cur;
4350
47.4k
    xmlChar stop;
4351
47.4k
    int count = 0;
4352
47.4k
    xmlParserInputState oldstate = ctxt->instate;
4353
4354
47.4k
    SHRINK;
4355
47.4k
    if (RAW == '"') {
4356
26.9k
        NEXT;
4357
26.9k
  stop = '"';
4358
26.9k
    } else if (RAW == '\'') {
4359
18.7k
        NEXT;
4360
18.7k
  stop = '\'';
4361
18.7k
    } else {
4362
1.77k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4363
1.77k
  return(NULL);
4364
1.77k
    }
4365
45.7k
    buf = (xmlChar *) xmlMallocAtomic(size);
4366
45.7k
    if (buf == NULL) {
4367
0
  xmlErrMemory(ctxt, NULL);
4368
0
  return(NULL);
4369
0
    }
4370
45.7k
    ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
4371
45.7k
    cur = CUR;
4372
2.43M
    while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
4373
2.39M
  if (len + 1 >= size) {
4374
3.11k
      xmlChar *tmp;
4375
4376
3.11k
      size *= 2;
4377
3.11k
      tmp = (xmlChar *) xmlRealloc(buf, size);
4378
3.11k
      if (tmp == NULL) {
4379
0
    xmlErrMemory(ctxt, NULL);
4380
0
    xmlFree(buf);
4381
0
    return(NULL);
4382
0
      }
4383
3.11k
      buf = tmp;
4384
3.11k
  }
4385
2.39M
  buf[len++] = cur;
4386
2.39M
  count++;
4387
2.39M
  if (count > 50) {
4388
31.2k
      SHRINK;
4389
31.2k
      GROW;
4390
31.2k
      count = 0;
4391
31.2k
            if (ctxt->instate == XML_PARSER_EOF) {
4392
0
    xmlFree(buf);
4393
0
    return(NULL);
4394
0
            }
4395
31.2k
  }
4396
2.39M
  NEXT;
4397
2.39M
  cur = CUR;
4398
2.39M
  if (cur == 0) {
4399
676
      GROW;
4400
676
      SHRINK;
4401
676
      cur = CUR;
4402
676
  }
4403
2.39M
        if (len > maxLength) {
4404
13
            xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4405
13
            xmlFree(buf);
4406
13
            return(NULL);
4407
13
        }
4408
2.39M
    }
4409
45.7k
    buf[len] = 0;
4410
45.7k
    if (cur != stop) {
4411
6.43k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4412
39.2k
    } else {
4413
39.2k
  NEXT;
4414
39.2k
    }
4415
45.7k
    ctxt->instate = oldstate;
4416
45.7k
    return(buf);
4417
45.7k
}
4418
4419
/* Forward declaration: defined after xmlParseCharData(), which calls it. */
static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt);
4420
4421
/*
4422
 * Lookup table indexed by byte value, used by the inner scanning loop of
 * xmlParseCharData(): the loop keeps advancing while the entry for the
 * current byte is non-zero.
 *
 * Entries are zero (scan stops) for bytes below 0x20 other than 0x09
 * (so both LF 0x0A and CR 0x0D stop the scan), for '&' (0x26),
 * '<' (0x3C) and ']' (0x5D), and for every byte >= 0x80 (non-ASCII).
 * All other entries hold the byte's own value.
4423
 */
4424
static const unsigned char test_char_data[256] = {
4425
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4426
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */
4427
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4428
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4429
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */
4430
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
4431
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
4432
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */
4433
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
4434
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
4435
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
4436
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */
4437
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
4438
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
4439
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
4440
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
4441
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */
4442
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4443
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4444
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4445
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4446
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4447
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4448
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4449
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4450
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4451
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4452
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4453
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4454
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4455
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4456
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
4457
};
4458
4459
/**
4460
 * xmlParseCharData:
4461
 * @ctxt:  an XML parser context
4462
 * @cdata:  unused
4463
 *
4464
 * DEPRECATED: Internal function, don't use.
4465
 *
4466
 * Parse character data. Always makes progress if the first char isn't
4467
 * '<' or '&'.
4468
 *
4469
 * if we are within a CDATA section ']]>' marks an end of section.
4470
 *
4471
 * The right angle bracket (>) may be represented using the string "&gt;",
4472
 * and must, for compatibility, be escaped using "&gt;" or a character
4473
 * reference when it appears in the string "]]>" in content, when that
4474
 * string is not marking the end of a CDATA section.
4475
 *
4476
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4477
 */
4478
4479
void
4480
11.4M
xmlParseCharData(xmlParserCtxtPtr ctxt, ATTRIBUTE_UNUSED int cdata) {
4481
11.4M
    const xmlChar *in;
4482
11.4M
    int nbchar = 0;
4483
11.4M
    int line = ctxt->input->line;
4484
11.4M
    int col = ctxt->input->col;
4485
11.4M
    int ccol;
4486
4487
11.4M
    SHRINK;
4488
11.4M
    GROW;
4489
    /*
4490
     * Accelerated common case where input don't need to be
4491
     * modified before passing it to the handler.
4492
     */
4493
11.4M
    in = ctxt->input->cur;
4494
14.5M
    do {
4495
18.3M
get_more_space:
4496
25.0M
        while (*in == 0x20) { in++; ctxt->input->col++; }
4497
18.3M
        if (*in == 0xA) {
4498
3.94M
            do {
4499
3.94M
                ctxt->input->line++; ctxt->input->col = 1;
4500
3.94M
                in++;
4501
3.94M
            } while (*in == 0xA);
4502
3.79M
            goto get_more_space;
4503
3.79M
        }
4504
14.5M
        if (*in == '<') {
4505
3.10M
            nbchar = in - ctxt->input->cur;
4506
3.10M
            if (nbchar > 0) {
4507
3.10M
                const xmlChar *tmp = ctxt->input->cur;
4508
3.10M
                ctxt->input->cur = in;
4509
4510
3.10M
                if ((ctxt->sax != NULL) &&
4511
3.10M
                    (ctxt->sax->ignorableWhitespace !=
4512
3.10M
                     ctxt->sax->characters)) {
4513
1.32M
                    if (areBlanks(ctxt, tmp, nbchar, 1)) {
4514
978k
                        if (ctxt->sax->ignorableWhitespace != NULL)
4515
978k
                            ctxt->sax->ignorableWhitespace(ctxt->userData,
4516
978k
                                                   tmp, nbchar);
4517
978k
                    } else {
4518
343k
                        if (ctxt->sax->characters != NULL)
4519
343k
                            ctxt->sax->characters(ctxt->userData,
4520
343k
                                                  tmp, nbchar);
4521
343k
                        if (*ctxt->space == -1)
4522
82.9k
                            *ctxt->space = -2;
4523
343k
                    }
4524
1.78M
                } else if ((ctxt->sax != NULL) &&
4525
1.78M
                           (ctxt->sax->characters != NULL)) {
4526
1.78M
                    ctxt->sax->characters(ctxt->userData,
4527
1.78M
                                          tmp, nbchar);
4528
1.78M
                }
4529
3.10M
            }
4530
3.10M
            return;
4531
3.10M
        }
4532
4533
14.9M
get_more:
4534
14.9M
        ccol = ctxt->input->col;
4535
210M
        while (test_char_data[*in]) {
4536
196M
            in++;
4537
196M
            ccol++;
4538
196M
        }
4539
14.9M
        ctxt->input->col = ccol;
4540
14.9M
        if (*in == 0xA) {
4541
3.28M
            do {
4542
3.28M
                ctxt->input->line++; ctxt->input->col = 1;
4543
3.28M
                in++;
4544
3.28M
            } while (*in == 0xA);
4545
3.20M
            goto get_more;
4546
3.20M
        }
4547
11.7M
        if (*in == ']') {
4548
283k
            if ((in[1] == ']') && (in[2] == '>')) {
4549
5.01k
                xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4550
5.01k
                ctxt->input->cur = in + 1;
4551
5.01k
                return;
4552
5.01k
            }
4553
277k
            in++;
4554
277k
            ctxt->input->col++;
4555
277k
            goto get_more;
4556
283k
        }
4557
11.4M
        nbchar = in - ctxt->input->cur;
4558
11.4M
        if (nbchar > 0) {
4559
8.41M
            if ((ctxt->sax != NULL) &&
4560
8.41M
                (ctxt->sax->ignorableWhitespace !=
4561
8.41M
                 ctxt->sax->characters) &&
4562
8.41M
                (IS_BLANK_CH(*ctxt->input->cur))) {
4563
1.61M
                const xmlChar *tmp = ctxt->input->cur;
4564
1.61M
                ctxt->input->cur = in;
4565
4566
1.61M
                if (areBlanks(ctxt, tmp, nbchar, 0)) {
4567
433k
                    if (ctxt->sax->ignorableWhitespace != NULL)
4568
433k
                        ctxt->sax->ignorableWhitespace(ctxt->userData,
4569
433k
                                                       tmp, nbchar);
4570
1.18M
                } else {
4571
1.18M
                    if (ctxt->sax->characters != NULL)
4572
1.18M
                        ctxt->sax->characters(ctxt->userData,
4573
1.18M
                                              tmp, nbchar);
4574
1.18M
                    if (*ctxt->space == -1)
4575
228k
                        *ctxt->space = -2;
4576
1.18M
                }
4577
1.61M
                line = ctxt->input->line;
4578
1.61M
                col = ctxt->input->col;
4579
6.80M
            } else if (ctxt->sax != NULL) {
4580
6.80M
                if (ctxt->sax->characters != NULL)
4581
6.80M
                    ctxt->sax->characters(ctxt->userData,
4582
6.80M
                                          ctxt->input->cur, nbchar);
4583
6.80M
                line = ctxt->input->line;
4584
6.80M
                col = ctxt->input->col;
4585
6.80M
            }
4586
8.41M
        }
4587
11.4M
        ctxt->input->cur = in;
4588
11.4M
        if (*in == 0xD) {
4589
3.15M
            in++;
4590
3.15M
            if (*in == 0xA) {
4591
3.12M
                ctxt->input->cur = in;
4592
3.12M
                in++;
4593
3.12M
                ctxt->input->line++; ctxt->input->col = 1;
4594
3.12M
                continue; /* while */
4595
3.12M
            }
4596
28.1k
            in--;
4597
28.1k
        }
4598
8.33M
        if (*in == '<') {
4599
6.79M
            return;
4600
6.79M
        }
4601
1.54M
        if (*in == '&') {
4602
656k
            return;
4603
656k
        }
4604
884k
        SHRINK;
4605
884k
        GROW;
4606
884k
        if (ctxt->instate == XML_PARSER_EOF)
4607
0
            return;
4608
884k
        in = ctxt->input->cur;
4609
4.00M
    } while (((*in >= 0x20) && (*in <= 0x7F)) ||
4610
4.00M
             (*in == 0x09) || (*in == 0x0a));
4611
902k
    ctxt->input->line = line;
4612
902k
    ctxt->input->col = col;
4613
902k
    xmlParseCharDataComplex(ctxt);
4614
902k
}
4615
4616
/**
4617
 * xmlParseCharDataComplex:
4618
 * @ctxt:  an XML parser context
4619
 * @cdata:  int indicating whether we are within a CDATA section
4620
 *
4621
 * Always makes progress if the first char isn't '<' or '&'.
4622
 *
4623
 * parse a CharData section.this is the fallback function
4624
 * of xmlParseCharData() when the parsing requires handling
4625
 * of non-ASCII characters.
4626
 */
4627
static void
4628
902k
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt) {
4629
902k
    xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4630
902k
    int nbchar = 0;
4631
902k
    int cur, l;
4632
902k
    int count = 0;
4633
4634
902k
    SHRINK;
4635
902k
    GROW;
4636
902k
    cur = CUR_CHAR(l);
4637
21.6M
    while ((cur != '<') && /* checked */
4638
21.6M
           (cur != '&') &&
4639
21.6M
     (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
4640
20.7M
  if ((cur == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4641
2.81k
      xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4642
2.81k
  }
4643
20.7M
  COPY_BUF(l,buf,nbchar,cur);
4644
  /* move current position before possible calling of ctxt->sax->characters */
4645
20.7M
  NEXTL(l);
4646
20.7M
  cur = CUR_CHAR(l);
4647
20.7M
  if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4648
65.6k
      buf[nbchar] = 0;
4649
4650
      /*
4651
       * OK the segment is to be consumed as chars.
4652
       */
4653
65.6k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4654
54.1k
    if (areBlanks(ctxt, buf, nbchar, 0)) {
4655
219
        if (ctxt->sax->ignorableWhitespace != NULL)
4656
219
      ctxt->sax->ignorableWhitespace(ctxt->userData,
4657
219
                                     buf, nbchar);
4658
53.8k
    } else {
4659
53.8k
        if (ctxt->sax->characters != NULL)
4660
53.8k
      ctxt->sax->characters(ctxt->userData, buf, nbchar);
4661
53.8k
        if ((ctxt->sax->characters !=
4662
53.8k
             ctxt->sax->ignorableWhitespace) &&
4663
53.8k
      (*ctxt->space == -1))
4664
1.68k
      *ctxt->space = -2;
4665
53.8k
    }
4666
54.1k
      }
4667
65.6k
      nbchar = 0;
4668
            /* something really bad happened in the SAX callback */
4669
65.6k
            if (ctxt->instate != XML_PARSER_CONTENT)
4670
0
                return;
4671
65.6k
  }
4672
20.7M
  count++;
4673
20.7M
  if (count > 50) {
4674
307k
      SHRINK;
4675
307k
      GROW;
4676
307k
      count = 0;
4677
307k
            if (ctxt->instate == XML_PARSER_EOF)
4678
0
    return;
4679
307k
  }
4680
20.7M
    }
4681
902k
    if (nbchar != 0) {
4682
400k
        buf[nbchar] = 0;
4683
  /*
4684
   * OK the segment is to be consumed as chars.
4685
   */
4686
400k
  if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4687
323k
      if (areBlanks(ctxt, buf, nbchar, 0)) {
4688
1.30k
    if (ctxt->sax->ignorableWhitespace != NULL)
4689
1.30k
        ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4690
322k
      } else {
4691
322k
    if (ctxt->sax->characters != NULL)
4692
322k
        ctxt->sax->characters(ctxt->userData, buf, nbchar);
4693
322k
    if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4694
322k
        (*ctxt->space == -1))
4695
41.3k
        *ctxt->space = -2;
4696
322k
      }
4697
323k
  }
4698
400k
    }
4699
902k
    if ((ctxt->input->cur < ctxt->input->end) && (!IS_CHAR(cur))) {
4700
  /* Generate the error and skip the offending character */
4701
564k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4702
564k
                          "PCDATA invalid Char value %d\n",
4703
564k
                    cur ? cur : CUR);
4704
564k
  NEXT;
4705
564k
    }
4706
902k
}
4707
4708
/**
4709
 * xmlParseExternalID:
4710
 * @ctxt:  an XML parser context
4711
 * @publicID:  a xmlChar** receiving PubidLiteral
4712
 * @strict: indicate whether we should restrict parsing to only
4713
 *          production [75], see NOTE below
4714
 *
4715
 * DEPRECATED: Internal function, don't use.
4716
 *
4717
 * Parse an External ID or a Public ID
4718
 *
4719
 * NOTE: Productions [75] and [83] interact badly since [75] can generate
4720
 *       'PUBLIC' S PubidLiteral S SystemLiteral
4721
 *
4722
 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4723
 *                   | 'PUBLIC' S PubidLiteral S SystemLiteral
4724
 *
4725
 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4726
 *
4727
 * Returns the function returns SystemLiteral and in the second
4728
 *                case publicID receives PubidLiteral, is strict is off
4729
 *                it is possible to return NULL and have publicID set.
4730
 */
4731
4732
xmlChar *
4733
281k
xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4734
281k
    xmlChar *URI = NULL;
4735
4736
281k
    SHRINK;
4737
4738
281k
    *publicID = NULL;
4739
281k
    if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4740
143k
        SKIP(6);
4741
143k
  if (SKIP_BLANKS == 0) {
4742
300
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4743
300
                     "Space required after 'SYSTEM'\n");
4744
300
  }
4745
143k
  URI = xmlParseSystemLiteral(ctxt);
4746
143k
  if (URI == NULL) {
4747
786
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4748
786
        }
4749
143k
    } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4750
47.4k
        SKIP(6);
4751
47.4k
  if (SKIP_BLANKS == 0) {
4752
355
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4753
355
        "Space required after 'PUBLIC'\n");
4754
355
  }
4755
47.4k
  *publicID = xmlParsePubidLiteral(ctxt);
4756
47.4k
  if (*publicID == NULL) {
4757
1.78k
      xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4758
1.78k
  }
4759
47.4k
  if (strict) {
4760
      /*
4761
       * We don't handle [83] so "S SystemLiteral" is required.
4762
       */
4763
34.2k
      if (SKIP_BLANKS == 0) {
4764
4.87k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4765
4.87k
      "Space required after the Public Identifier\n");
4766
4.87k
      }
4767
34.2k
  } else {
4768
      /*
4769
       * We handle [83] so we return immediately, if
4770
       * "S SystemLiteral" is not detected. We skip blanks if no
4771
             * system literal was found, but this is harmless since we must
4772
             * be at the end of a NotationDecl.
4773
       */
4774
13.2k
      if (SKIP_BLANKS == 0) return(NULL);
4775
2.03k
      if ((CUR != '\'') && (CUR != '"')) return(NULL);
4776
2.03k
  }
4777
35.6k
  URI = xmlParseSystemLiteral(ctxt);
4778
35.6k
  if (URI == NULL) {
4779
5.23k
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4780
5.23k
        }
4781
35.6k
    }
4782
269k
    return(URI);
4783
281k
}
4784
4785
/**
4786
 * xmlParseCommentComplex:
4787
 * @ctxt:  an XML parser context
4788
 * @buf:  the already parsed part of the buffer
4789
 * @len:  number of bytes in the buffer
4790
 * @size:  allocated size of the buffer
4791
 *
4792
 * Skip an XML (SGML) comment <!-- .... -->
4793
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4794
 *  must not occur within comments. "
4795
 * This is the slow routine in case the accelerator for ascii didn't work
4796
 *
4797
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4798
 */
4799
static void
4800
xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4801
65.4k
                       size_t len, size_t size) {
4802
65.4k
    int q, ql;
4803
65.4k
    int r, rl;
4804
65.4k
    int cur, l;
4805
65.4k
    size_t count = 0;
4806
65.4k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4807
30.3k
                       XML_MAX_HUGE_LENGTH :
4808
65.4k
                       XML_MAX_TEXT_LENGTH;
4809
65.4k
    int inputid;
4810
4811
65.4k
    inputid = ctxt->input->id;
4812
4813
65.4k
    if (buf == NULL) {
4814
4.51k
        len = 0;
4815
4.51k
  size = XML_PARSER_BUFFER_SIZE;
4816
4.51k
  buf = (xmlChar *) xmlMallocAtomic(size);
4817
4.51k
  if (buf == NULL) {
4818
0
      xmlErrMemory(ctxt, NULL);
4819
0
      return;
4820
0
  }
4821
4.51k
    }
4822
65.4k
    GROW; /* Assure there's enough input data */
4823
65.4k
    q = CUR_CHAR(ql);
4824
65.4k
    if (q == 0)
4825
13.9k
        goto not_terminated;
4826
51.4k
    if (!IS_CHAR(q)) {
4827
10.4k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4828
10.4k
                          "xmlParseComment: invalid xmlChar value %d\n",
4829
10.4k
                    q);
4830
10.4k
  xmlFree (buf);
4831
10.4k
  return;
4832
10.4k
    }
4833
41.0k
    NEXTL(ql);
4834
41.0k
    r = CUR_CHAR(rl);
4835
41.0k
    if (r == 0)
4836
1.24k
        goto not_terminated;
4837
39.8k
    if (!IS_CHAR(r)) {
4838
842
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4839
842
                          "xmlParseComment: invalid xmlChar value %d\n",
4840
842
                    r);
4841
842
  xmlFree (buf);
4842
842
  return;
4843
842
    }
4844
38.9k
    NEXTL(rl);
4845
38.9k
    cur = CUR_CHAR(l);
4846
38.9k
    if (cur == 0)
4847
546
        goto not_terminated;
4848
11.5M
    while (IS_CHAR(cur) && /* checked */
4849
11.5M
           ((cur != '>') ||
4850
11.5M
      (r != '-') || (q != '-'))) {
4851
11.5M
  if ((r == '-') && (q == '-')) {
4852
7.28k
      xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4853
7.28k
  }
4854
11.5M
  if (len + 5 >= size) {
4855
22.7k
      xmlChar *new_buf;
4856
22.7k
            size_t new_size;
4857
4858
22.7k
      new_size = size * 2;
4859
22.7k
      new_buf = (xmlChar *) xmlRealloc(buf, new_size);
4860
22.7k
      if (new_buf == NULL) {
4861
0
    xmlFree (buf);
4862
0
    xmlErrMemory(ctxt, NULL);
4863
0
    return;
4864
0
      }
4865
22.7k
      buf = new_buf;
4866
22.7k
            size = new_size;
4867
22.7k
  }
4868
11.5M
  COPY_BUF(ql,buf,len,q);
4869
11.5M
  q = r;
4870
11.5M
  ql = rl;
4871
11.5M
  r = cur;
4872
11.5M
  rl = l;
4873
4874
11.5M
  count++;
4875
11.5M
  if (count > 50) {
4876
213k
      SHRINK;
4877
213k
      GROW;
4878
213k
      count = 0;
4879
213k
            if (ctxt->instate == XML_PARSER_EOF) {
4880
0
    xmlFree(buf);
4881
0
    return;
4882
0
            }
4883
213k
  }
4884
11.5M
  NEXTL(l);
4885
11.5M
  cur = CUR_CHAR(l);
4886
11.5M
  if (cur == 0) {
4887
9.91k
      SHRINK;
4888
9.91k
      GROW;
4889
9.91k
      cur = CUR_CHAR(l);
4890
9.91k
  }
4891
4892
11.5M
        if (len > maxLength) {
4893
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4894
0
                         "Comment too big found", NULL);
4895
0
            xmlFree (buf);
4896
0
            return;
4897
0
        }
4898
11.5M
    }
4899
38.4k
    buf[len] = 0;
4900
38.4k
    if (cur == 0) {
4901
9.91k
  xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4902
9.91k
                       "Comment not terminated \n<!--%.50s\n", buf);
4903
28.5k
    } else if (!IS_CHAR(cur)) {
4904
3.86k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4905
3.86k
                          "xmlParseComment: invalid xmlChar value %d\n",
4906
3.86k
                    cur);
4907
24.6k
    } else {
4908
24.6k
  if (inputid != ctxt->input->id) {
4909
0
      xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4910
0
               "Comment doesn't start and stop in the same"
4911
0
                           " entity\n");
4912
0
  }
4913
24.6k
        NEXT;
4914
24.6k
  if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4915
24.6k
      (!ctxt->disableSAX))
4916
19.2k
      ctxt->sax->comment(ctxt->userData, buf);
4917
24.6k
    }
4918
38.4k
    xmlFree(buf);
4919
38.4k
    return;
4920
15.7k
not_terminated:
4921
15.7k
    xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4922
15.7k
       "Comment not terminated\n", NULL);
4923
15.7k
    xmlFree(buf);
4924
15.7k
    return;
4925
38.4k
}
4926
4927
/**
4928
 * xmlParseComment:
4929
 * @ctxt:  an XML parser context
4930
 *
4931
 * DEPRECATED: Internal function, don't use.
4932
 *
4933
 * Parse an XML (SGML) comment. Always consumes '<!'.
4934
 *
4935
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4936
 *  must not occur within comments. "
4937
 *
4938
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4939
 */
4940
void
4941
12.7M
xmlParseComment(xmlParserCtxtPtr ctxt) {
4942
12.7M
    xmlChar *buf = NULL;
4943
12.7M
    size_t size = XML_PARSER_BUFFER_SIZE;
4944
12.7M
    size_t len = 0;
4945
12.7M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4946
886k
                       XML_MAX_HUGE_LENGTH :
4947
12.7M
                       XML_MAX_TEXT_LENGTH;
4948
12.7M
    xmlParserInputState state;
4949
12.7M
    const xmlChar *in;
4950
12.7M
    size_t nbchar = 0;
4951
12.7M
    int ccol;
4952
12.7M
    int inputid;
4953
4954
    /*
4955
     * Check that there is a comment right here.
4956
     */
4957
12.7M
    if ((RAW != '<') || (NXT(1) != '!'))
4958
0
        return;
4959
12.7M
    SKIP(2);
4960
12.7M
    if ((RAW != '-') || (NXT(1) != '-'))
4961
121
        return;
4962
12.7M
    state = ctxt->instate;
4963
12.7M
    ctxt->instate = XML_PARSER_COMMENT;
4964
12.7M
    inputid = ctxt->input->id;
4965
12.7M
    SKIP(2);
4966
12.7M
    SHRINK;
4967
12.7M
    GROW;
4968
4969
    /*
4970
     * Accelerated common case where input don't need to be
4971
     * modified before passing it to the handler.
4972
     */
4973
12.7M
    in = ctxt->input->cur;
4974
12.7M
    do {
4975
12.7M
  if (*in == 0xA) {
4976
99.2k
      do {
4977
99.2k
    ctxt->input->line++; ctxt->input->col = 1;
4978
99.2k
    in++;
4979
99.2k
      } while (*in == 0xA);
4980
97.8k
  }
4981
15.4M
get_more:
4982
15.4M
        ccol = ctxt->input->col;
4983
118M
  while (((*in > '-') && (*in <= 0x7F)) ||
4984
118M
         ((*in >= 0x20) && (*in < '-')) ||
4985
118M
         (*in == 0x09)) {
4986
103M
        in++;
4987
103M
        ccol++;
4988
103M
  }
4989
15.4M
  ctxt->input->col = ccol;
4990
15.4M
  if (*in == 0xA) {
4991
979k
      do {
4992
979k
    ctxt->input->line++; ctxt->input->col = 1;
4993
979k
    in++;
4994
979k
      } while (*in == 0xA);
4995
939k
      goto get_more;
4996
939k
  }
4997
14.4M
  nbchar = in - ctxt->input->cur;
4998
  /*
4999
   * save current set of data
5000
   */
5001
14.4M
  if (nbchar > 0) {
5002
2.34M
      if ((ctxt->sax != NULL) &&
5003
2.34M
    (ctxt->sax->comment != NULL)) {
5004
2.34M
    if (buf == NULL) {
5005
654k
        if ((*in == '-') && (in[1] == '-'))
5006
355k
            size = nbchar + 1;
5007
298k
        else
5008
298k
            size = XML_PARSER_BUFFER_SIZE + nbchar;
5009
654k
        buf = (xmlChar *) xmlMallocAtomic(size);
5010
654k
        if (buf == NULL) {
5011
0
            xmlErrMemory(ctxt, NULL);
5012
0
      ctxt->instate = state;
5013
0
      return;
5014
0
        }
5015
654k
        len = 0;
5016
1.68M
    } else if (len + nbchar + 1 >= size) {
5017
212k
        xmlChar *new_buf;
5018
212k
        size  += len + nbchar + XML_PARSER_BUFFER_SIZE;
5019
212k
        new_buf = (xmlChar *) xmlRealloc(buf, size);
5020
212k
        if (new_buf == NULL) {
5021
0
            xmlFree (buf);
5022
0
      xmlErrMemory(ctxt, NULL);
5023
0
      ctxt->instate = state;
5024
0
      return;
5025
0
        }
5026
212k
        buf = new_buf;
5027
212k
    }
5028
2.34M
    memcpy(&buf[len], ctxt->input->cur, nbchar);
5029
2.34M
    len += nbchar;
5030
2.34M
    buf[len] = 0;
5031
2.34M
      }
5032
2.34M
  }
5033
14.4M
        if (len > maxLength) {
5034
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5035
0
                         "Comment too big found", NULL);
5036
0
            xmlFree (buf);
5037
0
            return;
5038
0
        }
5039
14.4M
  ctxt->input->cur = in;
5040
14.4M
  if (*in == 0xA) {
5041
0
      in++;
5042
0
      ctxt->input->line++; ctxt->input->col = 1;
5043
0
  }
5044
14.4M
  if (*in == 0xD) {
5045
891k
      in++;
5046
891k
      if (*in == 0xA) {
5047
886k
    ctxt->input->cur = in;
5048
886k
    in++;
5049
886k
    ctxt->input->line++; ctxt->input->col = 1;
5050
886k
    goto get_more;
5051
886k
      }
5052
4.55k
      in--;
5053
4.55k
  }
5054
13.5M
  SHRINK;
5055
13.5M
  GROW;
5056
13.5M
        if (ctxt->instate == XML_PARSER_EOF) {
5057
0
            xmlFree(buf);
5058
0
            return;
5059
0
        }
5060
13.5M
  in = ctxt->input->cur;
5061
13.5M
  if (*in == '-') {
5062
13.5M
      if (in[1] == '-') {
5063
12.7M
          if (in[2] == '>') {
5064
12.7M
        if (ctxt->input->id != inputid) {
5065
0
      xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5066
0
                     "comment doesn't start and stop in the"
5067
0
                                       " same entity\n");
5068
0
        }
5069
12.7M
        SKIP(3);
5070
12.7M
        if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5071
12.7M
            (!ctxt->disableSAX)) {
5072
12.1M
      if (buf != NULL)
5073
535k
          ctxt->sax->comment(ctxt->userData, buf);
5074
11.6M
      else
5075
11.6M
          ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5076
12.1M
        }
5077
12.7M
        if (buf != NULL)
5078
593k
            xmlFree(buf);
5079
12.7M
        if (ctxt->instate != XML_PARSER_EOF)
5080
12.7M
      ctxt->instate = state;
5081
12.7M
        return;
5082
12.7M
    }
5083
17.7k
    if (buf != NULL) {
5084
10.6k
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5085
10.6k
                          "Double hyphen within comment: "
5086
10.6k
                                      "<!--%.50s\n",
5087
10.6k
              buf);
5088
10.6k
    } else
5089
7.14k
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5090
7.14k
                          "Double hyphen within comment\n", NULL);
5091
17.7k
                if (ctxt->instate == XML_PARSER_EOF) {
5092
0
                    xmlFree(buf);
5093
0
                    return;
5094
0
                }
5095
17.7k
    in++;
5096
17.7k
    ctxt->input->col++;
5097
17.7k
      }
5098
827k
      in++;
5099
827k
      ctxt->input->col++;
5100
827k
      goto get_more;
5101
13.5M
  }
5102
13.5M
    } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
5103
65.4k
    xmlParseCommentComplex(ctxt, buf, len, size);
5104
65.4k
    ctxt->instate = state;
5105
65.4k
    return;
5106
12.7M
}
5107
5108
5109
/**
5110
 * xmlParsePITarget:
5111
 * @ctxt:  an XML parser context
5112
 *
5113
 * DEPRECATED: Internal function, don't use.
5114
 *
5115
 * parse the name of a PI
5116
 *
5117
 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5118
 *
5119
 * Returns the PITarget name or NULL
5120
 */
5121
5122
const xmlChar *
5123
98.5k
xmlParsePITarget(xmlParserCtxtPtr ctxt) {
5124
98.5k
    const xmlChar *name;
5125
5126
98.5k
    name = xmlParseName(ctxt);
5127
98.5k
    if ((name != NULL) &&
5128
98.5k
        ((name[0] == 'x') || (name[0] == 'X')) &&
5129
98.5k
        ((name[1] == 'm') || (name[1] == 'M')) &&
5130
98.5k
        ((name[2] == 'l') || (name[2] == 'L'))) {
5131
30.8k
  int i;
5132
30.8k
  if ((name[0] == 'x') && (name[1] == 'm') &&
5133
30.8k
      (name[2] == 'l') && (name[3] == 0)) {
5134
6.23k
      xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5135
6.23k
     "XML declaration allowed only at the start of the document\n");
5136
6.23k
      return(name);
5137
24.5k
  } else if (name[3] == 0) {
5138
1.45k
      xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5139
1.45k
      return(name);
5140
1.45k
  }
5141
50.9k
  for (i = 0;;i++) {
5142
50.9k
      if (xmlW3CPIs[i] == NULL) break;
5143
37.1k
      if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5144
9.30k
          return(name);
5145
37.1k
  }
5146
13.8k
  xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5147
13.8k
          "xmlParsePITarget: invalid name prefix 'xml'\n",
5148
13.8k
          NULL, NULL);
5149
13.8k
    }
5150
81.5k
    if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5151
2.31k
  xmlNsErr(ctxt, XML_NS_ERR_COLON,
5152
2.31k
     "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5153
2.31k
    }
5154
81.5k
    return(name);
5155
98.5k
}
5156
5157
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * Parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document.
 *
 * The accepted syntax is: optional blanks, the word "catalog", optional
 * blanks, '=', optional blanks, a value quoted with ' or ", optional
 * blanks, end of string. Anything else produces an XML_WAR_CATALOG_PI
 * warning, except a missing '=' which is ignored silently.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *URL = NULL;
    const xmlChar *p = catalog;
    const xmlChar *valueStart;
    xmlChar quote;

    for (; IS_BLANK_CH(*p); p++)
        ;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto error;
    p += 7;

    for (; IS_BLANK_CH(*p); p++)
        ;
    /* NOTE(review): unlike the other syntax errors, no warning here */
    if (*p != '=')
        return;
    p++;

    for (; IS_BLANK_CH(*p); p++)
        ;
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto error;
    p++;

    /* the value runs up to the matching quote */
    valueStart = p;
    for (; (*p != 0) && (*p != quote); p++)
        ;
    if (*p == 0)
        goto error;
    URL = xmlStrndup(valueStart, p - valueStart);
    p++;

    /* only blanks may follow the closing quote */
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (*p != 0)
        goto error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
5218
5219
/**
5220
 * xmlParsePI:
5221
 * @ctxt:  an XML parser context
5222
 *
5223
 * DEPRECATED: Internal function, don't use.
5224
 *
5225
 * parse an XML Processing Instruction.
5226
 *
5227
 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5228
 *
5229
 * The processing is transferred to SAX once parsed.
5230
 */
5231
5232
void
5233
98.5k
xmlParsePI(xmlParserCtxtPtr ctxt) {
5234
98.5k
    xmlChar *buf = NULL;
5235
98.5k
    size_t len = 0;
5236
98.5k
    size_t size = XML_PARSER_BUFFER_SIZE;
5237
98.5k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5238
33.9k
                       XML_MAX_HUGE_LENGTH :
5239
98.5k
                       XML_MAX_TEXT_LENGTH;
5240
98.5k
    int cur, l;
5241
98.5k
    const xmlChar *target;
5242
98.5k
    xmlParserInputState state;
5243
98.5k
    int count = 0;
5244
5245
98.5k
    if ((RAW == '<') && (NXT(1) == '?')) {
5246
98.5k
  int inputid = ctxt->input->id;
5247
98.5k
  state = ctxt->instate;
5248
98.5k
        ctxt->instate = XML_PARSER_PI;
5249
  /*
5250
   * this is a Processing Instruction.
5251
   */
5252
98.5k
  SKIP(2);
5253
98.5k
  SHRINK;
5254
5255
  /*
5256
   * Parse the target name and check for special support like
5257
   * namespace.
5258
   */
5259
98.5k
        target = xmlParsePITarget(ctxt);
5260
98.5k
  if (target != NULL) {
5261
92.1k
      if ((RAW == '?') && (NXT(1) == '>')) {
5262
12.3k
    if (inputid != ctxt->input->id) {
5263
0
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5264
0
                             "PI declaration doesn't start and stop in"
5265
0
                                   " the same entity\n");
5266
0
    }
5267
12.3k
    SKIP(2);
5268
5269
    /*
5270
     * SAX: PI detected.
5271
     */
5272
12.3k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5273
12.3k
        (ctxt->sax->processingInstruction != NULL))
5274
8.43k
        ctxt->sax->processingInstruction(ctxt->userData,
5275
8.43k
                                         target, NULL);
5276
12.3k
    if (ctxt->instate != XML_PARSER_EOF)
5277
12.3k
        ctxt->instate = state;
5278
12.3k
    return;
5279
12.3k
      }
5280
79.7k
      buf = (xmlChar *) xmlMallocAtomic(size);
5281
79.7k
      if (buf == NULL) {
5282
0
    xmlErrMemory(ctxt, NULL);
5283
0
    ctxt->instate = state;
5284
0
    return;
5285
0
      }
5286
79.7k
      if (SKIP_BLANKS == 0) {
5287
19.3k
    xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5288
19.3k
        "ParsePI: PI %s space expected\n", target);
5289
19.3k
      }
5290
79.7k
      cur = CUR_CHAR(l);
5291
17.1M
      while (IS_CHAR(cur) && /* checked */
5292
17.1M
       ((cur != '?') || (NXT(1) != '>'))) {
5293
17.1M
    if (len + 5 >= size) {
5294
20.7k
        xmlChar *tmp;
5295
20.7k
                    size_t new_size = size * 2;
5296
20.7k
        tmp = (xmlChar *) xmlRealloc(buf, new_size);
5297
20.7k
        if (tmp == NULL) {
5298
0
      xmlErrMemory(ctxt, NULL);
5299
0
      xmlFree(buf);
5300
0
      ctxt->instate = state;
5301
0
      return;
5302
0
        }
5303
20.7k
        buf = tmp;
5304
20.7k
                    size = new_size;
5305
20.7k
    }
5306
17.1M
    count++;
5307
17.1M
    if (count > 50) {
5308
311k
        SHRINK;
5309
311k
        GROW;
5310
311k
                    if (ctxt->instate == XML_PARSER_EOF) {
5311
0
                        xmlFree(buf);
5312
0
                        return;
5313
0
                    }
5314
311k
        count = 0;
5315
311k
    }
5316
17.1M
    COPY_BUF(l,buf,len,cur);
5317
17.1M
    NEXTL(l);
5318
17.1M
    cur = CUR_CHAR(l);
5319
17.1M
    if (cur == 0) {
5320
7.86k
        SHRINK;
5321
7.86k
        GROW;
5322
7.86k
        cur = CUR_CHAR(l);
5323
7.86k
    }
5324
17.1M
                if (len > maxLength) {
5325
0
                    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5326
0
                                      "PI %s too big found", target);
5327
0
                    xmlFree(buf);
5328
0
                    ctxt->instate = state;
5329
0
                    return;
5330
0
                }
5331
17.1M
      }
5332
79.7k
      buf[len] = 0;
5333
79.7k
      if (cur != '?') {
5334
11.9k
    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5335
11.9k
          "ParsePI: PI %s never end ...\n", target);
5336
67.7k
      } else {
5337
67.7k
    if (inputid != ctxt->input->id) {
5338
167
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5339
167
                             "PI declaration doesn't start and stop in"
5340
167
                                   " the same entity\n");
5341
167
    }
5342
67.7k
    SKIP(2);
5343
5344
67.7k
#ifdef LIBXML_CATALOG_ENABLED
5345
67.7k
    if (((state == XML_PARSER_MISC) ||
5346
67.7k
               (state == XML_PARSER_START)) &&
5347
67.7k
        (xmlStrEqual(target, XML_CATALOG_PI))) {
5348
360
        xmlCatalogAllow allow = xmlCatalogGetDefaults();
5349
360
        if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5350
360
      (allow == XML_CATA_ALLOW_ALL))
5351
360
      xmlParseCatalogPI(ctxt, buf);
5352
360
    }
5353
67.7k
#endif
5354
5355
5356
    /*
5357
     * SAX: PI detected.
5358
     */
5359
67.7k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5360
67.7k
        (ctxt->sax->processingInstruction != NULL))
5361
54.3k
        ctxt->sax->processingInstruction(ctxt->userData,
5362
54.3k
                                         target, buf);
5363
67.7k
      }
5364
79.7k
      xmlFree(buf);
5365
79.7k
  } else {
5366
6.42k
      xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5367
6.42k
  }
5368
86.1k
  if (ctxt->instate != XML_PARSER_EOF)
5369
86.1k
      ctxt->instate = state;
5370
86.1k
    }
5371
98.5k
}
5372
5373
/**
5374
 * xmlParseNotationDecl:
5375
 * @ctxt:  an XML parser context
5376
 *
5377
 * DEPRECATED: Internal function, don't use.
5378
 *
5379
 * Parse a notation declaration. Always consumes '<!'.
5380
 *
5381
 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID |  PublicID) S? '>'
5382
 *
5383
 * Hence there is actually 3 choices:
5384
 *     'PUBLIC' S PubidLiteral
5385
 *     'PUBLIC' S PubidLiteral S SystemLiteral
5386
 * and 'SYSTEM' S SystemLiteral
5387
 *
5388
 * See the NOTE on xmlParseExternalID().
5389
 */
5390
5391
void
5392
21.4k
xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5393
21.4k
    const xmlChar *name;
5394
21.4k
    xmlChar *Pubid;
5395
21.4k
    xmlChar *Systemid;
5396
5397
21.4k
    if ((CUR != '<') || (NXT(1) != '!'))
5398
0
        return;
5399
21.4k
    SKIP(2);
5400
5401
21.4k
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5402
21.1k
  int inputid = ctxt->input->id;
5403
21.1k
  SHRINK;
5404
21.1k
  SKIP(8);
5405
21.1k
  if (SKIP_BLANKS == 0) {
5406
187
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5407
187
         "Space required after '<!NOTATION'\n");
5408
187
      return;
5409
187
  }
5410
5411
20.9k
        name = xmlParseName(ctxt);
5412
20.9k
  if (name == NULL) {
5413
454
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5414
454
      return;
5415
454
  }
5416
20.5k
  if (xmlStrchr(name, ':') != NULL) {
5417
682
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5418
682
         "colons are forbidden from notation names '%s'\n",
5419
682
         name, NULL, NULL);
5420
682
  }
5421
20.5k
  if (SKIP_BLANKS == 0) {
5422
1.36k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5423
1.36k
         "Space required after the NOTATION name'\n");
5424
1.36k
      return;
5425
1.36k
  }
5426
5427
  /*
5428
   * Parse the IDs.
5429
   */
5430
19.1k
  Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5431
19.1k
  SKIP_BLANKS;
5432
5433
19.1k
  if (RAW == '>') {
5434
14.4k
      if (inputid != ctxt->input->id) {
5435
12
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5436
12
                         "Notation declaration doesn't start and stop"
5437
12
                               " in the same entity\n");
5438
12
      }
5439
14.4k
      NEXT;
5440
14.4k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5441
14.4k
    (ctxt->sax->notationDecl != NULL))
5442
11.1k
    ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5443
14.4k
  } else {
5444
4.66k
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5445
4.66k
  }
5446
19.1k
  if (Systemid != NULL) xmlFree(Systemid);
5447
19.1k
  if (Pubid != NULL) xmlFree(Pubid);
5448
19.1k
    }
5449
21.4k
}
5450
5451
/**
5452
 * xmlParseEntityDecl:
5453
 * @ctxt:  an XML parser context
5454
 *
5455
 * DEPRECATED: Internal function, don't use.
5456
 *
5457
 * Parse an entity declaration. Always consumes '<!'.
5458
 *
5459
 * [70] EntityDecl ::= GEDecl | PEDecl
5460
 *
5461
 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5462
 *
5463
 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5464
 *
5465
 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5466
 *
5467
 * [74] PEDef ::= EntityValue | ExternalID
5468
 *
5469
 * [76] NDataDecl ::= S 'NDATA' S Name
5470
 *
5471
 * [ VC: Notation Declared ]
5472
 * The Name must match the declared name of a notation.
5473
 */
5474
5475
void
5476
653k
xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5477
653k
    const xmlChar *name = NULL;
5478
653k
    xmlChar *value = NULL;
5479
653k
    xmlChar *URI = NULL, *literal = NULL;
5480
653k
    const xmlChar *ndata = NULL;
5481
653k
    int isParameter = 0;
5482
653k
    xmlChar *orig = NULL;
5483
5484
653k
    if ((CUR != '<') || (NXT(1) != '!'))
5485
0
        return;
5486
653k
    SKIP(2);
5487
5488
    /* GROW; done in the caller */
5489
653k
    if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5490
653k
  int inputid = ctxt->input->id;
5491
653k
  SHRINK;
5492
653k
  SKIP(6);
5493
653k
  if (SKIP_BLANKS == 0) {
5494
2.17k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5495
2.17k
         "Space required after '<!ENTITY'\n");
5496
2.17k
  }
5497
5498
653k
  if (RAW == '%') {
5499
343k
      NEXT;
5500
343k
      if (SKIP_BLANKS == 0) {
5501
229
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5502
229
             "Space required after '%%'\n");
5503
229
      }
5504
343k
      isParameter = 1;
5505
343k
  }
5506
5507
653k
        name = xmlParseName(ctxt);
5508
653k
  if (name == NULL) {
5509
1.57k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5510
1.57k
                     "xmlParseEntityDecl: no name\n");
5511
1.57k
            return;
5512
1.57k
  }
5513
651k
  if (xmlStrchr(name, ':') != NULL) {
5514
1.48k
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5515
1.48k
         "colons are forbidden from entities names '%s'\n",
5516
1.48k
         name, NULL, NULL);
5517
1.48k
  }
5518
651k
  if (SKIP_BLANKS == 0) {
5519
3.65k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5520
3.65k
         "Space required after the entity name\n");
5521
3.65k
  }
5522
5523
651k
  ctxt->instate = XML_PARSER_ENTITY_DECL;
5524
  /*
5525
   * handle the various case of definitions...
5526
   */
5527
651k
  if (isParameter) {
5528
343k
      if ((RAW == '"') || (RAW == '\'')) {
5529
320k
          value = xmlParseEntityValue(ctxt, &orig);
5530
320k
    if (value) {
5531
314k
        if ((ctxt->sax != NULL) &&
5532
314k
      (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5533
289k
      ctxt->sax->entityDecl(ctxt->userData, name,
5534
289k
                        XML_INTERNAL_PARAMETER_ENTITY,
5535
289k
            NULL, NULL, value);
5536
314k
    }
5537
320k
      } else {
5538
23.1k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5539
23.1k
    if ((URI == NULL) && (literal == NULL)) {
5540
554
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5541
554
    }
5542
23.1k
    if (URI) {
5543
22.4k
        xmlURIPtr uri;
5544
5545
22.4k
        uri = xmlParseURI((const char *) URI);
5546
22.4k
        if (uri == NULL) {
5547
707
            xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5548
707
             "Invalid URI: %s\n", URI);
5549
      /*
5550
       * This really ought to be a well formedness error
5551
       * but the XML Core WG decided otherwise c.f. issue
5552
       * E26 of the XML erratas.
5553
       */
5554
21.7k
        } else {
5555
21.7k
      if (uri->fragment != NULL) {
5556
          /*
5557
           * Okay this is foolish to block those but not
5558
           * invalid URIs.
5559
           */
5560
38
          xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5561
21.6k
      } else {
5562
21.6k
          if ((ctxt->sax != NULL) &&
5563
21.6k
        (!ctxt->disableSAX) &&
5564
21.6k
        (ctxt->sax->entityDecl != NULL))
5565
20.8k
        ctxt->sax->entityDecl(ctxt->userData, name,
5566
20.8k
              XML_EXTERNAL_PARAMETER_ENTITY,
5567
20.8k
              literal, URI, NULL);
5568
21.6k
      }
5569
21.7k
      xmlFreeURI(uri);
5570
21.7k
        }
5571
22.4k
    }
5572
23.1k
      }
5573
343k
  } else {
5574
308k
      if ((RAW == '"') || (RAW == '\'')) {
5575
221k
          value = xmlParseEntityValue(ctxt, &orig);
5576
221k
    if ((ctxt->sax != NULL) &&
5577
221k
        (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5578
208k
        ctxt->sax->entityDecl(ctxt->userData, name,
5579
208k
        XML_INTERNAL_GENERAL_ENTITY,
5580
208k
        NULL, NULL, value);
5581
    /*
5582
     * For expat compatibility in SAX mode.
5583
     */
5584
221k
    if ((ctxt->myDoc == NULL) ||
5585
221k
        (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5586
2.33k
        if (ctxt->myDoc == NULL) {
5587
207
      ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5588
207
      if (ctxt->myDoc == NULL) {
5589
0
          xmlErrMemory(ctxt, "New Doc failed");
5590
0
          return;
5591
0
      }
5592
207
      ctxt->myDoc->properties = XML_DOC_INTERNAL;
5593
207
        }
5594
2.33k
        if (ctxt->myDoc->intSubset == NULL)
5595
207
      ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5596
207
              BAD_CAST "fake", NULL, NULL);
5597
5598
2.33k
        xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5599
2.33k
                    NULL, NULL, value);
5600
2.33k
    }
5601
221k
      } else {
5602
86.1k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5603
86.1k
    if ((URI == NULL) && (literal == NULL)) {
5604
3.89k
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5605
3.89k
    }
5606
86.1k
    if (URI) {
5607
78.1k
        xmlURIPtr uri;
5608
5609
78.1k
        uri = xmlParseURI((const char *)URI);
5610
78.1k
        if (uri == NULL) {
5611
2.18k
            xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5612
2.18k
             "Invalid URI: %s\n", URI);
5613
      /*
5614
       * This really ought to be a well formedness error
5615
       * but the XML Core WG decided otherwise c.f. issue
5616
       * E26 of the XML erratas.
5617
       */
5618
75.9k
        } else {
5619
75.9k
      if (uri->fragment != NULL) {
5620
          /*
5621
           * Okay this is foolish to block those but not
5622
           * invalid URIs.
5623
           */
5624
1.69k
          xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5625
1.69k
      }
5626
75.9k
      xmlFreeURI(uri);
5627
75.9k
        }
5628
78.1k
    }
5629
86.1k
    if ((RAW != '>') && (SKIP_BLANKS == 0)) {
5630
4.12k
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5631
4.12k
           "Space required before 'NDATA'\n");
5632
4.12k
    }
5633
86.1k
    if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5634
9.73k
        SKIP(5);
5635
9.73k
        if (SKIP_BLANKS == 0) {
5636
341
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5637
341
               "Space required after 'NDATA'\n");
5638
341
        }
5639
9.73k
        ndata = xmlParseName(ctxt);
5640
9.73k
        if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5641
9.73k
            (ctxt->sax->unparsedEntityDecl != NULL))
5642
8.63k
      ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5643
8.63k
            literal, URI, ndata);
5644
76.3k
    } else {
5645
76.3k
        if ((ctxt->sax != NULL) &&
5646
76.3k
            (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5647
71.3k
      ctxt->sax->entityDecl(ctxt->userData, name,
5648
71.3k
            XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5649
71.3k
            literal, URI, NULL);
5650
        /*
5651
         * For expat compatibility in SAX mode.
5652
         * assuming the entity replacement was asked for
5653
         */
5654
76.3k
        if ((ctxt->replaceEntities != 0) &&
5655
76.3k
      ((ctxt->myDoc == NULL) ||
5656
45.5k
      (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5657
331
      if (ctxt->myDoc == NULL) {
5658
50
          ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5659
50
          if (ctxt->myDoc == NULL) {
5660
0
              xmlErrMemory(ctxt, "New Doc failed");
5661
0
        return;
5662
0
          }
5663
50
          ctxt->myDoc->properties = XML_DOC_INTERNAL;
5664
50
      }
5665
5666
331
      if (ctxt->myDoc->intSubset == NULL)
5667
50
          ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5668
50
            BAD_CAST "fake", NULL, NULL);
5669
331
      xmlSAX2EntityDecl(ctxt, name,
5670
331
                  XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5671
331
                  literal, URI, NULL);
5672
331
        }
5673
76.3k
    }
5674
86.1k
      }
5675
308k
  }
5676
651k
  if (ctxt->instate == XML_PARSER_EOF)
5677
142
      goto done;
5678
651k
  SKIP_BLANKS;
5679
651k
  if (RAW != '>') {
5680
7.01k
      xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5681
7.01k
              "xmlParseEntityDecl: entity %s not terminated\n", name);
5682
7.01k
      xmlHaltParser(ctxt);
5683
644k
  } else {
5684
644k
      if (inputid != ctxt->input->id) {
5685
46
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5686
46
                         "Entity declaration doesn't start and stop in"
5687
46
                               " the same entity\n");
5688
46
      }
5689
644k
      NEXT;
5690
644k
  }
5691
651k
  if (orig != NULL) {
5692
      /*
5693
       * Ugly mechanism to save the raw entity value.
5694
       */
5695
534k
      xmlEntityPtr cur = NULL;
5696
5697
534k
      if (isParameter) {
5698
315k
          if ((ctxt->sax != NULL) &&
5699
315k
        (ctxt->sax->getParameterEntity != NULL))
5700
315k
        cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5701
315k
      } else {
5702
219k
          if ((ctxt->sax != NULL) &&
5703
219k
        (ctxt->sax->getEntity != NULL))
5704
219k
        cur = ctxt->sax->getEntity(ctxt->userData, name);
5705
219k
    if ((cur == NULL) && (ctxt->userData==ctxt)) {
5706
9.26k
        cur = xmlSAX2GetEntity(ctxt, name);
5707
9.26k
    }
5708
219k
      }
5709
534k
            if ((cur != NULL) && (cur->orig == NULL)) {
5710
437k
    cur->orig = orig;
5711
437k
                orig = NULL;
5712
437k
      }
5713
534k
  }
5714
5715
651k
done:
5716
651k
  if (value != NULL) xmlFree(value);
5717
651k
  if (URI != NULL) xmlFree(URI);
5718
651k
  if (literal != NULL) xmlFree(literal);
5719
651k
        if (orig != NULL) xmlFree(orig);
5720
651k
    }
5721
653k
}
5722
5723
/**
5724
 * xmlParseDefaultDecl:
5725
 * @ctxt:  an XML parser context
5726
 * @value:  Receive a possible fixed default value for the attribute
5727
 *
5728
 * DEPRECATED: Internal function, don't use.
5729
 *
5730
 * Parse an attribute default declaration
5731
 *
5732
 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5733
 *
5734
 * [ VC: Required Attribute ]
5735
 * if the default declaration is the keyword #REQUIRED, then the
5736
 * attribute must be specified for all elements of the type in the
5737
 * attribute-list declaration.
5738
 *
5739
 * [ VC: Attribute Default Legal ]
5740
 * The declared default value must meet the lexical constraints of
5741
 * the declared attribute type c.f. xmlValidateAttributeDecl()
5742
 *
5743
 * [ VC: Fixed Attribute Default ]
5744
 * if an attribute has a default value declared with the #FIXED
5745
 * keyword, instances of that attribute must match the default value.
5746
 *
5747
 * [ WFC: No < in Attribute Values ]
5748
 * handled in xmlParseAttValue()
5749
 *
5750
 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5751
 *          or XML_ATTRIBUTE_FIXED.
5752
 */
5753
5754
int
5755
2.38M
xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5756
2.38M
    int val;
5757
2.38M
    xmlChar *ret;
5758
5759
2.38M
    *value = NULL;
5760
2.38M
    if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5761
173k
  SKIP(9);
5762
173k
  return(XML_ATTRIBUTE_REQUIRED);
5763
173k
    }
5764
2.21M
    if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5765
2.00M
  SKIP(8);
5766
2.00M
  return(XML_ATTRIBUTE_IMPLIED);
5767
2.00M
    }
5768
206k
    val = XML_ATTRIBUTE_NONE;
5769
206k
    if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5770
90.7k
  SKIP(6);
5771
90.7k
  val = XML_ATTRIBUTE_FIXED;
5772
90.7k
  if (SKIP_BLANKS == 0) {
5773
103
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5774
103
         "Space required after '#FIXED'\n");
5775
103
  }
5776
90.7k
    }
5777
206k
    ret = xmlParseAttValue(ctxt);
5778
206k
    ctxt->instate = XML_PARSER_DTD;
5779
206k
    if (ret == NULL) {
5780
3.32k
  xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5781
3.32k
           "Attribute default value declaration error\n");
5782
3.32k
    } else
5783
203k
        *value = ret;
5784
206k
    return(val);
5785
2.21M
}
5786
5787
/**
5788
 * xmlParseNotationType:
5789
 * @ctxt:  an XML parser context
5790
 *
5791
 * DEPRECATED: Internal function, don't use.
5792
 *
5793
 * parse an Notation attribute type.
5794
 *
5795
 * Note: the leading 'NOTATION' S part has already being parsed...
5796
 *
5797
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5798
 *
5799
 * [ VC: Notation Attributes ]
5800
 * Values of this type must match one of the notation names included
5801
 * in the declaration; all notation names in the declaration must be declared.
5802
 *
5803
 * Returns: the notation attribute tree built while parsing
5804
 */
5805
5806
xmlEnumerationPtr
5807
4.27k
xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5808
4.27k
    const xmlChar *name;
5809
4.27k
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5810
5811
4.27k
    if (RAW != '(') {
5812
228
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5813
228
  return(NULL);
5814
228
    }
5815
4.04k
    SHRINK;
5816
9.44k
    do {
5817
9.44k
        NEXT;
5818
9.44k
  SKIP_BLANKS;
5819
9.44k
        name = xmlParseName(ctxt);
5820
9.44k
  if (name == NULL) {
5821
421
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5822
421
         "Name expected in NOTATION declaration\n");
5823
421
            xmlFreeEnumeration(ret);
5824
421
      return(NULL);
5825
421
  }
5826
9.02k
  tmp = ret;
5827
25.0k
  while (tmp != NULL) {
5828
17.2k
      if (xmlStrEqual(name, tmp->name)) {
5829
1.19k
    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5830
1.19k
    "standalone: attribute notation value token %s duplicated\n",
5831
1.19k
         name, NULL);
5832
1.19k
    if (!xmlDictOwns(ctxt->dict, name))
5833
0
        xmlFree((xmlChar *) name);
5834
1.19k
    break;
5835
1.19k
      }
5836
16.0k
      tmp = tmp->next;
5837
16.0k
  }
5838
9.02k
  if (tmp == NULL) {
5839
7.83k
      cur = xmlCreateEnumeration(name);
5840
7.83k
      if (cur == NULL) {
5841
0
                xmlFreeEnumeration(ret);
5842
0
                return(NULL);
5843
0
            }
5844
7.83k
      if (last == NULL) ret = last = cur;
5845
3.90k
      else {
5846
3.90k
    last->next = cur;
5847
3.90k
    last = cur;
5848
3.90k
      }
5849
7.83k
  }
5850
9.02k
  SKIP_BLANKS;
5851
9.02k
    } while (RAW == '|');
5852
3.62k
    if (RAW != ')') {
5853
949
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5854
949
        xmlFreeEnumeration(ret);
5855
949
  return(NULL);
5856
949
    }
5857
2.67k
    NEXT;
5858
2.67k
    return(ret);
5859
3.62k
}
5860
5861
/**
5862
 * xmlParseEnumerationType:
5863
 * @ctxt:  an XML parser context
5864
 *
5865
 * DEPRECATED: Internal function, don't use.
5866
 *
5867
 * parse an Enumeration attribute type.
5868
 *
5869
 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5870
 *
5871
 * [ VC: Enumeration ]
5872
 * Values of this type must match one of the Nmtoken tokens in
5873
 * the declaration
5874
 *
5875
 * Returns: the enumeration attribute tree built while parsing
5876
 */
5877
5878
xmlEnumerationPtr
5879
270k
xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5880
270k
    xmlChar *name;
5881
270k
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5882
5883
270k
    if (RAW != '(') {
5884
4.51k
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5885
4.51k
  return(NULL);
5886
4.51k
    }
5887
265k
    SHRINK;
5888
846k
    do {
5889
846k
        NEXT;
5890
846k
  SKIP_BLANKS;
5891
846k
        name = xmlParseNmtoken(ctxt);
5892
846k
  if (name == NULL) {
5893
495
      xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5894
495
      return(ret);
5895
495
  }
5896
845k
  tmp = ret;
5897
2.31M
  while (tmp != NULL) {
5898
1.46M
      if (xmlStrEqual(name, tmp->name)) {
5899
1.08k
    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5900
1.08k
    "standalone: attribute enumeration value token %s duplicated\n",
5901
1.08k
         name, NULL);
5902
1.08k
    if (!xmlDictOwns(ctxt->dict, name))
5903
1.08k
        xmlFree(name);
5904
1.08k
    break;
5905
1.08k
      }
5906
1.46M
      tmp = tmp->next;
5907
1.46M
  }
5908
845k
  if (tmp == NULL) {
5909
844k
      cur = xmlCreateEnumeration(name);
5910
844k
      if (!xmlDictOwns(ctxt->dict, name))
5911
844k
    xmlFree(name);
5912
844k
      if (cur == NULL) {
5913
0
                xmlFreeEnumeration(ret);
5914
0
                return(NULL);
5915
0
            }
5916
844k
      if (last == NULL) ret = last = cur;
5917
578k
      else {
5918
578k
    last->next = cur;
5919
578k
    last = cur;
5920
578k
      }
5921
844k
  }
5922
845k
  SKIP_BLANKS;
5923
845k
    } while (RAW == '|');
5924
265k
    if (RAW != ')') {
5925
1.38k
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5926
1.38k
  return(ret);
5927
1.38k
    }
5928
264k
    NEXT;
5929
264k
    return(ret);
5930
265k
}
5931
5932
/**
5933
 * xmlParseEnumeratedType:
5934
 * @ctxt:  an XML parser context
5935
 * @tree:  the enumeration tree built while parsing
5936
 *
5937
 * DEPRECATED: Internal function, don't use.
5938
 *
5939
 * parse an Enumerated attribute type.
5940
 *
5941
 * [57] EnumeratedType ::= NotationType | Enumeration
5942
 *
5943
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5944
 *
5945
 *
5946
 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5947
 */
5948
5949
int
5950
274k
xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5951
274k
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5952
4.34k
  SKIP(8);
5953
4.34k
  if (SKIP_BLANKS == 0) {
5954
72
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5955
72
         "Space required after 'NOTATION'\n");
5956
72
      return(0);
5957
72
  }
5958
4.27k
  *tree = xmlParseNotationType(ctxt);
5959
4.27k
  if (*tree == NULL) return(0);
5960
2.67k
  return(XML_ATTRIBUTE_NOTATION);
5961
4.27k
    }
5962
270k
    *tree = xmlParseEnumerationType(ctxt);
5963
270k
    if (*tree == NULL) return(0);
5964
265k
    return(XML_ATTRIBUTE_ENUMERATION);
5965
270k
}
5966
5967
/**
5968
 * xmlParseAttributeType:
5969
 * @ctxt:  an XML parser context
5970
 * @tree:  the enumeration tree built while parsing
5971
 *
5972
 * DEPRECATED: Internal function, don't use.
5973
 *
5974
 * parse the Attribute list def for an element
5975
 *
5976
 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5977
 *
5978
 * [55] StringType ::= 'CDATA'
5979
 *
5980
 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5981
 *                        'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5982
 *
5983
 * Validity constraints for attribute values syntax are checked in
5984
 * xmlValidateAttributeValue()
5985
 *
5986
 * [ VC: ID ]
5987
 * Values of type ID must match the Name production. A name must not
5988
 * appear more than once in an XML document as a value of this type;
5989
 * i.e., ID values must uniquely identify the elements which bear them.
5990
 *
5991
 * [ VC: One ID per Element Type ]
5992
 * No element type may have more than one ID attribute specified.
5993
 *
5994
 * [ VC: ID Attribute Default ]
5995
 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5996
 *
5997
 * [ VC: IDREF ]
5998
 * Values of type IDREF must match the Name production, and values
5999
 * of type IDREFS must match Names; each IDREF Name must match the value
6000
 * of an ID attribute on some element in the XML document; i.e. IDREF
6001
 * values must match the value of some ID attribute.
6002
 *
6003
 * [ VC: Entity Name ]
6004
 * Values of type ENTITY must match the Name production, values
6005
 * of type ENTITIES must match Names; each Entity Name must match the
6006
 * name of an unparsed entity declared in the DTD.
6007
 *
6008
 * [ VC: Name Token ]
6009
 * Values of type NMTOKEN must match the Nmtoken production; values
6010
 * of type NMTOKENS must match Nmtokens.
6011
 *
6012
 * Returns the attribute type
6013
 */
6014
int
6015
2.39M
xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
6016
2.39M
    SHRINK;
6017
2.39M
    if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
6018
924k
  SKIP(5);
6019
924k
  return(XML_ATTRIBUTE_CDATA);
6020
1.46M
     } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
6021
7.91k
  SKIP(6);
6022
7.91k
  return(XML_ATTRIBUTE_IDREFS);
6023
1.46M
     } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
6024
27.3k
  SKIP(5);
6025
27.3k
  return(XML_ATTRIBUTE_IDREF);
6026
1.43M
     } else if ((RAW == 'I') && (NXT(1) == 'D')) {
6027
609k
        SKIP(2);
6028
609k
  return(XML_ATTRIBUTE_ID);
6029
823k
     } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
6030
9.19k
  SKIP(6);
6031
9.19k
  return(XML_ATTRIBUTE_ENTITY);
6032
814k
     } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
6033
642
  SKIP(8);
6034
642
  return(XML_ATTRIBUTE_ENTITIES);
6035
813k
     } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
6036
118k
  SKIP(8);
6037
118k
  return(XML_ATTRIBUTE_NMTOKENS);
6038
694k
     } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
6039
420k
  SKIP(7);
6040
420k
  return(XML_ATTRIBUTE_NMTOKEN);
6041
420k
     }
6042
274k
     return(xmlParseEnumeratedType(ctxt, tree));
6043
2.39M
}
6044
6045
/**
6046
 * xmlParseAttributeListDecl:
6047
 * @ctxt:  an XML parser context
6048
 *
6049
 * DEPRECATED: Internal function, don't use.
6050
 *
6051
 * Parse an attribute list declaration for an element. Always consumes '<!'.
6052
 *
6053
 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
6054
 *
6055
 * [53] AttDef ::= S Name S AttType S DefaultDecl
6056
 *
6057
 */
6058
void
6059
786k
xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
6060
786k
    const xmlChar *elemName;
6061
786k
    const xmlChar *attrName;
6062
786k
    xmlEnumerationPtr tree;
6063
6064
786k
    if ((CUR != '<') || (NXT(1) != '!'))
6065
0
        return;
6066
786k
    SKIP(2);
6067
6068
786k
    if (CMP7(CUR_PTR, 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
6069
785k
  int inputid = ctxt->input->id;
6070
6071
785k
  SKIP(7);
6072
785k
  if (SKIP_BLANKS == 0) {
6073
1.63k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6074
1.63k
                     "Space required after '<!ATTLIST'\n");
6075
1.63k
  }
6076
785k
        elemName = xmlParseName(ctxt);
6077
785k
  if (elemName == NULL) {
6078
896
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6079
896
         "ATTLIST: no name for Element\n");
6080
896
      return;
6081
896
  }
6082
784k
  SKIP_BLANKS;
6083
784k
  GROW;
6084
3.16M
  while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
6085
2.39M
      int type;
6086
2.39M
      int def;
6087
2.39M
      xmlChar *defaultValue = NULL;
6088
6089
2.39M
      GROW;
6090
2.39M
            tree = NULL;
6091
2.39M
      attrName = xmlParseName(ctxt);
6092
2.39M
      if (attrName == NULL) {
6093
4.54k
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6094
4.54k
             "ATTLIST: no name for Attribute\n");
6095
4.54k
    break;
6096
4.54k
      }
6097
2.39M
      GROW;
6098
2.39M
      if (SKIP_BLANKS == 0) {
6099
1.17k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6100
1.17k
            "Space required after the attribute name\n");
6101
1.17k
    break;
6102
1.17k
      }
6103
6104
2.39M
      type = xmlParseAttributeType(ctxt, &tree);
6105
2.39M
      if (type <= 0) {
6106
6.40k
          break;
6107
6.40k
      }
6108
6109
2.38M
      GROW;
6110
2.38M
      if (SKIP_BLANKS == 0) {
6111
2.36k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6112
2.36k
             "Space required after the attribute type\n");
6113
2.36k
          if (tree != NULL)
6114
1.78k
        xmlFreeEnumeration(tree);
6115
2.36k
    break;
6116
2.36k
      }
6117
6118
2.38M
      def = xmlParseDefaultDecl(ctxt, &defaultValue);
6119
2.38M
      if (def <= 0) {
6120
0
                if (defaultValue != NULL)
6121
0
        xmlFree(defaultValue);
6122
0
          if (tree != NULL)
6123
0
        xmlFreeEnumeration(tree);
6124
0
          break;
6125
0
      }
6126
2.38M
      if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6127
97.7k
          xmlAttrNormalizeSpace(defaultValue, defaultValue);
6128
6129
2.38M
      GROW;
6130
2.38M
            if (RAW != '>') {
6131
2.26M
    if (SKIP_BLANKS == 0) {
6132
6.49k
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6133
6.49k
      "Space required after the attribute default value\n");
6134
6.49k
        if (defaultValue != NULL)
6135
3.19k
      xmlFree(defaultValue);
6136
6.49k
        if (tree != NULL)
6137
823
      xmlFreeEnumeration(tree);
6138
6.49k
        break;
6139
6.49k
    }
6140
2.26M
      }
6141
2.37M
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6142
2.37M
    (ctxt->sax->attributeDecl != NULL))
6143
2.17M
    ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6144
2.17M
                          type, def, defaultValue, tree);
6145
207k
      else if (tree != NULL)
6146
24.0k
    xmlFreeEnumeration(tree);
6147
6148
2.37M
      if ((ctxt->sax2) && (defaultValue != NULL) &&
6149
2.37M
          (def != XML_ATTRIBUTE_IMPLIED) &&
6150
2.37M
    (def != XML_ATTRIBUTE_REQUIRED)) {
6151
128k
    xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6152
128k
      }
6153
2.37M
      if (ctxt->sax2) {
6154
1.49M
    xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6155
1.49M
      }
6156
2.37M
      if (defaultValue != NULL)
6157
200k
          xmlFree(defaultValue);
6158
2.37M
      GROW;
6159
2.37M
  }
6160
784k
  if (RAW == '>') {
6161
766k
      if (inputid != ctxt->input->id) {
6162
7.47k
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6163
7.47k
                               "Attribute list declaration doesn't start and"
6164
7.47k
                               " stop in the same entity\n");
6165
7.47k
      }
6166
766k
      NEXT;
6167
766k
  }
6168
784k
    }
6169
786k
}
6170
6171
/**
6172
 * xmlParseElementMixedContentDecl:
6173
 * @ctxt:  an XML parser context
6174
 * @inputchk:  the input used for the current entity, needed for boundary checks
6175
 *
6176
 * DEPRECATED: Internal function, don't use.
6177
 *
6178
 * parse the declaration for a Mixed Element content
6179
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6180
 *
6181
 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6182
 *                '(' S? '#PCDATA' S? ')'
6183
 *
6184
 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6185
 *
6186
 * [ VC: No Duplicate Types ]
6187
 * The same name must not appear more than once in a single
6188
 * mixed-content declaration.
6189
 *
6190
 * returns: the list of the xmlElementContentPtr describing the element choices
6191
 */
6192
xmlElementContentPtr
6193
338k
xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6194
338k
    xmlElementContentPtr ret = NULL, cur = NULL, n;
6195
338k
    const xmlChar *elem = NULL;
6196
6197
338k
    GROW;
6198
338k
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6199
338k
  SKIP(7);
6200
338k
  SKIP_BLANKS;
6201
338k
  SHRINK;
6202
338k
  if (RAW == ')') {
6203
195k
      if (ctxt->input->id != inputchk) {
6204
33
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6205
33
                               "Element content declaration doesn't start and"
6206
33
                               " stop in the same entity\n");
6207
33
      }
6208
195k
      NEXT;
6209
195k
      ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6210
195k
      if (ret == NULL)
6211
0
          return(NULL);
6212
195k
      if (RAW == '*') {
6213
217
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6214
217
    NEXT;
6215
217
      }
6216
195k
      return(ret);
6217
195k
  }
6218
143k
  if ((RAW == '(') || (RAW == '|')) {
6219
143k
      ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6220
143k
      if (ret == NULL) return(NULL);
6221
143k
  }
6222
1.64M
  while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
6223
1.50M
      NEXT;
6224
1.50M
      if (elem == NULL) {
6225
143k
          ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6226
143k
    if (ret == NULL) {
6227
0
        xmlFreeDocElementContent(ctxt->myDoc, cur);
6228
0
                    return(NULL);
6229
0
                }
6230
143k
    ret->c1 = cur;
6231
143k
    if (cur != NULL)
6232
143k
        cur->parent = ret;
6233
143k
    cur = ret;
6234
1.36M
      } else {
6235
1.36M
          n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6236
1.36M
    if (n == NULL) {
6237
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6238
0
                    return(NULL);
6239
0
                }
6240
1.36M
    n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6241
1.36M
    if (n->c1 != NULL)
6242
1.36M
        n->c1->parent = n;
6243
1.36M
          cur->c2 = n;
6244
1.36M
    if (n != NULL)
6245
1.36M
        n->parent = cur;
6246
1.36M
    cur = n;
6247
1.36M
      }
6248
1.50M
      SKIP_BLANKS;
6249
1.50M
      elem = xmlParseName(ctxt);
6250
1.50M
      if (elem == NULL) {
6251
520
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6252
520
      "xmlParseElementMixedContentDecl : Name expected\n");
6253
520
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6254
520
    return(NULL);
6255
520
      }
6256
1.50M
      SKIP_BLANKS;
6257
1.50M
      GROW;
6258
1.50M
  }
6259
143k
  if ((RAW == ')') && (NXT(1) == '*')) {
6260
142k
      if (elem != NULL) {
6261
142k
    cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6262
142k
                                   XML_ELEMENT_CONTENT_ELEMENT);
6263
142k
    if (cur->c2 != NULL)
6264
142k
        cur->c2->parent = cur;
6265
142k
            }
6266
142k
            if (ret != NULL)
6267
142k
                ret->ocur = XML_ELEMENT_CONTENT_MULT;
6268
142k
      if (ctxt->input->id != inputchk) {
6269
21
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6270
21
                               "Element content declaration doesn't start and"
6271
21
                               " stop in the same entity\n");
6272
21
      }
6273
142k
      SKIP(2);
6274
142k
  } else {
6275
1.05k
      xmlFreeDocElementContent(ctxt->myDoc, ret);
6276
1.05k
      xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6277
1.05k
      return(NULL);
6278
1.05k
  }
6279
6280
143k
    } else {
6281
0
  xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6282
0
    }
6283
142k
    return(ret);
6284
338k
}
6285
6286
/**
6287
 * xmlParseElementChildrenContentDeclPriv:
6288
 * @ctxt:  an XML parser context
6289
 * @inputchk:  the input used for the current entity, needed for boundary checks
6290
 * @depth: the level of recursion
6291
 *
6292
 * parse the declaration for a Mixed Element content
6293
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6294
 *
6295
 *
6296
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6297
 *
6298
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6299
 *
6300
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6301
 *
6302
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6303
 *
6304
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6305
 * TODO Parameter-entity replacement text must be properly nested
6306
 *  with parenthesized groups. That is to say, if either of the
6307
 *  opening or closing parentheses in a choice, seq, or Mixed
6308
 *  construct is contained in the replacement text for a parameter
6309
 *  entity, both must be contained in the same replacement text. For
6310
 *  interoperability, if a parameter-entity reference appears in a
6311
 *  choice, seq, or Mixed construct, its replacement text should not
6312
 *  be empty, and neither the first nor last non-blank character of
6313
 *  the replacement text should be a connector (| or ,).
6314
 *
6315
 * Returns the tree of xmlElementContentPtr describing the element
6316
 *          hierarchy.
6317
 */
6318
static xmlElementContentPtr
6319
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6320
408k
                                       int depth) {
6321
408k
    xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6322
408k
    const xmlChar *elem;
6323
408k
    xmlChar type = 0;
6324
6325
408k
    if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6326
408k
        (depth >  2048)) {
6327
0
        xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6328
0
"xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6329
0
                          depth);
6330
0
  return(NULL);
6331
0
    }
6332
408k
    SKIP_BLANKS;
6333
408k
    GROW;
6334
408k
    if (RAW == '(') {
6335
18.6k
  int inputid = ctxt->input->id;
6336
6337
        /* Recurse on first child */
6338
18.6k
  NEXT;
6339
18.6k
  SKIP_BLANKS;
6340
18.6k
        cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6341
18.6k
                                                           depth + 1);
6342
18.6k
        if (cur == NULL)
6343
834
            return(NULL);
6344
17.8k
  SKIP_BLANKS;
6345
17.8k
  GROW;
6346
389k
    } else {
6347
389k
  elem = xmlParseName(ctxt);
6348
389k
  if (elem == NULL) {
6349
1.70k
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6350
1.70k
      return(NULL);
6351
1.70k
  }
6352
388k
        cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6353
388k
  if (cur == NULL) {
6354
0
      xmlErrMemory(ctxt, NULL);
6355
0
      return(NULL);
6356
0
  }
6357
388k
  GROW;
6358
388k
  if (RAW == '?') {
6359
43.4k
      cur->ocur = XML_ELEMENT_CONTENT_OPT;
6360
43.4k
      NEXT;
6361
344k
  } else if (RAW == '*') {
6362
40.4k
      cur->ocur = XML_ELEMENT_CONTENT_MULT;
6363
40.4k
      NEXT;
6364
304k
  } else if (RAW == '+') {
6365
58.4k
      cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6366
58.4k
      NEXT;
6367
245k
  } else {
6368
245k
      cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6369
245k
  }
6370
388k
  GROW;
6371
388k
    }
6372
405k
    SKIP_BLANKS;
6373
405k
    SHRINK;
6374
1.73M
    while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
6375
        /*
6376
   * Each loop we parse one separator and one element.
6377
   */
6378
1.33M
        if (RAW == ',') {
6379
407k
      if (type == 0) type = CUR;
6380
6381
      /*
6382
       * Detect "Name | Name , Name" error
6383
       */
6384
257k
      else if (type != CUR) {
6385
45
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6386
45
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6387
45
                      type);
6388
45
    if ((last != NULL) && (last != ret))
6389
45
        xmlFreeDocElementContent(ctxt->myDoc, last);
6390
45
    if (ret != NULL)
6391
45
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6392
45
    return(NULL);
6393
45
      }
6394
407k
      NEXT;
6395
6396
407k
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6397
407k
      if (op == NULL) {
6398
0
    if ((last != NULL) && (last != ret))
6399
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6400
0
          xmlFreeDocElementContent(ctxt->myDoc, ret);
6401
0
    return(NULL);
6402
0
      }
6403
407k
      if (last == NULL) {
6404
149k
    op->c1 = ret;
6405
149k
    if (ret != NULL)
6406
149k
        ret->parent = op;
6407
149k
    ret = cur = op;
6408
257k
      } else {
6409
257k
          cur->c2 = op;
6410
257k
    if (op != NULL)
6411
257k
        op->parent = cur;
6412
257k
    op->c1 = last;
6413
257k
    if (last != NULL)
6414
257k
        last->parent = op;
6415
257k
    cur =op;
6416
257k
    last = NULL;
6417
257k
      }
6418
923k
  } else if (RAW == '|') {
6419
919k
      if (type == 0) type = CUR;
6420
6421
      /*
6422
       * Detect "Name , Name | Name" error
6423
       */
6424
798k
      else if (type != CUR) {
6425
24
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6426
24
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6427
24
          type);
6428
24
    if ((last != NULL) && (last != ret))
6429
24
        xmlFreeDocElementContent(ctxt->myDoc, last);
6430
24
    if (ret != NULL)
6431
24
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6432
24
    return(NULL);
6433
24
      }
6434
919k
      NEXT;
6435
6436
919k
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6437
919k
      if (op == NULL) {
6438
0
    if ((last != NULL) && (last != ret))
6439
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6440
0
    if (ret != NULL)
6441
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6442
0
    return(NULL);
6443
0
      }
6444
919k
      if (last == NULL) {
6445
120k
    op->c1 = ret;
6446
120k
    if (ret != NULL)
6447
120k
        ret->parent = op;
6448
120k
    ret = cur = op;
6449
798k
      } else {
6450
798k
          cur->c2 = op;
6451
798k
    if (op != NULL)
6452
798k
        op->parent = cur;
6453
798k
    op->c1 = last;
6454
798k
    if (last != NULL)
6455
798k
        last->parent = op;
6456
798k
    cur =op;
6457
798k
    last = NULL;
6458
798k
      }
6459
919k
  } else {
6460
4.07k
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6461
4.07k
      if ((last != NULL) && (last != ret))
6462
1.57k
          xmlFreeDocElementContent(ctxt->myDoc, last);
6463
4.07k
      if (ret != NULL)
6464
4.07k
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6465
4.07k
      return(NULL);
6466
4.07k
  }
6467
1.32M
  GROW;
6468
1.32M
  SKIP_BLANKS;
6469
1.32M
  GROW;
6470
1.32M
  if (RAW == '(') {
6471
59.6k
      int inputid = ctxt->input->id;
6472
      /* Recurse on second child */
6473
59.6k
      NEXT;
6474
59.6k
      SKIP_BLANKS;
6475
59.6k
      last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6476
59.6k
                                                          depth + 1);
6477
59.6k
            if (last == NULL) {
6478
501
    if (ret != NULL)
6479
501
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6480
501
    return(NULL);
6481
501
            }
6482
59.1k
      SKIP_BLANKS;
6483
1.26M
  } else {
6484
1.26M
      elem = xmlParseName(ctxt);
6485
1.26M
      if (elem == NULL) {
6486
1.08k
    xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6487
1.08k
    if (ret != NULL)
6488
1.08k
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6489
1.08k
    return(NULL);
6490
1.08k
      }
6491
1.26M
      last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6492
1.26M
      if (last == NULL) {
6493
0
    if (ret != NULL)
6494
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6495
0
    return(NULL);
6496
0
      }
6497
1.26M
      if (RAW == '?') {
6498
154k
    last->ocur = XML_ELEMENT_CONTENT_OPT;
6499
154k
    NEXT;
6500
1.11M
      } else if (RAW == '*') {
6501
97.4k
    last->ocur = XML_ELEMENT_CONTENT_MULT;
6502
97.4k
    NEXT;
6503
1.01M
      } else if (RAW == '+') {
6504
21.3k
    last->ocur = XML_ELEMENT_CONTENT_PLUS;
6505
21.3k
    NEXT;
6506
992k
      } else {
6507
992k
    last->ocur = XML_ELEMENT_CONTENT_ONCE;
6508
992k
      }
6509
1.26M
  }
6510
1.32M
  SKIP_BLANKS;
6511
1.32M
  GROW;
6512
1.32M
    }
6513
400k
    if ((cur != NULL) && (last != NULL)) {
6514
267k
        cur->c2 = last;
6515
267k
  if (last != NULL)
6516
267k
      last->parent = cur;
6517
267k
    }
6518
400k
    if (ctxt->input->id != inputchk) {
6519
309
  xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6520
309
                       "Element content declaration doesn't start and stop in"
6521
309
                       " the same entity\n");
6522
309
    }
6523
400k
    NEXT;
6524
400k
    if (RAW == '?') {
6525
14.6k
  if (ret != NULL) {
6526
14.6k
      if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6527
14.6k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6528
12
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6529
14.6k
      else
6530
14.6k
          ret->ocur = XML_ELEMENT_CONTENT_OPT;
6531
14.6k
  }
6532
14.6k
  NEXT;
6533
385k
    } else if (RAW == '*') {
6534
100k
  if (ret != NULL) {
6535
100k
      ret->ocur = XML_ELEMENT_CONTENT_MULT;
6536
100k
      cur = ret;
6537
      /*
6538
       * Some normalization:
6539
       * (a | b* | c?)* == (a | b | c)*
6540
       */
6541
641k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6542
541k
    if ((cur->c1 != NULL) &&
6543
541k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6544
541k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6545
4.46k
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6546
541k
    if ((cur->c2 != NULL) &&
6547
541k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6548
541k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6549
731
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6550
541k
    cur = cur->c2;
6551
541k
      }
6552
100k
  }
6553
100k
  NEXT;
6554
284k
    } else if (RAW == '+') {
6555
56.8k
  if (ret != NULL) {
6556
56.8k
      int found = 0;
6557
6558
56.8k
      if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6559
56.8k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6560
0
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6561
56.8k
      else
6562
56.8k
          ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6563
      /*
6564
       * Some normalization:
6565
       * (a | b*)+ == (a | b)*
6566
       * (a | b?)+ == (a | b)*
6567
       */
6568
94.2k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6569
37.3k
    if ((cur->c1 != NULL) &&
6570
37.3k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6571
37.3k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6572
197
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6573
197
        found = 1;
6574
197
    }
6575
37.3k
    if ((cur->c2 != NULL) &&
6576
37.3k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6577
37.3k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6578
134
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6579
134
        found = 1;
6580
134
    }
6581
37.3k
    cur = cur->c2;
6582
37.3k
      }
6583
56.8k
      if (found)
6584
251
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6585
56.8k
  }
6586
56.8k
  NEXT;
6587
56.8k
    }
6588
400k
    return(ret);
6589
405k
}
6590
6591
/**
6592
 * xmlParseElementChildrenContentDecl:
6593
 * @ctxt:  an XML parser context
6594
 * @inputchk:  the input used for the current entity, needed for boundary checks
6595
 *
6596
 * DEPRECATED: Internal function, don't use.
6597
 *
6598
 * parse the declaration for a Mixed Element content
6599
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6600
 *
6601
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6602
 *
6603
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6604
 *
6605
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6606
 *
6607
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6608
 *
6609
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6610
 * TODO Parameter-entity replacement text must be properly nested
6611
 *  with parenthesized groups. That is to say, if either of the
6612
 *  opening or closing parentheses in a choice, seq, or Mixed
6613
 *  construct is contained in the replacement text for a parameter
6614
 *  entity, both must be contained in the same replacement text. For
6615
 *  interoperability, if a parameter-entity reference appears in a
6616
 *  choice, seq, or Mixed construct, its replacement text should not
6617
 *  be empty, and neither the first nor last non-blank character of
6618
 *  the replacement text should be a connector (| or ,).
6619
 *
6620
 * Returns the tree of xmlElementContentPtr describing the element
6621
 *          hierarchy.
6622
 */
6623
xmlElementContentPtr
6624
0
xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6625
    /* stub left for API/ABI compat */
6626
0
    return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6627
0
}
6628
6629
/**
6630
 * xmlParseElementContentDecl:
6631
 * @ctxt:  an XML parser context
6632
 * @name:  the name of the element being defined.
6633
 * @result:  the Element Content pointer will be stored here if any
6634
 *
6635
 * DEPRECATED: Internal function, don't use.
6636
 *
6637
 * parse the declaration for an Element content either Mixed or Children,
6638
 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6639
 *
6640
 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6641
 *
6642
 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6643
 */
6644
6645
int
6646
xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6647
669k
                           xmlElementContentPtr *result) {
6648
6649
669k
    xmlElementContentPtr tree = NULL;
6650
669k
    int inputid = ctxt->input->id;
6651
669k
    int res;
6652
6653
669k
    *result = NULL;
6654
6655
669k
    if (RAW != '(') {
6656
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6657
0
    "xmlParseElementContentDecl : %s '(' expected\n", name);
6658
0
  return(-1);
6659
0
    }
6660
669k
    NEXT;
6661
669k
    GROW;
6662
669k
    if (ctxt->instate == XML_PARSER_EOF)
6663
0
        return(-1);
6664
669k
    SKIP_BLANKS;
6665
669k
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6666
338k
        tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6667
338k
  res = XML_ELEMENT_TYPE_MIXED;
6668
338k
    } else {
6669
330k
        tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6670
330k
  res = XML_ELEMENT_TYPE_ELEMENT;
6671
330k
    }
6672
669k
    SKIP_BLANKS;
6673
669k
    *result = tree;
6674
669k
    return(res);
6675
669k
}
6676
6677
/**
6678
 * xmlParseElementDecl:
6679
 * @ctxt:  an XML parser context
6680
 *
6681
 * DEPRECATED: Internal function, don't use.
6682
 *
6683
 * Parse an element declaration. Always consumes '<!'.
6684
 *
6685
 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6686
 *
6687
 * [ VC: Unique Element Type Declaration ]
6688
 * No element type may be declared more than once
6689
 *
6690
 * Returns the type of the element, or -1 in case of error
6691
 */
6692
int
6693
891k
xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6694
891k
    const xmlChar *name;
6695
891k
    int ret = -1;
6696
891k
    xmlElementContentPtr content  = NULL;
6697
6698
891k
    if ((CUR != '<') || (NXT(1) != '!'))
6699
0
        return(ret);
6700
891k
    SKIP(2);
6701
6702
    /* GROW; done in the caller */
6703
891k
    if (CMP7(CUR_PTR, 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6704
890k
  int inputid = ctxt->input->id;
6705
6706
890k
  SKIP(7);
6707
890k
  if (SKIP_BLANKS == 0) {
6708
1.16k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6709
1.16k
               "Space required after 'ELEMENT'\n");
6710
1.16k
      return(-1);
6711
1.16k
  }
6712
889k
        name = xmlParseName(ctxt);
6713
889k
  if (name == NULL) {
6714
1.04k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6715
1.04k
         "xmlParseElementDecl: no name for Element\n");
6716
1.04k
      return(-1);
6717
1.04k
  }
6718
888k
  if (SKIP_BLANKS == 0) {
6719
2.31k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6720
2.31k
         "Space required after the element name\n");
6721
2.31k
  }
6722
888k
  if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6723
214k
      SKIP(5);
6724
      /*
6725
       * Element must always be empty.
6726
       */
6727
214k
      ret = XML_ELEMENT_TYPE_EMPTY;
6728
674k
  } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6729
674k
             (NXT(2) == 'Y')) {
6730
1.79k
      SKIP(3);
6731
      /*
6732
       * Element is a generic container.
6733
       */
6734
1.79k
      ret = XML_ELEMENT_TYPE_ANY;
6735
672k
  } else if (RAW == '(') {
6736
669k
      ret = xmlParseElementContentDecl(ctxt, name, &content);
6737
669k
  } else {
6738
      /*
6739
       * [ WFC: PEs in Internal Subset ] error handling.
6740
       */
6741
3.31k
      if ((RAW == '%') && (ctxt->external == 0) &&
6742
3.31k
          (ctxt->inputNr == 1)) {
6743
175
    xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
6744
175
    "PEReference: forbidden within markup decl in internal subset\n");
6745
3.14k
      } else {
6746
3.14k
    xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6747
3.14k
          "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6748
3.14k
            }
6749
3.31k
      return(-1);
6750
3.31k
  }
6751
6752
885k
  SKIP_BLANKS;
6753
6754
885k
  if (RAW != '>') {
6755
7.80k
      xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6756
7.80k
      if (content != NULL) {
6757
1.65k
    xmlFreeDocElementContent(ctxt->myDoc, content);
6758
1.65k
      }
6759
877k
  } else {
6760
877k
      if (inputid != ctxt->input->id) {
6761
63
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6762
63
                               "Element declaration doesn't start and stop in"
6763
63
                               " the same entity\n");
6764
63
      }
6765
6766
877k
      NEXT;
6767
877k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6768
877k
    (ctxt->sax->elementDecl != NULL)) {
6769
802k
    if (content != NULL)
6770
603k
        content->parent = NULL;
6771
802k
          ctxt->sax->elementDecl(ctxt->userData, name, ret,
6772
802k
                           content);
6773
802k
    if ((content != NULL) && (content->parent == NULL)) {
6774
        /*
6775
         * this is a trick: if xmlAddElementDecl is called,
6776
         * instead of copying the full tree it is plugged directly
6777
         * if called from the parser. Avoid duplicating the
6778
         * interfaces or change the API/ABI
6779
         */
6780
114k
        xmlFreeDocElementContent(ctxt->myDoc, content);
6781
114k
    }
6782
802k
      } else if (content != NULL) {
6783
55.8k
    xmlFreeDocElementContent(ctxt->myDoc, content);
6784
55.8k
      }
6785
877k
  }
6786
885k
    }
6787
886k
    return(ret);
6788
891k
}
6789
6790
/**
6791
 * xmlParseConditionalSections
6792
 * @ctxt:  an XML parser context
6793
 *
6794
 * Parse a conditional section. Always consumes '<!['.
6795
 *
6796
 * [61] conditionalSect ::= includeSect | ignoreSect
6797
 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6798
 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6799
 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6800
 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6801
 */
6802
6803
static void
6804
3.84k
xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6805
3.84k
    int *inputIds = NULL;
6806
3.84k
    size_t inputIdsSize = 0;
6807
3.84k
    size_t depth = 0;
6808
6809
33.4k
    while (ctxt->instate != XML_PARSER_EOF) {
6810
33.2k
        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6811
13.7k
            int id = ctxt->input->id;
6812
6813
13.7k
            SKIP(3);
6814
13.7k
            SKIP_BLANKS;
6815
6816
13.7k
            if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6817
12.0k
                SKIP(7);
6818
12.0k
                SKIP_BLANKS;
6819
12.0k
                if (RAW != '[') {
6820
36
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6821
36
                    xmlHaltParser(ctxt);
6822
36
                    goto error;
6823
36
                }
6824
12.0k
                if (ctxt->input->id != id) {
6825
87
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6826
87
                                   "All markup of the conditional section is"
6827
87
                                   " not in the same entity\n");
6828
87
                }
6829
12.0k
                NEXT;
6830
6831
12.0k
                if (inputIdsSize <= depth) {
6832
3.34k
                    int *tmp;
6833
6834
3.34k
                    inputIdsSize = (inputIdsSize == 0 ? 4 : inputIdsSize * 2);
6835
3.34k
                    tmp = (int *) xmlRealloc(inputIds,
6836
3.34k
                            inputIdsSize * sizeof(int));
6837
3.34k
                    if (tmp == NULL) {
6838
0
                        xmlErrMemory(ctxt, NULL);
6839
0
                        goto error;
6840
0
                    }
6841
3.34k
                    inputIds = tmp;
6842
3.34k
                }
6843
12.0k
                inputIds[depth] = id;
6844
12.0k
                depth++;
6845
12.0k
            } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6846
1.28k
                size_t ignoreDepth = 0;
6847
6848
1.28k
                SKIP(6);
6849
1.28k
                SKIP_BLANKS;
6850
1.28k
                if (RAW != '[') {
6851
42
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6852
42
                    xmlHaltParser(ctxt);
6853
42
                    goto error;
6854
42
                }
6855
1.24k
                if (ctxt->input->id != id) {
6856
12
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6857
12
                                   "All markup of the conditional section is"
6858
12
                                   " not in the same entity\n");
6859
12
                }
6860
1.24k
                NEXT;
6861
6862
4.20M
                while (RAW != 0) {
6863
4.20M
                    if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6864
3.42k
                        SKIP(3);
6865
3.42k
                        ignoreDepth++;
6866
                        /* Check for integer overflow */
6867
3.42k
                        if (ignoreDepth == 0) {
6868
0
                            xmlErrMemory(ctxt, NULL);
6869
0
                            goto error;
6870
0
                        }
6871
4.19M
                    } else if ((RAW == ']') && (NXT(1) == ']') &&
6872
4.19M
                               (NXT(2) == '>')) {
6873
3.18k
                        if (ignoreDepth == 0)
6874
753
                            break;
6875
2.43k
                        SKIP(3);
6876
2.43k
                        ignoreDepth--;
6877
4.19M
                    } else {
6878
4.19M
                        NEXT;
6879
4.19M
                    }
6880
4.20M
                }
6881
6882
1.24k
    if (RAW == 0) {
6883
492
        xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6884
492
                    goto error;
6885
492
    }
6886
753
                if (ctxt->input->id != id) {
6887
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6888
0
                                   "All markup of the conditional section is"
6889
0
                                   " not in the same entity\n");
6890
0
                }
6891
753
                SKIP(3);
6892
753
            } else {
6893
418
                xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6894
418
                xmlHaltParser(ctxt);
6895
418
                goto error;
6896
418
            }
6897
19.5k
        } else if ((depth > 0) &&
6898
19.5k
                   (RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6899
5.71k
            depth--;
6900
5.71k
            if (ctxt->input->id != inputIds[depth]) {
6901
210
                xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6902
210
                               "All markup of the conditional section is not"
6903
210
                               " in the same entity\n");
6904
210
            }
6905
5.71k
            SKIP(3);
6906
13.8k
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
6907
12.6k
            xmlParseMarkupDecl(ctxt);
6908
12.6k
        } else {
6909
1.13k
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6910
1.13k
            xmlHaltParser(ctxt);
6911
1.13k
            goto error;
6912
1.13k
        }
6913
6914
31.1k
        if (depth == 0)
6915
1.52k
            break;
6916
6917
29.6k
        SKIP_BLANKS;
6918
29.6k
        GROW;
6919
29.6k
    }
6920
6921
3.84k
error:
6922
3.84k
    xmlFree(inputIds);
6923
3.84k
}
6924
6925
/**
6926
 * xmlParseMarkupDecl:
6927
 * @ctxt:  an XML parser context
6928
 *
6929
 * DEPRECATED: Internal function, don't use.
6930
 *
6931
 * Parse markup declarations. Always consumes '<!' or '<?'.
6932
 *
6933
 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6934
 *                     NotationDecl | PI | Comment
6935
 *
6936
 * [ VC: Proper Declaration/PE Nesting ]
6937
 * Parameter-entity replacement text must be properly nested with
6938
 * markup declarations. That is to say, if either the first character
6939
 * or the last character of a markup declaration (markupdecl above) is
6940
 * contained in the replacement text for a parameter-entity reference,
6941
 * both must be contained in the same replacement text.
6942
 *
6943
 * [ WFC: PEs in Internal Subset ]
6944
 * In the internal DTD subset, parameter-entity references can occur
6945
 * only where markup declarations can occur, not within markup declarations.
6946
 * (This does not apply to references that occur in external parameter
6947
 * entities or to the external subset.)
6948
 */
6949
void
6950
14.9M
xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6951
14.9M
    GROW;
6952
14.9M
    if (CUR == '<') {
6953
14.9M
        if (NXT(1) == '!') {
6954
14.8M
      switch (NXT(2)) {
6955
1.54M
          case 'E':
6956
1.54M
        if (NXT(3) == 'L')
6957
891k
      xmlParseElementDecl(ctxt);
6958
654k
        else if (NXT(3) == 'N')
6959
653k
      xmlParseEntityDecl(ctxt);
6960
263
                    else
6961
263
                        SKIP(2);
6962
1.54M
        break;
6963
786k
          case 'A':
6964
786k
        xmlParseAttributeListDecl(ctxt);
6965
786k
        break;
6966
21.4k
          case 'N':
6967
21.4k
        xmlParseNotationDecl(ctxt);
6968
21.4k
        break;
6969
12.5M
          case '-':
6970
12.5M
        xmlParseComment(ctxt);
6971
12.5M
        break;
6972
7.88k
    default:
6973
        /* there is an error but it will be detected later */
6974
7.88k
                    SKIP(2);
6975
7.88k
        break;
6976
14.8M
      }
6977
14.8M
  } else if (NXT(1) == '?') {
6978
16.8k
      xmlParsePI(ctxt);
6979
16.8k
  }
6980
14.9M
    }
6981
6982
    /*
6983
     * detect requirement to exit there and act accordingly
6984
     * and avoid having instate overridden later on
6985
     */
6986
14.9M
    if (ctxt->instate == XML_PARSER_EOF)
6987
7.27k
        return;
6988
6989
14.8M
    ctxt->instate = XML_PARSER_DTD;
6990
14.8M
}
6991
6992
/**
6993
 * xmlParseTextDecl:
6994
 * @ctxt:  an XML parser context
6995
 *
6996
 * DEPRECATED: Internal function, don't use.
6997
 *
6998
 * parse an XML declaration header for external entities
6999
 *
7000
 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
7001
 */
7002
7003
void
7004
18.9k
xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
7005
18.9k
    xmlChar *version;
7006
18.9k
    const xmlChar *encoding;
7007
18.9k
    int oldstate;
7008
7009
    /*
7010
     * We know that '<?xml' is here.
7011
     */
7012
18.9k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
7013
18.8k
  SKIP(5);
7014
18.8k
    } else {
7015
111
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
7016
111
  return;
7017
111
    }
7018
7019
    /* Avoid expansion of parameter entities when skipping blanks. */
7020
18.8k
    oldstate = ctxt->instate;
7021
18.8k
    ctxt->instate = XML_PARSER_START;
7022
7023
18.8k
    if (SKIP_BLANKS == 0) {
7024
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7025
0
           "Space needed after '<?xml'\n");
7026
0
    }
7027
7028
    /*
7029
     * We may have the VersionInfo here.
7030
     */
7031
18.8k
    version = xmlParseVersionInfo(ctxt);
7032
18.8k
    if (version == NULL)
7033
5.63k
  version = xmlCharStrdup(XML_DEFAULT_VERSION);
7034
13.2k
    else {
7035
13.2k
  if (SKIP_BLANKS == 0) {
7036
1.06k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7037
1.06k
               "Space needed here\n");
7038
1.06k
  }
7039
13.2k
    }
7040
18.8k
    ctxt->input->version = version;
7041
7042
    /*
7043
     * We must have the encoding declaration
7044
     */
7045
18.8k
    encoding = xmlParseEncodingDecl(ctxt);
7046
18.8k
    if (ctxt->instate == XML_PARSER_EOF)
7047
0
        return;
7048
18.8k
    if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7049
  /*
7050
   * The XML REC instructs us to stop parsing right here
7051
   */
7052
687
        ctxt->instate = oldstate;
7053
687
        return;
7054
687
    }
7055
18.1k
    if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
7056
2.05k
  xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
7057
2.05k
           "Missing encoding in text declaration\n");
7058
2.05k
    }
7059
7060
18.1k
    SKIP_BLANKS;
7061
18.1k
    if ((RAW == '?') && (NXT(1) == '>')) {
7062
3.95k
        SKIP(2);
7063
14.1k
    } else if (RAW == '>') {
7064
        /* Deprecated old WD ... */
7065
387
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7066
387
  NEXT;
7067
13.8k
    } else {
7068
13.8k
        int c;
7069
7070
13.8k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7071
996k
        while ((c = CUR) != 0) {
7072
987k
            NEXT;
7073
987k
            if (c == '>')
7074
4.66k
                break;
7075
987k
        }
7076
13.8k
    }
7077
7078
18.1k
    ctxt->instate = oldstate;
7079
18.1k
}
7080
7081
/**
7082
 * xmlParseExternalSubset:
7083
 * @ctxt:  an XML parser context
7084
 * @ExternalID: the external identifier
7085
 * @SystemID: the system identifier (or URL)
7086
 *
7087
 * parse Markup declarations from an external subset
7088
 *
7089
 * [30] extSubset ::= textDecl? extSubsetDecl
7090
 *
7091
 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
7092
 */
7093
void
7094
xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
7095
20.2k
                       const xmlChar *SystemID) {
7096
20.2k
    xmlDetectSAX2(ctxt);
7097
20.2k
    GROW;
7098
7099
20.2k
    if ((ctxt->encoding == NULL) &&
7100
20.2k
        (ctxt->input->end - ctxt->input->cur >= 4)) {
7101
20.2k
        xmlChar start[4];
7102
20.2k
  xmlCharEncoding enc;
7103
7104
20.2k
  start[0] = RAW;
7105
20.2k
  start[1] = NXT(1);
7106
20.2k
  start[2] = NXT(2);
7107
20.2k
  start[3] = NXT(3);
7108
20.2k
  enc = xmlDetectCharEncoding(start, 4);
7109
20.2k
  if (enc != XML_CHAR_ENCODING_NONE)
7110
3.18k
      xmlSwitchEncoding(ctxt, enc);
7111
20.2k
    }
7112
7113
20.2k
    if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
7114
3.01k
  xmlParseTextDecl(ctxt);
7115
3.01k
  if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7116
      /*
7117
       * The XML REC instructs us to stop parsing right here
7118
       */
7119
39
      xmlHaltParser(ctxt);
7120
39
      return;
7121
39
  }
7122
3.01k
    }
7123
20.2k
    if (ctxt->myDoc == NULL) {
7124
0
        ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
7125
0
  if (ctxt->myDoc == NULL) {
7126
0
      xmlErrMemory(ctxt, "New Doc failed");
7127
0
      return;
7128
0
  }
7129
0
  ctxt->myDoc->properties = XML_DOC_INTERNAL;
7130
0
    }
7131
20.2k
    if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
7132
0
        xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
7133
7134
20.2k
    ctxt->instate = XML_PARSER_DTD;
7135
20.2k
    ctxt->external = 1;
7136
20.2k
    SKIP_BLANKS;
7137
1.11M
    while ((ctxt->instate != XML_PARSER_EOF) && (RAW != 0)) {
7138
1.09M
  GROW;
7139
1.09M
        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7140
3.84k
            xmlParseConditionalSections(ctxt);
7141
1.09M
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
7142
1.08M
            xmlParseMarkupDecl(ctxt);
7143
1.08M
        } else {
7144
4.01k
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7145
4.01k
            xmlHaltParser(ctxt);
7146
4.01k
            return;
7147
4.01k
        }
7148
1.09M
        SKIP_BLANKS;
7149
1.09M
    }
7150
7151
16.2k
    if (RAW != 0) {
7152
0
  xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7153
0
    }
7154
7155
16.2k
}
7156
7157
/**
7158
 * xmlParseReference:
7159
 * @ctxt:  an XML parser context
7160
 *
7161
 * DEPRECATED: Internal function, don't use.
7162
 *
7163
 * parse and handle entity references in content, depending on the SAX
7164
 * interface, this may end-up in a call to character() if this is a
7165
 * CharRef, a predefined entity, if there is no reference() callback.
7166
 * or if the parser was asked to switch to that mode.
7167
 *
7168
 * Always consumes '&'.
7169
 *
7170
 * [67] Reference ::= EntityRef | CharRef
7171
 */
7172
void
7173
3.19M
xmlParseReference(xmlParserCtxtPtr ctxt) {
7174
3.19M
    xmlEntityPtr ent;
7175
3.19M
    xmlChar *val;
7176
3.19M
    int was_checked;
7177
3.19M
    xmlNodePtr list = NULL;
7178
3.19M
    xmlParserErrors ret = XML_ERR_OK;
7179
7180
7181
3.19M
    if (RAW != '&')
7182
0
        return;
7183
7184
    /*
7185
     * Simple case of a CharRef
7186
     */
7187
3.19M
    if (NXT(1) == '#') {
7188
147k
  int i = 0;
7189
147k
  xmlChar out[16];
7190
147k
  int hex = NXT(2);
7191
147k
  int value = xmlParseCharRef(ctxt);
7192
7193
147k
  if (value == 0)
7194
17.6k
      return;
7195
129k
  if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
7196
      /*
7197
       * So we are using non-UTF-8 buffers
7198
       * Check that the char fit on 8bits, if not
7199
       * generate a CharRef.
7200
       */
7201
89.1k
      if (value <= 0xFF) {
7202
86.7k
    out[0] = value;
7203
86.7k
    out[1] = 0;
7204
86.7k
    if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7205
86.7k
        (!ctxt->disableSAX))
7206
67.7k
        ctxt->sax->characters(ctxt->userData, out, 1);
7207
86.7k
      } else {
7208
2.40k
    if ((hex == 'x') || (hex == 'X'))
7209
451
        snprintf((char *)out, sizeof(out), "#x%X", value);
7210
1.95k
    else
7211
1.95k
        snprintf((char *)out, sizeof(out), "#%d", value);
7212
2.40k
    if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7213
2.40k
        (!ctxt->disableSAX))
7214
1.41k
        ctxt->sax->reference(ctxt->userData, out);
7215
2.40k
      }
7216
89.1k
  } else {
7217
      /*
7218
       * Just encode the value in UTF-8
7219
       */
7220
40.7k
      COPY_BUF(0 ,out, i, value);
7221
40.7k
      out[i] = 0;
7222
40.7k
      if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7223
40.7k
    (!ctxt->disableSAX))
7224
31.8k
    ctxt->sax->characters(ctxt->userData, out, i);
7225
40.7k
  }
7226
129k
  return;
7227
147k
    }
7228
7229
    /*
7230
     * We are seeing an entity reference
7231
     */
7232
3.05M
    ent = xmlParseEntityRef(ctxt);
7233
3.05M
    if (ent == NULL) return;
7234
2.10M
    if (!ctxt->wellFormed)
7235
824k
  return;
7236
1.28M
    was_checked = ent->flags & XML_ENT_PARSED;
7237
7238
    /* special case of predefined entities */
7239
1.28M
    if ((ent->name == NULL) ||
7240
1.28M
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7241
83.3k
  val = ent->content;
7242
83.3k
  if (val == NULL) return;
7243
  /*
7244
   * inline the entity.
7245
   */
7246
83.3k
  if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7247
83.3k
      (!ctxt->disableSAX))
7248
83.3k
      ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7249
83.3k
  return;
7250
83.3k
    }
7251
7252
    /*
7253
     * The first reference to the entity trigger a parsing phase
7254
     * where the ent->children is filled with the result from
7255
     * the parsing.
7256
     * Note: external parsed entities will not be loaded, it is not
7257
     * required for a non-validating parser, unless the parsing option
7258
     * of validating, or substituting entities were given. Doing so is
7259
     * far more secure as the parser will only process data coming from
7260
     * the document entity by default.
7261
     */
7262
1.19M
    if (((ent->flags & XML_ENT_PARSED) == 0) &&
7263
1.19M
        ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
7264
75.4k
         (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
7265
72.4k
  unsigned long oldsizeentcopy = ctxt->sizeentcopy;
7266
7267
  /*
7268
   * This is a bit hackish but this seems the best
7269
   * way to make sure both SAX and DOM entity support
7270
   * behaves okay.
7271
   */
7272
72.4k
  void *user_data;
7273
72.4k
  if (ctxt->userData == ctxt)
7274
72.4k
      user_data = NULL;
7275
0
  else
7276
0
      user_data = ctxt->userData;
7277
7278
        /* Avoid overflow as much as possible */
7279
72.4k
        ctxt->sizeentcopy = 0;
7280
7281
72.4k
        if (ent->flags & XML_ENT_EXPANDING) {
7282
224
            xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7283
224
            xmlHaltParser(ctxt);
7284
224
            return;
7285
224
        }
7286
7287
72.2k
        ent->flags |= XML_ENT_EXPANDING;
7288
7289
  /*
7290
   * Check that this entity is well formed
7291
   * 4.3.2: An internal general parsed entity is well-formed
7292
   * if its replacement text matches the production labeled
7293
   * content.
7294
   */
7295
72.2k
  if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7296
40.2k
      ctxt->depth++;
7297
40.2k
      ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
7298
40.2k
                                                user_data, &list);
7299
40.2k
      ctxt->depth--;
7300
7301
40.2k
  } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7302
31.9k
      ctxt->depth++;
7303
31.9k
      ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
7304
31.9k
                                     user_data, ctxt->depth, ent->URI,
7305
31.9k
             ent->ExternalID, &list);
7306
31.9k
      ctxt->depth--;
7307
31.9k
  } else {
7308
0
      ret = XML_ERR_ENTITY_PE_INTERNAL;
7309
0
      xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7310
0
       "invalid entity type found\n", NULL);
7311
0
  }
7312
7313
72.2k
        ent->flags &= ~XML_ENT_EXPANDING;
7314
72.2k
        ent->flags |= XML_ENT_PARSED | XML_ENT_CHECKED;
7315
72.2k
        ent->expandedSize = ctxt->sizeentcopy;
7316
72.2k
  if (ret == XML_ERR_ENTITY_LOOP) {
7317
3.82k
            xmlHaltParser(ctxt);
7318
3.82k
      xmlFreeNodeList(list);
7319
3.82k
      return;
7320
3.82k
  }
7321
68.4k
  if (xmlParserEntityCheck(ctxt, oldsizeentcopy)) {
7322
0
      xmlFreeNodeList(list);
7323
0
      return;
7324
0
  }
7325
7326
68.4k
  if ((ret == XML_ERR_OK) && (list != NULL)) {
7327
35.7k
            ent->children = list;
7328
            /*
7329
             * Prune it directly in the generated document
7330
             * except for single text nodes.
7331
             */
7332
35.7k
            if ((ctxt->replaceEntities == 0) ||
7333
35.7k
                (ctxt->parseMode == XML_PARSE_READER) ||
7334
35.7k
                ((list->type == XML_TEXT_NODE) &&
7335
28.8k
                 (list->next == NULL))) {
7336
28.8k
                ent->owner = 1;
7337
145k
                while (list != NULL) {
7338
117k
                    list->parent = (xmlNodePtr) ent;
7339
117k
                    if (list->doc != ent->doc)
7340
0
                        xmlSetTreeDoc(list, ent->doc);
7341
117k
                    if (list->next == NULL)
7342
28.8k
                        ent->last = list;
7343
117k
                    list = list->next;
7344
117k
                }
7345
28.8k
                list = NULL;
7346
28.8k
            } else {
7347
6.90k
                ent->owner = 0;
7348
129k
                while (list != NULL) {
7349
122k
                    list->parent = (xmlNodePtr) ctxt->node;
7350
122k
                    list->doc = ctxt->myDoc;
7351
122k
                    if (list->next == NULL)
7352
6.90k
                        ent->last = list;
7353
122k
                    list = list->next;
7354
122k
                }
7355
6.90k
                list = ent->children;
7356
#ifdef LIBXML_LEGACY_ENABLED
7357
                if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7358
                    xmlAddEntityReference(ent, list, NULL);
7359
#endif /* LIBXML_LEGACY_ENABLED */
7360
6.90k
            }
7361
35.7k
  } else if ((ret != XML_ERR_OK) &&
7362
32.6k
       (ret != XML_WAR_UNDECLARED_ENTITY)) {
7363
16.9k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7364
16.9k
         "Entity '%s' failed to parse\n", ent->name);
7365
16.9k
            if (ent->content != NULL)
7366
3.80k
                ent->content[0] = 0;
7367
16.9k
  } else if (list != NULL) {
7368
0
      xmlFreeNodeList(list);
7369
0
      list = NULL;
7370
0
  }
7371
7372
        /* Prevent entity from being parsed and expanded twice (Bug 760367). */
7373
68.4k
        was_checked = 0;
7374
68.4k
    }
7375
7376
    /*
7377
     * Now that the entity content has been gathered
7378
     * provide it to the application, this can take different forms based
7379
     * on the parsing modes.
7380
     */
7381
1.19M
    if (ent->children == NULL) {
7382
  /*
7383
   * Probably running in SAX mode and the callbacks don't
7384
   * build the entity content. So unless we already went
7385
   * though parsing for first checking go though the entity
7386
   * content to generate callbacks associated to the entity
7387
   */
7388
325k
  if (was_checked != 0) {
7389
289k
      void *user_data;
7390
      /*
7391
       * This is a bit hackish but this seems the best
7392
       * way to make sure both SAX and DOM entity support
7393
       * behaves okay.
7394
       */
7395
289k
      if (ctxt->userData == ctxt)
7396
289k
    user_data = NULL;
7397
0
      else
7398
0
    user_data = ctxt->userData;
7399
7400
289k
      if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7401
15.6k
    ctxt->depth++;
7402
15.6k
    ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7403
15.6k
           ent->content, user_data, NULL);
7404
15.6k
    ctxt->depth--;
7405
273k
      } else if (ent->etype ==
7406
273k
           XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7407
273k
          unsigned long oldsizeentities = ctxt->sizeentities;
7408
7409
273k
    ctxt->depth++;
7410
273k
    ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7411
273k
         ctxt->sax, user_data, ctxt->depth,
7412
273k
         ent->URI, ent->ExternalID, NULL);
7413
273k
    ctxt->depth--;
7414
7415
                /* Undo the change to sizeentities */
7416
273k
                ctxt->sizeentities = oldsizeentities;
7417
273k
      } else {
7418
0
    ret = XML_ERR_ENTITY_PE_INTERNAL;
7419
0
    xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7420
0
           "invalid entity type found\n", NULL);
7421
0
      }
7422
289k
      if (ret == XML_ERR_ENTITY_LOOP) {
7423
0
    xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7424
0
    return;
7425
0
      }
7426
289k
            if (xmlParserEntityCheck(ctxt, 0))
7427
0
                return;
7428
289k
  }
7429
325k
  if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7430
325k
      (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7431
      /*
7432
       * Entity reference callback comes second, it's somewhat
7433
       * superfluous but a compatibility to historical behaviour
7434
       */
7435
76.6k
      ctxt->sax->reference(ctxt->userData, ent->name);
7436
76.6k
  }
7437
325k
  return;
7438
325k
    }
7439
7440
    /*
7441
     * We also check for amplification if entities aren't substituted.
7442
     * They might be expanded later.
7443
     */
7444
867k
    if ((was_checked != 0) &&
7445
867k
        (xmlParserEntityCheck(ctxt, ent->expandedSize)))
7446
141
        return;
7447
7448
    /*
7449
     * If we didn't get any children for the entity being built
7450
     */
7451
867k
    if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7452
867k
  (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7453
  /*
7454
   * Create a node.
7455
   */
7456
212k
  ctxt->sax->reference(ctxt->userData, ent->name);
7457
212k
  return;
7458
212k
    }
7459
7460
655k
    if (ctxt->replaceEntities)  {
7461
  /*
7462
   * There is a problem on the handling of _private for entities
7463
   * (bug 155816): Should we copy the content of the field from
7464
   * the entity (possibly overwriting some value set by the user
7465
   * when a copy is created), should we leave it alone, or should
7466
   * we try to take care of different situations?  The problem
7467
   * is exacerbated by the usage of this field by the xmlReader.
7468
   * To fix this bug, we look at _private on the created node
7469
   * and, if it's NULL, we copy in whatever was in the entity.
7470
   * If it's not NULL we leave it alone.  This is somewhat of a
7471
   * hack - maybe we should have further tests to determine
7472
   * what to do.
7473
   */
7474
655k
  if (ctxt->node != NULL) {
7475
      /*
7476
       * Seems we are generating the DOM content, do
7477
       * a simple tree copy for all references except the first
7478
       * In the first occurrence list contains the replacement.
7479
       */
7480
655k
      if (((list == NULL) && (ent->owner == 0)) ||
7481
655k
    (ctxt->parseMode == XML_PARSE_READER)) {
7482
204k
    xmlNodePtr nw = NULL, cur, firstChild = NULL;
7483
7484
    /*
7485
     * when operating on a reader, the entities definitions
7486
     * are always owning the entities subtree.
7487
    if (ctxt->parseMode == XML_PARSE_READER)
7488
        ent->owner = 1;
7489
     */
7490
7491
204k
    cur = ent->children;
7492
369k
    while (cur != NULL) {
7493
369k
        nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7494
369k
        if (nw != NULL) {
7495
369k
      if (nw->_private == NULL)
7496
369k
          nw->_private = cur->_private;
7497
369k
      if (firstChild == NULL){
7498
204k
          firstChild = nw;
7499
204k
      }
7500
369k
      nw = xmlAddChild(ctxt->node, nw);
7501
369k
        }
7502
369k
        if (cur == ent->last) {
7503
      /*
7504
       * needed to detect some strange empty
7505
       * node cases in the reader tests
7506
       */
7507
204k
      if ((ctxt->parseMode == XML_PARSE_READER) &&
7508
204k
          (nw != NULL) &&
7509
204k
          (nw->type == XML_ELEMENT_NODE) &&
7510
204k
          (nw->children == NULL))
7511
5.96k
          nw->extra = 1;
7512
7513
204k
      break;
7514
204k
        }
7515
164k
        cur = cur->next;
7516
164k
    }
7517
#ifdef LIBXML_LEGACY_ENABLED
7518
    if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7519
      xmlAddEntityReference(ent, firstChild, nw);
7520
#endif /* LIBXML_LEGACY_ENABLED */
7521
450k
      } else if ((list == NULL) || (ctxt->inputNr > 0)) {
7522
450k
    xmlNodePtr nw = NULL, cur, next, last,
7523
450k
         firstChild = NULL;
7524
7525
    /*
7526
     * Copy the entity child list and make it the new
7527
     * entity child list. The goal is to make sure any
7528
     * ID or REF referenced will be the one from the
7529
     * document content and not the entity copy.
7530
     */
7531
450k
    cur = ent->children;
7532
450k
    ent->children = NULL;
7533
450k
    last = ent->last;
7534
450k
    ent->last = NULL;
7535
1.16M
    while (cur != NULL) {
7536
1.16M
        next = cur->next;
7537
1.16M
        cur->next = NULL;
7538
1.16M
        cur->parent = NULL;
7539
1.16M
        nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7540
1.16M
        if (nw != NULL) {
7541
1.16M
      if (nw->_private == NULL)
7542
1.16M
          nw->_private = cur->_private;
7543
1.16M
      if (firstChild == NULL){
7544
450k
          firstChild = cur;
7545
450k
      }
7546
1.16M
      xmlAddChild((xmlNodePtr) ent, nw);
7547
1.16M
        }
7548
1.16M
        xmlAddChild(ctxt->node, cur);
7549
1.16M
        if (cur == last)
7550
450k
      break;
7551
719k
        cur = next;
7552
719k
    }
7553
450k
    if (ent->owner == 0)
7554
6.90k
        ent->owner = 1;
7555
#ifdef LIBXML_LEGACY_ENABLED
7556
    if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7557
      xmlAddEntityReference(ent, firstChild, nw);
7558
#endif /* LIBXML_LEGACY_ENABLED */
7559
450k
      } else {
7560
0
    const xmlChar *nbktext;
7561
7562
    /*
7563
     * the name change is to avoid coalescing of the
7564
     * node with a possible previous text one which
7565
     * would make ent->children a dangling pointer
7566
     */
7567
0
    nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7568
0
          -1);
7569
0
    if (ent->children->type == XML_TEXT_NODE)
7570
0
        ent->children->name = nbktext;
7571
0
    if ((ent->last != ent->children) &&
7572
0
        (ent->last->type == XML_TEXT_NODE))
7573
0
        ent->last->name = nbktext;
7574
0
    xmlAddChildList(ctxt->node, ent->children);
7575
0
      }
7576
7577
      /*
7578
       * This is to avoid a nasty side effect, see
7579
       * characters() in SAX.c
7580
       */
7581
655k
      ctxt->nodemem = 0;
7582
655k
      ctxt->nodelen = 0;
7583
655k
      return;
7584
655k
  }
7585
655k
    }
7586
655k
}
7587
7588
/**
7589
 * xmlParseEntityRef:
7590
 * @ctxt:  an XML parser context
7591
 *
7592
 * DEPRECATED: Internal function, don't use.
7593
 *
7594
 * Parse an entitiy reference. Always consumes '&'.
7595
 *
7596
 * [68] EntityRef ::= '&' Name ';'
7597
 *
7598
 * [ WFC: Entity Declared ]
7599
 * In a document without any DTD, a document with only an internal DTD
7600
 * subset which contains no parameter entity references, or a document
7601
 * with "standalone='yes'", the Name given in the entity reference
7602
 * must match that in an entity declaration, except that well-formed
7603
 * documents need not declare any of the following entities: amp, lt,
7604
 * gt, apos, quot.  The declaration of a parameter entity must precede
7605
 * any reference to it.  Similarly, the declaration of a general entity
7606
 * must precede any reference to it which appears in a default value in an
7607
 * attribute-list declaration. Note that if entities are declared in the
7608
 * external subset or in external parameter entities, a non-validating
7609
 * processor is not obligated to read and process their declarations;
7610
 * for such documents, the rule that an entity must be declared is a
7611
 * well-formedness constraint only if standalone='yes'.
7612
 *
7613
 * [ WFC: Parsed Entity ]
7614
 * An entity reference must not contain the name of an unparsed entity
7615
 *
7616
 * Returns the xmlEntityPtr if found, or NULL otherwise.
7617
 */
7618
xmlEntityPtr
7619
4.37M
xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7620
4.37M
    const xmlChar *name;
7621
4.37M
    xmlEntityPtr ent = NULL;
7622
7623
4.37M
    GROW;
7624
4.37M
    if (ctxt->instate == XML_PARSER_EOF)
7625
0
        return(NULL);
7626
7627
4.37M
    if (RAW != '&')
7628
0
        return(NULL);
7629
4.37M
    NEXT;
7630
4.37M
    name = xmlParseName(ctxt);
7631
4.37M
    if (name == NULL) {
7632
67.7k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7633
67.7k
           "xmlParseEntityRef: no name\n");
7634
67.7k
        return(NULL);
7635
67.7k
    }
7636
4.30M
    if (RAW != ';') {
7637
65.8k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7638
65.8k
  return(NULL);
7639
65.8k
    }
7640
4.24M
    NEXT;
7641
7642
    /*
7643
     * Predefined entities override any extra definition
7644
     */
7645
4.24M
    if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7646
2.92M
        ent = xmlGetPredefinedEntity(name);
7647
2.92M
        if (ent != NULL)
7648
157k
            return(ent);
7649
2.92M
    }
7650
7651
    /*
7652
     * Ask first SAX for entity resolution, otherwise try the
7653
     * entities which may have stored in the parser context.
7654
     */
7655
4.08M
    if (ctxt->sax != NULL) {
7656
4.08M
  if (ctxt->sax->getEntity != NULL)
7657
4.08M
      ent = ctxt->sax->getEntity(ctxt->userData, name);
7658
4.08M
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7659
4.08M
      (ctxt->options & XML_PARSE_OLDSAX))
7660
23.7k
      ent = xmlGetPredefinedEntity(name);
7661
4.08M
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7662
4.08M
      (ctxt->userData==ctxt)) {
7663
72.1k
      ent = xmlSAX2GetEntity(ctxt, name);
7664
72.1k
  }
7665
4.08M
    }
7666
4.08M
    if (ctxt->instate == XML_PARSER_EOF)
7667
0
  return(NULL);
7668
    /*
7669
     * [ WFC: Entity Declared ]
7670
     * In a document without any DTD, a document with only an
7671
     * internal DTD subset which contains no parameter entity
7672
     * references, or a document with "standalone='yes'", the
7673
     * Name given in the entity reference must match that in an
7674
     * entity declaration, except that well-formed documents
7675
     * need not declare any of the following entities: amp, lt,
7676
     * gt, apos, quot.
7677
     * The declaration of a parameter entity must precede any
7678
     * reference to it.
7679
     * Similarly, the declaration of a general entity must
7680
     * precede any reference to it which appears in a default
7681
     * value in an attribute-list declaration. Note that if
7682
     * entities are declared in the external subset or in
7683
     * external parameter entities, a non-validating processor
7684
     * is not obligated to read and process their declarations;
7685
     * for such documents, the rule that an entity must be
7686
     * declared is a well-formedness constraint only if
7687
     * standalone='yes'.
7688
     */
7689
4.08M
    if (ent == NULL) {
7690
987k
  if ((ctxt->standalone == 1) ||
7691
987k
      ((ctxt->hasExternalSubset == 0) &&
7692
947k
       (ctxt->hasPErefs == 0))) {
7693
593k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7694
593k
         "Entity '%s' not defined\n", name);
7695
593k
  } else {
7696
394k
      xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7697
394k
         "Entity '%s' not defined\n", name);
7698
394k
      if ((ctxt->inSubset == 0) &&
7699
394k
    (ctxt->sax != NULL) &&
7700
394k
    (ctxt->sax->reference != NULL)) {
7701
384k
    ctxt->sax->reference(ctxt->userData, name);
7702
384k
      }
7703
394k
  }
7704
987k
  ctxt->valid = 0;
7705
987k
    }
7706
7707
    /*
7708
     * [ WFC: Parsed Entity ]
7709
     * An entity reference must not contain the name of an
7710
     * unparsed entity
7711
     */
7712
3.09M
    else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7713
1.13k
  xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7714
1.13k
     "Entity reference to unparsed entity %s\n", name);
7715
1.13k
    }
7716
7717
    /*
7718
     * [ WFC: No External Entity References ]
7719
     * Attribute values cannot contain direct or indirect
7720
     * entity references to external entities.
7721
     */
7722
3.09M
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7723
3.09M
       (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7724
8.13k
  xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7725
8.13k
       "Attribute references external entity '%s'\n", name);
7726
8.13k
    }
7727
    /*
7728
     * [ WFC: No < in Attribute Values ]
7729
     * The replacement text of any entity referred to directly or
7730
     * indirectly in an attribute value (other than "&lt;") must
7731
     * not contain a <.
7732
     */
7733
3.08M
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7734
3.08M
       (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7735
1.10M
  if ((ent->flags & XML_ENT_CHECKED_LT) == 0) {
7736
17.0k
            if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7737
478
                ent->flags |= XML_ENT_CONTAINS_LT;
7738
17.0k
            ent->flags |= XML_ENT_CHECKED_LT;
7739
17.0k
        }
7740
1.10M
        if (ent->flags & XML_ENT_CONTAINS_LT)
7741
10.2k
            xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7742
10.2k
                    "'<' in entity '%s' is not allowed in attributes "
7743
10.2k
                    "values\n", name);
7744
1.10M
    }
7745
7746
    /*
7747
     * Internal check, no parameter entities here ...
7748
     */
7749
1.98M
    else {
7750
1.98M
  switch (ent->etype) {
7751
0
      case XML_INTERNAL_PARAMETER_ENTITY:
7752
0
      case XML_EXTERNAL_PARAMETER_ENTITY:
7753
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7754
0
       "Attempt to reference the parameter entity '%s'\n",
7755
0
            name);
7756
0
      break;
7757
1.98M
      default:
7758
1.98M
      break;
7759
1.98M
  }
7760
1.98M
    }
7761
7762
    /*
7763
     * [ WFC: No Recursion ]
7764
     * A parsed entity must not contain a recursive reference
7765
     * to itself, either directly or indirectly.
7766
     * Done somewhere else
7767
     */
7768
4.08M
    return(ent);
7769
4.08M
}
7770
7771
/**
7772
 * xmlParseStringEntityRef:
7773
 * @ctxt:  an XML parser context
7774
 * @str:  a pointer to an index in the string
7775
 *
7776
 * parse ENTITY references declarations, but this version parses it from
7777
 * a string value.
7778
 *
7779
 * [68] EntityRef ::= '&' Name ';'
7780
 *
7781
 * [ WFC: Entity Declared ]
7782
 * In a document without any DTD, a document with only an internal DTD
7783
 * subset which contains no parameter entity references, or a document
7784
 * with "standalone='yes'", the Name given in the entity reference
7785
 * must match that in an entity declaration, except that well-formed
7786
 * documents need not declare any of the following entities: amp, lt,
7787
 * gt, apos, quot.  The declaration of a parameter entity must precede
7788
 * any reference to it.  Similarly, the declaration of a general entity
7789
 * must precede any reference to it which appears in a default value in an
7790
 * attribute-list declaration. Note that if entities are declared in the
7791
 * external subset or in external parameter entities, a non-validating
7792
 * processor is not obligated to read and process their declarations;
7793
 * for such documents, the rule that an entity must be declared is a
7794
 * well-formedness constraint only if standalone='yes'.
7795
 *
7796
 * [ WFC: Parsed Entity ]
7797
 * An entity reference must not contain the name of an unparsed entity
7798
 *
7799
 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7800
 * is updated to the current location in the string.
7801
 */
7802
static xmlEntityPtr
7803
12.7M
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7804
12.7M
    xmlChar *name;
7805
12.7M
    const xmlChar *ptr;
7806
12.7M
    xmlChar cur;
7807
12.7M
    xmlEntityPtr ent = NULL;
7808
7809
12.7M
    if ((str == NULL) || (*str == NULL))
7810
0
        return(NULL);
7811
12.7M
    ptr = *str;
7812
12.7M
    cur = *ptr;
7813
12.7M
    if (cur != '&')
7814
0
  return(NULL);
7815
7816
12.7M
    ptr++;
7817
12.7M
    name = xmlParseStringName(ctxt, &ptr);
7818
12.7M
    if (name == NULL) {
7819
3.94k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7820
3.94k
           "xmlParseStringEntityRef: no name\n");
7821
3.94k
  *str = ptr;
7822
3.94k
  return(NULL);
7823
3.94k
    }
7824
12.7M
    if (*ptr != ';') {
7825
6.43k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7826
6.43k
        xmlFree(name);
7827
6.43k
  *str = ptr;
7828
6.43k
  return(NULL);
7829
6.43k
    }
7830
12.7M
    ptr++;
7831
7832
7833
    /*
7834
     * Predefined entities override any extra definition
7835
     */
7836
12.7M
    if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7837
11.8M
        ent = xmlGetPredefinedEntity(name);
7838
11.8M
        if (ent != NULL) {
7839
43.7k
            xmlFree(name);
7840
43.7k
            *str = ptr;
7841
43.7k
            return(ent);
7842
43.7k
        }
7843
11.8M
    }
7844
7845
    /*
7846
     * Ask first SAX for entity resolution, otherwise try the
7847
     * entities which may have stored in the parser context.
7848
     */
7849
12.7M
    if (ctxt->sax != NULL) {
7850
12.7M
  if (ctxt->sax->getEntity != NULL)
7851
12.7M
      ent = ctxt->sax->getEntity(ctxt->userData, name);
7852
12.7M
  if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7853
27.6k
      ent = xmlGetPredefinedEntity(name);
7854
12.7M
  if ((ent == NULL) && (ctxt->userData==ctxt)) {
7855
83.2k
      ent = xmlSAX2GetEntity(ctxt, name);
7856
83.2k
  }
7857
12.7M
    }
7858
12.7M
    if (ctxt->instate == XML_PARSER_EOF) {
7859
0
  xmlFree(name);
7860
0
  return(NULL);
7861
0
    }
7862
7863
    /*
7864
     * [ WFC: Entity Declared ]
7865
     * In a document without any DTD, a document with only an
7866
     * internal DTD subset which contains no parameter entity
7867
     * references, or a document with "standalone='yes'", the
7868
     * Name given in the entity reference must match that in an
7869
     * entity declaration, except that well-formed documents
7870
     * need not declare any of the following entities: amp, lt,
7871
     * gt, apos, quot.
7872
     * The declaration of a parameter entity must precede any
7873
     * reference to it.
7874
     * Similarly, the declaration of a general entity must
7875
     * precede any reference to it which appears in a default
7876
     * value in an attribute-list declaration. Note that if
7877
     * entities are declared in the external subset or in
7878
     * external parameter entities, a non-validating processor
7879
     * is not obligated to read and process their declarations;
7880
     * for such documents, the rule that an entity must be
7881
     * declared is a well-formedness constraint only if
7882
     * standalone='yes'.
7883
     */
7884
12.7M
    if (ent == NULL) {
7885
83.2k
  if ((ctxt->standalone == 1) ||
7886
83.2k
      ((ctxt->hasExternalSubset == 0) &&
7887
74.4k
       (ctxt->hasPErefs == 0))) {
7888
74.4k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7889
74.4k
         "Entity '%s' not defined\n", name);
7890
74.4k
  } else {
7891
8.75k
      xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7892
8.75k
        "Entity '%s' not defined\n",
7893
8.75k
        name);
7894
8.75k
  }
7895
  /* TODO ? check regressions ctxt->valid = 0; */
7896
83.2k
    }
7897
7898
    /*
7899
     * [ WFC: Parsed Entity ]
7900
     * An entity reference must not contain the name of an
7901
     * unparsed entity
7902
     */
7903
12.6M
    else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7904
1.19k
  xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7905
1.19k
     "Entity reference to unparsed entity %s\n", name);
7906
1.19k
    }
7907
7908
    /*
7909
     * [ WFC: No External Entity References ]
7910
     * Attribute values cannot contain direct or indirect
7911
     * entity references to external entities.
7912
     */
7913
12.6M
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7914
12.6M
       (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7915
10.0k
  xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7916
10.0k
   "Attribute references external entity '%s'\n", name);
7917
10.0k
    }
7918
    /*
7919
     * [ WFC: No < in Attribute Values ]
7920
     * The replacement text of any entity referred to directly or
7921
     * indirectly in an attribute value (other than "&lt;") must
7922
     * not contain a <.
7923
     */
7924
12.6M
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7925
12.6M
       (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7926
12.3M
  if ((ent->flags & XML_ENT_CHECKED_LT) == 0) {
7927
5.87k
            if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7928
486
                ent->flags |= XML_ENT_CONTAINS_LT;
7929
5.87k
            ent->flags |= XML_ENT_CHECKED_LT;
7930
5.87k
        }
7931
12.3M
        if (ent->flags & XML_ENT_CONTAINS_LT)
7932
72.2k
            xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7933
72.2k
                    "'<' in entity '%s' is not allowed in attributes "
7934
72.2k
                    "values\n", name);
7935
12.3M
    }
7936
7937
    /*
7938
     * Internal check, no parameter entities here ...
7939
     */
7940
305k
    else {
7941
305k
  switch (ent->etype) {
7942
0
      case XML_INTERNAL_PARAMETER_ENTITY:
7943
0
      case XML_EXTERNAL_PARAMETER_ENTITY:
7944
0
    xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7945
0
       "Attempt to reference the parameter entity '%s'\n",
7946
0
          name);
7947
0
      break;
7948
305k
      default:
7949
305k
      break;
7950
305k
  }
7951
305k
    }
7952
7953
    /*
7954
     * [ WFC: No Recursion ]
7955
     * A parsed entity must not contain a recursive reference
7956
     * to itself, either directly or indirectly.
7957
     * Done somewhere else
7958
     */
7959
7960
12.7M
    xmlFree(name);
7961
12.7M
    *str = ptr;
7962
12.7M
    return(ent);
7963
12.7M
}
7964
7965
/**
7966
 * xmlParsePEReference:
7967
 * @ctxt:  an XML parser context
7968
 *
7969
 * DEPRECATED: Internal function, don't use.
7970
 *
7971
 * Parse a parameter entity reference. Always consumes '%'.
7972
 *
7973
 * The entity content is handled directly by pushing it's content as
7974
 * a new input stream.
7975
 *
7976
 * [69] PEReference ::= '%' Name ';'
7977
 *
7978
 * [ WFC: No Recursion ]
7979
 * A parsed entity must not contain a recursive
7980
 * reference to itself, either directly or indirectly.
7981
 *
7982
 * [ WFC: Entity Declared ]
7983
 * In a document without any DTD, a document with only an internal DTD
7984
 * subset which contains no parameter entity references, or a document
7985
 * with "standalone='yes'", ...  ... The declaration of a parameter
7986
 * entity must precede any reference to it...
7987
 *
7988
 * [ VC: Entity Declared ]
7989
 * In a document with an external subset or external parameter entities
7990
 * with "standalone='no'", ...  ... The declaration of a parameter entity
7991
 * must precede any reference to it...
7992
 *
7993
 * [ WFC: In DTD ]
7994
 * Parameter-entity references may only appear in the DTD.
7995
 * NOTE: misleading but this is handled.
7996
 */
7997
void
7998
xmlParsePEReference(xmlParserCtxtPtr ctxt)
7999
14.7M
{
8000
14.7M
    const xmlChar *name;
8001
14.7M
    xmlEntityPtr entity = NULL;
8002
14.7M
    xmlParserInputPtr input;
8003
8004
14.7M
    if (RAW != '%')
8005
0
        return;
8006
14.7M
    NEXT;
8007
14.7M
    name = xmlParseName(ctxt);
8008
14.7M
    if (name == NULL) {
8009
6.35k
  xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
8010
6.35k
  return;
8011
6.35k
    }
8012
14.7M
    if (xmlParserDebugEntities)
8013
0
  xmlGenericError(xmlGenericErrorContext,
8014
0
    "PEReference: %s\n", name);
8015
14.7M
    if (RAW != ';') {
8016
11.0k
  xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
8017
11.0k
        return;
8018
11.0k
    }
8019
8020
14.7M
    NEXT;
8021
8022
    /*
8023
     * Request the entity from SAX
8024
     */
8025
14.7M
    if ((ctxt->sax != NULL) &&
8026
14.7M
  (ctxt->sax->getParameterEntity != NULL))
8027
14.7M
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8028
14.7M
    if (ctxt->instate == XML_PARSER_EOF)
8029
0
  return;
8030
14.7M
    if (entity == NULL) {
8031
  /*
8032
   * [ WFC: Entity Declared ]
8033
   * In a document without any DTD, a document with only an
8034
   * internal DTD subset which contains no parameter entity
8035
   * references, or a document with "standalone='yes'", ...
8036
   * ... The declaration of a parameter entity must precede
8037
   * any reference to it...
8038
   */
8039
1.66M
  if ((ctxt->standalone == 1) ||
8040
1.66M
      ((ctxt->hasExternalSubset == 0) &&
8041
1.66M
       (ctxt->hasPErefs == 0))) {
8042
635
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8043
635
            "PEReference: %%%s; not found\n",
8044
635
            name);
8045
1.66M
  } else {
8046
      /*
8047
       * [ VC: Entity Declared ]
8048
       * In a document with an external subset or external
8049
       * parameter entities with "standalone='no'", ...
8050
       * ... The declaration of a parameter entity must
8051
       * precede any reference to it...
8052
       */
8053
1.66M
            if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
8054
13.0k
                xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
8055
13.0k
                                 "PEReference: %%%s; not found\n",
8056
13.0k
                                 name, NULL);
8057
13.0k
            } else
8058
1.64M
                xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8059
1.64M
                              "PEReference: %%%s; not found\n",
8060
1.64M
                              name, NULL);
8061
1.66M
            ctxt->valid = 0;
8062
1.66M
  }
8063
13.0M
    } else {
8064
  /*
8065
   * Internal checking in case the entity quest barfed
8066
   */
8067
13.0M
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8068
13.0M
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8069
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8070
0
      "Internal: %%%s; is not a parameter entity\n",
8071
0
        name, NULL);
8072
13.0M
  } else {
8073
13.0M
            xmlChar start[4];
8074
13.0M
            xmlCharEncoding enc;
8075
13.0M
            unsigned long parentConsumed;
8076
13.0M
            xmlEntityPtr oldEnt;
8077
8078
13.0M
      if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
8079
13.0M
          ((ctxt->options & XML_PARSE_NOENT) == 0) &&
8080
13.0M
    ((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
8081
13.0M
    ((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
8082
13.0M
    ((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
8083
13.0M
    (ctxt->replaceEntities == 0) &&
8084
13.0M
    (ctxt->validate == 0))
8085
714
    return;
8086
8087
13.0M
            if (entity->flags & XML_ENT_EXPANDING) {
8088
287
                xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
8089
287
                xmlHaltParser(ctxt);
8090
287
                return;
8091
287
            }
8092
8093
            /* Must be computed from old input before pushing new input. */
8094
13.0M
            parentConsumed = ctxt->input->parentConsumed;
8095
13.0M
            oldEnt = ctxt->input->entity;
8096
13.0M
            if ((oldEnt == NULL) ||
8097
13.0M
                ((oldEnt->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
8098
12.6M
                 ((oldEnt->flags & XML_ENT_PARSED) == 0))) {
8099
805k
                xmlSaturatedAdd(&parentConsumed, ctxt->input->consumed);
8100
805k
                xmlSaturatedAddSizeT(&parentConsumed,
8101
805k
                                     ctxt->input->cur - ctxt->input->base);
8102
805k
            }
8103
8104
13.0M
      input = xmlNewEntityInputStream(ctxt, entity);
8105
13.0M
      if (xmlPushInput(ctxt, input) < 0) {
8106
5.50k
                xmlFreeInputStream(input);
8107
5.50k
    return;
8108
5.50k
            }
8109
8110
13.0M
            entity->flags |= XML_ENT_EXPANDING;
8111
8112
13.0M
            input->parentConsumed = parentConsumed;
8113
8114
13.0M
      if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
8115
                /*
8116
                 * Get the 4 first bytes and decode the charset
8117
                 * if enc != XML_CHAR_ENCODING_NONE
8118
                 * plug some encoding conversion routines.
8119
                 * Note that, since we may have some non-UTF8
8120
                 * encoding (like UTF16, bug 135229), the 'length'
8121
                 * is not known, but we can calculate based upon
8122
                 * the amount of data in the buffer.
8123
                 */
8124
34.1k
                GROW
8125
34.1k
                if (ctxt->instate == XML_PARSER_EOF)
8126
0
                    return;
8127
34.1k
                if ((ctxt->input->end - ctxt->input->cur)>=4) {
8128
33.8k
                    start[0] = RAW;
8129
33.8k
                    start[1] = NXT(1);
8130
33.8k
                    start[2] = NXT(2);
8131
33.8k
                    start[3] = NXT(3);
8132
33.8k
                    enc = xmlDetectCharEncoding(start, 4);
8133
33.8k
                    if (enc != XML_CHAR_ENCODING_NONE) {
8134
17.4k
                        xmlSwitchEncoding(ctxt, enc);
8135
17.4k
                    }
8136
33.8k
                }
8137
8138
34.1k
                if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
8139
34.1k
                    (IS_BLANK_CH(NXT(5)))) {
8140
14.9k
                    xmlParseTextDecl(ctxt);
8141
14.9k
                }
8142
34.1k
            }
8143
13.0M
  }
8144
13.0M
    }
8145
14.7M
    ctxt->hasPErefs = 1;
8146
14.7M
}
8147
8148
/**
8149
 * xmlLoadEntityContent:
8150
 * @ctxt:  an XML parser context
8151
 * @entity: an unloaded system entity
8152
 *
8153
 * Load the original content of the given system entity from the
8154
 * ExternalID/SystemID given. This is to be used for Included in Literal
8155
 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
8156
 *
8157
 * Returns 0 in case of success and -1 in case of failure
8158
 */
8159
static int
8160
2.24k
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
8161
2.24k
    xmlParserInputPtr input;
8162
2.24k
    xmlBufferPtr buf;
8163
2.24k
    int l, c;
8164
2.24k
    int count = 0;
8165
8166
2.24k
    if ((ctxt == NULL) || (entity == NULL) ||
8167
2.24k
        ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
8168
2.24k
   (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
8169
2.24k
  (entity->content != NULL)) {
8170
0
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8171
0
              "xmlLoadEntityContent parameter error");
8172
0
        return(-1);
8173
0
    }
8174
8175
2.24k
    if (xmlParserDebugEntities)
8176
0
  xmlGenericError(xmlGenericErrorContext,
8177
0
    "Reading %s entity content input\n", entity->name);
8178
8179
2.24k
    buf = xmlBufferCreate();
8180
2.24k
    if (buf == NULL) {
8181
0
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8182
0
              "xmlLoadEntityContent parameter error");
8183
0
        return(-1);
8184
0
    }
8185
2.24k
    xmlBufferSetAllocationScheme(buf, XML_BUFFER_ALLOC_DOUBLEIT);
8186
8187
2.24k
    input = xmlNewEntityInputStream(ctxt, entity);
8188
2.24k
    if (input == NULL) {
8189
633
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8190
633
              "xmlLoadEntityContent input error");
8191
633
  xmlBufferFree(buf);
8192
633
        return(-1);
8193
633
    }
8194
8195
    /*
8196
     * Push the entity as the current input, read char by char
8197
     * saving to the buffer until the end of the entity or an error
8198
     */
8199
1.61k
    if (xmlPushInput(ctxt, input) < 0) {
8200
0
        xmlBufferFree(buf);
8201
0
  xmlFreeInputStream(input);
8202
0
  return(-1);
8203
0
    }
8204
8205
1.61k
    GROW;
8206
1.61k
    c = CUR_CHAR(l);
8207
10.3M
    while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
8208
10.3M
           (IS_CHAR(c))) {
8209
10.3M
        xmlBufferAdd(buf, ctxt->input->cur, l);
8210
10.3M
  if (count++ > XML_PARSER_CHUNK_SIZE) {
8211
100k
      count = 0;
8212
100k
      GROW;
8213
100k
            if (ctxt->instate == XML_PARSER_EOF) {
8214
0
                xmlBufferFree(buf);
8215
0
                return(-1);
8216
0
            }
8217
100k
  }
8218
10.3M
  NEXTL(l);
8219
10.3M
  c = CUR_CHAR(l);
8220
10.3M
  if (c == 0) {
8221
1.42k
      count = 0;
8222
1.42k
      GROW;
8223
1.42k
            if (ctxt->instate == XML_PARSER_EOF) {
8224
0
                xmlBufferFree(buf);
8225
0
                return(-1);
8226
0
            }
8227
1.42k
      c = CUR_CHAR(l);
8228
1.42k
  }
8229
10.3M
    }
8230
8231
1.61k
    if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
8232
1.14k
        xmlSaturatedAdd(&ctxt->sizeentities, ctxt->input->consumed);
8233
1.14k
        xmlPopInput(ctxt);
8234
1.14k
    } else if (!IS_CHAR(c)) {
8235
463
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8236
463
                          "xmlLoadEntityContent: invalid char value %d\n",
8237
463
                    c);
8238
463
  xmlBufferFree(buf);
8239
463
  return(-1);
8240
463
    }
8241
1.14k
    entity->content = buf->content;
8242
1.14k
    entity->length = buf->use;
8243
1.14k
    buf->content = NULL;
8244
1.14k
    xmlBufferFree(buf);
8245
8246
1.14k
    return(0);
8247
1.61k
}
8248
8249
/**
8250
 * xmlParseStringPEReference:
8251
 * @ctxt:  an XML parser context
8252
 * @str:  a pointer to an index in the string
8253
 *
8254
 * parse PEReference declarations
8255
 *
8256
 * [69] PEReference ::= '%' Name ';'
8257
 *
8258
 * [ WFC: No Recursion ]
8259
 * A parsed entity must not contain a recursive
8260
 * reference to itself, either directly or indirectly.
8261
 *
8262
 * [ WFC: Entity Declared ]
8263
 * In a document without any DTD, a document with only an internal DTD
8264
 * subset which contains no parameter entity references, or a document
8265
 * with "standalone='yes'", ...  ... The declaration of a parameter
8266
 * entity must precede any reference to it...
8267
 *
8268
 * [ VC: Entity Declared ]
8269
 * In a document with an external subset or external parameter entities
8270
 * with "standalone='no'", ...  ... The declaration of a parameter entity
8271
 * must precede any reference to it...
8272
 *
8273
 * [ WFC: In DTD ]
8274
 * Parameter-entity references may only appear in the DTD.
8275
 * NOTE: misleading but this is handled.
8276
 *
8277
 * Returns the xmlEntityPtr if found, or NULL otherwise.
8278
 *         str is updated to the current value of the index
8279
 */
8280
static xmlEntityPtr
8281
284k
xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8282
284k
    const xmlChar *ptr;
8283
284k
    xmlChar cur;
8284
284k
    xmlChar *name;
8285
284k
    xmlEntityPtr entity = NULL;
8286
8287
284k
    if ((str == NULL) || (*str == NULL)) return(NULL);
8288
284k
    ptr = *str;
8289
284k
    cur = *ptr;
8290
284k
    if (cur != '%')
8291
0
        return(NULL);
8292
284k
    ptr++;
8293
284k
    name = xmlParseStringName(ctxt, &ptr);
8294
284k
    if (name == NULL) {
8295
7.05k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8296
7.05k
           "xmlParseStringPEReference: no name\n");
8297
7.05k
  *str = ptr;
8298
7.05k
  return(NULL);
8299
7.05k
    }
8300
277k
    cur = *ptr;
8301
277k
    if (cur != ';') {
8302
1.74k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8303
1.74k
  xmlFree(name);
8304
1.74k
  *str = ptr;
8305
1.74k
  return(NULL);
8306
1.74k
    }
8307
275k
    ptr++;
8308
8309
    /*
8310
     * Request the entity from SAX
8311
     */
8312
275k
    if ((ctxt->sax != NULL) &&
8313
275k
  (ctxt->sax->getParameterEntity != NULL))
8314
275k
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8315
275k
    if (ctxt->instate == XML_PARSER_EOF) {
8316
0
  xmlFree(name);
8317
0
  *str = ptr;
8318
0
  return(NULL);
8319
0
    }
8320
275k
    if (entity == NULL) {
8321
  /*
8322
   * [ WFC: Entity Declared ]
8323
   * In a document without any DTD, a document with only an
8324
   * internal DTD subset which contains no parameter entity
8325
   * references, or a document with "standalone='yes'", ...
8326
   * ... The declaration of a parameter entity must precede
8327
   * any reference to it...
8328
   */
8329
32.3k
  if ((ctxt->standalone == 1) ||
8330
32.3k
      ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8331
240
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8332
240
     "PEReference: %%%s; not found\n", name);
8333
32.0k
  } else {
8334
      /*
8335
       * [ VC: Entity Declared ]
8336
       * In a document with an external subset or external
8337
       * parameter entities with "standalone='no'", ...
8338
       * ... The declaration of a parameter entity must
8339
       * precede any reference to it...
8340
       */
8341
32.0k
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8342
32.0k
        "PEReference: %%%s; not found\n",
8343
32.0k
        name, NULL);
8344
32.0k
      ctxt->valid = 0;
8345
32.0k
  }
8346
243k
    } else {
8347
  /*
8348
   * Internal checking in case the entity quest barfed
8349
   */
8350
243k
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8351
243k
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8352
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8353
0
        "%%%s; is not a parameter entity\n",
8354
0
        name, NULL);
8355
0
  }
8356
243k
    }
8357
275k
    ctxt->hasPErefs = 1;
8358
275k
    xmlFree(name);
8359
275k
    *str = ptr;
8360
275k
    return(entity);
8361
275k
}
8362
8363
/**
8364
 * xmlParseDocTypeDecl:
8365
 * @ctxt:  an XML parser context
8366
 *
8367
 * DEPRECATED: Internal function, don't use.
8368
 *
8369
 * parse a DOCTYPE declaration
8370
 *
8371
 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8372
 *                      ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8373
 *
8374
 * [ VC: Root Element Type ]
8375
 * The Name in the document type declaration must match the element
8376
 * type of the root element.
8377
 */
8378
8379
void
8380
152k
xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
8381
152k
    const xmlChar *name = NULL;
8382
152k
    xmlChar *ExternalID = NULL;
8383
152k
    xmlChar *URI = NULL;
8384
8385
    /*
8386
     * We know that '<!DOCTYPE' has been detected.
8387
     */
8388
152k
    SKIP(9);
8389
8390
152k
    SKIP_BLANKS;
8391
8392
    /*
8393
     * Parse the DOCTYPE name.
8394
     */
8395
152k
    name = xmlParseName(ctxt);
8396
152k
    if (name == NULL) {
8397
218
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8398
218
           "xmlParseDocTypeDecl : no DOCTYPE name !\n");
8399
218
    }
8400
152k
    ctxt->intSubName = name;
8401
8402
152k
    SKIP_BLANKS;
8403
8404
    /*
8405
     * Check for SystemID and ExternalID
8406
     */
8407
152k
    URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8408
8409
152k
    if ((URI != NULL) || (ExternalID != NULL)) {
8410
67.7k
        ctxt->hasExternalSubset = 1;
8411
67.7k
    }
8412
152k
    ctxt->extSubURI = URI;
8413
152k
    ctxt->extSubSystem = ExternalID;
8414
8415
152k
    SKIP_BLANKS;
8416
8417
    /*
8418
     * Create and update the internal subset.
8419
     */
8420
152k
    if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8421
152k
  (!ctxt->disableSAX))
8422
150k
  ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8423
152k
    if (ctxt->instate == XML_PARSER_EOF)
8424
0
  return;
8425
8426
    /*
8427
     * Is there any internal subset declarations ?
8428
     * they are handled separately in xmlParseInternalSubset()
8429
     */
8430
152k
    if (RAW == '[')
8431
119k
  return;
8432
8433
    /*
8434
     * We should be at the end of the DOCTYPE declaration.
8435
     */
8436
33.6k
    if (RAW != '>') {
8437
2.75k
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8438
2.75k
    }
8439
33.6k
    NEXT;
8440
33.6k
}
8441
8442
/**
8443
 * xmlParseInternalSubset:
8444
 * @ctxt:  an XML parser context
8445
 *
8446
 * parse the internal subset declaration
8447
 *
8448
 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8449
 */
8450
8451
static void
8452
119k
xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8453
    /*
8454
     * Is there any DTD definition ?
8455
     */
8456
119k
    if (RAW == '[') {
8457
119k
        int baseInputNr = ctxt->inputNr;
8458
119k
        ctxt->instate = XML_PARSER_DTD;
8459
119k
        NEXT;
8460
  /*
8461
   * Parse the succession of Markup declarations and
8462
   * PEReferences.
8463
   * Subsequence (markupdecl | PEReference | S)*
8464
   */
8465
119k
  SKIP_BLANKS;
8466
13.9M
  while (((RAW != ']') || (ctxt->inputNr > baseInputNr)) &&
8467
13.9M
               (ctxt->instate != XML_PARSER_EOF)) {
8468
8469
            /*
8470
             * Conditional sections are allowed from external entities included
8471
             * by PE References in the internal subset.
8472
             */
8473
13.8M
            if ((ctxt->inputNr > 1) && (ctxt->input->filename != NULL) &&
8474
13.8M
                (RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
8475
0
                xmlParseConditionalSections(ctxt);
8476
13.8M
            } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
8477
13.7M
          xmlParseMarkupDecl(ctxt);
8478
13.7M
            } else if (RAW == '%') {
8479
23.2k
          xmlParsePEReference(ctxt);
8480
23.2k
            } else {
8481
19.5k
    xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8482
19.5k
                        "xmlParseInternalSubset: error detected in"
8483
19.5k
                        " Markup declaration\n");
8484
19.5k
                xmlHaltParser(ctxt);
8485
19.5k
                return;
8486
19.5k
            }
8487
13.8M
      SKIP_BLANKS;
8488
13.8M
  }
8489
99.6k
  if (RAW == ']') {
8490
93.9k
      NEXT;
8491
93.9k
      SKIP_BLANKS;
8492
93.9k
  }
8493
99.6k
    }
8494
8495
    /*
8496
     * We should be at the end of the DOCTYPE declaration.
8497
     */
8498
99.6k
    if (RAW != '>') {
8499
6.14k
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8500
6.14k
  return;
8501
6.14k
    }
8502
93.4k
    NEXT;
8503
93.4k
}
8504
8505
#ifdef LIBXML_SAX1_ENABLED
8506
/**
8507
 * xmlParseAttribute:
8508
 * @ctxt:  an XML parser context
8509
 * @value:  a xmlChar ** used to store the value of the attribute
8510
 *
8511
 * DEPRECATED: Internal function, don't use.
8512
 *
8513
 * parse an attribute
8514
 *
8515
 * [41] Attribute ::= Name Eq AttValue
8516
 *
8517
 * [ WFC: No External Entity References ]
8518
 * Attribute values cannot contain direct or indirect entity references
8519
 * to external entities.
8520
 *
8521
 * [ WFC: No < in Attribute Values ]
8522
 * The replacement text of any entity referred to directly or indirectly in
8523
 * an attribute value (other than "&lt;") must not contain a <.
8524
 *
8525
 * [ VC: Attribute Value Type ]
8526
 * The attribute must have been declared; the value must be of the type
8527
 * declared for it.
8528
 *
8529
 * [25] Eq ::= S? '=' S?
8530
 *
8531
 * With namespace:
8532
 *
8533
 * [NS 11] Attribute ::= QName Eq AttValue
8534
 *
8535
 * Also the case QName == xmlns:??? is handled independently as a namespace
8536
 * definition.
8537
 *
8538
 * Returns the attribute name, and the value in *value.
8539
 */
8540
8541
const xmlChar *
8542
2.62M
xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8543
2.62M
    const xmlChar *name;
8544
2.62M
    xmlChar *val;
8545
8546
2.62M
    *value = NULL;
8547
2.62M
    GROW;
8548
2.62M
    name = xmlParseName(ctxt);
8549
2.62M
    if (name == NULL) {
8550
136k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8551
136k
                 "error parsing attribute name\n");
8552
136k
        return(NULL);
8553
136k
    }
8554
8555
    /*
8556
     * read the value
8557
     */
8558
2.49M
    SKIP_BLANKS;
8559
2.49M
    if (RAW == '=') {
8560
2.41M
        NEXT;
8561
2.41M
  SKIP_BLANKS;
8562
2.41M
  val = xmlParseAttValue(ctxt);
8563
2.41M
  ctxt->instate = XML_PARSER_CONTENT;
8564
2.41M
    } else {
8565
79.8k
  xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8566
79.8k
         "Specification mandates value for attribute %s\n", name);
8567
79.8k
  return(name);
8568
79.8k
    }
8569
8570
    /*
8571
     * Check that xml:lang conforms to the specification
8572
     * No more registered as an error, just generate a warning now
8573
     * since this was deprecated in XML second edition
8574
     */
8575
2.41M
    if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8576
4.00k
  if (!xmlCheckLanguageID(val)) {
8577
2.20k
      xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8578
2.20k
              "Malformed value for xml:lang : %s\n",
8579
2.20k
        val, NULL);
8580
2.20k
  }
8581
4.00k
    }
8582
8583
    /*
8584
     * Check that xml:space conforms to the specification
8585
     */
8586
2.41M
    if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8587
549
  if (xmlStrEqual(val, BAD_CAST "default"))
8588
36
      *(ctxt->space) = 0;
8589
513
  else if (xmlStrEqual(val, BAD_CAST "preserve"))
8590
262
      *(ctxt->space) = 1;
8591
251
  else {
8592
251
    xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8593
251
"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8594
251
                                 val, NULL);
8595
251
  }
8596
549
    }
8597
8598
2.41M
    *value = val;
8599
2.41M
    return(name);
8600
2.49M
}
8601
8602
/**
8603
 * xmlParseStartTag:
8604
 * @ctxt:  an XML parser context
8605
 *
8606
 * DEPRECATED: Internal function, don't use.
8607
 *
8608
 * Parse a start tag. Always consumes '<'.
8609
 *
8610
 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8611
 *
8612
 * [ WFC: Unique Att Spec ]
8613
 * No attribute name may appear more than once in the same start-tag or
8614
 * empty-element tag.
8615
 *
8616
 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8617
 *
8618
 * [ WFC: Unique Att Spec ]
8619
 * No attribute name may appear more than once in the same start-tag or
8620
 * empty-element tag.
8621
 *
8622
 * With namespace:
8623
 *
8624
 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8625
 *
8626
 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8627
 *
8628
 * Returns the element name parsed
8629
 */
8630
8631
const xmlChar *
8632
2.89M
xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8633
2.89M
    const xmlChar *name;
8634
2.89M
    const xmlChar *attname;
8635
2.89M
    xmlChar *attvalue;
8636
2.89M
    const xmlChar **atts = ctxt->atts;
8637
2.89M
    int nbatts = 0;
8638
2.89M
    int maxatts = ctxt->maxatts;
8639
2.89M
    int i;
8640
8641
2.89M
    if (RAW != '<') return(NULL);
8642
2.89M
    NEXT1;
8643
8644
2.89M
    name = xmlParseName(ctxt);
8645
2.89M
    if (name == NULL) {
8646
94.5k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8647
94.5k
       "xmlParseStartTag: invalid element name\n");
8648
94.5k
        return(NULL);
8649
94.5k
    }
8650
8651
    /*
8652
     * Now parse the attributes, it ends up with the ending
8653
     *
8654
     * (S Attribute)* S?
8655
     */
8656
2.80M
    SKIP_BLANKS;
8657
2.80M
    GROW;
8658
8659
3.80M
    while (((RAW != '>') &&
8660
3.80M
     ((RAW != '/') || (NXT(1) != '>')) &&
8661
3.80M
     (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
8662
2.62M
  attname = xmlParseAttribute(ctxt, &attvalue);
8663
2.62M
        if (attname == NULL) {
8664
136k
      xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8665
136k
         "xmlParseStartTag: problem parsing attributes\n");
8666
136k
      break;
8667
136k
  }
8668
2.49M
        if (attvalue != NULL) {
8669
      /*
8670
       * [ WFC: Unique Att Spec ]
8671
       * No attribute name may appear more than once in the same
8672
       * start-tag or empty-element tag.
8673
       */
8674
3.26M
      for (i = 0; i < nbatts;i += 2) {
8675
873k
          if (xmlStrEqual(atts[i], attname)) {
8676
4.45k
        xmlErrAttributeDup(ctxt, NULL, attname);
8677
4.45k
        xmlFree(attvalue);
8678
4.45k
        goto failed;
8679
4.45k
    }
8680
873k
      }
8681
      /*
8682
       * Add the pair to atts
8683
       */
8684
2.39M
      if (atts == NULL) {
8685
44.8k
          maxatts = 22; /* allow for 10 attrs by default */
8686
44.8k
          atts = (const xmlChar **)
8687
44.8k
           xmlMalloc(maxatts * sizeof(xmlChar *));
8688
44.8k
    if (atts == NULL) {
8689
0
        xmlErrMemory(ctxt, NULL);
8690
0
        if (attvalue != NULL)
8691
0
      xmlFree(attvalue);
8692
0
        goto failed;
8693
0
    }
8694
44.8k
    ctxt->atts = atts;
8695
44.8k
    ctxt->maxatts = maxatts;
8696
2.35M
      } else if (nbatts + 4 > maxatts) {
8697
63
          const xmlChar **n;
8698
8699
63
          maxatts *= 2;
8700
63
          n = (const xmlChar **) xmlRealloc((void *) atts,
8701
63
               maxatts * sizeof(const xmlChar *));
8702
63
    if (n == NULL) {
8703
0
        xmlErrMemory(ctxt, NULL);
8704
0
        if (attvalue != NULL)
8705
0
      xmlFree(attvalue);
8706
0
        goto failed;
8707
0
    }
8708
63
    atts = n;
8709
63
    ctxt->atts = atts;
8710
63
    ctxt->maxatts = maxatts;
8711
63
      }
8712
2.39M
      atts[nbatts++] = attname;
8713
2.39M
      atts[nbatts++] = attvalue;
8714
2.39M
      atts[nbatts] = NULL;
8715
2.39M
      atts[nbatts + 1] = NULL;
8716
2.39M
  } else {
8717
90.8k
      if (attvalue != NULL)
8718
0
    xmlFree(attvalue);
8719
90.8k
  }
8720
8721
2.49M
failed:
8722
8723
2.49M
  GROW
8724
2.49M
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8725
1.49M
      break;
8726
1.00M
  if (SKIP_BLANKS == 0) {
8727
156k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8728
156k
         "attributes construct error\n");
8729
156k
  }
8730
1.00M
  SHRINK;
8731
1.00M
        GROW;
8732
1.00M
    }
8733
8734
    /*
8735
     * SAX: Start of Element !
8736
     */
8737
2.80M
    if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8738
2.80M
  (!ctxt->disableSAX)) {
8739
2.46M
  if (nbatts > 0)
8740
1.33M
      ctxt->sax->startElement(ctxt->userData, name, atts);
8741
1.13M
  else
8742
1.13M
      ctxt->sax->startElement(ctxt->userData, name, NULL);
8743
2.46M
    }
8744
8745
2.80M
    if (atts != NULL) {
8746
        /* Free only the content strings */
8747
5.04M
        for (i = 1;i < nbatts;i+=2)
8748
2.39M
      if (atts[i] != NULL)
8749
2.39M
         xmlFree((xmlChar *) atts[i]);
8750
2.65M
    }
8751
2.80M
    return(name);
8752
2.80M
}
8753
8754
/**
8755
 * xmlParseEndTag1:
8756
 * @ctxt:  an XML parser context
8757
 * @line:  line of the start tag
8758
 * @nsNr:  number of namespaces on the start tag
8759
 *
8760
 * Parse an end tag. Always consumes '</'.
8761
 *
8762
 * [42] ETag ::= '</' Name S? '>'
8763
 *
8764
 * With namespace
8765
 *
8766
 * [NS 9] ETag ::= '</' QName S? '>'
8767
 */
8768
8769
static void
8770
1.65M
xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8771
1.65M
    const xmlChar *name;
8772
8773
1.65M
    GROW;
8774
1.65M
    if ((RAW != '<') || (NXT(1) != '/')) {
8775
0
  xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8776
0
           "xmlParseEndTag: '</' not found\n");
8777
0
  return;
8778
0
    }
8779
1.65M
    SKIP(2);
8780
8781
1.65M
    name = xmlParseNameAndCompare(ctxt,ctxt->name);
8782
8783
    /*
8784
     * We should definitely be at the ending "S? '>'" part
8785
     */
8786
1.65M
    GROW;
8787
1.65M
    SKIP_BLANKS;
8788
1.65M
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8789
18.4k
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8790
18.4k
    } else
8791
1.63M
  NEXT1;
8792
8793
    /*
8794
     * [ WFC: Element Type Match ]
8795
     * The Name in an element's end-tag must match the element type in the
8796
     * start-tag.
8797
     *
8798
     */
8799
1.65M
    if (name != (xmlChar*)1) {
8800
70.0k
        if (name == NULL) name = BAD_CAST "unparsable";
8801
70.0k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8802
70.0k
         "Opening and ending tag mismatch: %s line %d and %s\n",
8803
70.0k
                    ctxt->name, line, name);
8804
70.0k
    }
8805
8806
    /*
8807
     * SAX: End of Tag
8808
     */
8809
1.65M
    if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8810
1.65M
  (!ctxt->disableSAX))
8811
1.51M
        ctxt->sax->endElement(ctxt->userData, ctxt->name);
8812
8813
1.65M
    namePop(ctxt);
8814
1.65M
    spacePop(ctxt);
8815
1.65M
    return;
8816
1.65M
}
8817
8818
/**
8819
 * xmlParseEndTag:
8820
 * @ctxt:  an XML parser context
8821
 *
8822
 * DEPRECATED: Internal function, don't use.
8823
 *
8824
 * parse an end of tag
8825
 *
8826
 * [42] ETag ::= '</' Name S? '>'
8827
 *
8828
 * With namespace
8829
 *
8830
 * [NS 9] ETag ::= '</' QName S? '>'
8831
 */
8832
8833
void
8834
0
xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8835
0
    xmlParseEndTag1(ctxt, 0);
8836
0
}
8837
#endif /* LIBXML_SAX1_ENABLED */
8838
8839
/************************************************************************
8840
 *                  *
8841
 *          SAX 2 specific operations       *
8842
 *                  *
8843
 ************************************************************************/
8844
8845
/*
8846
 * xmlGetNamespace:
8847
 * @ctxt:  an XML parser context
8848
 * @prefix:  the prefix to lookup
8849
 *
8850
 * Lookup the namespace name for the @prefix (which ca be NULL)
8851
 * The prefix must come from the @ctxt->dict dictionary
8852
 *
8853
 * Returns the namespace name or NULL if not bound
8854
 */
8855
static const xmlChar *
8856
4.66M
xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8857
4.66M
    int i;
8858
8859
4.66M
    if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
8860
5.27M
    for (i = ctxt->nsNr - 2;i >= 0;i-=2)
8861
1.03M
        if (ctxt->nsTab[i] == prefix) {
8862
362k
      if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8863
7.01k
          return(NULL);
8864
355k
      return(ctxt->nsTab[i + 1]);
8865
362k
  }
8866
4.23M
    return(NULL);
8867
4.59M
}
8868
8869
/**
8870
 * xmlParseQName:
8871
 * @ctxt:  an XML parser context
8872
 * @prefix:  pointer to store the prefix part
8873
 *
8874
 * parse an XML Namespace QName
8875
 *
8876
 * [6]  QName  ::= (Prefix ':')? LocalPart
8877
 * [7]  Prefix  ::= NCName
8878
 * [8]  LocalPart  ::= NCName
8879
 *
8880
 * Returns the Name parsed or NULL
8881
 */
8882
8883
static const xmlChar *
8884
9.61M
xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8885
9.61M
    const xmlChar *l, *p;
8886
8887
9.61M
    GROW;
8888
8889
9.61M
    l = xmlParseNCName(ctxt);
8890
9.61M
    if (l == NULL) {
8891
225k
        if (CUR == ':') {
8892
5.30k
      l = xmlParseName(ctxt);
8893
5.30k
      if (l != NULL) {
8894
5.30k
          xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8895
5.30k
             "Failed to parse QName '%s'\n", l, NULL, NULL);
8896
5.30k
    *prefix = NULL;
8897
5.30k
    return(l);
8898
5.30k
      }
8899
5.30k
  }
8900
220k
        return(NULL);
8901
225k
    }
8902
9.39M
    if (CUR == ':') {
8903
512k
        NEXT;
8904
512k
  p = l;
8905
512k
  l = xmlParseNCName(ctxt);
8906
512k
  if (l == NULL) {
8907
6.78k
      xmlChar *tmp;
8908
8909
6.78k
            if (ctxt->instate == XML_PARSER_EOF)
8910
0
                return(NULL);
8911
6.78k
            xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8912
6.78k
               "Failed to parse QName '%s:'\n", p, NULL, NULL);
8913
6.78k
      l = xmlParseNmtoken(ctxt);
8914
6.78k
      if (l == NULL) {
8915
5.08k
                if (ctxt->instate == XML_PARSER_EOF)
8916
0
                    return(NULL);
8917
5.08k
    tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8918
5.08k
            } else {
8919
1.70k
    tmp = xmlBuildQName(l, p, NULL, 0);
8920
1.70k
    xmlFree((char *)l);
8921
1.70k
      }
8922
6.78k
      p = xmlDictLookup(ctxt->dict, tmp, -1);
8923
6.78k
      if (tmp != NULL) xmlFree(tmp);
8924
6.78k
      *prefix = NULL;
8925
6.78k
      return(p);
8926
6.78k
  }
8927
505k
  if (CUR == ':') {
8928
6.16k
      xmlChar *tmp;
8929
8930
6.16k
            xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8931
6.16k
               "Failed to parse QName '%s:%s:'\n", p, l, NULL);
8932
6.16k
      NEXT;
8933
6.16k
      tmp = (xmlChar *) xmlParseName(ctxt);
8934
6.16k
      if (tmp != NULL) {
8935
5.09k
          tmp = xmlBuildQName(tmp, l, NULL, 0);
8936
5.09k
    l = xmlDictLookup(ctxt->dict, tmp, -1);
8937
5.09k
    if (tmp != NULL) xmlFree(tmp);
8938
5.09k
    *prefix = p;
8939
5.09k
    return(l);
8940
5.09k
      }
8941
1.06k
            if (ctxt->instate == XML_PARSER_EOF)
8942
0
                return(NULL);
8943
1.06k
      tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8944
1.06k
      l = xmlDictLookup(ctxt->dict, tmp, -1);
8945
1.06k
      if (tmp != NULL) xmlFree(tmp);
8946
1.06k
      *prefix = p;
8947
1.06k
      return(l);
8948
1.06k
  }
8949
499k
  *prefix = p;
8950
499k
    } else
8951
8.88M
        *prefix = NULL;
8952
9.38M
    return(l);
8953
9.39M
}
8954
8955
/**
8956
 * xmlParseQNameAndCompare:
8957
 * @ctxt:  an XML parser context
8958
 * @name:  the localname
8959
 * @prefix:  the prefix, if any.
8960
 *
8961
 * parse an XML name and compares for match
8962
 * (specialized for endtag parsing)
8963
 *
8964
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8965
 * and the name for mismatch
8966
 */
8967
8968
static const xmlChar *
8969
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8970
121k
                        xmlChar const *prefix) {
8971
121k
    const xmlChar *cmp;
8972
121k
    const xmlChar *in;
8973
121k
    const xmlChar *ret;
8974
121k
    const xmlChar *prefix2;
8975
8976
121k
    if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8977
8978
121k
    GROW;
8979
121k
    in = ctxt->input->cur;
8980
8981
121k
    cmp = prefix;
8982
433k
    while (*in != 0 && *in == *cmp) {
8983
311k
  ++in;
8984
311k
  ++cmp;
8985
311k
    }
8986
121k
    if ((*cmp == 0) && (*in == ':')) {
8987
109k
        in++;
8988
109k
  cmp = name;
8989
879k
  while (*in != 0 && *in == *cmp) {
8990
770k
      ++in;
8991
770k
      ++cmp;
8992
770k
  }
8993
109k
  if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8994
      /* success */
8995
94.8k
            ctxt->input->col += in - ctxt->input->cur;
8996
94.8k
      ctxt->input->cur = in;
8997
94.8k
      return((const xmlChar*) 1);
8998
94.8k
  }
8999
109k
    }
9000
    /*
9001
     * all strings coms from the dictionary, equality can be done directly
9002
     */
9003
26.6k
    ret = xmlParseQName (ctxt, &prefix2);
9004
26.6k
    if ((ret == name) && (prefix == prefix2))
9005
386
  return((const xmlChar*) 1);
9006
26.2k
    return ret;
9007
26.6k
}
9008
9009
/**
9010
 * xmlParseAttValueInternal:
9011
 * @ctxt:  an XML parser context
9012
 * @len:  attribute len result
9013
 * @alloc:  whether the attribute was reallocated as a new string
9014
 * @normalize:  if 1 then further non-CDATA normalization must be done
9015
 *
9016
 * parse a value for an attribute.
9017
 * NOTE: if no normalization is needed, the routine will return pointers
9018
 *       directly from the data buffer.
9019
 *
9020
 * 3.3.3 Attribute-Value Normalization:
9021
 * Before the value of an attribute is passed to the application or
9022
 * checked for validity, the XML processor must normalize it as follows:
9023
 * - a character reference is processed by appending the referenced
9024
 *   character to the attribute value
9025
 * - an entity reference is processed by recursively processing the
9026
 *   replacement text of the entity
9027
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
9028
 *   appending #x20 to the normalized value, except that only a single
9029
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
9030
 *   parsed entity or the literal entity value of an internal parsed entity
9031
 * - other characters are processed by appending them to the normalized value
9032
 * If the declared value is not CDATA, then the XML processor must further
9033
 * process the normalized attribute value by discarding any leading and
9034
 * trailing space (#x20) characters, and by replacing sequences of space
9035
 * (#x20) characters by a single space (#x20) character.
9036
 * All attributes for which no declaration has been read should be treated
9037
 * by a non-validating parser as if declared CDATA.
9038
 *
9039
 * Returns the AttValue parsed or NULL. The value has to be freed by the
9040
 *     caller if it was copied, this can be detected by val[*len] == 0.
9041
 */
9042
9043
#define GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end) \
9044
6.32k
    const xmlChar *oldbase = ctxt->input->base;\
9045
6.32k
    GROW;\
9046
6.32k
    if (ctxt->instate == XML_PARSER_EOF)\
9047
6.32k
        return(NULL);\
9048
6.32k
    if (oldbase != ctxt->input->base) {\
9049
0
        ptrdiff_t delta = ctxt->input->base - oldbase;\
9050
0
        start = start + delta;\
9051
0
        in = in + delta;\
9052
0
    }\
9053
6.32k
    end = ctxt->input->end;
9054
9055
static xmlChar *
9056
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
9057
                         int normalize)
9058
7.54M
{
9059
7.54M
    xmlChar limit = 0;
9060
7.54M
    const xmlChar *in = NULL, *start, *end, *last;
9061
7.54M
    xmlChar *ret = NULL;
9062
7.54M
    int line, col;
9063
7.54M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9064
2.72M
                    XML_MAX_HUGE_LENGTH :
9065
7.54M
                    XML_MAX_TEXT_LENGTH;
9066
9067
7.54M
    GROW;
9068
7.54M
    in = (xmlChar *) CUR_PTR;
9069
7.54M
    line = ctxt->input->line;
9070
7.54M
    col = ctxt->input->col;
9071
7.54M
    if (*in != '"' && *in != '\'') {
9072
28.4k
        xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
9073
28.4k
        return (NULL);
9074
28.4k
    }
9075
7.51M
    ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
9076
9077
    /*
9078
     * try to handle in this routine the most common case where no
9079
     * allocation of a new string is required and where content is
9080
     * pure ASCII.
9081
     */
9082
7.51M
    limit = *in++;
9083
7.51M
    col++;
9084
7.51M
    end = ctxt->input->end;
9085
7.51M
    start = in;
9086
7.51M
    if (in >= end) {
9087
381
        GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9088
381
    }
9089
7.51M
    if (normalize) {
9090
        /*
9091
   * Skip any leading spaces
9092
   */
9093
346k
  while ((in < end) && (*in != limit) &&
9094
346k
         ((*in == 0x20) || (*in == 0x9) ||
9095
344k
          (*in == 0xA) || (*in == 0xD))) {
9096
156k
      if (*in == 0xA) {
9097
21.5k
          line++; col = 1;
9098
134k
      } else {
9099
134k
          col++;
9100
134k
      }
9101
156k
      in++;
9102
156k
      start = in;
9103
156k
      if (in >= end) {
9104
108
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9105
108
                if ((in - start) > maxLength) {
9106
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9107
0
                                   "AttValue length too long\n");
9108
0
                    return(NULL);
9109
0
                }
9110
108
      }
9111
156k
  }
9112
1.90M
  while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9113
1.90M
         (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9114
1.71M
      col++;
9115
1.71M
      if ((*in++ == 0x20) && (*in == 0x20)) break;
9116
1.71M
      if (in >= end) {
9117
146
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9118
146
                if ((in - start) > maxLength) {
9119
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9120
0
                                   "AttValue length too long\n");
9121
0
                    return(NULL);
9122
0
                }
9123
146
      }
9124
1.71M
  }
9125
189k
  last = in;
9126
  /*
9127
   * skip the trailing blanks
9128
   */
9129
192k
  while ((last[-1] == 0x20) && (last > start)) last--;
9130
331k
  while ((in < end) && (*in != limit) &&
9131
331k
         ((*in == 0x20) || (*in == 0x9) ||
9132
165k
          (*in == 0xA) || (*in == 0xD))) {
9133
141k
      if (*in == 0xA) {
9134
11.5k
          line++, col = 1;
9135
130k
      } else {
9136
130k
          col++;
9137
130k
      }
9138
141k
      in++;
9139
141k
      if (in >= end) {
9140
135
    const xmlChar *oldbase = ctxt->input->base;
9141
135
    GROW;
9142
135
                if (ctxt->instate == XML_PARSER_EOF)
9143
0
                    return(NULL);
9144
135
    if (oldbase != ctxt->input->base) {
9145
0
        ptrdiff_t delta = ctxt->input->base - oldbase;
9146
0
        start = start + delta;
9147
0
        in = in + delta;
9148
0
        last = last + delta;
9149
0
    }
9150
135
    end = ctxt->input->end;
9151
135
                if ((in - start) > maxLength) {
9152
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9153
0
                                   "AttValue length too long\n");
9154
0
                    return(NULL);
9155
0
                }
9156
135
      }
9157
141k
  }
9158
189k
        if ((in - start) > maxLength) {
9159
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9160
0
                           "AttValue length too long\n");
9161
0
            return(NULL);
9162
0
        }
9163
189k
  if (*in != limit) goto need_complex;
9164
7.32M
    } else {
9165
76.1M
  while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9166
76.1M
         (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9167
68.7M
      in++;
9168
68.7M
      col++;
9169
68.7M
      if (in >= end) {
9170
5.69k
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9171
5.69k
                if ((in - start) > maxLength) {
9172
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9173
0
                                   "AttValue length too long\n");
9174
0
                    return(NULL);
9175
0
                }
9176
5.69k
      }
9177
68.7M
  }
9178
7.32M
  last = in;
9179
7.32M
        if ((in - start) > maxLength) {
9180
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9181
0
                           "AttValue length too long\n");
9182
0
            return(NULL);
9183
0
        }
9184
7.32M
  if (*in != limit) goto need_complex;
9185
7.32M
    }
9186
7.15M
    in++;
9187
7.15M
    col++;
9188
7.15M
    if (len != NULL) {
9189
4.72M
        if (alloc) *alloc = 0;
9190
4.72M
        *len = last - start;
9191
4.72M
        ret = (xmlChar *) start;
9192
4.72M
    } else {
9193
2.42M
        if (alloc) *alloc = 1;
9194
2.42M
        ret = xmlStrndup(start, last - start);
9195
2.42M
    }
9196
7.15M
    CUR_PTR = in;
9197
7.15M
    ctxt->input->line = line;
9198
7.15M
    ctxt->input->col = col;
9199
7.15M
    return ret;
9200
359k
need_complex:
9201
359k
    if (alloc) *alloc = 1;
9202
359k
    return xmlParseAttValueComplex(ctxt, len, normalize);
9203
7.51M
}
9204
9205
/**
9206
 * xmlParseAttribute2:
9207
 * @ctxt:  an XML parser context
9208
 * @pref:  the element prefix
9209
 * @elem:  the element name
9210
 * @prefix:  a xmlChar ** used to store the value of the attribute prefix
9211
 * @value:  a xmlChar ** used to store the value of the attribute
9212
 * @len:  an int * to save the length of the attribute
9213
 * @alloc:  an int * to indicate if the attribute was allocated
9214
 *
9215
 * parse an attribute in the new SAX2 framework.
9216
 *
9217
 * Returns the attribute name, and the value in *value, .
9218
 */
9219
9220
static const xmlChar *
9221
xmlParseAttribute2(xmlParserCtxtPtr ctxt,
9222
                   const xmlChar * pref, const xmlChar * elem,
9223
                   const xmlChar ** prefix, xmlChar ** value,
9224
                   int *len, int *alloc)
9225
5.05M
{
9226
5.05M
    const xmlChar *name;
9227
5.05M
    xmlChar *val, *internal_val = NULL;
9228
5.05M
    int normalize = 0;
9229
9230
5.05M
    *value = NULL;
9231
5.05M
    GROW;
9232
5.05M
    name = xmlParseQName(ctxt, prefix);
9233
5.05M
    if (name == NULL) {
9234
78.7k
        xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9235
78.7k
                       "error parsing attribute name\n");
9236
78.7k
        return (NULL);
9237
78.7k
    }
9238
9239
    /*
9240
     * get the type if needed
9241
     */
9242
4.97M
    if (ctxt->attsSpecial != NULL) {
9243
819k
        int type;
9244
9245
819k
        type = (int) (ptrdiff_t) xmlHashQLookup2(ctxt->attsSpecial,
9246
819k
                                                 pref, elem, *prefix, name);
9247
819k
        if (type != 0)
9248
190k
            normalize = 1;
9249
819k
    }
9250
9251
    /*
9252
     * read the value
9253
     */
9254
4.97M
    SKIP_BLANKS;
9255
4.97M
    if (RAW == '=') {
9256
4.92M
        NEXT;
9257
4.92M
        SKIP_BLANKS;
9258
4.92M
        val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9259
4.92M
        if (val == NULL)
9260
14.3k
            return (NULL);
9261
4.91M
  if (normalize) {
9262
      /*
9263
       * Sometimes a second normalisation pass for spaces is needed
9264
       * but that only happens if charrefs or entities references
9265
       * have been used in the attribute value, i.e. the attribute
9266
       * value have been extracted in an allocated string already.
9267
       */
9268
189k
      if (*alloc) {
9269
23.9k
          const xmlChar *val2;
9270
9271
23.9k
          val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
9272
23.9k
    if ((val2 != NULL) && (val2 != val)) {
9273
4.83k
        xmlFree(val);
9274
4.83k
        val = (xmlChar *) val2;
9275
4.83k
    }
9276
23.9k
      }
9277
189k
  }
9278
4.91M
        ctxt->instate = XML_PARSER_CONTENT;
9279
4.91M
    } else {
9280
47.4k
        xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
9281
47.4k
                          "Specification mandates value for attribute %s\n",
9282
47.4k
                          name);
9283
47.4k
        return (name);
9284
47.4k
    }
9285
9286
4.91M
    if (*prefix == ctxt->str_xml) {
9287
        /*
9288
         * Check that xml:lang conforms to the specification
9289
         * No more registered as an error, just generate a warning now
9290
         * since this was deprecated in XML second edition
9291
         */
9292
38.5k
        if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
9293
7.44k
            internal_val = xmlStrndup(val, *len);
9294
7.44k
            if (!xmlCheckLanguageID(internal_val)) {
9295
3.35k
                xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9296
3.35k
                              "Malformed value for xml:lang : %s\n",
9297
3.35k
                              internal_val, NULL);
9298
3.35k
            }
9299
7.44k
        }
9300
9301
        /*
9302
         * Check that xml:space conforms to the specification
9303
         */
9304
38.5k
        if (xmlStrEqual(name, BAD_CAST "space")) {
9305
553
            internal_val = xmlStrndup(val, *len);
9306
553
            if (xmlStrEqual(internal_val, BAD_CAST "default"))
9307
37
                *(ctxt->space) = 0;
9308
516
            else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9309
201
                *(ctxt->space) = 1;
9310
315
            else {
9311
315
                xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9312
315
                              "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9313
315
                              internal_val, NULL);
9314
315
            }
9315
553
        }
9316
38.5k
        if (internal_val) {
9317
7.99k
            xmlFree(internal_val);
9318
7.99k
        }
9319
38.5k
    }
9320
9321
4.91M
    *value = val;
9322
4.91M
    return (name);
9323
4.97M
}
9324
/**
9325
 * xmlParseStartTag2:
9326
 * @ctxt:  an XML parser context
9327
 *
9328
 * Parse a start tag. Always consumes '<'.
9329
 *
9330
 * This routine is called when running SAX2 parsing
9331
 *
9332
 * [40] STag ::= '<' Name (S Attribute)* S? '>'
9333
 *
9334
 * [ WFC: Unique Att Spec ]
9335
 * No attribute name may appear more than once in the same start-tag or
9336
 * empty-element tag.
9337
 *
9338
 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9339
 *
9340
 * [ WFC: Unique Att Spec ]
9341
 * No attribute name may appear more than once in the same start-tag or
9342
 * empty-element tag.
9343
 *
9344
 * With namespace:
9345
 *
9346
 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9347
 *
9348
 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9349
 *
9350
 * Returns the element name parsed
9351
 */
9352
9353
static const xmlChar *
9354
xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
9355
4.54M
                  const xmlChar **URI, int *tlen) {
9356
4.54M
    const xmlChar *localname;
9357
4.54M
    const xmlChar *prefix;
9358
4.54M
    const xmlChar *attname;
9359
4.54M
    const xmlChar *aprefix;
9360
4.54M
    const xmlChar *nsname;
9361
4.54M
    xmlChar *attvalue;
9362
4.54M
    const xmlChar **atts = ctxt->atts;
9363
4.54M
    int maxatts = ctxt->maxatts;
9364
4.54M
    int nratts, nbatts, nbdef, inputid;
9365
4.54M
    int i, j, nbNs, attval;
9366
4.54M
    unsigned long cur;
9367
4.54M
    int nsNr = ctxt->nsNr;
9368
9369
4.54M
    if (RAW != '<') return(NULL);
9370
4.54M
    NEXT1;
9371
9372
    /*
9373
     * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
9374
     *       point since the attribute values may be stored as pointers to
9375
     *       the buffer and calling SHRINK would destroy them !
9376
     *       The Shrinking is only possible once the full set of attribute
9377
     *       callbacks have been done.
9378
     */
9379
4.54M
    SHRINK;
9380
4.54M
    cur = ctxt->input->cur - ctxt->input->base;
9381
4.54M
    inputid = ctxt->input->id;
9382
4.54M
    nbatts = 0;
9383
4.54M
    nratts = 0;
9384
4.54M
    nbdef = 0;
9385
4.54M
    nbNs = 0;
9386
4.54M
    attval = 0;
9387
    /* Forget any namespaces added during an earlier parse of this element. */
9388
4.54M
    ctxt->nsNr = nsNr;
9389
9390
4.54M
    localname = xmlParseQName(ctxt, &prefix);
9391
4.54M
    if (localname == NULL) {
9392
141k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9393
141k
           "StartTag: invalid element name\n");
9394
141k
        return(NULL);
9395
141k
    }
9396
4.39M
    *tlen = ctxt->input->cur - ctxt->input->base - cur;
9397
9398
    /*
9399
     * Now parse the attributes, it ends up with the ending
9400
     *
9401
     * (S Attribute)* S?
9402
     */
9403
4.39M
    SKIP_BLANKS;
9404
4.39M
    GROW;
9405
9406
6.41M
    while (((RAW != '>') &&
9407
6.41M
     ((RAW != '/') || (NXT(1) != '>')) &&
9408
6.41M
     (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
9409
5.05M
  int len = -1, alloc = 0;
9410
9411
5.05M
  attname = xmlParseAttribute2(ctxt, prefix, localname,
9412
5.05M
                               &aprefix, &attvalue, &len, &alloc);
9413
5.05M
        if (attname == NULL) {
9414
93.1k
      xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9415
93.1k
           "xmlParseStartTag: problem parsing attributes\n");
9416
93.1k
      break;
9417
93.1k
  }
9418
4.95M
        if (attvalue == NULL)
9419
47.4k
            goto next_attr;
9420
4.91M
  if (len < 0) len = xmlStrlen(attvalue);
9421
9422
4.91M
        if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9423
30.0k
            const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9424
30.0k
            xmlURIPtr uri;
9425
9426
30.0k
            if (URL == NULL) {
9427
0
                xmlErrMemory(ctxt, "dictionary allocation failure");
9428
0
                if ((attvalue != NULL) && (alloc != 0))
9429
0
                    xmlFree(attvalue);
9430
0
                localname = NULL;
9431
0
                goto done;
9432
0
            }
9433
30.0k
            if (*URL != 0) {
9434
29.3k
                uri = xmlParseURI((const char *) URL);
9435
29.3k
                if (uri == NULL) {
9436
14.1k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
9437
14.1k
                             "xmlns: '%s' is not a valid URI\n",
9438
14.1k
                                       URL, NULL, NULL);
9439
15.2k
                } else {
9440
15.2k
                    if (uri->scheme == NULL) {
9441
2.40k
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9442
2.40k
                                  "xmlns: URI %s is not absolute\n",
9443
2.40k
                                  URL, NULL, NULL);
9444
2.40k
                    }
9445
15.2k
                    xmlFreeURI(uri);
9446
15.2k
                }
9447
29.3k
                if (URL == ctxt->str_xml_ns) {
9448
0
                    if (attname != ctxt->str_xml) {
9449
0
                        xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9450
0
                     "xml namespace URI cannot be the default namespace\n",
9451
0
                                 NULL, NULL, NULL);
9452
0
                    }
9453
0
                    goto next_attr;
9454
0
                }
9455
29.3k
                if ((len == 29) &&
9456
29.3k
                    (xmlStrEqual(URL,
9457
477
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9458
227
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9459
227
                         "reuse of the xmlns namespace name is forbidden\n",
9460
227
                             NULL, NULL, NULL);
9461
227
                    goto next_attr;
9462
227
                }
9463
29.3k
            }
9464
            /*
9465
             * check that it's not a defined namespace
9466
             */
9467
34.4k
            for (j = 1;j <= nbNs;j++)
9468
6.05k
                if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9469
1.39k
                    break;
9470
29.7k
            if (j <= nbNs)
9471
1.39k
                xmlErrAttributeDup(ctxt, NULL, attname);
9472
28.3k
            else
9473
28.3k
                if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
9474
9475
4.88M
        } else if (aprefix == ctxt->str_xmlns) {
9476
44.4k
            const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9477
44.4k
            xmlURIPtr uri;
9478
9479
44.4k
            if (attname == ctxt->str_xml) {
9480
345
                if (URL != ctxt->str_xml_ns) {
9481
345
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9482
345
                             "xml namespace prefix mapped to wrong URI\n",
9483
345
                             NULL, NULL, NULL);
9484
345
                }
9485
                /*
9486
                 * Do not keep a namespace definition node
9487
                 */
9488
345
                goto next_attr;
9489
345
            }
9490
44.1k
            if (URL == ctxt->str_xml_ns) {
9491
0
                if (attname != ctxt->str_xml) {
9492
0
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9493
0
                             "xml namespace URI mapped to wrong prefix\n",
9494
0
                             NULL, NULL, NULL);
9495
0
                }
9496
0
                goto next_attr;
9497
0
            }
9498
44.1k
            if (attname == ctxt->str_xmlns) {
9499
110
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9500
110
                         "redefinition of the xmlns prefix is forbidden\n",
9501
110
                         NULL, NULL, NULL);
9502
110
                goto next_attr;
9503
110
            }
9504
44.0k
            if ((len == 29) &&
9505
44.0k
                (xmlStrEqual(URL,
9506
818
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9507
50
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9508
50
                         "reuse of the xmlns namespace name is forbidden\n",
9509
50
                         NULL, NULL, NULL);
9510
50
                goto next_attr;
9511
50
            }
9512
43.9k
            if ((URL == NULL) || (URL[0] == 0)) {
9513
225
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9514
225
                         "xmlns:%s: Empty XML namespace is not allowed\n",
9515
225
                              attname, NULL, NULL);
9516
225
                goto next_attr;
9517
43.7k
            } else {
9518
43.7k
                uri = xmlParseURI((const char *) URL);
9519
43.7k
                if (uri == NULL) {
9520
10.9k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
9521
10.9k
                         "xmlns:%s: '%s' is not a valid URI\n",
9522
10.9k
                                       attname, URL, NULL);
9523
32.7k
                } else {
9524
32.7k
                    if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9525
1.14k
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9526
1.14k
                                  "xmlns:%s: URI %s is not absolute\n",
9527
1.14k
                                  attname, URL, NULL);
9528
1.14k
                    }
9529
32.7k
                    xmlFreeURI(uri);
9530
32.7k
                }
9531
43.7k
            }
9532
9533
            /*
9534
             * check that it's not a defined namespace
9535
             */
9536
68.9k
            for (j = 1;j <= nbNs;j++)
9537
26.3k
                if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9538
1.06k
                    break;
9539
43.7k
            if (j <= nbNs)
9540
1.06k
                xmlErrAttributeDup(ctxt, aprefix, attname);
9541
42.6k
            else
9542
42.6k
                if (nsPush(ctxt, attname, URL) > 0) nbNs++;
9543
9544
4.83M
        } else {
9545
            /*
9546
             * Add the pair to atts
9547
             */
9548
4.83M
            if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9549
56.5k
                if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9550
0
                    goto next_attr;
9551
0
                }
9552
56.5k
                maxatts = ctxt->maxatts;
9553
56.5k
                atts = ctxt->atts;
9554
56.5k
            }
9555
4.83M
            ctxt->attallocs[nratts++] = alloc;
9556
4.83M
            atts[nbatts++] = attname;
9557
4.83M
            atts[nbatts++] = aprefix;
9558
            /*
9559
             * The namespace URI field is used temporarily to point at the
9560
             * base of the current input buffer for non-alloced attributes.
9561
             * When the input buffer is reallocated, all the pointers become
9562
             * invalid, but they can be reconstructed later.
9563
             */
9564
4.83M
            if (alloc)
9565
163k
                atts[nbatts++] = NULL;
9566
4.67M
            else
9567
4.67M
                atts[nbatts++] = ctxt->input->base;
9568
4.83M
            atts[nbatts++] = attvalue;
9569
4.83M
            attvalue += len;
9570
4.83M
            atts[nbatts++] = attvalue;
9571
            /*
9572
             * tag if some deallocation is needed
9573
             */
9574
4.83M
            if (alloc != 0) attval = 1;
9575
4.83M
            attvalue = NULL; /* moved into atts */
9576
4.83M
        }
9577
9578
4.95M
next_attr:
9579
4.95M
        if ((attvalue != NULL) && (alloc != 0)) {
9580
22.4k
            xmlFree(attvalue);
9581
22.4k
            attvalue = NULL;
9582
22.4k
        }
9583
9584
4.95M
  GROW
9585
4.95M
        if (ctxt->instate == XML_PARSER_EOF)
9586
0
            break;
9587
4.95M
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9588
2.79M
      break;
9589
2.16M
  if (SKIP_BLANKS == 0) {
9590
149k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9591
149k
         "attributes construct error\n");
9592
149k
      break;
9593
149k
  }
9594
2.01M
        GROW;
9595
2.01M
    }
9596
9597
4.39M
    if (ctxt->input->id != inputid) {
9598
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9599
0
                    "Unexpected change of input\n");
9600
0
        localname = NULL;
9601
0
        goto done;
9602
0
    }
9603
9604
    /* Reconstruct attribute value pointers. */
9605
9.23M
    for (i = 0, j = 0; j < nratts; i += 5, j++) {
9606
4.83M
        if (atts[i+2] != NULL) {
9607
            /*
9608
             * Arithmetic on dangling pointers is technically undefined
9609
             * behavior, but well...
9610
             */
9611
4.67M
            const xmlChar *old = atts[i+2];
9612
4.67M
            atts[i+2]  = NULL;    /* Reset repurposed namespace URI */
9613
4.67M
            atts[i+3] = ctxt->input->base + (atts[i+3] - old);  /* value */
9614
4.67M
            atts[i+4] = ctxt->input->base + (atts[i+4] - old);  /* valuend */
9615
4.67M
        }
9616
4.83M
    }
9617
9618
    /*
9619
     * The attributes defaulting
9620
     */
9621
4.39M
    if (ctxt->attsDefault != NULL) {
9622
643k
        xmlDefAttrsPtr defaults;
9623
9624
643k
  defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9625
643k
  if (defaults != NULL) {
9626
189k
      for (i = 0;i < defaults->nbAttrs;i++) {
9627
129k
          attname = defaults->values[5 * i];
9628
129k
    aprefix = defaults->values[5 * i + 1];
9629
9630
                /*
9631
     * special work for namespaces defaulted defs
9632
     */
9633
129k
    if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9634
        /*
9635
         * check that it's not a defined namespace
9636
         */
9637
7.21k
        for (j = 1;j <= nbNs;j++)
9638
1.22k
            if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9639
549
          break;
9640
6.54k
              if (j <= nbNs) continue;
9641
9642
5.99k
        nsname = xmlGetNamespace(ctxt, NULL);
9643
5.99k
        if (nsname != defaults->values[5 * i + 2]) {
9644
2.68k
      if (nsPush(ctxt, NULL,
9645
2.68k
                 defaults->values[5 * i + 2]) > 0)
9646
2.52k
          nbNs++;
9647
2.68k
        }
9648
122k
    } else if (aprefix == ctxt->str_xmlns) {
9649
        /*
9650
         * check that it's not a defined namespace
9651
         */
9652
16.7k
        for (j = 1;j <= nbNs;j++)
9653
4.57k
            if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9654
1.49k
          break;
9655
13.7k
              if (j <= nbNs) continue;
9656
9657
12.2k
        nsname = xmlGetNamespace(ctxt, attname);
9658
12.2k
        if (nsname != defaults->values[5 * i + 2]) {
9659
5.28k
      if (nsPush(ctxt, attname,
9660
5.28k
                 defaults->values[5 * i + 2]) > 0)
9661
5.07k
          nbNs++;
9662
5.28k
        }
9663
108k
    } else {
9664
        /*
9665
         * check that it's not a defined attribute
9666
         */
9667
283k
        for (j = 0;j < nbatts;j+=5) {
9668
176k
      if ((attname == atts[j]) && (aprefix == atts[j+1]))
9669
1.04k
          break;
9670
176k
        }
9671
108k
        if (j < nbatts) continue;
9672
9673
107k
        if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9674
2.67k
      if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9675
0
                            localname = NULL;
9676
0
                            goto done;
9677
0
      }
9678
2.67k
      maxatts = ctxt->maxatts;
9679
2.67k
      atts = ctxt->atts;
9680
2.67k
        }
9681
107k
        atts[nbatts++] = attname;
9682
107k
        atts[nbatts++] = aprefix;
9683
107k
        if (aprefix == NULL)
9684
74.6k
      atts[nbatts++] = NULL;
9685
33.1k
        else
9686
33.1k
            atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
9687
107k
        atts[nbatts++] = defaults->values[5 * i + 2];
9688
107k
        atts[nbatts++] = defaults->values[5 * i + 3];
9689
107k
        if ((ctxt->standalone == 1) &&
9690
107k
            (defaults->values[5 * i + 4] != NULL)) {
9691
0
      xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9692
0
    "standalone: attribute %s on %s defaulted from external subset\n",
9693
0
                                   attname, localname);
9694
0
        }
9695
107k
        nbdef++;
9696
107k
    }
9697
129k
      }
9698
60.3k
  }
9699
643k
    }
9700
9701
    /*
9702
     * The attributes checkings
9703
     */
9704
9.34M
    for (i = 0; i < nbatts;i += 5) {
9705
        /*
9706
  * The default namespace does not apply to attribute names.
9707
  */
9708
4.94M
  if (atts[i + 1] != NULL) {
9709
216k
      nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9710
216k
      if (nsname == NULL) {
9711
102k
    xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9712
102k
        "Namespace prefix %s for %s on %s is not defined\n",
9713
102k
        atts[i + 1], atts[i], localname);
9714
102k
      }
9715
216k
      atts[i + 2] = nsname;
9716
216k
  } else
9717
4.72M
      nsname = NULL;
9718
  /*
9719
   * [ WFC: Unique Att Spec ]
9720
   * No attribute name may appear more than once in the same
9721
   * start-tag or empty-element tag.
9722
   * As extended by the Namespace in XML REC.
9723
   */
9724
7.12M
        for (j = 0; j < i;j += 5) {
9725
2.18M
      if (atts[i] == atts[j]) {
9726
12.6k
          if (atts[i+1] == atts[j+1]) {
9727
3.70k
        xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9728
3.70k
        break;
9729
3.70k
    }
9730
8.94k
    if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9731
274
        xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9732
274
           "Namespaced Attribute %s in '%s' redefined\n",
9733
274
           atts[i], nsname, NULL);
9734
274
        break;
9735
274
    }
9736
8.94k
      }
9737
2.18M
  }
9738
4.94M
    }
9739
9740
4.39M
    nsname = xmlGetNamespace(ctxt, prefix);
9741
4.39M
    if ((prefix != NULL) && (nsname == NULL)) {
9742
140k
  xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9743
140k
           "Namespace prefix %s on %s is not defined\n",
9744
140k
     prefix, localname, NULL);
9745
140k
    }
9746
4.39M
    *pref = prefix;
9747
4.39M
    *URI = nsname;
9748
9749
    /*
9750
     * SAX: Start of Element !
9751
     */
9752
4.39M
    if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9753
4.39M
  (!ctxt->disableSAX)) {
9754
3.65M
  if (nbNs > 0)
9755
41.8k
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9756
41.8k
        nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9757
41.8k
        nbatts / 5, nbdef, atts);
9758
3.61M
  else
9759
3.61M
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9760
3.61M
                    nsname, 0, NULL, nbatts / 5, nbdef, atts);
9761
3.65M
    }
9762
9763
4.39M
done:
9764
    /*
9765
     * Free up attribute allocated strings if needed
9766
     */
9767
4.39M
    if (attval != 0) {
9768
348k
  for (i = 3,j = 0; j < nratts;i += 5,j++)
9769
197k
      if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9770
163k
          xmlFree((xmlChar *) atts[i]);
9771
151k
    }
9772
9773
4.39M
    return(localname);
9774
4.39M
}
9775
9776
/**
9777
 * xmlParseEndTag2:
9778
 * @ctxt:  an XML parser context
9779
 * @line:  line of the start tag
9780
 * @nsNr:  number of namespaces on the start tag
9781
 *
9782
 * Parse an end tag. Always consumes '</'.
9783
 *
9784
 * [42] ETag ::= '</' Name S? '>'
9785
 *
9786
 * With namespace
9787
 *
9788
 * [NS 9] ETag ::= '</' QName S? '>'
9789
 */
9790
9791
static void
9792
1.90M
xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlStartTag *tag) {
9793
1.90M
    const xmlChar *name;
9794
9795
1.90M
    GROW;
9796
1.90M
    if ((RAW != '<') || (NXT(1) != '/')) {
9797
0
  xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9798
0
  return;
9799
0
    }
9800
1.90M
    SKIP(2);
9801
9802
1.90M
    if (tag->prefix == NULL)
9803
1.78M
        name = xmlParseNameAndCompare(ctxt, ctxt->name);
9804
121k
    else
9805
121k
        name = xmlParseQNameAndCompare(ctxt, ctxt->name, tag->prefix);
9806
9807
    /*
9808
     * We should definitely be at the ending "S? '>'" part
9809
     */
9810
1.90M
    GROW;
9811
1.90M
    if (ctxt->instate == XML_PARSER_EOF)
9812
0
        return;
9813
1.90M
    SKIP_BLANKS;
9814
1.90M
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9815
27.4k
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9816
27.4k
    } else
9817
1.87M
  NEXT1;
9818
9819
    /*
9820
     * [ WFC: Element Type Match ]
9821
     * The Name in an element's end-tag must match the element type in the
9822
     * start-tag.
9823
     *
9824
     */
9825
1.90M
    if (name != (xmlChar*)1) {
9826
106k
        if (name == NULL) name = BAD_CAST "unparsable";
9827
106k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9828
106k
         "Opening and ending tag mismatch: %s line %d and %s\n",
9829
106k
                    ctxt->name, tag->line, name);
9830
106k
    }
9831
9832
    /*
9833
     * SAX: End of Tag
9834
     */
9835
1.90M
    if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9836
1.90M
  (!ctxt->disableSAX))
9837
1.63M
  ctxt->sax->endElementNs(ctxt->userData, ctxt->name, tag->prefix,
9838
1.63M
                                tag->URI);
9839
9840
1.90M
    spacePop(ctxt);
9841
1.90M
    if (tag->nsNr != 0)
9842
9.25k
  nsPop(ctxt, tag->nsNr);
9843
1.90M
}
9844
9845
/**
9846
 * xmlParseCDSect:
9847
 * @ctxt:  an XML parser context
9848
 *
9849
 * DEPRECATED: Internal function, don't use.
9850
 *
9851
 * Parse escaped pure raw content. Always consumes '<!['.
9852
 *
9853
 * [18] CDSect ::= CDStart CData CDEnd
9854
 *
9855
 * [19] CDStart ::= '<![CDATA['
9856
 *
9857
 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9858
 *
9859
 * [21] CDEnd ::= ']]>'
9860
 */
9861
void
9862
15.0k
xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9863
15.0k
    xmlChar *buf = NULL;
9864
15.0k
    int len = 0;
9865
15.0k
    int size = XML_PARSER_BUFFER_SIZE;
9866
15.0k
    int r, rl;
9867
15.0k
    int s, sl;
9868
15.0k
    int cur, l;
9869
15.0k
    int count = 0;
9870
15.0k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9871
5.50k
                    XML_MAX_HUGE_LENGTH :
9872
15.0k
                    XML_MAX_TEXT_LENGTH;
9873
9874
15.0k
    if ((CUR != '<') || (NXT(1) != '!') || (NXT(2) != '['))
9875
0
        return;
9876
15.0k
    SKIP(3);
9877
9878
15.0k
    if (!CMP6(CUR_PTR, 'C', 'D', 'A', 'T', 'A', '['))
9879
0
        return;
9880
15.0k
    SKIP(6);
9881
9882
15.0k
    ctxt->instate = XML_PARSER_CDATA_SECTION;
9883
15.0k
    r = CUR_CHAR(rl);
9884
15.0k
    if (!IS_CHAR(r)) {
9885
286
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9886
286
        goto out;
9887
286
    }
9888
14.7k
    NEXTL(rl);
9889
14.7k
    s = CUR_CHAR(sl);
9890
14.7k
    if (!IS_CHAR(s)) {
9891
570
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9892
570
        goto out;
9893
570
    }
9894
14.1k
    NEXTL(sl);
9895
14.1k
    cur = CUR_CHAR(l);
9896
14.1k
    buf = (xmlChar *) xmlMallocAtomic(size);
9897
14.1k
    if (buf == NULL) {
9898
0
  xmlErrMemory(ctxt, NULL);
9899
0
        goto out;
9900
0
    }
9901
9.65M
    while (IS_CHAR(cur) &&
9902
9.65M
           ((r != ']') || (s != ']') || (cur != '>'))) {
9903
9.64M
  if (len + 5 >= size) {
9904
15.5k
      xmlChar *tmp;
9905
9906
15.5k
      tmp = (xmlChar *) xmlRealloc(buf, size * 2);
9907
15.5k
      if (tmp == NULL) {
9908
0
    xmlErrMemory(ctxt, NULL);
9909
0
                goto out;
9910
0
      }
9911
15.5k
      buf = tmp;
9912
15.5k
      size *= 2;
9913
15.5k
  }
9914
9.64M
  COPY_BUF(rl,buf,len,r);
9915
9.64M
  r = s;
9916
9.64M
  rl = sl;
9917
9.64M
  s = cur;
9918
9.64M
  sl = l;
9919
9.64M
  count++;
9920
9.64M
  if (count > 50) {
9921
183k
      SHRINK;
9922
183k
      GROW;
9923
183k
            if (ctxt->instate == XML_PARSER_EOF) {
9924
0
                goto out;
9925
0
            }
9926
183k
      count = 0;
9927
183k
  }
9928
9.64M
  NEXTL(l);
9929
9.64M
  cur = CUR_CHAR(l);
9930
9.64M
        if (len > maxLength) {
9931
0
            xmlFatalErrMsg(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9932
0
                           "CData section too big found\n");
9933
0
            goto out;
9934
0
        }
9935
9.64M
    }
9936
14.1k
    buf[len] = 0;
9937
14.1k
    if (cur != '>') {
9938
1.96k
  xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9939
1.96k
                       "CData section not finished\n%.50s\n", buf);
9940
1.96k
        goto out;
9941
1.96k
    }
9942
12.2k
    NEXTL(l);
9943
9944
    /*
9945
     * OK the buffer is to be consumed as cdata.
9946
     */
9947
12.2k
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9948
9.27k
  if (ctxt->sax->cdataBlock != NULL)
9949
5.99k
      ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9950
3.27k
  else if (ctxt->sax->characters != NULL)
9951
3.27k
      ctxt->sax->characters(ctxt->userData, buf, len);
9952
9.27k
    }
9953
9954
15.0k
out:
9955
15.0k
    if (ctxt->instate != XML_PARSER_EOF)
9956
15.0k
        ctxt->instate = XML_PARSER_CONTENT;
9957
15.0k
    xmlFree(buf);
9958
15.0k
}
9959
9960
/**
9961
 * xmlParseContentInternal:
9962
 * @ctxt:  an XML parser context
9963
 *
9964
 * Parse a content sequence. Stops at EOF or '</'. Leaves checking of
9965
 * unexpected EOF to the caller.
9966
 */
9967
9968
static void
9969
132k
xmlParseContentInternal(xmlParserCtxtPtr ctxt) {
9970
132k
    int nameNr = ctxt->nameNr;
9971
9972
132k
    GROW;
9973
12.4M
    while ((RAW != 0) &&
9974
12.4M
     (ctxt->instate != XML_PARSER_EOF)) {
9975
12.3M
  const xmlChar *cur = ctxt->input->cur;
9976
9977
  /*
9978
   * First case : a Processing Instruction.
9979
   */
9980
12.3M
  if ((*cur == '<') && (cur[1] == '?')) {
9981
29.3k
      xmlParsePI(ctxt);
9982
29.3k
  }
9983
9984
  /*
9985
   * Second case : a CDSection
9986
   */
9987
  /* 2.6.0 test was *cur not RAW */
9988
12.2M
  else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9989
15.0k
      xmlParseCDSect(ctxt);
9990
15.0k
  }
9991
9992
  /*
9993
   * Third case :  a comment
9994
   */
9995
12.2M
  else if ((*cur == '<') && (NXT(1) == '!') &&
9996
12.2M
     (NXT(2) == '-') && (NXT(3) == '-')) {
9997
113k
      xmlParseComment(ctxt);
9998
113k
      ctxt->instate = XML_PARSER_CONTENT;
9999
113k
  }
10000
10001
  /*
10002
   * Fourth case :  a sub-element.
10003
   */
10004
12.1M
  else if (*cur == '<') {
10005
5.19M
            if (NXT(1) == '/') {
10006
1.68M
                if (ctxt->nameNr <= nameNr)
10007
18.7k
                    break;
10008
1.66M
          xmlParseElementEnd(ctxt);
10009
3.51M
            } else {
10010
3.51M
          xmlParseElementStart(ctxt);
10011
3.51M
            }
10012
5.19M
  }
10013
10014
  /*
10015
   * Fifth case : a reference. If if has not been resolved,
10016
   *    parsing returns it's Name, create the node
10017
   */
10018
10019
6.96M
  else if (*cur == '&') {
10020
1.61M
      xmlParseReference(ctxt);
10021
1.61M
  }
10022
10023
  /*
10024
   * Last case, text. Note that References are handled directly.
10025
   */
10026
5.35M
  else {
10027
5.35M
      xmlParseCharData(ctxt, 0);
10028
5.35M
  }
10029
10030
12.2M
  GROW;
10031
12.2M
  SHRINK;
10032
12.2M
    }
10033
132k
}
10034
10035
/**
10036
 * xmlParseContent:
10037
 * @ctxt:  an XML parser context
10038
 *
10039
 * Parse a content sequence. Stops at EOF or '</'.
10040
 *
10041
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10042
 */
10043
10044
void
10045
89.2k
xmlParseContent(xmlParserCtxtPtr ctxt) {
10046
89.2k
    int nameNr = ctxt->nameNr;
10047
10048
89.2k
    xmlParseContentInternal(ctxt);
10049
10050
89.2k
    if ((ctxt->instate != XML_PARSER_EOF) && (ctxt->nameNr > nameNr)) {
10051
4.82k
        const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
10052
4.82k
        int line = ctxt->pushTab[ctxt->nameNr - 1].line;
10053
4.82k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
10054
4.82k
                "Premature end of data in tag %s line %d\n",
10055
4.82k
    name, line, NULL);
10056
4.82k
    }
10057
89.2k
}
10058
10059
/**
10060
 * xmlParseElement:
10061
 * @ctxt:  an XML parser context
10062
 *
10063
 * DEPRECATED: Internal function, don't use.
10064
 *
10065
 * parse an XML element
10066
 *
10067
 * [39] element ::= EmptyElemTag | STag content ETag
10068
 *
10069
 * [ WFC: Element Type Match ]
10070
 * The Name in an element's end-tag must match the element type in the
10071
 * start-tag.
10072
 *
10073
 */
10074
10075
void
10076
57.5k
xmlParseElement(xmlParserCtxtPtr ctxt) {
10077
57.5k
    if (xmlParseElementStart(ctxt) != 0)
10078
13.9k
        return;
10079
10080
43.5k
    xmlParseContentInternal(ctxt);
10081
43.5k
    if (ctxt->instate == XML_PARSER_EOF)
10082
175
  return;
10083
10084
43.3k
    if (CUR == 0) {
10085
25.9k
        const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
10086
25.9k
        int line = ctxt->pushTab[ctxt->nameNr - 1].line;
10087
25.9k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
10088
25.9k
                "Premature end of data in tag %s line %d\n",
10089
25.9k
    name, line, NULL);
10090
25.9k
        return;
10091
25.9k
    }
10092
10093
17.4k
    xmlParseElementEnd(ctxt);
10094
17.4k
}
10095
10096
/**
10097
 * xmlParseElementStart:
10098
 * @ctxt:  an XML parser context
10099
 *
10100
 * Parse the start of an XML element. Returns -1 in case of error, 0 if an
10101
 * opening tag was parsed, 1 if an empty element was parsed.
10102
 *
10103
 * Always consumes '<'.
10104
 */
10105
static int
10106
3.56M
xmlParseElementStart(xmlParserCtxtPtr ctxt) {
10107
3.56M
    const xmlChar *name;
10108
3.56M
    const xmlChar *prefix = NULL;
10109
3.56M
    const xmlChar *URI = NULL;
10110
3.56M
    xmlParserNodeInfo node_info;
10111
3.56M
    int line, tlen = 0;
10112
3.56M
    xmlNodePtr ret;
10113
3.56M
    int nsNr = ctxt->nsNr;
10114
10115
3.56M
    if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
10116
3.56M
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
10117
0
  xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
10118
0
     "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
10119
0
        xmlParserMaxDepth);
10120
0
  xmlHaltParser(ctxt);
10121
0
  return(-1);
10122
0
    }
10123
10124
    /* Capture start position */
10125
3.56M
    if (ctxt->record_info) {
10126
0
        node_info.begin_pos = ctxt->input->consumed +
10127
0
                          (CUR_PTR - ctxt->input->base);
10128
0
  node_info.begin_line = ctxt->input->line;
10129
0
    }
10130
10131
3.56M
    if (ctxt->spaceNr == 0)
10132
0
  spacePush(ctxt, -1);
10133
3.56M
    else if (*ctxt->space == -2)
10134
723k
  spacePush(ctxt, -1);
10135
2.84M
    else
10136
2.84M
  spacePush(ctxt, *ctxt->space);
10137
10138
3.56M
    line = ctxt->input->line;
10139
3.56M
#ifdef LIBXML_SAX1_ENABLED
10140
3.56M
    if (ctxt->sax2)
10141
2.15M
#endif /* LIBXML_SAX1_ENABLED */
10142
2.15M
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
10143
1.40M
#ifdef LIBXML_SAX1_ENABLED
10144
1.40M
    else
10145
1.40M
  name = xmlParseStartTag(ctxt);
10146
3.56M
#endif /* LIBXML_SAX1_ENABLED */
10147
3.56M
    if (ctxt->instate == XML_PARSER_EOF)
10148
117
  return(-1);
10149
3.56M
    if (name == NULL) {
10150
225k
  spacePop(ctxt);
10151
225k
        return(-1);
10152
225k
    }
10153
3.34M
    nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
10154
3.34M
    ret = ctxt->node;
10155
10156
3.34M
#ifdef LIBXML_VALID_ENABLED
10157
    /*
10158
     * [ VC: Root Element Type ]
10159
     * The Name in the document type declaration must match the element
10160
     * type of the root element.
10161
     */
10162
3.34M
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10163
3.34M
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
10164
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
10165
3.34M
#endif /* LIBXML_VALID_ENABLED */
10166
10167
    /*
10168
     * Check for an Empty Element.
10169
     */
10170
3.34M
    if ((RAW == '/') && (NXT(1) == '>')) {
10171
1.35M
        SKIP(2);
10172
1.35M
  if (ctxt->sax2) {
10173
938k
      if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
10174
938k
    (!ctxt->disableSAX))
10175
555k
    ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
10176
938k
#ifdef LIBXML_SAX1_ENABLED
10177
938k
  } else {
10178
420k
      if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
10179
420k
    (!ctxt->disableSAX))
10180
252k
    ctxt->sax->endElement(ctxt->userData, name);
10181
420k
#endif /* LIBXML_SAX1_ENABLED */
10182
420k
  }
10183
1.35M
  namePop(ctxt);
10184
1.35M
  spacePop(ctxt);
10185
1.35M
  if (nsNr != ctxt->nsNr)
10186
3.50k
      nsPop(ctxt, ctxt->nsNr - nsNr);
10187
1.35M
  if ( ret != NULL && ctxt->record_info ) {
10188
0
     node_info.end_pos = ctxt->input->consumed +
10189
0
            (CUR_PTR - ctxt->input->base);
10190
0
     node_info.end_line = ctxt->input->line;
10191
0
     node_info.node = ret;
10192
0
     xmlParserAddNodeInfo(ctxt, &node_info);
10193
0
  }
10194
1.35M
  return(1);
10195
1.35M
    }
10196
1.98M
    if (RAW == '>') {
10197
1.80M
        NEXT1;
10198
1.80M
    } else {
10199
180k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
10200
180k
         "Couldn't find end of Start Tag %s line %d\n",
10201
180k
                    name, line, NULL);
10202
10203
  /*
10204
   * end of parsing of this node.
10205
   */
10206
180k
  nodePop(ctxt);
10207
180k
  namePop(ctxt);
10208
180k
  spacePop(ctxt);
10209
180k
  if (nsNr != ctxt->nsNr)
10210
12.1k
      nsPop(ctxt, ctxt->nsNr - nsNr);
10211
10212
  /*
10213
   * Capture end position and add node
10214
   */
10215
180k
  if ( ret != NULL && ctxt->record_info ) {
10216
0
     node_info.end_pos = ctxt->input->consumed +
10217
0
            (CUR_PTR - ctxt->input->base);
10218
0
     node_info.end_line = ctxt->input->line;
10219
0
     node_info.node = ret;
10220
0
     xmlParserAddNodeInfo(ctxt, &node_info);
10221
0
  }
10222
180k
  return(-1);
10223
180k
    }
10224
10225
1.80M
    return(0);
10226
1.98M
}
10227
10228
/**
10229
 * xmlParseElementEnd:
10230
 * @ctxt:  an XML parser context
10231
 *
10232
 * Parse the end of an XML element. Always consumes '</'.
10233
 */
10234
static void
10235
1.68M
xmlParseElementEnd(xmlParserCtxtPtr ctxt) {
10236
1.68M
    xmlParserNodeInfo node_info;
10237
1.68M
    xmlNodePtr ret = ctxt->node;
10238
10239
1.68M
    if (ctxt->nameNr <= 0) {
10240
0
        if ((RAW == '<') && (NXT(1) == '/'))
10241
0
            SKIP(2);
10242
0
        return;
10243
0
    }
10244
10245
    /*
10246
     * parse the end of tag: '</' should be here.
10247
     */
10248
1.68M
    if (ctxt->sax2) {
10249
897k
  xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
10250
897k
  namePop(ctxt);
10251
897k
    }
10252
783k
#ifdef LIBXML_SAX1_ENABLED
10253
783k
    else
10254
783k
  xmlParseEndTag1(ctxt, 0);
10255
1.68M
#endif /* LIBXML_SAX1_ENABLED */
10256
10257
    /*
10258
     * Capture end position and add node
10259
     */
10260
1.68M
    if ( ret != NULL && ctxt->record_info ) {
10261
0
       node_info.end_pos = ctxt->input->consumed +
10262
0
                          (CUR_PTR - ctxt->input->base);
10263
0
       node_info.end_line = ctxt->input->line;
10264
0
       node_info.node = ret;
10265
0
       xmlParserAddNodeInfo(ctxt, &node_info);
10266
0
    }
10267
1.68M
}
10268
10269
/**
10270
 * xmlParseVersionNum:
10271
 * @ctxt:  an XML parser context
10272
 *
10273
 * DEPRECATED: Internal function, don't use.
10274
 *
10275
 * parse the XML version value.
10276
 *
10277
 * [26] VersionNum ::= '1.' [0-9]+
10278
 *
10279
 * In practice allow [0-9].[0-9]+ at that level
10280
 *
10281
 * Returns the string giving the XML version number, or NULL
10282
 */
10283
xmlChar *
10284
124k
xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10285
124k
    xmlChar *buf = NULL;
10286
124k
    int len = 0;
10287
124k
    int size = 10;
10288
124k
    xmlChar cur;
10289
10290
124k
    buf = (xmlChar *) xmlMallocAtomic(size);
10291
124k
    if (buf == NULL) {
10292
0
  xmlErrMemory(ctxt, NULL);
10293
0
  return(NULL);
10294
0
    }
10295
124k
    cur = CUR;
10296
124k
    if (!((cur >= '0') && (cur <= '9'))) {
10297
750
  xmlFree(buf);
10298
750
  return(NULL);
10299
750
    }
10300
123k
    buf[len++] = cur;
10301
123k
    NEXT;
10302
123k
    cur=CUR;
10303
123k
    if (cur != '.') {
10304
1.58k
  xmlFree(buf);
10305
1.58k
  return(NULL);
10306
1.58k
    }
10307
122k
    buf[len++] = cur;
10308
122k
    NEXT;
10309
122k
    cur=CUR;
10310
640k
    while ((cur >= '0') && (cur <= '9')) {
10311
518k
  if (len + 1 >= size) {
10312
855
      xmlChar *tmp;
10313
10314
855
      size *= 2;
10315
855
      tmp = (xmlChar *) xmlRealloc(buf, size);
10316
855
      if (tmp == NULL) {
10317
0
          xmlFree(buf);
10318
0
    xmlErrMemory(ctxt, NULL);
10319
0
    return(NULL);
10320
0
      }
10321
855
      buf = tmp;
10322
855
  }
10323
518k
  buf[len++] = cur;
10324
518k
  NEXT;
10325
518k
  cur=CUR;
10326
518k
    }
10327
122k
    buf[len] = 0;
10328
122k
    return(buf);
10329
122k
}
10330
10331
/**
10332
 * xmlParseVersionInfo:
10333
 * @ctxt:  an XML parser context
10334
 *
10335
 * DEPRECATED: Internal function, don't use.
10336
 *
10337
 * parse the XML version.
10338
 *
10339
 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
10340
 *
10341
 * [25] Eq ::= S? '=' S?
10342
 *
10343
 * Returns the version string, e.g. "1.0"
10344
 */
10345
10346
xmlChar *
10347
138k
xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10348
138k
    xmlChar *version = NULL;
10349
10350
138k
    if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
10351
127k
  SKIP(7);
10352
127k
  SKIP_BLANKS;
10353
127k
  if (RAW != '=') {
10354
2.19k
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10355
2.19k
      return(NULL);
10356
2.19k
        }
10357
125k
  NEXT;
10358
125k
  SKIP_BLANKS;
10359
125k
  if (RAW == '"') {
10360
115k
      NEXT;
10361
115k
      version = xmlParseVersionNum(ctxt);
10362
115k
      if (RAW != '"') {
10363
4.17k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10364
4.17k
      } else
10365
111k
          NEXT;
10366
115k
  } else if (RAW == '\''){
10367
8.76k
      NEXT;
10368
8.76k
      version = xmlParseVersionNum(ctxt);
10369
8.76k
      if (RAW != '\'') {
10370
589
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10371
589
      } else
10372
8.17k
          NEXT;
10373
8.76k
  } else {
10374
940
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10375
940
  }
10376
125k
    }
10377
136k
    return(version);
10378
138k
}
10379
10380
/**
10381
 * xmlParseEncName:
10382
 * @ctxt:  an XML parser context
10383
 *
10384
 * DEPRECATED: Internal function, don't use.
10385
 *
10386
 * parse the XML encoding name
10387
 *
10388
 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10389
 *
10390
 * Returns the encoding name value or NULL
10391
 */
10392
xmlChar *
10393
59.9k
xmlParseEncName(xmlParserCtxtPtr ctxt) {
10394
59.9k
    xmlChar *buf = NULL;
10395
59.9k
    int len = 0;
10396
59.9k
    int size = 10;
10397
59.9k
    xmlChar cur;
10398
10399
59.9k
    cur = CUR;
10400
59.9k
    if (((cur >= 'a') && (cur <= 'z')) ||
10401
59.9k
        ((cur >= 'A') && (cur <= 'Z'))) {
10402
58.5k
  buf = (xmlChar *) xmlMallocAtomic(size);
10403
58.5k
  if (buf == NULL) {
10404
0
      xmlErrMemory(ctxt, NULL);
10405
0
      return(NULL);
10406
0
  }
10407
10408
58.5k
  buf[len++] = cur;
10409
58.5k
  NEXT;
10410
58.5k
  cur = CUR;
10411
924k
  while (((cur >= 'a') && (cur <= 'z')) ||
10412
924k
         ((cur >= 'A') && (cur <= 'Z')) ||
10413
924k
         ((cur >= '0') && (cur <= '9')) ||
10414
924k
         (cur == '.') || (cur == '_') ||
10415
924k
         (cur == '-')) {
10416
865k
      if (len + 1 >= size) {
10417
27.4k
          xmlChar *tmp;
10418
10419
27.4k
    size *= 2;
10420
27.4k
    tmp = (xmlChar *) xmlRealloc(buf, size);
10421
27.4k
    if (tmp == NULL) {
10422
0
        xmlErrMemory(ctxt, NULL);
10423
0
        xmlFree(buf);
10424
0
        return(NULL);
10425
0
    }
10426
27.4k
    buf = tmp;
10427
27.4k
      }
10428
865k
      buf[len++] = cur;
10429
865k
      NEXT;
10430
865k
      cur = CUR;
10431
865k
      if (cur == 0) {
10432
717
          SHRINK;
10433
717
    GROW;
10434
717
    cur = CUR;
10435
717
      }
10436
865k
        }
10437
58.5k
  buf[len] = 0;
10438
58.5k
    } else {
10439
1.37k
  xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10440
1.37k
    }
10441
59.9k
    return(buf);
10442
59.9k
}
10443
10444
/**
10445
 * xmlParseEncodingDecl:
10446
 * @ctxt:  an XML parser context
10447
 *
10448
 * DEPRECATED: Internal function, don't use.
10449
 *
10450
 * parse the XML encoding declaration
10451
 *
10452
 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' |  "'" EncName "'")
10453
 *
10454
 * this setups the conversion filters.
10455
 *
10456
 * Returns the encoding value or NULL
10457
 */
10458
10459
const xmlChar *
10460
97.5k
xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10461
97.5k
    xmlChar *encoding = NULL;
10462
10463
97.5k
    SKIP_BLANKS;
10464
97.5k
    if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
10465
61.3k
  SKIP(8);
10466
61.3k
  SKIP_BLANKS;
10467
61.3k
  if (RAW != '=') {
10468
686
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10469
686
      return(NULL);
10470
686
        }
10471
60.6k
  NEXT;
10472
60.6k
  SKIP_BLANKS;
10473
60.6k
  if (RAW == '"') {
10474
53.7k
      NEXT;
10475
53.7k
      encoding = xmlParseEncName(ctxt);
10476
53.7k
      if (RAW != '"') {
10477
3.31k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10478
3.31k
    xmlFree((xmlChar *) encoding);
10479
3.31k
    return(NULL);
10480
3.31k
      } else
10481
50.4k
          NEXT;
10482
53.7k
  } else if (RAW == '\''){
10483
6.21k
      NEXT;
10484
6.21k
      encoding = xmlParseEncName(ctxt);
10485
6.21k
      if (RAW != '\'') {
10486
707
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10487
707
    xmlFree((xmlChar *) encoding);
10488
707
    return(NULL);
10489
707
      } else
10490
5.50k
          NEXT;
10491
6.21k
  } else {
10492
676
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10493
676
  }
10494
10495
        /*
10496
         * Non standard parsing, allowing the user to ignore encoding
10497
         */
10498
56.6k
        if (ctxt->options & XML_PARSE_IGNORE_ENC) {
10499
25.5k
      xmlFree((xmlChar *) encoding);
10500
25.5k
            return(NULL);
10501
25.5k
  }
10502
10503
  /*
10504
   * UTF-16 encoding switch has already taken place at this stage,
10505
   * more over the little-endian/big-endian selection is already done
10506
   */
10507
31.1k
        if ((encoding != NULL) &&
10508
31.1k
      ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10509
30.7k
       (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
10510
      /*
10511
       * If no encoding was passed to the parser, that we are
10512
       * using UTF-16 and no decoder is present i.e. the
10513
       * document is apparently UTF-8 compatible, then raise an
10514
       * encoding mismatch fatal error
10515
       */
10516
1.32k
      if ((ctxt->encoding == NULL) &&
10517
1.32k
          (ctxt->input->buf != NULL) &&
10518
1.32k
          (ctxt->input->buf->encoder == NULL)) {
10519
1.10k
    xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10520
1.10k
      "Document labelled UTF-16 but has UTF-8 content\n");
10521
1.10k
      }
10522
1.32k
      if (ctxt->encoding != NULL)
10523
220
    xmlFree((xmlChar *) ctxt->encoding);
10524
1.32k
      ctxt->encoding = encoding;
10525
1.32k
  }
10526
  /*
10527
   * UTF-8 encoding is handled natively
10528
   */
10529
29.7k
        else if ((encoding != NULL) &&
10530
29.7k
      ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10531
29.3k
       (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
10532
11.3k
      if (ctxt->encoding != NULL)
10533
455
    xmlFree((xmlChar *) ctxt->encoding);
10534
11.3k
      ctxt->encoding = encoding;
10535
11.3k
  }
10536
18.3k
  else if (encoding != NULL) {
10537
18.0k
      xmlCharEncodingHandlerPtr handler;
10538
10539
18.0k
      if (ctxt->input->encoding != NULL)
10540
0
    xmlFree((xmlChar *) ctxt->input->encoding);
10541
18.0k
      ctxt->input->encoding = encoding;
10542
10543
18.0k
            handler = xmlFindCharEncodingHandler((const char *) encoding);
10544
18.0k
      if (handler != NULL) {
10545
17.6k
    if (xmlSwitchToEncoding(ctxt, handler) < 0) {
10546
        /* failed to convert */
10547
582
        ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
10548
582
        return(NULL);
10549
582
    }
10550
17.6k
      } else {
10551
345
    xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
10552
345
      "Unsupported encoding %s\n", encoding);
10553
345
    return(NULL);
10554
345
      }
10555
18.0k
  }
10556
31.1k
    }
10557
66.3k
    return(encoding);
10558
97.5k
}
10559
10560
/**
10561
 * xmlParseSDDecl:
10562
 * @ctxt:  an XML parser context
10563
 *
10564
 * DEPRECATED: Internal function, don't use.
10565
 *
10566
 * parse the XML standalone declaration
10567
 *
10568
 * [32] SDDecl ::= S 'standalone' Eq
10569
 *                 (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10570
 *
10571
 * [ VC: Standalone Document Declaration ]
10572
 * TODO The standalone document declaration must have the value "no"
10573
 * if any external markup declarations contain declarations of:
10574
 *  - attributes with default values, if elements to which these
10575
 *    attributes apply appear in the document without specifications
10576
 *    of values for these attributes, or
10577
 *  - entities (other than amp, lt, gt, apos, quot), if references
10578
 *    to those entities appear in the document, or
10579
 *  - attributes with values subject to normalization, where the
10580
 *    attribute appears in the document with a value which will change
10581
 *    as a result of normalization, or
10582
 *  - element types with element content, if white space occurs directly
10583
 *    within any instance of those types.
10584
 *
10585
 * Returns:
10586
 *   1 if standalone="yes"
10587
 *   0 if standalone="no"
10588
 *  -2 if standalone attribute is missing or invalid
10589
 *    (A standalone value of -2 means that the XML declaration was found,
10590
 *     but no value was specified for the standalone attribute).
10591
 */
10592
10593
int
10594
68.6k
xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10595
68.6k
    int standalone = -2;
10596
10597
68.6k
    SKIP_BLANKS;
10598
68.6k
    if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10599
12.6k
  SKIP(10);
10600
12.6k
        SKIP_BLANKS;
10601
12.6k
  if (RAW != '=') {
10602
132
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10603
132
      return(standalone);
10604
132
        }
10605
12.5k
  NEXT;
10606
12.5k
  SKIP_BLANKS;
10607
12.5k
        if (RAW == '\''){
10608
5.62k
      NEXT;
10609
5.62k
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10610
4.67k
          standalone = 0;
10611
4.67k
                SKIP(2);
10612
4.67k
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10613
954
                 (NXT(2) == 's')) {
10614
852
          standalone = 1;
10615
852
    SKIP(3);
10616
852
            } else {
10617
102
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10618
102
      }
10619
5.62k
      if (RAW != '\'') {
10620
186
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10621
186
      } else
10622
5.43k
          NEXT;
10623
6.89k
  } else if (RAW == '"'){
10624
6.83k
      NEXT;
10625
6.83k
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10626
3.96k
          standalone = 0;
10627
3.96k
    SKIP(2);
10628
3.96k
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10629
2.87k
                 (NXT(2) == 's')) {
10630
2.64k
          standalone = 1;
10631
2.64k
                SKIP(3);
10632
2.64k
            } else {
10633
228
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10634
228
      }
10635
6.83k
      if (RAW != '"') {
10636
330
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10637
330
      } else
10638
6.50k
          NEXT;
10639
6.83k
  } else {
10640
60
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10641
60
        }
10642
12.5k
    }
10643
68.4k
    return(standalone);
10644
68.6k
}
10645
10646
/**
10647
 * xmlParseXMLDecl:
10648
 * @ctxt:  an XML parser context
10649
 *
10650
 * DEPRECATED: Internal function, don't use.
10651
 *
10652
 * parse an XML declaration header
10653
 *
10654
 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10655
 */
10656
10657
void
10658
119k
xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10659
119k
    xmlChar *version;
10660
10661
    /*
10662
     * This value for standalone indicates that the document has an
10663
     * XML declaration but it does not have a standalone attribute.
10664
     * It will be overwritten later if a standalone attribute is found.
10665
     */
10666
119k
    ctxt->input->standalone = -2;
10667
10668
    /*
10669
     * We know that '<?xml' is here.
10670
     */
10671
119k
    SKIP(5);
10672
10673
119k
    if (!IS_BLANK_CH(RAW)) {
10674
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10675
0
                 "Blank needed after '<?xml'\n");
10676
0
    }
10677
119k
    SKIP_BLANKS;
10678
10679
    /*
10680
     * We must have the VersionInfo here.
10681
     */
10682
119k
    version = xmlParseVersionInfo(ctxt);
10683
119k
    if (version == NULL) {
10684
10.7k
  xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10685
108k
    } else {
10686
108k
  if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10687
      /*
10688
       * Changed here for XML-1.0 5th edition
10689
       */
10690
1.04k
      if (ctxt->options & XML_PARSE_OLD10) {
10691
369
    xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10692
369
                "Unsupported version '%s'\n",
10693
369
                version);
10694
678
      } else {
10695
678
          if ((version[0] == '1') && ((version[1] == '.'))) {
10696
612
        xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10697
612
                      "Unsupported version '%s'\n",
10698
612
          version, NULL);
10699
612
    } else {
10700
66
        xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10701
66
              "Unsupported version '%s'\n",
10702
66
              version);
10703
66
    }
10704
678
      }
10705
1.04k
  }
10706
108k
  if (ctxt->version != NULL)
10707
0
      xmlFree((void *) ctxt->version);
10708
108k
  ctxt->version = version;
10709
108k
    }
10710
10711
    /*
10712
     * We may have the encoding declaration
10713
     */
10714
119k
    if (!IS_BLANK_CH(RAW)) {
10715
53.6k
        if ((RAW == '?') && (NXT(1) == '>')) {
10716
40.9k
      SKIP(2);
10717
40.9k
      return;
10718
40.9k
  }
10719
12.7k
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10720
12.7k
    }
10721
78.7k
    xmlParseEncodingDecl(ctxt);
10722
78.7k
    if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10723
78.7k
         (ctxt->instate == XML_PARSER_EOF)) {
10724
  /*
10725
   * The XML REC instructs us to stop parsing right here
10726
   */
10727
240
        return;
10728
240
    }
10729
10730
    /*
10731
     * We may have the standalone status.
10732
     */
10733
78.4k
    if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10734
10.9k
        if ((RAW == '?') && (NXT(1) == '>')) {
10735
9.87k
      SKIP(2);
10736
9.87k
      return;
10737
9.87k
  }
10738
1.03k
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10739
1.03k
    }
10740
10741
    /*
10742
     * We can grow the input buffer freely at that point
10743
     */
10744
68.6k
    GROW;
10745
10746
68.6k
    SKIP_BLANKS;
10747
68.6k
    ctxt->input->standalone = xmlParseSDDecl(ctxt);
10748
10749
68.6k
    SKIP_BLANKS;
10750
68.6k
    if ((RAW == '?') && (NXT(1) == '>')) {
10751
47.4k
        SKIP(2);
10752
47.4k
    } else if (RAW == '>') {
10753
        /* Deprecated old WD ... */
10754
786
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10755
786
  NEXT;
10756
20.3k
    } else {
10757
20.3k
        int c;
10758
10759
20.3k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10760
1.75M
        while ((c = CUR) != 0) {
10761
1.75M
            NEXT;
10762
1.75M
            if (c == '>')
10763
18.2k
                break;
10764
1.75M
        }
10765
20.3k
    }
10766
68.6k
}
10767
10768
/**
10769
 * xmlParseMisc:
10770
 * @ctxt:  an XML parser context
10771
 *
10772
 * DEPRECATED: Internal function, don't use.
10773
 *
10774
 * parse an XML Misc* optional field.
10775
 *
10776
 * [27] Misc ::= Comment | PI |  S
10777
 */
10778
10779
void
10780
173k
xmlParseMisc(xmlParserCtxtPtr ctxt) {
10781
194k
    while (ctxt->instate != XML_PARSER_EOF) {
10782
194k
        SKIP_BLANKS;
10783
194k
        GROW;
10784
194k
        if ((RAW == '<') && (NXT(1) == '?')) {
10785
12.4k
      xmlParsePI(ctxt);
10786
182k
        } else if (CMP4(CUR_PTR, '<', '!', '-', '-')) {
10787
8.84k
      xmlParseComment(ctxt);
10788
173k
        } else {
10789
173k
            break;
10790
173k
        }
10791
194k
    }
10792
173k
}
10793
10794
/**
10795
 * xmlParseDocument:
10796
 * @ctxt:  an XML parser context
10797
 *
10798
 * parse an XML document (and build a tree if using the standard SAX
10799
 * interface).
10800
 *
10801
 * [1] document ::= prolog element Misc*
10802
 *
10803
 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10804
 *
10805
 * Returns 0, -1 in case of error. the parser context is augmented
10806
 *                as a result of the parsing.
10807
 */
10808
10809
int
10810
76.0k
xmlParseDocument(xmlParserCtxtPtr ctxt) {
10811
76.0k
    xmlChar start[4];
10812
76.0k
    xmlCharEncoding enc;
10813
10814
76.0k
    xmlInitParser();
10815
10816
76.0k
    if ((ctxt == NULL) || (ctxt->input == NULL))
10817
0
        return(-1);
10818
10819
76.0k
    GROW;
10820
10821
    /*
10822
     * SAX: detecting the level.
10823
     */
10824
76.0k
    xmlDetectSAX2(ctxt);
10825
10826
    /*
10827
     * SAX: beginning of the document processing.
10828
     */
10829
76.0k
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10830
76.0k
        ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10831
76.0k
    if (ctxt->instate == XML_PARSER_EOF)
10832
0
  return(-1);
10833
10834
76.0k
    if ((ctxt->encoding == NULL) &&
10835
76.0k
        ((ctxt->input->end - ctxt->input->cur) >= 4)) {
10836
  /*
10837
   * Get the 4 first bytes and decode the charset
10838
   * if enc != XML_CHAR_ENCODING_NONE
10839
   * plug some encoding conversion routines.
10840
   */
10841
75.8k
  start[0] = RAW;
10842
75.8k
  start[1] = NXT(1);
10843
75.8k
  start[2] = NXT(2);
10844
75.8k
  start[3] = NXT(3);
10845
75.8k
  enc = xmlDetectCharEncoding(&start[0], 4);
10846
75.8k
  if (enc != XML_CHAR_ENCODING_NONE) {
10847
42.4k
      xmlSwitchEncoding(ctxt, enc);
10848
42.4k
  }
10849
75.8k
    }
10850
10851
10852
76.0k
    if (CUR == 0) {
10853
152
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10854
152
  return(-1);
10855
152
    }
10856
10857
    /*
10858
     * Check for the XMLDecl in the Prolog.
10859
     * do not GROW here to avoid the detected encoder to decode more
10860
     * than just the first line, unless the amount of data is really
10861
     * too small to hold "<?xml version="1.0" encoding="foo"
10862
     */
10863
75.8k
    if ((ctxt->input->end - ctxt->input->cur) < 35) {
10864
2.68k
       GROW;
10865
2.68k
    }
10866
75.8k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10867
10868
  /*
10869
   * Note that we will switch encoding on the fly.
10870
   */
10871
39.9k
  xmlParseXMLDecl(ctxt);
10872
39.9k
  if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10873
39.9k
      (ctxt->instate == XML_PARSER_EOF)) {
10874
      /*
10875
       * The XML REC instructs us to stop parsing right here
10876
       */
10877
80
      return(-1);
10878
80
  }
10879
39.8k
  ctxt->standalone = ctxt->input->standalone;
10880
39.8k
  SKIP_BLANKS;
10881
39.8k
    } else {
10882
35.9k
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10883
35.9k
    }
10884
75.8k
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10885
74.0k
        ctxt->sax->startDocument(ctxt->userData);
10886
75.8k
    if (ctxt->instate == XML_PARSER_EOF)
10887
0
  return(-1);
10888
75.8k
    if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10889
75.8k
        (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10890
0
  ctxt->myDoc->compression = ctxt->input->buf->compressed;
10891
0
    }
10892
10893
    /*
10894
     * The Misc part of the Prolog
10895
     */
10896
75.8k
    xmlParseMisc(ctxt);
10897
10898
    /*
10899
     * Then possibly doc type declaration(s) and more Misc
10900
     * (doctypedecl Misc*)?
10901
     */
10902
75.8k
    GROW;
10903
75.8k
    if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10904
10905
51.5k
  ctxt->inSubset = 1;
10906
51.5k
  xmlParseDocTypeDecl(ctxt);
10907
51.5k
  if (RAW == '[') {
10908
40.2k
      ctxt->instate = XML_PARSER_DTD;
10909
40.2k
      xmlParseInternalSubset(ctxt);
10910
40.2k
      if (ctxt->instate == XML_PARSER_EOF)
10911
8.86k
    return(-1);
10912
40.2k
  }
10913
10914
  /*
10915
   * Create and update the external subset.
10916
   */
10917
42.6k
  ctxt->inSubset = 2;
10918
42.6k
  if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10919
42.6k
      (!ctxt->disableSAX))
10920
40.6k
      ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10921
40.6k
                                ctxt->extSubSystem, ctxt->extSubURI);
10922
42.6k
  if (ctxt->instate == XML_PARSER_EOF)
10923
2.49k
      return(-1);
10924
40.1k
  ctxt->inSubset = 0;
10925
10926
40.1k
        xmlCleanSpecialAttr(ctxt);
10927
10928
40.1k
  ctxt->instate = XML_PARSER_PROLOG;
10929
40.1k
  xmlParseMisc(ctxt);
10930
40.1k
    }
10931
10932
    /*
10933
     * Time to start parsing the tree itself
10934
     */
10935
64.4k
    GROW;
10936
64.4k
    if (RAW != '<') {
10937
6.95k
  xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10938
6.95k
           "Start tag expected, '<' not found\n");
10939
57.5k
    } else {
10940
57.5k
  ctxt->instate = XML_PARSER_CONTENT;
10941
57.5k
  xmlParseElement(ctxt);
10942
57.5k
  ctxt->instate = XML_PARSER_EPILOG;
10943
10944
10945
  /*
10946
   * The Misc part at the end
10947
   */
10948
57.5k
  xmlParseMisc(ctxt);
10949
10950
57.5k
  if (RAW != 0) {
10951
14.5k
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10952
14.5k
  }
10953
57.5k
  ctxt->instate = XML_PARSER_EOF;
10954
57.5k
    }
10955
10956
    /*
10957
     * SAX: end of the document processing.
10958
     */
10959
64.4k
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10960
64.4k
        ctxt->sax->endDocument(ctxt->userData);
10961
10962
    /*
10963
     * Remove locally kept entity definitions if the tree was not built
10964
     */
10965
64.4k
    if ((ctxt->myDoc != NULL) &&
10966
64.4k
  (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10967
114
  xmlFreeDoc(ctxt->myDoc);
10968
114
  ctxt->myDoc = NULL;
10969
114
    }
10970
10971
64.4k
    if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10972
7.73k
        ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10973
7.73k
  if (ctxt->valid)
10974
5.17k
      ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10975
7.73k
  if (ctxt->nsWellFormed)
10976
7.22k
      ctxt->myDoc->properties |= XML_DOC_NSVALID;
10977
7.73k
  if (ctxt->options & XML_PARSE_OLD10)
10978
1.50k
      ctxt->myDoc->properties |= XML_DOC_OLD10;
10979
7.73k
    }
10980
64.4k
    if (! ctxt->wellFormed) {
10981
56.7k
  ctxt->valid = 0;
10982
56.7k
  return(-1);
10983
56.7k
    }
10984
7.73k
    return(0);
10985
64.4k
}
10986
10987
/**
10988
 * xmlParseExtParsedEnt:
10989
 * @ctxt:  an XML parser context
10990
 *
10991
 * parse a general parsed entity
10992
 * An external general parsed entity is well-formed if it matches the
10993
 * production labeled extParsedEnt.
10994
 *
10995
 * [78] extParsedEnt ::= TextDecl? content
10996
 *
10997
 * Returns 0, -1 in case of error. the parser context is augmented
10998
 *                as a result of the parsing.
10999
 */
11000
11001
int
11002
0
xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
11003
0
    xmlChar start[4];
11004
0
    xmlCharEncoding enc;
11005
11006
0
    if ((ctxt == NULL) || (ctxt->input == NULL))
11007
0
        return(-1);
11008
11009
0
    xmlDetectSAX2(ctxt);
11010
11011
0
    GROW;
11012
11013
    /*
11014
     * SAX: beginning of the document processing.
11015
     */
11016
0
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11017
0
        ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
11018
11019
    /*
11020
     * Get the 4 first bytes and decode the charset
11021
     * if enc != XML_CHAR_ENCODING_NONE
11022
     * plug some encoding conversion routines.
11023
     */
11024
0
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11025
0
  start[0] = RAW;
11026
0
  start[1] = NXT(1);
11027
0
  start[2] = NXT(2);
11028
0
  start[3] = NXT(3);
11029
0
  enc = xmlDetectCharEncoding(start, 4);
11030
0
  if (enc != XML_CHAR_ENCODING_NONE) {
11031
0
      xmlSwitchEncoding(ctxt, enc);
11032
0
  }
11033
0
    }
11034
11035
11036
0
    if (CUR == 0) {
11037
0
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11038
0
    }
11039
11040
    /*
11041
     * Check for the XMLDecl in the Prolog.
11042
     */
11043
0
    GROW;
11044
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
11045
11046
  /*
11047
   * Note that we will switch encoding on the fly.
11048
   */
11049
0
  xmlParseXMLDecl(ctxt);
11050
0
  if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11051
      /*
11052
       * The XML REC instructs us to stop parsing right here
11053
       */
11054
0
      return(-1);
11055
0
  }
11056
0
  SKIP_BLANKS;
11057
0
    } else {
11058
0
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11059
0
    }
11060
0
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
11061
0
        ctxt->sax->startDocument(ctxt->userData);
11062
0
    if (ctxt->instate == XML_PARSER_EOF)
11063
0
  return(-1);
11064
11065
    /*
11066
     * Doing validity checking on chunk doesn't make sense
11067
     */
11068
0
    ctxt->instate = XML_PARSER_CONTENT;
11069
0
    ctxt->validate = 0;
11070
0
    ctxt->loadsubset = 0;
11071
0
    ctxt->depth = 0;
11072
11073
0
    xmlParseContent(ctxt);
11074
0
    if (ctxt->instate == XML_PARSER_EOF)
11075
0
  return(-1);
11076
11077
0
    if ((RAW == '<') && (NXT(1) == '/')) {
11078
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11079
0
    } else if (RAW != 0) {
11080
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
11081
0
    }
11082
11083
    /*
11084
     * SAX: end of the document processing.
11085
     */
11086
0
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11087
0
        ctxt->sax->endDocument(ctxt->userData);
11088
11089
0
    if (! ctxt->wellFormed) return(-1);
11090
0
    return(0);
11091
0
}
11092
11093
#ifdef LIBXML_PUSH_ENABLED
11094
/************************************************************************
11095
 *                  *
11096
 *    Progressive parsing interfaces        *
11097
 *                  *
11098
 ************************************************************************/
11099
11100
/**
11101
 * xmlParseLookupChar:
11102
 * @ctxt:  an XML parser context
11103
 * @c:  character
11104
 *
11105
 * Check whether the input buffer contains a character.
11106
 */
11107
static int
11108
3.05M
xmlParseLookupChar(xmlParserCtxtPtr ctxt, int c) {
11109
3.05M
    const xmlChar *cur;
11110
11111
3.05M
    if (ctxt->checkIndex == 0) {
11112
2.93M
        cur = ctxt->input->cur + 1;
11113
2.93M
    } else {
11114
118k
        cur = ctxt->input->cur + ctxt->checkIndex;
11115
118k
    }
11116
11117
3.05M
    if (memchr(cur, c, ctxt->input->end - cur) == NULL) {
11118
124k
        ctxt->checkIndex = ctxt->input->end - ctxt->input->cur;
11119
124k
        return(0);
11120
2.93M
    } else {
11121
2.93M
        ctxt->checkIndex = 0;
11122
2.93M
        return(1);
11123
2.93M
    }
11124
3.05M
}
11125
11126
/**
11127
 * xmlParseLookupString:
11128
 * @ctxt:  an XML parser context
11129
 * @startDelta: delta to apply at the start
11130
 * @str:  string
11131
 * @strLen:  length of string
11132
 *
11133
 * Check whether the input buffer contains a string.
11134
 */
11135
static const xmlChar *
11136
xmlParseLookupString(xmlParserCtxtPtr ctxt, size_t startDelta,
11137
646k
                     const char *str, size_t strLen) {
11138
646k
    const xmlChar *cur, *term;
11139
11140
646k
    if (ctxt->checkIndex == 0) {
11141
340k
        cur = ctxt->input->cur + startDelta;
11142
340k
    } else {
11143
305k
        cur = ctxt->input->cur + ctxt->checkIndex;
11144
305k
    }
11145
11146
646k
    term = BAD_CAST strstr((const char *) cur, str);
11147
646k
    if (term == NULL) {
11148
375k
        const xmlChar *end = ctxt->input->end;
11149
11150
        /* Rescan (strLen - 1) characters. */
11151
375k
        if ((size_t) (end - cur) < strLen)
11152
6.67k
            end = cur;
11153
368k
        else
11154
368k
            end -= strLen - 1;
11155
375k
        ctxt->checkIndex = end - ctxt->input->cur;
11156
375k
    } else {
11157
271k
        ctxt->checkIndex = 0;
11158
271k
    }
11159
11160
646k
    return(term);
11161
646k
}
11162
11163
/**
11164
 * xmlParseLookupCharData:
11165
 * @ctxt:  an XML parser context
11166
 *
11167
 * Check whether the input buffer contains terminated char data.
11168
 */
11169
static int
11170
4.27M
xmlParseLookupCharData(xmlParserCtxtPtr ctxt) {
11171
4.27M
    const xmlChar *cur = ctxt->input->cur + ctxt->checkIndex;
11172
4.27M
    const xmlChar *end = ctxt->input->end;
11173
11174
78.7M
    while (cur < end) {
11175
78.2M
        if ((*cur == '<') || (*cur == '&')) {
11176
3.78M
            ctxt->checkIndex = 0;
11177
3.78M
            return(1);
11178
3.78M
        }
11179
74.4M
        cur++;
11180
74.4M
    }
11181
11182
497k
    ctxt->checkIndex = cur - ctxt->input->cur;
11183
497k
    return(0);
11184
4.27M
}
11185
11186
/**
11187
 * xmlParseLookupGt:
11188
 * @ctxt:  an XML parser context
11189
 *
11190
 * Check whether there's enough data in the input buffer to finish parsing
11191
 * a start tag. This has to take quotes into account.
11192
 */
11193
static int
11194
4.21M
xmlParseLookupGt(xmlParserCtxtPtr ctxt) {
11195
4.21M
    const xmlChar *cur;
11196
4.21M
    const xmlChar *end = ctxt->input->end;
11197
4.21M
    int state = ctxt->endCheckState;
11198
11199
4.21M
    if (ctxt->checkIndex == 0)
11200
3.40M
        cur = ctxt->input->cur + 1;
11201
818k
    else
11202
818k
        cur = ctxt->input->cur + ctxt->checkIndex;
11203
11204
180M
    while (cur < end) {
11205
179M
        if (state) {
11206
103M
            if (*cur == state)
11207
4.26M
                state = 0;
11208
103M
        } else if (*cur == '\'' || *cur == '"') {
11209
4.28M
            state = *cur;
11210
71.5M
        } else if (*cur == '>') {
11211
3.36M
            ctxt->checkIndex = 0;
11212
3.36M
            ctxt->endCheckState = 0;
11213
3.36M
            return(1);
11214
3.36M
        }
11215
175M
        cur++;
11216
175M
    }
11217
11218
852k
    ctxt->checkIndex = cur - ctxt->input->cur;
11219
852k
    ctxt->endCheckState = state;
11220
852k
    return(0);
11221
4.21M
}
11222
11223
/**
11224
 * xmlParseLookupInternalSubset:
11225
 * @ctxt:  an XML parser context
11226
 *
11227
 * Check whether there's enough data in the input buffer to finish parsing
11228
 * the internal subset.
11229
 */
11230
static int
11231
288k
xmlParseLookupInternalSubset(xmlParserCtxtPtr ctxt) {
11232
    /*
11233
     * Sorry, but progressive parsing of the internal subset is not
11234
     * supported. We first check that the full content of the internal
11235
     * subset is available and parsing is launched only at that point.
11236
     * Internal subset ends with "']' S? '>'" in an unescaped section and
11237
     * not in a ']]>' sequence which are conditional sections.
11238
     */
11239
288k
    const xmlChar *cur, *start;
11240
288k
    const xmlChar *end = ctxt->input->end;
11241
288k
    int state = ctxt->endCheckState;
11242
11243
288k
    if (ctxt->checkIndex == 0) {
11244
77.5k
        cur = ctxt->input->cur + 1;
11245
210k
    } else {
11246
210k
        cur = ctxt->input->cur + ctxt->checkIndex;
11247
210k
    }
11248
288k
    start = cur;
11249
11250
50.5M
    while (cur < end) {
11251
50.3M
        if (state == '-') {
11252
5.42M
            if ((*cur == '-') &&
11253
5.42M
                (cur[1] == '-') &&
11254
5.42M
                (cur[2] == '>')) {
11255
36.8k
                state = 0;
11256
36.8k
                cur += 3;
11257
36.8k
                start = cur;
11258
36.8k
                continue;
11259
36.8k
            }
11260
5.42M
        }
11261
44.9M
        else if (state == ']') {
11262
76.4k
            if (*cur == '>') {
11263
67.4k
                ctxt->checkIndex = 0;
11264
67.4k
                ctxt->endCheckState = 0;
11265
67.4k
                return(1);
11266
67.4k
            }
11267
8.93k
            if (IS_BLANK_CH(*cur)) {
11268
4.90k
                state = ' ';
11269
4.90k
            } else if (*cur != ']') {
11270
1.90k
                state = 0;
11271
1.90k
                start = cur;
11272
1.90k
                continue;
11273
1.90k
            }
11274
8.93k
        }
11275
44.8M
        else if (state == ' ') {
11276
126k
            if (*cur == '>') {
11277
144
                ctxt->checkIndex = 0;
11278
144
                ctxt->endCheckState = 0;
11279
144
                return(1);
11280
144
            }
11281
126k
            if (!IS_BLANK_CH(*cur)) {
11282
4.75k
                state = 0;
11283
4.75k
                start = cur;
11284
4.75k
                continue;
11285
4.75k
            }
11286
126k
        }
11287
44.7M
        else if (state != 0) {
11288
21.7M
            if (*cur == state) {
11289
539k
                state = 0;
11290
539k
                start = cur + 1;
11291
539k
            }
11292
21.7M
        }
11293
22.9M
        else if (*cur == '<') {
11294
641k
            if ((cur[1] == '!') &&
11295
641k
                (cur[2] == '-') &&
11296
641k
                (cur[3] == '-')) {
11297
37.8k
                state = '-';
11298
37.8k
                cur += 4;
11299
                /* Don't treat <!--> as comment */
11300
37.8k
                start = cur;
11301
37.8k
                continue;
11302
37.8k
            }
11303
641k
        }
11304
22.2M
        else if ((*cur == '"') || (*cur == '\'') || (*cur == ']')) {
11305
617k
            state = *cur;
11306
617k
        }
11307
11308
50.1M
        cur++;
11309
50.1M
    }
11310
11311
    /*
11312
     * Rescan the three last characters to detect "<!--" and "-->"
11313
     * split across chunks.
11314
     */
11315
220k
    if ((state == 0) || (state == '-')) {
11316
117k
        if (cur - start < 3)
11317
10.4k
            cur = start;
11318
107k
        else
11319
107k
            cur -= 3;
11320
117k
    }
11321
220k
    ctxt->checkIndex = cur - ctxt->input->cur;
11322
220k
    ctxt->endCheckState = state;
11323
220k
    return(0);
11324
288k
}
11325
11326
/**
11327
 * xmlCheckCdataPush:
11328
 * @cur: pointer to the block of characters
11329
 * @len: length of the block in bytes
11330
 * @complete: 1 if complete CDATA block is passed in, 0 if partial block
11331
 *
11332
 * Check that the block of characters is okay as SCdata content [20]
11333
 *
11334
 * Returns the number of bytes to pass if okay, a negative index where an
11335
 *         UTF-8 error occurred otherwise
11336
 */
11337
static int
11338
129k
xmlCheckCdataPush(const xmlChar *utf, int len, int complete) {
11339
129k
    int ix;
11340
129k
    unsigned char c;
11341
129k
    int codepoint;
11342
11343
129k
    if ((utf == NULL) || (len <= 0))
11344
2.15k
        return(0);
11345
11346
8.17M
    for (ix = 0; ix < len;) {      /* string is 0-terminated */
11347
8.14M
        c = utf[ix];
11348
8.14M
        if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
11349
7.46M
      if (c >= 0x20)
11350
6.91M
    ix++;
11351
551k
      else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11352
545k
          ix++;
11353
6.61k
      else
11354
6.61k
          return(-ix);
11355
7.46M
  } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
11356
378k
      if (ix + 2 > len) return(complete ? -ix : ix);
11357
375k
      if ((utf[ix+1] & 0xc0 ) != 0x80)
11358
37.1k
          return(-ix);
11359
338k
      codepoint = (utf[ix] & 0x1f) << 6;
11360
338k
      codepoint |= utf[ix+1] & 0x3f;
11361
338k
      if (!xmlIsCharQ(codepoint))
11362
2.41k
          return(-ix);
11363
336k
      ix += 2;
11364
336k
  } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
11365
114k
      if (ix + 3 > len) return(complete ? -ix : ix);
11366
111k
      if (((utf[ix+1] & 0xc0) != 0x80) ||
11367
111k
          ((utf[ix+2] & 0xc0) != 0x80))
11368
6.84k
        return(-ix);
11369
105k
      codepoint = (utf[ix] & 0xf) << 12;
11370
105k
      codepoint |= (utf[ix+1] & 0x3f) << 6;
11371
105k
      codepoint |= utf[ix+2] & 0x3f;
11372
105k
      if (!xmlIsCharQ(codepoint))
11373
1.94k
          return(-ix);
11374
103k
      ix += 3;
11375
182k
  } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
11376
171k
      if (ix + 4 > len) return(complete ? -ix : ix);
11377
169k
      if (((utf[ix+1] & 0xc0) != 0x80) ||
11378
169k
          ((utf[ix+2] & 0xc0) != 0x80) ||
11379
169k
    ((utf[ix+3] & 0xc0) != 0x80))
11380
15.6k
        return(-ix);
11381
153k
      codepoint = (utf[ix] & 0x7) << 18;
11382
153k
      codepoint |= (utf[ix+1] & 0x3f) << 12;
11383
153k
      codepoint |= (utf[ix+2] & 0x3f) << 6;
11384
153k
      codepoint |= utf[ix+3] & 0x3f;
11385
153k
      if (!xmlIsCharQ(codepoint))
11386
6.00k
          return(-ix);
11387
147k
      ix += 4;
11388
147k
  } else       /* unknown encoding */
11389
10.7k
      return(-ix);
11390
8.14M
      }
11391
32.4k
      return(ix);
11392
127k
}
11393
11394
/**
11395
 * xmlParseTryOrFinish:
11396
 * @ctxt:  an XML parser context
11397
 * @terminate:  last chunk indicator
11398
 *
11399
 * Try to progress on parsing
11400
 *
11401
 * Returns zero if no parsing was possible
11402
 */
11403
static int
11404
2.42M
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11405
2.42M
    int ret = 0;
11406
2.42M
    int avail, tlen;
11407
2.42M
    xmlChar cur, next;
11408
11409
2.42M
    if (ctxt->input == NULL)
11410
0
        return(0);
11411
11412
#ifdef DEBUG_PUSH
11413
    switch (ctxt->instate) {
11414
  case XML_PARSER_EOF:
11415
      xmlGenericError(xmlGenericErrorContext,
11416
        "PP: try EOF\n"); break;
11417
  case XML_PARSER_START:
11418
      xmlGenericError(xmlGenericErrorContext,
11419
        "PP: try START\n"); break;
11420
  case XML_PARSER_MISC:
11421
      xmlGenericError(xmlGenericErrorContext,
11422
        "PP: try MISC\n");break;
11423
  case XML_PARSER_COMMENT:
11424
      xmlGenericError(xmlGenericErrorContext,
11425
        "PP: try COMMENT\n");break;
11426
  case XML_PARSER_PROLOG:
11427
      xmlGenericError(xmlGenericErrorContext,
11428
        "PP: try PROLOG\n");break;
11429
  case XML_PARSER_START_TAG:
11430
      xmlGenericError(xmlGenericErrorContext,
11431
        "PP: try START_TAG\n");break;
11432
  case XML_PARSER_CONTENT:
11433
      xmlGenericError(xmlGenericErrorContext,
11434
        "PP: try CONTENT\n");break;
11435
  case XML_PARSER_CDATA_SECTION:
11436
      xmlGenericError(xmlGenericErrorContext,
11437
        "PP: try CDATA_SECTION\n");break;
11438
  case XML_PARSER_END_TAG:
11439
      xmlGenericError(xmlGenericErrorContext,
11440
        "PP: try END_TAG\n");break;
11441
  case XML_PARSER_ENTITY_DECL:
11442
      xmlGenericError(xmlGenericErrorContext,
11443
        "PP: try ENTITY_DECL\n");break;
11444
  case XML_PARSER_ENTITY_VALUE:
11445
      xmlGenericError(xmlGenericErrorContext,
11446
        "PP: try ENTITY_VALUE\n");break;
11447
  case XML_PARSER_ATTRIBUTE_VALUE:
11448
      xmlGenericError(xmlGenericErrorContext,
11449
        "PP: try ATTRIBUTE_VALUE\n");break;
11450
  case XML_PARSER_DTD:
11451
      xmlGenericError(xmlGenericErrorContext,
11452
        "PP: try DTD\n");break;
11453
  case XML_PARSER_EPILOG:
11454
      xmlGenericError(xmlGenericErrorContext,
11455
        "PP: try EPILOG\n");break;
11456
  case XML_PARSER_PI:
11457
      xmlGenericError(xmlGenericErrorContext,
11458
        "PP: try PI\n");break;
11459
        case XML_PARSER_IGNORE:
11460
            xmlGenericError(xmlGenericErrorContext,
11461
        "PP: try IGNORE\n");break;
11462
    }
11463
#endif
11464
11465
2.42M
    if ((ctxt->input != NULL) &&
11466
2.42M
        (ctxt->input->cur - ctxt->input->base > 4096)) {
11467
40.1k
        xmlParserInputShrink(ctxt->input);
11468
40.1k
    }
11469
11470
22.4M
    while (ctxt->instate != XML_PARSER_EOF) {
11471
22.4M
  if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11472
38.8k
      return(0);
11473
11474
22.4M
  if (ctxt->input == NULL) break;
11475
22.4M
  if (ctxt->input->buf == NULL)
11476
0
      avail = ctxt->input->length -
11477
0
              (ctxt->input->cur - ctxt->input->base);
11478
22.4M
  else {
11479
      /*
11480
       * If we are operating on converted input, try to flush
11481
       * remaining chars to avoid them stalling in the non-converted
11482
       * buffer. But do not do this in document start where
11483
       * encoding="..." may not have been read and we work on a
11484
       * guessed encoding.
11485
       */
11486
22.4M
      if ((ctxt->instate != XML_PARSER_START) &&
11487
22.4M
          (ctxt->input->buf->raw != NULL) &&
11488
22.4M
    (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
11489
66.0k
                size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11490
66.0k
                                                 ctxt->input);
11491
66.0k
    size_t current = ctxt->input->cur - ctxt->input->base;
11492
11493
66.0k
    xmlParserInputBufferPush(ctxt->input->buf, 0, "");
11494
66.0k
                xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11495
66.0k
                                      base, current);
11496
66.0k
      }
11497
22.4M
      avail = xmlBufUse(ctxt->input->buf->buffer) -
11498
22.4M
        (ctxt->input->cur - ctxt->input->base);
11499
22.4M
  }
11500
22.4M
        if (avail < 1)
11501
126k
      goto done;
11502
22.2M
        switch (ctxt->instate) {
11503
0
            case XML_PARSER_EOF:
11504
          /*
11505
     * Document parsing is done !
11506
     */
11507
0
          goto done;
11508
402k
            case XML_PARSER_START:
11509
402k
    if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11510
110k
        xmlChar start[4];
11511
110k
        xmlCharEncoding enc;
11512
11513
        /*
11514
         * Very first chars read from the document flow.
11515
         */
11516
110k
        if (avail < 4)
11517
969
      goto done;
11518
11519
        /*
11520
         * Get the 4 first bytes and decode the charset
11521
         * if enc != XML_CHAR_ENCODING_NONE
11522
         * plug some encoding conversion routines,
11523
         * else xmlSwitchEncoding will set to (default)
11524
         * UTF8.
11525
         */
11526
109k
        start[0] = RAW;
11527
109k
        start[1] = NXT(1);
11528
109k
        start[2] = NXT(2);
11529
109k
        start[3] = NXT(3);
11530
109k
        enc = xmlDetectCharEncoding(start, 4);
11531
109k
        xmlSwitchEncoding(ctxt, enc);
11532
109k
        break;
11533
110k
    }
11534
11535
292k
    if (avail < 2)
11536
53
        goto done;
11537
292k
    cur = ctxt->input->cur[0];
11538
292k
    next = ctxt->input->cur[1];
11539
292k
    if (cur == 0) {
11540
228
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11541
228
      ctxt->sax->setDocumentLocator(ctxt->userData,
11542
228
                  &xmlDefaultSAXLocator);
11543
228
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11544
228
        xmlHaltParser(ctxt);
11545
#ifdef DEBUG_PUSH
11546
        xmlGenericError(xmlGenericErrorContext,
11547
          "PP: entering EOF\n");
11548
#endif
11549
228
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11550
228
      ctxt->sax->endDocument(ctxt->userData);
11551
228
        goto done;
11552
228
    }
11553
292k
          if ((cur == '<') && (next == '?')) {
11554
        /* PI or XML decl */
11555
226k
        if (avail < 5) goto done;
11556
226k
        if ((!terminate) &&
11557
226k
                        (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11558
141k
      goto done;
11559
85.6k
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11560
85.6k
      ctxt->sax->setDocumentLocator(ctxt->userData,
11561
85.6k
                  &xmlDefaultSAXLocator);
11562
85.6k
        if ((ctxt->input->cur[2] == 'x') &&
11563
85.6k
      (ctxt->input->cur[3] == 'm') &&
11564
85.6k
      (ctxt->input->cur[4] == 'l') &&
11565
85.6k
      (IS_BLANK_CH(ctxt->input->cur[5]))) {
11566
79.7k
      ret += 5;
11567
#ifdef DEBUG_PUSH
11568
      xmlGenericError(xmlGenericErrorContext,
11569
        "PP: Parsing XML Decl\n");
11570
#endif
11571
79.7k
      xmlParseXMLDecl(ctxt);
11572
79.7k
      if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11573
          /*
11574
           * The XML REC instructs us to stop parsing right
11575
           * here
11576
           */
11577
160
          xmlHaltParser(ctxt);
11578
160
          return(0);
11579
160
      }
11580
79.5k
      ctxt->standalone = ctxt->input->standalone;
11581
79.5k
      if ((ctxt->encoding == NULL) &&
11582
79.5k
          (ctxt->input->encoding != NULL))
11583
10.4k
          ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11584
79.5k
      if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11585
79.5k
          (!ctxt->disableSAX))
11586
76.0k
          ctxt->sax->startDocument(ctxt->userData);
11587
79.5k
      ctxt->instate = XML_PARSER_MISC;
11588
#ifdef DEBUG_PUSH
11589
      xmlGenericError(xmlGenericErrorContext,
11590
        "PP: entering MISC\n");
11591
#endif
11592
79.5k
        } else {
11593
5.95k
      ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11594
5.95k
      if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11595
5.95k
          (!ctxt->disableSAX))
11596
5.95k
          ctxt->sax->startDocument(ctxt->userData);
11597
5.95k
      ctxt->instate = XML_PARSER_MISC;
11598
#ifdef DEBUG_PUSH
11599
      xmlGenericError(xmlGenericErrorContext,
11600
        "PP: entering MISC\n");
11601
#endif
11602
5.95k
        }
11603
85.6k
    } else {
11604
65.4k
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11605
65.4k
      ctxt->sax->setDocumentLocator(ctxt->userData,
11606
65.4k
                  &xmlDefaultSAXLocator);
11607
65.4k
        ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11608
65.4k
        if (ctxt->version == NULL) {
11609
0
            xmlErrMemory(ctxt, NULL);
11610
0
      break;
11611
0
        }
11612
65.4k
        if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11613
65.4k
            (!ctxt->disableSAX))
11614
65.4k
      ctxt->sax->startDocument(ctxt->userData);
11615
65.4k
        ctxt->instate = XML_PARSER_MISC;
11616
#ifdef DEBUG_PUSH
11617
        xmlGenericError(xmlGenericErrorContext,
11618
          "PP: entering MISC\n");
11619
#endif
11620
65.4k
    }
11621
150k
    break;
11622
4.67M
            case XML_PARSER_START_TAG: {
11623
4.67M
          const xmlChar *name;
11624
4.67M
    const xmlChar *prefix = NULL;
11625
4.67M
    const xmlChar *URI = NULL;
11626
4.67M
                int line = ctxt->input->line;
11627
4.67M
    int nsNr = ctxt->nsNr;
11628
11629
4.67M
    if ((avail < 2) && (ctxt->inputNr == 1))
11630
0
        goto done;
11631
4.67M
    cur = ctxt->input->cur[0];
11632
4.67M
          if (cur != '<') {
11633
8.48k
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11634
8.48k
        xmlHaltParser(ctxt);
11635
8.48k
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11636
8.48k
      ctxt->sax->endDocument(ctxt->userData);
11637
8.48k
        goto done;
11638
8.48k
    }
11639
4.66M
    if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11640
796k
                    goto done;
11641
3.86M
    if (ctxt->spaceNr == 0)
11642
34.2k
        spacePush(ctxt, -1);
11643
3.83M
    else if (*ctxt->space == -2)
11644
509k
        spacePush(ctxt, -1);
11645
3.32M
    else
11646
3.32M
        spacePush(ctxt, *ctxt->space);
11647
3.86M
#ifdef LIBXML_SAX1_ENABLED
11648
3.86M
    if (ctxt->sax2)
11649
2.38M
#endif /* LIBXML_SAX1_ENABLED */
11650
2.38M
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
11651
1.48M
#ifdef LIBXML_SAX1_ENABLED
11652
1.48M
    else
11653
1.48M
        name = xmlParseStartTag(ctxt);
11654
3.86M
#endif /* LIBXML_SAX1_ENABLED */
11655
3.86M
    if (ctxt->instate == XML_PARSER_EOF)
11656
126
        goto done;
11657
3.86M
    if (name == NULL) {
11658
9.74k
        spacePop(ctxt);
11659
9.74k
        xmlHaltParser(ctxt);
11660
9.74k
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11661
9.74k
      ctxt->sax->endDocument(ctxt->userData);
11662
9.74k
        goto done;
11663
9.74k
    }
11664
3.86M
#ifdef LIBXML_VALID_ENABLED
11665
    /*
11666
     * [ VC: Root Element Type ]
11667
     * The Name in the document type declaration must match
11668
     * the element type of the root element.
11669
     */
11670
3.86M
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11671
3.86M
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
11672
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11673
3.86M
#endif /* LIBXML_VALID_ENABLED */
11674
11675
    /*
11676
     * Check for an Empty Element.
11677
     */
11678
3.86M
    if ((RAW == '/') && (NXT(1) == '>')) {
11679
1.57M
        SKIP(2);
11680
11681
1.57M
        if (ctxt->sax2) {
11682
1.13M
      if ((ctxt->sax != NULL) &&
11683
1.13M
          (ctxt->sax->endElementNs != NULL) &&
11684
1.13M
          (!ctxt->disableSAX))
11685
1.13M
          ctxt->sax->endElementNs(ctxt->userData, name,
11686
1.13M
                                  prefix, URI);
11687
1.13M
      if (ctxt->nsNr - nsNr > 0)
11688
3.71k
          nsPop(ctxt, ctxt->nsNr - nsNr);
11689
1.13M
#ifdef LIBXML_SAX1_ENABLED
11690
1.13M
        } else {
11691
437k
      if ((ctxt->sax != NULL) &&
11692
437k
          (ctxt->sax->endElement != NULL) &&
11693
437k
          (!ctxt->disableSAX))
11694
436k
          ctxt->sax->endElement(ctxt->userData, name);
11695
437k
#endif /* LIBXML_SAX1_ENABLED */
11696
437k
        }
11697
1.57M
        if (ctxt->instate == XML_PARSER_EOF)
11698
0
      goto done;
11699
1.57M
        spacePop(ctxt);
11700
1.57M
        if (ctxt->nameNr == 0) {
11701
5.16k
      ctxt->instate = XML_PARSER_EPILOG;
11702
1.56M
        } else {
11703
1.56M
      ctxt->instate = XML_PARSER_CONTENT;
11704
1.56M
        }
11705
1.57M
        break;
11706
1.57M
    }
11707
2.28M
    if (RAW == '>') {
11708
2.05M
        NEXT;
11709
2.05M
    } else {
11710
236k
        xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11711
236k
           "Couldn't find end of Start Tag %s\n",
11712
236k
           name);
11713
236k
        nodePop(ctxt);
11714
236k
        spacePop(ctxt);
11715
236k
    }
11716
2.28M
                nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
11717
11718
2.28M
    ctxt->instate = XML_PARSER_CONTENT;
11719
2.28M
                break;
11720
3.86M
      }
11721
14.4M
            case XML_PARSER_CONTENT: {
11722
14.4M
    if ((avail < 2) && (ctxt->inputNr == 1))
11723
58.0k
        goto done;
11724
14.3M
    cur = ctxt->input->cur[0];
11725
14.3M
    next = ctxt->input->cur[1];
11726
11727
14.3M
    if ((cur == '<') && (next == '/')) {
11728
1.88M
        ctxt->instate = XML_PARSER_END_TAG;
11729
1.88M
        break;
11730
12.4M
          } else if ((cur == '<') && (next == '?')) {
11731
34.3k
        if ((!terminate) &&
11732
34.3k
            (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11733
15.6k
      goto done;
11734
18.6k
        xmlParsePI(ctxt);
11735
18.6k
        ctxt->instate = XML_PARSER_CONTENT;
11736
12.4M
    } else if ((cur == '<') && (next != '!')) {
11737
3.77M
        ctxt->instate = XML_PARSER_START_TAG;
11738
3.77M
        break;
11739
8.67M
    } else if ((cur == '<') && (next == '!') &&
11740
8.67M
               (ctxt->input->cur[2] == '-') &&
11741
8.67M
         (ctxt->input->cur[3] == '-')) {
11742
194k
        if ((!terminate) &&
11743
194k
            (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11744
86.2k
      goto done;
11745
108k
        xmlParseComment(ctxt);
11746
108k
        ctxt->instate = XML_PARSER_CONTENT;
11747
8.48M
    } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11748
8.48M
        (ctxt->input->cur[2] == '[') &&
11749
8.48M
        (ctxt->input->cur[3] == 'C') &&
11750
8.48M
        (ctxt->input->cur[4] == 'D') &&
11751
8.48M
        (ctxt->input->cur[5] == 'A') &&
11752
8.48M
        (ctxt->input->cur[6] == 'T') &&
11753
8.48M
        (ctxt->input->cur[7] == 'A') &&
11754
8.48M
        (ctxt->input->cur[8] == '[')) {
11755
14.7k
        SKIP(9);
11756
14.7k
        ctxt->instate = XML_PARSER_CDATA_SECTION;
11757
14.7k
        break;
11758
8.46M
    } else if ((cur == '<') && (next == '!') &&
11759
8.46M
               (avail < 9)) {
11760
8.80k
        goto done;
11761
8.45M
    } else if (cur == '<') {
11762
189k
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11763
189k
                    "detected an error in element content\n");
11764
189k
                    SKIP(1);
11765
8.26M
    } else if (cur == '&') {
11766
1.65M
        if ((!terminate) && (!xmlParseLookupChar(ctxt, ';')))
11767
70.4k
      goto done;
11768
1.58M
        xmlParseReference(ctxt);
11769
6.61M
    } else {
11770
        /* TODO Avoid the extra copy, handle directly !!! */
11771
        /*
11772
         * Goal of the following test is:
11773
         *  - minimize calls to the SAX 'character' callback
11774
         *    when they are mergeable
11775
         *  - handle a problem for isBlank when we only parse
11776
         *    a sequence of blank chars and the next one is
11777
         *    not available to check against '<' presence.
11778
         *  - tries to homogenize the differences in SAX
11779
         *    callbacks between the push and pull versions
11780
         *    of the parser.
11781
         */
11782
6.61M
        if ((ctxt->inputNr == 1) &&
11783
6.61M
            (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11784
4.43M
      if ((!terminate) && (!xmlParseLookupCharData(ctxt)))
11785
497k
          goto done;
11786
4.43M
                    }
11787
6.11M
                    ctxt->checkIndex = 0;
11788
6.11M
        xmlParseCharData(ctxt, 0);
11789
6.11M
    }
11790
8.01M
    break;
11791
14.3M
      }
11792
8.01M
            case XML_PARSER_END_TAG:
11793
1.93M
    if (avail < 2)
11794
0
        goto done;
11795
1.93M
    if ((!terminate) && (!xmlParseLookupChar(ctxt, '>')))
11796
54.4k
        goto done;
11797
1.88M
    if (ctxt->sax2) {
11798
1.00M
              xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
11799
1.00M
        nameNsPop(ctxt);
11800
1.00M
    }
11801
871k
#ifdef LIBXML_SAX1_ENABLED
11802
871k
      else
11803
871k
        xmlParseEndTag1(ctxt, 0);
11804
1.88M
#endif /* LIBXML_SAX1_ENABLED */
11805
1.88M
    if (ctxt->instate == XML_PARSER_EOF) {
11806
        /* Nothing */
11807
1.88M
    } else if (ctxt->nameNr == 0) {
11808
21.9k
        ctxt->instate = XML_PARSER_EPILOG;
11809
1.85M
    } else {
11810
1.85M
        ctxt->instate = XML_PARSER_CONTENT;
11811
1.85M
    }
11812
1.88M
    break;
11813
178k
            case XML_PARSER_CDATA_SECTION: {
11814
          /*
11815
     * The Push mode need to have the SAX callback for
11816
     * cdataBlock merge back contiguous callbacks.
11817
     */
11818
178k
    const xmlChar *term;
11819
11820
178k
                if (terminate) {
11821
                    /*
11822
                     * Don't call xmlParseLookupString. If 'terminate'
11823
                     * is set, checkIndex is invalid.
11824
                     */
11825
3.52k
                    term = BAD_CAST strstr((const char *) ctxt->input->cur,
11826
3.52k
                                           "]]>");
11827
175k
                } else {
11828
175k
        term = xmlParseLookupString(ctxt, 0, "]]>", 3);
11829
175k
                }
11830
11831
178k
    if (term == NULL) {
11832
107k
        int tmp, size;
11833
11834
107k
                    if (terminate) {
11835
                        /* Unfinished CDATA section */
11836
1.25k
                        size = ctxt->input->end - ctxt->input->cur;
11837
106k
                    } else {
11838
106k
                        if (avail < XML_PARSER_BIG_BUFFER_SIZE + 2)
11839
49.6k
                            goto done;
11840
56.9k
                        ctxt->checkIndex = 0;
11841
                        /* XXX: Why don't we pass the full buffer? */
11842
56.9k
                        size = XML_PARSER_BIG_BUFFER_SIZE;
11843
56.9k
                    }
11844
58.2k
                    tmp = xmlCheckCdataPush(ctxt->input->cur, size, 0);
11845
58.2k
                    if (tmp <= 0) {
11846
34.6k
                        tmp = -tmp;
11847
34.6k
                        ctxt->input->cur += tmp;
11848
34.6k
                        goto encoding_error;
11849
34.6k
                    }
11850
23.5k
                    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11851
23.5k
                        if (ctxt->sax->cdataBlock != NULL)
11852
11.0k
                            ctxt->sax->cdataBlock(ctxt->userData,
11853
11.0k
                                                  ctxt->input->cur, tmp);
11854
12.5k
                        else if (ctxt->sax->characters != NULL)
11855
12.5k
                            ctxt->sax->characters(ctxt->userData,
11856
12.5k
                                                  ctxt->input->cur, tmp);
11857
23.5k
                    }
11858
23.5k
                    if (ctxt->instate == XML_PARSER_EOF)
11859
0
                        goto done;
11860
23.5k
                    SKIPL(tmp);
11861
71.0k
    } else {
11862
71.0k
                    int base = term - CUR_PTR;
11863
71.0k
        int tmp;
11864
11865
71.0k
        tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1);
11866
71.0k
        if ((tmp < 0) || (tmp != base)) {
11867
59.1k
      tmp = -tmp;
11868
59.1k
      ctxt->input->cur += tmp;
11869
59.1k
      goto encoding_error;
11870
59.1k
        }
11871
11.8k
        if ((ctxt->sax != NULL) && (base == 0) &&
11872
11.8k
            (ctxt->sax->cdataBlock != NULL) &&
11873
11.8k
            (!ctxt->disableSAX)) {
11874
      /*
11875
       * Special case to provide identical behaviour
11876
       * between pull and push parsers on empty CDATA
11877
       * sections
11878
       */
11879
1.53k
       if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11880
1.53k
           (!strncmp((const char *)&ctxt->input->cur[-9],
11881
1.53k
                     "<![CDATA[", 9)))
11882
1.52k
           ctxt->sax->cdataBlock(ctxt->userData,
11883
1.52k
                                 BAD_CAST "", 0);
11884
10.3k
        } else if ((ctxt->sax != NULL) && (base > 0) &&
11885
10.3k
      (!ctxt->disableSAX)) {
11886
9.70k
      if (ctxt->sax->cdataBlock != NULL)
11887
5.99k
          ctxt->sax->cdataBlock(ctxt->userData,
11888
5.99k
              ctxt->input->cur, base);
11889
3.71k
      else if (ctxt->sax->characters != NULL)
11890
3.71k
          ctxt->sax->characters(ctxt->userData,
11891
3.71k
              ctxt->input->cur, base);
11892
9.70k
        }
11893
11.8k
        if (ctxt->instate == XML_PARSER_EOF)
11894
0
      goto done;
11895
11.8k
        SKIPL(base + 3);
11896
11.8k
        ctxt->instate = XML_PARSER_CONTENT;
11897
#ifdef DEBUG_PUSH
11898
        xmlGenericError(xmlGenericErrorContext,
11899
          "PP: entering CONTENT\n");
11900
#endif
11901
11.8k
    }
11902
35.4k
    break;
11903
178k
      }
11904
251k
            case XML_PARSER_MISC:
11905
353k
            case XML_PARSER_PROLOG:
11906
382k
            case XML_PARSER_EPILOG:
11907
382k
    SKIP_BLANKS;
11908
382k
    if (ctxt->input->buf == NULL)
11909
0
        avail = ctxt->input->length -
11910
0
                (ctxt->input->cur - ctxt->input->base);
11911
382k
    else
11912
382k
        avail = xmlBufUse(ctxt->input->buf->buffer) -
11913
382k
                (ctxt->input->cur - ctxt->input->base);
11914
382k
    if (avail < 2)
11915
23.3k
        goto done;
11916
359k
    cur = ctxt->input->cur[0];
11917
359k
    next = ctxt->input->cur[1];
11918
359k
          if ((cur == '<') && (next == '?')) {
11919
32.5k
        if ((!terminate) &&
11920
32.5k
                        (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11921
11.2k
      goto done;
11922
#ifdef DEBUG_PUSH
11923
        xmlGenericError(xmlGenericErrorContext,
11924
          "PP: Parsing PI\n");
11925
#endif
11926
21.2k
        xmlParsePI(ctxt);
11927
21.2k
        if (ctxt->instate == XML_PARSER_EOF)
11928
0
      goto done;
11929
326k
    } else if ((cur == '<') && (next == '!') &&
11930
326k
        (ctxt->input->cur[2] == '-') &&
11931
326k
        (ctxt->input->cur[3] == '-')) {
11932
30.4k
        if ((!terminate) &&
11933
30.4k
                        (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11934
14.4k
      goto done;
11935
#ifdef DEBUG_PUSH
11936
        xmlGenericError(xmlGenericErrorContext,
11937
          "PP: Parsing Comment\n");
11938
#endif
11939
15.9k
        xmlParseComment(ctxt);
11940
15.9k
        if (ctxt->instate == XML_PARSER_EOF)
11941
0
      goto done;
11942
296k
    } else if ((ctxt->instate == XML_PARSER_MISC) &&
11943
296k
                    (cur == '<') && (next == '!') &&
11944
296k
        (ctxt->input->cur[2] == 'D') &&
11945
296k
        (ctxt->input->cur[3] == 'O') &&
11946
296k
        (ctxt->input->cur[4] == 'C') &&
11947
296k
        (ctxt->input->cur[5] == 'T') &&
11948
296k
        (ctxt->input->cur[6] == 'Y') &&
11949
296k
        (ctxt->input->cur[7] == 'P') &&
11950
296k
        (ctxt->input->cur[8] == 'E')) {
11951
157k
        if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11952
55.9k
                        goto done;
11953
#ifdef DEBUG_PUSH
11954
        xmlGenericError(xmlGenericErrorContext,
11955
          "PP: Parsing internal subset\n");
11956
#endif
11957
101k
        ctxt->inSubset = 1;
11958
101k
        xmlParseDocTypeDecl(ctxt);
11959
101k
        if (ctxt->instate == XML_PARSER_EOF)
11960
0
      goto done;
11961
101k
        if (RAW == '[') {
11962
79.3k
      ctxt->instate = XML_PARSER_DTD;
11963
#ifdef DEBUG_PUSH
11964
      xmlGenericError(xmlGenericErrorContext,
11965
        "PP: entering DTD\n");
11966
#endif
11967
79.3k
        } else {
11968
      /*
11969
       * Create and update the external subset.
11970
       */
11971
21.9k
      ctxt->inSubset = 2;
11972
21.9k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11973
21.9k
          (ctxt->sax->externalSubset != NULL))
11974
20.9k
          ctxt->sax->externalSubset(ctxt->userData,
11975
20.9k
            ctxt->intSubName, ctxt->extSubSystem,
11976
20.9k
            ctxt->extSubURI);
11977
21.9k
      ctxt->inSubset = 0;
11978
21.9k
      xmlCleanSpecialAttr(ctxt);
11979
21.9k
      ctxt->instate = XML_PARSER_PROLOG;
11980
#ifdef DEBUG_PUSH
11981
      xmlGenericError(xmlGenericErrorContext,
11982
        "PP: entering PROLOG\n");
11983
#endif
11984
21.9k
        }
11985
139k
    } else if ((cur == '<') && (next == '!') &&
11986
139k
               (avail <
11987
18.3k
                            (ctxt->instate == XML_PARSER_MISC ? 9 : 4))) {
11988
17.4k
        goto done;
11989
121k
    } else if (ctxt->instate == XML_PARSER_EPILOG) {
11990
4.70k
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11991
4.70k
        xmlHaltParser(ctxt);
11992
#ifdef DEBUG_PUSH
11993
        xmlGenericError(xmlGenericErrorContext,
11994
          "PP: entering EOF\n");
11995
#endif
11996
4.70k
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11997
4.70k
      ctxt->sax->endDocument(ctxt->userData);
11998
4.70k
        goto done;
11999
116k
                } else {
12000
116k
        ctxt->instate = XML_PARSER_START_TAG;
12001
#ifdef DEBUG_PUSH
12002
        xmlGenericError(xmlGenericErrorContext,
12003
          "PP: entering START_TAG\n");
12004
#endif
12005
116k
    }
12006
255k
    break;
12007
299k
            case XML_PARSER_DTD: {
12008
299k
                if ((!terminate) && (!xmlParseLookupInternalSubset(ctxt)))
12009
220k
                    goto done;
12010
78.8k
    xmlParseInternalSubset(ctxt);
12011
78.8k
    if (ctxt->instate == XML_PARSER_EOF)
12012
16.3k
        goto done;
12013
62.5k
    ctxt->inSubset = 2;
12014
62.5k
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
12015
62.5k
        (ctxt->sax->externalSubset != NULL))
12016
60.4k
        ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
12017
60.4k
          ctxt->extSubSystem, ctxt->extSubURI);
12018
62.5k
    ctxt->inSubset = 0;
12019
62.5k
    xmlCleanSpecialAttr(ctxt);
12020
62.5k
    if (ctxt->instate == XML_PARSER_EOF)
12021
1.86k
        goto done;
12022
60.6k
    ctxt->instate = XML_PARSER_PROLOG;
12023
#ifdef DEBUG_PUSH
12024
    xmlGenericError(xmlGenericErrorContext,
12025
      "PP: entering PROLOG\n");
12026
#endif
12027
60.6k
                break;
12028
62.5k
      }
12029
0
            case XML_PARSER_COMMENT:
12030
0
    xmlGenericError(xmlGenericErrorContext,
12031
0
      "PP: internal error, state == COMMENT\n");
12032
0
    ctxt->instate = XML_PARSER_CONTENT;
12033
#ifdef DEBUG_PUSH
12034
    xmlGenericError(xmlGenericErrorContext,
12035
      "PP: entering CONTENT\n");
12036
#endif
12037
0
    break;
12038
0
            case XML_PARSER_IGNORE:
12039
0
    xmlGenericError(xmlGenericErrorContext,
12040
0
      "PP: internal error, state == IGNORE");
12041
0
          ctxt->instate = XML_PARSER_DTD;
12042
#ifdef DEBUG_PUSH
12043
    xmlGenericError(xmlGenericErrorContext,
12044
      "PP: entering DTD\n");
12045
#endif
12046
0
          break;
12047
0
            case XML_PARSER_PI:
12048
0
    xmlGenericError(xmlGenericErrorContext,
12049
0
      "PP: internal error, state == PI\n");
12050
0
    ctxt->instate = XML_PARSER_CONTENT;
12051
#ifdef DEBUG_PUSH
12052
    xmlGenericError(xmlGenericErrorContext,
12053
      "PP: entering CONTENT\n");
12054
#endif
12055
0
    break;
12056
0
            case XML_PARSER_ENTITY_DECL:
12057
0
    xmlGenericError(xmlGenericErrorContext,
12058
0
      "PP: internal error, state == ENTITY_DECL\n");
12059
0
    ctxt->instate = XML_PARSER_DTD;
12060
#ifdef DEBUG_PUSH
12061
    xmlGenericError(xmlGenericErrorContext,
12062
      "PP: entering DTD\n");
12063
#endif
12064
0
    break;
12065
0
            case XML_PARSER_ENTITY_VALUE:
12066
0
    xmlGenericError(xmlGenericErrorContext,
12067
0
      "PP: internal error, state == ENTITY_VALUE\n");
12068
0
    ctxt->instate = XML_PARSER_CONTENT;
12069
#ifdef DEBUG_PUSH
12070
    xmlGenericError(xmlGenericErrorContext,
12071
      "PP: entering DTD\n");
12072
#endif
12073
0
    break;
12074
0
            case XML_PARSER_ATTRIBUTE_VALUE:
12075
0
    xmlGenericError(xmlGenericErrorContext,
12076
0
      "PP: internal error, state == ATTRIBUTE_VALUE\n");
12077
0
    ctxt->instate = XML_PARSER_START_TAG;
12078
#ifdef DEBUG_PUSH
12079
    xmlGenericError(xmlGenericErrorContext,
12080
      "PP: entering START_TAG\n");
12081
#endif
12082
0
    break;
12083
0
            case XML_PARSER_SYSTEM_LITERAL:
12084
0
    xmlGenericError(xmlGenericErrorContext,
12085
0
      "PP: internal error, state == SYSTEM_LITERAL\n");
12086
0
    ctxt->instate = XML_PARSER_START_TAG;
12087
#ifdef DEBUG_PUSH
12088
    xmlGenericError(xmlGenericErrorContext,
12089
      "PP: entering START_TAG\n");
12090
#endif
12091
0
    break;
12092
0
            case XML_PARSER_PUBLIC_LITERAL:
12093
0
    xmlGenericError(xmlGenericErrorContext,
12094
0
      "PP: internal error, state == PUBLIC_LITERAL\n");
12095
0
    ctxt->instate = XML_PARSER_START_TAG;
12096
#ifdef DEBUG_PUSH
12097
    xmlGenericError(xmlGenericErrorContext,
12098
      "PP: entering START_TAG\n");
12099
#endif
12100
0
    break;
12101
22.2M
  }
12102
22.2M
    }
12103
2.28M
done:
12104
#ifdef DEBUG_PUSH
12105
    xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
12106
#endif
12107
2.28M
    return(ret);
12108
93.7k
encoding_error:
12109
93.7k
    {
12110
93.7k
        char buffer[150];
12111
12112
93.7k
  snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12113
93.7k
      ctxt->input->cur[0], ctxt->input->cur[1],
12114
93.7k
      ctxt->input->cur[2], ctxt->input->cur[3]);
12115
93.7k
  __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12116
93.7k
         "Input is not proper UTF-8, indicate encoding !\n%s",
12117
93.7k
         BAD_CAST buffer, NULL);
12118
93.7k
    }
12119
93.7k
    return(0);
12120
2.42M
}
12121
12122
/**
12123
 * xmlParseChunk:
12124
 * @ctxt:  an XML parser context
12125
 * @chunk:  an char array
12126
 * @size:  the size in byte of the chunk
12127
 * @terminate:  last chunk indicator
12128
 *
12129
 * Parse a Chunk of memory
12130
 *
12131
 * Returns zero if no error, the xmlParserErrors otherwise.
12132
 */
12133
int
12134
xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
12135
3.07M
              int terminate) {
12136
3.07M
    int end_in_lf = 0;
12137
3.07M
    int remain = 0;
12138
12139
3.07M
    if (ctxt == NULL)
12140
0
        return(XML_ERR_INTERNAL_ERROR);
12141
3.07M
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12142
664k
        return(ctxt->errNo);
12143
2.41M
    if (ctxt->instate == XML_PARSER_EOF)
12144
81
        return(-1);
12145
2.41M
    if (ctxt->input == NULL)
12146
0
        return(-1);
12147
12148
2.41M
    ctxt->progressive = 1;
12149
2.41M
    if (ctxt->instate == XML_PARSER_START)
12150
284k
        xmlDetectSAX2(ctxt);
12151
2.41M
    if ((size > 0) && (chunk != NULL) && (!terminate) &&
12152
2.41M
        (chunk[size - 1] == '\r')) {
12153
17.0k
  end_in_lf = 1;
12154
17.0k
  size--;
12155
17.0k
    }
12156
12157
2.42M
xmldecl_done:
12158
12159
2.42M
    if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12160
2.42M
        (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF))  {
12161
2.31M
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12162
2.31M
  size_t cur = ctxt->input->cur - ctxt->input->base;
12163
2.31M
  int res;
12164
12165
        /*
12166
         * Specific handling if we autodetected an encoding, we should not
12167
         * push more than the first line ... which depend on the encoding
12168
         * And only push the rest once the final encoding was detected
12169
         */
12170
2.31M
        if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
12171
2.31M
            (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
12172
14.6k
            unsigned int len = 45;
12173
12174
14.6k
            if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12175
14.6k
                               BAD_CAST "UTF-16")) ||
12176
14.6k
                (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12177
2.32k
                               BAD_CAST "UTF16")))
12178
12.3k
                len = 90;
12179
2.32k
            else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12180
2.32k
                                    BAD_CAST "UCS-4")) ||
12181
2.32k
                     (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12182
2.26k
                                    BAD_CAST "UCS4")))
12183
56
                len = 180;
12184
12185
14.6k
            if (ctxt->input->buf->rawconsumed < len)
12186
1.00k
                len -= ctxt->input->buf->rawconsumed;
12187
12188
            /*
12189
             * Change size for reading the initial declaration only
12190
             * if size is greater than len. Otherwise, memmove in xmlBufferAdd
12191
             * will blindly copy extra bytes from memory.
12192
             */
12193
14.6k
            if ((unsigned int) size > len) {
12194
9.75k
                remain = size - len;
12195
9.75k
                size = len;
12196
9.75k
            } else {
12197
4.93k
                remain = 0;
12198
4.93k
            }
12199
14.6k
        }
12200
2.31M
  res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12201
2.31M
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12202
2.31M
  if (res < 0) {
12203
252
      ctxt->errNo = XML_PARSER_EOF;
12204
252
      xmlHaltParser(ctxt);
12205
252
      return (XML_PARSER_EOF);
12206
252
  }
12207
#ifdef DEBUG_PUSH
12208
  xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12209
#endif
12210
12211
2.31M
    } else if (ctxt->instate != XML_PARSER_EOF) {
12212
105k
  if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
12213
105k
      xmlParserInputBufferPtr in = ctxt->input->buf;
12214
105k
      if ((in->encoder != NULL) && (in->buffer != NULL) &&
12215
105k
        (in->raw != NULL)) {
12216
6.79k
    int nbchars;
12217
6.79k
    size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
12218
6.79k
    size_t current = ctxt->input->cur - ctxt->input->base;
12219
12220
6.79k
    nbchars = xmlCharEncInput(in, terminate);
12221
6.79k
    xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
12222
6.79k
    if (nbchars < 0) {
12223
        /* TODO 2.6.0 */
12224
165
        xmlGenericError(xmlGenericErrorContext,
12225
165
            "xmlParseChunk: encoder error\n");
12226
165
                    xmlHaltParser(ctxt);
12227
165
        return(XML_ERR_INVALID_ENCODING);
12228
165
    }
12229
6.79k
      }
12230
105k
  }
12231
105k
    }
12232
12233
2.42M
    if (remain != 0) {
12234
9.64k
        xmlParseTryOrFinish(ctxt, 0);
12235
2.41M
    } else {
12236
2.41M
        xmlParseTryOrFinish(ctxt, terminate);
12237
2.41M
    }
12238
2.42M
    if (ctxt->instate == XML_PARSER_EOF)
12239
42.0k
        return(ctxt->errNo);
12240
12241
2.38M
    if ((ctxt->input != NULL) &&
12242
2.38M
         (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
12243
2.38M
         ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
12244
2.38M
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
12245
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
12246
0
        xmlHaltParser(ctxt);
12247
0
    }
12248
2.38M
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12249
39.4k
        return(ctxt->errNo);
12250
12251
2.34M
    if (remain != 0) {
12252
9.48k
        chunk += size;
12253
9.48k
        size = remain;
12254
9.48k
        remain = 0;
12255
9.48k
        goto xmldecl_done;
12256
9.48k
    }
12257
2.33M
    if ((end_in_lf == 1) && (ctxt->input != NULL) &&
12258
2.33M
        (ctxt->input->buf != NULL)) {
12259
16.7k
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
12260
16.7k
           ctxt->input);
12261
16.7k
  size_t current = ctxt->input->cur - ctxt->input->base;
12262
12263
16.7k
  xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
12264
12265
16.7k
  xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
12266
16.7k
            base, current);
12267
16.7k
    }
12268
2.33M
    if (terminate) {
12269
  /*
12270
   * Check for termination
12271
   */
12272
49.3k
  int cur_avail = 0;
12273
12274
49.3k
  if (ctxt->input != NULL) {
12275
49.3k
      if (ctxt->input->buf == NULL)
12276
0
    cur_avail = ctxt->input->length -
12277
0
          (ctxt->input->cur - ctxt->input->base);
12278
49.3k
      else
12279
49.3k
    cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
12280
49.3k
                    (ctxt->input->cur - ctxt->input->base);
12281
49.3k
  }
12282
12283
49.3k
  if ((ctxt->instate != XML_PARSER_EOF) &&
12284
49.3k
      (ctxt->instate != XML_PARSER_EPILOG)) {
12285
30.7k
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12286
30.7k
  }
12287
49.3k
  if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) {
12288
225
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12289
225
  }
12290
49.3k
  if (ctxt->instate != XML_PARSER_EOF) {
12291
49.3k
      if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
12292
49.3k
    ctxt->sax->endDocument(ctxt->userData);
12293
49.3k
  }
12294
49.3k
  ctxt->instate = XML_PARSER_EOF;
12295
49.3k
    }
12296
2.33M
    if (ctxt->wellFormed == 0)
12297
922k
  return((xmlParserErrors) ctxt->errNo);
12298
1.40M
    else
12299
1.40M
        return(0);
12300
2.33M
}
12301
12302
/************************************************************************
12303
 *                  *
12304
 *    I/O front end functions to the parser     *
12305
 *                  *
12306
 ************************************************************************/
12307
12308
/**
12309
 * xmlCreatePushParserCtxt:
12310
 * @sax:  a SAX handler
12311
 * @user_data:  The user data returned on SAX callbacks
12312
 * @chunk:  a pointer to an array of chars
12313
 * @size:  number of chars in the array
12314
 * @filename:  an optional file name or URI
12315
 *
12316
 * Create a parser context for using the XML parser in push mode.
12317
 * If @buffer and @size are non-NULL, the data is used to detect
12318
 * the encoding.  The remaining characters will be parsed so they
12319
 * don't need to be fed in again through xmlParseChunk.
12320
 * To allow content encoding detection, @size should be >= 4
12321
 * The value of @filename is used for fetching external entities
12322
 * and error/warning reports.
12323
 *
12324
 * Returns the new parser context or NULL
12325
 */
12326
12327
xmlParserCtxtPtr
12328
xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12329
152k
                        const char *chunk, int size, const char *filename) {
12330
152k
    xmlParserCtxtPtr ctxt;
12331
152k
    xmlParserInputPtr inputStream;
12332
152k
    xmlParserInputBufferPtr buf;
12333
152k
    xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12334
12335
    /*
12336
     * plug some encoding conversion routines
12337
     */
12338
152k
    if ((chunk != NULL) && (size >= 4))
12339
75.8k
  enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12340
12341
152k
    buf = xmlAllocParserInputBuffer(enc);
12342
152k
    if (buf == NULL) return(NULL);
12343
12344
152k
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
12345
152k
    if (ctxt == NULL) {
12346
0
        xmlErrMemory(NULL, "creating parser: out of memory\n");
12347
0
  xmlFreeParserInputBuffer(buf);
12348
0
  return(NULL);
12349
0
    }
12350
152k
    ctxt->dictNames = 1;
12351
152k
    if (filename == NULL) {
12352
76.0k
  ctxt->directory = NULL;
12353
76.0k
    } else {
12354
76.0k
        ctxt->directory = xmlParserGetDirectory(filename);
12355
76.0k
    }
12356
12357
152k
    inputStream = xmlNewInputStream(ctxt);
12358
152k
    if (inputStream == NULL) {
12359
0
  xmlFreeParserCtxt(ctxt);
12360
0
  xmlFreeParserInputBuffer(buf);
12361
0
  return(NULL);
12362
0
    }
12363
12364
152k
    if (filename == NULL)
12365
76.0k
  inputStream->filename = NULL;
12366
76.0k
    else {
12367
76.0k
  inputStream->filename = (char *)
12368
76.0k
      xmlCanonicPath((const xmlChar *) filename);
12369
76.0k
  if (inputStream->filename == NULL) {
12370
0
            xmlFreeInputStream(inputStream);
12371
0
      xmlFreeParserCtxt(ctxt);
12372
0
      xmlFreeParserInputBuffer(buf);
12373
0
      return(NULL);
12374
0
  }
12375
76.0k
    }
12376
152k
    inputStream->buf = buf;
12377
152k
    xmlBufResetInput(inputStream->buf->buffer, inputStream);
12378
152k
    inputPush(ctxt, inputStream);
12379
12380
    /*
12381
     * If the caller didn't provide an initial 'chunk' for determining
12382
     * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12383
     * that it can be automatically determined later
12384
     */
12385
152k
    ctxt->charset = XML_CHAR_ENCODING_NONE;
12386
12387
152k
    if ((size != 0) && (chunk != NULL) &&
12388
152k
        (ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
12389
75.8k
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12390
75.8k
  size_t cur = ctxt->input->cur - ctxt->input->base;
12391
12392
75.8k
  xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12393
12394
75.8k
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12395
#ifdef DEBUG_PUSH
12396
  xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12397
#endif
12398
75.8k
    }
12399
12400
152k
    if (enc != XML_CHAR_ENCODING_NONE) {
12401
42.4k
        xmlSwitchEncoding(ctxt, enc);
12402
42.4k
    }
12403
12404
152k
    return(ctxt);
12405
152k
}
12406
#endif /* LIBXML_PUSH_ENABLED */
12407
12408
/**
12409
 * xmlHaltParser:
12410
 * @ctxt:  an XML parser context
12411
 *
12412
 * Blocks further parser processing don't override error
12413
 * for internal use
12414
 */
12415
static void
12416
137k
xmlHaltParser(xmlParserCtxtPtr ctxt) {
12417
137k
    if (ctxt == NULL)
12418
0
        return;
12419
137k
    ctxt->instate = XML_PARSER_EOF;
12420
137k
    ctxt->disableSAX = 1;
12421
145k
    while (ctxt->inputNr > 1)
12422
8.16k
        xmlFreeInputStream(inputPop(ctxt));
12423
137k
    if (ctxt->input != NULL) {
12424
        /*
12425
   * in case there was a specific allocation deallocate before
12426
   * overriding base
12427
   */
12428
137k
        if (ctxt->input->free != NULL) {
12429
0
      ctxt->input->free((xmlChar *) ctxt->input->base);
12430
0
      ctxt->input->free = NULL;
12431
0
  }
12432
137k
        if (ctxt->input->buf != NULL) {
12433
118k
            xmlFreeParserInputBuffer(ctxt->input->buf);
12434
118k
            ctxt->input->buf = NULL;
12435
118k
        }
12436
137k
  ctxt->input->cur = BAD_CAST"";
12437
137k
        ctxt->input->length = 0;
12438
137k
  ctxt->input->base = ctxt->input->cur;
12439
137k
        ctxt->input->end = ctxt->input->cur;
12440
137k
    }
12441
137k
}
12442
12443
/**
12444
 * xmlStopParser:
12445
 * @ctxt:  an XML parser context
12446
 *
12447
 * Blocks further parser processing
12448
 */
12449
void
12450
76.2k
xmlStopParser(xmlParserCtxtPtr ctxt) {
12451
76.2k
    if (ctxt == NULL)
12452
0
        return;
12453
76.2k
    xmlHaltParser(ctxt);
12454
76.2k
    ctxt->errNo = XML_ERR_USER_STOP;
12455
76.2k
}
12456
12457
/**
12458
 * xmlCreateIOParserCtxt:
12459
 * @sax:  a SAX handler
12460
 * @user_data:  The user data returned on SAX callbacks
12461
 * @ioread:  an I/O read function
12462
 * @ioclose:  an I/O close function
12463
 * @ioctx:  an I/O handler
12464
 * @enc:  the charset encoding if known
12465
 *
12466
 * Create a parser context for using the XML parser with an existing
12467
 * I/O stream
12468
 *
12469
 * Returns the new parser context or NULL
12470
 */
12471
xmlParserCtxtPtr
12472
xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12473
  xmlInputReadCallback   ioread, xmlInputCloseCallback  ioclose,
12474
0
  void *ioctx, xmlCharEncoding enc) {
12475
0
    xmlParserCtxtPtr ctxt;
12476
0
    xmlParserInputPtr inputStream;
12477
0
    xmlParserInputBufferPtr buf;
12478
12479
0
    if (ioread == NULL) return(NULL);
12480
12481
0
    buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
12482
0
    if (buf == NULL) {
12483
0
        if (ioclose != NULL)
12484
0
            ioclose(ioctx);
12485
0
        return (NULL);
12486
0
    }
12487
12488
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
12489
0
    if (ctxt == NULL) {
12490
0
  xmlFreeParserInputBuffer(buf);
12491
0
  return(NULL);
12492
0
    }
12493
12494
0
    inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12495
0
    if (inputStream == NULL) {
12496
0
  xmlFreeParserCtxt(ctxt);
12497
0
  return(NULL);
12498
0
    }
12499
0
    inputPush(ctxt, inputStream);
12500
12501
0
    return(ctxt);
12502
0
}
12503
12504
#ifdef LIBXML_VALID_ENABLED
12505
/************************************************************************
12506
 *                  *
12507
 *    Front ends when parsing a DTD       *
12508
 *                  *
12509
 ************************************************************************/
12510
12511
/**
12512
 * xmlIOParseDTD:
12513
 * @sax:  the SAX handler block or NULL
12514
 * @input:  an Input Buffer
12515
 * @enc:  the charset encoding if known
12516
 *
12517
 * Load and parse a DTD
12518
 *
12519
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12520
 * @input will be freed by the function in any case.
12521
 */
12522
12523
xmlDtdPtr
12524
xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12525
0
        xmlCharEncoding enc) {
12526
0
    xmlDtdPtr ret = NULL;
12527
0
    xmlParserCtxtPtr ctxt;
12528
0
    xmlParserInputPtr pinput = NULL;
12529
0
    xmlChar start[4];
12530
12531
0
    if (input == NULL)
12532
0
  return(NULL);
12533
12534
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12535
0
    if (ctxt == NULL) {
12536
0
        xmlFreeParserInputBuffer(input);
12537
0
  return(NULL);
12538
0
    }
12539
12540
    /* We are loading a DTD */
12541
0
    ctxt->options |= XML_PARSE_DTDLOAD;
12542
12543
0
    xmlDetectSAX2(ctxt);
12544
12545
    /*
12546
     * generate a parser input from the I/O handler
12547
     */
12548
12549
0
    pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12550
0
    if (pinput == NULL) {
12551
0
        xmlFreeParserInputBuffer(input);
12552
0
  xmlFreeParserCtxt(ctxt);
12553
0
  return(NULL);
12554
0
    }
12555
12556
    /*
12557
     * plug some encoding conversion routines here.
12558
     */
12559
0
    if (xmlPushInput(ctxt, pinput) < 0) {
12560
0
  xmlFreeParserCtxt(ctxt);
12561
0
  return(NULL);
12562
0
    }
12563
0
    if (enc != XML_CHAR_ENCODING_NONE) {
12564
0
        xmlSwitchEncoding(ctxt, enc);
12565
0
    }
12566
12567
0
    pinput->filename = NULL;
12568
0
    pinput->line = 1;
12569
0
    pinput->col = 1;
12570
0
    pinput->base = ctxt->input->cur;
12571
0
    pinput->cur = ctxt->input->cur;
12572
0
    pinput->free = NULL;
12573
12574
    /*
12575
     * let's parse that entity knowing it's an external subset.
12576
     */
12577
0
    ctxt->inSubset = 2;
12578
0
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12579
0
    if (ctxt->myDoc == NULL) {
12580
0
  xmlErrMemory(ctxt, "New Doc failed");
12581
0
  return(NULL);
12582
0
    }
12583
0
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
12584
0
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12585
0
                                 BAD_CAST "none", BAD_CAST "none");
12586
12587
0
    if ((enc == XML_CHAR_ENCODING_NONE) &&
12588
0
        ((ctxt->input->end - ctxt->input->cur) >= 4)) {
12589
  /*
12590
   * Get the 4 first bytes and decode the charset
12591
   * if enc != XML_CHAR_ENCODING_NONE
12592
   * plug some encoding conversion routines.
12593
   */
12594
0
  start[0] = RAW;
12595
0
  start[1] = NXT(1);
12596
0
  start[2] = NXT(2);
12597
0
  start[3] = NXT(3);
12598
0
  enc = xmlDetectCharEncoding(start, 4);
12599
0
  if (enc != XML_CHAR_ENCODING_NONE) {
12600
0
      xmlSwitchEncoding(ctxt, enc);
12601
0
  }
12602
0
    }
12603
12604
0
    xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12605
12606
0
    if (ctxt->myDoc != NULL) {
12607
0
  if (ctxt->wellFormed) {
12608
0
      ret = ctxt->myDoc->extSubset;
12609
0
      ctxt->myDoc->extSubset = NULL;
12610
0
      if (ret != NULL) {
12611
0
    xmlNodePtr tmp;
12612
12613
0
    ret->doc = NULL;
12614
0
    tmp = ret->children;
12615
0
    while (tmp != NULL) {
12616
0
        tmp->doc = NULL;
12617
0
        tmp = tmp->next;
12618
0
    }
12619
0
      }
12620
0
  } else {
12621
0
      ret = NULL;
12622
0
  }
12623
0
        xmlFreeDoc(ctxt->myDoc);
12624
0
        ctxt->myDoc = NULL;
12625
0
    }
12626
0
    xmlFreeParserCtxt(ctxt);
12627
12628
0
    return(ret);
12629
0
}
12630
12631
/**
12632
 * xmlSAXParseDTD:
12633
 * @sax:  the SAX handler block
12634
 * @ExternalID:  a NAME* containing the External ID of the DTD
12635
 * @SystemID:  a NAME* containing the URL to the DTD
12636
 *
12637
 * DEPRECATED: Don't use.
12638
 *
12639
 * Load and parse an external subset.
12640
 *
12641
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12642
 */
12643
12644
xmlDtdPtr
12645
xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12646
0
                          const xmlChar *SystemID) {
12647
0
    xmlDtdPtr ret = NULL;
12648
0
    xmlParserCtxtPtr ctxt;
12649
0
    xmlParserInputPtr input = NULL;
12650
0
    xmlCharEncoding enc;
12651
0
    xmlChar* systemIdCanonic;
12652
12653
0
    if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12654
12655
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12656
0
    if (ctxt == NULL) {
12657
0
  return(NULL);
12658
0
    }
12659
12660
    /* We are loading a DTD */
12661
0
    ctxt->options |= XML_PARSE_DTDLOAD;
12662
12663
    /*
12664
     * Canonicalise the system ID
12665
     */
12666
0
    systemIdCanonic = xmlCanonicPath(SystemID);
12667
0
    if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
12668
0
  xmlFreeParserCtxt(ctxt);
12669
0
  return(NULL);
12670
0
    }
12671
12672
    /*
12673
     * Ask the Entity resolver to load the damn thing
12674
     */
12675
12676
0
    if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
12677
0
  input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12678
0
                                   systemIdCanonic);
12679
0
    if (input == NULL) {
12680
0
  xmlFreeParserCtxt(ctxt);
12681
0
  if (systemIdCanonic != NULL)
12682
0
      xmlFree(systemIdCanonic);
12683
0
  return(NULL);
12684
0
    }
12685
12686
    /*
12687
     * plug some encoding conversion routines here.
12688
     */
12689
0
    if (xmlPushInput(ctxt, input) < 0) {
12690
0
  xmlFreeParserCtxt(ctxt);
12691
0
  if (systemIdCanonic != NULL)
12692
0
      xmlFree(systemIdCanonic);
12693
0
  return(NULL);
12694
0
    }
12695
0
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12696
0
  enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12697
0
  xmlSwitchEncoding(ctxt, enc);
12698
0
    }
12699
12700
0
    if (input->filename == NULL)
12701
0
  input->filename = (char *) systemIdCanonic;
12702
0
    else
12703
0
  xmlFree(systemIdCanonic);
12704
0
    input->line = 1;
12705
0
    input->col = 1;
12706
0
    input->base = ctxt->input->cur;
12707
0
    input->cur = ctxt->input->cur;
12708
0
    input->free = NULL;
12709
12710
    /*
12711
     * let's parse that entity knowing it's an external subset.
12712
     */
12713
0
    ctxt->inSubset = 2;
12714
0
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12715
0
    if (ctxt->myDoc == NULL) {
12716
0
  xmlErrMemory(ctxt, "New Doc failed");
12717
0
  xmlFreeParserCtxt(ctxt);
12718
0
  return(NULL);
12719
0
    }
12720
0
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
12721
0
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12722
0
                                 ExternalID, SystemID);
12723
0
    xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12724
12725
0
    if (ctxt->myDoc != NULL) {
12726
0
  if (ctxt->wellFormed) {
12727
0
      ret = ctxt->myDoc->extSubset;
12728
0
      ctxt->myDoc->extSubset = NULL;
12729
0
      if (ret != NULL) {
12730
0
    xmlNodePtr tmp;
12731
12732
0
    ret->doc = NULL;
12733
0
    tmp = ret->children;
12734
0
    while (tmp != NULL) {
12735
0
        tmp->doc = NULL;
12736
0
        tmp = tmp->next;
12737
0
    }
12738
0
      }
12739
0
  } else {
12740
0
      ret = NULL;
12741
0
  }
12742
0
        xmlFreeDoc(ctxt->myDoc);
12743
0
        ctxt->myDoc = NULL;
12744
0
    }
12745
0
    xmlFreeParserCtxt(ctxt);
12746
12747
0
    return(ret);
12748
0
}
12749
12750
12751
/**
12752
 * xmlParseDTD:
12753
 * @ExternalID:  a NAME* containing the External ID of the DTD
12754
 * @SystemID:  a NAME* containing the URL to the DTD
12755
 *
12756
 * Load and parse an external subset.
12757
 *
12758
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12759
 */
12760
12761
xmlDtdPtr
12762
0
xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12763
0
    return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12764
0
}
12765
#endif /* LIBXML_VALID_ENABLED */
12766
12767
/************************************************************************
12768
 *                  *
12769
 *    Front ends when parsing an Entity     *
12770
 *                  *
12771
 ************************************************************************/
12772
12773
/**
12774
 * xmlParseCtxtExternalEntity:
12775
 * @ctx:  the existing parsing context
12776
 * @URL:  the URL for the entity to load
12777
 * @ID:  the System ID for the entity to load
12778
 * @lst:  the return value for the set of parsed nodes
12779
 *
12780
 * Parse an external general entity within an existing parsing context
12781
 * An external general parsed entity is well-formed if it matches the
12782
 * production labeled extParsedEnt.
12783
 *
12784
 * [78] extParsedEnt ::= TextDecl? content
12785
 *
12786
 * Returns 0 if the entity is well formed, -1 in case of args problem and
12787
 *    the parser error code otherwise
12788
 */
12789
12790
int
12791
xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
12792
0
                 const xmlChar *ID, xmlNodePtr *lst) {
12793
0
    void *userData;
12794
12795
0
    if (ctx == NULL) return(-1);
12796
    /*
12797
     * If the user provided their own SAX callbacks, then reuse the
12798
     * userData callback field, otherwise the expected setup in a
12799
     * DOM builder is to have userData == ctxt
12800
     */
12801
0
    if (ctx->userData == ctx)
12802
0
        userData = NULL;
12803
0
    else
12804
0
        userData = ctx->userData;
12805
0
    return xmlParseExternalEntityPrivate(ctx->myDoc, ctx, ctx->sax,
12806
0
                                         userData, ctx->depth + 1,
12807
0
                                         URL, ID, lst);
12808
0
}
12809
12810
/**
12811
 * xmlParseExternalEntityPrivate:
12812
 * @doc:  the document the chunk pertains to
12813
 * @oldctxt:  the previous parser context if available
12814
 * @sax:  the SAX handler block (possibly NULL)
12815
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12816
 * @depth:  Used for loop detection, use 0
12817
 * @URL:  the URL for the entity to load
12818
 * @ID:  the System ID for the entity to load
12819
 * @list:  the return value for the set of parsed nodes
12820
 *
12821
 * Private version of xmlParseExternalEntity()
12822
 *
12823
 * Returns XML_ERR_OK if the entity is well formed, XML_ERR_INTERNAL_ERROR
12824
 *    in case of args problem and the parser error code otherwise
12825
 */
12826
12827
static xmlParserErrors
12828
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
12829
                xmlSAXHandlerPtr sax,
12830
          void *user_data, int depth, const xmlChar *URL,
12831
305k
          const xmlChar *ID, xmlNodePtr *list) {
12832
305k
    xmlParserCtxtPtr ctxt;
12833
305k
    xmlDocPtr newDoc;
12834
305k
    xmlNodePtr newRoot;
12835
305k
    xmlParserErrors ret = XML_ERR_OK;
12836
305k
    xmlChar start[4];
12837
305k
    xmlCharEncoding enc;
12838
12839
305k
    if (((depth > 40) &&
12840
305k
  ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12841
305k
  (depth > 100)) {
12842
0
  xmlFatalErrMsg(oldctxt, XML_ERR_ENTITY_LOOP,
12843
0
                       "Maximum entity nesting depth exceeded");
12844
0
        return(XML_ERR_ENTITY_LOOP);
12845
0
    }
12846
12847
305k
    if (list != NULL)
12848
31.9k
        *list = NULL;
12849
305k
    if ((URL == NULL) && (ID == NULL))
12850
157
  return(XML_ERR_INTERNAL_ERROR);
12851
305k
    if (doc == NULL)
12852
0
  return(XML_ERR_INTERNAL_ERROR);
12853
12854
305k
    ctxt = xmlCreateEntityParserCtxtInternal(sax, user_data, URL, ID, NULL,
12855
305k
                                             oldctxt);
12856
305k
    if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
12857
42.9k
    if (oldctxt != NULL) {
12858
42.9k
        ctxt->nbErrors = oldctxt->nbErrors;
12859
42.9k
        ctxt->nbWarnings = oldctxt->nbWarnings;
12860
42.9k
    }
12861
42.9k
    xmlDetectSAX2(ctxt);
12862
12863
42.9k
    newDoc = xmlNewDoc(BAD_CAST "1.0");
12864
42.9k
    if (newDoc == NULL) {
12865
0
  xmlFreeParserCtxt(ctxt);
12866
0
  return(XML_ERR_INTERNAL_ERROR);
12867
0
    }
12868
42.9k
    newDoc->properties = XML_DOC_INTERNAL;
12869
42.9k
    if (doc) {
12870
42.9k
        newDoc->intSubset = doc->intSubset;
12871
42.9k
        newDoc->extSubset = doc->extSubset;
12872
42.9k
        if (doc->dict) {
12873
27.3k
            newDoc->dict = doc->dict;
12874
27.3k
            xmlDictReference(newDoc->dict);
12875
27.3k
        }
12876
42.9k
        if (doc->URL != NULL) {
12877
28.1k
            newDoc->URL = xmlStrdup(doc->URL);
12878
28.1k
        }
12879
42.9k
    }
12880
42.9k
    newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12881
42.9k
    if (newRoot == NULL) {
12882
0
  if (sax != NULL)
12883
0
  xmlFreeParserCtxt(ctxt);
12884
0
  newDoc->intSubset = NULL;
12885
0
  newDoc->extSubset = NULL;
12886
0
        xmlFreeDoc(newDoc);
12887
0
  return(XML_ERR_INTERNAL_ERROR);
12888
0
    }
12889
42.9k
    xmlAddChild((xmlNodePtr) newDoc, newRoot);
12890
42.9k
    nodePush(ctxt, newDoc->children);
12891
42.9k
    if (doc == NULL) {
12892
0
        ctxt->myDoc = newDoc;
12893
42.9k
    } else {
12894
42.9k
        ctxt->myDoc = doc;
12895
42.9k
        newRoot->doc = doc;
12896
42.9k
    }
12897
12898
    /*
12899
     * Read the first 4 bytes and detect the character encoding;
12900
     * if enc != XML_CHAR_ENCODING_NONE,
12901
     * plug in the encoding conversion routines.
12902
     */
12903
42.9k
    GROW;
12904
42.9k
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12905
41.1k
  start[0] = RAW;
12906
41.1k
  start[1] = NXT(1);
12907
41.1k
  start[2] = NXT(2);
12908
41.1k
  start[3] = NXT(3);
12909
41.1k
  enc = xmlDetectCharEncoding(start, 4);
12910
41.1k
  if (enc != XML_CHAR_ENCODING_NONE) {
12911
1.50k
      xmlSwitchEncoding(ctxt, enc);
12912
1.50k
  }
12913
41.1k
    }
12914
12915
    /*
12916
     * Parse a possible text declaration first
12917
     */
12918
42.9k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
12919
1.02k
  xmlParseTextDecl(ctxt);
12920
        /*
12921
         * An XML-1.0 document can't reference an entity that is not XML-1.0
12922
         */
12923
1.02k
        if ((xmlStrEqual(oldctxt->version, BAD_CAST "1.0")) &&
12924
1.02k
            (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
12925
36
            xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
12926
36
                           "Version mismatch between document and entity\n");
12927
36
        }
12928
1.02k
    }
12929
12930
42.9k
    ctxt->instate = XML_PARSER_CONTENT;
12931
42.9k
    ctxt->depth = depth;
12932
42.9k
    if (oldctxt != NULL) {
12933
42.9k
  ctxt->_private = oldctxt->_private;
12934
42.9k
  ctxt->loadsubset = oldctxt->loadsubset;
12935
42.9k
  ctxt->validate = oldctxt->validate;
12936
42.9k
  ctxt->valid = oldctxt->valid;
12937
42.9k
  ctxt->replaceEntities = oldctxt->replaceEntities;
12938
42.9k
        if (oldctxt->validate) {
12939
25.6k
            ctxt->vctxt.error = oldctxt->vctxt.error;
12940
25.6k
            ctxt->vctxt.warning = oldctxt->vctxt.warning;
12941
25.6k
            ctxt->vctxt.userData = oldctxt->vctxt.userData;
12942
25.6k
            ctxt->vctxt.flags = oldctxt->vctxt.flags;
12943
25.6k
        }
12944
42.9k
  ctxt->external = oldctxt->external;
12945
42.9k
        if (ctxt->dict) xmlDictFree(ctxt->dict);
12946
42.9k
        ctxt->dict = oldctxt->dict;
12947
42.9k
        ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12948
42.9k
        ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12949
42.9k
        ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12950
42.9k
        ctxt->dictNames = oldctxt->dictNames;
12951
42.9k
        ctxt->attsDefault = oldctxt->attsDefault;
12952
42.9k
        ctxt->attsSpecial = oldctxt->attsSpecial;
12953
42.9k
        ctxt->linenumbers = oldctxt->linenumbers;
12954
42.9k
  ctxt->record_info = oldctxt->record_info;
12955
42.9k
  ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
12956
42.9k
  ctxt->node_seq.length = oldctxt->node_seq.length;
12957
42.9k
  ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
12958
42.9k
    } else {
12959
  /*
12960
   * Doing validity checking on chunk without context
12961
   * doesn't make sense
12962
   */
12963
0
  ctxt->_private = NULL;
12964
0
  ctxt->validate = 0;
12965
0
  ctxt->external = 2;
12966
0
  ctxt->loadsubset = 0;
12967
0
    }
12968
12969
42.9k
    xmlParseContent(ctxt);
12970
12971
42.9k
    if ((RAW == '<') && (NXT(1) == '/')) {
12972
1.15k
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12973
41.7k
    } else if (RAW != 0) {
12974
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12975
0
    }
12976
42.9k
    if (ctxt->node != newDoc->children) {
12977
7.11k
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12978
7.11k
    }
12979
12980
42.9k
    if (!ctxt->wellFormed) {
12981
13.3k
  ret = (xmlParserErrors)ctxt->errNo;
12982
13.3k
        if (oldctxt != NULL) {
12983
13.3k
            oldctxt->errNo = ctxt->errNo;
12984
13.3k
            oldctxt->wellFormed = 0;
12985
13.3k
            xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12986
13.3k
        }
12987
29.5k
    } else {
12988
29.5k
  if (list != NULL) {
12989
3.91k
      xmlNodePtr cur;
12990
12991
      /*
12992
       * Return the newly created nodeset after unlinking it from
12993
       * the pseudo parent.
12994
       */
12995
3.91k
      cur = newDoc->children->children;
12996
3.91k
      *list = cur;
12997
146k
      while (cur != NULL) {
12998
142k
    cur->parent = NULL;
12999
142k
    cur = cur->next;
13000
142k
      }
13001
3.91k
            newDoc->children->children = NULL;
13002
3.91k
  }
13003
29.5k
  ret = XML_ERR_OK;
13004
29.5k
    }
13005
13006
    /*
13007
     * Also record the size of the entity parsed
13008
     */
13009
42.9k
    if (ctxt->input != NULL && oldctxt != NULL) {
13010
42.9k
        unsigned long consumed = ctxt->input->consumed;
13011
13012
42.9k
        xmlSaturatedAddSizeT(&consumed, ctxt->input->cur - ctxt->input->base);
13013
13014
42.9k
        xmlSaturatedAdd(&oldctxt->sizeentities, consumed);
13015
42.9k
        xmlSaturatedAdd(&oldctxt->sizeentities, ctxt->sizeentities);
13016
13017
42.9k
        xmlSaturatedAdd(&oldctxt->sizeentcopy, consumed);
13018
42.9k
        xmlSaturatedAdd(&oldctxt->sizeentcopy, ctxt->sizeentcopy);
13019
42.9k
    }
13020
13021
42.9k
    if (oldctxt != NULL) {
13022
42.9k
        ctxt->dict = NULL;
13023
42.9k
        ctxt->attsDefault = NULL;
13024
42.9k
        ctxt->attsSpecial = NULL;
13025
42.9k
        oldctxt->nbErrors = ctxt->nbErrors;
13026
42.9k
        oldctxt->nbWarnings = ctxt->nbWarnings;
13027
42.9k
        oldctxt->validate = ctxt->validate;
13028
42.9k
        oldctxt->valid = ctxt->valid;
13029
42.9k
        oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
13030
42.9k
        oldctxt->node_seq.length = ctxt->node_seq.length;
13031
42.9k
        oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
13032
42.9k
    }
13033
42.9k
    ctxt->node_seq.maximum = 0;
13034
42.9k
    ctxt->node_seq.length = 0;
13035
42.9k
    ctxt->node_seq.buffer = NULL;
13036
42.9k
    xmlFreeParserCtxt(ctxt);
13037
42.9k
    newDoc->intSubset = NULL;
13038
42.9k
    newDoc->extSubset = NULL;
13039
42.9k
    xmlFreeDoc(newDoc);
13040
13041
42.9k
    return(ret);
13042
42.9k
}
13043
13044
#ifdef LIBXML_SAX1_ENABLED
13045
/**
13046
 * xmlParseExternalEntity:
13047
 * @doc:  the document the chunk pertains to
13048
 * @sax:  the SAX handler block (possibly NULL)
13049
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13050
 * @depth:  Used for loop detection, use 0
13051
 * @URL:  the URL for the entity to load
13052
 * @ID:  the System ID for the entity to load
13053
 * @lst:  the return value for the set of parsed nodes
13054
 *
13055
 * Parse an external general entity
13056
 * An external general parsed entity is well-formed if it matches the
13057
 * production labeled extParsedEnt.
13058
 *
13059
 * [78] extParsedEnt ::= TextDecl? content
13060
 *
13061
 * Returns 0 if the entity is well formed, -1 in case of args problem and
13062
 *    the parser error code otherwise
13063
 */
13064
13065
int
13066
xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
13067
0
    int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
13068
0
    return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
13069
0
                           ID, lst));
13070
0
}
13071
13072
/**
13073
 * xmlParseBalancedChunkMemory:
13074
 * @doc:  the document the chunk pertains to (must not be NULL)
13075
 * @sax:  the SAX handler block (possibly NULL)
13076
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13077
 * @depth:  Used for loop detection, use 0
13078
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13079
 * @lst:  the return value for the set of parsed nodes
13080
 *
13081
 * Parse a well-balanced chunk of an XML document
13082
 * called by the parser
13083
 * The allowed sequence for the Well Balanced Chunk is the one defined by
13084
 * the content production in the XML grammar:
13085
 *
13086
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13087
 *
13088
 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13089
 *    the parser error code otherwise
13090
 */
13091
13092
int
13093
xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13094
0
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
13095
0
    return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
13096
0
                                                depth, string, lst, 0 );
13097
0
}
13098
#endif /* LIBXML_SAX1_ENABLED */
13099
13100
/**
13101
 * xmlParseBalancedChunkMemoryInternal:
13102
 * @oldctxt:  the existing parsing context
13103
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13104
 * @user_data:  the user data field for the parser context
13105
 * @lst:  the return value for the set of parsed nodes
13106
 *
13107
 *
13108
 * Parse a well-balanced chunk of an XML document
13109
 * called by the parser
13110
 * The allowed sequence for the Well Balanced Chunk is the one defined by
13111
 * the content production in the XML grammar:
13112
 *
13113
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13114
 *
13115
 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13116
 * error code otherwise
13117
 *
13118
 * This function has no recover mode: @lst only receives the parsed nodes
13119
 * when the chunk is well balanced.
13120
 */
13121
static xmlParserErrors
13122
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
13123
55.8k
  const xmlChar *string, void *user_data, xmlNodePtr *lst) {
13124
55.8k
    xmlParserCtxtPtr ctxt;
13125
55.8k
    xmlDocPtr newDoc = NULL;
13126
55.8k
    xmlNodePtr newRoot;
13127
55.8k
    xmlSAXHandlerPtr oldsax = NULL;
13128
55.8k
    xmlNodePtr content = NULL;
13129
55.8k
    xmlNodePtr last = NULL;
13130
55.8k
    int size;
13131
55.8k
    xmlParserErrors ret = XML_ERR_OK;
13132
55.8k
#ifdef SAX2
13133
55.8k
    int i;
13134
55.8k
#endif
13135
13136
55.8k
    if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13137
55.8k
        (oldctxt->depth >  100)) {
13138
57
  xmlFatalErrMsg(oldctxt, XML_ERR_ENTITY_LOOP,
13139
57
                       "Maximum entity nesting depth exceeded");
13140
57
  return(XML_ERR_ENTITY_LOOP);
13141
57
    }
13142
13143
13144
55.8k
    if (lst != NULL)
13145
40.2k
        *lst = NULL;
13146
55.8k
    if (string == NULL)
13147
27
        return(XML_ERR_INTERNAL_ERROR);
13148
13149
55.7k
    size = xmlStrlen(string);
13150
13151
55.7k
    ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13152
55.7k
    if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13153
46.3k
    ctxt->nbErrors = oldctxt->nbErrors;
13154
46.3k
    ctxt->nbWarnings = oldctxt->nbWarnings;
13155
46.3k
    if (user_data != NULL)
13156
0
  ctxt->userData = user_data;
13157
46.3k
    else
13158
46.3k
  ctxt->userData = ctxt;
13159
46.3k
    if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13160
46.3k
    ctxt->dict = oldctxt->dict;
13161
46.3k
    ctxt->input_id = oldctxt->input_id;
13162
46.3k
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13163
46.3k
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13164
46.3k
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13165
13166
46.3k
#ifdef SAX2
13167
    /* propagate namespaces down the entity */
13168
46.6k
    for (i = 0;i < oldctxt->nsNr;i += 2) {
13169
243
        nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
13170
243
    }
13171
46.3k
#endif
13172
13173
46.3k
    oldsax = ctxt->sax;
13174
46.3k
    ctxt->sax = oldctxt->sax;
13175
46.3k
    xmlDetectSAX2(ctxt);
13176
46.3k
    ctxt->replaceEntities = oldctxt->replaceEntities;
13177
46.3k
    ctxt->options = oldctxt->options;
13178
13179
46.3k
    ctxt->_private = oldctxt->_private;
13180
46.3k
    if (oldctxt->myDoc == NULL) {
13181
0
  newDoc = xmlNewDoc(BAD_CAST "1.0");
13182
0
  if (newDoc == NULL) {
13183
0
      ctxt->sax = oldsax;
13184
0
      ctxt->dict = NULL;
13185
0
      xmlFreeParserCtxt(ctxt);
13186
0
      return(XML_ERR_INTERNAL_ERROR);
13187
0
  }
13188
0
  newDoc->properties = XML_DOC_INTERNAL;
13189
0
  newDoc->dict = ctxt->dict;
13190
0
  xmlDictReference(newDoc->dict);
13191
0
  ctxt->myDoc = newDoc;
13192
46.3k
    } else {
13193
46.3k
  ctxt->myDoc = oldctxt->myDoc;
13194
46.3k
        content = ctxt->myDoc->children;
13195
46.3k
  last = ctxt->myDoc->last;
13196
46.3k
    }
13197
46.3k
    newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
13198
46.3k
    if (newRoot == NULL) {
13199
0
  ctxt->sax = oldsax;
13200
0
  ctxt->dict = NULL;
13201
0
  xmlFreeParserCtxt(ctxt);
13202
0
  if (newDoc != NULL) {
13203
0
      xmlFreeDoc(newDoc);
13204
0
  }
13205
0
  return(XML_ERR_INTERNAL_ERROR);
13206
0
    }
13207
46.3k
    ctxt->myDoc->children = NULL;
13208
46.3k
    ctxt->myDoc->last = NULL;
13209
46.3k
    xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
13210
46.3k
    nodePush(ctxt, ctxt->myDoc->children);
13211
46.3k
    ctxt->instate = XML_PARSER_CONTENT;
13212
46.3k
    ctxt->depth = oldctxt->depth;
13213
13214
46.3k
    ctxt->validate = 0;
13215
46.3k
    ctxt->loadsubset = oldctxt->loadsubset;
13216
46.3k
    if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
13217
  /*
13218
   * ID/IDREF registration will be done in xmlValidateElement below
13219
   */
13220
38.3k
  ctxt->loadsubset |= XML_SKIP_IDS;
13221
38.3k
    }
13222
46.3k
    ctxt->dictNames = oldctxt->dictNames;
13223
46.3k
    ctxt->attsDefault = oldctxt->attsDefault;
13224
46.3k
    ctxt->attsSpecial = oldctxt->attsSpecial;
13225
13226
46.3k
    xmlParseContent(ctxt);
13227
46.3k
    if ((RAW == '<') && (NXT(1) == '/')) {
13228
135
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13229
46.2k
    } else if (RAW != 0) {
13230
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13231
0
    }
13232
46.3k
    if (ctxt->node != ctxt->myDoc->children) {
13233
684
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13234
684
    }
13235
13236
46.3k
    if (!ctxt->wellFormed) {
13237
7.21k
  ret = (xmlParserErrors)ctxt->errNo;
13238
7.21k
        oldctxt->errNo = ctxt->errNo;
13239
7.21k
        oldctxt->wellFormed = 0;
13240
7.21k
        xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13241
39.1k
    } else {
13242
39.1k
        ret = XML_ERR_OK;
13243
39.1k
    }
13244
13245
46.3k
    if ((lst != NULL) && (ret == XML_ERR_OK)) {
13246
32.8k
  xmlNodePtr cur;
13247
13248
  /*
13249
   * Return the newly created nodeset after unlinking it from
13250
   * the pseudo parent.
13251
   */
13252
32.8k
  cur = ctxt->myDoc->children->children;
13253
32.8k
  *lst = cur;
13254
130k
  while (cur != NULL) {
13255
97.1k
#ifdef LIBXML_VALID_ENABLED
13256
97.1k
      if ((oldctxt->validate) && (oldctxt->wellFormed) &&
13257
97.1k
    (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
13258
97.1k
    (cur->type == XML_ELEMENT_NODE)) {
13259
19.4k
    oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
13260
19.4k
      oldctxt->myDoc, cur);
13261
19.4k
      }
13262
97.1k
#endif /* LIBXML_VALID_ENABLED */
13263
97.1k
      cur->parent = NULL;
13264
97.1k
      cur = cur->next;
13265
97.1k
  }
13266
32.8k
  ctxt->myDoc->children->children = NULL;
13267
32.8k
    }
13268
46.3k
    if (ctxt->myDoc != NULL) {
13269
46.3k
  xmlFreeNode(ctxt->myDoc->children);
13270
46.3k
        ctxt->myDoc->children = content;
13271
46.3k
        ctxt->myDoc->last = last;
13272
46.3k
    }
13273
13274
    /*
13275
     * Also record the size of the entity parsed
13276
     */
13277
46.3k
    if (ctxt->input != NULL && oldctxt != NULL) {
13278
46.3k
        unsigned long consumed = ctxt->input->consumed;
13279
13280
46.3k
        xmlSaturatedAddSizeT(&consumed, ctxt->input->cur - ctxt->input->base);
13281
13282
46.3k
        xmlSaturatedAdd(&oldctxt->sizeentcopy, consumed);
13283
46.3k
        xmlSaturatedAdd(&oldctxt->sizeentcopy, ctxt->sizeentcopy);
13284
46.3k
    }
13285
13286
46.3k
    oldctxt->nbErrors = ctxt->nbErrors;
13287
46.3k
    oldctxt->nbWarnings = ctxt->nbWarnings;
13288
46.3k
    ctxt->sax = oldsax;
13289
46.3k
    ctxt->dict = NULL;
13290
46.3k
    ctxt->attsDefault = NULL;
13291
46.3k
    ctxt->attsSpecial = NULL;
13292
46.3k
    xmlFreeParserCtxt(ctxt);
13293
46.3k
    if (newDoc != NULL) {
13294
0
  xmlFreeDoc(newDoc);
13295
0
    }
13296
13297
46.3k
    return(ret);
13298
46.3k
}
13299
13300
/**
13301
 * xmlParseInNodeContext:
13302
 * @node:  the context node
13303
 * @data:  the input string
13304
 * @datalen:  the input string length in bytes
13305
 * @options:  a combination of xmlParserOption
13306
 * @lst:  the return value for the set of parsed nodes
13307
 *
13308
 * Parse a well-balanced chunk of an XML document
13309
 * within the context (DTD, namespaces, etc ...) of the given node.
13310
 *
13311
 * The allowed sequence for the data is a Well Balanced Chunk defined by
13312
 * the content production in the XML grammar:
13313
 *
13314
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13315
 *
13316
 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13317
 * error code otherwise
13318
 */
13319
xmlParserErrors
13320
xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13321
0
                      int options, xmlNodePtr *lst) {
13322
0
#ifdef SAX2
13323
0
    xmlParserCtxtPtr ctxt;
13324
0
    xmlDocPtr doc = NULL;
13325
0
    xmlNodePtr fake, cur;
13326
0
    int nsnr = 0;
13327
13328
0
    xmlParserErrors ret = XML_ERR_OK;
13329
13330
    /*
13331
     * check all input parameters, grab the document
13332
     */
13333
0
    if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13334
0
        return(XML_ERR_INTERNAL_ERROR);
13335
0
    switch (node->type) {
13336
0
        case XML_ELEMENT_NODE:
13337
0
        case XML_ATTRIBUTE_NODE:
13338
0
        case XML_TEXT_NODE:
13339
0
        case XML_CDATA_SECTION_NODE:
13340
0
        case XML_ENTITY_REF_NODE:
13341
0
        case XML_PI_NODE:
13342
0
        case XML_COMMENT_NODE:
13343
0
        case XML_DOCUMENT_NODE:
13344
0
        case XML_HTML_DOCUMENT_NODE:
13345
0
      break;
13346
0
  default:
13347
0
      return(XML_ERR_INTERNAL_ERROR);
13348
13349
0
    }
13350
0
    while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13351
0
           (node->type != XML_DOCUMENT_NODE) &&
13352
0
     (node->type != XML_HTML_DOCUMENT_NODE))
13353
0
  node = node->parent;
13354
0
    if (node == NULL)
13355
0
  return(XML_ERR_INTERNAL_ERROR);
13356
0
    if (node->type == XML_ELEMENT_NODE)
13357
0
  doc = node->doc;
13358
0
    else
13359
0
        doc = (xmlDocPtr) node;
13360
0
    if (doc == NULL)
13361
0
  return(XML_ERR_INTERNAL_ERROR);
13362
13363
    /*
13364
     * allocate a context and set-up everything not related to the
13365
     * node position in the tree
13366
     */
13367
0
    if (doc->type == XML_DOCUMENT_NODE)
13368
0
  ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13369
0
#ifdef LIBXML_HTML_ENABLED
13370
0
    else if (doc->type == XML_HTML_DOCUMENT_NODE) {
13371
0
  ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
13372
        /*
13373
         * When parsing in context, it makes no sense to add implied
13374
         * elements like html/body/etc...
13375
         */
13376
0
        options |= HTML_PARSE_NOIMPLIED;
13377
0
    }
13378
0
#endif
13379
0
    else
13380
0
        return(XML_ERR_INTERNAL_ERROR);
13381
13382
0
    if (ctxt == NULL)
13383
0
        return(XML_ERR_NO_MEMORY);
13384
13385
    /*
13386
     * Use input doc's dict if present, else ensure XML_PARSE_NODICT is set.
13387
     * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13388
     * we must wait until the last moment to free the original one.
13389
     */
13390
0
    if (doc->dict != NULL) {
13391
0
        if (ctxt->dict != NULL)
13392
0
      xmlDictFree(ctxt->dict);
13393
0
  ctxt->dict = doc->dict;
13394
0
    } else
13395
0
        options |= XML_PARSE_NODICT;
13396
13397
0
    if (doc->encoding != NULL) {
13398
0
        xmlCharEncodingHandlerPtr hdlr;
13399
13400
0
        if (ctxt->encoding != NULL)
13401
0
      xmlFree((xmlChar *) ctxt->encoding);
13402
0
        ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13403
13404
0
        hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding);
13405
0
        if (hdlr != NULL) {
13406
0
            xmlSwitchToEncoding(ctxt, hdlr);
13407
0
  } else {
13408
0
            return(XML_ERR_UNSUPPORTED_ENCODING);
13409
0
        }
13410
0
    }
13411
13412
0
    xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13413
0
    xmlDetectSAX2(ctxt);
13414
0
    ctxt->myDoc = doc;
13415
    /* parsing in context, i.e. as within existing content */
13416
0
    ctxt->input_id = 2;
13417
0
    ctxt->instate = XML_PARSER_CONTENT;
13418
13419
0
    fake = xmlNewDocComment(node->doc, NULL);
13420
0
    if (fake == NULL) {
13421
0
        xmlFreeParserCtxt(ctxt);
13422
0
  return(XML_ERR_NO_MEMORY);
13423
0
    }
13424
0
    xmlAddChild(node, fake);
13425
13426
0
    if (node->type == XML_ELEMENT_NODE) {
13427
0
  nodePush(ctxt, node);
13428
  /*
13429
   * initialize the SAX2 namespaces stack
13430
   */
13431
0
  cur = node;
13432
0
  while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13433
0
      xmlNsPtr ns = cur->nsDef;
13434
0
      const xmlChar *iprefix, *ihref;
13435
13436
0
      while (ns != NULL) {
13437
0
    if (ctxt->dict) {
13438
0
        iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13439
0
        ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13440
0
    } else {
13441
0
        iprefix = ns->prefix;
13442
0
        ihref = ns->href;
13443
0
    }
13444
13445
0
          if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13446
0
        nsPush(ctxt, iprefix, ihref);
13447
0
        nsnr++;
13448
0
    }
13449
0
    ns = ns->next;
13450
0
      }
13451
0
      cur = cur->parent;
13452
0
  }
13453
0
    }
13454
13455
0
    if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13456
  /*
13457
   * ID/IDREF registration will be done in xmlValidateElement below
13458
   */
13459
0
  ctxt->loadsubset |= XML_SKIP_IDS;
13460
0
    }
13461
13462
0
#ifdef LIBXML_HTML_ENABLED
13463
0
    if (doc->type == XML_HTML_DOCUMENT_NODE)
13464
0
        __htmlParseContent(ctxt);
13465
0
    else
13466
0
#endif
13467
0
  xmlParseContent(ctxt);
13468
13469
0
    nsPop(ctxt, nsnr);
13470
0
    if ((RAW == '<') && (NXT(1) == '/')) {
13471
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13472
0
    } else if (RAW != 0) {
13473
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13474
0
    }
13475
0
    if ((ctxt->node != NULL) && (ctxt->node != node)) {
13476
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13477
0
  ctxt->wellFormed = 0;
13478
0
    }
13479
13480
0
    if (!ctxt->wellFormed) {
13481
0
        if (ctxt->errNo == 0)
13482
0
      ret = XML_ERR_INTERNAL_ERROR;
13483
0
  else
13484
0
      ret = (xmlParserErrors)ctxt->errNo;
13485
0
    } else {
13486
0
        ret = XML_ERR_OK;
13487
0
    }
13488
13489
    /*
13490
     * Return the newly created nodeset after unlinking it from
13491
     * the pseudo sibling.
13492
     */
13493
13494
0
    cur = fake->next;
13495
0
    fake->next = NULL;
13496
0
    node->last = fake;
13497
13498
0
    if (cur != NULL) {
13499
0
  cur->prev = NULL;
13500
0
    }
13501
13502
0
    *lst = cur;
13503
13504
0
    while (cur != NULL) {
13505
0
  cur->parent = NULL;
13506
0
  cur = cur->next;
13507
0
    }
13508
13509
0
    xmlUnlinkNode(fake);
13510
0
    xmlFreeNode(fake);
13511
13512
13513
0
    if (ret != XML_ERR_OK) {
13514
0
        xmlFreeNodeList(*lst);
13515
0
  *lst = NULL;
13516
0
    }
13517
13518
0
    if (doc->dict != NULL)
13519
0
        ctxt->dict = NULL;
13520
0
    xmlFreeParserCtxt(ctxt);
13521
13522
0
    return(ret);
13523
#else /* !SAX2 */
13524
    return(XML_ERR_INTERNAL_ERROR);
13525
#endif
13526
0
}
13527
13528
#ifdef LIBXML_SAX1_ENABLED
13529
/**
13530
 * xmlParseBalancedChunkMemoryRecover:
13531
 * @doc:  the document the chunk pertains to (must not be NULL)
13532
 * @sax:  the SAX handler block (possibly NULL)
13533
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13534
 * @depth:  Used for loop detection, use 0
13535
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13536
 * @lst:  the return value for the set of parsed nodes
13537
 * @recover: return nodes even if the data is broken (use 0)
13538
 *
13539
 *
13540
 * Parse a well-balanced chunk of an XML document
13541
 * called by the parser
13542
 * The allowed sequence for the Well Balanced Chunk is the one defined by
13543
 * the content production in the XML grammar:
13544
 *
13545
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13546
 *
13547
 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13548
 *    the parser error code otherwise
13549
 *
13550
 * In case recover is set to 1, the nodelist will not be empty even if
13551
 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13552
 * some extent.
13553
 */
13554
int
13555
xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13556
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
13557
0
     int recover) {
13558
0
    xmlParserCtxtPtr ctxt;
13559
0
    xmlDocPtr newDoc;
13560
0
    xmlSAXHandlerPtr oldsax = NULL;
13561
0
    xmlNodePtr content, newRoot;
13562
0
    int size;
13563
0
    int ret = 0;
13564
13565
0
    if (depth > 40) {
13566
0
  return(XML_ERR_ENTITY_LOOP);
13567
0
    }
13568
13569
13570
0
    if (lst != NULL)
13571
0
        *lst = NULL;
13572
0
    if (string == NULL)
13573
0
        return(-1);
13574
13575
0
    size = xmlStrlen(string);
13576
13577
0
    ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13578
0
    if (ctxt == NULL) return(-1);
13579
0
    ctxt->userData = ctxt;
13580
0
    if (sax != NULL) {
13581
0
  oldsax = ctxt->sax;
13582
0
        ctxt->sax = sax;
13583
0
  if (user_data != NULL)
13584
0
      ctxt->userData = user_data;
13585
0
    }
13586
0
    newDoc = xmlNewDoc(BAD_CAST "1.0");
13587
0
    if (newDoc == NULL) {
13588
0
  xmlFreeParserCtxt(ctxt);
13589
0
  return(-1);
13590
0
    }
13591
0
    newDoc->properties = XML_DOC_INTERNAL;
13592
0
    if ((doc != NULL) && (doc->dict != NULL)) {
13593
0
        xmlDictFree(ctxt->dict);
13594
0
  ctxt->dict = doc->dict;
13595
0
  xmlDictReference(ctxt->dict);
13596
0
  ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13597
0
  ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13598
0
  ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13599
0
  ctxt->dictNames = 1;
13600
0
    } else {
13601
0
  xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
13602
0
    }
13603
    /* doc == NULL is only supported for historic reasons */
13604
0
    if (doc != NULL) {
13605
0
  newDoc->intSubset = doc->intSubset;
13606
0
  newDoc->extSubset = doc->extSubset;
13607
0
    }
13608
0
    newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13609
0
    if (newRoot == NULL) {
13610
0
  if (sax != NULL)
13611
0
      ctxt->sax = oldsax;
13612
0
  xmlFreeParserCtxt(ctxt);
13613
0
  newDoc->intSubset = NULL;
13614
0
  newDoc->extSubset = NULL;
13615
0
        xmlFreeDoc(newDoc);
13616
0
  return(-1);
13617
0
    }
13618
0
    xmlAddChild((xmlNodePtr) newDoc, newRoot);
13619
0
    nodePush(ctxt, newRoot);
13620
    /* doc == NULL is only supported for historic reasons */
13621
0
    if (doc == NULL) {
13622
0
  ctxt->myDoc = newDoc;
13623
0
    } else {
13624
0
  ctxt->myDoc = newDoc;
13625
0
  newDoc->children->doc = doc;
13626
  /* Ensure that doc has XML spec namespace */
13627
0
  xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13628
0
  newDoc->oldNs = doc->oldNs;
13629
0
    }
13630
0
    ctxt->instate = XML_PARSER_CONTENT;
13631
0
    ctxt->input_id = 2;
13632
0
    ctxt->depth = depth;
13633
13634
    /*
13635
     * Doing validity checking on chunk doesn't make sense
13636
     */
13637
0
    ctxt->validate = 0;
13638
0
    ctxt->loadsubset = 0;
13639
0
    xmlDetectSAX2(ctxt);
13640
13641
0
    if ( doc != NULL ){
13642
0
        content = doc->children;
13643
0
        doc->children = NULL;
13644
0
        xmlParseContent(ctxt);
13645
0
        doc->children = content;
13646
0
    }
13647
0
    else {
13648
0
        xmlParseContent(ctxt);
13649
0
    }
13650
0
    if ((RAW == '<') && (NXT(1) == '/')) {
13651
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13652
0
    } else if (RAW != 0) {
13653
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13654
0
    }
13655
0
    if (ctxt->node != newDoc->children) {
13656
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13657
0
    }
13658
13659
0
    if (!ctxt->wellFormed) {
13660
0
        if (ctxt->errNo == 0)
13661
0
      ret = 1;
13662
0
  else
13663
0
      ret = ctxt->errNo;
13664
0
    } else {
13665
0
      ret = 0;
13666
0
    }
13667
13668
0
    if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13669
0
  xmlNodePtr cur;
13670
13671
  /*
13672
   * Return the newly created nodeset after unlinking it from
13673
   * they pseudo parent.
13674
   */
13675
0
  cur = newDoc->children->children;
13676
0
  *lst = cur;
13677
0
  while (cur != NULL) {
13678
0
      xmlSetTreeDoc(cur, doc);
13679
0
      cur->parent = NULL;
13680
0
      cur = cur->next;
13681
0
  }
13682
0
  newDoc->children->children = NULL;
13683
0
    }
13684
13685
0
    if (sax != NULL)
13686
0
  ctxt->sax = oldsax;
13687
0
    xmlFreeParserCtxt(ctxt);
13688
0
    newDoc->intSubset = NULL;
13689
0
    newDoc->extSubset = NULL;
13690
    /* This leaks the namespace list if doc == NULL */
13691
0
    newDoc->oldNs = NULL;
13692
0
    xmlFreeDoc(newDoc);
13693
13694
0
    return(ret);
13695
0
}
13696
13697
/**
13698
 * xmlSAXParseEntity:
13699
 * @sax:  the SAX handler block
13700
 * @filename:  the filename
13701
 *
13702
 * DEPRECATED: Don't use.
13703
 *
13704
 * parse an XML external entity out of context and build a tree.
13705
 * It use the given SAX function block to handle the parsing callback.
13706
 * If sax is NULL, fallback to the default DOM tree building routines.
13707
 *
13708
 * [78] extParsedEnt ::= TextDecl? content
13709
 *
13710
 * This correspond to a "Well Balanced" chunk
13711
 *
13712
 * Returns the resulting document tree
13713
 */
13714
13715
xmlDocPtr
13716
0
xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13717
0
    xmlDocPtr ret;
13718
0
    xmlParserCtxtPtr ctxt;
13719
13720
0
    ctxt = xmlCreateFileParserCtxt(filename);
13721
0
    if (ctxt == NULL) {
13722
0
  return(NULL);
13723
0
    }
13724
0
    if (sax != NULL) {
13725
0
  if (ctxt->sax != NULL)
13726
0
      xmlFree(ctxt->sax);
13727
0
        ctxt->sax = sax;
13728
0
        ctxt->userData = NULL;
13729
0
    }
13730
13731
0
    xmlParseExtParsedEnt(ctxt);
13732
13733
0
    if (ctxt->wellFormed)
13734
0
  ret = ctxt->myDoc;
13735
0
    else {
13736
0
        ret = NULL;
13737
0
        xmlFreeDoc(ctxt->myDoc);
13738
0
        ctxt->myDoc = NULL;
13739
0
    }
13740
0
    if (sax != NULL)
13741
0
        ctxt->sax = NULL;
13742
0
    xmlFreeParserCtxt(ctxt);
13743
13744
0
    return(ret);
13745
0
}
13746
13747
/**
13748
 * xmlParseEntity:
13749
 * @filename:  the filename
13750
 *
13751
 * parse an XML external entity out of context and build a tree.
13752
 *
13753
 * [78] extParsedEnt ::= TextDecl? content
13754
 *
13755
 * This correspond to a "Well Balanced" chunk
13756
 *
13757
 * Returns the resulting document tree
13758
 */
13759
13760
xmlDocPtr
13761
0
xmlParseEntity(const char *filename) {
13762
0
    return(xmlSAXParseEntity(NULL, filename));
13763
0
}
13764
#endif /* LIBXML_SAX1_ENABLED */
13765
13766
/**
13767
 * xmlCreateEntityParserCtxtInternal:
13768
 * @URL:  the entity URL
13769
 * @ID:  the entity PUBLIC ID
13770
 * @base:  a possible base for the target URI
13771
 * @pctx:  parser context used to set options on new context
13772
 *
13773
 * Create a parser context for an external entity
13774
 * Automatic support for ZLIB/Compress compressed document is provided
13775
 * by default if found at compile-time.
13776
 *
13777
 * Returns the new parser context or NULL
13778
 */
13779
static xmlParserCtxtPtr
13780
xmlCreateEntityParserCtxtInternal(xmlSAXHandlerPtr sax, void *userData,
13781
        const xmlChar *URL, const xmlChar *ID, const xmlChar *base,
13782
305k
        xmlParserCtxtPtr pctx) {
13783
305k
    xmlParserCtxtPtr ctxt;
13784
305k
    xmlParserInputPtr inputStream;
13785
305k
    char *directory = NULL;
13786
305k
    xmlChar *uri;
13787
13788
305k
    ctxt = xmlNewSAXParserCtxt(sax, userData);
13789
305k
    if (ctxt == NULL) {
13790
0
  return(NULL);
13791
0
    }
13792
13793
305k
    if (pctx != NULL) {
13794
305k
        ctxt->options = pctx->options;
13795
305k
        ctxt->_private = pctx->_private;
13796
305k
  ctxt->input_id = pctx->input_id;
13797
305k
    }
13798
13799
    /* Don't read from stdin. */
13800
305k
    if (xmlStrcmp(URL, BAD_CAST "-") == 0)
13801
0
        URL = BAD_CAST "./-";
13802
13803
305k
    uri = xmlBuildURI(URL, base);
13804
13805
305k
    if (uri == NULL) {
13806
6.27k
  inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
13807
6.27k
  if (inputStream == NULL) {
13808
5.10k
      xmlFreeParserCtxt(ctxt);
13809
5.10k
      return(NULL);
13810
5.10k
  }
13811
13812
1.17k
  inputPush(ctxt, inputStream);
13813
13814
1.17k
  if ((ctxt->directory == NULL) && (directory == NULL))
13815
1.17k
      directory = xmlParserGetDirectory((char *)URL);
13816
1.17k
  if ((ctxt->directory == NULL) && (directory != NULL))
13817
1.17k
      ctxt->directory = directory;
13818
299k
    } else {
13819
299k
  inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
13820
299k
  if (inputStream == NULL) {
13821
257k
      xmlFree(uri);
13822
257k
      xmlFreeParserCtxt(ctxt);
13823
257k
      return(NULL);
13824
257k
  }
13825
13826
41.7k
  inputPush(ctxt, inputStream);
13827
13828
41.7k
  if ((ctxt->directory == NULL) && (directory == NULL))
13829
41.7k
      directory = xmlParserGetDirectory((char *)uri);
13830
41.7k
  if ((ctxt->directory == NULL) && (directory != NULL))
13831
41.7k
      ctxt->directory = directory;
13832
41.7k
  xmlFree(uri);
13833
41.7k
    }
13834
42.9k
    return(ctxt);
13835
305k
}
13836
13837
/**
13838
 * xmlCreateEntityParserCtxt:
13839
 * @URL:  the entity URL
13840
 * @ID:  the entity PUBLIC ID
13841
 * @base:  a possible base for the target URI
13842
 *
13843
 * Create a parser context for an external entity
13844
 * Automatic support for ZLIB/Compress compressed document is provided
13845
 * by default if found at compile-time.
13846
 *
13847
 * Returns the new parser context or NULL
13848
 */
13849
xmlParserCtxtPtr
13850
xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
13851
0
                    const xmlChar *base) {
13852
0
    return xmlCreateEntityParserCtxtInternal(NULL, NULL, URL, ID, base, NULL);
13853
13854
0
}
13855
13856
/************************************************************************
13857
 *                  *
13858
 *    Front ends when parsing from a file     *
13859
 *                  *
13860
 ************************************************************************/
13861
13862
/**
13863
 * xmlCreateURLParserCtxt:
13864
 * @filename:  the filename or URL
13865
 * @options:  a combination of xmlParserOption
13866
 *
13867
 * Create a parser context for a file or URL content.
13868
 * Automatic support for ZLIB/Compress compressed document is provided
13869
 * by default if found at compile-time and for file accesses
13870
 *
13871
 * Returns the new parser context or NULL
13872
 */
13873
xmlParserCtxtPtr
13874
xmlCreateURLParserCtxt(const char *filename, int options)
13875
0
{
13876
0
    xmlParserCtxtPtr ctxt;
13877
0
    xmlParserInputPtr inputStream;
13878
0
    char *directory = NULL;
13879
13880
0
    ctxt = xmlNewParserCtxt();
13881
0
    if (ctxt == NULL) {
13882
0
  xmlErrMemory(NULL, "cannot allocate parser context");
13883
0
  return(NULL);
13884
0
    }
13885
13886
0
    if (options)
13887
0
  xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13888
0
    ctxt->linenumbers = 1;
13889
13890
0
    inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
13891
0
    if (inputStream == NULL) {
13892
0
  xmlFreeParserCtxt(ctxt);
13893
0
  return(NULL);
13894
0
    }
13895
13896
0
    inputPush(ctxt, inputStream);
13897
0
    if ((ctxt->directory == NULL) && (directory == NULL))
13898
0
        directory = xmlParserGetDirectory(filename);
13899
0
    if ((ctxt->directory == NULL) && (directory != NULL))
13900
0
        ctxt->directory = directory;
13901
13902
0
    return(ctxt);
13903
0
}
13904
13905
/**
13906
 * xmlCreateFileParserCtxt:
13907
 * @filename:  the filename
13908
 *
13909
 * Create a parser context for a file content.
13910
 * Automatic support for ZLIB/Compress compressed document is provided
13911
 * by default if found at compile-time.
13912
 *
13913
 * Returns the new parser context or NULL
13914
 */
13915
xmlParserCtxtPtr
13916
xmlCreateFileParserCtxt(const char *filename)
13917
0
{
13918
0
    return(xmlCreateURLParserCtxt(filename, 0));
13919
0
}
13920
13921
#ifdef LIBXML_SAX1_ENABLED
13922
/**
13923
 * xmlSAXParseFileWithData:
13924
 * @sax:  the SAX handler block
13925
 * @filename:  the filename
13926
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
13927
 *             documents
13928
 * @data:  the userdata
13929
 *
13930
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
13931
 *
13932
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13933
 * compressed document is provided by default if found at compile-time.
13934
 * It use the given SAX function block to handle the parsing callback.
13935
 * If sax is NULL, fallback to the default DOM tree building routines.
13936
 *
13937
 * User data (void *) is stored within the parser context in the
13938
 * context's _private member, so it is available nearly everywhere in libxml
13939
 *
13940
 * Returns the resulting document tree
13941
 */
13942
13943
xmlDocPtr
13944
xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
13945
0
                        int recovery, void *data) {
13946
0
    xmlDocPtr ret;
13947
0
    xmlParserCtxtPtr ctxt;
13948
13949
0
    xmlInitParser();
13950
13951
0
    ctxt = xmlCreateFileParserCtxt(filename);
13952
0
    if (ctxt == NULL) {
13953
0
  return(NULL);
13954
0
    }
13955
0
    if (sax != NULL) {
13956
0
  if (ctxt->sax != NULL)
13957
0
      xmlFree(ctxt->sax);
13958
0
        ctxt->sax = sax;
13959
0
    }
13960
0
    xmlDetectSAX2(ctxt);
13961
0
    if (data!=NULL) {
13962
0
  ctxt->_private = data;
13963
0
    }
13964
13965
0
    if (ctxt->directory == NULL)
13966
0
        ctxt->directory = xmlParserGetDirectory(filename);
13967
13968
0
    ctxt->recovery = recovery;
13969
13970
0
    xmlParseDocument(ctxt);
13971
13972
0
    if ((ctxt->wellFormed) || recovery) {
13973
0
        ret = ctxt->myDoc;
13974
0
  if ((ret != NULL) && (ctxt->input->buf != NULL)) {
13975
0
      if (ctxt->input->buf->compressed > 0)
13976
0
    ret->compression = 9;
13977
0
      else
13978
0
    ret->compression = ctxt->input->buf->compressed;
13979
0
  }
13980
0
    }
13981
0
    else {
13982
0
       ret = NULL;
13983
0
       xmlFreeDoc(ctxt->myDoc);
13984
0
       ctxt->myDoc = NULL;
13985
0
    }
13986
0
    if (sax != NULL)
13987
0
        ctxt->sax = NULL;
13988
0
    xmlFreeParserCtxt(ctxt);
13989
13990
0
    return(ret);
13991
0
}
13992
13993
/**
13994
 * xmlSAXParseFile:
13995
 * @sax:  the SAX handler block
13996
 * @filename:  the filename
13997
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
13998
 *             documents
13999
 *
14000
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
14001
 *
14002
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14003
 * compressed document is provided by default if found at compile-time.
14004
 * It use the given SAX function block to handle the parsing callback.
14005
 * If sax is NULL, fallback to the default DOM tree building routines.
14006
 *
14007
 * Returns the resulting document tree
14008
 */
14009
14010
xmlDocPtr
14011
xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
14012
0
                          int recovery) {
14013
0
    return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
14014
0
}
14015
14016
/**
14017
 * xmlRecoverDoc:
14018
 * @cur:  a pointer to an array of xmlChar
14019
 *
14020
 * DEPRECATED: Use xmlReadDoc with XML_PARSE_RECOVER.
14021
 *
14022
 * parse an XML in-memory document and build a tree.
14023
 * In the case the document is not Well Formed, a attempt to build a
14024
 * tree is tried anyway
14025
 *
14026
 * Returns the resulting document tree or NULL in case of failure
14027
 */
14028
14029
xmlDocPtr
14030
0
xmlRecoverDoc(const xmlChar *cur) {
14031
0
    return(xmlSAXParseDoc(NULL, cur, 1));
14032
0
}
14033
14034
/**
14035
 * xmlParseFile:
14036
 * @filename:  the filename
14037
 *
14038
 * DEPRECATED: Use xmlReadFile.
14039
 *
14040
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14041
 * compressed document is provided by default if found at compile-time.
14042
 *
14043
 * Returns the resulting document tree if the file was wellformed,
14044
 * NULL otherwise.
14045
 */
14046
14047
xmlDocPtr
14048
0
xmlParseFile(const char *filename) {
14049
0
    return(xmlSAXParseFile(NULL, filename, 0));
14050
0
}
14051
14052
/**
14053
 * xmlRecoverFile:
14054
 * @filename:  the filename
14055
 *
14056
 * DEPRECATED: Use xmlReadFile with XML_PARSE_RECOVER.
14057
 *
14058
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14059
 * compressed document is provided by default if found at compile-time.
14060
 * In the case the document is not Well Formed, it attempts to build
14061
 * a tree anyway
14062
 *
14063
 * Returns the resulting document tree or NULL in case of failure
14064
 */
14065
14066
xmlDocPtr
14067
0
xmlRecoverFile(const char *filename) {
14068
0
    return(xmlSAXParseFile(NULL, filename, 1));
14069
0
}
14070
14071
14072
/**
14073
 * xmlSetupParserForBuffer:
14074
 * @ctxt:  an XML parser context
14075
 * @buffer:  a xmlChar * buffer
14076
 * @filename:  a file name
14077
 *
14078
 * DEPRECATED: Don't use.
14079
 *
14080
 * Setup the parser context to parse a new buffer; Clears any prior
14081
 * contents from the parser context. The buffer parameter must not be
14082
 * NULL, but the filename parameter can be
14083
 */
14084
void
14085
xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
14086
                             const char* filename)
14087
0
{
14088
0
    xmlParserInputPtr input;
14089
14090
0
    if ((ctxt == NULL) || (buffer == NULL))
14091
0
        return;
14092
14093
0
    input = xmlNewInputStream(ctxt);
14094
0
    if (input == NULL) {
14095
0
        xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
14096
0
        xmlClearParserCtxt(ctxt);
14097
0
        return;
14098
0
    }
14099
14100
0
    xmlClearParserCtxt(ctxt);
14101
0
    if (filename != NULL)
14102
0
        input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
14103
0
    input->base = buffer;
14104
0
    input->cur = buffer;
14105
0
    input->end = &buffer[xmlStrlen(buffer)];
14106
0
    inputPush(ctxt, input);
14107
0
}
14108
14109
/**
14110
 * xmlSAXUserParseFile:
14111
 * @sax:  a SAX handler
14112
 * @user_data:  The user data returned on SAX callbacks
14113
 * @filename:  a file name
14114
 *
14115
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
14116
 *
14117
 * parse an XML file and call the given SAX handler routines.
14118
 * Automatic support for ZLIB/Compress compressed document is provided
14119
 *
14120
 * Returns 0 in case of success or a error number otherwise
14121
 */
14122
int
14123
xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
14124
0
                    const char *filename) {
14125
0
    int ret = 0;
14126
0
    xmlParserCtxtPtr ctxt;
14127
14128
0
    ctxt = xmlCreateFileParserCtxt(filename);
14129
0
    if (ctxt == NULL) return -1;
14130
0
    if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14131
0
  xmlFree(ctxt->sax);
14132
0
    ctxt->sax = sax;
14133
0
    xmlDetectSAX2(ctxt);
14134
14135
0
    if (user_data != NULL)
14136
0
  ctxt->userData = user_data;
14137
14138
0
    xmlParseDocument(ctxt);
14139
14140
0
    if (ctxt->wellFormed)
14141
0
  ret = 0;
14142
0
    else {
14143
0
        if (ctxt->errNo != 0)
14144
0
      ret = ctxt->errNo;
14145
0
  else
14146
0
      ret = -1;
14147
0
    }
14148
0
    if (sax != NULL)
14149
0
  ctxt->sax = NULL;
14150
0
    if (ctxt->myDoc != NULL) {
14151
0
        xmlFreeDoc(ctxt->myDoc);
14152
0
  ctxt->myDoc = NULL;
14153
0
    }
14154
0
    xmlFreeParserCtxt(ctxt);
14155
14156
0
    return ret;
14157
0
}
14158
#endif /* LIBXML_SAX1_ENABLED */
14159
14160
/************************************************************************
14161
 *                  *
14162
 *    Front ends when parsing from memory     *
14163
 *                  *
14164
 ************************************************************************/
14165
14166
/**
14167
 * xmlCreateMemoryParserCtxt:
14168
 * @buffer:  a pointer to a char array
14169
 * @size:  the size of the array
14170
 *
14171
 * Create a parser context for an XML in-memory document.
14172
 *
14173
 * Returns the new parser context or NULL
14174
 */
14175
xmlParserCtxtPtr
14176
131k
xmlCreateMemoryParserCtxt(const char *buffer, int size) {
14177
131k
    xmlParserCtxtPtr ctxt;
14178
131k
    xmlParserInputPtr input;
14179
131k
    xmlParserInputBufferPtr buf;
14180
14181
131k
    if (buffer == NULL)
14182
0
  return(NULL);
14183
131k
    if (size <= 0)
14184
9.44k
  return(NULL);
14185
14186
122k
    ctxt = xmlNewParserCtxt();
14187
122k
    if (ctxt == NULL)
14188
0
  return(NULL);
14189
14190
122k
    buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14191
122k
    if (buf == NULL) {
14192
0
  xmlFreeParserCtxt(ctxt);
14193
0
  return(NULL);
14194
0
    }
14195
14196
122k
    input = xmlNewInputStream(ctxt);
14197
122k
    if (input == NULL) {
14198
0
  xmlFreeParserInputBuffer(buf);
14199
0
  xmlFreeParserCtxt(ctxt);
14200
0
  return(NULL);
14201
0
    }
14202
14203
122k
    input->filename = NULL;
14204
122k
    input->buf = buf;
14205
122k
    xmlBufResetInput(input->buf->buffer, input);
14206
14207
122k
    inputPush(ctxt, input);
14208
122k
    return(ctxt);
14209
122k
}
14210
14211
#ifdef LIBXML_SAX1_ENABLED
14212
/**
14213
 * xmlSAXParseMemoryWithData:
14214
 * @sax:  the SAX handler block
14215
 * @buffer:  an pointer to a char array
14216
 * @size:  the size of the array
14217
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14218
 *             documents
14219
 * @data:  the userdata
14220
 *
14221
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14222
 *
14223
 * parse an XML in-memory block and use the given SAX function block
14224
 * to handle the parsing callback. If sax is NULL, fallback to the default
14225
 * DOM tree building routines.
14226
 *
14227
 * User data (void *) is stored within the parser context in the
14228
 * context's _private member, so it is available nearly everywhere in libxml
14229
 *
14230
 * Returns the resulting document tree
14231
 */
14232
14233
xmlDocPtr
14234
xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14235
0
            int size, int recovery, void *data) {
14236
0
    xmlDocPtr ret;
14237
0
    xmlParserCtxtPtr ctxt;
14238
14239
0
    xmlInitParser();
14240
14241
0
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14242
0
    if (ctxt == NULL) return(NULL);
14243
0
    if (sax != NULL) {
14244
0
  if (ctxt->sax != NULL)
14245
0
      xmlFree(ctxt->sax);
14246
0
        ctxt->sax = sax;
14247
0
    }
14248
0
    xmlDetectSAX2(ctxt);
14249
0
    if (data!=NULL) {
14250
0
  ctxt->_private=data;
14251
0
    }
14252
14253
0
    ctxt->recovery = recovery;
14254
14255
0
    xmlParseDocument(ctxt);
14256
14257
0
    if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14258
0
    else {
14259
0
       ret = NULL;
14260
0
       xmlFreeDoc(ctxt->myDoc);
14261
0
       ctxt->myDoc = NULL;
14262
0
    }
14263
0
    if (sax != NULL)
14264
0
  ctxt->sax = NULL;
14265
0
    xmlFreeParserCtxt(ctxt);
14266
14267
0
    return(ret);
14268
0
}
14269
14270
/**
14271
 * xmlSAXParseMemory:
14272
 * @sax:  the SAX handler block
14273
 * @buffer:  an pointer to a char array
14274
 * @size:  the size of the array
14275
 * @recovery:  work in recovery mode, i.e. tries to read not Well Formed
14276
 *             documents
14277
 *
14278
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14279
 *
14280
 * parse an XML in-memory block and use the given SAX function block
14281
 * to handle the parsing callback. If sax is NULL, fallback to the default
14282
 * DOM tree building routines.
14283
 *
14284
 * Returns the resulting document tree
14285
 */
14286
xmlDocPtr
14287
xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14288
0
            int size, int recovery) {
14289
0
    return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
14290
0
}
14291
14292
/**
14293
 * xmlParseMemory:
14294
 * @buffer:  an pointer to a char array
14295
 * @size:  the size of the array
14296
 *
14297
 * DEPRECATED: Use xmlReadMemory.
14298
 *
14299
 * parse an XML in-memory block and build a tree.
14300
 *
14301
 * Returns the resulting document tree
14302
 */
14303
14304
0
xmlDocPtr xmlParseMemory(const char *buffer, int size) {
14305
0
   return(xmlSAXParseMemory(NULL, buffer, size, 0));
14306
0
}
14307
14308
/**
14309
 * xmlRecoverMemory:
14310
 * @buffer:  an pointer to a char array
14311
 * @size:  the size of the array
14312
 *
14313
 * DEPRECATED: Use xmlReadMemory with XML_PARSE_RECOVER.
14314
 *
14315
 * parse an XML in-memory block and build a tree.
14316
 * In the case the document is not Well Formed, an attempt to
14317
 * build a tree is tried anyway
14318
 *
14319
 * Returns the resulting document tree or NULL in case of error
14320
 */
14321
14322
0
xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
14323
0
   return(xmlSAXParseMemory(NULL, buffer, size, 1));
14324
0
}
14325
14326
/**
14327
 * xmlSAXUserParseMemory:
14328
 * @sax:  a SAX handler
14329
 * @user_data:  The user data returned on SAX callbacks
14330
 * @buffer:  an in-memory XML document input
14331
 * @size:  the length of the XML document in bytes
14332
 *
14333
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14334
 *
14335
 * parse an XML in-memory buffer and call the given SAX handler routines.
14336
 *
14337
 * Returns 0 in case of success or a error number otherwise
14338
 */
14339
int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
14340
0
        const char *buffer, int size) {
14341
0
    int ret = 0;
14342
0
    xmlParserCtxtPtr ctxt;
14343
14344
0
    xmlInitParser();
14345
14346
0
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14347
0
    if (ctxt == NULL) return -1;
14348
0
    if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14349
0
        xmlFree(ctxt->sax);
14350
0
    ctxt->sax = sax;
14351
0
    xmlDetectSAX2(ctxt);
14352
14353
0
    if (user_data != NULL)
14354
0
  ctxt->userData = user_data;
14355
14356
0
    xmlParseDocument(ctxt);
14357
14358
0
    if (ctxt->wellFormed)
14359
0
  ret = 0;
14360
0
    else {
14361
0
        if (ctxt->errNo != 0)
14362
0
      ret = ctxt->errNo;
14363
0
  else
14364
0
      ret = -1;
14365
0
    }
14366
0
    if (sax != NULL)
14367
0
        ctxt->sax = NULL;
14368
0
    if (ctxt->myDoc != NULL) {
14369
0
        xmlFreeDoc(ctxt->myDoc);
14370
0
  ctxt->myDoc = NULL;
14371
0
    }
14372
0
    xmlFreeParserCtxt(ctxt);
14373
14374
0
    return ret;
14375
0
}
14376
#endif /* LIBXML_SAX1_ENABLED */
14377
14378
/**
14379
 * xmlCreateDocParserCtxt:
14380
 * @cur:  a pointer to an array of xmlChar
14381
 *
14382
 * Creates a parser context for an XML in-memory document.
14383
 *
14384
 * Returns the new parser context or NULL
14385
 */
14386
xmlParserCtxtPtr
14387
0
xmlCreateDocParserCtxt(const xmlChar *cur) {
14388
0
    int len;
14389
14390
0
    if (cur == NULL)
14391
0
  return(NULL);
14392
0
    len = xmlStrlen(cur);
14393
0
    return(xmlCreateMemoryParserCtxt((const char *)cur, len));
14394
0
}
14395
14396
#ifdef LIBXML_SAX1_ENABLED
14397
/**
14398
 * xmlSAXParseDoc:
14399
 * @sax:  the SAX handler block
14400
 * @cur:  a pointer to an array of xmlChar
14401
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14402
 *             documents
14403
 *
14404
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadDoc.
14405
 *
14406
 * parse an XML in-memory document and build a tree.
14407
 * It use the given SAX function block to handle the parsing callback.
14408
 * If sax is NULL, fallback to the default DOM tree building routines.
14409
 *
14410
 * Returns the resulting document tree
14411
 */
14412
14413
xmlDocPtr
14414
0
xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
14415
0
    xmlDocPtr ret;
14416
0
    xmlParserCtxtPtr ctxt;
14417
0
    xmlSAXHandlerPtr oldsax = NULL;
14418
14419
0
    if (cur == NULL) return(NULL);
14420
14421
14422
0
    ctxt = xmlCreateDocParserCtxt(cur);
14423
0
    if (ctxt == NULL) return(NULL);
14424
0
    if (sax != NULL) {
14425
0
        oldsax = ctxt->sax;
14426
0
        ctxt->sax = sax;
14427
0
        ctxt->userData = NULL;
14428
0
    }
14429
0
    xmlDetectSAX2(ctxt);
14430
14431
0
    xmlParseDocument(ctxt);
14432
0
    if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14433
0
    else {
14434
0
       ret = NULL;
14435
0
       xmlFreeDoc(ctxt->myDoc);
14436
0
       ctxt->myDoc = NULL;
14437
0
    }
14438
0
    if (sax != NULL)
14439
0
  ctxt->sax = oldsax;
14440
0
    xmlFreeParserCtxt(ctxt);
14441
14442
0
    return(ret);
14443
0
}
14444
14445
/**
14446
 * xmlParseDoc:
14447
 * @cur:  a pointer to an array of xmlChar
14448
 *
14449
 * DEPRECATED: Use xmlReadDoc.
14450
 *
14451
 * parse an XML in-memory document and build a tree.
14452
 *
14453
 * Returns the resulting document tree
14454
 */
14455
14456
xmlDocPtr
14457
0
xmlParseDoc(const xmlChar *cur) {
14458
0
    return(xmlSAXParseDoc(NULL, cur, 0));
14459
0
}
14460
#endif /* LIBXML_SAX1_ENABLED */
14461
14462
#ifdef LIBXML_LEGACY_ENABLED
14463
/************************************************************************
14464
 *                  *
14465
 *  Specific function to keep track of entities references    *
14466
 *  and used by the XSLT debugger         *
14467
 *                  *
14468
 ************************************************************************/
14469
14470
static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14471
14472
/**
14473
 * xmlAddEntityReference:
14474
 * @ent : A valid entity
14475
 * @firstNode : A valid first node for children of entity
14476
 * @lastNode : A valid last node of children entity
14477
 *
14478
 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14479
 */
14480
static void
14481
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14482
                      xmlNodePtr lastNode)
14483
{
14484
    if (xmlEntityRefFunc != NULL) {
14485
        (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14486
    }
14487
}
14488
14489
14490
/**
14491
 * xmlSetEntityReferenceFunc:
14492
 * @func: A valid function
14493
 *
14494
 * Set the function to call call back when a xml reference has been made
14495
 */
14496
void
14497
xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14498
{
14499
    xmlEntityRefFunc = func;
14500
}
14501
#endif /* LIBXML_LEGACY_ENABLED */
14502
14503
/************************************************************************
14504
 *                  *
14505
 *        Miscellaneous       *
14506
 *                  *
14507
 ************************************************************************/
14508
14509
static int xmlParserInitialized = 0;
14510
14511
/**
14512
 * xmlInitParser:
14513
 *
14514
 * Initialization function for the XML parser.
14515
 * This is not reentrant. Call once before processing in case of
14516
 * use in multithreaded programs.
14517
 */
14518
14519
void
14520
116M
xmlInitParser(void) {
14521
    /*
14522
     * Note that the initialization code must not make memory allocations.
14523
     */
14524
116M
    if (xmlParserInitialized != 0)
14525
116M
  return;
14526
14527
1.48k
#ifdef LIBXML_THREAD_ENABLED
14528
1.48k
    __xmlGlobalInitMutexLock();
14529
1.48k
    if (xmlParserInitialized == 0) {
14530
1.48k
#endif
14531
#if defined(_WIN32) && (!defined(LIBXML_STATIC) || defined(LIBXML_STATIC_FOR_DLL))
14532
        if (xmlFree == free)
14533
            atexit(xmlCleanupParser);
14534
#endif
14535
14536
1.48k
  xmlInitThreadsInternal();
14537
1.48k
  xmlInitGlobalsInternal();
14538
1.48k
  xmlInitMemoryInternal();
14539
1.48k
        __xmlInitializeDict();
14540
1.48k
  xmlInitEncodingInternal();
14541
1.48k
  xmlRegisterDefaultInputCallbacks();
14542
1.48k
#ifdef LIBXML_OUTPUT_ENABLED
14543
1.48k
  xmlRegisterDefaultOutputCallbacks();
14544
1.48k
#endif /* LIBXML_OUTPUT_ENABLED */
14545
1.48k
#if defined(LIBXML_XPATH_ENABLED) || defined(LIBXML_SCHEMAS_ENABLED)
14546
1.48k
  xmlInitXPathInternal();
14547
1.48k
#endif
14548
1.48k
  xmlParserInitialized = 1;
14549
1.48k
#ifdef LIBXML_THREAD_ENABLED
14550
1.48k
    }
14551
1.48k
    __xmlGlobalInitMutexUnlock();
14552
1.48k
#endif
14553
1.48k
}
14554
14555
/**
14556
 * xmlCleanupParser:
14557
 *
14558
 * This function name is somewhat misleading. It does not clean up
14559
 * parser state, it cleans up memory allocated by the library itself.
14560
 * It is a cleanup function for the XML library. It tries to reclaim all
14561
 * related global memory allocated for the library processing.
14562
 * It doesn't deallocate any document related memory. One should
14563
 * call xmlCleanupParser() only when the process has finished using
14564
 * the library and all XML/HTML documents built with it.
14565
 * See also xmlInitParser() which has the opposite function of preparing
14566
 * the library for operations.
14567
 *
14568
 * WARNING: if your application is multithreaded or has plugin support
14569
 *          calling this may crash the application if another thread or
14570
 *          a plugin is still using libxml2. It's sometimes very hard to
14571
 *          guess if libxml2 is in use in the application, some libraries
14572
 *          or plugins may use it without notice. In case of doubt abstain
14573
 *          from calling this function or do it just before calling exit()
14574
 *          to avoid leak reports from valgrind !
14575
 */
14576
14577
void
14578
0
xmlCleanupParser(void) {
14579
0
    if (!xmlParserInitialized)
14580
0
  return;
14581
14582
0
    xmlCleanupCharEncodingHandlers();
14583
0
#ifdef LIBXML_CATALOG_ENABLED
14584
0
    xmlCatalogCleanup();
14585
0
#endif
14586
0
    xmlCleanupDictInternal();
14587
0
    xmlCleanupInputCallbacks();
14588
0
#ifdef LIBXML_OUTPUT_ENABLED
14589
0
    xmlCleanupOutputCallbacks();
14590
0
#endif
14591
0
#ifdef LIBXML_SCHEMAS_ENABLED
14592
0
    xmlSchemaCleanupTypes();
14593
0
    xmlRelaxNGCleanupTypes();
14594
0
#endif
14595
0
    xmlCleanupGlobalsInternal();
14596
0
    xmlCleanupThreadsInternal();
14597
0
    xmlCleanupMemoryInternal();
14598
0
    xmlParserInitialized = 0;
14599
0
}
14600
14601
#if defined(HAVE_ATTRIBUTE_DESTRUCTOR) && !defined(LIBXML_STATIC) && \
    !defined(_WIN32)
/*
 * xmlDestructor:
 *
 * Function tagged with ATTRIBUTE_DESTRUCTOR that releases the library
 * globals through xmlCleanupParser().
 */
static void
ATTRIBUTE_DESTRUCTOR
xmlDestructor(void) {
    /*
     * Calling custom deallocation functions in a destructor can cause
     * problems, for example with Nokogiri.
     */
    if (xmlFree != free)
        return;

    xmlCleanupParser();
}
#endif
14614
14615
/************************************************************************
14616
 *                  *
14617
 *  New set (2.6.0) of simpler and more flexible APIs   *
14618
 *                  *
14619
 ************************************************************************/
14620
14621
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A NULL @str is ignored; when "dict" is NULL the string
 * is always freed.
 *
 * The expansion is wrapped in do { } while (0) so that the macro behaves
 * as a single statement: it must be followed by a semicolon and can be
 * used safely as the body of an if/else.
 */
#define DICT_FREE(str)                                              \
    do {                                                            \
        if ((str) && ((!dict) ||                                    \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))      \
            xmlFree((char *)(str));                                 \
    } while (0)
14632
14633
/**
14634
 * xmlCtxtReset:
14635
 * @ctxt: an XML parser context
14636
 *
14637
 * Reset a parser context
14638
 */
14639
void
14640
xmlCtxtReset(xmlParserCtxtPtr ctxt)
14641
0
{
14642
0
    xmlParserInputPtr input;
14643
0
    xmlDictPtr dict;
14644
14645
0
    if (ctxt == NULL)
14646
0
        return;
14647
14648
0
    dict = ctxt->dict;
14649
14650
0
    while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14651
0
        xmlFreeInputStream(input);
14652
0
    }
14653
0
    ctxt->inputNr = 0;
14654
0
    ctxt->input = NULL;
14655
14656
0
    ctxt->spaceNr = 0;
14657
0
    if (ctxt->spaceTab != NULL) {
14658
0
  ctxt->spaceTab[0] = -1;
14659
0
  ctxt->space = &ctxt->spaceTab[0];
14660
0
    } else {
14661
0
        ctxt->space = NULL;
14662
0
    }
14663
14664
14665
0
    ctxt->nodeNr = 0;
14666
0
    ctxt->node = NULL;
14667
14668
0
    ctxt->nameNr = 0;
14669
0
    ctxt->name = NULL;
14670
14671
0
    ctxt->nsNr = 0;
14672
14673
0
    DICT_FREE(ctxt->version);
14674
0
    ctxt->version = NULL;
14675
0
    DICT_FREE(ctxt->encoding);
14676
0
    ctxt->encoding = NULL;
14677
0
    DICT_FREE(ctxt->directory);
14678
0
    ctxt->directory = NULL;
14679
0
    DICT_FREE(ctxt->extSubURI);
14680
0
    ctxt->extSubURI = NULL;
14681
0
    DICT_FREE(ctxt->extSubSystem);
14682
0
    ctxt->extSubSystem = NULL;
14683
0
    if (ctxt->myDoc != NULL)
14684
0
        xmlFreeDoc(ctxt->myDoc);
14685
0
    ctxt->myDoc = NULL;
14686
14687
0
    ctxt->standalone = -1;
14688
0
    ctxt->hasExternalSubset = 0;
14689
0
    ctxt->hasPErefs = 0;
14690
0
    ctxt->html = 0;
14691
0
    ctxt->external = 0;
14692
0
    ctxt->instate = XML_PARSER_START;
14693
0
    ctxt->token = 0;
14694
14695
0
    ctxt->wellFormed = 1;
14696
0
    ctxt->nsWellFormed = 1;
14697
0
    ctxt->disableSAX = 0;
14698
0
    ctxt->valid = 1;
14699
#if 0
14700
    ctxt->vctxt.userData = ctxt;
14701
    ctxt->vctxt.error = xmlParserValidityError;
14702
    ctxt->vctxt.warning = xmlParserValidityWarning;
14703
#endif
14704
0
    ctxt->record_info = 0;
14705
0
    ctxt->checkIndex = 0;
14706
0
    ctxt->endCheckState = 0;
14707
0
    ctxt->inSubset = 0;
14708
0
    ctxt->errNo = XML_ERR_OK;
14709
0
    ctxt->depth = 0;
14710
0
    ctxt->charset = XML_CHAR_ENCODING_UTF8;
14711
0
    ctxt->catalogs = NULL;
14712
0
    ctxt->sizeentities = 0;
14713
0
    ctxt->sizeentcopy = 0;
14714
0
    xmlInitNodeInfoSeq(&ctxt->node_seq);
14715
14716
0
    if (ctxt->attsDefault != NULL) {
14717
0
        xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
14718
0
        ctxt->attsDefault = NULL;
14719
0
    }
14720
0
    if (ctxt->attsSpecial != NULL) {
14721
0
        xmlHashFree(ctxt->attsSpecial, NULL);
14722
0
        ctxt->attsSpecial = NULL;
14723
0
    }
14724
14725
0
#ifdef LIBXML_CATALOG_ENABLED
14726
0
    if (ctxt->catalogs != NULL)
14727
0
  xmlCatalogFreeLocal(ctxt->catalogs);
14728
0
#endif
14729
0
    ctxt->nbErrors = 0;
14730
0
    ctxt->nbWarnings = 0;
14731
0
    if (ctxt->lastError.code != XML_ERR_OK)
14732
0
        xmlResetError(&ctxt->lastError);
14733
0
}
14734
14735
/**
14736
 * xmlCtxtResetPush:
14737
 * @ctxt: an XML parser context
14738
 * @chunk:  a pointer to an array of chars
14739
 * @size:  number of chars in the array
14740
 * @filename:  an optional file name or URI
14741
 * @encoding:  the document encoding, or NULL
14742
 *
14743
 * Reset a push parser context
14744
 *
14745
 * Returns 0 in case of success and 1 in case of error
14746
 */
14747
int
14748
xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14749
                 int size, const char *filename, const char *encoding)
14750
0
{
14751
0
    xmlParserInputPtr inputStream;
14752
0
    xmlParserInputBufferPtr buf;
14753
0
    xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14754
14755
0
    if (ctxt == NULL)
14756
0
        return(1);
14757
14758
0
    if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14759
0
        enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14760
14761
0
    buf = xmlAllocParserInputBuffer(enc);
14762
0
    if (buf == NULL)
14763
0
        return(1);
14764
14765
0
    if (ctxt == NULL) {
14766
0
        xmlFreeParserInputBuffer(buf);
14767
0
        return(1);
14768
0
    }
14769
14770
0
    xmlCtxtReset(ctxt);
14771
14772
0
    if (filename == NULL) {
14773
0
        ctxt->directory = NULL;
14774
0
    } else {
14775
0
        ctxt->directory = xmlParserGetDirectory(filename);
14776
0
    }
14777
14778
0
    inputStream = xmlNewInputStream(ctxt);
14779
0
    if (inputStream == NULL) {
14780
0
        xmlFreeParserInputBuffer(buf);
14781
0
        return(1);
14782
0
    }
14783
14784
0
    if (filename == NULL)
14785
0
        inputStream->filename = NULL;
14786
0
    else
14787
0
        inputStream->filename = (char *)
14788
0
            xmlCanonicPath((const xmlChar *) filename);
14789
0
    inputStream->buf = buf;
14790
0
    xmlBufResetInput(buf->buffer, inputStream);
14791
14792
0
    inputPush(ctxt, inputStream);
14793
14794
0
    if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14795
0
        (ctxt->input->buf != NULL)) {
14796
0
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
14797
0
        size_t cur = ctxt->input->cur - ctxt->input->base;
14798
14799
0
        xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14800
14801
0
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
14802
#ifdef DEBUG_PUSH
14803
        xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14804
#endif
14805
0
    }
14806
14807
0
    if (encoding != NULL) {
14808
0
        xmlCharEncodingHandlerPtr hdlr;
14809
14810
0
        if (ctxt->encoding != NULL)
14811
0
      xmlFree((xmlChar *) ctxt->encoding);
14812
0
        ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14813
14814
0
        hdlr = xmlFindCharEncodingHandler(encoding);
14815
0
        if (hdlr != NULL) {
14816
0
            xmlSwitchToEncoding(ctxt, hdlr);
14817
0
  } else {
14818
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14819
0
            "Unsupported encoding %s\n", BAD_CAST encoding);
14820
0
        }
14821
0
    } else if (enc != XML_CHAR_ENCODING_NONE) {
14822
0
        xmlSwitchEncoding(ctxt, enc);
14823
0
    }
14824
14825
0
    return(0);
14826
0
}
14827
14828
14829
/**
14830
 * xmlCtxtUseOptionsInternal:
14831
 * @ctxt: an XML parser context
14832
 * @options:  a combination of xmlParserOption
14833
 * @encoding:  the user provided encoding to use
14834
 *
14835
 * Applies the options to the parser context
14836
 *
14837
 * Returns 0 in case of success, the set of unknown or unimplemented options
14838
 *         in case of error.
14839
 */
14840
static int
14841
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
14842
228k
{
14843
228k
    if (ctxt == NULL)
14844
0
        return(-1);
14845
228k
    if (encoding != NULL) {
14846
0
        if (ctxt->encoding != NULL)
14847
0
      xmlFree((xmlChar *) ctxt->encoding);
14848
0
        ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14849
0
    }
14850
228k
    if (options & XML_PARSE_RECOVER) {
14851
124k
        ctxt->recovery = 1;
14852
124k
        options -= XML_PARSE_RECOVER;
14853
124k
  ctxt->options |= XML_PARSE_RECOVER;
14854
124k
    } else
14855
103k
        ctxt->recovery = 0;
14856
228k
    if (options & XML_PARSE_DTDLOAD) {
14857
138k
        ctxt->loadsubset = XML_DETECT_IDS;
14858
138k
        options -= XML_PARSE_DTDLOAD;
14859
138k
  ctxt->options |= XML_PARSE_DTDLOAD;
14860
138k
    } else
14861
89.4k
        ctxt->loadsubset = 0;
14862
228k
    if (options & XML_PARSE_DTDATTR) {
14863
90.7k
        ctxt->loadsubset |= XML_COMPLETE_ATTRS;
14864
90.7k
        options -= XML_PARSE_DTDATTR;
14865
90.7k
  ctxt->options |= XML_PARSE_DTDATTR;
14866
90.7k
    }
14867
228k
    if (options & XML_PARSE_NOENT) {
14868
129k
        ctxt->replaceEntities = 1;
14869
        /* ctxt->loadsubset |= XML_DETECT_IDS; */
14870
129k
        options -= XML_PARSE_NOENT;
14871
129k
  ctxt->options |= XML_PARSE_NOENT;
14872
129k
    } else
14873
99.0k
        ctxt->replaceEntities = 0;
14874
228k
    if (options & XML_PARSE_PEDANTIC) {
14875
32.1k
        ctxt->pedantic = 1;
14876
32.1k
        options -= XML_PARSE_PEDANTIC;
14877
32.1k
  ctxt->options |= XML_PARSE_PEDANTIC;
14878
32.1k
    } else
14879
196k
        ctxt->pedantic = 0;
14880
228k
    if (options & XML_PARSE_NOBLANKS) {
14881
88.1k
        ctxt->keepBlanks = 0;
14882
88.1k
        ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
14883
88.1k
        options -= XML_PARSE_NOBLANKS;
14884
88.1k
  ctxt->options |= XML_PARSE_NOBLANKS;
14885
88.1k
    } else
14886
140k
        ctxt->keepBlanks = 1;
14887
228k
    if (options & XML_PARSE_DTDVALID) {
14888
105k
        ctxt->validate = 1;
14889
105k
        if (options & XML_PARSE_NOWARNING)
14890
49.7k
            ctxt->vctxt.warning = NULL;
14891
105k
        if (options & XML_PARSE_NOERROR)
14892
82.4k
            ctxt->vctxt.error = NULL;
14893
105k
        options -= XML_PARSE_DTDVALID;
14894
105k
  ctxt->options |= XML_PARSE_DTDVALID;
14895
105k
    } else
14896
123k
        ctxt->validate = 0;
14897
228k
    if (options & XML_PARSE_NOWARNING) {
14898
96.8k
        ctxt->sax->warning = NULL;
14899
96.8k
        options -= XML_PARSE_NOWARNING;
14900
96.8k
    }
14901
228k
    if (options & XML_PARSE_NOERROR) {
14902
136k
        ctxt->sax->error = NULL;
14903
136k
        ctxt->sax->fatalError = NULL;
14904
136k
        options -= XML_PARSE_NOERROR;
14905
136k
    }
14906
228k
#ifdef LIBXML_SAX1_ENABLED
14907
228k
    if (options & XML_PARSE_SAX1) {
14908
84.5k
        ctxt->sax->startElement = xmlSAX2StartElement;
14909
84.5k
        ctxt->sax->endElement = xmlSAX2EndElement;
14910
84.5k
        ctxt->sax->startElementNs = NULL;
14911
84.5k
        ctxt->sax->endElementNs = NULL;
14912
84.5k
        ctxt->sax->initialized = 1;
14913
84.5k
        options -= XML_PARSE_SAX1;
14914
84.5k
  ctxt->options |= XML_PARSE_SAX1;
14915
84.5k
    }
14916
228k
#endif /* LIBXML_SAX1_ENABLED */
14917
228k
    if (options & XML_PARSE_NODICT) {
14918
81.5k
        ctxt->dictNames = 0;
14919
81.5k
        options -= XML_PARSE_NODICT;
14920
81.5k
  ctxt->options |= XML_PARSE_NODICT;
14921
146k
    } else {
14922
146k
        ctxt->dictNames = 1;
14923
146k
    }
14924
228k
    if (options & XML_PARSE_NOCDATA) {
14925
88.3k
        ctxt->sax->cdataBlock = NULL;
14926
88.3k
        options -= XML_PARSE_NOCDATA;
14927
88.3k
  ctxt->options |= XML_PARSE_NOCDATA;
14928
88.3k
    }
14929
228k
    if (options & XML_PARSE_NSCLEAN) {
14930
134k
  ctxt->options |= XML_PARSE_NSCLEAN;
14931
134k
        options -= XML_PARSE_NSCLEAN;
14932
134k
    }
14933
228k
    if (options & XML_PARSE_NONET) {
14934
83.4k
  ctxt->options |= XML_PARSE_NONET;
14935
83.4k
        options -= XML_PARSE_NONET;
14936
83.4k
    }
14937
228k
    if (options & XML_PARSE_COMPACT) {
14938
135k
  ctxt->options |= XML_PARSE_COMPACT;
14939
135k
        options -= XML_PARSE_COMPACT;
14940
135k
    }
14941
228k
    if (options & XML_PARSE_OLD10) {
14942
78.8k
  ctxt->options |= XML_PARSE_OLD10;
14943
78.8k
        options -= XML_PARSE_OLD10;
14944
78.8k
    }
14945
228k
    if (options & XML_PARSE_NOBASEFIX) {
14946
92.7k
  ctxt->options |= XML_PARSE_NOBASEFIX;
14947
92.7k
        options -= XML_PARSE_NOBASEFIX;
14948
92.7k
    }
14949
228k
    if (options & XML_PARSE_HUGE) {
14950
84.2k
  ctxt->options |= XML_PARSE_HUGE;
14951
84.2k
        options -= XML_PARSE_HUGE;
14952
84.2k
        if (ctxt->dict != NULL)
14953
84.2k
            xmlDictSetLimit(ctxt->dict, 0);
14954
84.2k
    }
14955
228k
    if (options & XML_PARSE_OLDSAX) {
14956
78.8k
  ctxt->options |= XML_PARSE_OLDSAX;
14957
78.8k
        options -= XML_PARSE_OLDSAX;
14958
78.8k
    }
14959
228k
    if (options & XML_PARSE_IGNORE_ENC) {
14960
128k
  ctxt->options |= XML_PARSE_IGNORE_ENC;
14961
128k
        options -= XML_PARSE_IGNORE_ENC;
14962
128k
    }
14963
228k
    if (options & XML_PARSE_BIG_LINES) {
14964
88.6k
  ctxt->options |= XML_PARSE_BIG_LINES;
14965
88.6k
        options -= XML_PARSE_BIG_LINES;
14966
88.6k
    }
14967
228k
    ctxt->linenumbers = 1;
14968
228k
    return (options);
14969
228k
}
14970
14971
/**
14972
 * xmlCtxtUseOptions:
14973
 * @ctxt: an XML parser context
14974
 * @options:  a combination of xmlParserOption
14975
 *
14976
 * Applies the options to the parser context
14977
 *
14978
 * Returns 0 in case of success, the set of unknown or unimplemented options
14979
 *         in case of error.
14980
 */
14981
int
14982
xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
14983
152k
{
14984
152k
   return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
14985
152k
}
14986
14987
/**
14988
 * xmlDoRead:
14989
 * @ctxt:  an XML parser context
14990
 * @URL:  the base URL to use for the document
14991
 * @encoding:  the document encoding, or NULL
14992
 * @options:  a combination of xmlParserOption
14993
 * @reuse:  keep the context for reuse
14994
 *
14995
 * Common front-end for the xmlRead functions
14996
 *
14997
 * Returns the resulting document tree or NULL
14998
 */
14999
static xmlDocPtr
15000
xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
15001
          int options, int reuse)
15002
76.0k
{
15003
76.0k
    xmlDocPtr ret;
15004
15005
76.0k
    xmlCtxtUseOptionsInternal(ctxt, options, encoding);
15006
76.0k
    if (encoding != NULL) {
15007
0
        xmlCharEncodingHandlerPtr hdlr;
15008
15009
0
  hdlr = xmlFindCharEncodingHandler(encoding);
15010
0
  if (hdlr != NULL)
15011
0
      xmlSwitchToEncoding(ctxt, hdlr);
15012
0
    }
15013
76.0k
    if ((URL != NULL) && (ctxt->input != NULL) &&
15014
76.0k
        (ctxt->input->filename == NULL))
15015
76.0k
        ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
15016
76.0k
    xmlParseDocument(ctxt);
15017
76.0k
    if ((ctxt->wellFormed) || ctxt->recovery)
15018
47.8k
        ret = ctxt->myDoc;
15019
28.2k
    else {
15020
28.2k
        ret = NULL;
15021
28.2k
  if (ctxt->myDoc != NULL) {
15022
26.4k
      xmlFreeDoc(ctxt->myDoc);
15023
26.4k
  }
15024
28.2k
    }
15025
76.0k
    ctxt->myDoc = NULL;
15026
76.0k
    if (!reuse) {
15027
76.0k
  xmlFreeParserCtxt(ctxt);
15028
76.0k
    }
15029
15030
76.0k
    return (ret);
15031
76.0k
}
15032
15033
/**
15034
 * xmlReadDoc:
15035
 * @cur:  a pointer to a zero terminated string
15036
 * @URL:  the base URL to use for the document
15037
 * @encoding:  the document encoding, or NULL
15038
 * @options:  a combination of xmlParserOption
15039
 *
15040
 * parse an XML in-memory document and build a tree.
15041
 *
15042
 * Returns the resulting document tree
15043
 */
15044
xmlDocPtr
15045
xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
15046
0
{
15047
0
    xmlParserCtxtPtr ctxt;
15048
15049
0
    if (cur == NULL)
15050
0
        return (NULL);
15051
0
    xmlInitParser();
15052
15053
0
    ctxt = xmlCreateDocParserCtxt(cur);
15054
0
    if (ctxt == NULL)
15055
0
        return (NULL);
15056
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15057
0
}
15058
15059
/**
15060
 * xmlReadFile:
15061
 * @filename:  a file or URL
15062
 * @encoding:  the document encoding, or NULL
15063
 * @options:  a combination of xmlParserOption
15064
 *
15065
 * parse an XML file from the filesystem or the network.
15066
 *
15067
 * Returns the resulting document tree
15068
 */
15069
xmlDocPtr
15070
xmlReadFile(const char *filename, const char *encoding, int options)
15071
0
{
15072
0
    xmlParserCtxtPtr ctxt;
15073
15074
0
    xmlInitParser();
15075
0
    ctxt = xmlCreateURLParserCtxt(filename, options);
15076
0
    if (ctxt == NULL)
15077
0
        return (NULL);
15078
0
    return (xmlDoRead(ctxt, NULL, encoding, options, 0));
15079
0
}
15080
15081
/**
15082
 * xmlReadMemory:
15083
 * @buffer:  a pointer to a char array
15084
 * @size:  the size of the array
15085
 * @URL:  the base URL to use for the document
15086
 * @encoding:  the document encoding, or NULL
15087
 * @options:  a combination of xmlParserOption
15088
 *
15089
 * parse an XML in-memory document and build a tree.
15090
 *
15091
 * Returns the resulting document tree
15092
 */
15093
xmlDocPtr
15094
xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
15095
76.0k
{
15096
76.0k
    xmlParserCtxtPtr ctxt;
15097
15098
76.0k
    xmlInitParser();
15099
76.0k
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
15100
76.0k
    if (ctxt == NULL)
15101
26
        return (NULL);
15102
76.0k
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15103
76.0k
}
15104
15105
/**
15106
 * xmlReadFd:
15107
 * @fd:  an open file descriptor
15108
 * @URL:  the base URL to use for the document
15109
 * @encoding:  the document encoding, or NULL
15110
 * @options:  a combination of xmlParserOption
15111
 *
15112
 * parse an XML from a file descriptor and build a tree.
15113
 * NOTE that the file descriptor will not be closed when the
15114
 *      reader is closed or reset.
15115
 *
15116
 * Returns the resulting document tree
15117
 */
15118
xmlDocPtr
15119
xmlReadFd(int fd, const char *URL, const char *encoding, int options)
15120
0
{
15121
0
    xmlParserCtxtPtr ctxt;
15122
0
    xmlParserInputBufferPtr input;
15123
0
    xmlParserInputPtr stream;
15124
15125
0
    if (fd < 0)
15126
0
        return (NULL);
15127
0
    xmlInitParser();
15128
15129
0
    input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15130
0
    if (input == NULL)
15131
0
        return (NULL);
15132
0
    input->closecallback = NULL;
15133
0
    ctxt = xmlNewParserCtxt();
15134
0
    if (ctxt == NULL) {
15135
0
        xmlFreeParserInputBuffer(input);
15136
0
        return (NULL);
15137
0
    }
15138
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15139
0
    if (stream == NULL) {
15140
0
        xmlFreeParserInputBuffer(input);
15141
0
  xmlFreeParserCtxt(ctxt);
15142
0
        return (NULL);
15143
0
    }
15144
0
    inputPush(ctxt, stream);
15145
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15146
0
}
15147
15148
/**
15149
 * xmlReadIO:
15150
 * @ioread:  an I/O read function
15151
 * @ioclose:  an I/O close function
15152
 * @ioctx:  an I/O handler
15153
 * @URL:  the base URL to use for the document
15154
 * @encoding:  the document encoding, or NULL
15155
 * @options:  a combination of xmlParserOption
15156
 *
15157
 * parse an XML document from I/O functions and source and build a tree.
15158
 *
15159
 * Returns the resulting document tree
15160
 */
15161
xmlDocPtr
15162
xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
15163
          void *ioctx, const char *URL, const char *encoding, int options)
15164
0
{
15165
0
    xmlParserCtxtPtr ctxt;
15166
0
    xmlParserInputBufferPtr input;
15167
0
    xmlParserInputPtr stream;
15168
15169
0
    if (ioread == NULL)
15170
0
        return (NULL);
15171
0
    xmlInitParser();
15172
15173
0
    input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15174
0
                                         XML_CHAR_ENCODING_NONE);
15175
0
    if (input == NULL) {
15176
0
        if (ioclose != NULL)
15177
0
            ioclose(ioctx);
15178
0
        return (NULL);
15179
0
    }
15180
0
    ctxt = xmlNewParserCtxt();
15181
0
    if (ctxt == NULL) {
15182
0
        xmlFreeParserInputBuffer(input);
15183
0
        return (NULL);
15184
0
    }
15185
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15186
0
    if (stream == NULL) {
15187
0
        xmlFreeParserInputBuffer(input);
15188
0
  xmlFreeParserCtxt(ctxt);
15189
0
        return (NULL);
15190
0
    }
15191
0
    inputPush(ctxt, stream);
15192
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15193
0
}
15194
15195
/**
15196
 * xmlCtxtReadDoc:
15197
 * @ctxt:  an XML parser context
15198
 * @cur:  a pointer to a zero terminated string
15199
 * @URL:  the base URL to use for the document
15200
 * @encoding:  the document encoding, or NULL
15201
 * @options:  a combination of xmlParserOption
15202
 *
15203
 * parse an XML in-memory document and build a tree.
15204
 * This reuses the existing @ctxt parser context
15205
 *
15206
 * Returns the resulting document tree
15207
 */
15208
xmlDocPtr
15209
xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
15210
               const char *URL, const char *encoding, int options)
15211
0
{
15212
0
    if (cur == NULL)
15213
0
        return (NULL);
15214
0
    return (xmlCtxtReadMemory(ctxt, (const char *) cur, xmlStrlen(cur), URL,
15215
0
                              encoding, options));
15216
0
}
15217
15218
/**
15219
 * xmlCtxtReadFile:
15220
 * @ctxt:  an XML parser context
15221
 * @filename:  a file or URL
15222
 * @encoding:  the document encoding, or NULL
15223
 * @options:  a combination of xmlParserOption
15224
 *
15225
 * parse an XML file from the filesystem or the network.
15226
 * This reuses the existing @ctxt parser context
15227
 *
15228
 * Returns the resulting document tree
15229
 */
15230
xmlDocPtr
15231
xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15232
                const char *encoding, int options)
15233
0
{
15234
0
    xmlParserInputPtr stream;
15235
15236
0
    if (filename == NULL)
15237
0
        return (NULL);
15238
0
    if (ctxt == NULL)
15239
0
        return (NULL);
15240
0
    xmlInitParser();
15241
15242
0
    xmlCtxtReset(ctxt);
15243
15244
0
    stream = xmlLoadExternalEntity(filename, NULL, ctxt);
15245
0
    if (stream == NULL) {
15246
0
        return (NULL);
15247
0
    }
15248
0
    inputPush(ctxt, stream);
15249
0
    return (xmlDoRead(ctxt, NULL, encoding, options, 1));
15250
0
}
15251
15252
/**
15253
 * xmlCtxtReadMemory:
15254
 * @ctxt:  an XML parser context
15255
 * @buffer:  a pointer to a char array
15256
 * @size:  the size of the array
15257
 * @URL:  the base URL to use for the document
15258
 * @encoding:  the document encoding, or NULL
15259
 * @options:  a combination of xmlParserOption
15260
 *
15261
 * parse an XML in-memory document and build a tree.
15262
 * This reuses the existing @ctxt parser context
15263
 *
15264
 * Returns the resulting document tree
15265
 */
15266
xmlDocPtr
15267
xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
15268
                  const char *URL, const char *encoding, int options)
15269
0
{
15270
0
    xmlParserInputBufferPtr input;
15271
0
    xmlParserInputPtr stream;
15272
15273
0
    if (ctxt == NULL)
15274
0
        return (NULL);
15275
0
    if (buffer == NULL)
15276
0
        return (NULL);
15277
0
    xmlInitParser();
15278
15279
0
    xmlCtxtReset(ctxt);
15280
15281
0
    input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
15282
0
    if (input == NULL) {
15283
0
  return(NULL);
15284
0
    }
15285
15286
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15287
0
    if (stream == NULL) {
15288
0
  xmlFreeParserInputBuffer(input);
15289
0
  return(NULL);
15290
0
    }
15291
15292
0
    inputPush(ctxt, stream);
15293
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15294
0
}
15295
15296
/**
15297
 * xmlCtxtReadFd:
15298
 * @ctxt:  an XML parser context
15299
 * @fd:  an open file descriptor
15300
 * @URL:  the base URL to use for the document
15301
 * @encoding:  the document encoding, or NULL
15302
 * @options:  a combination of xmlParserOption
15303
 *
15304
 * parse an XML from a file descriptor and build a tree.
15305
 * This reuses the existing @ctxt parser context
15306
 * NOTE that the file descriptor will not be closed when the
15307
 *      reader is closed or reset.
15308
 *
15309
 * Returns the resulting document tree
15310
 */
15311
xmlDocPtr
15312
xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15313
              const char *URL, const char *encoding, int options)
15314
0
{
15315
0
    xmlParserInputBufferPtr input;
15316
0
    xmlParserInputPtr stream;
15317
15318
0
    if (fd < 0)
15319
0
        return (NULL);
15320
0
    if (ctxt == NULL)
15321
0
        return (NULL);
15322
0
    xmlInitParser();
15323
15324
0
    xmlCtxtReset(ctxt);
15325
15326
15327
0
    input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15328
0
    if (input == NULL)
15329
0
        return (NULL);
15330
0
    input->closecallback = NULL;
15331
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15332
0
    if (stream == NULL) {
15333
0
        xmlFreeParserInputBuffer(input);
15334
0
        return (NULL);
15335
0
    }
15336
0
    inputPush(ctxt, stream);
15337
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15338
0
}
15339
15340
/**
15341
 * xmlCtxtReadIO:
15342
 * @ctxt:  an XML parser context
15343
 * @ioread:  an I/O read function
15344
 * @ioclose:  an I/O close function
15345
 * @ioctx:  an I/O handler
15346
 * @URL:  the base URL to use for the document
15347
 * @encoding:  the document encoding, or NULL
15348
 * @options:  a combination of xmlParserOption
15349
 *
15350
 * parse an XML document from I/O functions and source and build a tree.
15351
 * This reuses the existing @ctxt parser context
15352
 *
15353
 * Returns the resulting document tree
15354
 */
15355
xmlDocPtr
15356
xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15357
              xmlInputCloseCallback ioclose, void *ioctx,
15358
        const char *URL,
15359
              const char *encoding, int options)
15360
0
{
15361
0
    xmlParserInputBufferPtr input;
15362
0
    xmlParserInputPtr stream;
15363
15364
0
    if (ioread == NULL)
15365
0
        return (NULL);
15366
0
    if (ctxt == NULL)
15367
0
        return (NULL);
15368
0
    xmlInitParser();
15369
15370
0
    xmlCtxtReset(ctxt);
15371
15372
0
    input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15373
0
                                         XML_CHAR_ENCODING_NONE);
15374
0
    if (input == NULL) {
15375
0
        if (ioclose != NULL)
15376
0
            ioclose(ioctx);
15377
0
        return (NULL);
15378
0
    }
15379
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15380
0
    if (stream == NULL) {
15381
0
        xmlFreeParserInputBuffer(input);
15382
0
        return (NULL);
15383
0
    }
15384
0
    inputPush(ctxt, stream);
15385
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15386
0
}
15387