Coverage Report

Created: 2024-05-08 16:09

/src/libxml2/parser.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3
 *            implemented on top of the SAX interfaces
4
 *
5
 * References:
6
 *   The XML specification:
7
 *     http://www.w3.org/TR/REC-xml
8
 *   Original 1.0 version:
9
 *     http://www.w3.org/TR/1998/REC-xml-19980210
10
 *   XML second edition working draft
11
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
12
 *
13
 * Okay this is a big file, the parser core is around 7000 lines, then it
14
 * is followed by the progressive parser top routines, then the various
15
 * high level APIs to call the parser and a few miscellaneous functions.
16
 * A number of helper functions and deprecated ones have been moved to
17
 * parserInternals.c to reduce this file size.
18
 * As much as possible the functions are associated with their relative
19
 * production in the XML specification. A few productions defining the
20
 * different ranges of characters are actually implemented either in
21
 * parserInternals.h or parserInternals.c
22
 * The DOM tree build is realized from the default SAX callbacks in
23
 * the module SAX.c.
24
 * The routines doing the validation checks are in valid.c and called either
25
 * from the SAX callbacks or as standalone functions using a preparsed
26
 * document.
27
 *
28
 * See Copyright for the status of this software.
29
 *
30
 * daniel@veillard.com
31
 */
32
33
/* To avoid EBCDIC trouble when parsing on zOS */
34
#if defined(__MVS__)
35
#pragma convert("ISO8859-1")
36
#endif
37
38
#define IN_LIBXML
39
#include "libxml.h"
40
41
#if defined(_WIN32)
42
#define XML_DIR_SEP '\\'
43
#else
44
#define XML_DIR_SEP '/'
45
#endif
46
47
#include <stdlib.h>
48
#include <limits.h>
49
#include <string.h>
50
#include <stdarg.h>
51
#include <stddef.h>
52
#include <ctype.h>
53
#include <stdlib.h>
54
#include <libxml/xmlmemory.h>
55
#include <libxml/threads.h>
56
#include <libxml/globals.h>
57
#include <libxml/tree.h>
58
#include <libxml/parser.h>
59
#include <libxml/parserInternals.h>
60
#include <libxml/HTMLparser.h>
61
#include <libxml/valid.h>
62
#include <libxml/entities.h>
63
#include <libxml/xmlerror.h>
64
#include <libxml/encoding.h>
65
#include <libxml/xmlIO.h>
66
#include <libxml/uri.h>
67
#ifdef LIBXML_CATALOG_ENABLED
68
#include <libxml/catalog.h>
69
#endif
70
#ifdef LIBXML_SCHEMAS_ENABLED
71
#include <libxml/xmlschemastypes.h>
72
#include <libxml/relaxng.h>
73
#endif
74
#if defined(LIBXML_XPATH_ENABLED) || defined(LIBXML_SCHEMAS_ENABLED)
75
#include <libxml/xpath.h>
76
#endif
77
78
#include "private/buf.h"
79
#include "private/dict.h"
80
#include "private/enc.h"
81
#include "private/entities.h"
82
#include "private/error.h"
83
#include "private/globals.h"
84
#include "private/html.h"
85
#include "private/io.h"
86
#include "private/memory.h"
87
#include "private/parser.h"
88
#include "private/threads.h"
89
#include "private/xpath.h"
90
91
struct _xmlStartTag {
92
    const xmlChar *prefix;
93
    const xmlChar *URI;
94
    int line;
95
    int nsNr;
96
};
97
98
static xmlParserCtxtPtr
99
xmlCreateEntityParserCtxtInternal(xmlSAXHandlerPtr sax, void *userData,
100
        const xmlChar *URL, const xmlChar *ID, const xmlChar *base,
101
        xmlParserCtxtPtr pctx);
102
103
static void xmlHaltParser(xmlParserCtxtPtr ctxt);
104
105
static int
106
xmlParseElementStart(xmlParserCtxtPtr ctxt);
107
108
static void
109
xmlParseElementEnd(xmlParserCtxtPtr ctxt);
110
111
/************************************************************************
112
 *                  *
113
 *  Arbitrary limits set in the parser. See XML_PARSE_HUGE    *
114
 *                  *
115
 ************************************************************************/
116
117
10.2M
#define XML_MAX_HUGE_LENGTH 1000000000
118
119
#define XML_PARSER_BIG_ENTITY 1000
120
#define XML_PARSER_LOT_ENTITY 5000
121
122
/*
123
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
124
 *    replacement over the size in byte of the input indicates that you have
125
 *    an exponential behaviour. A value of 10 corresponds to at least 3 entity
126
 *    replacement per byte of input.
127
 */
128
1.13k
#define XML_PARSER_NON_LINEAR 10
129
130
84.4M
#define XML_ENT_FIXED_COST 50
131
132
/**
133
 * xmlParserMaxDepth:
134
 *
135
 * arbitrary depth limit for the XML documents that we allow to
136
 * process. This is not a limitation of the parser but a safety
137
 * boundary feature. It can be disabled with the XML_PARSE_HUGE
138
 * parser option.
139
 */
140
unsigned int xmlParserMaxDepth = 256;
141
142
143
144
#define SAX2 1
145
38.2M
#define XML_PARSER_BIG_BUFFER_SIZE 300
146
9.21G
#define XML_PARSER_BUFFER_SIZE 100
147
250k
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
148
149
/**
150
 * XML_PARSER_CHUNK_SIZE
151
 *
152
 * When calling GROW that's the minimal amount of data
153
 * the parser expects to have received. It is not a hard
154
 * limit but an optimization when reading strings like Names
155
 * It is not strictly needed as long as the input's available characters
156
 * are followed by 0, which should be provided by the I/O level
157
 */
158
11.5M
#define XML_PARSER_CHUNK_SIZE 100
159
160
/*
161
 * List of XML prefixed PI allowed by W3C specs
162
 */
163
164
static const char* const xmlW3CPIs[] = {
165
    "xml-stylesheet",
166
    "xml-model",
167
    NULL
168
};
169
170
171
/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
172
static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
173
                                              const xmlChar **str);
174
175
static xmlParserErrors
176
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
177
                xmlSAXHandlerPtr sax,
178
          void *user_data, int depth, const xmlChar *URL,
179
          const xmlChar *ID, xmlNodePtr *list);
180
181
static int
182
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
183
                          const char *encoding);
184
#ifdef LIBXML_LEGACY_ENABLED
185
static void
186
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
187
                      xmlNodePtr lastNode);
188
#endif /* LIBXML_LEGACY_ENABLED */
189
190
static xmlParserErrors
191
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
192
          const xmlChar *string, void *user_data, xmlNodePtr *lst);
193
194
static int
195
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
196
197
/************************************************************************
198
 *                  *
199
 *    Some factorized error routines        *
200
 *                  *
201
 ************************************************************************/
202
203
/**
204
 * xmlErrAttributeDup:
205
 * @ctxt:  an XML parser context
206
 * @prefix:  the attribute prefix
207
 * @localname:  the attribute localname
208
 *
209
 * Handle a redefinition of attribute error
210
 */
211
static void
212
xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
213
                   const xmlChar * localname)
214
1.84k
{
215
1.84k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
216
1.84k
        (ctxt->instate == XML_PARSER_EOF))
217
0
  return;
218
1.84k
    if (ctxt != NULL)
219
1.84k
  ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
220
221
1.84k
    if (prefix == NULL)
222
835
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
223
835
                        XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
224
835
                        (const char *) localname, NULL, NULL, 0, 0,
225
835
                        "Attribute %s redefined\n", localname);
226
1.00k
    else
227
1.00k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
228
1.00k
                        XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
229
1.00k
                        (const char *) prefix, (const char *) localname,
230
1.00k
                        NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
231
1.00k
                        localname);
232
1.84k
    if (ctxt != NULL) {
233
1.84k
  ctxt->wellFormed = 0;
234
1.84k
  if (ctxt->recovery == 0)
235
697
      ctxt->disableSAX = 1;
236
1.84k
    }
237
1.84k
}
238
239
/**
240
 * xmlFatalErr:
241
 * @ctxt:  an XML parser context
242
 * @error:  the error number
243
 * @info:  extra information string
244
 *
245
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
246
 */
247
static void
248
xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
249
656k
{
250
656k
    const char *errmsg;
251
252
656k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
253
656k
        (ctxt->instate == XML_PARSER_EOF))
254
4.47k
  return;
255
652k
    switch (error) {
256
1.89k
        case XML_ERR_INVALID_HEX_CHARREF:
257
1.89k
            errmsg = "CharRef: invalid hexadecimal value";
258
1.89k
            break;
259
4.49k
        case XML_ERR_INVALID_DEC_CHARREF:
260
4.49k
            errmsg = "CharRef: invalid decimal value";
261
4.49k
            break;
262
0
        case XML_ERR_INVALID_CHARREF:
263
0
            errmsg = "CharRef: invalid value";
264
0
            break;
265
49.2k
        case XML_ERR_INTERNAL_ERROR:
266
49.2k
            errmsg = "internal error";
267
49.2k
            break;
268
0
        case XML_ERR_PEREF_AT_EOF:
269
0
            errmsg = "PEReference at end of document";
270
0
            break;
271
0
        case XML_ERR_PEREF_IN_PROLOG:
272
0
            errmsg = "PEReference in prolog";
273
0
            break;
274
0
        case XML_ERR_PEREF_IN_EPILOG:
275
0
            errmsg = "PEReference in epilog";
276
0
            break;
277
0
        case XML_ERR_PEREF_NO_NAME:
278
0
            errmsg = "PEReference: no name";
279
0
            break;
280
397k
        case XML_ERR_PEREF_SEMICOL_MISSING:
281
397k
            errmsg = "PEReference: expecting ';'";
282
397k
            break;
283
480
        case XML_ERR_ENTITY_LOOP:
284
480
            errmsg = "Detected an entity reference loop";
285
480
            break;
286
0
        case XML_ERR_ENTITY_NOT_STARTED:
287
0
            errmsg = "EntityValue: \" or ' expected";
288
0
            break;
289
67
        case XML_ERR_ENTITY_PE_INTERNAL:
290
67
            errmsg = "PEReferences forbidden in internal subset";
291
67
            break;
292
1.30k
        case XML_ERR_ENTITY_NOT_FINISHED:
293
1.30k
            errmsg = "EntityValue: \" or ' expected";
294
1.30k
            break;
295
5.26k
        case XML_ERR_ATTRIBUTE_NOT_STARTED:
296
5.26k
            errmsg = "AttValue: \" or ' expected";
297
5.26k
            break;
298
9.00k
        case XML_ERR_LT_IN_ATTRIBUTE:
299
9.00k
            errmsg = "Unescaped '<' not allowed in attributes values";
300
9.00k
            break;
301
1.75k
        case XML_ERR_LITERAL_NOT_STARTED:
302
1.75k
            errmsg = "SystemLiteral \" or ' expected";
303
1.75k
            break;
304
1.75k
        case XML_ERR_LITERAL_NOT_FINISHED:
305
1.75k
            errmsg = "Unfinished System or Public ID \" or ' expected";
306
1.75k
            break;
307
2.03k
        case XML_ERR_MISPLACED_CDATA_END:
308
2.03k
            errmsg = "Sequence ']]>' not allowed in content";
309
2.03k
            break;
310
1.51k
        case XML_ERR_URI_REQUIRED:
311
1.51k
            errmsg = "SYSTEM or PUBLIC, the URI is missing";
312
1.51k
            break;
313
233
        case XML_ERR_PUBID_REQUIRED:
314
233
            errmsg = "PUBLIC, the Public Identifier is missing";
315
233
            break;
316
1.72k
        case XML_ERR_HYPHEN_IN_COMMENT:
317
1.72k
            errmsg = "Comment must not contain '--' (double-hyphen)";
318
1.72k
            break;
319
1.06k
        case XML_ERR_PI_NOT_STARTED:
320
1.06k
            errmsg = "xmlParsePI : no target name";
321
1.06k
            break;
322
215
        case XML_ERR_RESERVED_XML_NAME:
323
215
            errmsg = "Invalid PI name";
324
215
            break;
325
81
        case XML_ERR_NOTATION_NOT_STARTED:
326
81
            errmsg = "NOTATION: Name expected here";
327
81
            break;
328
215
        case XML_ERR_NOTATION_NOT_FINISHED:
329
215
            errmsg = "'>' required to close NOTATION declaration";
330
215
            break;
331
1.96k
        case XML_ERR_VALUE_REQUIRED:
332
1.96k
            errmsg = "Entity value required";
333
1.96k
            break;
334
69
        case XML_ERR_URI_FRAGMENT:
335
69
            errmsg = "Fragment not allowed";
336
69
            break;
337
2.18k
        case XML_ERR_ATTLIST_NOT_STARTED:
338
2.18k
            errmsg = "'(' required to start ATTLIST enumeration";
339
2.18k
            break;
340
239
        case XML_ERR_NMTOKEN_REQUIRED:
341
239
            errmsg = "NmToken expected in ATTLIST enumeration";
342
239
            break;
343
651
        case XML_ERR_ATTLIST_NOT_FINISHED:
344
651
            errmsg = "')' required to finish ATTLIST enumeration";
345
651
            break;
346
1.12k
        case XML_ERR_MIXED_NOT_STARTED:
347
1.12k
            errmsg = "MixedContentDecl : '|' or ')*' expected";
348
1.12k
            break;
349
0
        case XML_ERR_PCDATA_REQUIRED:
350
0
            errmsg = "MixedContentDecl : '#PCDATA' expected";
351
0
            break;
352
2.30k
        case XML_ERR_ELEMCONTENT_NOT_STARTED:
353
2.30k
            errmsg = "ContentDecl : Name or '(' expected";
354
2.30k
            break;
355
2.19k
        case XML_ERR_ELEMCONTENT_NOT_FINISHED:
356
2.19k
            errmsg = "ContentDecl : ',' '|' or ')' expected";
357
2.19k
            break;
358
0
        case XML_ERR_PEREF_IN_INT_SUBSET:
359
0
            errmsg =
360
0
                "PEReference: forbidden within markup decl in internal subset";
361
0
            break;
362
24.2k
        case XML_ERR_GT_REQUIRED:
363
24.2k
            errmsg = "expected '>'";
364
24.2k
            break;
365
60
        case XML_ERR_CONDSEC_INVALID:
366
60
            errmsg = "XML conditional section '[' expected";
367
60
            break;
368
8.38k
        case XML_ERR_EXT_SUBSET_NOT_FINISHED:
369
8.38k
            errmsg = "Content error in the external subset";
370
8.38k
            break;
371
365
        case XML_ERR_CONDSEC_INVALID_KEYWORD:
372
365
            errmsg =
373
365
                "conditional section INCLUDE or IGNORE keyword expected";
374
365
            break;
375
155
        case XML_ERR_CONDSEC_NOT_FINISHED:
376
155
            errmsg = "XML conditional section not closed";
377
155
            break;
378
68
        case XML_ERR_XMLDECL_NOT_STARTED:
379
68
            errmsg = "Text declaration '<?xml' required";
380
68
            break;
381
28.1k
        case XML_ERR_XMLDECL_NOT_FINISHED:
382
28.1k
            errmsg = "parsing XML declaration: '?>' expected";
383
28.1k
            break;
384
0
        case XML_ERR_EXT_ENTITY_STANDALONE:
385
0
            errmsg = "external parsed entities cannot be standalone";
386
0
            break;
387
8.86k
        case XML_ERR_ENTITYREF_SEMICOL_MISSING:
388
8.86k
            errmsg = "EntityRef: expecting ';'";
389
8.86k
            break;
390
7.08k
        case XML_ERR_DOCTYPE_NOT_FINISHED:
391
7.08k
            errmsg = "DOCTYPE improperly terminated";
392
7.08k
            break;
393
0
        case XML_ERR_LTSLASH_REQUIRED:
394
0
            errmsg = "EndTag: '</' not found";
395
0
            break;
396
1.53k
        case XML_ERR_EQUAL_REQUIRED:
397
1.53k
            errmsg = "expected '='";
398
1.53k
            break;
399
6.47k
        case XML_ERR_STRING_NOT_CLOSED:
400
6.47k
            errmsg = "String not closed expecting \" or '";
401
6.47k
            break;
402
1.50k
        case XML_ERR_STRING_NOT_STARTED:
403
1.50k
            errmsg = "String not started expecting ' or \"";
404
1.50k
            break;
405
252
        case XML_ERR_ENCODING_NAME:
406
252
            errmsg = "Invalid XML encoding name";
407
252
            break;
408
318
        case XML_ERR_STANDALONE_VALUE:
409
318
            errmsg = "standalone accepts only 'yes' or 'no'";
410
318
            break;
411
9.35k
        case XML_ERR_DOCUMENT_EMPTY:
412
9.35k
            errmsg = "Document is empty";
413
9.35k
            break;
414
46.3k
        case XML_ERR_DOCUMENT_END:
415
46.3k
            errmsg = "Extra content at the end of the document";
416
46.3k
            break;
417
1.13k
        case XML_ERR_NOT_WELL_BALANCED:
418
1.13k
            errmsg = "chunk is not well balanced";
419
1.13k
            break;
420
0
        case XML_ERR_EXTRA_CONTENT:
421
0
            errmsg = "extra content at the end of well balanced chunk";
422
0
            break;
423
16.9k
        case XML_ERR_VERSION_MISSING:
424
16.9k
            errmsg = "Malformed declaration expecting version";
425
16.9k
            break;
426
0
        case XML_ERR_NAME_TOO_LONG:
427
0
            errmsg = "Name too long";
428
0
            break;
429
#if 0
430
        case:
431
            errmsg = "";
432
            break;
433
#endif
434
83
        default:
435
83
            errmsg = "Unregistered error message";
436
652k
    }
437
652k
    if (ctxt != NULL)
438
652k
  ctxt->errNo = error;
439
652k
    if (info == NULL) {
440
602k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
441
602k
                        XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s\n",
442
602k
                        errmsg);
443
602k
    } else {
444
49.2k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
445
49.2k
                        XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s: %s\n",
446
49.2k
                        errmsg, info);
447
49.2k
    }
448
652k
    if (ctxt != NULL) {
449
652k
  ctxt->wellFormed = 0;
450
652k
  if (ctxt->recovery == 0)
451
445k
      ctxt->disableSAX = 1;
452
652k
    }
453
652k
}
454
455
/**
456
 * xmlFatalErrMsg:
457
 * @ctxt:  an XML parser context
458
 * @error:  the error number
459
 * @msg:  the error message
460
 *
461
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
462
 */
463
static void LIBXML_ATTR_FORMAT(3,0)
464
xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
465
               const char *msg)
466
240k
{
467
240k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
468
240k
        (ctxt->instate == XML_PARSER_EOF))
469
62
  return;
470
240k
    if (ctxt != NULL)
471
240k
  ctxt->errNo = error;
472
240k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
473
240k
                    XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
474
240k
    if (ctxt != NULL) {
475
240k
  ctxt->wellFormed = 0;
476
240k
  if (ctxt->recovery == 0)
477
86.0k
      ctxt->disableSAX = 1;
478
240k
    }
479
240k
}
480
481
/**
482
 * xmlWarningMsg:
483
 * @ctxt:  an XML parser context
484
 * @error:  the error number
485
 * @msg:  the error message
486
 * @str1:  extra data
487
 * @str2:  extra data
488
 *
489
 * Handle a warning.
490
 */
491
static void LIBXML_ATTR_FORMAT(3,0)
492
xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
493
              const char *msg, const xmlChar *str1, const xmlChar *str2)
494
5.81M
{
495
5.81M
    xmlStructuredErrorFunc schannel = NULL;
496
497
5.81M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
498
5.81M
        (ctxt->instate == XML_PARSER_EOF))
499
0
  return;
500
5.81M
    if ((ctxt != NULL) && (ctxt->sax != NULL) &&
501
5.81M
        (ctxt->sax->initialized == XML_SAX2_MAGIC))
502
3.61M
        schannel = ctxt->sax->serror;
503
5.81M
    if (ctxt != NULL) {
504
5.81M
        __xmlRaiseError(schannel,
505
5.81M
                    (ctxt->sax) ? ctxt->sax->warning : NULL,
506
5.81M
                    ctxt->userData,
507
5.81M
                    ctxt, NULL, XML_FROM_PARSER, error,
508
5.81M
                    XML_ERR_WARNING, NULL, 0,
509
5.81M
        (const char *) str1, (const char *) str2, NULL, 0, 0,
510
5.81M
        msg, (const char *) str1, (const char *) str2);
511
5.81M
    } else {
512
0
        __xmlRaiseError(schannel, NULL, NULL,
513
0
                    ctxt, NULL, XML_FROM_PARSER, error,
514
0
                    XML_ERR_WARNING, NULL, 0,
515
0
        (const char *) str1, (const char *) str2, NULL, 0, 0,
516
0
        msg, (const char *) str1, (const char *) str2);
517
0
    }
518
5.81M
}
519
520
/**
521
 * xmlValidityError:
522
 * @ctxt:  an XML parser context
523
 * @error:  the error number
524
 * @msg:  the error message
525
 * @str1:  extra data
526
 *
527
 * Handle a validity error.
528
 */
529
static void LIBXML_ATTR_FORMAT(3,0)
530
xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
531
              const char *msg, const xmlChar *str1, const xmlChar *str2)
532
2.00k
{
533
2.00k
    xmlStructuredErrorFunc schannel = NULL;
534
535
2.00k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
536
2.00k
        (ctxt->instate == XML_PARSER_EOF))
537
0
  return;
538
2.00k
    if (ctxt != NULL) {
539
2.00k
  ctxt->errNo = error;
540
2.00k
  if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
541
685
      schannel = ctxt->sax->serror;
542
2.00k
    }
543
2.00k
    if (ctxt != NULL) {
544
2.00k
        __xmlRaiseError(schannel,
545
2.00k
                    ctxt->vctxt.error, ctxt->vctxt.userData,
546
2.00k
                    ctxt, NULL, XML_FROM_DTD, error,
547
2.00k
                    XML_ERR_ERROR, NULL, 0, (const char *) str1,
548
2.00k
        (const char *) str2, NULL, 0, 0,
549
2.00k
        msg, (const char *) str1, (const char *) str2);
550
2.00k
  ctxt->valid = 0;
551
2.00k
    } else {
552
0
        __xmlRaiseError(schannel, NULL, NULL,
553
0
                    ctxt, NULL, XML_FROM_DTD, error,
554
0
                    XML_ERR_ERROR, NULL, 0, (const char *) str1,
555
0
        (const char *) str2, NULL, 0, 0,
556
0
        msg, (const char *) str1, (const char *) str2);
557
0
    }
558
2.00k
}
559
560
/**
561
 * xmlFatalErrMsgInt:
562
 * @ctxt:  an XML parser context
563
 * @error:  the error number
564
 * @msg:  the error message
565
 * @val:  an integer value
566
 *
567
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
568
 */
569
static void LIBXML_ATTR_FORMAT(3,0)
570
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
571
                  const char *msg, int val)
572
331k
{
573
331k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
574
331k
        (ctxt->instate == XML_PARSER_EOF))
575
0
  return;
576
331k
    if (ctxt != NULL)
577
331k
  ctxt->errNo = error;
578
331k
    __xmlRaiseError(NULL, NULL, NULL,
579
331k
                    ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
580
331k
                    NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
581
331k
    if (ctxt != NULL) {
582
331k
  ctxt->wellFormed = 0;
583
331k
  if (ctxt->recovery == 0)
584
21.0k
      ctxt->disableSAX = 1;
585
331k
    }
586
331k
}
587
588
/**
589
 * xmlFatalErrMsgStrIntStr:
590
 * @ctxt:  an XML parser context
591
 * @error:  the error number
592
 * @msg:  the error message
593
 * @str1:  a string info
594
 * @val:  an integer value
595
 * @str2:  a string info
596
 *
597
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
598
 */
599
static void LIBXML_ATTR_FORMAT(3,0)
600
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
601
                  const char *msg, const xmlChar *str1, int val,
602
      const xmlChar *str2)
603
99.9k
{
604
99.9k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
605
99.9k
        (ctxt->instate == XML_PARSER_EOF))
606
0
  return;
607
99.9k
    if (ctxt != NULL)
608
99.9k
  ctxt->errNo = error;
609
99.9k
    __xmlRaiseError(NULL, NULL, NULL,
610
99.9k
                    ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
611
99.9k
                    NULL, 0, (const char *) str1, (const char *) str2,
612
99.9k
        NULL, val, 0, msg, str1, val, str2);
613
99.9k
    if (ctxt != NULL) {
614
99.9k
  ctxt->wellFormed = 0;
615
99.9k
  if (ctxt->recovery == 0)
616
32.1k
      ctxt->disableSAX = 1;
617
99.9k
    }
618
99.9k
}
619
620
/**
621
 * xmlFatalErrMsgStr:
622
 * @ctxt:  an XML parser context
623
 * @error:  the error number
624
 * @msg:  the error message
625
 * @val:  a string value
626
 *
627
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
628
 */
629
static void LIBXML_ATTR_FORMAT(3,0)
630
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
631
                  const char *msg, const xmlChar * val)
632
5.50M
{
633
5.50M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
634
5.50M
        (ctxt->instate == XML_PARSER_EOF))
635
0
  return;
636
5.50M
    if (ctxt != NULL)
637
5.50M
  ctxt->errNo = error;
638
5.50M
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
639
5.50M
                    XML_FROM_PARSER, error, XML_ERR_FATAL,
640
5.50M
                    NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
641
5.50M
                    val);
642
5.50M
    if (ctxt != NULL) {
643
5.50M
  ctxt->wellFormed = 0;
644
5.50M
  if (ctxt->recovery == 0)
645
4.47M
      ctxt->disableSAX = 1;
646
5.50M
    }
647
5.50M
}
648
649
/**
650
 * xmlErrMsgStr:
651
 * @ctxt:  an XML parser context
652
 * @error:  the error number
653
 * @msg:  the error message
654
 * @val:  a string value
655
 *
656
 * Handle a non-fatal parser error
657
 */
658
static void LIBXML_ATTR_FORMAT(3,0)
659
xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
660
                  const char *msg, const xmlChar * val)
661
5.16k
{
662
5.16k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
663
5.16k
        (ctxt->instate == XML_PARSER_EOF))
664
0
  return;
665
5.16k
    if (ctxt != NULL)
666
5.16k
  ctxt->errNo = error;
667
5.16k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
668
5.16k
                    XML_FROM_PARSER, error, XML_ERR_ERROR,
669
5.16k
                    NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
670
5.16k
                    val);
671
5.16k
}
672
673
/**
674
 * xmlNsErr:
675
 * @ctxt:  an XML parser context
676
 * @error:  the error number
677
 * @msg:  the message
678
 * @info1:  extra information string
679
 * @info2:  extra information string
680
 *
681
 * Handle a namespace error, raised at error (not fatal) level; clears nsWellFormed
682
 */
683
static void LIBXML_ATTR_FORMAT(3,0)
684
xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
685
         const char *msg,
686
         const xmlChar * info1, const xmlChar * info2,
687
         const xmlChar * info3)
688
44.5k
{
689
44.5k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
690
44.5k
        (ctxt->instate == XML_PARSER_EOF))
691
0
  return;
692
44.5k
    if (ctxt != NULL)
693
44.5k
  ctxt->errNo = error;
694
44.5k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
695
44.5k
                    XML_ERR_ERROR, NULL, 0, (const char *) info1,
696
44.5k
                    (const char *) info2, (const char *) info3, 0, 0, msg,
697
44.5k
                    info1, info2, info3);
698
44.5k
    if (ctxt != NULL)
699
44.5k
  ctxt->nsWellFormed = 0;
700
44.5k
}
701
702
/**
703
 * xmlNsWarn:
704
 * @ctxt:  an XML parser context
705
 * @error:  the error number
706
 * @msg:  the message
707
 * @info1:  extra information string
708
 * @info2:  extra information string
709
 *
710
 * Handle a namespace warning
711
 */
712
static void LIBXML_ATTR_FORMAT(3,0)
713
xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
714
         const char *msg,
715
         const xmlChar * info1, const xmlChar * info2,
716
         const xmlChar * info3)
717
606
{
718
606
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
719
606
        (ctxt->instate == XML_PARSER_EOF))
720
0
  return;
721
606
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
722
606
                    XML_ERR_WARNING, NULL, 0, (const char *) info1,
723
606
                    (const char *) info2, (const char *) info3, 0, 0, msg,
724
606
                    info1, info2, info3);
725
606
}
726
727
static void
728
279M
xmlSaturatedAdd(unsigned long *dst, unsigned long val) {
729
279M
    if (val > ULONG_MAX - *dst)
730
0
        *dst = ULONG_MAX;
731
279M
    else
732
279M
        *dst += val;
733
279M
}
734
735
static void
736
86.2M
xmlSaturatedAddSizeT(unsigned long *dst, unsigned long val) {
737
86.2M
    if (val > ULONG_MAX - *dst)
738
0
        *dst = ULONG_MAX;
739
86.2M
    else
740
86.2M
        *dst += val;
741
86.2M
}
742
743
/**
744
 * xmlParserEntityCheck:
745
 * @ctxt:  parser context
746
 * @extra:  sum of unexpanded entity sizes
747
 *
748
 * Check for non-linear entity expansion behaviour.
749
 *
750
 * In some cases like xmlStringDecodeEntities, this function is called
751
 * for each, possibly nested, entity and its unexpanded content length.
752
 *
753
 * In other cases like xmlParseReference, it's only called for each
754
 * top-level entity with its unexpanded content length plus the sum of
755
 * the unexpanded content lengths (plus fixed cost) of all nested
756
 * entities.
757
 *
758
 * Summing the unexpanded lengths also adds the length of the reference.
759
 * This is by design. Taking the length of the entity name into account
760
 * discourages attacks that try to waste CPU time with abusively long
761
 * entity names. See test/recurse/lol6.xml for example. Each call also
762
 * adds some fixed cost XML_ENT_FIXED_COST to discourage attacks with
763
 * short entities.
764
 *
765
 * Returns 1 on error, 0 on success.
766
 */
767
static int
768
xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long extra)
769
84.4M
{
770
84.4M
    unsigned long consumed;
771
84.4M
    xmlParserInputPtr input = ctxt->input;
772
84.4M
    xmlEntityPtr entity = input->entity;
773
774
    /*
775
     * Compute total consumed bytes so far, including input streams of
776
     * external entities.
777
     */
778
84.4M
    consumed = input->parentConsumed;
779
84.4M
    if ((entity == NULL) ||
780
84.4M
        ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
781
60.0M
         ((entity->flags & XML_ENT_PARSED) == 0))) {
782
24.4M
        xmlSaturatedAdd(&consumed, input->consumed);
783
24.4M
        xmlSaturatedAddSizeT(&consumed, input->cur - input->base);
784
24.4M
    }
785
84.4M
    xmlSaturatedAdd(&consumed, ctxt->sizeentities);
786
787
    /*
788
     * Add extra cost and some fixed cost.
789
     */
790
84.4M
    xmlSaturatedAdd(&ctxt->sizeentcopy, extra);
791
84.4M
    xmlSaturatedAdd(&ctxt->sizeentcopy, XML_ENT_FIXED_COST);
792
793
    /*
794
     * It's important to always use saturation arithmetic when tracking
795
     * entity sizes to make the size checks reliable. If "sizeentcopy"
796
     * overflows, we have to abort.
797
     */
798
84.4M
    if ((ctxt->sizeentcopy > XML_MAX_TEXT_LENGTH) &&
799
84.4M
        ((ctxt->sizeentcopy >= ULONG_MAX) ||
800
1.13k
         (ctxt->sizeentcopy / XML_PARSER_NON_LINEAR > consumed))) {
801
1.13k
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_LOOP,
802
1.13k
                       "Maximum entity amplification factor exceeded");
803
1.13k
        xmlHaltParser(ctxt);
804
1.13k
        return(1);
805
1.13k
    }
806
807
84.4M
    return(0);
808
84.4M
}
809
810
/************************************************************************
811
 *                  *
812
 *    Library wide options          *
813
 *                  *
814
 ************************************************************************/
815
816
/**
817
  * xmlHasFeature:
818
  * @feature: the feature to be examined
819
  *
820
  * Examines if the library has been compiled with a given feature.
821
  *
822
  * Returns a non-zero value if the feature exists, otherwise zero.
823
  * Returns zero (0) if the feature does not exist or an unknown
824
  * feature is requested, non-zero otherwise.
825
  */
826
int
827
xmlHasFeature(xmlFeature feature)
828
0
{
829
0
    switch (feature) {
830
0
  case XML_WITH_THREAD:
831
0
#ifdef LIBXML_THREAD_ENABLED
832
0
      return(1);
833
#else
834
      return(0);
835
#endif
836
0
        case XML_WITH_TREE:
837
0
#ifdef LIBXML_TREE_ENABLED
838
0
            return(1);
839
#else
840
            return(0);
841
#endif
842
0
        case XML_WITH_OUTPUT:
843
0
#ifdef LIBXML_OUTPUT_ENABLED
844
0
            return(1);
845
#else
846
            return(0);
847
#endif
848
0
        case XML_WITH_PUSH:
849
0
#ifdef LIBXML_PUSH_ENABLED
850
0
            return(1);
851
#else
852
            return(0);
853
#endif
854
0
        case XML_WITH_READER:
855
0
#ifdef LIBXML_READER_ENABLED
856
0
            return(1);
857
#else
858
            return(0);
859
#endif
860
0
        case XML_WITH_PATTERN:
861
0
#ifdef LIBXML_PATTERN_ENABLED
862
0
            return(1);
863
#else
864
            return(0);
865
#endif
866
0
        case XML_WITH_WRITER:
867
0
#ifdef LIBXML_WRITER_ENABLED
868
0
            return(1);
869
#else
870
            return(0);
871
#endif
872
0
        case XML_WITH_SAX1:
873
0
#ifdef LIBXML_SAX1_ENABLED
874
0
            return(1);
875
#else
876
            return(0);
877
#endif
878
0
        case XML_WITH_FTP:
879
#ifdef LIBXML_FTP_ENABLED
880
            return(1);
881
#else
882
0
            return(0);
883
0
#endif
884
0
        case XML_WITH_HTTP:
885
#ifdef LIBXML_HTTP_ENABLED
886
            return(1);
887
#else
888
0
            return(0);
889
0
#endif
890
0
        case XML_WITH_VALID:
891
0
#ifdef LIBXML_VALID_ENABLED
892
0
            return(1);
893
#else
894
            return(0);
895
#endif
896
0
        case XML_WITH_HTML:
897
0
#ifdef LIBXML_HTML_ENABLED
898
0
            return(1);
899
#else
900
            return(0);
901
#endif
902
0
        case XML_WITH_LEGACY:
903
#ifdef LIBXML_LEGACY_ENABLED
904
            return(1);
905
#else
906
0
            return(0);
907
0
#endif
908
0
        case XML_WITH_C14N:
909
0
#ifdef LIBXML_C14N_ENABLED
910
0
            return(1);
911
#else
912
            return(0);
913
#endif
914
0
        case XML_WITH_CATALOG:
915
0
#ifdef LIBXML_CATALOG_ENABLED
916
0
            return(1);
917
#else
918
            return(0);
919
#endif
920
0
        case XML_WITH_XPATH:
921
0
#ifdef LIBXML_XPATH_ENABLED
922
0
            return(1);
923
#else
924
            return(0);
925
#endif
926
0
        case XML_WITH_XPTR:
927
0
#ifdef LIBXML_XPTR_ENABLED
928
0
            return(1);
929
#else
930
            return(0);
931
#endif
932
0
        case XML_WITH_XINCLUDE:
933
0
#ifdef LIBXML_XINCLUDE_ENABLED
934
0
            return(1);
935
#else
936
            return(0);
937
#endif
938
0
        case XML_WITH_ICONV:
939
0
#ifdef LIBXML_ICONV_ENABLED
940
0
            return(1);
941
#else
942
            return(0);
943
#endif
944
0
        case XML_WITH_ISO8859X:
945
0
#ifdef LIBXML_ISO8859X_ENABLED
946
0
            return(1);
947
#else
948
            return(0);
949
#endif
950
0
        case XML_WITH_UNICODE:
951
0
#ifdef LIBXML_UNICODE_ENABLED
952
0
            return(1);
953
#else
954
            return(0);
955
#endif
956
0
        case XML_WITH_REGEXP:
957
0
#ifdef LIBXML_REGEXP_ENABLED
958
0
            return(1);
959
#else
960
            return(0);
961
#endif
962
0
        case XML_WITH_AUTOMATA:
963
0
#ifdef LIBXML_AUTOMATA_ENABLED
964
0
            return(1);
965
#else
966
            return(0);
967
#endif
968
0
        case XML_WITH_EXPR:
969
#ifdef LIBXML_EXPR_ENABLED
970
            return(1);
971
#else
972
0
            return(0);
973
0
#endif
974
0
        case XML_WITH_SCHEMAS:
975
0
#ifdef LIBXML_SCHEMAS_ENABLED
976
0
            return(1);
977
#else
978
            return(0);
979
#endif
980
0
        case XML_WITH_SCHEMATRON:
981
0
#ifdef LIBXML_SCHEMATRON_ENABLED
982
0
            return(1);
983
#else
984
            return(0);
985
#endif
986
0
        case XML_WITH_MODULES:
987
0
#ifdef LIBXML_MODULES_ENABLED
988
0
            return(1);
989
#else
990
            return(0);
991
#endif
992
0
        case XML_WITH_DEBUG:
993
#ifdef LIBXML_DEBUG_ENABLED
994
            return(1);
995
#else
996
0
            return(0);
997
0
#endif
998
0
        case XML_WITH_DEBUG_MEM:
999
#ifdef DEBUG_MEMORY_LOCATION
1000
            return(1);
1001
#else
1002
0
            return(0);
1003
0
#endif
1004
0
        case XML_WITH_DEBUG_RUN:
1005
0
            return(0);
1006
0
        case XML_WITH_ZLIB:
1007
0
#ifdef LIBXML_ZLIB_ENABLED
1008
0
            return(1);
1009
#else
1010
            return(0);
1011
#endif
1012
0
        case XML_WITH_LZMA:
1013
0
#ifdef LIBXML_LZMA_ENABLED
1014
0
            return(1);
1015
#else
1016
            return(0);
1017
#endif
1018
0
        case XML_WITH_ICU:
1019
#ifdef LIBXML_ICU_ENABLED
1020
            return(1);
1021
#else
1022
0
            return(0);
1023
0
#endif
1024
0
        default:
1025
0
      break;
1026
0
     }
1027
0
     return(0);
1028
0
}
1029
1030
/************************************************************************
1031
 *                  *
1032
 *    SAX2 defaulted attributes handling      *
1033
 *                  *
1034
 ************************************************************************/
1035
1036
/**
1037
 * xmlDetectSAX2:
1038
 * @ctxt:  an XML parser context
1039
 *
1040
 * Do the SAX2 detection and specific initialization
1041
 */
1042
static void
1043
294k
xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1044
294k
    xmlSAXHandlerPtr sax;
1045
1046
    /* Avoid unused variable warning if features are disabled. */
1047
294k
    (void) sax;
1048
1049
294k
    if (ctxt == NULL) return;
1050
294k
    sax = ctxt->sax;
1051
294k
#ifdef LIBXML_SAX1_ENABLED
1052
294k
    if ((sax) &&  (sax->initialized == XML_SAX2_MAGIC) &&
1053
294k
        ((sax->startElementNs != NULL) ||
1054
195k
         (sax->endElementNs != NULL) ||
1055
195k
         ((sax->startElement == NULL) && (sax->endElement == NULL))))
1056
195k
        ctxt->sax2 = 1;
1057
#else
1058
    ctxt->sax2 = 1;
1059
#endif /* LIBXML_SAX1_ENABLED */
1060
1061
294k
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1062
294k
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1063
294k
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
1064
294k
    if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1065
294k
    (ctxt->str_xml_ns == NULL)) {
1066
0
        xmlErrMemory(ctxt, NULL);
1067
0
    }
1068
294k
}
1069
1070
typedef struct _xmlDefAttrs xmlDefAttrs;
1071
typedef xmlDefAttrs *xmlDefAttrsPtr;
1072
struct _xmlDefAttrs {
1073
    int nbAttrs;  /* number of defaulted attributes on that element */
1074
    int maxAttrs;       /* the size of the array */
1075
#if __STDC_VERSION__ >= 199901L
1076
    /* Using a C99 flexible array member avoids UBSan errors. */
1077
    const xmlChar *values[]; /* array of localname/prefix/values/external */
1078
#else
1079
    const xmlChar *values[5];
1080
#endif
1081
};
1082
1083
/**
1084
 * xmlAttrNormalizeSpace:
1085
 * @src: the source string
1086
 * @dst: the target string
1087
 *
1088
 * Normalize the space in non CDATA attribute values:
1089
 * If the attribute type is not CDATA, then the XML processor MUST further
1090
 * process the normalized attribute value by discarding any leading and
1091
 * trailing space (#x20) characters, and by replacing sequences of space
1092
 * (#x20) characters by a single space (#x20) character.
1093
 * Note that the size of dst need to be at least src, and if one doesn't need
1094
 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1095
 * passing src as dst is just fine.
1096
 *
1097
 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1098
 *         is needed.
1099
 */
1100
static xmlChar *
1101
xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1102
87.3k
{
1103
87.3k
    if ((src == NULL) || (dst == NULL))
1104
0
        return(NULL);
1105
1106
131k
    while (*src == 0x20) src++;
1107
821k
    while (*src != 0) {
1108
733k
  if (*src == 0x20) {
1109
134k
      while (*src == 0x20) src++;
1110
24.2k
      if (*src != 0)
1111
21.9k
    *dst++ = 0x20;
1112
709k
  } else {
1113
709k
      *dst++ = *src++;
1114
709k
  }
1115
733k
    }
1116
87.3k
    *dst = 0;
1117
87.3k
    if (dst == src)
1118
77.7k
       return(NULL);
1119
9.57k
    return(dst);
1120
87.3k
}
1121
1122
/**
1123
 * xmlAttrNormalizeSpace2:
1124
 * @src: the source string
1125
 *
1126
 * Normalize the space in non CDATA attribute values, a slightly more complex
1127
 * front end to avoid allocation problems when running on attribute values
1128
 * coming from the input.
1129
 *
1130
 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1131
 *         is needed.
1132
 */
1133
static const xmlChar *
1134
xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
1135
7.14k
{
1136
7.14k
    int i;
1137
7.14k
    int remove_head = 0;
1138
7.14k
    int need_realloc = 0;
1139
7.14k
    const xmlChar *cur;
1140
1141
7.14k
    if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1142
0
        return(NULL);
1143
7.14k
    i = *len;
1144
7.14k
    if (i <= 0)
1145
308
        return(NULL);
1146
1147
6.83k
    cur = src;
1148
8.53k
    while (*cur == 0x20) {
1149
1.69k
        cur++;
1150
1.69k
  remove_head++;
1151
1.69k
    }
1152
62.8k
    while (*cur != 0) {
1153
57.5k
  if (*cur == 0x20) {
1154
5.72k
      cur++;
1155
5.72k
      if ((*cur == 0x20) || (*cur == 0)) {
1156
1.58k
          need_realloc = 1;
1157
1.58k
    break;
1158
1.58k
      }
1159
5.72k
  } else
1160
51.8k
      cur++;
1161
57.5k
    }
1162
6.83k
    if (need_realloc) {
1163
1.58k
        xmlChar *ret;
1164
1165
1.58k
  ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1166
1.58k
  if (ret == NULL) {
1167
0
      xmlErrMemory(ctxt, NULL);
1168
0
      return(NULL);
1169
0
  }
1170
1.58k
  xmlAttrNormalizeSpace(ret, ret);
1171
1.58k
  *len = strlen((const char *)ret);
1172
1.58k
        return(ret);
1173
5.25k
    } else if (remove_head) {
1174
64
        *len -= remove_head;
1175
64
        memmove(src, src + remove_head, 1 + *len);
1176
64
  return(src);
1177
64
    }
1178
5.18k
    return(NULL);
1179
6.83k
}
1180
1181
/**
1182
 * xmlAddDefAttrs:
1183
 * @ctxt:  an XML parser context
1184
 * @fullname:  the element fullname
1185
 * @fullattr:  the attribute fullname
1186
 * @value:  the attribute value
1187
 *
1188
 * Add a defaulted attribute for an element
1189
 */
1190
static void
1191
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1192
               const xmlChar *fullname,
1193
               const xmlChar *fullattr,
1194
179k
               const xmlChar *value) {
1195
179k
    xmlDefAttrsPtr defaults;
1196
179k
    int len;
1197
179k
    const xmlChar *name;
1198
179k
    const xmlChar *prefix;
1199
1200
    /*
1201
     * Allows to detect attribute redefinitions
1202
     */
1203
179k
    if (ctxt->attsSpecial != NULL) {
1204
170k
        if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1205
66
      return;
1206
170k
    }
1207
1208
179k
    if (ctxt->attsDefault == NULL) {
1209
16.7k
        ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1210
16.7k
  if (ctxt->attsDefault == NULL)
1211
0
      goto mem_error;
1212
16.7k
    }
1213
1214
    /*
1215
     * split the element name into prefix:localname , the string found
1216
     * are within the DTD and then not associated to namespace names.
1217
     */
1218
179k
    name = xmlSplitQName3(fullname, &len);
1219
179k
    if (name == NULL) {
1220
173k
        name = xmlDictLookup(ctxt->dict, fullname, -1);
1221
173k
  prefix = NULL;
1222
173k
    } else {
1223
5.03k
        name = xmlDictLookup(ctxt->dict, name, -1);
1224
5.03k
  prefix = xmlDictLookup(ctxt->dict, fullname, len);
1225
5.03k
    }
1226
1227
    /*
1228
     * make sure there is some storage
1229
     */
1230
179k
    defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1231
179k
    if (defaults == NULL) {
1232
89.0k
        defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
1233
89.0k
                     (4 * 5) * sizeof(const xmlChar *));
1234
89.0k
  if (defaults == NULL)
1235
0
      goto mem_error;
1236
89.0k
  defaults->nbAttrs = 0;
1237
89.0k
  defaults->maxAttrs = 4;
1238
89.0k
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1239
89.0k
                          defaults, NULL) < 0) {
1240
0
      xmlFree(defaults);
1241
0
      goto mem_error;
1242
0
  }
1243
90.0k
    } else if (defaults->nbAttrs >= defaults->maxAttrs) {
1244
1.02k
        xmlDefAttrsPtr temp;
1245
1246
1.02k
        temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
1247
1.02k
           (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1248
1.02k
  if (temp == NULL)
1249
0
      goto mem_error;
1250
1.02k
  defaults = temp;
1251
1.02k
  defaults->maxAttrs *= 2;
1252
1.02k
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1253
1.02k
                          defaults, NULL) < 0) {
1254
0
      xmlFree(defaults);
1255
0
      goto mem_error;
1256
0
  }
1257
1.02k
    }
1258
1259
    /*
1260
     * Split the element name into prefix:localname , the string found
1261
     * are within the DTD and hen not associated to namespace names.
1262
     */
1263
179k
    name = xmlSplitQName3(fullattr, &len);
1264
179k
    if (name == NULL) {
1265
128k
        name = xmlDictLookup(ctxt->dict, fullattr, -1);
1266
128k
  prefix = NULL;
1267
128k
    } else {
1268
50.3k
        name = xmlDictLookup(ctxt->dict, name, -1);
1269
50.3k
  prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1270
50.3k
    }
1271
1272
179k
    defaults->values[5 * defaults->nbAttrs] = name;
1273
179k
    defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1274
    /* intern the string and precompute the end */
1275
179k
    len = xmlStrlen(value);
1276
179k
    value = xmlDictLookup(ctxt->dict, value, len);
1277
179k
    if (value == NULL)
1278
0
        goto mem_error;
1279
179k
    defaults->values[5 * defaults->nbAttrs + 2] = value;
1280
179k
    defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1281
179k
    if (ctxt->external)
1282
134k
        defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1283
44.2k
    else
1284
44.2k
        defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1285
179k
    defaults->nbAttrs++;
1286
1287
179k
    return;
1288
1289
0
mem_error:
1290
0
    xmlErrMemory(ctxt, NULL);
1291
0
    return;
1292
179k
}
1293
1294
/**
1295
 * xmlAddSpecialAttr:
1296
 * @ctxt:  an XML parser context
1297
 * @fullname:  the element fullname
1298
 * @fullattr:  the attribute fullname
1299
 * @type:  the attribute type
1300
 *
1301
 * Register this attribute type
1302
 */
1303
static void
1304
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1305
      const xmlChar *fullname,
1306
      const xmlChar *fullattr,
1307
      int type)
1308
2.51M
{
1309
2.51M
    if (ctxt->attsSpecial == NULL) {
1310
24.0k
        ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1311
24.0k
  if (ctxt->attsSpecial == NULL)
1312
0
      goto mem_error;
1313
24.0k
    }
1314
1315
2.51M
    if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1316
364
        return;
1317
1318
2.51M
    xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1319
2.51M
                     (void *) (ptrdiff_t) type);
1320
2.51M
    return;
1321
1322
0
mem_error:
1323
0
    xmlErrMemory(ctxt, NULL);
1324
0
    return;
1325
2.51M
}
1326
1327
/**
1328
 * xmlCleanSpecialAttrCallback:
1329
 *
1330
 * Removes CDATA attributes from the special attribute table
1331
 */
1332
static void
1333
xmlCleanSpecialAttrCallback(void *payload, void *data,
1334
                            const xmlChar *fullname, const xmlChar *fullattr,
1335
2.15M
                            const xmlChar *unused ATTRIBUTE_UNUSED) {
1336
2.15M
    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1337
1338
2.15M
    if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) {
1339
741k
        xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1340
741k
    }
1341
2.15M
}
1342
1343
/**
1344
 * xmlCleanSpecialAttr:
1345
 * @ctxt:  an XML parser context
1346
 *
1347
 * Trim the list of attributes defined to remove all those of type
1348
 * CDATA as they are not special. This call should be done when finishing
1349
 * to parse the DTD and before starting to parse the document root.
1350
 */
1351
static void
1352
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1353
73.4k
{
1354
73.4k
    if (ctxt->attsSpecial == NULL)
1355
54.7k
        return;
1356
1357
18.6k
    xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1358
1359
18.6k
    if (xmlHashSize(ctxt->attsSpecial) == 0) {
1360
2.03k
        xmlHashFree(ctxt->attsSpecial, NULL);
1361
2.03k
        ctxt->attsSpecial = NULL;
1362
2.03k
    }
1363
18.6k
    return;
1364
73.4k
}
1365
1366
/**
1367
 * xmlCheckLanguageID:
1368
 * @lang:  pointer to the string value
1369
 *
1370
 * Checks that the value conforms to the LanguageID production:
1371
 *
1372
 * NOTE: this is somewhat deprecated, those productions were removed from
1373
 *       the XML Second edition.
1374
 *
1375
 * [33] LanguageID ::= Langcode ('-' Subcode)*
1376
 * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
1377
 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1378
 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1379
 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1380
 * [38] Subcode ::= ([a-z] | [A-Z])+
1381
 *
1382
 * The current REC reference the successors of RFC 1766, currently 5646
1383
 *
1384
 * http://www.rfc-editor.org/rfc/rfc5646.txt
1385
 * langtag       = language
1386
 *                 ["-" script]
1387
 *                 ["-" region]
1388
 *                 *("-" variant)
1389
 *                 *("-" extension)
1390
 *                 ["-" privateuse]
1391
 * language      = 2*3ALPHA            ; shortest ISO 639 code
1392
 *                 ["-" extlang]       ; sometimes followed by
1393
 *                                     ; extended language subtags
1394
 *               / 4ALPHA              ; or reserved for future use
1395
 *               / 5*8ALPHA            ; or registered language subtag
1396
 *
1397
 * extlang       = 3ALPHA              ; selected ISO 639 codes
1398
 *                 *2("-" 3ALPHA)      ; permanently reserved
1399
 *
1400
 * script        = 4ALPHA              ; ISO 15924 code
1401
 *
1402
 * region        = 2ALPHA              ; ISO 3166-1 code
1403
 *               / 3DIGIT              ; UN M.49 code
1404
 *
1405
 * variant       = 5*8alphanum         ; registered variants
1406
 *               / (DIGIT 3alphanum)
1407
 *
1408
 * extension     = singleton 1*("-" (2*8alphanum))
1409
 *
1410
 *                                     ; Single alphanumerics
1411
 *                                     ; "x" reserved for private use
1412
 * singleton     = DIGIT               ; 0 - 9
1413
 *               / %x41-57             ; A - W
1414
 *               / %x59-5A             ; Y - Z
1415
 *               / %x61-77             ; a - w
1416
 *               / %x79-7A             ; y - z
1417
 *
1418
 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1419
 * The parser below doesn't try to cope with extension or privateuse
1420
 * that could be added but that's not interoperable anyway
1421
 *
1422
 * Returns 1 if correct 0 otherwise
1423
 **/
1424
int
1425
xmlCheckLanguageID(const xmlChar * lang)
1426
3.87k
{
1427
3.87k
    const xmlChar *cur = lang, *nxt;
1428
1429
3.87k
    if (cur == NULL)
1430
71
        return (0);
1431
3.80k
    if (((cur[0] == 'i') && (cur[1] == '-')) ||
1432
3.80k
        ((cur[0] == 'I') && (cur[1] == '-')) ||
1433
3.80k
        ((cur[0] == 'x') && (cur[1] == '-')) ||
1434
3.80k
        ((cur[0] == 'X') && (cur[1] == '-'))) {
1435
        /*
1436
         * Still allow IANA code and user code which were coming
1437
         * from the previous version of the XML-1.0 specification
1438
         * it's deprecated but we should not fail
1439
         */
1440
111
        cur += 2;
1441
446
        while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1442
446
               ((cur[0] >= 'a') && (cur[0] <= 'z')))
1443
335
            cur++;
1444
111
        return(cur[0] == 0);
1445
111
    }
1446
3.69k
    nxt = cur;
1447
15.0k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1448
15.0k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1449
11.3k
           nxt++;
1450
3.69k
    if (nxt - cur >= 4) {
1451
        /*
1452
         * Reserved
1453
         */
1454
960
        if ((nxt - cur > 8) || (nxt[0] != 0))
1455
235
            return(0);
1456
725
        return(1);
1457
960
    }
1458
2.73k
    if (nxt - cur < 2)
1459
333
        return(0);
1460
    /* we got an ISO 639 code */
1461
2.39k
    if (nxt[0] == 0)
1462
986
        return(1);
1463
1.41k
    if (nxt[0] != '-')
1464
175
        return(0);
1465
1466
1.23k
    nxt++;
1467
1.23k
    cur = nxt;
1468
    /* now we can have extlang or script or region or variant */
1469
1.23k
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1470
174
        goto region_m49;
1471
1472
4.50k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1473
4.50k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1474
3.43k
           nxt++;
1475
1.06k
    if (nxt - cur == 4)
1476
266
        goto script;
1477
798
    if (nxt - cur == 2)
1478
171
        goto region;
1479
627
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1480
75
        goto variant;
1481
552
    if (nxt - cur != 3)
1482
143
        return(0);
1483
    /* we parsed an extlang */
1484
409
    if (nxt[0] == 0)
1485
43
        return(1);
1486
366
    if (nxt[0] != '-')
1487
39
        return(0);
1488
1489
327
    nxt++;
1490
327
    cur = nxt;
1491
    /* now we can have script or region or variant */
1492
327
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1493
48
        goto region_m49;
1494
1495
1.64k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1496
1.64k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1497
1.36k
           nxt++;
1498
279
    if (nxt - cur == 2)
1499
62
        goto region;
1500
217
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1501
63
        goto variant;
1502
154
    if (nxt - cur != 4)
1503
97
        return(0);
1504
    /* we parsed a script */
1505
323
script:
1506
323
    if (nxt[0] == 0)
1507
42
        return(1);
1508
281
    if (nxt[0] != '-')
1509
74
        return(0);
1510
1511
207
    nxt++;
1512
207
    cur = nxt;
1513
    /* now we can have region or variant */
1514
207
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1515
55
        goto region_m49;
1516
1517
809
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1518
809
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1519
657
           nxt++;
1520
1521
152
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1522
45
        goto variant;
1523
107
    if (nxt - cur != 2)
1524
75
        return(0);
1525
    /* we parsed a region */
1526
369
region:
1527
369
    if (nxt[0] == 0)
1528
93
        return(1);
1529
276
    if (nxt[0] != '-')
1530
165
        return(0);
1531
1532
111
    nxt++;
1533
111
    cur = nxt;
1534
    /* now we can just have a variant */
1535
587
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1536
587
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1537
476
           nxt++;
1538
1539
111
    if ((nxt - cur < 5) || (nxt - cur > 8))
1540
86
        return(0);
1541
1542
    /* we parsed a variant */
1543
208
variant:
1544
208
    if (nxt[0] == 0)
1545
31
        return(1);
1546
177
    if (nxt[0] != '-')
1547
142
        return(0);
1548
    /* extensions and private use subtags not checked */
1549
35
    return (1);
1550
1551
277
region_m49:
1552
277
    if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1553
277
        ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1554
104
        nxt += 3;
1555
104
        goto region;
1556
104
    }
1557
173
    return(0);
1558
277
}
1559
1560
/************************************************************************
1561
 *                  *
1562
 *    Parser stacks related functions and macros    *
1563
 *                  *
1564
 ************************************************************************/
1565
1566
static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1567
                                            const xmlChar ** str);
1568
1569
#ifdef SAX2
1570
/**
1571
 * nsPush:
1572
 * @ctxt:  an XML parser context
1573
 * @prefix:  the namespace prefix or NULL
1574
 * @URL:  the namespace name
1575
 *
1576
 * Pushes a new parser namespace on top of the ns stack
1577
 *
1578
 * Returns -1 in case of error, -2 if the namespace should be discarded
1579
 *     and the index in the stack otherwise.
1580
 */
1581
static int
1582
nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1583
36.7k
{
1584
36.7k
    if (ctxt->options & XML_PARSE_NSCLEAN) {
1585
4.87k
        int i;
1586
7.97k
  for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
1587
3.29k
      if (ctxt->nsTab[i] == prefix) {
1588
    /* in scope */
1589
190
          if (ctxt->nsTab[i + 1] == URL)
1590
19
        return(-2);
1591
    /* out of scope keep it */
1592
171
    break;
1593
190
      }
1594
3.29k
  }
1595
4.87k
    }
1596
36.7k
    if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1597
18.6k
  ctxt->nsMax = 10;
1598
18.6k
  ctxt->nsNr = 0;
1599
18.6k
  ctxt->nsTab = (const xmlChar **)
1600
18.6k
                xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1601
18.6k
  if (ctxt->nsTab == NULL) {
1602
0
      xmlErrMemory(ctxt, NULL);
1603
0
      ctxt->nsMax = 0;
1604
0
            return (-1);
1605
0
  }
1606
18.6k
    } else if (ctxt->nsNr >= ctxt->nsMax) {
1607
33
        const xmlChar ** tmp;
1608
33
        ctxt->nsMax *= 2;
1609
33
        tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1610
33
            ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1611
33
        if (tmp == NULL) {
1612
0
            xmlErrMemory(ctxt, NULL);
1613
0
      ctxt->nsMax /= 2;
1614
0
            return (-1);
1615
0
        }
1616
33
  ctxt->nsTab = tmp;
1617
33
    }
1618
36.7k
    ctxt->nsTab[ctxt->nsNr++] = prefix;
1619
36.7k
    ctxt->nsTab[ctxt->nsNr++] = URL;
1620
36.7k
    return (ctxt->nsNr);
1621
36.7k
}
1622
/**
1623
 * nsPop:
1624
 * @ctxt: an XML parser context
1625
 * @nr:  the number to pop
1626
 *
1627
 * Pops the top @nr parser prefix/namespace from the ns stack
1628
 *
1629
 * Returns the number of namespaces removed
1630
 */
1631
static int
1632
nsPop(xmlParserCtxtPtr ctxt, int nr)
1633
8.85k
{
1634
8.85k
    int i;
1635
1636
8.85k
    if (ctxt->nsTab == NULL) return(0);
1637
8.85k
    if (ctxt->nsNr < nr) {
1638
0
        xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1639
0
        nr = ctxt->nsNr;
1640
0
    }
1641
8.85k
    if (ctxt->nsNr <= 0)
1642
0
        return (0);
1643
1644
39.5k
    for (i = 0;i < nr;i++) {
1645
30.6k
         ctxt->nsNr--;
1646
30.6k
   ctxt->nsTab[ctxt->nsNr] = NULL;
1647
30.6k
    }
1648
8.85k
    return(nr);
1649
8.85k
}
1650
#endif
1651
1652
static int
1653
34.8k
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1654
34.8k
    const xmlChar **atts;
1655
34.8k
    int *attallocs;
1656
34.8k
    int maxatts;
1657
1658
34.8k
    if (nr + 5 > ctxt->maxatts) {
1659
34.8k
  maxatts = ctxt->maxatts == 0 ? 55 : (nr + 5) * 2;
1660
34.8k
  atts = (const xmlChar **) xmlMalloc(
1661
34.8k
             maxatts * sizeof(const xmlChar *));
1662
34.8k
  if (atts == NULL) goto mem_error;
1663
34.8k
  attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1664
34.8k
                               (maxatts / 5) * sizeof(int));
1665
34.8k
  if (attallocs == NULL) {
1666
0
            xmlFree(atts);
1667
0
            goto mem_error;
1668
0
        }
1669
34.8k
        if (ctxt->maxatts > 0)
1670
98
            memcpy(atts, ctxt->atts, ctxt->maxatts * sizeof(const xmlChar *));
1671
34.8k
        xmlFree(ctxt->atts);
1672
34.8k
  ctxt->atts = atts;
1673
34.8k
  ctxt->attallocs = attallocs;
1674
34.8k
  ctxt->maxatts = maxatts;
1675
34.8k
    }
1676
34.8k
    return(ctxt->maxatts);
1677
0
mem_error:
1678
0
    xmlErrMemory(ctxt, NULL);
1679
0
    return(-1);
1680
34.8k
}
1681
1682
/**
1683
 * inputPush:
1684
 * @ctxt:  an XML parser context
1685
 * @value:  the parser input
1686
 *
1687
 * Pushes a new parser input on top of the input stack
1688
 *
1689
 * Returns -1 in case of error, the index in the stack otherwise
1690
 */
1691
int
1692
inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1693
60.3M
{
1694
60.3M
    if ((ctxt == NULL) || (value == NULL))
1695
0
        return(-1);
1696
60.3M
    if (ctxt->inputNr >= ctxt->inputMax) {
1697
1.72k
        size_t newSize = ctxt->inputMax * 2;
1698
1.72k
        xmlParserInputPtr *tmp;
1699
1700
1.72k
        tmp = (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1701
1.72k
                                               newSize * sizeof(*tmp));
1702
1.72k
        if (tmp == NULL) {
1703
0
            xmlErrMemory(ctxt, NULL);
1704
0
            return (-1);
1705
0
        }
1706
1.72k
        ctxt->inputTab = tmp;
1707
1.72k
        ctxt->inputMax = newSize;
1708
1.72k
    }
1709
60.3M
    ctxt->inputTab[ctxt->inputNr] = value;
1710
60.3M
    ctxt->input = value;
1711
60.3M
    return (ctxt->inputNr++);
1712
60.3M
}
1713
/**
1714
 * inputPop:
1715
 * @ctxt: an XML parser context
1716
 *
1717
 * Pops the top parser input from the input stack
1718
 *
1719
 * Returns the input just removed
1720
 */
1721
xmlParserInputPtr
1722
inputPop(xmlParserCtxtPtr ctxt)
1723
60.9M
{
1724
60.9M
    xmlParserInputPtr ret;
1725
1726
60.9M
    if (ctxt == NULL)
1727
0
        return(NULL);
1728
60.9M
    if (ctxt->inputNr <= 0)
1729
603k
        return (NULL);
1730
60.3M
    ctxt->inputNr--;
1731
60.3M
    if (ctxt->inputNr > 0)
1732
60.0M
        ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1733
240k
    else
1734
240k
        ctxt->input = NULL;
1735
60.3M
    ret = ctxt->inputTab[ctxt->inputNr];
1736
60.3M
    ctxt->inputTab[ctxt->inputNr] = NULL;
1737
60.3M
    return (ret);
1738
60.9M
}
1739
/**
1740
 * nodePush:
1741
 * @ctxt:  an XML parser context
1742
 * @value:  the element node
1743
 *
1744
 * Pushes a new element node on top of the node stack
1745
 *
1746
 * Returns -1 in case of error, the index in the stack otherwise
1747
 */
1748
int
1749
nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1750
3.12M
{
1751
3.12M
    if (ctxt == NULL) return(0);
1752
3.12M
    if (ctxt->nodeNr >= ctxt->nodeMax) {
1753
474
        xmlNodePtr *tmp;
1754
1755
474
  tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1756
474
                                      ctxt->nodeMax * 2 *
1757
474
                                      sizeof(ctxt->nodeTab[0]));
1758
474
        if (tmp == NULL) {
1759
0
            xmlErrMemory(ctxt, NULL);
1760
0
            return (-1);
1761
0
        }
1762
474
        ctxt->nodeTab = tmp;
1763
474
  ctxt->nodeMax *= 2;
1764
474
    }
1765
3.12M
    if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1766
3.12M
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
1767
0
  xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1768
0
     "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1769
0
        xmlParserMaxDepth);
1770
0
  xmlHaltParser(ctxt);
1771
0
  return(-1);
1772
0
    }
1773
3.12M
    ctxt->nodeTab[ctxt->nodeNr] = value;
1774
3.12M
    ctxt->node = value;
1775
3.12M
    return (ctxt->nodeNr++);
1776
3.12M
}
1777
1778
/**
1779
 * nodePop:
1780
 * @ctxt: an XML parser context
1781
 *
1782
 * Pops the top element node from the node stack
1783
 *
1784
 * Returns the node just removed
1785
 */
1786
xmlNodePtr
1787
nodePop(xmlParserCtxtPtr ctxt)
1788
3.00M
{
1789
3.00M
    xmlNodePtr ret;
1790
1791
3.00M
    if (ctxt == NULL) return(NULL);
1792
3.00M
    if (ctxt->nodeNr <= 0)
1793
22.3k
        return (NULL);
1794
2.98M
    ctxt->nodeNr--;
1795
2.98M
    if (ctxt->nodeNr > 0)
1796
2.89M
        ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1797
89.9k
    else
1798
89.9k
        ctxt->node = NULL;
1799
2.98M
    ret = ctxt->nodeTab[ctxt->nodeNr];
1800
2.98M
    ctxt->nodeTab[ctxt->nodeNr] = NULL;
1801
2.98M
    return (ret);
1802
3.00M
}
1803
1804
/**
1805
 * nameNsPush:
1806
 * @ctxt:  an XML parser context
1807
 * @value:  the element name
1808
 * @prefix:  the element prefix
1809
 * @URI:  the element namespace name
1810
 * @line:  the current line number for error messages
1811
 * @nsNr:  the number of namespaces pushed on the namespace table
1812
 *
1813
 * Pushes a new element name/prefix/URL on top of the name stack
1814
 *
1815
 * Returns -1 in case of error, the index in the stack otherwise
1816
 */
1817
static int
1818
nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1819
           const xmlChar *prefix, const xmlChar *URI, int line, int nsNr)
1820
3.31M
{
1821
3.31M
    xmlStartTag *tag;
1822
1823
3.31M
    if (ctxt->nameNr >= ctxt->nameMax) {
1824
1.02k
        const xmlChar * *tmp;
1825
1.02k
        xmlStartTag *tmp2;
1826
1.02k
        ctxt->nameMax *= 2;
1827
1.02k
        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1828
1.02k
                                    ctxt->nameMax *
1829
1.02k
                                    sizeof(ctxt->nameTab[0]));
1830
1.02k
        if (tmp == NULL) {
1831
0
      ctxt->nameMax /= 2;
1832
0
      goto mem_error;
1833
0
        }
1834
1.02k
  ctxt->nameTab = tmp;
1835
1.02k
        tmp2 = (xmlStartTag *) xmlRealloc((void * *)ctxt->pushTab,
1836
1.02k
                                    ctxt->nameMax *
1837
1.02k
                                    sizeof(ctxt->pushTab[0]));
1838
1.02k
        if (tmp2 == NULL) {
1839
0
      ctxt->nameMax /= 2;
1840
0
      goto mem_error;
1841
0
        }
1842
1.02k
  ctxt->pushTab = tmp2;
1843
3.31M
    } else if (ctxt->pushTab == NULL) {
1844
125k
        ctxt->pushTab = (xmlStartTag *) xmlMalloc(ctxt->nameMax *
1845
125k
                                            sizeof(ctxt->pushTab[0]));
1846
125k
        if (ctxt->pushTab == NULL)
1847
0
            goto mem_error;
1848
125k
    }
1849
3.31M
    ctxt->nameTab[ctxt->nameNr] = value;
1850
3.31M
    ctxt->name = value;
1851
3.31M
    tag = &ctxt->pushTab[ctxt->nameNr];
1852
3.31M
    tag->prefix = prefix;
1853
3.31M
    tag->URI = URI;
1854
3.31M
    tag->line = line;
1855
3.31M
    tag->nsNr = nsNr;
1856
3.31M
    return (ctxt->nameNr++);
1857
0
mem_error:
1858
0
    xmlErrMemory(ctxt, NULL);
1859
0
    return (-1);
1860
3.31M
}
1861
#ifdef LIBXML_PUSH_ENABLED
/**
 * nameNsPop:
 * @ctxt: an XML parser context
 *
 * Removes the entry on top of the name stack. ctxt->name is updated to
 * the new top of the stack, or NULL when the stack becomes empty, and
 * the released slot of ctxt->nameTab is cleared.
 *
 * Returns the name just removed, or NULL if the stack is empty
 */
static const xmlChar *
nameNsPop(xmlParserCtxtPtr ctxt)
{
    const xmlChar *popped;
    int top;

    if (ctxt->nameNr <= 0)
        return (NULL);

    top = --ctxt->nameNr;
    ctxt->name = (top > 0) ? ctxt->nameTab[top - 1] : NULL;

    popped = ctxt->nameTab[top];
    ctxt->nameTab[top] = NULL;
    return (popped);
}
#endif /* LIBXML_PUSH_ENABLED */
1887
1888
/**
1889
 * namePush:
1890
 * @ctxt:  an XML parser context
1891
 * @value:  the element name
1892
 *
1893
 * Pushes a new element name on top of the name stack
1894
 *
1895
 * Returns -1 in case of error, the index in the stack otherwise
1896
 */
1897
int
1898
namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1899
0
{
1900
0
    if (ctxt == NULL) return (-1);
1901
1902
0
    if (ctxt->nameNr >= ctxt->nameMax) {
1903
0
        const xmlChar * *tmp;
1904
0
        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1905
0
                                    ctxt->nameMax * 2 *
1906
0
                                    sizeof(ctxt->nameTab[0]));
1907
0
        if (tmp == NULL) {
1908
0
      goto mem_error;
1909
0
        }
1910
0
  ctxt->nameTab = tmp;
1911
0
        ctxt->nameMax *= 2;
1912
0
    }
1913
0
    ctxt->nameTab[ctxt->nameNr] = value;
1914
0
    ctxt->name = value;
1915
0
    return (ctxt->nameNr++);
1916
0
mem_error:
1917
0
    xmlErrMemory(ctxt, NULL);
1918
0
    return (-1);
1919
0
}
1920
/**
1921
 * namePop:
1922
 * @ctxt: an XML parser context
1923
 *
1924
 * Pops the top element name from the name stack
1925
 *
1926
 * Returns the name just removed
1927
 */
1928
const xmlChar *
1929
namePop(xmlParserCtxtPtr ctxt)
1930
2.19M
{
1931
2.19M
    const xmlChar *ret;
1932
1933
2.19M
    if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1934
0
        return (NULL);
1935
2.19M
    ctxt->nameNr--;
1936
2.19M
    if (ctxt->nameNr > 0)
1937
2.15M
        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1938
40.0k
    else
1939
40.0k
        ctxt->name = NULL;
1940
2.19M
    ret = ctxt->nameTab[ctxt->nameNr];
1941
2.19M
    ctxt->nameTab[ctxt->nameNr] = NULL;
1942
2.19M
    return (ret);
1943
2.19M
}
1944
1945
3.66M
/**
 * spacePush:
 * @ctxt:  an XML parser context
 * @val:  the value to store
 *
 * Pushes @val on top of the ctxt->spaceTab stack, doubling its capacity
 * when it is full, and makes ctxt->space point to the new top entry.
 *
 * Returns -1 in case of allocation failure, the index in the stack
 *         otherwise
 */
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
    if (ctxt->spaceNr >= ctxt->spaceMax) {
        int *grown;

        /* the capacity is bumped first and rolled back on failure */
        ctxt->spaceMax *= 2;
        grown = (int *) xmlRealloc(ctxt->spaceTab,
                                   ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
        if (grown == NULL) {
            xmlErrMemory(ctxt, NULL);
            ctxt->spaceMax /=2;
            return(-1);
        }
        ctxt->spaceTab = grown;
    }

    ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
    *ctxt->space = val;
    return(ctxt->spaceNr++);
}
1963
1964
3.56M
/**
 * spacePop:
 * @ctxt:  an XML parser context
 *
 * Removes the top entry of the ctxt->spaceTab stack. ctxt->space is
 * moved to the new top entry, or to the first slot of the table when
 * the stack becomes empty, and the released slot is overwritten
 * with -1.
 *
 * Returns the value just removed, or 0 if the stack is empty
 */
static int spacePop(xmlParserCtxtPtr ctxt) {
    int popped;
    int top;

    if (ctxt->spaceNr <= 0)
        return(0);

    top = --ctxt->spaceNr;
    ctxt->space = &ctxt->spaceTab[(top > 0) ? (top - 1) : 0];

    popped = ctxt->spaceTab[top];
    ctxt->spaceTab[top] = -1;
    return(popped);
}
1976
1977
/*
1978
 * Macros for accessing the content. Those should be used only by the parser,
1979
 * and not exported.
1980
 *
1981
 * Dirty macros, i.e. one often need to make assumption on the context to
1982
 * use them
1983
 *
1984
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
1985
 *           To be used with extreme caution since operations consuming
1986
 *           characters may move the input buffer to a different location !
1987
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
1988
 *           This should be used internally by the parser
1989
 *           only to compare to ASCII values otherwise it would break when
1990
 *           running with UTF-8 encoding.
1991
 *   RAW     same as CUR but in the input buffer, bypass any token
1992
 *           extraction that may have been done
1993
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used only
1994
 *           to compare on ASCII based substring.
1995
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
1996
 *           strings without newlines within the parser.
1997
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
1998
 *           defined char within the parser.
1999
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
2000
 *
2001
 *   NEXT    Skip to the next character, this does the proper decoding
2002
 *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
2003
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
2004
 *   CUR_CHAR(l) returns the current unicode character (int), set l
2005
 *           to the number of xmlChars used for the encoding [0-5].
2006
 *   CUR_SCHAR  same but operate on a string instead of the context
2007
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
2008
 *            the index
2009
 *   GROW, SHRINK  handling of input buffers
2010
 */
2011
2012
487M
/*
 * Raw accessors on the current input of the parser context. They all
 * expect a variable named ctxt to be in scope at the point of use.
 */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur
#define BASE_PTR ctxt->input->base

/*
 * CMPn(s, c1, ..., cn) is true when the first n bytes found at s, read
 * as unsigned char, are equal to c1 ... cn. The comparison is chained
 * with && and therefore stops at the first mismatch, so bytes after a
 * mismatch are never read.
 *
 * Every macro argument is parenthesized so that expressions such as
 * "p + 1" or "flag ? a : b" can be passed safely.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )
2035
2036
201M
/*
 * SKIP(val): advance the current input position and the column counter
 * by val bytes, then ask for more input if the new current byte is 0.
 * The line counter is not touched. Note that val is evaluated twice.
 */
#define SKIP(val) do {							\
    ctxt->input->cur += (val),ctxt->input->col+=(val);			\
    if (*ctxt->input->cur == 0)						\
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
  } while (0)

/*
 * SKIPL(val): advance the current input position by val bytes one byte
 * at a time, bumping the line counter and resetting the column to 1 on
 * every '\n' and bumping the column otherwise, then ask for more input
 * if the new current byte is 0.
 *
 * val is parenthesized in the loop bound: without the parentheses an
 * argument built with an operator binding less tightly than '<' (for
 * instance "a & b" or "c ? x : y") produced a wrong bound. val is
 * evaluated on every iteration.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == 0)						\
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
  } while (0)
2053
2054
149M
#define SHRINK if ((ctxt->progressive == 0) &&       \
2055
149M
       (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
2056
149M
       (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
2057
149M
  xmlSHRINK (ctxt);
2058
2059
362k
static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
2060
    /* Don't shrink memory buffers. */
2061
362k
    if ((ctxt->input->buf) &&
2062
362k
        ((ctxt->input->buf->encoder) || (ctxt->input->buf->readcallback)))
2063
1.20k
        xmlParserInputShrink(ctxt->input);
2064
362k
    if (*ctxt->input->cur == 0)
2065
9.36k
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2066
362k
}
2067
2068
505M
#define GROW if ((ctxt->progressive == 0) &&       \
2069
505M
     (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
2070
505M
  xmlGROW (ctxt);
2071
2072
133M
static void xmlGROW (xmlParserCtxtPtr ctxt) {
2073
133M
    ptrdiff_t curEnd = ctxt->input->end - ctxt->input->cur;
2074
133M
    ptrdiff_t curBase = ctxt->input->cur - ctxt->input->base;
2075
2076
133M
    if (((curEnd > XML_MAX_LOOKUP_LIMIT) ||
2077
133M
         (curBase > XML_MAX_LOOKUP_LIMIT)) &&
2078
133M
         ((ctxt->input->buf) &&
2079
0
          (ctxt->input->buf->readcallback != NULL)) &&
2080
133M
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
2081
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
2082
0
        xmlHaltParser(ctxt);
2083
0
  return;
2084
0
    }
2085
133M
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2086
133M
    if ((ctxt->input->cur > ctxt->input->end) ||
2087
133M
        (ctxt->input->cur < ctxt->input->base)) {
2088
0
        xmlHaltParser(ctxt);
2089
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "cur index out of bound");
2090
0
  return;
2091
0
    }
2092
133M
    if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0))
2093
1.95M
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2094
133M
}
2095
2096
115M
/* Skip blanks at the current position, see xmlSkipBlankChars(). */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* Skip to the next character through xmlNextChar(). */
#define NEXT xmlNextChar(ctxt)

/*
 * Skip exactly one byte: the column counter and the input position are
 * both bumped by one (the line counter is not touched), then more input
 * is requested if the new current byte is 0.
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * Skip the current character, which is l bytes long: the line/column
 * counters are updated from the first byte only, then the input
 * position moves forward by l.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += l;				\
  } while (0)

/* Current character of the input, its length is stored in l. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
/* Same, but reading from the string s instead of the input. */
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

/*
 * Append the character v to buffer b at index i and advance i: a
 * direct one byte store when l is 1, xmlCopyCharMultiByte() otherwise.
 */
#define COPY_BUF(l,b,i,v)						\
    if (l == 1) b[i++] = v;						\
    else i += xmlCopyCharMultiByte(&b[i],v)
2120
2121
/**
2122
 * xmlSkipBlankChars:
2123
 * @ctxt:  the XML parser context
2124
 *
2125
 * skip all blanks character found at that point in the input streams.
2126
 * It pops up finished entities in the process if allowable at that point.
2127
 *
2128
 * Returns the number of space chars skipped
2129
 */
2130
2131
int
2132
115M
xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2133
115M
    int res = 0;
2134
2135
    /*
2136
     * It's Okay to use CUR/NEXT here since all the blanks are on
2137
     * the ASCII range.
2138
     */
2139
115M
    if (((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) ||
2140
115M
        (ctxt->instate == XML_PARSER_START)) {
2141
13.5M
  const xmlChar *cur;
2142
  /*
2143
   * if we are in the document content, go really fast
2144
   */
2145
13.5M
  cur = ctxt->input->cur;
2146
13.5M
  while (IS_BLANK_CH(*cur)) {
2147
4.17M
      if (*cur == '\n') {
2148
322k
    ctxt->input->line++; ctxt->input->col = 1;
2149
3.85M
      } else {
2150
3.85M
    ctxt->input->col++;
2151
3.85M
      }
2152
4.17M
      cur++;
2153
4.17M
      if (res < INT_MAX)
2154
4.17M
    res++;
2155
4.17M
      if (*cur == 0) {
2156
26.6k
    ctxt->input->cur = cur;
2157
26.6k
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2158
26.6k
    cur = ctxt->input->cur;
2159
26.6k
      }
2160
4.17M
  }
2161
13.5M
  ctxt->input->cur = cur;
2162
102M
    } else {
2163
102M
        int expandPE = ((ctxt->external != 0) || (ctxt->inputNr != 1));
2164
2165
400M
  while (ctxt->instate != XML_PARSER_EOF) {
2166
400M
            if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2167
172M
    NEXT;
2168
228M
      } else if (CUR == '%') {
2169
                /*
2170
                 * Need to handle support of entities branching here
2171
                 */
2172
67.0M
          if ((expandPE == 0) || (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2173
795k
                    break;
2174
66.2M
          xmlParsePEReference(ctxt);
2175
161M
            } else if (CUR == 0) {
2176
60.0M
                unsigned long consumed;
2177
60.0M
                xmlEntityPtr ent;
2178
2179
60.0M
                if (ctxt->inputNr <= 1)
2180
19.9k
                    break;
2181
2182
60.0M
                consumed = ctxt->input->consumed;
2183
60.0M
                xmlSaturatedAddSizeT(&consumed,
2184
60.0M
                                     ctxt->input->cur - ctxt->input->base);
2185
2186
                /*
2187
                 * Add to sizeentities when parsing an external entity
2188
                 * for the first time.
2189
                 */
2190
60.0M
                ent = ctxt->input->entity;
2191
60.0M
                if ((ent->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
2192
60.0M
                    ((ent->flags & XML_ENT_PARSED) == 0)) {
2193
2.08k
                    ent->flags |= XML_ENT_PARSED;
2194
2195
2.08k
                    xmlSaturatedAdd(&ctxt->sizeentities, consumed);
2196
2.08k
                }
2197
2198
60.0M
                xmlParserEntityCheck(ctxt, consumed);
2199
2200
60.0M
                xmlPopInput(ctxt);
2201
101M
            } else {
2202
101M
                break;
2203
101M
            }
2204
2205
            /*
2206
             * Also increase the counter when entering or exiting a PERef.
2207
             * The spec says: "When a parameter-entity reference is recognized
2208
             * in the DTD and included, its replacement text MUST be enlarged
2209
             * by the attachment of one leading and one following space (#x20)
2210
             * character."
2211
             */
2212
298M
      if (res < INT_MAX)
2213
298M
    res++;
2214
298M
        }
2215
102M
    }
2216
115M
    return(res);
2217
115M
}
2218
2219
/************************************************************************
2220
 *                  *
2221
 *    Commodity functions to handle entities      *
2222
 *                  *
2223
 ************************************************************************/
2224
2225
/**
2226
 * xmlPopInput:
2227
 * @ctxt:  an XML parser context
2228
 *
2229
 * xmlPopInput: the current input pointed by ctxt->input came to an end
2230
 *          pop it and return the next char.
2231
 *
2232
 * Returns the current xmlChar in the parser context
2233
 */
2234
xmlChar
2235
60.0M
xmlPopInput(xmlParserCtxtPtr ctxt) {
2236
60.0M
    xmlParserInputPtr input;
2237
2238
60.0M
    if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2239
60.0M
    if (xmlParserDebugEntities)
2240
0
  xmlGenericError(xmlGenericErrorContext,
2241
0
    "Popping input %d\n", ctxt->inputNr);
2242
60.0M
    if ((ctxt->inputNr > 1) && (ctxt->inSubset == 0) &&
2243
60.0M
        (ctxt->instate != XML_PARSER_EOF))
2244
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2245
0
                    "Unfinished entity outside the DTD");
2246
60.0M
    input = inputPop(ctxt);
2247
60.0M
    if (input->entity != NULL)
2248
60.0M
        input->entity->flags &= ~XML_ENT_EXPANDING;
2249
60.0M
    xmlFreeInputStream(input);
2250
60.0M
    if (*ctxt->input->cur == 0)
2251
28.4M
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2252
60.0M
    return(CUR);
2253
60.0M
}
2254
2255
/**
2256
 * xmlPushInput:
2257
 * @ctxt:  an XML parser context
2258
 * @input:  an XML parser input fragment (entity, XML fragment ...).
2259
 *
2260
 * xmlPushInput: switch to a new input stream which is stacked on top
2261
 *               of the previous one(s).
2262
 * Returns -1 in case of error or the index in the input stack
2263
 */
2264
int
2265
60.1M
xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2266
60.1M
    int ret;
2267
60.1M
    if (input == NULL) return(-1);
2268
2269
60.1M
    if (xmlParserDebugEntities) {
2270
0
  if ((ctxt->input != NULL) && (ctxt->input->filename))
2271
0
      xmlGenericError(xmlGenericErrorContext,
2272
0
        "%s(%d): ", ctxt->input->filename,
2273
0
        ctxt->input->line);
2274
0
  xmlGenericError(xmlGenericErrorContext,
2275
0
    "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2276
0
    }
2277
60.1M
    if (((ctxt->inputNr > 40) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2278
60.1M
        (ctxt->inputNr > 100)) {
2279
0
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2280
0
        while (ctxt->inputNr > 1)
2281
0
            xmlFreeInputStream(inputPop(ctxt));
2282
0
  return(-1);
2283
0
    }
2284
60.1M
    ret = inputPush(ctxt, input);
2285
60.1M
    if (ctxt->instate == XML_PARSER_EOF)
2286
0
        return(-1);
2287
60.1M
    GROW;
2288
60.1M
    return(ret);
2289
60.1M
}
2290
2291
/**
2292
 * xmlParseCharRef:
2293
 * @ctxt:  an XML parser context
2294
 *
2295
 * DEPRECATED: Internal function, don't use.
2296
 *
2297
 * Parse a numeric character reference. Always consumes '&'.
2298
 *
2299
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2300
 *                  '&#x' [0-9a-fA-F]+ ';'
2301
 *
2302
 * [ WFC: Legal Character ]
2303
 * Characters referred to using character references must match the
2304
 * production for Char.
2305
 *
2306
 * Returns the value parsed (as an int), 0 in case of error
2307
 */
2308
int
2309
64.9k
xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2310
64.9k
    int val = 0;
2311
64.9k
    int count = 0;
2312
2313
    /*
2314
     * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2315
     */
2316
64.9k
    if ((RAW == '&') && (NXT(1) == '#') &&
2317
64.9k
        (NXT(2) == 'x')) {
2318
16.7k
  SKIP(3);
2319
16.7k
  GROW;
2320
73.1k
  while (RAW != ';') { /* loop blocked by count */
2321
57.8k
      if (count++ > 20) {
2322
2.83k
    count = 0;
2323
2.83k
    GROW;
2324
2.83k
                if (ctxt->instate == XML_PARSER_EOF)
2325
0
                    return(0);
2326
2.83k
      }
2327
57.8k
      if ((RAW >= '0') && (RAW <= '9'))
2328
44.3k
          val = val * 16 + (CUR - '0');
2329
13.5k
      else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2330
11.4k
          val = val * 16 + (CUR - 'a') + 10;
2331
2.15k
      else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2332
656
          val = val * 16 + (CUR - 'A') + 10;
2333
1.50k
      else {
2334
1.50k
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2335
1.50k
    val = 0;
2336
1.50k
    break;
2337
1.50k
      }
2338
56.3k
      if (val > 0x110000)
2339
31.8k
          val = 0x110000;
2340
2341
56.3k
      NEXT;
2342
56.3k
      count++;
2343
56.3k
  }
2344
16.7k
  if (RAW == ';') {
2345
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2346
15.2k
      ctxt->input->col++;
2347
15.2k
      ctxt->input->cur++;
2348
15.2k
  }
2349
48.1k
    } else if  ((RAW == '&') && (NXT(1) == '#')) {
2350
48.1k
  SKIP(2);
2351
48.1k
  GROW;
2352
254k
  while (RAW != ';') { /* loop blocked by count */
2353
210k
      if (count++ > 20) {
2354
7.46k
    count = 0;
2355
7.46k
    GROW;
2356
7.46k
                if (ctxt->instate == XML_PARSER_EOF)
2357
0
                    return(0);
2358
7.46k
      }
2359
210k
      if ((RAW >= '0') && (RAW <= '9'))
2360
206k
          val = val * 10 + (CUR - '0');
2361
3.85k
      else {
2362
3.85k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2363
3.85k
    val = 0;
2364
3.85k
    break;
2365
3.85k
      }
2366
206k
      if (val > 0x110000)
2367
81.1k
          val = 0x110000;
2368
2369
206k
      NEXT;
2370
206k
      count++;
2371
206k
  }
2372
48.1k
  if (RAW == ';') {
2373
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2374
44.3k
      ctxt->input->col++;
2375
44.3k
      ctxt->input->cur++;
2376
44.3k
  }
2377
48.1k
    } else {
2378
0
        if (RAW == '&')
2379
0
            SKIP(1);
2380
0
        xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2381
0
    }
2382
2383
    /*
2384
     * [ WFC: Legal Character ]
2385
     * Characters referred to using character references must match the
2386
     * production for Char.
2387
     */
2388
64.9k
    if (val >= 0x110000) {
2389
279
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2390
279
                "xmlParseCharRef: character reference out of bounds\n",
2391
279
          val);
2392
64.6k
    } else if (IS_CHAR(val)) {
2393
58.8k
        return(val);
2394
58.8k
    } else {
2395
5.81k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2396
5.81k
                          "xmlParseCharRef: invalid xmlChar value %d\n",
2397
5.81k
                    val);
2398
5.81k
    }
2399
6.09k
    return(0);
2400
64.9k
}
2401
2402
/**
2403
 * xmlParseStringCharRef:
2404
 * @ctxt:  an XML parser context
2405
 * @str:  a pointer to an index in the string
2406
 *
2407
 * parse Reference declarations, variant parsing from a string rather
2408
 * than an an input flow.
2409
 *
2410
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2411
 *                  '&#x' [0-9a-fA-F]+ ';'
2412
 *
2413
 * [ WFC: Legal Character ]
2414
 * Characters referred to using character references must match the
2415
 * production for Char.
2416
 *
2417
 * Returns the value parsed (as an int), 0 in case of error, str will be
2418
 *         updated to the current value of the index
2419
 */
2420
static int
2421
176k
xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2422
176k
    const xmlChar *ptr;
2423
176k
    xmlChar cur;
2424
176k
    int val = 0;
2425
2426
176k
    if ((str == NULL) || (*str == NULL)) return(0);
2427
176k
    ptr = *str;
2428
176k
    cur = *ptr;
2429
176k
    if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2430
4.89k
  ptr += 3;
2431
4.89k
  cur = *ptr;
2432
12.3k
  while (cur != ';') { /* Non input consuming loop */
2433
7.83k
      if ((cur >= '0') && (cur <= '9'))
2434
3.47k
          val = val * 16 + (cur - '0');
2435
4.36k
      else if ((cur >= 'a') && (cur <= 'f'))
2436
244
          val = val * 16 + (cur - 'a') + 10;
2437
4.11k
      else if ((cur >= 'A') && (cur <= 'F'))
2438
3.72k
          val = val * 16 + (cur - 'A') + 10;
2439
390
      else {
2440
390
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2441
390
    val = 0;
2442
390
    break;
2443
390
      }
2444
7.44k
      if (val > 0x110000)
2445
1.13k
          val = 0x110000;
2446
2447
7.44k
      ptr++;
2448
7.44k
      cur = *ptr;
2449
7.44k
  }
2450
4.89k
  if (cur == ';')
2451
4.50k
      ptr++;
2452
171k
    } else if  ((cur == '&') && (ptr[1] == '#')){
2453
171k
  ptr += 2;
2454
171k
  cur = *ptr;
2455
535k
  while (cur != ';') { /* Non input consuming loops */
2456
365k
      if ((cur >= '0') && (cur <= '9'))
2457
364k
          val = val * 10 + (cur - '0');
2458
640
      else {
2459
640
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2460
640
    val = 0;
2461
640
    break;
2462
640
      }
2463
364k
      if (val > 0x110000)
2464
1.12k
          val = 0x110000;
2465
2466
364k
      ptr++;
2467
364k
      cur = *ptr;
2468
364k
  }
2469
171k
  if (cur == ';')
2470
170k
      ptr++;
2471
171k
    } else {
2472
0
  xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2473
0
  return(0);
2474
0
    }
2475
176k
    *str = ptr;
2476
2477
    /*
2478
     * [ WFC: Legal Character ]
2479
     * Characters referred to using character references must match the
2480
     * production for Char.
2481
     */
2482
176k
    if (val >= 0x110000) {
2483
102
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2484
102
                "xmlParseStringCharRef: character reference out of bounds\n",
2485
102
                val);
2486
175k
    } else if (IS_CHAR(val)) {
2487
174k
        return(val);
2488
174k
    } else {
2489
1.13k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2490
1.13k
        "xmlParseStringCharRef: invalid xmlChar value %d\n",
2491
1.13k
        val);
2492
1.13k
    }
2493
1.24k
    return(0);
2494
176k
}
2495
2496
/**
2497
 * xmlParserHandlePEReference:
2498
 * @ctxt:  the parser context
2499
 *
2500
 * [69] PEReference ::= '%' Name ';'
2501
 *
2502
 * [ WFC: No Recursion ]
2503
 * A parsed entity must not contain a recursive
2504
 * reference to itself, either directly or indirectly.
2505
 *
2506
 * [ WFC: Entity Declared ]
2507
 * In a document without any DTD, a document with only an internal DTD
2508
 * subset which contains no parameter entity references, or a document
2509
 * with "standalone='yes'", ...  ... The declaration of a parameter
2510
 * entity must precede any reference to it...
2511
 *
2512
 * [ VC: Entity Declared ]
2513
 * In a document with an external subset or external parameter entities
2514
 * with "standalone='no'", ...  ... The declaration of a parameter entity
2515
 * must precede any reference to it...
2516
 *
2517
 * [ WFC: In DTD ]
2518
 * Parameter-entity references may only appear in the DTD.
2519
 * NOTE: misleading but this is handled.
2520
 *
2521
 * A PEReference may have been detected in the current input stream
2522
 * the handling is done accordingly to
2523
 *      http://www.w3.org/TR/REC-xml#entproc
2524
 * i.e.
2525
 *   - Included in literal in entity values
2526
 *   - Included as Parameter Entity reference within DTDs
2527
 */
2528
void
2529
0
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2530
0
    switch(ctxt->instate) {
2531
0
  case XML_PARSER_CDATA_SECTION:
2532
0
      return;
2533
0
        case XML_PARSER_COMMENT:
2534
0
      return;
2535
0
  case XML_PARSER_START_TAG:
2536
0
      return;
2537
0
  case XML_PARSER_END_TAG:
2538
0
      return;
2539
0
        case XML_PARSER_EOF:
2540
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2541
0
      return;
2542
0
        case XML_PARSER_PROLOG:
2543
0
  case XML_PARSER_START:
2544
0
  case XML_PARSER_MISC:
2545
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2546
0
      return;
2547
0
  case XML_PARSER_ENTITY_DECL:
2548
0
        case XML_PARSER_CONTENT:
2549
0
        case XML_PARSER_ATTRIBUTE_VALUE:
2550
0
        case XML_PARSER_PI:
2551
0
  case XML_PARSER_SYSTEM_LITERAL:
2552
0
  case XML_PARSER_PUBLIC_LITERAL:
2553
      /* we just ignore it there */
2554
0
      return;
2555
0
        case XML_PARSER_EPILOG:
2556
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2557
0
      return;
2558
0
  case XML_PARSER_ENTITY_VALUE:
2559
      /*
2560
       * NOTE: in the case of entity values, we don't do the
2561
       *       substitution here since we need the literal
2562
       *       entity value to be able to save the internal
2563
       *       subset of the document.
2564
       *       This will be handled by xmlStringDecodeEntities
2565
       */
2566
0
      return;
2567
0
        case XML_PARSER_DTD:
2568
      /*
2569
       * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2570
       * In the internal DTD subset, parameter-entity references
2571
       * can occur only where markup declarations can occur, not
2572
       * within markup declarations.
2573
       * In that case this is handled in xmlParseMarkupDecl
2574
       */
2575
0
      if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2576
0
    return;
2577
0
      if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2578
0
    return;
2579
0
            break;
2580
0
        case XML_PARSER_IGNORE:
2581
0
            return;
2582
0
    }
2583
2584
0
    xmlParsePEReference(ctxt);
2585
0
}
2586
2587
/*
 * Macro used to grow the current buffer.
 * buffer##_size is expected to be a size_t
 * mem_error: is expected to handle memory allocation failures
 *
 * The new size is twice the current one plus n. Control jumps to
 * mem_error when the new size is smaller than the current one or when
 * the reallocation fails; buffer and buffer##_size are left untouched
 * in both cases.
 *
 * n is parenthesized so that an expression argument (for instance
 * "c ? x : y") is added as a whole instead of being torn apart by
 * operator precedence.
 */
#define growBuffer(buffer, n) {						\
    xmlChar *tmp;							\
    size_t new_size = buffer##_size * 2 + (n);                          \
    if (new_size < buffer##_size) goto mem_error;                       \
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);                     \
    if (tmp == NULL) goto mem_error;					\
    buffer = tmp;							\
    buffer##_size = new_size;                                           \
}
2601
2602
/**
2603
 * xmlStringDecodeEntitiesInt:
2604
 * @ctxt:  the parser context
2605
 * @str:  the input string
2606
 * @len: the string length
2607
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2608
 * @end:  an end marker xmlChar, 0 if none
2609
 * @end2:  an end marker xmlChar, 0 if none
2610
 * @end3:  an end marker xmlChar, 0 if none
2611
 * @check:  whether to perform entity checks
2612
 */
2613
static xmlChar *
2614
xmlStringDecodeEntitiesInt(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2615
               int what, xmlChar end, xmlChar  end2, xmlChar end3,
2616
24.7M
                           int check) {
2617
24.7M
    xmlChar *buffer = NULL;
2618
24.7M
    size_t buffer_size = 0;
2619
24.7M
    size_t nbchars = 0;
2620
2621
24.7M
    xmlChar *current = NULL;
2622
24.7M
    xmlChar *rep = NULL;
2623
24.7M
    const xmlChar *last;
2624
24.7M
    xmlEntityPtr ent;
2625
24.7M
    int c,l;
2626
2627
24.7M
    if (str == NULL)
2628
10.0k
        return(NULL);
2629
24.7M
    last = str + len;
2630
2631
24.7M
    if (((ctxt->depth > 40) &&
2632
24.7M
         ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2633
24.7M
  (ctxt->depth > 100)) {
2634
0
  xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_LOOP,
2635
0
                       "Maximum entity nesting depth exceeded");
2636
0
  return(NULL);
2637
0
    }
2638
2639
    /*
2640
     * allocate a translation buffer.
2641
     */
2642
24.7M
    buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2643
24.7M
    buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
2644
24.7M
    if (buffer == NULL) goto mem_error;
2645
2646
    /*
2647
     * OK loop until we reach one of the ending char or a size limit.
2648
     * we are operating on already parsed values.
2649
     */
2650
24.7M
    if (str < last)
2651
24.4M
  c = CUR_SCHAR(str, l);
2652
314k
    else
2653
314k
        c = 0;
2654
1.66G
    while ((c != 0) && (c != end) && /* non input consuming loop */
2655
1.66G
           (c != end2) && (c != end3) &&
2656
1.66G
           (ctxt->instate != XML_PARSER_EOF)) {
2657
2658
1.64G
  if (c == 0) break;
2659
1.64G
        if ((c == '&') && (str[1] == '#')) {
2660
176k
      int val = xmlParseStringCharRef(ctxt, &str);
2661
176k
      if (val == 0)
2662
1.24k
                goto int_error;
2663
174k
      COPY_BUF(0,buffer,nbchars,val);
2664
174k
      if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2665
0
          growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2666
0
      }
2667
1.64G
  } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2668
26.8M
      if (xmlParserDebugEntities)
2669
0
    xmlGenericError(xmlGenericErrorContext,
2670
0
      "String decoding Entity Reference: %.30s\n",
2671
0
      str);
2672
26.8M
      ent = xmlParseStringEntityRef(ctxt, &str);
2673
26.8M
      if ((ent != NULL) &&
2674
26.8M
    (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2675
1.09k
    if (ent->content != NULL) {
2676
1.09k
        COPY_BUF(0,buffer,nbchars,ent->content[0]);
2677
1.09k
        if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2678
0
      growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2679
0
        }
2680
1.09k
    } else {
2681
0
        xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2682
0
          "predefined entity has no content\n");
2683
0
                    goto int_error;
2684
0
    }
2685
26.8M
      } else if ((ent != NULL) && (ent->content != NULL)) {
2686
22.2M
          if ((check) && (xmlParserEntityCheck(ctxt, ent->length)))
2687
356
                    goto int_error;
2688
2689
22.2M
                if (ent->flags & XML_ENT_EXPANDING) {
2690
151
              xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2691
151
                    xmlHaltParser(ctxt);
2692
151
                    ent->content[0] = 0;
2693
151
                    goto int_error;
2694
151
                }
2695
2696
22.2M
                ent->flags |= XML_ENT_EXPANDING;
2697
22.2M
    ctxt->depth++;
2698
22.2M
    rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
2699
22.2M
                        ent->length, what, 0, 0, 0, check);
2700
22.2M
    ctxt->depth--;
2701
22.2M
                ent->flags &= ~XML_ENT_EXPANDING;
2702
2703
22.2M
    if (rep == NULL) {
2704
4.88k
                    ent->content[0] = 0;
2705
4.88k
                    goto int_error;
2706
4.88k
                }
2707
2708
22.1M
                current = rep;
2709
6.97G
                while (*current != 0) { /* non input consuming loop */
2710
6.95G
                    buffer[nbchars++] = *current++;
2711
6.95G
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2712
2.67M
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2713
2.67M
                    }
2714
6.95G
                }
2715
22.1M
                xmlFree(rep);
2716
22.1M
                rep = NULL;
2717
22.1M
      } else if (ent != NULL) {
2718
1.83M
    int i = xmlStrlen(ent->name);
2719
1.83M
    const xmlChar *cur = ent->name;
2720
2721
1.83M
    buffer[nbchars++] = '&';
2722
1.83M
    if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
2723
407k
        growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
2724
407k
    }
2725
172M
    for (;i > 0;i--)
2726
170M
        buffer[nbchars++] = *cur++;
2727
1.83M
    buffer[nbchars++] = ';';
2728
1.83M
      }
2729
1.61G
  } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2730
847k
      if (xmlParserDebugEntities)
2731
0
    xmlGenericError(xmlGenericErrorContext,
2732
0
      "String decoding PE Reference: %.30s\n", str);
2733
847k
      ent = xmlParseStringPEReference(ctxt, &str);
2734
847k
      if (ent != NULL) {
2735
819k
                if (ent->content == NULL) {
2736
        /*
2737
         * Note: external parsed entities will not be loaded,
2738
         * it is not required for a non-validating parser to
2739
         * complete external PEReferences coming from the
2740
         * internal subset
2741
         */
2742
716
        if (((ctxt->options & XML_PARSE_NOENT) != 0) ||
2743
716
      ((ctxt->options & XML_PARSE_DTDVALID) != 0) ||
2744
716
      (ctxt->validate != 0)) {
2745
695
      xmlLoadEntityContent(ctxt, ent);
2746
695
        } else {
2747
21
      xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
2748
21
      "not validating will not read content for PE entity %s\n",
2749
21
                          ent->name, NULL);
2750
21
        }
2751
716
    }
2752
2753
819k
          if ((check) && (xmlParserEntityCheck(ctxt, ent->length)))
2754
59
                    goto int_error;
2755
2756
819k
                if (ent->flags & XML_ENT_EXPANDING) {
2757
7
              xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2758
7
                    xmlHaltParser(ctxt);
2759
7
                    if (ent->content != NULL)
2760
7
                        ent->content[0] = 0;
2761
7
                    goto int_error;
2762
7
                }
2763
2764
819k
                ent->flags |= XML_ENT_EXPANDING;
2765
819k
    ctxt->depth++;
2766
819k
    rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
2767
819k
                        ent->length, what, 0, 0, 0, check);
2768
819k
    ctxt->depth--;
2769
819k
                ent->flags &= ~XML_ENT_EXPANDING;
2770
2771
819k
    if (rep == NULL) {
2772
323
                    if (ent->content != NULL)
2773
10
                        ent->content[0] = 0;
2774
323
                    goto int_error;
2775
323
                }
2776
818k
                current = rep;
2777
585M
                while (*current != 0) { /* non input consuming loop */
2778
584M
                    buffer[nbchars++] = *current++;
2779
584M
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2780
179k
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2781
179k
                    }
2782
584M
                }
2783
818k
                xmlFree(rep);
2784
818k
                rep = NULL;
2785
818k
      }
2786
1.61G
  } else {
2787
1.61G
      COPY_BUF(l,buffer,nbchars,c);
2788
1.61G
      str += l;
2789
1.61G
      if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2790
502k
          growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2791
502k
      }
2792
1.61G
  }
2793
1.64G
  if (str < last)
2794
1.61G
      c = CUR_SCHAR(str, l);
2795
24.4M
  else
2796
24.4M
      c = 0;
2797
1.64G
    }
2798
24.7M
    buffer[nbchars] = 0;
2799
24.7M
    return(buffer);
2800
2801
0
mem_error:
2802
0
    xmlErrMemory(ctxt, NULL);
2803
7.02k
int_error:
2804
7.02k
    if (rep != NULL)
2805
0
        xmlFree(rep);
2806
7.02k
    if (buffer != NULL)
2807
7.02k
        xmlFree(buffer);
2808
7.02k
    return(NULL);
2809
0
}
2810
2811
/**
2812
 * xmlStringLenDecodeEntities:
2813
 * @ctxt:  the parser context
2814
 * @str:  the input string
2815
 * @len: the string length
2816
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2817
 * @end:  an end marker xmlChar, 0 if none
2818
 * @end2:  an end marker xmlChar, 0 if none
2819
 * @end3:  an end marker xmlChar, 0 if none
2820
 *
2821
 * DEPRECATED: Internal function, don't use.
2822
 *
2823
 * Takes a entity string content and process to do the adequate substitutions.
2824
 *
2825
 * [67] Reference ::= EntityRef | CharRef
2826
 *
2827
 * [69] PEReference ::= '%' Name ';'
2828
 *
2829
 * Returns A newly allocated string with the substitution done. The caller
2830
 *      must deallocate it !
2831
 */
2832
xmlChar *
2833
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2834
                           int what, xmlChar end, xmlChar  end2,
2835
132
                           xmlChar end3) {
2836
132
    if ((ctxt == NULL) || (str == NULL) || (len < 0))
2837
0
        return(NULL);
2838
132
    return(xmlStringDecodeEntitiesInt(ctxt, str, len, what,
2839
132
                                      end, end2, end3, 0));
2840
132
}
2841
2842
/**
2843
 * xmlStringDecodeEntities:
2844
 * @ctxt:  the parser context
2845
 * @str:  the input string
2846
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2847
 * @end:  an end marker xmlChar, 0 if none
2848
 * @end2:  an end marker xmlChar, 0 if none
2849
 * @end3:  an end marker xmlChar, 0 if none
2850
 *
2851
 * DEPRECATED: Internal function, don't use.
2852
 *
2853
 * Takes a entity string content and process to do the adequate substitutions.
2854
 *
2855
 * [67] Reference ::= EntityRef | CharRef
2856
 *
2857
 * [69] PEReference ::= '%' Name ';'
2858
 *
2859
 * Returns A newly allocated string with the substitution done. The caller
2860
 *      must deallocate it !
2861
 */
2862
xmlChar *
2863
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2864
33.1k
            xmlChar end, xmlChar  end2, xmlChar end3) {
2865
33.1k
    if ((ctxt == NULL) || (str == NULL)) return(NULL);
2866
33.1k
    return(xmlStringDecodeEntitiesInt(ctxt, str, xmlStrlen(str), what,
2867
33.1k
                                      end, end2, end3, 0));
2868
33.1k
}
2869
2870
/************************************************************************
2871
 *                  *
2872
 *    Commodity functions, cleanup needed ?     *
2873
 *                  *
2874
 ************************************************************************/
2875
2876
/**
2877
 * areBlanks:
2878
 * @ctxt:  an XML parser context
2879
 * @str:  a xmlChar *
2880
 * @len:  the size of @str
2881
 * @blank_chars: we know the chars are blanks
2882
 *
2883
 * Is this a sequence of blank chars that one can ignore ?
2884
 *
2885
 * Returns 1 if ignorable 0 otherwise.
2886
 */
2887
2888
static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2889
1.08M
                     int blank_chars) {
2890
1.08M
    int i, ret;
2891
1.08M
    xmlNodePtr lastChild;
2892
2893
    /*
2894
     * Don't spend time trying to differentiate them, the same callback is
2895
     * used !
2896
     */
2897
1.08M
    if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2898
51.2k
  return(0);
2899
2900
    /*
2901
     * Check for xml:space value.
2902
     */
2903
1.03M
    if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2904
1.03M
        (*(ctxt->space) == -2))
2905
171k
  return(0);
2906
2907
    /*
2908
     * Check that the string is made of blanks
2909
     */
2910
866k
    if (blank_chars == 0) {
2911
2.28M
  for (i = 0;i < len;i++)
2912
1.82M
      if (!(IS_BLANK_CH(str[i]))) return(0);
2913
578k
    }
2914
2915
    /*
2916
     * Look if the element is mixed content in the DTD if available
2917
     */
2918
752k
    if (ctxt->node == NULL) return(0);
2919
744k
    if (ctxt->myDoc != NULL) {
2920
744k
  ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2921
744k
        if (ret == 0) return(1);
2922
495k
        if (ret == 1) return(0);
2923
495k
    }
2924
2925
    /*
2926
     * Otherwise, heuristic :-\
2927
     */
2928
488k
    if ((RAW != '<') && (RAW != 0xD)) return(0);
2929
483k
    if ((ctxt->node->children == NULL) &&
2930
483k
  (RAW == '<') && (NXT(1) == '/')) return(0);
2931
2932
482k
    lastChild = xmlGetLastChild(ctxt->node);
2933
482k
    if (lastChild == NULL) {
2934
122k
        if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2935
122k
            (ctxt->node->content != NULL)) return(0);
2936
360k
    } else if (xmlNodeIsText(lastChild))
2937
6.50k
        return(0);
2938
354k
    else if ((ctxt->node->children != NULL) &&
2939
354k
             (xmlNodeIsText(ctxt->node->children)))
2940
6.70k
        return(0);
2941
469k
    return(1);
2942
482k
}
2943
2944
/************************************************************************
2945
 *                  *
2946
 *    Extra stuff for namespace support     *
2947
 *  Relates to http://www.w3.org/TR/WD-xml-names      *
2948
 *                  *
2949
 ************************************************************************/
2950
2951
/**
2952
 * xmlSplitQName:
2953
 * @ctxt:  an XML parser context
2954
 * @name:  an XML parser context
2955
 * @prefix:  a xmlChar **
2956
 *
2957
 * parse an UTF8 encoded XML qualified name string
2958
 *
2959
 * [NS 5] QName ::= (Prefix ':')? LocalPart
2960
 *
2961
 * [NS 6] Prefix ::= NCName
2962
 *
2963
 * [NS 7] LocalPart ::= NCName
2964
 *
2965
 * Returns the local part, and prefix is updated
2966
 *   to get the Prefix if any.
2967
 */
2968
2969
xmlChar *
2970
4.95M
xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2971
4.95M
    xmlChar buf[XML_MAX_NAMELEN + 5];
2972
4.95M
    xmlChar *buffer = NULL;
2973
4.95M
    int len = 0;
2974
4.95M
    int max = XML_MAX_NAMELEN;
2975
4.95M
    xmlChar *ret = NULL;
2976
4.95M
    const xmlChar *cur = name;
2977
4.95M
    int c;
2978
2979
4.95M
    if (prefix == NULL) return(NULL);
2980
4.95M
    *prefix = NULL;
2981
2982
4.95M
    if (cur == NULL) return(NULL);
2983
2984
#ifndef XML_XML_NAMESPACE
2985
    /* xml: prefix is not really a namespace */
2986
    if ((cur[0] == 'x') && (cur[1] == 'm') &&
2987
        (cur[2] == 'l') && (cur[3] == ':'))
2988
  return(xmlStrdup(name));
2989
#endif
2990
2991
    /* nasty but well=formed */
2992
4.95M
    if (cur[0] == ':')
2993
2.56k
  return(xmlStrdup(name));
2994
2995
4.95M
    c = *cur++;
2996
26.1M
    while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2997
21.1M
  buf[len++] = c;
2998
21.1M
  c = *cur++;
2999
21.1M
    }
3000
4.95M
    if (len >= max) {
3001
  /*
3002
   * Okay someone managed to make a huge name, so he's ready to pay
3003
   * for the processing speed.
3004
   */
3005
1.10k
  max = len * 2;
3006
3007
1.10k
  buffer = (xmlChar *) xmlMallocAtomic(max);
3008
1.10k
  if (buffer == NULL) {
3009
0
      xmlErrMemory(ctxt, NULL);
3010
0
      return(NULL);
3011
0
  }
3012
1.10k
  memcpy(buffer, buf, len);
3013
1.96M
  while ((c != 0) && (c != ':')) { /* tested bigname.xml */
3014
1.96M
      if (len + 10 > max) {
3015
2.83k
          xmlChar *tmp;
3016
3017
2.83k
    max *= 2;
3018
2.83k
    tmp = (xmlChar *) xmlRealloc(buffer, max);
3019
2.83k
    if (tmp == NULL) {
3020
0
        xmlFree(buffer);
3021
0
        xmlErrMemory(ctxt, NULL);
3022
0
        return(NULL);
3023
0
    }
3024
2.83k
    buffer = tmp;
3025
2.83k
      }
3026
1.96M
      buffer[len++] = c;
3027
1.96M
      c = *cur++;
3028
1.96M
  }
3029
1.10k
  buffer[len] = 0;
3030
1.10k
    }
3031
3032
4.95M
    if ((c == ':') && (*cur == 0)) {
3033
808
        if (buffer != NULL)
3034
36
      xmlFree(buffer);
3035
808
  *prefix = NULL;
3036
808
  return(xmlStrdup(name));
3037
808
    }
3038
3039
4.95M
    if (buffer == NULL)
3040
4.95M
  ret = xmlStrndup(buf, len);
3041
1.07k
    else {
3042
1.07k
  ret = buffer;
3043
1.07k
  buffer = NULL;
3044
1.07k
  max = XML_MAX_NAMELEN;
3045
1.07k
    }
3046
3047
3048
4.95M
    if (c == ':') {
3049
494k
  c = *cur;
3050
494k
        *prefix = ret;
3051
494k
  if (c == 0) {
3052
0
      return(xmlStrndup(BAD_CAST "", 0));
3053
0
  }
3054
494k
  len = 0;
3055
3056
  /*
3057
   * Check that the first character is proper to start
3058
   * a new name
3059
   */
3060
494k
  if (!(((c >= 0x61) && (c <= 0x7A)) ||
3061
494k
        ((c >= 0x41) && (c <= 0x5A)) ||
3062
494k
        (c == '_') || (c == ':'))) {
3063
855
      int l;
3064
855
      int first = CUR_SCHAR(cur, l);
3065
3066
855
      if (!IS_LETTER(first) && (first != '_')) {
3067
334
    xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3068
334
          "Name %s is not XML Namespace compliant\n",
3069
334
          name);
3070
334
      }
3071
855
  }
3072
494k
  cur++;
3073
3074
3.02M
  while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3075
2.52M
      buf[len++] = c;
3076
2.52M
      c = *cur++;
3077
2.52M
  }
3078
494k
  if (len >= max) {
3079
      /*
3080
       * Okay someone managed to make a huge name, so he's ready to pay
3081
       * for the processing speed.
3082
       */
3083
280
      max = len * 2;
3084
3085
280
      buffer = (xmlChar *) xmlMallocAtomic(max);
3086
280
      if (buffer == NULL) {
3087
0
          xmlErrMemory(ctxt, NULL);
3088
0
    return(NULL);
3089
0
      }
3090
280
      memcpy(buffer, buf, len);
3091
541k
      while (c != 0) { /* tested bigname2.xml */
3092
541k
    if (len + 10 > max) {
3093
664
        xmlChar *tmp;
3094
3095
664
        max *= 2;
3096
664
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3097
664
        if (tmp == NULL) {
3098
0
      xmlErrMemory(ctxt, NULL);
3099
0
      xmlFree(buffer);
3100
0
      return(NULL);
3101
0
        }
3102
664
        buffer = tmp;
3103
664
    }
3104
541k
    buffer[len++] = c;
3105
541k
    c = *cur++;
3106
541k
      }
3107
280
      buffer[len] = 0;
3108
280
  }
3109
3110
494k
  if (buffer == NULL)
3111
493k
      ret = xmlStrndup(buf, len);
3112
280
  else {
3113
280
      ret = buffer;
3114
280
  }
3115
494k
    }
3116
3117
4.95M
    return(ret);
3118
4.95M
}
3119
3120
/************************************************************************
3121
 *                  *
3122
 *      The parser itself       *
3123
 *  Relates to http://www.w3.org/TR/REC-xml       *
3124
 *                  *
3125
 ************************************************************************/
3126
3127
/************************************************************************
3128
 *                  *
3129
 *  Routines to parse Name, NCName and NmToken      *
3130
 *                  *
3131
 ************************************************************************/
3132
#ifdef DEBUG
/*
 * Call counters for the name parsing routines, only compiled in DEBUG
 * builds. Objects with static storage duration start at zero.
 */
static unsigned long nbParseName;
static unsigned long nbParseNameComplex;
static unsigned long nbParseNCName;
static unsigned long nbParseNCNameComplex;
static unsigned long nbParseNmToken;
static unsigned long nbParseStringName;
#endif
3140
3141
/*
3142
 * The two following functions are related to the change of accepted
3143
 * characters for Name and NmToken in the Revision 5 of XML-1.0
3144
 * They correspond to the modified production [4] and the new production [4a]
3145
 * changes in that revision. Also note that the macros used for the
3146
 * productions Letter, Digit, CombiningChar and Extender are not needed
3147
 * anymore.
3148
 * We still keep compatibility to pre-revision5 parsing semantic if the
3149
 * new XML_PARSE_OLD10 option is given to the parser.
3150
 */
3151
static int
3152
28.7M
xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3153
28.7M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3154
        /*
3155
   * Use the new checks of production [4] [4a] amd [5] of the
3156
   * Update 5 of XML-1.0
3157
   */
3158
20.7M
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3159
20.7M
      (((c >= 'a') && (c <= 'z')) ||
3160
20.7M
       ((c >= 'A') && (c <= 'Z')) ||
3161
20.7M
       (c == '_') || (c == ':') ||
3162
20.7M
       ((c >= 0xC0) && (c <= 0xD6)) ||
3163
20.7M
       ((c >= 0xD8) && (c <= 0xF6)) ||
3164
20.7M
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3165
20.7M
       ((c >= 0x370) && (c <= 0x37D)) ||
3166
20.7M
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3167
20.7M
       ((c >= 0x200C) && (c <= 0x200D)) ||
3168
20.7M
       ((c >= 0x2070) && (c <= 0x218F)) ||
3169
20.7M
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3170
20.7M
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3171
20.7M
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3172
20.7M
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3173
20.7M
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3174
20.7M
      return(1);
3175
20.7M
    } else {
3176
8.02M
        if (IS_LETTER(c) || (c == '_') || (c == ':'))
3177
8.02M
      return(1);
3178
8.02M
    }
3179
28.4k
    return(0);
3180
28.7M
}
3181
3182
static int
3183
2.01G
xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3184
2.01G
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3185
        /*
3186
   * Use the new checks of production [4] [4a] amd [5] of the
3187
   * Update 5 of XML-1.0
3188
   */
3189
1.59G
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3190
1.59G
      (((c >= 'a') && (c <= 'z')) ||
3191
1.58G
       ((c >= 'A') && (c <= 'Z')) ||
3192
1.58G
       ((c >= '0') && (c <= '9')) || /* !start */
3193
1.58G
       (c == '_') || (c == ':') ||
3194
1.58G
       (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3195
1.58G
       ((c >= 0xC0) && (c <= 0xD6)) ||
3196
1.58G
       ((c >= 0xD8) && (c <= 0xF6)) ||
3197
1.58G
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3198
1.58G
       ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3199
1.58G
       ((c >= 0x370) && (c <= 0x37D)) ||
3200
1.58G
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3201
1.58G
       ((c >= 0x200C) && (c <= 0x200D)) ||
3202
1.58G
       ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3203
1.58G
       ((c >= 0x2070) && (c <= 0x218F)) ||
3204
1.58G
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3205
1.58G
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3206
1.58G
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3207
1.58G
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3208
1.58G
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3209
1.56G
       return(1);
3210
1.59G
    } else {
3211
420M
        if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3212
420M
            (c == '.') || (c == '-') ||
3213
420M
      (c == '_') || (c == ':') ||
3214
420M
      (IS_COMBINING(c)) ||
3215
420M
      (IS_EXTENDER(c)))
3216
412M
      return(1);
3217
420M
    }
3218
29.6M
    return(0);
3219
2.01G
}
3220
3221
static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
3222
                                          int *len, int *alloc, int normalize);
3223
3224
static const xmlChar *
3225
539k
xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3226
539k
    int len = 0, l;
3227
539k
    int c;
3228
539k
    int count = 0;
3229
539k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3230
111k
                    XML_MAX_TEXT_LENGTH :
3231
539k
                    XML_MAX_NAME_LENGTH;
3232
3233
#ifdef DEBUG
3234
    nbParseNameComplex++;
3235
#endif
3236
3237
    /*
3238
     * Handler for more complex cases
3239
     */
3240
539k
    GROW;
3241
539k
    if (ctxt->instate == XML_PARSER_EOF)
3242
0
        return(NULL);
3243
539k
    c = CUR_CHAR(l);
3244
539k
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3245
        /*
3246
   * Use the new checks of production [4] [4a] amd [5] of the
3247
   * Update 5 of XML-1.0
3248
   */
3249
394k
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3250
394k
      (!(((c >= 'a') && (c <= 'z')) ||
3251
392k
         ((c >= 'A') && (c <= 'Z')) ||
3252
392k
         (c == '_') || (c == ':') ||
3253
392k
         ((c >= 0xC0) && (c <= 0xD6)) ||
3254
392k
         ((c >= 0xD8) && (c <= 0xF6)) ||
3255
392k
         ((c >= 0xF8) && (c <= 0x2FF)) ||
3256
392k
         ((c >= 0x370) && (c <= 0x37D)) ||
3257
392k
         ((c >= 0x37F) && (c <= 0x1FFF)) ||
3258
392k
         ((c >= 0x200C) && (c <= 0x200D)) ||
3259
392k
         ((c >= 0x2070) && (c <= 0x218F)) ||
3260
392k
         ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3261
392k
         ((c >= 0x3001) && (c <= 0xD7FF)) ||
3262
392k
         ((c >= 0xF900) && (c <= 0xFDCF)) ||
3263
392k
         ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3264
392k
         ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3265
24.7k
      return(NULL);
3266
24.7k
  }
3267
369k
  len += l;
3268
369k
  NEXTL(l);
3269
369k
  c = CUR_CHAR(l);
3270
3.45M
  while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3271
3.45M
         (((c >= 'a') && (c <= 'z')) ||
3272
3.44M
          ((c >= 'A') && (c <= 'Z')) ||
3273
3.44M
          ((c >= '0') && (c <= '9')) || /* !start */
3274
3.44M
          (c == '_') || (c == ':') ||
3275
3.44M
          (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3276
3.44M
          ((c >= 0xC0) && (c <= 0xD6)) ||
3277
3.44M
          ((c >= 0xD8) && (c <= 0xF6)) ||
3278
3.44M
          ((c >= 0xF8) && (c <= 0x2FF)) ||
3279
3.44M
          ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3280
3.44M
          ((c >= 0x370) && (c <= 0x37D)) ||
3281
3.44M
          ((c >= 0x37F) && (c <= 0x1FFF)) ||
3282
3.44M
          ((c >= 0x200C) && (c <= 0x200D)) ||
3283
3.44M
          ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3284
3.44M
          ((c >= 0x2070) && (c <= 0x218F)) ||
3285
3.44M
          ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3286
3.44M
          ((c >= 0x3001) && (c <= 0xD7FF)) ||
3287
3.44M
          ((c >= 0xF900) && (c <= 0xFDCF)) ||
3288
3.44M
          ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3289
3.44M
          ((c >= 0x10000) && (c <= 0xEFFFF))
3290
3.44M
    )) {
3291
3.08M
      if (count++ > XML_PARSER_CHUNK_SIZE) {
3292
14.1k
    count = 0;
3293
14.1k
    GROW;
3294
14.1k
                if (ctxt->instate == XML_PARSER_EOF)
3295
0
                    return(NULL);
3296
14.1k
      }
3297
3.08M
            if (len <= INT_MAX - l)
3298
3.08M
          len += l;
3299
3.08M
      NEXTL(l);
3300
3.08M
      c = CUR_CHAR(l);
3301
3.08M
  }
3302
369k
    } else {
3303
145k
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3304
145k
      (!IS_LETTER(c) && (c != '_') &&
3305
141k
       (c != ':'))) {
3306
36.3k
      return(NULL);
3307
36.3k
  }
3308
108k
  len += l;
3309
108k
  NEXTL(l);
3310
108k
  c = CUR_CHAR(l);
3311
3312
1.65M
  while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3313
1.65M
         ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3314
1.65M
    (c == '.') || (c == '-') ||
3315
1.65M
    (c == '_') || (c == ':') ||
3316
1.65M
    (IS_COMBINING(c)) ||
3317
1.65M
    (IS_EXTENDER(c)))) {
3318
1.54M
      if (count++ > XML_PARSER_CHUNK_SIZE) {
3319
9.90k
    count = 0;
3320
9.90k
    GROW;
3321
9.90k
                if (ctxt->instate == XML_PARSER_EOF)
3322
0
                    return(NULL);
3323
9.90k
      }
3324
1.54M
            if (len <= INT_MAX - l)
3325
1.54M
          len += l;
3326
1.54M
      NEXTL(l);
3327
1.54M
      c = CUR_CHAR(l);
3328
1.54M
  }
3329
108k
    }
3330
478k
    if (len > maxLength) {
3331
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3332
0
        return(NULL);
3333
0
    }
3334
478k
    if (ctxt->input->cur - ctxt->input->base < len) {
3335
        /*
3336
         * There were a couple of bugs where PERefs lead to to a change
3337
         * of the buffer. Check the buffer size to avoid passing an invalid
3338
         * pointer to xmlDictLookup.
3339
         */
3340
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3341
0
                    "unexpected change of input buffer");
3342
0
        return (NULL);
3343
0
    }
3344
478k
    if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3345
334
        return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3346
478k
    return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3347
478k
}
3348
3349
/**
3350
 * xmlParseName:
3351
 * @ctxt:  an XML parser context
3352
 *
3353
 * DEPRECATED: Internal function, don't use.
3354
 *
3355
 * parse an XML name.
3356
 *
3357
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3358
 *                  CombiningChar | Extender
3359
 *
3360
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3361
 *
3362
 * [6] Names ::= Name (#x20 Name)*
3363
 *
3364
 * Returns the Name parsed or NULL
3365
 */
3366
3367
const xmlChar *
3368
84.0M
xmlParseName(xmlParserCtxtPtr ctxt) {
3369
84.0M
    const xmlChar *in;
3370
84.0M
    const xmlChar *ret;
3371
84.0M
    size_t count = 0;
3372
84.0M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3373
16.7M
                       XML_MAX_TEXT_LENGTH :
3374
84.0M
                       XML_MAX_NAME_LENGTH;
3375
3376
84.0M
    GROW;
3377
3378
#ifdef DEBUG
3379
    nbParseName++;
3380
#endif
3381
3382
    /*
3383
     * Accelerator for simple ASCII names
3384
     */
3385
84.0M
    in = ctxt->input->cur;
3386
84.0M
    if (((*in >= 0x61) && (*in <= 0x7A)) ||
3387
84.0M
  ((*in >= 0x41) && (*in <= 0x5A)) ||
3388
84.0M
  (*in == '_') || (*in == ':')) {
3389
83.9M
  in++;
3390
382M
  while (((*in >= 0x61) && (*in <= 0x7A)) ||
3391
382M
         ((*in >= 0x41) && (*in <= 0x5A)) ||
3392
382M
         ((*in >= 0x30) && (*in <= 0x39)) ||
3393
382M
         (*in == '_') || (*in == '-') ||
3394
382M
         (*in == ':') || (*in == '.'))
3395
298M
      in++;
3396
83.9M
  if ((*in > 0) && (*in < 0x80)) {
3397
83.5M
      count = in - ctxt->input->cur;
3398
83.5M
            if (count > maxLength) {
3399
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3400
0
                return(NULL);
3401
0
            }
3402
83.5M
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3403
83.5M
      ctxt->input->cur = in;
3404
83.5M
      ctxt->input->col += count;
3405
83.5M
      if (ret == NULL)
3406
0
          xmlErrMemory(ctxt, NULL);
3407
83.5M
      return(ret);
3408
83.5M
  }
3409
83.9M
    }
3410
    /* accelerator for special cases */
3411
539k
    return(xmlParseNameComplex(ctxt));
3412
84.0M
}
3413
3414
static const xmlChar *
3415
58.3k
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3416
58.3k
    int len = 0, l;
3417
58.3k
    int c;
3418
58.3k
    int count = 0;
3419
58.3k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3420
19.4k
                    XML_MAX_TEXT_LENGTH :
3421
58.3k
                    XML_MAX_NAME_LENGTH;
3422
58.3k
    size_t startPosition = 0;
3423
3424
#ifdef DEBUG
3425
    nbParseNCNameComplex++;
3426
#endif
3427
3428
    /*
3429
     * Handler for more complex cases
3430
     */
3431
58.3k
    GROW;
3432
58.3k
    startPosition = CUR_PTR - BASE_PTR;
3433
58.3k
    c = CUR_CHAR(l);
3434
58.3k
    if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3435
58.3k
  (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3436
35.9k
  return(NULL);
3437
35.9k
    }
3438
3439
1.32M
    while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3440
1.32M
     (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3441
1.30M
  if (count++ > XML_PARSER_CHUNK_SIZE) {
3442
11.3k
      count = 0;
3443
11.3k
      GROW;
3444
11.3k
            if (ctxt->instate == XML_PARSER_EOF)
3445
0
                return(NULL);
3446
11.3k
  }
3447
1.30M
        if (len <= INT_MAX - l)
3448
1.30M
      len += l;
3449
1.30M
  NEXTL(l);
3450
1.30M
  c = CUR_CHAR(l);
3451
1.30M
  if (c == 0) {
3452
5.29k
      count = 0;
3453
      /*
3454
       * when shrinking to extend the buffer we really need to preserve
3455
       * the part of the name we already parsed. Hence rolling back
3456
       * by current length.
3457
       */
3458
5.29k
      ctxt->input->cur -= l;
3459
5.29k
      GROW;
3460
5.29k
            if (ctxt->instate == XML_PARSER_EOF)
3461
0
                return(NULL);
3462
5.29k
      ctxt->input->cur += l;
3463
5.29k
      c = CUR_CHAR(l);
3464
5.29k
  }
3465
1.30M
    }
3466
22.3k
    if (len > maxLength) {
3467
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3468
0
        return(NULL);
3469
0
    }
3470
22.3k
    return(xmlDictLookup(ctxt->dict, (BASE_PTR + startPosition), len));
3471
22.3k
}
3472
3473
/**
3474
 * xmlParseNCName:
3475
 * @ctxt:  an XML parser context
3476
 * @len:  length of the string parsed
3477
 *
3478
 * parse an XML name.
3479
 *
3480
 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3481
 *                      CombiningChar | Extender
3482
 *
3483
 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3484
 *
3485
 * Returns the Name parsed or NULL
3486
 */
3487
3488
static const xmlChar *
3489
4.10M
xmlParseNCName(xmlParserCtxtPtr ctxt) {
3490
4.10M
    const xmlChar *in, *e;
3491
4.10M
    const xmlChar *ret;
3492
4.10M
    size_t count = 0;
3493
4.10M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3494
491k
                       XML_MAX_TEXT_LENGTH :
3495
4.10M
                       XML_MAX_NAME_LENGTH;
3496
3497
#ifdef DEBUG
3498
    nbParseNCName++;
3499
#endif
3500
3501
    /*
3502
     * Accelerator for simple ASCII names
3503
     */
3504
4.10M
    in = ctxt->input->cur;
3505
4.10M
    e = ctxt->input->end;
3506
4.10M
    if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3507
4.10M
   ((*in >= 0x41) && (*in <= 0x5A)) ||
3508
4.10M
   (*in == '_')) && (in < e)) {
3509
4.06M
  in++;
3510
20.0M
  while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3511
20.0M
          ((*in >= 0x41) && (*in <= 0x5A)) ||
3512
20.0M
          ((*in >= 0x30) && (*in <= 0x39)) ||
3513
20.0M
          (*in == '_') || (*in == '-') ||
3514
20.0M
          (*in == '.')) && (in < e))
3515
16.0M
      in++;
3516
4.06M
  if (in >= e)
3517
2.55k
      goto complex;
3518
4.06M
  if ((*in > 0) && (*in < 0x80)) {
3519
4.04M
      count = in - ctxt->input->cur;
3520
4.04M
            if (count > maxLength) {
3521
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3522
0
                return(NULL);
3523
0
            }
3524
4.04M
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3525
4.04M
      ctxt->input->cur = in;
3526
4.04M
      ctxt->input->col += count;
3527
4.04M
      if (ret == NULL) {
3528
0
          xmlErrMemory(ctxt, NULL);
3529
0
      }
3530
4.04M
      return(ret);
3531
4.04M
  }
3532
4.06M
    }
3533
58.3k
complex:
3534
58.3k
    return(xmlParseNCNameComplex(ctxt));
3535
4.10M
}
3536
3537
/**
3538
 * xmlParseNameAndCompare:
3539
 * @ctxt:  an XML parser context
3540
 *
3541
 * parse an XML name and compares for match
3542
 * (specialized for endtag parsing)
3543
 *
3544
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3545
 * and the name for mismatch
3546
 */
3547
3548
static const xmlChar *
3549
2.81M
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3550
2.81M
    register const xmlChar *cmp = other;
3551
2.81M
    register const xmlChar *in;
3552
2.81M
    const xmlChar *ret;
3553
3554
2.81M
    GROW;
3555
2.81M
    if (ctxt->instate == XML_PARSER_EOF)
3556
0
        return(NULL);
3557
3558
2.81M
    in = ctxt->input->cur;
3559
15.3M
    while (*in != 0 && *in == *cmp) {
3560
12.5M
  ++in;
3561
12.5M
  ++cmp;
3562
12.5M
    }
3563
2.81M
    if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3564
  /* success */
3565
2.76M
  ctxt->input->col += in - ctxt->input->cur;
3566
2.76M
  ctxt->input->cur = in;
3567
2.76M
  return (const xmlChar*) 1;
3568
2.76M
    }
3569
    /* failure (or end of input buffer), check with full function */
3570
46.1k
    ret = xmlParseName (ctxt);
3571
    /* strings coming from the dictionary direct compare possible */
3572
46.1k
    if (ret == other) {
3573
3.41k
  return (const xmlChar*) 1;
3574
3.41k
    }
3575
42.7k
    return ret;
3576
46.1k
}
3577
3578
/**
3579
 * xmlParseStringName:
3580
 * @ctxt:  an XML parser context
3581
 * @str:  a pointer to the string pointer (IN/OUT)
3582
 *
3583
 * parse an XML name.
3584
 *
3585
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3586
 *                  CombiningChar | Extender
3587
 *
3588
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3589
 *
3590
 * [6] Names ::= Name (#x20 Name)*
3591
 *
3592
 * Returns the Name parsed or NULL. The @str pointer
3593
 * is updated to the current location in the string.
3594
 */
3595
3596
static xmlChar *
3597
28.7M
xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3598
28.7M
    xmlChar buf[XML_MAX_NAMELEN + 5];
3599
28.7M
    const xmlChar *cur = *str;
3600
28.7M
    int len = 0, l;
3601
28.7M
    int c;
3602
28.7M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3603
7.97M
                    XML_MAX_TEXT_LENGTH :
3604
28.7M
                    XML_MAX_NAME_LENGTH;
3605
3606
#ifdef DEBUG
3607
    nbParseStringName++;
3608
#endif
3609
3610
28.7M
    c = CUR_SCHAR(cur, l);
3611
28.7M
    if (!xmlIsNameStartChar(ctxt, c)) {
3612
890
  return(NULL);
3613
890
    }
3614
3615
28.7M
    COPY_BUF(l,buf,len,c);
3616
28.7M
    cur += l;
3617
28.7M
    c = CUR_SCHAR(cur, l);
3618
696M
    while (xmlIsNameChar(ctxt, c)) {
3619
673M
  COPY_BUF(l,buf,len,c);
3620
673M
  cur += l;
3621
673M
  c = CUR_SCHAR(cur, l);
3622
673M
  if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3623
      /*
3624
       * Okay someone managed to make a huge name, so he's ready to pay
3625
       * for the processing speed.
3626
       */
3627
5.74M
      xmlChar *buffer;
3628
5.74M
      int max = len * 2;
3629
3630
5.74M
      buffer = (xmlChar *) xmlMallocAtomic(max);
3631
5.74M
      if (buffer == NULL) {
3632
0
          xmlErrMemory(ctxt, NULL);
3633
0
    return(NULL);
3634
0
      }
3635
5.74M
      memcpy(buffer, buf, len);
3636
1.30G
      while (xmlIsNameChar(ctxt, c)) {
3637
1.30G
    if (len + 10 > max) {
3638
5.74M
        xmlChar *tmp;
3639
3640
5.74M
        max *= 2;
3641
5.74M
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3642
5.74M
        if (tmp == NULL) {
3643
0
      xmlErrMemory(ctxt, NULL);
3644
0
      xmlFree(buffer);
3645
0
      return(NULL);
3646
0
        }
3647
5.74M
        buffer = tmp;
3648
5.74M
    }
3649
1.30G
    COPY_BUF(l,buffer,len,c);
3650
1.30G
    cur += l;
3651
1.30G
    c = CUR_SCHAR(cur, l);
3652
1.30G
                if (len > maxLength) {
3653
0
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3654
0
                    xmlFree(buffer);
3655
0
                    return(NULL);
3656
0
                }
3657
1.30G
      }
3658
5.74M
      buffer[len] = 0;
3659
5.74M
      *str = cur;
3660
5.74M
      return(buffer);
3661
5.74M
  }
3662
673M
    }
3663
22.9M
    if (len > maxLength) {
3664
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3665
0
        return(NULL);
3666
0
    }
3667
22.9M
    *str = cur;
3668
22.9M
    return(xmlStrndup(buf, len));
3669
22.9M
}
3670
3671
/**
3672
 * xmlParseNmtoken:
3673
 * @ctxt:  an XML parser context
3674
 *
3675
 * DEPRECATED: Internal function, don't use.
3676
 *
3677
 * parse an XML Nmtoken.
3678
 *
3679
 * [7] Nmtoken ::= (NameChar)+
3680
 *
3681
 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3682
 *
3683
 * Returns the Nmtoken parsed or NULL
3684
 */
3685
3686
xmlChar *
3687
913k
xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3688
913k
    xmlChar buf[XML_MAX_NAMELEN + 5];
3689
913k
    int len = 0, l;
3690
913k
    int c;
3691
913k
    int count = 0;
3692
913k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3693
134k
                    XML_MAX_TEXT_LENGTH :
3694
913k
                    XML_MAX_NAME_LENGTH;
3695
3696
#ifdef DEBUG
3697
    nbParseNmToken++;
3698
#endif
3699
3700
913k
    GROW;
3701
913k
    if (ctxt->instate == XML_PARSER_EOF)
3702
0
        return(NULL);
3703
913k
    c = CUR_CHAR(l);
3704
3705
5.61M
    while (xmlIsNameChar(ctxt, c)) {
3706
4.69M
  if (count++ > XML_PARSER_CHUNK_SIZE) {
3707
0
      count = 0;
3708
0
      GROW;
3709
0
  }
3710
4.69M
  COPY_BUF(l,buf,len,c);
3711
4.69M
  NEXTL(l);
3712
4.69M
  c = CUR_CHAR(l);
3713
4.69M
  if (c == 0) {
3714
250
      count = 0;
3715
250
      GROW;
3716
250
      if (ctxt->instate == XML_PARSER_EOF)
3717
0
    return(NULL);
3718
250
            c = CUR_CHAR(l);
3719
250
  }
3720
4.69M
  if (len >= XML_MAX_NAMELEN) {
3721
      /*
3722
       * Okay someone managed to make a huge token, so he's ready to pay
3723
       * for the processing speed.
3724
       */
3725
305
      xmlChar *buffer;
3726
305
      int max = len * 2;
3727
3728
305
      buffer = (xmlChar *) xmlMallocAtomic(max);
3729
305
      if (buffer == NULL) {
3730
0
          xmlErrMemory(ctxt, NULL);
3731
0
    return(NULL);
3732
0
      }
3733
305
      memcpy(buffer, buf, len);
3734
848k
      while (xmlIsNameChar(ctxt, c)) {
3735
848k
    if (count++ > XML_PARSER_CHUNK_SIZE) {
3736
8.45k
        count = 0;
3737
8.45k
        GROW;
3738
8.45k
                    if (ctxt->instate == XML_PARSER_EOF) {
3739
0
                        xmlFree(buffer);
3740
0
                        return(NULL);
3741
0
                    }
3742
8.45k
    }
3743
848k
    if (len + 10 > max) {
3744
972
        xmlChar *tmp;
3745
3746
972
        max *= 2;
3747
972
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3748
972
        if (tmp == NULL) {
3749
0
      xmlErrMemory(ctxt, NULL);
3750
0
      xmlFree(buffer);
3751
0
      return(NULL);
3752
0
        }
3753
972
        buffer = tmp;
3754
972
    }
3755
848k
    COPY_BUF(l,buffer,len,c);
3756
848k
    NEXTL(l);
3757
848k
    c = CUR_CHAR(l);
3758
848k
                if (len > maxLength) {
3759
0
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3760
0
                    xmlFree(buffer);
3761
0
                    return(NULL);
3762
0
                }
3763
848k
      }
3764
305
      buffer[len] = 0;
3765
305
      return(buffer);
3766
305
  }
3767
4.69M
    }
3768
913k
    if (len == 0)
3769
2.13k
        return(NULL);
3770
911k
    if (len > maxLength) {
3771
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3772
0
        return(NULL);
3773
0
    }
3774
911k
    return(xmlStrndup(buf, len));
3775
911k
}
3776
3777
/**
3778
 * xmlParseEntityValue:
3779
 * @ctxt:  an XML parser context
3780
 * @orig:  if non-NULL store a copy of the original entity value
3781
 *
3782
 * DEPRECATED: Internal function, don't use.
3783
 *
3784
 * parse a value for ENTITY declarations
3785
 *
3786
 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3787
 *                 "'" ([^%&'] | PEReference | Reference)* "'"
3788
 *
3789
 * Returns the EntityValue parsed with reference substituted or NULL
3790
 */
3791
3792
xmlChar *
3793
964k
xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3794
964k
    xmlChar *buf = NULL;
3795
964k
    int len = 0;
3796
964k
    int size = XML_PARSER_BUFFER_SIZE;
3797
964k
    int c, l;
3798
964k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3799
247k
                    XML_MAX_HUGE_LENGTH :
3800
964k
                    XML_MAX_TEXT_LENGTH;
3801
964k
    xmlChar stop;
3802
964k
    xmlChar *ret = NULL;
3803
964k
    const xmlChar *cur = NULL;
3804
964k
    xmlParserInputPtr input;
3805
3806
964k
    if (RAW == '"') stop = '"';
3807
206k
    else if (RAW == '\'') stop = '\'';
3808
0
    else {
3809
0
  xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
3810
0
  return(NULL);
3811
0
    }
3812
964k
    buf = (xmlChar *) xmlMallocAtomic(size);
3813
964k
    if (buf == NULL) {
3814
0
  xmlErrMemory(ctxt, NULL);
3815
0
  return(NULL);
3816
0
    }
3817
3818
    /*
3819
     * The content of the entity definition is copied in a buffer.
3820
     */
3821
3822
964k
    ctxt->instate = XML_PARSER_ENTITY_VALUE;
3823
964k
    input = ctxt->input;
3824
964k
    GROW;
3825
964k
    if (ctxt->instate == XML_PARSER_EOF)
3826
0
        goto error;
3827
964k
    NEXT;
3828
964k
    c = CUR_CHAR(l);
3829
    /*
3830
     * NOTE: 4.4.5 Included in Literal
3831
     * When a parameter entity reference appears in a literal entity
3832
     * value, ... a single or double quote character in the replacement
3833
     * text is always treated as a normal data character and will not
3834
     * terminate the literal.
3835
     * In practice it means we stop the loop only when back at parsing
3836
     * the initial entity and the quote is found
3837
     */
3838
82.7M
    while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3839
82.7M
      (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
3840
81.7M
  if (len + 5 >= size) {
3841
236k
      xmlChar *tmp;
3842
3843
236k
      size *= 2;
3844
236k
      tmp = (xmlChar *) xmlRealloc(buf, size);
3845
236k
      if (tmp == NULL) {
3846
0
    xmlErrMemory(ctxt, NULL);
3847
0
                goto error;
3848
0
      }
3849
236k
      buf = tmp;
3850
236k
  }
3851
81.7M
  COPY_BUF(l,buf,len,c);
3852
81.7M
  NEXTL(l);
3853
3854
81.7M
  GROW;
3855
81.7M
  c = CUR_CHAR(l);
3856
81.7M
  if (c == 0) {
3857
803
      GROW;
3858
803
      c = CUR_CHAR(l);
3859
803
  }
3860
3861
81.7M
        if (len > maxLength) {
3862
0
            xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
3863
0
                           "entity value too long\n");
3864
0
            goto error;
3865
0
        }
3866
81.7M
    }
3867
964k
    buf[len] = 0;
3868
964k
    if (ctxt->instate == XML_PARSER_EOF)
3869
0
        goto error;
3870
964k
    if (c != stop) {
3871
1.30k
        xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3872
1.30k
        goto error;
3873
1.30k
    }
3874
962k
    NEXT;
3875
3876
    /*
3877
     * Raise problem w.r.t. '&' and '%' being used in non-entities
3878
     * reference constructs. Note Charref will be handled in
3879
     * xmlStringDecodeEntities()
3880
     */
3881
962k
    cur = buf;
3882
45.4M
    while (*cur != 0) { /* non input consuming */
3883
44.4M
  if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3884
1.03M
      xmlChar *name;
3885
1.03M
      xmlChar tmp = *cur;
3886
1.03M
            int nameOk = 0;
3887
3888
1.03M
      cur++;
3889
1.03M
      name = xmlParseStringName(ctxt, &cur);
3890
1.03M
            if (name != NULL) {
3891
1.03M
                nameOk = 1;
3892
1.03M
                xmlFree(name);
3893
1.03M
            }
3894
1.03M
            if ((nameOk == 0) || (*cur != ';')) {
3895
2.78k
    xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3896
2.78k
      "EntityValue: '%c' forbidden except for entities references\n",
3897
2.78k
                            tmp);
3898
2.78k
                goto error;
3899
2.78k
      }
3900
1.03M
      if ((tmp == '%') && (ctxt->inSubset == 1) &&
3901
1.03M
    (ctxt->inputNr == 1)) {
3902
67
    xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3903
67
                goto error;
3904
67
      }
3905
1.03M
      if (*cur == 0)
3906
0
          break;
3907
1.03M
  }
3908
44.4M
  cur++;
3909
44.4M
    }
3910
3911
    /*
3912
     * Then PEReference entities are substituted.
3913
     *
3914
     * NOTE: 4.4.7 Bypassed
3915
     * When a general entity reference appears in the EntityValue in
3916
     * an entity declaration, it is bypassed and left as is.
3917
     * so XML_SUBSTITUTE_REF is not set here.
3918
     */
3919
960k
    ++ctxt->depth;
3920
960k
    ret = xmlStringDecodeEntitiesInt(ctxt, buf, len, XML_SUBSTITUTE_PEREF,
3921
960k
                                     0, 0, 0, /* check */ 1);
3922
960k
    --ctxt->depth;
3923
3924
960k
    if (orig != NULL) {
3925
960k
        *orig = buf;
3926
960k
        buf = NULL;
3927
960k
    }
3928
3929
964k
error:
3930
964k
    if (buf != NULL)
3931
4.16k
        xmlFree(buf);
3932
964k
    return(ret);
3933
960k
}
3934
3935
/**
3936
 * xmlParseAttValueComplex:
3937
 * @ctxt:  an XML parser context
3938
 * @len:   the resulting attribute len
3939
 * @normalize:  whether to apply the inner normalization
3940
 *
3941
 * parse a value for an attribute, this is the fallback function
3942
 * of xmlParseAttValue() when the attribute parsing requires handling
3943
 * of non-ASCII characters, or normalization compaction.
3944
 *
3945
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3946
 */
3947
static xmlChar *
3948
96.5k
xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
3949
96.5k
    xmlChar limit = 0;
3950
96.5k
    xmlChar *buf = NULL;
3951
96.5k
    xmlChar *rep = NULL;
3952
96.5k
    size_t len = 0;
3953
96.5k
    size_t buf_size = 0;
3954
96.5k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3955
48.3k
                       XML_MAX_HUGE_LENGTH :
3956
96.5k
                       XML_MAX_TEXT_LENGTH;
3957
96.5k
    int c, l, in_space = 0;
3958
96.5k
    xmlChar *current = NULL;
3959
96.5k
    xmlEntityPtr ent;
3960
3961
96.5k
    if (NXT(0) == '"') {
3962
44.7k
  ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3963
44.7k
  limit = '"';
3964
44.7k
        NEXT;
3965
51.8k
    } else if (NXT(0) == '\'') {
3966
51.8k
  limit = '\'';
3967
51.8k
  ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3968
51.8k
        NEXT;
3969
51.8k
    } else {
3970
0
  xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3971
0
  return(NULL);
3972
0
    }
3973
3974
    /*
3975
     * allocate a translation buffer.
3976
     */
3977
96.5k
    buf_size = XML_PARSER_BUFFER_SIZE;
3978
96.5k
    buf = (xmlChar *) xmlMallocAtomic(buf_size);
3979
96.5k
    if (buf == NULL) goto mem_error;
3980
3981
    /*
3982
     * OK loop until we reach one of the ending char or a size limit.
3983
     */
3984
96.5k
    c = CUR_CHAR(l);
3985
4.14M
    while (((NXT(0) != limit) && /* checked */
3986
4.14M
            (IS_CHAR(c)) && (c != '<')) &&
3987
4.14M
            (ctxt->instate != XML_PARSER_EOF)) {
3988
4.04M
  if (c == '&') {
3989
895k
      in_space = 0;
3990
895k
      if (NXT(1) == '#') {
3991
44.7k
    int val = xmlParseCharRef(ctxt);
3992
3993
44.7k
    if (val == '&') {
3994
387
        if (ctxt->replaceEntities) {
3995
95
      if (len + 10 > buf_size) {
3996
0
          growBuffer(buf, 10);
3997
0
      }
3998
95
      buf[len++] = '&';
3999
292
        } else {
4000
      /*
4001
       * The reparsing will be done in xmlStringGetNodeList()
4002
       * called by the attribute() function in SAX.c
4003
       */
4004
292
      if (len + 10 > buf_size) {
4005
0
          growBuffer(buf, 10);
4006
0
      }
4007
292
      buf[len++] = '&';
4008
292
      buf[len++] = '#';
4009
292
      buf[len++] = '3';
4010
292
      buf[len++] = '8';
4011
292
      buf[len++] = ';';
4012
292
        }
4013
44.3k
    } else if (val != 0) {
4014
41.6k
        if (len + 10 > buf_size) {
4015
1.02k
      growBuffer(buf, 10);
4016
1.02k
        }
4017
41.6k
        len += xmlCopyChar(0, &buf[len], val);
4018
41.6k
    }
4019
851k
      } else {
4020
851k
    ent = xmlParseEntityRef(ctxt);
4021
851k
    if ((ent != NULL) &&
4022
851k
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
4023
33.8k
        if (len + 10 > buf_size) {
4024
0
      growBuffer(buf, 10);
4025
0
        }
4026
33.8k
        if ((ctxt->replaceEntities == 0) &&
4027
33.8k
            (ent->content[0] == '&')) {
4028
4.98k
      buf[len++] = '&';
4029
4.98k
      buf[len++] = '#';
4030
4.98k
      buf[len++] = '3';
4031
4.98k
      buf[len++] = '8';
4032
4.98k
      buf[len++] = ';';
4033
28.8k
        } else {
4034
28.8k
      buf[len++] = ent->content[0];
4035
28.8k
        }
4036
817k
    } else if ((ent != NULL) &&
4037
817k
               (ctxt->replaceEntities != 0)) {
4038
781k
        if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
4039
781k
                        if (xmlParserEntityCheck(ctxt, ent->length))
4040
0
                            goto error;
4041
4042
781k
      ++ctxt->depth;
4043
781k
      rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
4044
781k
                                ent->length, XML_SUBSTITUTE_REF, 0, 0, 0,
4045
781k
                                /* check */ 1);
4046
781k
      --ctxt->depth;
4047
781k
      if (rep != NULL) {
4048
771k
          current = rep;
4049
176M
          while (*current != 0) { /* non input consuming */
4050
176M
                                if ((*current == 0xD) || (*current == 0xA) ||
4051
176M
                                    (*current == 0x9)) {
4052
960k
                                    buf[len++] = 0x20;
4053
960k
                                    current++;
4054
960k
                                } else
4055
175M
                                    buf[len++] = *current++;
4056
176M
        if (len + 10 > buf_size) {
4057
9.54k
            growBuffer(buf, 10);
4058
9.54k
        }
4059
176M
          }
4060
771k
          xmlFree(rep);
4061
771k
          rep = NULL;
4062
771k
      }
4063
781k
        } else {
4064
0
      if (len + 10 > buf_size) {
4065
0
          growBuffer(buf, 10);
4066
0
      }
4067
0
      if (ent->content != NULL)
4068
0
          buf[len++] = ent->content[0];
4069
0
        }
4070
781k
    } else if (ent != NULL) {
4071
13.9k
        int i = xmlStrlen(ent->name);
4072
13.9k
        const xmlChar *cur = ent->name;
4073
4074
        /*
4075
                     * We also check for recursion and amplification
4076
                     * when entities are not substituted. They're
4077
                     * often expanded later.
4078
         */
4079
13.9k
        if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
4080
13.9k
      (ent->content != NULL)) {
4081
11.1k
                        if ((ent->flags & XML_ENT_CHECKED) == 0) {
4082
1.78k
                            unsigned long oldCopy = ctxt->sizeentcopy;
4083
4084
1.78k
                            ctxt->sizeentcopy = ent->length;
4085
4086
1.78k
                            ++ctxt->depth;
4087
1.78k
                            rep = xmlStringDecodeEntitiesInt(ctxt,
4088
1.78k
                                    ent->content, ent->length,
4089
1.78k
                                    XML_SUBSTITUTE_REF, 0, 0, 0,
4090
1.78k
                                    /* check */ 1);
4091
1.78k
                            --ctxt->depth;
4092
4093
                            /*
4094
                             * If we're parsing DTD content, the entity
4095
                             * might reference other entities which
4096
                             * weren't defined yet, so the check isn't
4097
                             * reliable.
4098
                             */
4099
1.78k
                            if (ctxt->inSubset == 0) {
4100
1.29k
                                ent->flags |= XML_ENT_CHECKED;
4101
1.29k
                                ent->expandedSize = ctxt->sizeentcopy;
4102
1.29k
                            }
4103
4104
1.78k
                            if (rep != NULL) {
4105
1.69k
                                xmlFree(rep);
4106
1.69k
                                rep = NULL;
4107
1.69k
                            } else {
4108
89
                                ent->content[0] = 0;
4109
89
                            }
4110
4111
1.78k
                            if (xmlParserEntityCheck(ctxt, oldCopy))
4112
62
                                goto error;
4113
9.39k
                        } else {
4114
9.39k
                            if (xmlParserEntityCheck(ctxt, ent->expandedSize))
4115
0
                                goto error;
4116
9.39k
                        }
4117
11.1k
        }
4118
4119
        /*
4120
         * Just output the reference
4121
         */
4122
13.9k
        buf[len++] = '&';
4123
14.0k
        while (len + i + 10 > buf_size) {
4124
252
      growBuffer(buf, i + 10);
4125
252
        }
4126
38.7k
        for (;i > 0;i--)
4127
24.7k
      buf[len++] = *cur++;
4128
13.9k
        buf[len++] = ';';
4129
13.9k
    }
4130
851k
      }
4131
3.14M
  } else {
4132
3.14M
      if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
4133
479k
          if ((len != 0) || (!normalize)) {
4134
472k
        if ((!normalize) || (!in_space)) {
4135
465k
      COPY_BUF(l,buf,len,0x20);
4136
466k
      while (len + 10 > buf_size) {
4137
956
          growBuffer(buf, 10);
4138
956
      }
4139
465k
        }
4140
472k
        in_space = 1;
4141
472k
    }
4142
2.66M
      } else {
4143
2.66M
          in_space = 0;
4144
2.66M
    COPY_BUF(l,buf,len,c);
4145
2.66M
    if (len + 10 > buf_size) {
4146
6.42k
        growBuffer(buf, 10);
4147
6.42k
    }
4148
2.66M
      }
4149
3.14M
      NEXTL(l);
4150
3.14M
  }
4151
4.04M
  GROW;
4152
4.04M
  c = CUR_CHAR(l);
4153
4.04M
        if (len > maxLength) {
4154
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4155
0
                           "AttValue length too long\n");
4156
0
            goto mem_error;
4157
0
        }
4158
4.04M
    }
4159
96.5k
    if (ctxt->instate == XML_PARSER_EOF)
4160
448
        goto error;
4161
4162
96.0k
    if ((in_space) && (normalize)) {
4163
7.65k
        while ((len > 0) && (buf[len - 1] == 0x20)) len--;
4164
3.63k
    }
4165
96.0k
    buf[len] = 0;
4166
96.0k
    if (RAW == '<') {
4167
9.00k
  xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4168
87.0k
    } else if (RAW != limit) {
4169
21.1k
  if ((c != 0) && (!IS_CHAR(c))) {
4170
6.31k
      xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4171
6.31k
         "invalid character in attribute value\n");
4172
14.8k
  } else {
4173
14.8k
      xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4174
14.8k
         "AttValue: ' expected\n");
4175
14.8k
        }
4176
21.1k
    } else
4177
65.8k
  NEXT;
4178
4179
96.0k
    if (attlen != NULL) *attlen = len;
4180
96.0k
    return(buf);
4181
4182
0
mem_error:
4183
0
    xmlErrMemory(ctxt, NULL);
4184
510
error:
4185
510
    if (buf != NULL)
4186
510
        xmlFree(buf);
4187
510
    if (rep != NULL)
4188
0
        xmlFree(rep);
4189
510
    return(NULL);
4190
0
}
4191
4192
/**
4193
 * xmlParseAttValue:
4194
 * @ctxt:  an XML parser context
4195
 *
4196
 * DEPRECATED: Internal function, don't use.
4197
 *
4198
 * parse a value for an attribute
4199
 * Note: the parser won't do substitution of entities here, this
4200
 * will be handled later in xmlStringGetNodeList
4201
 *
4202
 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4203
 *                   "'" ([^<&'] | Reference)* "'"
4204
 *
4205
 * 3.3.3 Attribute-Value Normalization:
4206
 * Before the value of an attribute is passed to the application or
4207
 * checked for validity, the XML processor must normalize it as follows:
4208
 * - a character reference is processed by appending the referenced
4209
 *   character to the attribute value
4210
 * - an entity reference is processed by recursively processing the
4211
 *   replacement text of the entity
4212
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4213
 *   appending #x20 to the normalized value, except that only a single
4214
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
4215
 *   parsed entity or the literal entity value of an internal parsed entity
4216
 * - other characters are processed by appending them to the normalized value
4217
 * If the declared value is not CDATA, then the XML processor must further
4218
 * process the normalized attribute value by discarding any leading and
4219
 * trailing space (#x20) characters, and by replacing sequences of space
4220
 * (#x20) characters by a single space (#x20) character.
4221
 * All attributes for which no declaration has been read should be treated
4222
 * by a non-validating parser as if declared CDATA.
4223
 *
4224
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4225
 */
4226
4227
4228
xmlChar *
4229
795k
xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4230
795k
    if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4231
795k
    return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
4232
795k
}
4233
4234
/**
4235
 * xmlParseSystemLiteral:
4236
 * @ctxt:  an XML parser context
4237
 *
4238
 * DEPRECATED: Internal function, don't use.
4239
 *
4240
 * parse an XML Literal
4241
 *
4242
 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4243
 *
4244
 * Returns the SystemLiteral parsed or NULL
4245
 */
4246
4247
xmlChar *
4248
70.6k
xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4249
70.6k
    xmlChar *buf = NULL;
4250
70.6k
    int len = 0;
4251
70.6k
    int size = XML_PARSER_BUFFER_SIZE;
4252
70.6k
    int cur, l;
4253
70.6k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4254
19.0k
                    XML_MAX_TEXT_LENGTH :
4255
70.6k
                    XML_MAX_NAME_LENGTH;
4256
70.6k
    xmlChar stop;
4257
70.6k
    int state = ctxt->instate;
4258
70.6k
    int count = 0;
4259
4260
70.6k
    SHRINK;
4261
70.6k
    if (RAW == '"') {
4262
65.6k
        NEXT;
4263
65.6k
  stop = '"';
4264
65.6k
    } else if (RAW == '\'') {
4265
3.46k
        NEXT;
4266
3.46k
  stop = '\'';
4267
3.46k
    } else {
4268
1.51k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4269
1.51k
  return(NULL);
4270
1.51k
    }
4271
4272
69.1k
    buf = (xmlChar *) xmlMallocAtomic(size);
4273
69.1k
    if (buf == NULL) {
4274
0
        xmlErrMemory(ctxt, NULL);
4275
0
  return(NULL);
4276
0
    }
4277
69.1k
    ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4278
69.1k
    cur = CUR_CHAR(l);
4279
1.41M
    while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4280
1.34M
  if (len + 5 >= size) {
4281
918
      xmlChar *tmp;
4282
4283
918
      size *= 2;
4284
918
      tmp = (xmlChar *) xmlRealloc(buf, size);
4285
918
      if (tmp == NULL) {
4286
0
          xmlFree(buf);
4287
0
    xmlErrMemory(ctxt, NULL);
4288
0
    ctxt->instate = (xmlParserInputState) state;
4289
0
    return(NULL);
4290
0
      }
4291
918
      buf = tmp;
4292
918
  }
4293
1.34M
  count++;
4294
1.34M
  if (count > 50) {
4295
4.26k
      SHRINK;
4296
4.26k
      GROW;
4297
4.26k
      count = 0;
4298
4.26k
            if (ctxt->instate == XML_PARSER_EOF) {
4299
0
          xmlFree(buf);
4300
0
    return(NULL);
4301
0
            }
4302
4.26k
  }
4303
1.34M
  COPY_BUF(l,buf,len,cur);
4304
1.34M
  NEXTL(l);
4305
1.34M
  cur = CUR_CHAR(l);
4306
1.34M
  if (cur == 0) {
4307
715
      GROW;
4308
715
      SHRINK;
4309
715
      cur = CUR_CHAR(l);
4310
715
  }
4311
1.34M
        if (len > maxLength) {
4312
0
            xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4313
0
            xmlFree(buf);
4314
0
            ctxt->instate = (xmlParserInputState) state;
4315
0
            return(NULL);
4316
0
        }
4317
1.34M
    }
4318
69.1k
    buf[len] = 0;
4319
69.1k
    ctxt->instate = (xmlParserInputState) state;
4320
69.1k
    if (!IS_CHAR(cur)) {
4321
1.09k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4322
68.0k
    } else {
4323
68.0k
  NEXT;
4324
68.0k
    }
4325
69.1k
    return(buf);
4326
69.1k
}
4327
4328
/**
4329
 * xmlParsePubidLiteral:
4330
 * @ctxt:  an XML parser context
4331
 *
4332
 * DEPRECATED: Internal function, don't use.
4333
 *
4334
 * parse an XML public literal
4335
 *
4336
 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4337
 *
4338
 * Returns the PubidLiteral parsed or NULL.
4339
 */
4340
4341
xmlChar *
4342
29.4k
xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4343
29.4k
    xmlChar *buf = NULL;
4344
29.4k
    int len = 0;
4345
29.4k
    int size = XML_PARSER_BUFFER_SIZE;
4346
29.4k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4347
8.97k
                    XML_MAX_TEXT_LENGTH :
4348
29.4k
                    XML_MAX_NAME_LENGTH;
4349
29.4k
    xmlChar cur;
4350
29.4k
    xmlChar stop;
4351
29.4k
    int count = 0;
4352
29.4k
    xmlParserInputState oldstate = ctxt->instate;
4353
4354
29.4k
    SHRINK;
4355
29.4k
    if (RAW == '"') {
4356
28.6k
        NEXT;
4357
28.6k
  stop = '"';
4358
28.6k
    } else if (RAW == '\'') {
4359
587
        NEXT;
4360
587
  stop = '\'';
4361
587
    } else {
4362
233
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4363
233
  return(NULL);
4364
233
    }
4365
29.2k
    buf = (xmlChar *) xmlMallocAtomic(size);
4366
29.2k
    if (buf == NULL) {
4367
0
  xmlErrMemory(ctxt, NULL);
4368
0
  return(NULL);
4369
0
    }
4370
29.2k
    ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
4371
29.2k
    cur = CUR;
4372
1.15M
    while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
4373
1.12M
  if (len + 1 >= size) {
4374
395
      xmlChar *tmp;
4375
4376
395
      size *= 2;
4377
395
      tmp = (xmlChar *) xmlRealloc(buf, size);
4378
395
      if (tmp == NULL) {
4379
0
    xmlErrMemory(ctxt, NULL);
4380
0
    xmlFree(buf);
4381
0
    return(NULL);
4382
0
      }
4383
395
      buf = tmp;
4384
395
  }
4385
1.12M
  buf[len++] = cur;
4386
1.12M
  count++;
4387
1.12M
  if (count > 50) {
4388
2.89k
      SHRINK;
4389
2.89k
      GROW;
4390
2.89k
      count = 0;
4391
2.89k
            if (ctxt->instate == XML_PARSER_EOF) {
4392
0
    xmlFree(buf);
4393
0
    return(NULL);
4394
0
            }
4395
2.89k
  }
4396
1.12M
  NEXT;
4397
1.12M
  cur = CUR;
4398
1.12M
  if (cur == 0) {
4399
164
      GROW;
4400
164
      SHRINK;
4401
164
      cur = CUR;
4402
164
  }
4403
1.12M
        if (len > maxLength) {
4404
0
            xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4405
0
            xmlFree(buf);
4406
0
            return(NULL);
4407
0
        }
4408
1.12M
    }
4409
29.2k
    buf[len] = 0;
4410
29.2k
    if (cur != stop) {
4411
667
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4412
28.5k
    } else {
4413
28.5k
  NEXT;
4414
28.5k
    }
4415
29.2k
    ctxt->instate = oldstate;
4416
29.2k
    return(buf);
4417
29.2k
}
4418
4419
static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt);
4420
4421
/*
 * Lookup table for the inner scanning loop of xmlParseCharData().
 *
 * An entry is non-zero (and equal to its own index) when the byte can be
 * skipped over without any further inspection: TAB (0x09) and the range
 * 0x20..0x7F except '&' (0x26), '<' (0x3C) and ']' (0x5D).
 * Every other byte maps to zero and stops the scan; this includes LF and
 * CR, which are handled separately, and all bytes >= 0x80.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x10 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20, '&' excluded */
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30, '<' excluded */
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F,
    /* 0x40 */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50, ']' excluded */
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F,
    /* 0x60 */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 */
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    /* 0x80 .. 0xFF: non-ASCII bytes always stop the scan */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
4458
4459
/**
4460
 * xmlParseCharData:
4461
 * @ctxt:  an XML parser context
4462
 * @cdata:  unused
4463
 *
4464
 * DEPRECATED: Internal function, don't use.
4465
 *
4466
 * Parse character data. Always makes progress if the first char isn't
4467
 * '<' or '&'.
4468
 *
4469
 * if we are within a CDATA section ']]>' marks an end of section.
4470
 *
4471
 * The right angle bracket (>) may be represented using the string "&gt;",
4472
 * and must, for compatibility, be escaped using "&gt;" or a character
4473
 * reference when it appears in the string "]]>" in content, when that
4474
 * string is not marking the end of a CDATA section.
4475
 *
4476
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4477
 */
4478
4479
void
4480
6.44M
xmlParseCharData(xmlParserCtxtPtr ctxt, ATTRIBUTE_UNUSED int cdata) {
4481
6.44M
    const xmlChar *in;
4482
6.44M
    int nbchar = 0;
4483
6.44M
    int line = ctxt->input->line;
4484
6.44M
    int col = ctxt->input->col;
4485
6.44M
    int ccol;
4486
4487
6.44M
    SHRINK;
4488
6.44M
    GROW;
4489
    /*
4490
     * Accelerated common case where input don't need to be
4491
     * modified before passing it to the handler.
4492
     */
4493
6.44M
    in = ctxt->input->cur;
4494
6.75M
    do {
4495
9.70M
get_more_space:
4496
15.0M
        while (*in == 0x20) { in++; ctxt->input->col++; }
4497
9.70M
        if (*in == 0xA) {
4498
3.09M
            do {
4499
3.09M
                ctxt->input->line++; ctxt->input->col = 1;
4500
3.09M
                in++;
4501
3.09M
            } while (*in == 0xA);
4502
2.94M
            goto get_more_space;
4503
2.94M
        }
4504
6.75M
        if (*in == '<') {
4505
1.21M
            nbchar = in - ctxt->input->cur;
4506
1.21M
            if (nbchar > 0) {
4507
1.21M
                const xmlChar *tmp = ctxt->input->cur;
4508
1.21M
                ctxt->input->cur = in;
4509
4510
1.21M
                if ((ctxt->sax != NULL) &&
4511
1.21M
                    (ctxt->sax->ignorableWhitespace !=
4512
1.21M
                     ctxt->sax->characters)) {
4513
326k
                    if (areBlanks(ctxt, tmp, nbchar, 1)) {
4514
272k
                        if (ctxt->sax->ignorableWhitespace != NULL)
4515
272k
                            ctxt->sax->ignorableWhitespace(ctxt->userData,
4516
272k
                                                   tmp, nbchar);
4517
272k
                    } else {
4518
53.6k
                        if (ctxt->sax->characters != NULL)
4519
53.6k
                            ctxt->sax->characters(ctxt->userData,
4520
53.6k
                                                  tmp, nbchar);
4521
53.6k
                        if (*ctxt->space == -1)
4522
14.8k
                            *ctxt->space = -2;
4523
53.6k
                    }
4524
889k
                } else if ((ctxt->sax != NULL) &&
4525
889k
                           (ctxt->sax->characters != NULL)) {
4526
889k
                    ctxt->sax->characters(ctxt->userData,
4527
889k
                                          tmp, nbchar);
4528
889k
                }
4529
1.21M
            }
4530
1.21M
            return;
4531
1.21M
        }
4532
4533
7.02M
get_more:
4534
7.02M
        ccol = ctxt->input->col;
4535
187M
        while (test_char_data[*in]) {
4536
180M
            in++;
4537
180M
            ccol++;
4538
180M
        }
4539
7.02M
        ctxt->input->col = ccol;
4540
7.02M
        if (*in == 0xA) {
4541
1.47M
            do {
4542
1.47M
                ctxt->input->line++; ctxt->input->col = 1;
4543
1.47M
                in++;
4544
1.47M
            } while (*in == 0xA);
4545
1.45M
            goto get_more;
4546
1.45M
        }
4547
5.56M
        if (*in == ']') {
4548
22.1k
            if ((in[1] == ']') && (in[2] == '>')) {
4549
1.00k
                xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4550
1.00k
                ctxt->input->cur = in + 1;
4551
1.00k
                return;
4552
1.00k
            }
4553
21.0k
            in++;
4554
21.0k
            ctxt->input->col++;
4555
21.0k
            goto get_more;
4556
22.1k
        }
4557
5.54M
        nbchar = in - ctxt->input->cur;
4558
5.54M
        if (nbchar > 0) {
4559
4.94M
            if ((ctxt->sax != NULL) &&
4560
4.94M
                (ctxt->sax->ignorableWhitespace !=
4561
4.94M
                 ctxt->sax->characters) &&
4562
4.94M
                (IS_BLANK_CH(*ctxt->input->cur))) {
4563
680k
                const xmlChar *tmp = ctxt->input->cur;
4564
680k
                ctxt->input->cur = in;
4565
4566
680k
                if (areBlanks(ctxt, tmp, nbchar, 0)) {
4567
445k
                    if (ctxt->sax->ignorableWhitespace != NULL)
4568
445k
                        ctxt->sax->ignorableWhitespace(ctxt->userData,
4569
445k
                                                       tmp, nbchar);
4570
445k
                } else {
4571
235k
                    if (ctxt->sax->characters != NULL)
4572
235k
                        ctxt->sax->characters(ctxt->userData,
4573
235k
                                              tmp, nbchar);
4574
235k
                    if (*ctxt->space == -1)
4575
120k
                        *ctxt->space = -2;
4576
235k
                }
4577
680k
                line = ctxt->input->line;
4578
680k
                col = ctxt->input->col;
4579
4.26M
            } else if (ctxt->sax != NULL) {
4580
4.26M
                if (ctxt->sax->characters != NULL)
4581
4.26M
                    ctxt->sax->characters(ctxt->userData,
4582
4.26M
                                          ctxt->input->cur, nbchar);
4583
4.26M
                line = ctxt->input->line;
4584
4.26M
                col = ctxt->input->col;
4585
4.26M
            }
4586
4.94M
        }
4587
5.54M
        ctxt->input->cur = in;
4588
5.54M
        if (*in == 0xD) {
4589
322k
            in++;
4590
322k
            if (*in == 0xA) {
4591
319k
                ctxt->input->cur = in;
4592
319k
                in++;
4593
319k
                ctxt->input->line++; ctxt->input->col = 1;
4594
319k
                continue; /* while */
4595
319k
            }
4596
2.45k
            in--;
4597
2.45k
        }
4598
5.22M
        if (*in == '<') {
4599
4.56M
            return;
4600
4.56M
        }
4601
659k
        if (*in == '&') {
4602
220k
            return;
4603
220k
        }
4604
439k
        SHRINK;
4605
439k
        GROW;
4606
439k
        if (ctxt->instate == XML_PARSER_EOF)
4607
0
            return;
4608
439k
        in = ctxt->input->cur;
4609
759k
    } while (((*in >= 0x20) && (*in <= 0x7F)) ||
4610
759k
             (*in == 0x09) || (*in == 0x0a));
4611
441k
    ctxt->input->line = line;
4612
441k
    ctxt->input->col = col;
4613
441k
    xmlParseCharDataComplex(ctxt);
4614
441k
}
4615
4616
/**
4617
 * xmlParseCharDataComplex:
4618
 * @ctxt:  an XML parser context
4619
 * @cdata:  int indicating whether we are within a CDATA section
4620
 *
4621
 * Always makes progress if the first char isn't '<' or '&'.
4622
 *
4623
 * parse a CharData section.this is the fallback function
4624
 * of xmlParseCharData() when the parsing requires handling
4625
 * of non-ASCII characters.
4626
 */
4627
static void
4628
441k
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt) {
4629
441k
    xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4630
441k
    int nbchar = 0;
4631
441k
    int cur, l;
4632
441k
    int count = 0;
4633
4634
441k
    SHRINK;
4635
441k
    GROW;
4636
441k
    cur = CUR_CHAR(l);
4637
9.54M
    while ((cur != '<') && /* checked */
4638
9.54M
           (cur != '&') &&
4639
9.54M
     (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
4640
9.10M
  if ((cur == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4641
1.02k
      xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4642
1.02k
  }
4643
9.10M
  COPY_BUF(l,buf,nbchar,cur);
4644
  /* move current position before possible calling of ctxt->sax->characters */
4645
9.10M
  NEXTL(l);
4646
9.10M
  cur = CUR_CHAR(l);
4647
9.10M
  if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4648
25.9k
      buf[nbchar] = 0;
4649
4650
      /*
4651
       * OK the segment is to be consumed as chars.
4652
       */
4653
25.9k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4654
16.9k
    if (areBlanks(ctxt, buf, nbchar, 0)) {
4655
0
        if (ctxt->sax->ignorableWhitespace != NULL)
4656
0
      ctxt->sax->ignorableWhitespace(ctxt->userData,
4657
0
                                     buf, nbchar);
4658
16.9k
    } else {
4659
16.9k
        if (ctxt->sax->characters != NULL)
4660
16.9k
      ctxt->sax->characters(ctxt->userData, buf, nbchar);
4661
16.9k
        if ((ctxt->sax->characters !=
4662
16.9k
             ctxt->sax->ignorableWhitespace) &&
4663
16.9k
      (*ctxt->space == -1))
4664
346
      *ctxt->space = -2;
4665
16.9k
    }
4666
16.9k
      }
4667
25.9k
      nbchar = 0;
4668
            /* something really bad happened in the SAX callback */
4669
25.9k
            if (ctxt->instate != XML_PARSER_CONTENT)
4670
0
                return;
4671
25.9k
  }
4672
9.10M
  count++;
4673
9.10M
  if (count > 50) {
4674
162k
      SHRINK;
4675
162k
      GROW;
4676
162k
      count = 0;
4677
162k
            if (ctxt->instate == XML_PARSER_EOF)
4678
0
    return;
4679
162k
  }
4680
9.10M
    }
4681
441k
    if (nbchar != 0) {
4682
73.9k
        buf[nbchar] = 0;
4683
  /*
4684
   * OK the segment is to be consumed as chars.
4685
   */
4686
73.9k
  if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4687
65.4k
      if (areBlanks(ctxt, buf, nbchar, 0)) {
4688
439
    if (ctxt->sax->ignorableWhitespace != NULL)
4689
439
        ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4690
65.0k
      } else {
4691
65.0k
    if (ctxt->sax->characters != NULL)
4692
65.0k
        ctxt->sax->characters(ctxt->userData, buf, nbchar);
4693
65.0k
    if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4694
65.0k
        (*ctxt->space == -1))
4695
12.5k
        *ctxt->space = -2;
4696
65.0k
      }
4697
65.4k
  }
4698
73.9k
    }
4699
441k
    if ((ctxt->input->cur < ctxt->input->end) && (!IS_CHAR(cur))) {
4700
  /* Generate the error and skip the offending character */
4701
319k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4702
319k
                          "PCDATA invalid Char value %d\n",
4703
319k
                    cur ? cur : CUR);
4704
319k
  NEXT;
4705
319k
    }
4706
441k
}
4707
4708
/**
4709
 * xmlParseExternalID:
4710
 * @ctxt:  an XML parser context
4711
 * @publicID:  a xmlChar** receiving PubidLiteral
4712
 * @strict: indicate whether we should restrict parsing to only
4713
 *          production [75], see NOTE below
4714
 *
4715
 * DEPRECATED: Internal function, don't use.
4716
 *
4717
 * Parse an External ID or a Public ID
4718
 *
4719
 * NOTE: Productions [75] and [83] interact badly since [75] can generate
4720
 *       'PUBLIC' S PubidLiteral S SystemLiteral
4721
 *
4722
 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4723
 *                   | 'PUBLIC' S PubidLiteral S SystemLiteral
4724
 *
4725
 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4726
 *
4727
 * Returns the function returns SystemLiteral and in the second
4728
 *                case publicID receives PubidLiteral, is strict is off
4729
 *                it is possible to return NULL and have publicID set.
4730
 */
4731
4732
xmlChar *
4733
125k
xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4734
125k
    xmlChar *URI = NULL;
4735
4736
125k
    SHRINK;
4737
4738
125k
    *publicID = NULL;
4739
125k
    if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4740
41.4k
        SKIP(6);
4741
41.4k
  if (SKIP_BLANKS == 0) {
4742
222
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4743
222
                     "Space required after 'SYSTEM'\n");
4744
222
  }
4745
41.4k
  URI = xmlParseSystemLiteral(ctxt);
4746
41.4k
  if (URI == NULL) {
4747
318
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4748
318
        }
4749
84.2k
    } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4750
29.4k
        SKIP(6);
4751
29.4k
  if (SKIP_BLANKS == 0) {
4752
164
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4753
164
        "Space required after 'PUBLIC'\n");
4754
164
  }
4755
29.4k
  *publicID = xmlParsePubidLiteral(ctxt);
4756
29.4k
  if (*publicID == NULL) {
4757
233
      xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4758
233
  }
4759
29.4k
  if (strict) {
4760
      /*
4761
       * We don't handle [83] so "S SystemLiteral" is required.
4762
       */
4763
29.1k
      if (SKIP_BLANKS == 0) {
4764
1.24k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4765
1.24k
      "Space required after the Public Identifier\n");
4766
1.24k
      }
4767
29.1k
  } else {
4768
      /*
4769
       * We handle [83] so we return immediately, if
4770
       * "S SystemLiteral" is not detected. We skip blanks if no
4771
             * system literal was found, but this is harmless since we must
4772
             * be at the end of a NotationDecl.
4773
       */
4774
269
      if (SKIP_BLANKS == 0) return(NULL);
4775
29
      if ((CUR != '\'') && (CUR != '"')) return(NULL);
4776
29
  }
4777
29.1k
  URI = xmlParseSystemLiteral(ctxt);
4778
29.1k
  if (URI == NULL) {
4779
1.19k
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4780
1.19k
        }
4781
29.1k
    }
4782
125k
    return(URI);
4783
125k
}
4784
4785
/**
4786
 * xmlParseCommentComplex:
4787
 * @ctxt:  an XML parser context
4788
 * @buf:  the already parsed part of the buffer
4789
 * @len:  number of bytes in the buffer
4790
 * @size:  allocated size of the buffer
4791
 *
4792
 * Skip an XML (SGML) comment <!-- .... -->
4793
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4794
 *  must not occur within comments. "
4795
 * This is the slow routine in case the accelerator for ascii didn't work
4796
 *
4797
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4798
 */
4799
static void
4800
xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4801
1.23M
                       size_t len, size_t size) {
4802
1.23M
    int q, ql;
4803
1.23M
    int r, rl;
4804
1.23M
    int cur, l;
4805
1.23M
    size_t count = 0;
4806
1.23M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4807
79.3k
                       XML_MAX_HUGE_LENGTH :
4808
1.23M
                       XML_MAX_TEXT_LENGTH;
4809
1.23M
    int inputid;
4810
4811
1.23M
    inputid = ctxt->input->id;
4812
4813
1.23M
    if (buf == NULL) {
4814
15.2k
        len = 0;
4815
15.2k
  size = XML_PARSER_BUFFER_SIZE;
4816
15.2k
  buf = (xmlChar *) xmlMallocAtomic(size);
4817
15.2k
  if (buf == NULL) {
4818
0
      xmlErrMemory(ctxt, NULL);
4819
0
      return;
4820
0
  }
4821
15.2k
    }
4822
1.23M
    GROW; /* Assure there's enough input data */
4823
1.23M
    q = CUR_CHAR(ql);
4824
1.23M
    if (q == 0)
4825
1.20M
        goto not_terminated;
4826
25.7k
    if (!IS_CHAR(q)) {
4827
1.70k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4828
1.70k
                          "xmlParseComment: invalid xmlChar value %d\n",
4829
1.70k
                    q);
4830
1.70k
  xmlFree (buf);
4831
1.70k
  return;
4832
1.70k
    }
4833
24.0k
    NEXTL(ql);
4834
24.0k
    r = CUR_CHAR(rl);
4835
24.0k
    if (r == 0)
4836
287
        goto not_terminated;
4837
23.7k
    if (!IS_CHAR(r)) {
4838
106
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4839
106
                          "xmlParseComment: invalid xmlChar value %d\n",
4840
106
                    r);
4841
106
  xmlFree (buf);
4842
106
  return;
4843
106
    }
4844
23.6k
    NEXTL(rl);
4845
23.6k
    cur = CUR_CHAR(l);
4846
23.6k
    if (cur == 0)
4847
113
        goto not_terminated;
4848
4.19M
    while (IS_CHAR(cur) && /* checked */
4849
4.19M
           ((cur != '>') ||
4850
4.19M
      (r != '-') || (q != '-'))) {
4851
4.17M
  if ((r == '-') && (q == '-')) {
4852
1.72k
      xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4853
1.72k
  }
4854
4.17M
  if (len + 5 >= size) {
4855
5.34k
      xmlChar *new_buf;
4856
5.34k
            size_t new_size;
4857
4858
5.34k
      new_size = size * 2;
4859
5.34k
      new_buf = (xmlChar *) xmlRealloc(buf, new_size);
4860
5.34k
      if (new_buf == NULL) {
4861
0
    xmlFree (buf);
4862
0
    xmlErrMemory(ctxt, NULL);
4863
0
    return;
4864
0
      }
4865
5.34k
      buf = new_buf;
4866
5.34k
            size = new_size;
4867
5.34k
  }
4868
4.17M
  COPY_BUF(ql,buf,len,q);
4869
4.17M
  q = r;
4870
4.17M
  ql = rl;
4871
4.17M
  r = cur;
4872
4.17M
  rl = l;
4873
4874
4.17M
  count++;
4875
4.17M
  if (count > 50) {
4876
70.8k
      SHRINK;
4877
70.8k
      GROW;
4878
70.8k
      count = 0;
4879
70.8k
            if (ctxt->instate == XML_PARSER_EOF) {
4880
0
    xmlFree(buf);
4881
0
    return;
4882
0
            }
4883
70.8k
  }
4884
4.17M
  NEXTL(l);
4885
4.17M
  cur = CUR_CHAR(l);
4886
4.17M
  if (cur == 0) {
4887
3.05k
      SHRINK;
4888
3.05k
      GROW;
4889
3.05k
      cur = CUR_CHAR(l);
4890
3.05k
  }
4891
4892
4.17M
        if (len > maxLength) {
4893
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4894
0
                         "Comment too big found", NULL);
4895
0
            xmlFree (buf);
4896
0
            return;
4897
0
        }
4898
4.17M
    }
4899
23.5k
    buf[len] = 0;
4900
23.5k
    if (cur == 0) {
4901
3.05k
  xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4902
3.05k
                       "Comment not terminated \n<!--%.50s\n", buf);
4903
20.5k
    } else if (!IS_CHAR(cur)) {
4904
503
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4905
503
                          "xmlParseComment: invalid xmlChar value %d\n",
4906
503
                    cur);
4907
20.0k
    } else {
4908
20.0k
  if (inputid != ctxt->input->id) {
4909
0
      xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4910
0
               "Comment doesn't start and stop in the same"
4911
0
                           " entity\n");
4912
0
  }
4913
20.0k
        NEXT;
4914
20.0k
  if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4915
20.0k
      (!ctxt->disableSAX))
4916
17.6k
      ctxt->sax->comment(ctxt->userData, buf);
4917
20.0k
    }
4918
23.5k
    xmlFree(buf);
4919
23.5k
    return;
4920
1.20M
not_terminated:
4921
1.20M
    xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4922
1.20M
       "Comment not terminated\n", NULL);
4923
1.20M
    xmlFree(buf);
4924
1.20M
    return;
4925
23.5k
}
4926
4927
/**
4928
 * xmlParseComment:
4929
 * @ctxt:  an XML parser context
4930
 *
4931
 * DEPRECATED: Internal function, don't use.
4932
 *
4933
 * Parse an XML (SGML) comment. Always consumes '<!'.
4934
 *
4935
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4936
 *  must not occur within comments. "
4937
 *
4938
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4939
 */
4940
void
4941
61.5M
xmlParseComment(xmlParserCtxtPtr ctxt) {
4942
61.5M
    xmlChar *buf = NULL;
4943
61.5M
    size_t size = XML_PARSER_BUFFER_SIZE;
4944
61.5M
    size_t len = 0;
4945
61.5M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4946
9.36M
                       XML_MAX_HUGE_LENGTH :
4947
61.5M
                       XML_MAX_TEXT_LENGTH;
4948
61.5M
    xmlParserInputState state;
4949
61.5M
    const xmlChar *in;
4950
61.5M
    size_t nbchar = 0;
4951
61.5M
    int ccol;
4952
61.5M
    int inputid;
4953
4954
    /*
4955
     * Check that there is a comment right here.
4956
     */
4957
61.5M
    if ((RAW != '<') || (NXT(1) != '!'))
4958
0
        return;
4959
61.5M
    SKIP(2);
4960
61.5M
    if ((RAW != '-') || (NXT(1) != '-'))
4961
116
        return;
4962
61.5M
    state = ctxt->instate;
4963
61.5M
    ctxt->instate = XML_PARSER_COMMENT;
4964
61.5M
    inputid = ctxt->input->id;
4965
61.5M
    SKIP(2);
4966
61.5M
    SHRINK;
4967
61.5M
    GROW;
4968
4969
    /*
4970
     * Accelerated common case where input don't need to be
4971
     * modified before passing it to the handler.
4972
     */
4973
61.5M
    in = ctxt->input->cur;
4974
61.5M
    do {
4975
61.5M
  if (*in == 0xA) {
4976
284k
      do {
4977
284k
    ctxt->input->line++; ctxt->input->col = 1;
4978
284k
    in++;
4979
284k
      } while (*in == 0xA);
4980
284k
  }
4981
69.4M
get_more:
4982
69.4M
        ccol = ctxt->input->col;
4983
317M
  while (((*in > '-') && (*in <= 0x7F)) ||
4984
317M
         ((*in >= 0x20) && (*in < '-')) ||
4985
317M
         (*in == 0x09)) {
4986
248M
        in++;
4987
248M
        ccol++;
4988
248M
  }
4989
69.4M
  ctxt->input->col = ccol;
4990
69.4M
  if (*in == 0xA) {
4991
2.65M
      do {
4992
2.65M
    ctxt->input->line++; ctxt->input->col = 1;
4993
2.65M
    in++;
4994
2.65M
      } while (*in == 0xA);
4995
2.53M
      goto get_more;
4996
2.53M
  }
4997
66.9M
  nbchar = in - ctxt->input->cur;
4998
  /*
4999
   * save current set of data
5000
   */
5001
66.9M
  if (nbchar > 0) {
5002
7.20M
      if ((ctxt->sax != NULL) &&
5003
7.20M
    (ctxt->sax->comment != NULL)) {
5004
7.20M
    if (buf == NULL) {
5005
2.76M
        if ((*in == '-') && (in[1] == '-'))
5006
1.01M
            size = nbchar + 1;
5007
1.75M
        else
5008
1.75M
            size = XML_PARSER_BUFFER_SIZE + nbchar;
5009
2.76M
        buf = (xmlChar *) xmlMallocAtomic(size);
5010
2.76M
        if (buf == NULL) {
5011
0
            xmlErrMemory(ctxt, NULL);
5012
0
      ctxt->instate = state;
5013
0
      return;
5014
0
        }
5015
2.76M
        len = 0;
5016
4.44M
    } else if (len + nbchar + 1 >= size) {
5017
601k
        xmlChar *new_buf;
5018
601k
        size  += len + nbchar + XML_PARSER_BUFFER_SIZE;
5019
601k
        new_buf = (xmlChar *) xmlRealloc(buf, size);
5020
601k
        if (new_buf == NULL) {
5021
0
            xmlFree (buf);
5022
0
      xmlErrMemory(ctxt, NULL);
5023
0
      ctxt->instate = state;
5024
0
      return;
5025
0
        }
5026
601k
        buf = new_buf;
5027
601k
    }
5028
7.20M
    memcpy(&buf[len], ctxt->input->cur, nbchar);
5029
7.20M
    len += nbchar;
5030
7.20M
    buf[len] = 0;
5031
7.20M
      }
5032
7.20M
  }
5033
66.9M
        if (len > maxLength) {
5034
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5035
0
                         "Comment too big found", NULL);
5036
0
            xmlFree (buf);
5037
0
            return;
5038
0
        }
5039
66.9M
  ctxt->input->cur = in;
5040
66.9M
  if (*in == 0xA) {
5041
0
      in++;
5042
0
      ctxt->input->line++; ctxt->input->col = 1;
5043
0
  }
5044
66.9M
  if (*in == 0xD) {
5045
1.32M
      in++;
5046
1.32M
      if (*in == 0xA) {
5047
1.32M
    ctxt->input->cur = in;
5048
1.32M
    in++;
5049
1.32M
    ctxt->input->line++; ctxt->input->col = 1;
5050
1.32M
    goto get_more;
5051
1.32M
      }
5052
367
      in--;
5053
367
  }
5054
65.5M
  SHRINK;
5055
65.5M
  GROW;
5056
65.5M
        if (ctxt->instate == XML_PARSER_EOF) {
5057
0
            xmlFree(buf);
5058
0
            return;
5059
0
        }
5060
65.5M
  in = ctxt->input->cur;
5061
65.5M
  if (*in == '-') {
5062
64.3M
      if (in[1] == '-') {
5063
61.5M
          if (in[2] == '>') {
5064
60.3M
        if (ctxt->input->id != inputid) {
5065
0
      xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5066
0
                     "comment doesn't start and stop in the"
5067
0
                                       " same entity\n");
5068
0
        }
5069
60.3M
        SKIP(3);
5070
60.3M
        if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5071
60.3M
            (!ctxt->disableSAX)) {
5072
30.3M
      if (buf != NULL)
5073
1.39M
          ctxt->sax->comment(ctxt->userData, buf);
5074
28.9M
      else
5075
28.9M
          ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5076
30.3M
        }
5077
60.3M
        if (buf != NULL)
5078
1.54M
            xmlFree(buf);
5079
60.3M
        if (ctxt->instate != XML_PARSER_EOF)
5080
60.3M
      ctxt->instate = state;
5081
60.3M
        return;
5082
60.3M
    }
5083
1.22M
    if (buf != NULL) {
5084
535k
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5085
535k
                          "Double hyphen within comment: "
5086
535k
                                      "<!--%.50s\n",
5087
535k
              buf);
5088
535k
    } else
5089
684k
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5090
684k
                          "Double hyphen within comment\n", NULL);
5091
1.22M
                if (ctxt->instate == XML_PARSER_EOF) {
5092
0
                    xmlFree(buf);
5093
0
                    return;
5094
0
                }
5095
1.22M
    in++;
5096
1.22M
    ctxt->input->col++;
5097
1.22M
      }
5098
4.03M
      in++;
5099
4.03M
      ctxt->input->col++;
5100
4.03M
      goto get_more;
5101
64.3M
  }
5102
65.5M
    } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
5103
1.23M
    xmlParseCommentComplex(ctxt, buf, len, size);
5104
1.23M
    ctxt->instate = state;
5105
1.23M
    return;
5106
61.5M
}
5107
5108
5109
/**
5110
 * xmlParsePITarget:
5111
 * @ctxt:  an XML parser context
5112
 *
5113
 * DEPRECATED: Internal function, don't use.
5114
 *
5115
 * parse the name of a PI
5116
 *
5117
 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5118
 *
5119
 * Returns the PITarget name or NULL
5120
 */
5121
5122
const xmlChar *
5123
72.1k
xmlParsePITarget(xmlParserCtxtPtr ctxt) {
5124
72.1k
    const xmlChar *name;
5125
5126
72.1k
    name = xmlParseName(ctxt);
5127
72.1k
    if ((name != NULL) &&
5128
72.1k
        ((name[0] == 'x') || (name[0] == 'X')) &&
5129
72.1k
        ((name[1] == 'm') || (name[1] == 'M')) &&
5130
72.1k
        ((name[2] == 'l') || (name[2] == 'L'))) {
5131
13.5k
  int i;
5132
13.5k
  if ((name[0] == 'x') && (name[1] == 'm') &&
5133
13.5k
      (name[2] == 'l') && (name[3] == 0)) {
5134
1.86k
      xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5135
1.86k
     "XML declaration allowed only at the start of the document\n");
5136
1.86k
      return(name);
5137
11.6k
  } else if (name[3] == 0) {
5138
215
      xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5139
215
      return(name);
5140
215
  }
5141
14.6k
  for (i = 0;;i++) {
5142
14.6k
      if (xmlW3CPIs[i] == NULL) break;
5143
13.0k
      if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5144
9.88k
          return(name);
5145
13.0k
  }
5146
1.58k
  xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5147
1.58k
          "xmlParsePITarget: invalid name prefix 'xml'\n",
5148
1.58k
          NULL, NULL);
5149
1.58k
    }
5150
60.1k
    if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5151
327
  xmlNsErr(ctxt, XML_NS_ERR_COLON,
5152
327
     "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5153
327
    }
5154
60.1k
    return(name);
5155
72.1k
}
5156
5157
#ifdef LIBXML_CATALOG_ENABLED
5158
/**
5159
 * xmlParseCatalogPI:
5160
 * @ctxt:  an XML parser context
5161
 * @catalog:  the PI value string
5162
 *
5163
 * parse an XML Catalog Processing Instruction.
5164
 *
5165
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
5166
 *
5167
 * Occurs only if allowed by the user and if happening in the Misc
5168
 * part of the document before any doctype information
5169
 * This will add the given catalog to the parsing context in order
5170
 * to be used if there is a resolution need further down in the document
5171
 */
5172
5173
static void
5174
0
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
5175
0
    xmlChar *URL = NULL;
5176
0
    const xmlChar *tmp, *base;
5177
0
    xmlChar marker;
5178
5179
0
    tmp = catalog;
5180
0
    while (IS_BLANK_CH(*tmp)) tmp++;
5181
0
    if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
5182
0
  goto error;
5183
0
    tmp += 7;
5184
0
    while (IS_BLANK_CH(*tmp)) tmp++;
5185
0
    if (*tmp != '=') {
5186
0
  return;
5187
0
    }
5188
0
    tmp++;
5189
0
    while (IS_BLANK_CH(*tmp)) tmp++;
5190
0
    marker = *tmp;
5191
0
    if ((marker != '\'') && (marker != '"'))
5192
0
  goto error;
5193
0
    tmp++;
5194
0
    base = tmp;
5195
0
    while ((*tmp != 0) && (*tmp != marker)) tmp++;
5196
0
    if (*tmp == 0)
5197
0
  goto error;
5198
0
    URL = xmlStrndup(base, tmp - base);
5199
0
    tmp++;
5200
0
    while (IS_BLANK_CH(*tmp)) tmp++;
5201
0
    if (*tmp != 0)
5202
0
  goto error;
5203
5204
0
    if (URL != NULL) {
5205
0
  ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
5206
0
  xmlFree(URL);
5207
0
    }
5208
0
    return;
5209
5210
0
error:
5211
0
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
5212
0
            "Catalog PI syntax error: %s\n",
5213
0
      catalog, NULL);
5214
0
    if (URL != NULL)
5215
0
  xmlFree(URL);
5216
0
}
5217
#endif
5218
5219
/**
5220
 * xmlParsePI:
5221
 * @ctxt:  an XML parser context
5222
 *
5223
 * DEPRECATED: Internal function, don't use.
5224
 *
5225
 * parse an XML Processing Instruction.
5226
 *
5227
 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5228
 *
5229
 * The processing is transferred to SAX once parsed.
5230
 */
5231
5232
void
5233
72.1k
xmlParsePI(xmlParserCtxtPtr ctxt) {
5234
72.1k
    xmlChar *buf = NULL;
5235
72.1k
    size_t len = 0;
5236
72.1k
    size_t size = XML_PARSER_BUFFER_SIZE;
5237
72.1k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5238
25.1k
                       XML_MAX_HUGE_LENGTH :
5239
72.1k
                       XML_MAX_TEXT_LENGTH;
5240
72.1k
    int cur, l;
5241
72.1k
    const xmlChar *target;
5242
72.1k
    xmlParserInputState state;
5243
72.1k
    int count = 0;
5244
5245
72.1k
    if ((RAW == '<') && (NXT(1) == '?')) {
5246
72.1k
  int inputid = ctxt->input->id;
5247
72.1k
  state = ctxt->instate;
5248
72.1k
        ctxt->instate = XML_PARSER_PI;
5249
  /*
5250
   * this is a Processing Instruction.
5251
   */
5252
72.1k
  SKIP(2);
5253
72.1k
  SHRINK;
5254
5255
  /*
5256
   * Parse the target name and check for special support like
5257
   * namespace.
5258
   */
5259
72.1k
        target = xmlParsePITarget(ctxt);
5260
72.1k
  if (target != NULL) {
5261
71.0k
      if ((RAW == '?') && (NXT(1) == '>')) {
5262
448
    if (inputid != ctxt->input->id) {
5263
0
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5264
0
                             "PI declaration doesn't start and stop in"
5265
0
                                   " the same entity\n");
5266
0
    }
5267
448
    SKIP(2);
5268
5269
    /*
5270
     * SAX: PI detected.
5271
     */
5272
448
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5273
448
        (ctxt->sax->processingInstruction != NULL))
5274
380
        ctxt->sax->processingInstruction(ctxt->userData,
5275
380
                                         target, NULL);
5276
448
    if (ctxt->instate != XML_PARSER_EOF)
5277
448
        ctxt->instate = state;
5278
448
    return;
5279
448
      }
5280
70.6k
      buf = (xmlChar *) xmlMallocAtomic(size);
5281
70.6k
      if (buf == NULL) {
5282
0
    xmlErrMemory(ctxt, NULL);
5283
0
    ctxt->instate = state;
5284
0
    return;
5285
0
      }
5286
70.6k
      if (SKIP_BLANKS == 0) {
5287
6.23k
    xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5288
6.23k
        "ParsePI: PI %s space expected\n", target);
5289
6.23k
      }
5290
70.6k
      cur = CUR_CHAR(l);
5291
4.33M
      while (IS_CHAR(cur) && /* checked */
5292
4.33M
       ((cur != '?') || (NXT(1) != '>'))) {
5293
4.25M
    if (len + 5 >= size) {
5294
4.02k
        xmlChar *tmp;
5295
4.02k
                    size_t new_size = size * 2;
5296
4.02k
        tmp = (xmlChar *) xmlRealloc(buf, new_size);
5297
4.02k
        if (tmp == NULL) {
5298
0
      xmlErrMemory(ctxt, NULL);
5299
0
      xmlFree(buf);
5300
0
      ctxt->instate = state;
5301
0
      return;
5302
0
        }
5303
4.02k
        buf = tmp;
5304
4.02k
                    size = new_size;
5305
4.02k
    }
5306
4.25M
    count++;
5307
4.25M
    if (count > 50) {
5308
65.2k
        SHRINK;
5309
65.2k
        GROW;
5310
65.2k
                    if (ctxt->instate == XML_PARSER_EOF) {
5311
0
                        xmlFree(buf);
5312
0
                        return;
5313
0
                    }
5314
65.2k
        count = 0;
5315
65.2k
    }
5316
4.25M
    COPY_BUF(l,buf,len,cur);
5317
4.25M
    NEXTL(l);
5318
4.25M
    cur = CUR_CHAR(l);
5319
4.25M
    if (cur == 0) {
5320
2.42k
        SHRINK;
5321
2.42k
        GROW;
5322
2.42k
        cur = CUR_CHAR(l);
5323
2.42k
    }
5324
4.25M
                if (len > maxLength) {
5325
0
                    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5326
0
                                      "PI %s too big found", target);
5327
0
                    xmlFree(buf);
5328
0
                    ctxt->instate = state;
5329
0
                    return;
5330
0
                }
5331
4.25M
      }
5332
70.6k
      buf[len] = 0;
5333
70.6k
      if (cur != '?') {
5334
4.52k
    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5335
4.52k
          "ParsePI: PI %s never end ...\n", target);
5336
66.1k
      } else {
5337
66.1k
    if (inputid != ctxt->input->id) {
5338
0
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5339
0
                             "PI declaration doesn't start and stop in"
5340
0
                                   " the same entity\n");
5341
0
    }
5342
66.1k
    SKIP(2);
5343
5344
66.1k
#ifdef LIBXML_CATALOG_ENABLED
5345
66.1k
    if (((state == XML_PARSER_MISC) ||
5346
66.1k
               (state == XML_PARSER_START)) &&
5347
66.1k
        (xmlStrEqual(target, XML_CATALOG_PI))) {
5348
0
        xmlCatalogAllow allow = xmlCatalogGetDefaults();
5349
0
        if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5350
0
      (allow == XML_CATA_ALLOW_ALL))
5351
0
      xmlParseCatalogPI(ctxt, buf);
5352
0
    }
5353
66.1k
#endif
5354
5355
5356
    /*
5357
     * SAX: PI detected.
5358
     */
5359
66.1k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5360
66.1k
        (ctxt->sax->processingInstruction != NULL))
5361
56.5k
        ctxt->sax->processingInstruction(ctxt->userData,
5362
56.5k
                                         target, buf);
5363
66.1k
      }
5364
70.6k
      xmlFree(buf);
5365
70.6k
  } else {
5366
1.06k
      xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5367
1.06k
  }
5368
71.7k
  if (ctxt->instate != XML_PARSER_EOF)
5369
71.7k
      ctxt->instate = state;
5370
71.7k
    }
5371
72.1k
}
5372
5373
/**
5374
 * xmlParseNotationDecl:
5375
 * @ctxt:  an XML parser context
5376
 *
5377
 * DEPRECATED: Internal function, don't use.
5378
 *
5379
 * Parse a notation declaration. Always consumes '<!'.
5380
 *
5381
 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID |  PublicID) S? '>'
5382
 *
5383
 * Hence there is actually 3 choices:
5384
 *     'PUBLIC' S PubidLiteral
5385
 *     'PUBLIC' S PubidLiteral S SystemLiteral
5386
 * and 'SYSTEM' S SystemLiteral
5387
 *
5388
 * See the NOTE on xmlParseExternalID().
5389
 */
5390
5391
void
5392
1.25k
xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5393
1.25k
    const xmlChar *name;
5394
1.25k
    xmlChar *Pubid;
5395
1.25k
    xmlChar *Systemid;
5396
5397
1.25k
    if ((CUR != '<') || (NXT(1) != '!'))
5398
0
        return;
5399
1.25k
    SKIP(2);
5400
5401
1.25k
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5402
912
  int inputid = ctxt->input->id;
5403
912
  SHRINK;
5404
912
  SKIP(8);
5405
912
  if (SKIP_BLANKS == 0) {
5406
83
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5407
83
         "Space required after '<!NOTATION'\n");
5408
83
      return;
5409
83
  }
5410
5411
829
        name = xmlParseName(ctxt);
5412
829
  if (name == NULL) {
5413
51
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5414
51
      return;
5415
51
  }
5416
778
  if (xmlStrchr(name, ':') != NULL) {
5417
27
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5418
27
         "colons are forbidden from notation names '%s'\n",
5419
27
         name, NULL, NULL);
5420
27
  }
5421
778
  if (SKIP_BLANKS == 0) {
5422
76
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5423
76
         "Space required after the NOTATION name'\n");
5424
76
      return;
5425
76
  }
5426
5427
  /*
5428
   * Parse the IDs.
5429
   */
5430
702
  Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5431
702
  SKIP_BLANKS;
5432
5433
702
  if (RAW == '>') {
5434
512
      if (inputid != ctxt->input->id) {
5435
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5436
0
                         "Notation declaration doesn't start and stop"
5437
0
                               " in the same entity\n");
5438
0
      }
5439
512
      NEXT;
5440
512
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5441
512
    (ctxt->sax->notationDecl != NULL))
5442
400
    ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5443
512
  } else {
5444
190
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5445
190
  }
5446
702
  if (Systemid != NULL) xmlFree(Systemid);
5447
702
  if (Pubid != NULL) xmlFree(Pubid);
5448
702
    }
5449
1.25k
}
5450
5451
/**
5452
 * xmlParseEntityDecl:
5453
 * @ctxt:  an XML parser context
5454
 *
5455
 * DEPRECATED: Internal function, don't use.
5456
 *
5457
 * Parse an entity declaration. Always consumes '<!'.
5458
 *
5459
 * [70] EntityDecl ::= GEDecl | PEDecl
5460
 *
5461
 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5462
 *
5463
 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5464
 *
5465
 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5466
 *
5467
 * [74] PEDef ::= EntityValue | ExternalID
5468
 *
5469
 * [76] NDataDecl ::= S 'NDATA' S Name
5470
 *
5471
 * [ VC: Notation Declared ]
5472
 * The Name must match the declared name of a notation.
5473
 */
5474
5475
void
5476
990k
xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5477
990k
    const xmlChar *name = NULL;
5478
990k
    xmlChar *value = NULL;
5479
990k
    xmlChar *URI = NULL, *literal = NULL;
5480
990k
    const xmlChar *ndata = NULL;
5481
990k
    int isParameter = 0;
5482
990k
    xmlChar *orig = NULL;
5483
5484
990k
    if ((CUR != '<') || (NXT(1) != '!'))
5485
0
        return;
5486
990k
    SKIP(2);
5487
5488
    /* GROW; done in the caller */
5489
990k
    if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5490
989k
  int inputid = ctxt->input->id;
5491
989k
  SHRINK;
5492
989k
  SKIP(6);
5493
989k
  if (SKIP_BLANKS == 0) {
5494
460
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5495
460
         "Space required after '<!ENTITY'\n");
5496
460
  }
5497
5498
989k
  if (RAW == '%') {
5499
786k
      NEXT;
5500
786k
      if (SKIP_BLANKS == 0) {
5501
141
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5502
141
             "Space required after '%%'\n");
5503
141
      }
5504
786k
      isParameter = 1;
5505
786k
  }
5506
5507
989k
        name = xmlParseName(ctxt);
5508
989k
  if (name == NULL) {
5509
805
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5510
805
                     "xmlParseEntityDecl: no name\n");
5511
805
            return;
5512
805
  }
5513
988k
  if (xmlStrchr(name, ':') != NULL) {
5514
105
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5515
105
         "colons are forbidden from entities names '%s'\n",
5516
105
         name, NULL, NULL);
5517
105
  }
5518
988k
  if (SKIP_BLANKS == 0) {
5519
1.36k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5520
1.36k
         "Space required after the entity name\n");
5521
1.36k
  }
5522
5523
988k
  ctxt->instate = XML_PARSER_ENTITY_DECL;
5524
  /*
5525
   * handle the various case of definitions...
5526
   */
5527
988k
  if (isParameter) {
5528
785k
      if ((RAW == '"') || (RAW == '\'')) {
5529
780k
          value = xmlParseEntityValue(ctxt, &orig);
5530
780k
    if (value) {
5531
778k
        if ((ctxt->sax != NULL) &&
5532
778k
      (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5533
742k
      ctxt->sax->entityDecl(ctxt->userData, name,
5534
742k
                        XML_INTERNAL_PARAMETER_ENTITY,
5535
742k
            NULL, NULL, value);
5536
778k
    }
5537
780k
      } else {
5538
5.37k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5539
5.37k
    if ((URI == NULL) && (literal == NULL)) {
5540
610
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5541
610
    }
5542
5.37k
    if (URI) {
5543
4.75k
        xmlURIPtr uri;
5544
5545
4.75k
        uri = xmlParseURI((const char *) URI);
5546
4.75k
        if (uri == NULL) {
5547
93
            xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5548
93
             "Invalid URI: %s\n", URI);
5549
      /*
5550
       * This really ought to be a well formedness error
5551
       * but the XML Core WG decided otherwise c.f. issue
5552
       * E26 of the XML erratas.
5553
       */
5554
4.65k
        } else {
5555
4.65k
      if (uri->fragment != NULL) {
5556
          /*
5557
           * Okay this is foolish to block those but not
5558
           * invalid URIs.
5559
           */
5560
17
          xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5561
4.64k
      } else {
5562
4.64k
          if ((ctxt->sax != NULL) &&
5563
4.64k
        (!ctxt->disableSAX) &&
5564
4.64k
        (ctxt->sax->entityDecl != NULL))
5565
4.52k
        ctxt->sax->entityDecl(ctxt->userData, name,
5566
4.52k
              XML_EXTERNAL_PARAMETER_ENTITY,
5567
4.52k
              literal, URI, NULL);
5568
4.64k
      }
5569
4.65k
      xmlFreeURI(uri);
5570
4.65k
        }
5571
4.75k
    }
5572
5.37k
      }
5573
785k
  } else {
5574
202k
      if ((RAW == '"') || (RAW == '\'')) {
5575
183k
          value = xmlParseEntityValue(ctxt, &orig);
5576
183k
    if ((ctxt->sax != NULL) &&
5577
183k
        (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5578
174k
        ctxt->sax->entityDecl(ctxt->userData, name,
5579
174k
        XML_INTERNAL_GENERAL_ENTITY,
5580
174k
        NULL, NULL, value);
5581
    /*
5582
     * For expat compatibility in SAX mode.
5583
     */
5584
183k
    if ((ctxt->myDoc == NULL) ||
5585
183k
        (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5586
1.38k
        if (ctxt->myDoc == NULL) {
5587
594
      ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5588
594
      if (ctxt->myDoc == NULL) {
5589
0
          xmlErrMemory(ctxt, "New Doc failed");
5590
0
          return;
5591
0
      }
5592
594
      ctxt->myDoc->properties = XML_DOC_INTERNAL;
5593
594
        }
5594
1.38k
        if (ctxt->myDoc->intSubset == NULL)
5595
594
      ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5596
594
              BAD_CAST "fake", NULL, NULL);
5597
5598
1.38k
        xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5599
1.38k
                    NULL, NULL, value);
5600
1.38k
    }
5601
183k
      } else {
5602
19.1k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5603
19.1k
    if ((URI == NULL) && (literal == NULL)) {
5604
1.35k
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5605
1.35k
    }
5606
19.1k
    if (URI) {
5607
17.6k
        xmlURIPtr uri;
5608
5609
17.6k
        uri = xmlParseURI((const char *)URI);
5610
17.6k
        if (uri == NULL) {
5611
470
            xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5612
470
             "Invalid URI: %s\n", URI);
5613
      /*
5614
       * This really ought to be a well formedness error
5615
       * but the XML Core WG decided otherwise c.f. issue
5616
       * E26 of the XML erratas.
5617
       */
5618
17.1k
        } else {
5619
17.1k
      if (uri->fragment != NULL) {
5620
          /*
5621
           * Okay this is foolish to block those but not
5622
           * invalid URIs.
5623
           */
5624
52
          xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5625
52
      }
5626
17.1k
      xmlFreeURI(uri);
5627
17.1k
        }
5628
17.6k
    }
5629
19.1k
    if ((RAW != '>') && (SKIP_BLANKS == 0)) {
5630
1.59k
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5631
1.59k
           "Space required before 'NDATA'\n");
5632
1.59k
    }
5633
19.1k
    if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5634
1.42k
        SKIP(5);
5635
1.42k
        if (SKIP_BLANKS == 0) {
5636
50
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5637
50
               "Space required after 'NDATA'\n");
5638
50
        }
5639
1.42k
        ndata = xmlParseName(ctxt);
5640
1.42k
        if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5641
1.42k
            (ctxt->sax->unparsedEntityDecl != NULL))
5642
1.29k
      ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5643
1.29k
            literal, URI, ndata);
5644
17.7k
    } else {
5645
17.7k
        if ((ctxt->sax != NULL) &&
5646
17.7k
            (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5647
16.0k
      ctxt->sax->entityDecl(ctxt->userData, name,
5648
16.0k
            XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5649
16.0k
            literal, URI, NULL);
5650
        /*
5651
         * For expat compatibility in SAX mode.
5652
         * assuming the entity replacement was asked for
5653
         */
5654
17.7k
        if ((ctxt->replaceEntities != 0) &&
5655
17.7k
      ((ctxt->myDoc == NULL) ||
5656
13.1k
      (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5657
274
      if (ctxt->myDoc == NULL) {
5658
100
          ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5659
100
          if (ctxt->myDoc == NULL) {
5660
0
              xmlErrMemory(ctxt, "New Doc failed");
5661
0
        return;
5662
0
          }
5663
100
          ctxt->myDoc->properties = XML_DOC_INTERNAL;
5664
100
      }
5665
5666
274
      if (ctxt->myDoc->intSubset == NULL)
5667
100
          ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5668
100
            BAD_CAST "fake", NULL, NULL);
5669
274
      xmlSAX2EntityDecl(ctxt, name,
5670
274
                  XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5671
274
                  literal, URI, NULL);
5672
274
        }
5673
17.7k
    }
5674
19.1k
      }
5675
202k
  }
5676
988k
  if (ctxt->instate == XML_PARSER_EOF)
5677
66
      goto done;
5678
988k
  SKIP_BLANKS;
5679
988k
  if (RAW != '>') {
5680
5.21k
      xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5681
5.21k
              "xmlParseEntityDecl: entity %s not terminated\n", name);
5682
5.21k
      xmlHaltParser(ctxt);
5683
983k
  } else {
5684
983k
      if (inputid != ctxt->input->id) {
5685
12
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5686
12
                         "Entity declaration doesn't start and stop in"
5687
12
                               " the same entity\n");
5688
12
      }
5689
983k
      NEXT;
5690
983k
  }
5691
988k
  if (orig != NULL) {
5692
      /*
5693
       * Ugly mechanism to save the raw entity value.
5694
       */
5695
960k
      xmlEntityPtr cur = NULL;
5696
5697
960k
      if (isParameter) {
5698
778k
          if ((ctxt->sax != NULL) &&
5699
778k
        (ctxt->sax->getParameterEntity != NULL))
5700
778k
        cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5701
778k
      } else {
5702
181k
          if ((ctxt->sax != NULL) &&
5703
181k
        (ctxt->sax->getEntity != NULL))
5704
181k
        cur = ctxt->sax->getEntity(ctxt->userData, name);
5705
181k
    if ((cur == NULL) && (ctxt->userData==ctxt)) {
5706
5.30k
        cur = xmlSAX2GetEntity(ctxt, name);
5707
5.30k
    }
5708
181k
      }
5709
960k
            if ((cur != NULL) && (cur->orig == NULL)) {
5710
915k
    cur->orig = orig;
5711
915k
                orig = NULL;
5712
915k
      }
5713
960k
  }
5714
5715
988k
done:
5716
988k
  if (value != NULL) xmlFree(value);
5717
988k
  if (URI != NULL) xmlFree(URI);
5718
988k
  if (literal != NULL) xmlFree(literal);
5719
988k
        if (orig != NULL) xmlFree(orig);
5720
988k
    }
5721
990k
}
5722
5723
/**
5724
 * xmlParseDefaultDecl:
5725
 * @ctxt:  an XML parser context
5726
 * @value:  Receive a possible fixed default value for the attribute
5727
 *
5728
 * DEPRECATED: Internal function, don't use.
5729
 *
5730
 * Parse an attribute default declaration
5731
 *
5732
 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5733
 *
5734
 * [ VC: Required Attribute ]
5735
 * if the default declaration is the keyword #REQUIRED, then the
5736
 * attribute must be specified for all elements of the type in the
5737
 * attribute-list declaration.
5738
 *
5739
 * [ VC: Attribute Default Legal ]
5740
 * The declared default value must meet the lexical constraints of
5741
 * the declared attribute type c.f. xmlValidateAttributeDecl()
5742
 *
5743
 * [ VC: Fixed Attribute Default ]
5744
 * if an attribute has a default value declared with the #FIXED
5745
 * keyword, instances of that attribute must match the default value.
5746
 *
5747
 * [ WFC: No < in Attribute Values ]
5748
 * handled in xmlParseAttValue()
5749
 *
5750
 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5751
 *          or XML_ATTRIBUTE_FIXED.
5752
 */
5753
5754
int
5755
3.41M
xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5756
3.41M
    int val;
5757
3.41M
    xmlChar *ret;
5758
5759
3.41M
    *value = NULL;
5760
3.41M
    if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5761
292k
  SKIP(9);
5762
292k
  return(XML_ATTRIBUTE_REQUIRED);
5763
292k
    }
5764
3.12M
    if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5765
2.87M
  SKIP(8);
5766
2.87M
  return(XML_ATTRIBUTE_IMPLIED);
5767
2.87M
    }
5768
246k
    val = XML_ATTRIBUTE_NONE;
5769
246k
    if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5770
169k
  SKIP(6);
5771
169k
  val = XML_ATTRIBUTE_FIXED;
5772
169k
  if (SKIP_BLANKS == 0) {
5773
85
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5774
85
         "Space required after '#FIXED'\n");
5775
85
  }
5776
169k
    }
5777
246k
    ret = xmlParseAttValue(ctxt);
5778
246k
    ctxt->instate = XML_PARSER_DTD;
5779
246k
    if (ret == NULL) {
5780
1.62k
  xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5781
1.62k
           "Attribute default value declaration error\n");
5782
1.62k
    } else
5783
245k
        *value = ret;
5784
246k
    return(val);
5785
3.12M
}
5786
5787
/**
5788
 * xmlParseNotationType:
5789
 * @ctxt:  an XML parser context
5790
 *
5791
 * DEPRECATED: Internal function, don't use.
5792
 *
5793
 * parse an Notation attribute type.
5794
 *
5795
 * Note: the leading 'NOTATION' S part has already being parsed...
5796
 *
5797
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5798
 *
5799
 * [ VC: Notation Attributes ]
5800
 * Values of this type must match one of the notation names included
5801
 * in the declaration; all notation names in the declaration must be declared.
5802
 *
5803
 * Returns: the notation attribute tree built while parsing
5804
 */
5805
5806
xmlEnumerationPtr
5807
254
xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5808
254
    const xmlChar *name;
5809
254
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5810
5811
254
    if (RAW != '(') {
5812
30
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5813
30
  return(NULL);
5814
30
    }
5815
224
    SHRINK;
5816
226
    do {
5817
226
        NEXT;
5818
226
  SKIP_BLANKS;
5819
226
        name = xmlParseName(ctxt);
5820
226
  if (name == NULL) {
5821
24
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5822
24
         "Name expected in NOTATION declaration\n");
5823
24
            xmlFreeEnumeration(ret);
5824
24
      return(NULL);
5825
24
  }
5826
202
  tmp = ret;
5827
204
  while (tmp != NULL) {
5828
2
      if (xmlStrEqual(name, tmp->name)) {
5829
0
    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5830
0
    "standalone: attribute notation value token %s duplicated\n",
5831
0
         name, NULL);
5832
0
    if (!xmlDictOwns(ctxt->dict, name))
5833
0
        xmlFree((xmlChar *) name);
5834
0
    break;
5835
0
      }
5836
2
      tmp = tmp->next;
5837
2
  }
5838
202
  if (tmp == NULL) {
5839
202
      cur = xmlCreateEnumeration(name);
5840
202
      if (cur == NULL) {
5841
0
                xmlFreeEnumeration(ret);
5842
0
                return(NULL);
5843
0
            }
5844
202
      if (last == NULL) ret = last = cur;
5845
2
      else {
5846
2
    last->next = cur;
5847
2
    last = cur;
5848
2
      }
5849
202
  }
5850
202
  SKIP_BLANKS;
5851
202
    } while (RAW == '|');
5852
200
    if (RAW != ')') {
5853
25
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5854
25
        xmlFreeEnumeration(ret);
5855
25
  return(NULL);
5856
25
    }
5857
175
    NEXT;
5858
175
    return(ret);
5859
200
}
5860
5861
/**
5862
 * xmlParseEnumerationType:
5863
 * @ctxt:  an XML parser context
5864
 *
5865
 * DEPRECATED: Internal function, don't use.
5866
 *
5867
 * parse an Enumeration attribute type.
5868
 *
5869
 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5870
 *
5871
 * [ VC: Enumeration ]
5872
 * Values of this type must match one of the Nmtoken tokens in
5873
 * the declaration
5874
 *
5875
 * Returns: the enumeration attribute tree built while parsing
5876
 */
5877
5878
xmlEnumerationPtr
5879
265k
xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5880
265k
    xmlChar *name;
5881
265k
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5882
5883
265k
    if (RAW != '(') {
5884
2.18k
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5885
2.18k
  return(NULL);
5886
2.18k
    }
5887
263k
    SHRINK;
5888
910k
    do {
5889
910k
        NEXT;
5890
910k
  SKIP_BLANKS;
5891
910k
        name = xmlParseNmtoken(ctxt);
5892
910k
  if (name == NULL) {
5893
239
      xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5894
239
      return(ret);
5895
239
  }
5896
910k
  tmp = ret;
5897
2.49M
  while (tmp != NULL) {
5898
1.58M
      if (xmlStrEqual(name, tmp->name)) {
5899
32
    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5900
32
    "standalone: attribute enumeration value token %s duplicated\n",
5901
32
         name, NULL);
5902
32
    if (!xmlDictOwns(ctxt->dict, name))
5903
32
        xmlFree(name);
5904
32
    break;
5905
32
      }
5906
1.58M
      tmp = tmp->next;
5907
1.58M
  }
5908
910k
  if (tmp == NULL) {
5909
910k
      cur = xmlCreateEnumeration(name);
5910
910k
      if (!xmlDictOwns(ctxt->dict, name))
5911
910k
    xmlFree(name);
5912
910k
      if (cur == NULL) {
5913
0
                xmlFreeEnumeration(ret);
5914
0
                return(NULL);
5915
0
            }
5916
910k
      if (last == NULL) ret = last = cur;
5917
647k
      else {
5918
647k
    last->next = cur;
5919
647k
    last = cur;
5920
647k
      }
5921
910k
  }
5922
910k
  SKIP_BLANKS;
5923
910k
    } while (RAW == '|');
5924
263k
    if (RAW != ')') {
5925
654
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5926
654
  return(ret);
5927
654
    }
5928
262k
    NEXT;
5929
262k
    return(ret);
5930
263k
}
5931
5932
/**
5933
 * xmlParseEnumeratedType:
5934
 * @ctxt:  an XML parser context
5935
 * @tree:  the enumeration tree built while parsing
5936
 *
5937
 * DEPRECATED: Internal function, don't use.
5938
 *
5939
 * parse an Enumerated attribute type.
5940
 *
5941
 * [57] EnumeratedType ::= NotationType | Enumeration
5942
 *
5943
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5944
 *
5945
 *
5946
 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5947
 */
5948
5949
int
5950
266k
xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5951
266k
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5952
282
  SKIP(8);
5953
282
  if (SKIP_BLANKS == 0) {
5954
28
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5955
28
         "Space required after 'NOTATION'\n");
5956
28
      return(0);
5957
28
  }
5958
254
  *tree = xmlParseNotationType(ctxt);
5959
254
  if (*tree == NULL) return(0);
5960
175
  return(XML_ATTRIBUTE_NOTATION);
5961
254
    }
5962
265k
    *tree = xmlParseEnumerationType(ctxt);
5963
265k
    if (*tree == NULL) return(0);
5964
263k
    return(XML_ATTRIBUTE_ENUMERATION);
5965
265k
}
5966
5967
/**
5968
 * xmlParseAttributeType:
5969
 * @ctxt:  an XML parser context
5970
 * @tree:  the enumeration tree built while parsing
5971
 *
5972
 * DEPRECATED: Internal function, don't use.
5973
 *
5974
 * parse the Attribute list def for an element
5975
 *
5976
 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5977
 *
5978
 * [55] StringType ::= 'CDATA'
5979
 *
5980
 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5981
 *                        'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5982
 *
5983
 * Validity constraints for attribute values syntax are checked in
5984
 * xmlValidateAttributeValue()
5985
 *
5986
 * [ VC: ID ]
5987
 * Values of type ID must match the Name production. A name must not
5988
 * appear more than once in an XML document as a value of this type;
5989
 * i.e., ID values must uniquely identify the elements which bear them.
5990
 *
5991
 * [ VC: One ID per Element Type ]
5992
 * No element type may have more than one ID attribute specified.
5993
 *
5994
 * [ VC: ID Attribute Default ]
5995
 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5996
 *
5997
 * [ VC: IDREF ]
5998
 * Values of type IDREF must match the Name production, and values
5999
 * of type IDREFS must match Names; each IDREF Name must match the value
6000
 * of an ID attribute on some element in the XML document; i.e. IDREF
6001
 * values must match the value of some ID attribute.
6002
 *
6003
 * [ VC: Entity Name ]
6004
 * Values of type ENTITY must match the Name production, values
6005
 * of type ENTITIES must match Names; each Entity Name must match the
6006
 * name of an unparsed entity declared in the DTD.
6007
 *
6008
 * [ VC: Name Token ]
6009
 * Values of type NMTOKEN must match the Nmtoken production; values
6010
 * of type NMTOKENS must match Nmtokens.
6011
 *
6012
 * Returns the attribute type
6013
 */
6014
int
6015
3.41M
xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
6016
3.41M
    SHRINK;
6017
3.41M
    if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
6018
1.13M
  SKIP(5);
6019
1.13M
  return(XML_ATTRIBUTE_CDATA);
6020
2.28M
     } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
6021
10.9k
  SKIP(6);
6022
10.9k
  return(XML_ATTRIBUTE_IDREFS);
6023
2.27M
     } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
6024
54.2k
  SKIP(5);
6025
54.2k
  return(XML_ATTRIBUTE_IDREF);
6026
2.21M
     } else if ((RAW == 'I') && (NXT(1) == 'D')) {
6027
1.00M
        SKIP(2);
6028
1.00M
  return(XML_ATTRIBUTE_ID);
6029
1.21M
     } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
6030
5.25k
  SKIP(6);
6031
5.25k
  return(XML_ATTRIBUTE_ENTITY);
6032
1.20M
     } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
6033
77
  SKIP(8);
6034
77
  return(XML_ATTRIBUTE_ENTITIES);
6035
1.20M
     } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
6036
80.4k
  SKIP(8);
6037
80.4k
  return(XML_ATTRIBUTE_NMTOKENS);
6038
1.12M
     } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
6039
860k
  SKIP(7);
6040
860k
  return(XML_ATTRIBUTE_NMTOKEN);
6041
860k
     }
6042
266k
     return(xmlParseEnumeratedType(ctxt, tree));
6043
3.41M
}
6044
6045
/**
6046
 * xmlParseAttributeListDecl:
6047
 * @ctxt:  an XML parser context
6048
 *
6049
 * DEPRECATED: Internal function, don't use.
6050
 *
6051
 * Parse an attribute list declaration for an element. Always consumes '<!'.
 * (If the input does not start with '<!' the function returns at once
 * without consuming anything; if '<!' is not followed by 'ATTLIST' only
 * the '<!' is consumed.)
 *
 * For every AttDef the name, the type and the default declaration are
 * parsed in turn. The result is passed to the SAX attributeDecl callback
 * when a SAX handler is set, SAX is not disabled and the callback is
 * non-NULL; in that case the enumeration tree is not freed here. In all
 * other cases the enumeration tree is freed here. The default value
 * string is always freed here. The AttDef loop stops at the first error.
6052
 *
6053
 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
6054
 *
6055
 * [53] AttDef ::= S Name S AttType S DefaultDecl
6056
 *
6057
 */
6058
void
6059
1.17M
xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
6060
1.17M
    const xmlChar *elemName;
6061
1.17M
    const xmlChar *attrName;
6062
1.17M
    xmlEnumerationPtr tree;
6063
6064
1.17M
    if ((CUR != '<') || (NXT(1) != '!'))
6065
0
        return;
6066
1.17M
    SKIP(2);
6067
6068
1.17M
    if (CMP7(CUR_PTR, 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
6069
1.17M
  /* id of the input the declaration starts in, compared again at '>' */
  int inputid = ctxt->input->id;
6070
6071
1.17M
  SKIP(7);
6072
1.17M
  if (SKIP_BLANKS == 0) {
6073
628
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6074
628
                     "Space required after '<!ATTLIST'\n");
6075
628
  }
6076
1.17M
        elemName = xmlParseName(ctxt);
6077
1.17M
  if (elemName == NULL) {
6078
444
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6079
444
         "ATTLIST: no name for Element\n");
6080
444
      return;
6081
444
  }
6082
1.17M
  SKIP_BLANKS;
6083
1.17M
  GROW;
6084
4.58M
  /* one iteration per AttDef, until '>' or the parser is stopped */
  while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
6085
3.42M
      int type;
6086
3.42M
      int def;
6087
3.42M
      xmlChar *defaultValue = NULL;
6088
6089
3.42M
      GROW;
6090
3.42M
            tree = NULL;
6091
3.42M
      attrName = xmlParseName(ctxt);
6092
3.42M
      if (attrName == NULL) {
6093
2.49k
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6094
2.49k
             "ATTLIST: no name for Attribute\n");
6095
2.49k
    break;
6096
2.49k
      }
6097
3.41M
      GROW;
6098
3.41M
      if (SKIP_BLANKS == 0) {
6099
664
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6100
664
            "Space required after the attribute name\n");
6101
664
    break;
6102
664
      }
6103
6104
3.41M
      /* may store an enumeration list in tree */
      type = xmlParseAttributeType(ctxt, &tree);
6105
3.41M
      if (type <= 0) {
6106
2.42k
          break;
6107
2.42k
      }
6108
6109
3.41M
      GROW;
6110
3.41M
      if (SKIP_BLANKS == 0) {
6111
1.38k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6112
1.38k
             "Space required after the attribute type\n");
6113
1.38k
          if (tree != NULL)
6114
787
        xmlFreeEnumeration(tree);
6115
1.38k
    break;
6116
1.38k
      }
6117
6118
3.41M
      /* may store an allocated default value in defaultValue */
      def = xmlParseDefaultDecl(ctxt, &defaultValue);
6119
3.41M
      if (def <= 0) {
6120
0
                if (defaultValue != NULL)
6121
0
        xmlFree(defaultValue);
6122
0
          if (tree != NULL)
6123
0
        xmlFreeEnumeration(tree);
6124
0
          break;
6125
0
      }
6126
3.41M
      /* default values of non-CDATA attributes are normalized in place */
      if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6127
85.7k
          xmlAttrNormalizeSpace(defaultValue, defaultValue);
6128
6129
3.41M
      GROW;
6130
3.41M
            if (RAW != '>') {
6131
3.26M
    /* another AttDef must be separated from this one by blanks */
    if (SKIP_BLANKS == 0) {
6132
2.66k
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6133
2.66k
      "Space required after the attribute default value\n");
6134
2.66k
        if (defaultValue != NULL)
6135
943
      xmlFree(defaultValue);
6136
2.66k
        if (tree != NULL)
6137
232
      xmlFreeEnumeration(tree);
6138
2.66k
        break;
6139
2.66k
    }
6140
3.26M
      }
6141
3.41M
      /*
       * Report the declaration; tree is handed to the callback and only
       * freed here when the callback is not invoked.
       */
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6142
3.41M
    (ctxt->sax->attributeDecl != NULL))
6143
3.19M
    ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6144
3.19M
                          type, def, defaultValue, tree);
6145
214k
      else if (tree != NULL)
6146
13.6k
    xmlFreeEnumeration(tree);
6147
6148
3.41M
      /*
       * SAX2 bookkeeping: a default value that is neither #IMPLIED nor
       * #REQUIRED is recorded with xmlAddDefAttrs, and the attribute
       * type is recorded with xmlAddSpecialAttr.
       */
      if ((ctxt->sax2) && (defaultValue != NULL) &&
6149
3.41M
          (def != XML_ATTRIBUTE_IMPLIED) &&
6150
3.41M
    (def != XML_ATTRIBUTE_REQUIRED)) {
6151
179k
    xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6152
179k
      }
6153
3.41M
      if (ctxt->sax2) {
6154
2.51M
    xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6155
2.51M
      }
6156
3.41M
      if (defaultValue != NULL)
6157
244k
          xmlFree(defaultValue);
6158
3.41M
      GROW;
6159
3.41M
  }
6160
1.17M
  if (RAW == '>') {
6161
1.16M
      /* the closing '>' must come from the input that opened '<!ATTLIST' */
      if (inputid != ctxt->input->id) {
6162
81
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6163
81
                               "Attribute list declaration doesn't start and"
6164
81
                               " stop in the same entity\n");
6165
81
      }
6166
1.16M
      NEXT;
6167
1.16M
  }
6168
1.17M
    }
6169
1.17M
}
6170
6171
/**
6172
 * xmlParseElementMixedContentDecl:
6173
 * @ctxt:  an XML parser context
6174
 * @inputchk:  the input used for the current entity, needed for boundary checks
6175
 *
6176
 * DEPRECATED: Internal function, don't use.
6177
 *
6178
 * Parse the declaration for a Mixed Element content.
6179
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
 *
 * For '(#PCDATA)' (optionally followed by '*') a single PCDATA node is
 * returned. For '(#PCDATA | a | b ...)*' the result is a chain of OR
 * nodes linked through c2: the first OR node has the PCDATA node as c1,
 * every following OR node has the element node of one name as c1, and
 * the last name becomes the c2 of the last OR node. The root is marked
 * XML_ELEMENT_CONTENT_MULT.
6180
 *
6181
 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6182
 *                '(' S? '#PCDATA' S? ')'
6183
 *
6184
 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6185
 *
6186
 * [ VC: No Duplicate Types ]
6187
 * The same name must not appear more than once in a single
6188
 * mixed-content declaration.
6189
 *
6190
 * returns: the list of the xmlElementContentPtr describing the element choices,
 *          or NULL if '#PCDATA' is missing, a name is missing, the group is
 *          not closed by ')*' or a node allocation fails. Any partially
 *          built content is freed before NULL is returned.
6191
 */
6192
xmlElementContentPtr
6193
502k
xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6194
502k
    xmlElementContentPtr ret = NULL, cur = NULL, n;
6195
502k
    /* name parsed in the previous loop iteration, not yet put in the tree */
    const xmlChar *elem = NULL;
6196
6197
502k
    GROW;
6198
502k
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6199
502k
  SKIP(7);
6200
502k
  SKIP_BLANKS;
6201
502k
  SHRINK;
6202
502k
  /* '(#PCDATA)' form, with an optional trailing '*' */
  if (RAW == ')') {
6203
282k
      if (ctxt->input->id != inputchk) {
6204
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6205
0
                               "Element content declaration doesn't start and"
6206
0
                               " stop in the same entity\n");
6207
0
      }
6208
282k
      NEXT;
6209
282k
      ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6210
282k
      if (ret == NULL)
6211
0
          return(NULL);
6212
282k
      if (RAW == '*') {
6213
44
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6214
44
    NEXT;
6215
44
      }
6216
282k
      return(ret);
6217
282k
  }
6218
219k
  if ((RAW == '(') || (RAW == '|')) {
6219
219k
      ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6220
219k
      if (ret == NULL) return(NULL);
6221
219k
  }
6222
2.68M
  /* one iteration per '|' Name */
  while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
6223
2.46M
      NEXT;
6224
2.46M
      if (elem == NULL) {
6225
219k
          /* first '|': a new OR root takes the PCDATA node as c1 */
          ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6226
219k
    if (ret == NULL) {
6227
0
        xmlFreeDocElementContent(ctxt->myDoc, cur);
6228
0
                    return(NULL);
6229
0
                }
6230
219k
    ret->c1 = cur;
6231
219k
    if (cur != NULL)
6232
219k
        cur->parent = ret;
6233
219k
    cur = ret;
6234
2.24M
      } else {
6235
2.24M
          /*
           * later '|': a new OR node gets the previously parsed name as
           * c1 and is linked as c2 of the current OR node
           */
          n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6236
2.24M
    if (n == NULL) {
6237
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6238
0
                    return(NULL);
6239
0
                }
6240
2.24M
    n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6241
2.24M
    if (n->c1 != NULL)
6242
2.24M
        n->c1->parent = n;
6243
2.24M
          cur->c2 = n;
6244
2.24M
    if (n != NULL)
6245
2.24M
        n->parent = cur;
6246
2.24M
    cur = n;
6247
2.24M
      }
6248
2.46M
      SKIP_BLANKS;
6249
2.46M
      elem = xmlParseName(ctxt);
6250
2.46M
      if (elem == NULL) {
6251
289
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6252
289
      "xmlParseElementMixedContentDecl : Name expected\n");
6253
289
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6254
289
    return(NULL);
6255
289
      }
6256
2.46M
      SKIP_BLANKS;
6257
2.46M
      GROW;
6258
2.46M
  }
6259
219k
  /* a group with names must be closed by ')*' */
  if ((RAW == ')') && (NXT(1) == '*')) {
6260
218k
      if (elem != NULL) {
6261
218k
    /* the last parsed name becomes c2 of the last OR node */
    cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6262
218k
                                   XML_ELEMENT_CONTENT_ELEMENT);
6263
218k
    if (cur->c2 != NULL)
6264
218k
        cur->c2->parent = cur;
6265
218k
            }
6266
218k
            if (ret != NULL)
6267
218k
                ret->ocur = XML_ELEMENT_CONTENT_MULT;
6268
218k
      if (ctxt->input->id != inputchk) {
6269
11
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6270
11
                               "Element content declaration doesn't start and"
6271
11
                               " stop in the same entity\n");
6272
11
      }
6273
218k
      SKIP(2);
6274
218k
  } else {
6275
1.12k
      xmlFreeDocElementContent(ctxt->myDoc, ret);
6276
1.12k
      xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6277
1.12k
      return(NULL);
6278
1.12k
  }
6279
6280
219k
    } else {
6281
0
  /* ret is still NULL here, so NULL is returned below */
  xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6282
0
    }
6283
218k
    return(ret);
6284
502k
}
6285
6286
/**
6287
 * xmlParseElementChildrenContentDeclPriv:
6288
 * @ctxt:  an XML parser context
6289
 * @inputchk:  the input used for the current entity, needed for boundary checks
6290
 * @depth: the level of recursion
6291
 *
6292
 * Parse the declaration for an element children content (a choice or a
 * sequence group), recursing for nested groups.
6293
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
 *
 * The recursion is rejected when @depth exceeds 128 and XML_PARSE_HUGE is
 * not set in the context options, and always when @depth exceeds 2048.
6294
 *
6295
 *
6296
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6297
 *
6298
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6299
 *
6300
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6301
 *
6302
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6303
 *
6304
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6305
 * TODO Parameter-entity replacement text must be properly nested
6306
 *  with parenthesized groups. That is to say, if either of the
6307
 *  opening or closing parentheses in a choice, seq, or Mixed
6308
 *  construct is contained in the replacement text for a parameter
6309
 *  entity, both must be contained in the same replacement text. For
6310
 *  interoperability, if a parameter-entity reference appears in a
6311
 *  choice, seq, or Mixed construct, its replacement text should not
6312
 *  be empty, and neither the first nor last non-blank character of
6313
 *  the replacement text should be a connector (| or ,).
6314
 *
6315
 * Returns the tree of xmlElementContentPtr describing the element
6316
 *          hierarchy, or NULL on error (the partially built tree is
 *          freed before returning).
6317
 */
6318
static xmlElementContentPtr
6319
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6320
777k
                                       int depth) {
6321
777k
    /*
     * ret:  root of the tree built so far
     * cur:  most recently created operator node (or ret before any operator)
     * last: operand parsed after the latest separator, not yet linked in
     * op:   operator node being created for the current separator
     */
    xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6322
777k
    const xmlChar *elem;
6323
777k
    /* separator of this group (',' or '|'), 0 until the first one is seen */
    xmlChar type = 0;
6324
6325
777k
    if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6326
777k
        (depth >  2048)) {
6327
0
        xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6328
0
"xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6329
0
                          depth);
6330
0
  return(NULL);
6331
0
    }
6332
777k
    SKIP_BLANKS;
6333
777k
    GROW;
6334
777k
    if (RAW == '(') {
6335
35.9k
  int inputid = ctxt->input->id;
6336
6337
        /* Recurse on first child */
6338
35.9k
  NEXT;
6339
35.9k
  SKIP_BLANKS;
6340
35.9k
        cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6341
35.9k
                                                           depth + 1);
6342
35.9k
        if (cur == NULL)
6343
2.42k
            return(NULL);
6344
33.5k
  SKIP_BLANKS;
6345
33.5k
  GROW;
6346
741k
    } else {
6347
741k
  /* first child is a plain name with an optional occurrence suffix */
  elem = xmlParseName(ctxt);
6348
741k
  if (elem == NULL) {
6349
1.77k
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6350
1.77k
      return(NULL);
6351
1.77k
  }
6352
739k
        cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6353
739k
  if (cur == NULL) {
6354
0
      xmlErrMemory(ctxt, NULL);
6355
0
      return(NULL);
6356
0
  }
6357
739k
  GROW;
6358
739k
  if (RAW == '?') {
6359
52.2k
      cur->ocur = XML_ELEMENT_CONTENT_OPT;
6360
52.2k
      NEXT;
6361
687k
  } else if (RAW == '*') {
6362
38.7k
      cur->ocur = XML_ELEMENT_CONTENT_MULT;
6363
38.7k
      NEXT;
6364
648k
  } else if (RAW == '+') {
6365
150k
      cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6366
150k
      NEXT;
6367
497k
  } else {
6368
497k
      cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6369
497k
  }
6370
739k
  GROW;
6371
739k
    }
6372
772k
    SKIP_BLANKS;
6373
772k
    SHRINK;
6374
3.96M
    while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
6375
        /*
6376
   * Each loop we parse one separator and one element.
6377
   */
6378
3.19M
        if (RAW == ',') {
6379
670k
      /* the first separator seen fixes the kind of the group */
      if (type == 0) type = CUR;
6380
6381
      /*
6382
       * Detect "Name | Name , Name" error
6383
       */
6384
392k
      else if (type != CUR) {
6385
49
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6386
49
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6387
49
                      type);
6388
49
    if ((last != NULL) && (last != ret))
6389
49
        xmlFreeDocElementContent(ctxt->myDoc, last);
6390
49
    if (ret != NULL)
6391
49
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6392
49
    return(NULL);
6393
49
      }
6394
670k
      NEXT;
6395
6396
670k
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6397
670k
      if (op == NULL) {
6398
0
    if ((last != NULL) && (last != ret))
6399
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6400
0
          xmlFreeDocElementContent(ctxt->myDoc, ret);
6401
0
    return(NULL);
6402
0
      }
6403
670k
      if (last == NULL) {
6404
278k
    /* first separator: op becomes the root, old root is its c1 */
    op->c1 = ret;
6405
278k
    if (ret != NULL)
6406
278k
        ret->parent = op;
6407
278k
    ret = cur = op;
6408
392k
      } else {
6409
392k
          /* later separator: op is c2 of cur and takes last as c1 */
          cur->c2 = op;
6410
392k
    if (op != NULL)
6411
392k
        op->parent = cur;
6412
392k
    op->c1 = last;
6413
392k
    if (last != NULL)
6414
392k
        last->parent = op;
6415
392k
    cur =op;
6416
392k
    last = NULL;
6417
392k
      }
6418
2.52M
  } else if (RAW == '|') {
6419
2.51M
      /* the first separator seen fixes the kind of the group */
      if (type == 0) type = CUR;
6420
6421
      /*
6422
       * Detect "Name , Name | Name" error
6423
       */
6424
2.24M
      else if (type != CUR) {
6425
71
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6426
71
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6427
71
          type);
6428
71
    if ((last != NULL) && (last != ret))
6429
71
        xmlFreeDocElementContent(ctxt->myDoc, last);
6430
71
    if (ret != NULL)
6431
71
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6432
71
    return(NULL);
6433
71
      }
6434
2.51M
      NEXT;
6435
6436
2.51M
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6437
2.51M
      if (op == NULL) {
6438
0
    if ((last != NULL) && (last != ret))
6439
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6440
0
    if (ret != NULL)
6441
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6442
0
    return(NULL);
6443
0
      }
6444
2.51M
      if (last == NULL) {
6445
270k
    /* first separator: op becomes the root, old root is its c1 */
    op->c1 = ret;
6446
270k
    if (ret != NULL)
6447
270k
        ret->parent = op;
6448
270k
    ret = cur = op;
6449
2.24M
      } else {
6450
2.24M
          /* later separator: op is c2 of cur and takes last as c1 */
          cur->c2 = op;
6451
2.24M
    if (op != NULL)
6452
2.24M
        op->parent = cur;
6453
2.24M
    op->c1 = last;
6454
2.24M
    if (last != NULL)
6455
2.24M
        last->parent = op;
6456
2.24M
    cur =op;
6457
2.24M
    last = NULL;
6458
2.24M
      }
6459
2.51M
  } else {
6460
2.19k
      /* neither ')' nor a separator: the group is malformed */
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6461
2.19k
      if ((last != NULL) && (last != ret))
6462
979
          xmlFreeDocElementContent(ctxt->myDoc, last);
6463
2.19k
      if (ret != NULL)
6464
2.19k
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6465
2.19k
      return(NULL);
6466
2.19k
  }
6467
3.18M
  GROW;
6468
3.18M
  SKIP_BLANKS;
6469
3.18M
  GROW;
6470
3.18M
  if (RAW == '(') {
6471
148k
      int inputid = ctxt->input->id;
6472
      /* Recurse on second child */
6473
148k
      NEXT;
6474
148k
      SKIP_BLANKS;
6475
148k
      last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6476
148k
                                                          depth + 1);
6477
148k
            if (last == NULL) {
6478
471
    if (ret != NULL)
6479
471
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6480
471
    return(NULL);
6481
471
            }
6482
147k
      SKIP_BLANKS;
6483
3.04M
  } else {
6484
3.04M
      /* operand is a plain name with an optional occurrence suffix */
      elem = xmlParseName(ctxt);
6485
3.04M
      if (elem == NULL) {
6486
528
    xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6487
528
    if (ret != NULL)
6488
528
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6489
528
    return(NULL);
6490
528
      }
6491
3.04M
      last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6492
3.04M
      if (last == NULL) {
6493
0
    if (ret != NULL)
6494
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6495
0
    return(NULL);
6496
0
      }
6497
3.04M
      if (RAW == '?') {
6498
211k
    last->ocur = XML_ELEMENT_CONTENT_OPT;
6499
211k
    NEXT;
6500
2.82M
      } else if (RAW == '*') {
6501
137k
    last->ocur = XML_ELEMENT_CONTENT_MULT;
6502
137k
    NEXT;
6503
2.69M
      } else if (RAW == '+') {
6504
40.8k
    last->ocur = XML_ELEMENT_CONTENT_PLUS;
6505
40.8k
    NEXT;
6506
2.65M
      } else {
6507
2.65M
    last->ocur = XML_ELEMENT_CONTENT_ONCE;
6508
2.65M
      }
6509
3.04M
  }
6510
3.18M
  SKIP_BLANKS;
6511
3.18M
  GROW;
6512
3.18M
    }
6513
769k
    /* link the operand still pending as c2 of the last operator */
    if ((cur != NULL) && (last != NULL)) {
6514
547k
        cur->c2 = last;
6515
547k
  if (last != NULL)
6516
547k
      last->parent = cur;
6517
547k
    }
6518
769k
    if (ctxt->input->id != inputchk) {
6519
68
  xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6520
68
                       "Element content declaration doesn't start and stop in"
6521
68
                       " the same entity\n");
6522
68
    }
6523
769k
    /* step past the character that ended the loop, normally ')' */
    NEXT;
6524
769k
    /* occurrence suffix of the whole group */
    if (RAW == '?') {
6525
18.6k
  if (ret != NULL) {
6526
18.6k
      /* a root already marked '+' or '*' becomes '*', otherwise '?' */
      if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6527
18.6k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6528
18
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6529
18.6k
      else
6530
18.6k
          ret->ocur = XML_ELEMENT_CONTENT_OPT;
6531
18.6k
  }
6532
18.6k
  NEXT;
6533
750k
    } else if (RAW == '*') {
6534
170k
  if (ret != NULL) {
6535
170k
      ret->ocur = XML_ELEMENT_CONTENT_MULT;
6536
170k
      cur = ret;
6537
      /*
6538
       * Some normalization:
6539
       * (a | b* | c?)* == (a | b | c)*
6540
       */
6541
1.48M
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6542
1.31M
    if ((cur->c1 != NULL) &&
6543
1.31M
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6544
1.31M
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6545
1.83k
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6546
1.31M
    if ((cur->c2 != NULL) &&
6547
1.31M
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6548
1.31M
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6549
295
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6550
1.31M
    cur = cur->c2;
6551
1.31M
      }
6552
170k
  }
6553
170k
  NEXT;
6554
580k
    } else if (RAW == '+') {
6555
138k
  if (ret != NULL) {
6556
138k
      /* set when an inner '?' or '*' was reset to ONCE below */
      int found = 0;
6557
6558
138k
      if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6559
138k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6560
12
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6561
138k
      else
6562
138k
          ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6563
      /*
6564
       * Some normalization:
6565
       * (a | b*)+ == (a | b)*
6566
       * (a | b?)+ == (a | b)*
6567
       */
6568
244k
      /*
       * NOTE(review): unlike the '*' branch above, cur is not reset to
       * ret before this walk, so it starts from wherever the parsing
       * loop left cur - confirm this is intended.
       */
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6569
105k
    if ((cur->c1 != NULL) &&
6570
105k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6571
105k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6572
6
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6573
6
        found = 1;
6574
6
    }
6575
105k
    if ((cur->c2 != NULL) &&
6576
105k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6577
105k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6578
9
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6579
9
        found = 1;
6580
9
    }
6581
105k
    cur = cur->c2;
6582
105k
      }
6583
138k
      if (found)
6584
15
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6585
138k
  }
6586
138k
  NEXT;
6587
138k
    }
6588
769k
    return(ret);
6589
772k
}
6590
6591
/**
6592
 * xmlParseElementChildrenContentDecl:
6593
 * @ctxt:  an XML parser context
6594
 * @inputchk:  the input used for the current entity, needed for boundary checks
6595
 *
6596
 * DEPRECATED: Internal function, don't use.
6597
 *
6598
 * Parse the declaration for an element children content (a choice or a
 * sequence group). This is a thin wrapper that calls
 * xmlParseElementChildrenContentDeclPriv with a recursion depth of 1.
6599
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6600
 *
6601
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6602
 *
6603
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6604
 *
6605
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6606
 *
6607
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6608
 *
6609
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6610
 * TODO Parameter-entity replacement text must be properly nested
6611
 *  with parenthesized groups. That is to say, if either of the
6612
 *  opening or closing parentheses in a choice, seq, or Mixed
6613
 *  construct is contained in the replacement text for a parameter
6614
 *  entity, both must be contained in the same replacement text. For
6615
 *  interoperability, if a parameter-entity reference appears in a
6616
 *  choice, seq, or Mixed construct, its replacement text should not
6617
 *  be empty, and neither the first nor last non-blank character of
6618
 *  the replacement text should be a connector (| or ,).
6619
 *
6620
 * Returns the tree of xmlElementContentPtr describing the element
6621
 *          hierarchy, as returned by the private implementation.
6622
 */
6623
xmlElementContentPtr
6624
0
xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6625
    /* stub left for API/ABI compat */
6626
0
    return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6627
0
}
6628
6629
/**
6630
 * xmlParseElementContentDecl:
6631
 * @ctxt:  an XML parser context
6632
 * @name:  the name of the element being defined.
6633
 * @result:  the Element Content pointer will be stored here if any
6634
 *
6635
 * DEPRECATED: Internal function, don't use.
6636
 *
6637
 * parse the declaration for an Element content either Mixed or Children,
6638
 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
 *
 * @result is set to NULL on entry. After the opening '(' and blanks are
 * skipped, a leading '#PCDATA' selects the Mixed parser; anything else
 * is handed to the children parser.
6639
 *
6640
 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6641
 *
6642
 * returns: the type of element content XML_ELEMENT_TYPE_xxx, or -1 if the
 *          input does not start with '(' or the parser is in the EOF
 *          state after the '(' was consumed. Note that a type is returned
 *          even when the sub-parser failed and @result is left NULL.
6643
 */
6644
6645
int
6646
xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6647
1.09M
                           xmlElementContentPtr *result) {
6648
6649
1.09M
    xmlElementContentPtr tree = NULL;
6650
1.09M
    /* input the '(' belongs to, passed down for the entity boundary check */
    int inputid = ctxt->input->id;
6651
1.09M
    int res;
6652
6653
1.09M
    *result = NULL;
6654
6655
1.09M
    if (RAW != '(') {
6656
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6657
0
    "xmlParseElementContentDecl : %s '(' expected\n", name);
6658
0
  return(-1);
6659
0
    }
6660
1.09M
    NEXT;
6661
1.09M
    GROW;
6662
1.09M
    if (ctxt->instate == XML_PARSER_EOF)
6663
0
        return(-1);
6664
1.09M
    SKIP_BLANKS;
6665
1.09M
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6666
502k
        tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6667
502k
  res = XML_ELEMENT_TYPE_MIXED;
6668
592k
    } else {
6669
592k
        tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6670
592k
  res = XML_ELEMENT_TYPE_ELEMENT;
6671
592k
    }
6672
1.09M
    SKIP_BLANKS;
6673
1.09M
    *result = tree;
6674
1.09M
    return(res);
6675
1.09M
}
6676
6677
/**
6678
 * xmlParseElementDecl:
6679
 * @ctxt:  an XML parser context
6680
 *
6681
 * DEPRECATED: Internal function, don't use.
6682
 *
6683
 * Parse an element declaration. Always consumes '<!'.
 * (If the input does not start with '<!' the function returns -1 at once
 * without consuming anything; if '<!' is not followed by 'ELEMENT' only
 * the '<!' is consumed and -1 is returned.)
 *
 * On success the declaration is reported through the SAX elementDecl
 * callback when a SAX handler is set, SAX is not disabled and the
 * callback is non-NULL. The content tree is freed here unless the
 * callback took it over (see the comment near the end of the function).
6684
 *
6685
 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6686
 *
6687
 * [ VC: Unique Element Type Declaration ]
6688
 * No element type may be declared more than once
6689
 *
6690
 * Returns the type of the element, or -1 in case of error. When only the
 * closing '>' is missing, the error is reported but the parsed type is
 * still returned.
6691
 */
6692
int
6693
1.30M
xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6694
1.30M
    const xmlChar *name;
6695
1.30M
    int ret = -1;
6696
1.30M
    xmlElementContentPtr content  = NULL;
6697
6698
1.30M
    if ((CUR != '<') || (NXT(1) != '!'))
6699
0
        return(ret);
6700
1.30M
    SKIP(2);
6701
6702
    /* GROW; done in the caller */
6703
1.30M
    if (CMP7(CUR_PTR, 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6704
1.29M
  /* id of the input the declaration starts in, compared again at '>' */
  int inputid = ctxt->input->id;
6705
6706
1.29M
  SKIP(7);
6707
1.29M
  if (SKIP_BLANKS == 0) {
6708
255
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6709
255
               "Space required after 'ELEMENT'\n");
6710
255
      return(-1);
6711
255
  }
6712
1.29M
        name = xmlParseName(ctxt);
6713
1.29M
  if (name == NULL) {
6714
250
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6715
250
         "xmlParseElementDecl: no name for Element\n");
6716
250
      return(-1);
6717
250
  }
6718
1.29M
  /* a missing blank is reported but parsing continues */
  if (SKIP_BLANKS == 0) {
6719
1.20k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6720
1.20k
         "Space required after the element name\n");
6721
1.20k
  }
6722
1.29M
  if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6723
197k
      SKIP(5);
6724
      /*
6725
       * Element must always be empty.
6726
       */
6727
197k
      ret = XML_ELEMENT_TYPE_EMPTY;
6728
1.10M
  } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6729
1.10M
             (NXT(2) == 'Y')) {
6730
4.07k
      SKIP(3);
6731
      /*
6732
       * Element is a generic container.
6733
       */
6734
4.07k
      ret = XML_ELEMENT_TYPE_ANY;
6735
1.09M
  } else if (RAW == '(') {
6736
1.09M
      /* Mixed or children content; content receives the parsed tree */
      ret = xmlParseElementContentDecl(ctxt, name, &content);
6737
1.09M
  } else {
6738
      /*
6739
       * [ WFC: PEs in Internal Subset ] error handling.
6740
       */
6741
2.08k
      if ((RAW == '%') && (ctxt->external == 0) &&
6742
2.08k
          (ctxt->inputNr == 1)) {
6743
63
    xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
6744
63
    "PEReference: forbidden within markup decl in internal subset\n");
6745
2.02k
      } else {
6746
2.02k
    xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6747
2.02k
          "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6748
2.02k
            }
6749
2.08k
      return(-1);
6750
2.08k
  }
6751
6752
1.29M
  SKIP_BLANKS;
6753
6754
1.29M
  if (RAW != '>') {
6755
5.88k
      /* report the error and drop the tree; ret is still returned */
      xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6756
5.88k
      if (content != NULL) {
6757
406
    xmlFreeDocElementContent(ctxt->myDoc, content);
6758
406
      }
6759
1.29M
  } else {
6760
1.29M
      if (inputid != ctxt->input->id) {
6761
112
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6762
112
                               "Element declaration doesn't start and stop in"
6763
112
                               " the same entity\n");
6764
112
      }
6765
6766
1.29M
      NEXT;
6767
1.29M
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6768
1.29M
    (ctxt->sax->elementDecl != NULL)) {
6769
1.16M
    /*
     * content->parent is cleared before the callback and checked
     * afterwards: if it is still NULL the tree is freed here.
     */
    if (content != NULL)
6770
982k
        content->parent = NULL;
6771
1.16M
          ctxt->sax->elementDecl(ctxt->userData, name, ret,
6772
1.16M
                           content);
6773
1.16M
    if ((content != NULL) && (content->parent == NULL)) {
6774
        /*
6775
         * this is a trick: if xmlAddElementDecl is called,
6776
         * instead of copying the full tree it is plugged directly
6777
         * if called from the parser. Avoid duplicating the
6778
         * interfaces or change the API/ABI
6779
         */
6780
259
        xmlFreeDocElementContent(ctxt->myDoc, content);
6781
259
    }
6782
1.16M
      } else if (content != NULL) {
6783
106k
    xmlFreeDocElementContent(ctxt->myDoc, content);
6784
106k
      }
6785
1.29M
  }
6786
1.29M
    }
6787
1.29M
    return(ret);
6788
1.30M
}
6789
6790
/**
6791
 * xmlParseConditionalSections:
6792
 * @ctxt:  an XML parser context
6793
 *
6794
 * Parse a conditional section. Always consumes '<!['.
6795
 *
6796
 * [61] conditionalSect ::= includeSect | ignoreSect
6797
 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6798
 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6799
 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6800
 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6801
 */
6802
6803
static void
6804
1.51k
xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6805
1.51k
    int *inputIds = NULL;
6806
1.51k
    size_t inputIdsSize = 0;
6807
1.51k
    size_t depth = 0;
6808
6809
8.14k
    while (ctxt->instate != XML_PARSER_EOF) {
6810
8.12k
        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6811
4.73k
            int id = ctxt->input->id;
6812
6813
4.73k
            SKIP(3);
6814
4.73k
            SKIP_BLANKS;
6815
6816
4.73k
            if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6817
4.03k
                SKIP(7);
6818
4.03k
                SKIP_BLANKS;
6819
4.03k
                if (RAW != '[') {
6820
46
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6821
46
                    xmlHaltParser(ctxt);
6822
46
                    goto error;
6823
46
                }
6824
3.98k
                if (ctxt->input->id != id) {
6825
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6826
0
                                   "All markup of the conditional section is"
6827
0
                                   " not in the same entity\n");
6828
0
                }
6829
3.98k
                NEXT;
6830
6831
3.98k
                if (inputIdsSize <= depth) {
6832
1.20k
                    int *tmp;
6833
6834
1.20k
                    inputIdsSize = (inputIdsSize == 0 ? 4 : inputIdsSize * 2);
6835
1.20k
                    tmp = (int *) xmlRealloc(inputIds,
6836
1.20k
                            inputIdsSize * sizeof(int));
6837
1.20k
                    if (tmp == NULL) {
6838
0
                        xmlErrMemory(ctxt, NULL);
6839
0
                        goto error;
6840
0
                    }
6841
1.20k
                    inputIds = tmp;
6842
1.20k
                }
6843
3.98k
                inputIds[depth] = id;
6844
3.98k
                depth++;
6845
3.98k
            } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6846
335
                size_t ignoreDepth = 0;
6847
6848
335
                SKIP(6);
6849
335
                SKIP_BLANKS;
6850
335
                if (RAW != '[') {
6851
14
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6852
14
                    xmlHaltParser(ctxt);
6853
14
                    goto error;
6854
14
                }
6855
321
                if (ctxt->input->id != id) {
6856
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6857
0
                                   "All markup of the conditional section is"
6858
0
                                   " not in the same entity\n");
6859
0
                }
6860
321
                NEXT;
6861
6862
40.3k
                while (RAW != 0) {
6863
40.2k
                    if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6864
750
                        SKIP(3);
6865
750
                        ignoreDepth++;
6866
                        /* Check for integer overflow */
6867
750
                        if (ignoreDepth == 0) {
6868
0
                            xmlErrMemory(ctxt, NULL);
6869
0
                            goto error;
6870
0
                        }
6871
39.4k
                    } else if ((RAW == ']') && (NXT(1) == ']') &&
6872
39.4k
                               (NXT(2) == '>')) {
6873
713
                        if (ignoreDepth == 0)
6874
166
                            break;
6875
547
                        SKIP(3);
6876
547
                        ignoreDepth--;
6877
38.7k
                    } else {
6878
38.7k
                        NEXT;
6879
38.7k
                    }
6880
40.2k
                }
6881
6882
321
    if (RAW == 0) {
6883
155
        xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6884
155
                    goto error;
6885
155
    }
6886
166
                if (ctxt->input->id != id) {
6887
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6888
0
                                   "All markup of the conditional section is"
6889
0
                                   " not in the same entity\n");
6890
0
                }
6891
166
                SKIP(3);
6892
365
            } else {
6893
365
                xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6894
365
                xmlHaltParser(ctxt);
6895
365
                goto error;
6896
365
            }
6897
4.73k
        } else if ((depth > 0) &&
6898
3.39k
                   (RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6899
2.11k
            depth--;
6900
2.11k
            if (ctxt->input->id != inputIds[depth]) {
6901
83
                xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6902
83
                               "All markup of the conditional section is not"
6903
83
                               " in the same entity\n");
6904
83
            }
6905
2.11k
            SKIP(3);
6906
2.11k
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
6907
881
            xmlParseMarkupDecl(ctxt);
6908
881
        } else {
6909
402
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6910
402
            xmlHaltParser(ctxt);
6911
402
            goto error;
6912
402
        }
6913
6914
7.14k
        if (depth == 0)
6915
521
            break;
6916
6917
6.62k
        SKIP_BLANKS;
6918
6.62k
        GROW;
6919
6.62k
    }
6920
6921
1.51k
error:
6922
1.51k
    xmlFree(inputIds);
6923
1.51k
}
6924
6925
/**
6926
 * xmlParseMarkupDecl:
6927
 * @ctxt:  an XML parser context
6928
 *
6929
 * DEPRECATED: Internal function, don't use.
6930
 *
6931
 * Parse markup declarations. Always consumes '<!' or '<?'.
6932
 *
6933
 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6934
 *                     NotationDecl | PI | Comment
6935
 *
6936
 * [ VC: Proper Declaration/PE Nesting ]
6937
 * Parameter-entity replacement text must be properly nested with
6938
 * markup declarations. That is to say, if either the first character
6939
 * or the last character of a markup declaration (markupdecl above) is
6940
 * contained in the replacement text for a parameter-entity reference,
6941
 * both must be contained in the same replacement text.
6942
 *
6943
 * [ WFC: PEs in Internal Subset ]
6944
 * In the internal DTD subset, parameter-entity references can occur
6945
 * only where markup declarations can occur, not within markup declarations.
6946
 * (This does not apply to references that occur in external parameter
6947
 * entities or to the external subset.)
6948
 */
6949
void
6950
64.8M
xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6951
64.8M
    GROW;
6952
64.8M
    if (CUR == '<') {
6953
64.8M
        if (NXT(1) == '!') {
6954
64.8M
      switch (NXT(2)) {
6955
2.29M
          case 'E':
6956
2.29M
        if (NXT(3) == 'L')
6957
1.30M
      xmlParseElementDecl(ctxt);
6958
990k
        else if (NXT(3) == 'N')
6959
990k
      xmlParseEntityDecl(ctxt);
6960
372
                    else
6961
372
                        SKIP(2);
6962
2.29M
        break;
6963
1.17M
          case 'A':
6964
1.17M
        xmlParseAttributeListDecl(ctxt);
6965
1.17M
        break;
6966
1.25k
          case 'N':
6967
1.25k
        xmlParseNotationDecl(ctxt);
6968
1.25k
        break;
6969
61.3M
          case '-':
6970
61.3M
        xmlParseComment(ctxt);
6971
61.3M
        break;
6972
9.37k
    default:
6973
        /* there is an error but it will be detected later */
6974
9.37k
                    SKIP(2);
6975
9.37k
        break;
6976
64.8M
      }
6977
64.8M
  } else if (NXT(1) == '?') {
6978
760
      xmlParsePI(ctxt);
6979
760
  }
6980
64.8M
    }
6981
6982
    /*
6983
     * detect requirement to exit there and act accordingly
6984
     * and avoid having instate overridden later on
6985
     */
6986
64.8M
    if (ctxt->instate == XML_PARSER_EOF)
6987
5.28k
        return;
6988
6989
64.8M
    ctxt->instate = XML_PARSER_DTD;
6990
64.8M
}
6991
6992
/**
6993
 * xmlParseTextDecl:
6994
 * @ctxt:  an XML parser context
6995
 *
6996
 * DEPRECATED: Internal function, don't use.
6997
 *
6998
 * parse an XML declaration header for external entities
6999
 *
7000
 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
7001
 */
7002
7003
void
7004
3.88k
xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
7005
3.88k
    xmlChar *version;
7006
3.88k
    const xmlChar *encoding;
7007
3.88k
    int oldstate;
7008
7009
    /*
7010
     * We know that '<?xml' is here.
7011
     */
7012
3.88k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
7013
3.81k
  SKIP(5);
7014
3.81k
    } else {
7015
68
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
7016
68
  return;
7017
68
    }
7018
7019
    /* Avoid expansion of parameter entities when skipping blanks. */
7020
3.81k
    oldstate = ctxt->instate;
7021
3.81k
    ctxt->instate = XML_PARSER_START;
7022
7023
3.81k
    if (SKIP_BLANKS == 0) {
7024
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7025
0
           "Space needed after '<?xml'\n");
7026
0
    }
7027
7028
    /*
7029
     * We may have the VersionInfo here.
7030
     */
7031
3.81k
    version = xmlParseVersionInfo(ctxt);
7032
3.81k
    if (version == NULL)
7033
282
  version = xmlCharStrdup(XML_DEFAULT_VERSION);
7034
3.53k
    else {
7035
3.53k
  if (SKIP_BLANKS == 0) {
7036
95
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7037
95
               "Space needed here\n");
7038
95
  }
7039
3.53k
    }
7040
3.81k
    ctxt->input->version = version;
7041
7042
    /*
7043
     * We must have the encoding declaration
7044
     */
7045
3.81k
    encoding = xmlParseEncodingDecl(ctxt);
7046
3.81k
    if (ctxt->instate == XML_PARSER_EOF)
7047
0
        return;
7048
3.81k
    if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7049
  /*
7050
   * The XML REC instructs us to stop parsing right here
7051
   */
7052
50
        ctxt->instate = oldstate;
7053
50
        return;
7054
50
    }
7055
3.76k
    if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
7056
1.12k
  xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
7057
1.12k
           "Missing encoding in text declaration\n");
7058
1.12k
    }
7059
7060
3.76k
    SKIP_BLANKS;
7061
3.76k
    if ((RAW == '?') && (NXT(1) == '>')) {
7062
2.80k
        SKIP(2);
7063
2.80k
    } else if (RAW == '>') {
7064
        /* Deprecated old WD ... */
7065
45
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7066
45
  NEXT;
7067
920
    } else {
7068
920
        int c;
7069
7070
920
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7071
77.4k
        while ((c = CUR) != 0) {
7072
77.1k
            NEXT;
7073
77.1k
            if (c == '>')
7074
608
                break;
7075
77.1k
        }
7076
920
    }
7077
7078
3.76k
    ctxt->instate = oldstate;
7079
3.76k
}
7080
7081
/**
7082
 * xmlParseExternalSubset:
7083
 * @ctxt:  an XML parser context
7084
 * @ExternalID: the external identifier
7085
 * @SystemID: the system identifier (or URL)
7086
 *
7087
 * parse Markup declarations from an external subset
7088
 *
7089
 * [30] extSubset ::= textDecl? extSubsetDecl
7090
 *
7091
 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
7092
 */
7093
void
7094
xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
7095
22.9k
                       const xmlChar *SystemID) {
7096
22.9k
    xmlDetectSAX2(ctxt);
7097
22.9k
    GROW;
7098
7099
22.9k
    if ((ctxt->encoding == NULL) &&
7100
22.9k
        (ctxt->input->end - ctxt->input->cur >= 4)) {
7101
22.9k
        xmlChar start[4];
7102
22.9k
  xmlCharEncoding enc;
7103
7104
22.9k
  start[0] = RAW;
7105
22.9k
  start[1] = NXT(1);
7106
22.9k
  start[2] = NXT(2);
7107
22.9k
  start[3] = NXT(3);
7108
22.9k
  enc = xmlDetectCharEncoding(start, 4);
7109
22.9k
  if (enc != XML_CHAR_ENCODING_NONE)
7110
3.66k
      xmlSwitchEncoding(ctxt, enc);
7111
22.9k
    }
7112
7113
22.9k
    if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
7114
3.40k
  xmlParseTextDecl(ctxt);
7115
3.40k
  if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7116
      /*
7117
       * The XML REC instructs us to stop parsing right here
7118
       */
7119
41
      xmlHaltParser(ctxt);
7120
41
      return;
7121
41
  }
7122
3.40k
    }
7123
22.9k
    if (ctxt->myDoc == NULL) {
7124
0
        ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
7125
0
  if (ctxt->myDoc == NULL) {
7126
0
      xmlErrMemory(ctxt, "New Doc failed");
7127
0
      return;
7128
0
  }
7129
0
  ctxt->myDoc->properties = XML_DOC_INTERNAL;
7130
0
    }
7131
22.9k
    if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
7132
0
        xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
7133
7134
22.9k
    ctxt->instate = XML_PARSER_DTD;
7135
22.9k
    ctxt->external = 1;
7136
22.9k
    SKIP_BLANKS;
7137
5.53M
    while ((ctxt->instate != XML_PARSER_EOF) && (RAW != 0)) {
7138
5.52M
  GROW;
7139
5.52M
        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7140
1.51k
            xmlParseConditionalSections(ctxt);
7141
5.52M
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
7142
5.51M
            xmlParseMarkupDecl(ctxt);
7143
5.51M
        } else {
7144
7.98k
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7145
7.98k
            xmlHaltParser(ctxt);
7146
7.98k
            return;
7147
7.98k
        }
7148
5.51M
        SKIP_BLANKS;
7149
5.51M
    }
7150
7151
14.9k
    if (RAW != 0) {
7152
0
  xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7153
0
    }
7154
7155
14.9k
}
7156
7157
/**
7158
 * xmlParseReference:
7159
 * @ctxt:  an XML parser context
7160
 *
7161
 * DEPRECATED: Internal function, don't use.
7162
 *
7163
 * parse and handle entity references in content, depending on the SAX
7164
 * interface, this may end-up in a call to character() if this is a
7165
 * CharRef, a predefined entity, if there is no reference() callback.
7166
 * or if the parser was asked to switch to that mode.
7167
 *
7168
 * Always consumes '&'.
7169
 *
7170
 * [67] Reference ::= EntityRef | CharRef
7171
 */
7172
void
7173
1.66M
xmlParseReference(xmlParserCtxtPtr ctxt) {
7174
1.66M
    xmlEntityPtr ent;
7175
1.66M
    xmlChar *val;
7176
1.66M
    int was_checked;
7177
1.66M
    xmlNodePtr list = NULL;
7178
1.66M
    xmlParserErrors ret = XML_ERR_OK;
7179
7180
7181
1.66M
    if (RAW != '&')
7182
0
        return;
7183
7184
    /*
7185
     * Simple case of a CharRef
7186
     */
7187
1.66M
    if (NXT(1) == '#') {
7188
20.1k
  int i = 0;
7189
20.1k
  xmlChar out[16];
7190
20.1k
  int hex = NXT(2);
7191
20.1k
  int value = xmlParseCharRef(ctxt);
7192
7193
20.1k
  if (value == 0)
7194
3.37k
      return;
7195
16.8k
  if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
7196
      /*
7197
       * So we are using non-UTF-8 buffers
7198
       * Check that the char fit on 8bits, if not
7199
       * generate a CharRef.
7200
       */
7201
7.65k
      if (value <= 0xFF) {
7202
7.45k
    out[0] = value;
7203
7.45k
    out[1] = 0;
7204
7.45k
    if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7205
7.45k
        (!ctxt->disableSAX))
7206
6.63k
        ctxt->sax->characters(ctxt->userData, out, 1);
7207
7.45k
      } else {
7208
201
    if ((hex == 'x') || (hex == 'X'))
7209
100
        snprintf((char *)out, sizeof(out), "#x%X", value);
7210
101
    else
7211
101
        snprintf((char *)out, sizeof(out), "#%d", value);
7212
201
    if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7213
201
        (!ctxt->disableSAX))
7214
161
        ctxt->sax->reference(ctxt->userData, out);
7215
201
      }
7216
9.15k
  } else {
7217
      /*
7218
       * Just encode the value in UTF-8
7219
       */
7220
9.15k
      COPY_BUF(0 ,out, i, value);
7221
9.15k
      out[i] = 0;
7222
9.15k
      if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7223
9.15k
    (!ctxt->disableSAX))
7224
7.72k
    ctxt->sax->characters(ctxt->userData, out, i);
7225
9.15k
  }
7226
16.8k
  return;
7227
20.1k
    }
7228
7229
    /*
7230
     * We are seeing an entity reference
7231
     */
7232
1.64M
    ent = xmlParseEntityRef(ctxt);
7233
1.64M
    if (ent == NULL) return;
7234
1.57M
    if (!ctxt->wellFormed)
7235
924k
  return;
7236
648k
    was_checked = ent->flags & XML_ENT_PARSED;
7237
7238
    /* special case of predefined entities */
7239
648k
    if ((ent->name == NULL) ||
7240
648k
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7241
53.0k
  val = ent->content;
7242
53.0k
  if (val == NULL) return;
7243
  /*
7244
   * inline the entity.
7245
   */
7246
53.0k
  if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7247
53.0k
      (!ctxt->disableSAX))
7248
53.0k
      ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7249
53.0k
  return;
7250
53.0k
    }
7251
7252
    /*
7253
     * The first reference to the entity trigger a parsing phase
7254
     * where the ent->children is filled with the result from
7255
     * the parsing.
7256
     * Note: external parsed entities will not be loaded, it is not
7257
     * required for a non-validating parser, unless the parsing option
7258
     * of validating, or substituting entities were given. Doing so is
7259
     * far more secure as the parser will only process data coming from
7260
     * the document entity by default.
7261
     */
7262
595k
    if (((ent->flags & XML_ENT_PARSED) == 0) &&
7263
595k
        ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
7264
22.2k
         (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
7265
21.1k
  unsigned long oldsizeentcopy = ctxt->sizeentcopy;
7266
7267
  /*
7268
   * This is a bit hackish but this seems the best
7269
   * way to make sure both SAX and DOM entity support
7270
   * behaves okay.
7271
   */
7272
21.1k
  void *user_data;
7273
21.1k
  if (ctxt->userData == ctxt)
7274
21.1k
      user_data = NULL;
7275
0
  else
7276
0
      user_data = ctxt->userData;
7277
7278
        /* Avoid overflow as much as possible */
7279
21.1k
        ctxt->sizeentcopy = 0;
7280
7281
21.1k
        if (ent->flags & XML_ENT_EXPANDING) {
7282
253
            xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7283
253
            xmlHaltParser(ctxt);
7284
253
            return;
7285
253
        }
7286
7287
20.8k
        ent->flags |= XML_ENT_EXPANDING;
7288
7289
  /*
7290
   * Check that this entity is well formed
7291
   * 4.3.2: An internal general parsed entity is well-formed
7292
   * if its replacement text matches the production labeled
7293
   * content.
7294
   */
7295
20.8k
  if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7296
9.98k
      ctxt->depth++;
7297
9.98k
      ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
7298
9.98k
                                                user_data, &list);
7299
9.98k
      ctxt->depth--;
7300
7301
10.8k
  } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7302
10.8k
      ctxt->depth++;
7303
10.8k
      ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
7304
10.8k
                                     user_data, ctxt->depth, ent->URI,
7305
10.8k
             ent->ExternalID, &list);
7306
10.8k
      ctxt->depth--;
7307
10.8k
  } else {
7308
0
      ret = XML_ERR_ENTITY_PE_INTERNAL;
7309
0
      xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7310
0
       "invalid entity type found\n", NULL);
7311
0
  }
7312
7313
20.8k
        ent->flags &= ~XML_ENT_EXPANDING;
7314
20.8k
        ent->flags |= XML_ENT_PARSED | XML_ENT_CHECKED;
7315
20.8k
        ent->expandedSize = ctxt->sizeentcopy;
7316
20.8k
  if (ret == XML_ERR_ENTITY_LOOP) {
7317
2.91k
            xmlHaltParser(ctxt);
7318
2.91k
      xmlFreeNodeList(list);
7319
2.91k
      return;
7320
2.91k
  }
7321
17.9k
  if (xmlParserEntityCheck(ctxt, oldsizeentcopy)) {
7322
0
      xmlFreeNodeList(list);
7323
0
      return;
7324
0
  }
7325
7326
17.9k
  if ((ret == XML_ERR_OK) && (list != NULL)) {
7327
8.30k
            ent->children = list;
7328
            /*
7329
             * Prune it directly in the generated document
7330
             * except for single text nodes.
7331
             */
7332
8.30k
            if ((ctxt->replaceEntities == 0) ||
7333
8.30k
                (ctxt->parseMode == XML_PARSE_READER) ||
7334
8.30k
                ((list->type == XML_TEXT_NODE) &&
7335
7.57k
                 (list->next == NULL))) {
7336
7.57k
                ent->owner = 1;
7337
84.6k
                while (list != NULL) {
7338
77.0k
                    list->parent = (xmlNodePtr) ent;
7339
77.0k
                    if (list->doc != ent->doc)
7340
0
                        xmlSetTreeDoc(list, ent->doc);
7341
77.0k
                    if (list->next == NULL)
7342
7.57k
                        ent->last = list;
7343
77.0k
                    list = list->next;
7344
77.0k
                }
7345
7.57k
                list = NULL;
7346
7.57k
            } else {
7347
732
                ent->owner = 0;
7348
309k
                while (list != NULL) {
7349
308k
                    list->parent = (xmlNodePtr) ctxt->node;
7350
308k
                    list->doc = ctxt->myDoc;
7351
308k
                    if (list->next == NULL)
7352
732
                        ent->last = list;
7353
308k
                    list = list->next;
7354
308k
                }
7355
732
                list = ent->children;
7356
#ifdef LIBXML_LEGACY_ENABLED
7357
                if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7358
                    xmlAddEntityReference(ent, list, NULL);
7359
#endif /* LIBXML_LEGACY_ENABLED */
7360
732
            }
7361
9.65k
  } else if ((ret != XML_ERR_OK) &&
7362
9.65k
       (ret != XML_WAR_UNDECLARED_ENTITY)) {
7363
7.53k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7364
7.53k
         "Entity '%s' failed to parse\n", ent->name);
7365
7.53k
            if (ent->content != NULL)
7366
1.95k
                ent->content[0] = 0;
7367
7.53k
  } else if (list != NULL) {
7368
0
      xmlFreeNodeList(list);
7369
0
      list = NULL;
7370
0
  }
7371
7372
        /* Prevent entity from being parsed and expanded twice (Bug 760367). */
7373
17.9k
        was_checked = 0;
7374
17.9k
    }
7375
7376
    /*
7377
     * Now that the entity content has been gathered
7378
     * provide it to the application, this can take different forms based
7379
     * on the parsing modes.
7380
     */
7381
592k
    if (ent->children == NULL) {
7382
  /*
7383
   * Probably running in SAX mode and the callbacks don't
7384
   * build the entity content. So unless we already went
7385
   * though parsing for first checking go though the entity
7386
   * content to generate callbacks associated to the entity
7387
   */
7388
78.3k
  if (was_checked != 0) {
7389
67.5k
      void *user_data;
7390
      /*
7391
       * This is a bit hackish but this seems the best
7392
       * way to make sure both SAX and DOM entity support
7393
       * behaves okay.
7394
       */
7395
67.5k
      if (ctxt->userData == ctxt)
7396
67.5k
    user_data = NULL;
7397
0
      else
7398
0
    user_data = ctxt->userData;
7399
7400
67.5k
      if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7401
0
    ctxt->depth++;
7402
0
    ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7403
0
           ent->content, user_data, NULL);
7404
0
    ctxt->depth--;
7405
67.5k
      } else if (ent->etype ==
7406
67.5k
           XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7407
67.5k
          unsigned long oldsizeentities = ctxt->sizeentities;
7408
7409
67.5k
    ctxt->depth++;
7410
67.5k
    ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7411
67.5k
         ctxt->sax, user_data, ctxt->depth,
7412
67.5k
         ent->URI, ent->ExternalID, NULL);
7413
67.5k
    ctxt->depth--;
7414
7415
                /* Undo the change to sizeentities */
7416
67.5k
                ctxt->sizeentities = oldsizeentities;
7417
67.5k
      } else {
7418
0
    ret = XML_ERR_ENTITY_PE_INTERNAL;
7419
0
    xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7420
0
           "invalid entity type found\n", NULL);
7421
0
      }
7422
67.5k
      if (ret == XML_ERR_ENTITY_LOOP) {
7423
0
    xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7424
0
    return;
7425
0
      }
7426
67.5k
            if (xmlParserEntityCheck(ctxt, 0))
7427
0
                return;
7428
67.5k
  }
7429
78.3k
  if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7430
78.3k
      (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7431
      /*
7432
       * Entity reference callback comes second, it's somewhat
7433
       * superfluous but a compatibility to historical behaviour
7434
       */
7435
17.1k
      ctxt->sax->reference(ctxt->userData, ent->name);
7436
17.1k
  }
7437
78.3k
  return;
7438
78.3k
    }
7439
7440
    /*
7441
     * We also check for amplification if entities aren't substituted.
7442
     * They might be expanded later.
7443
     */
7444
514k
    if ((was_checked != 0) &&
7445
514k
        (xmlParserEntityCheck(ctxt, ent->expandedSize)))
7446
319
        return;
7447
7448
    /*
7449
     * If we didn't get any children for the entity being built
7450
     */
7451
513k
    if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7452
513k
  (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7453
  /*
7454
   * Create a node.
7455
   */
7456
29.2k
  ctxt->sax->reference(ctxt->userData, ent->name);
7457
29.2k
  return;
7458
29.2k
    }
7459
7460
484k
    if (ctxt->replaceEntities)  {
7461
  /*
7462
   * There is a problem on the handling of _private for entities
7463
   * (bug 155816): Should we copy the content of the field from
7464
   * the entity (possibly overwriting some value set by the user
7465
   * when a copy is created), should we leave it alone, or should
7466
   * we try to take care of different situations?  The problem
7467
   * is exacerbated by the usage of this field by the xmlReader.
7468
   * To fix this bug, we look at _private on the created node
7469
   * and, if it's NULL, we copy in whatever was in the entity.
7470
   * If it's not NULL we leave it alone.  This is somewhat of a
7471
   * hack - maybe we should have further tests to determine
7472
   * what to do.
7473
   */
7474
484k
  if (ctxt->node != NULL) {
7475
      /*
7476
       * Seems we are generating the DOM content, do
7477
       * a simple tree copy for all references except the first
7478
       * In the first occurrence list contains the replacement.
7479
       */
7480
484k
      if (((list == NULL) && (ent->owner == 0)) ||
7481
484k
    (ctxt->parseMode == XML_PARSE_READER)) {
7482
137k
    xmlNodePtr nw = NULL, cur, firstChild = NULL;
7483
7484
    /*
7485
     * when operating on a reader, the entities definitions
7486
     * are always owning the entities subtree.
7487
    if (ctxt->parseMode == XML_PARSE_READER)
7488
        ent->owner = 1;
7489
     */
7490
7491
137k
    cur = ent->children;
7492
169k
    while (cur != NULL) {
7493
169k
        nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7494
169k
        if (nw != NULL) {
7495
169k
      if (nw->_private == NULL)
7496
169k
          nw->_private = cur->_private;
7497
169k
      if (firstChild == NULL){
7498
137k
          firstChild = nw;
7499
137k
      }
7500
169k
      nw = xmlAddChild(ctxt->node, nw);
7501
169k
        }
7502
169k
        if (cur == ent->last) {
7503
      /*
7504
       * needed to detect some strange empty
7505
       * node cases in the reader tests
7506
       */
7507
137k
      if ((ctxt->parseMode == XML_PARSE_READER) &&
7508
137k
          (nw != NULL) &&
7509
137k
          (nw->type == XML_ELEMENT_NODE) &&
7510
137k
          (nw->children == NULL))
7511
125
          nw->extra = 1;
7512
7513
137k
      break;
7514
137k
        }
7515
32.2k
        cur = cur->next;
7516
32.2k
    }
7517
#ifdef LIBXML_LEGACY_ENABLED
7518
    if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7519
      xmlAddEntityReference(ent, firstChild, nw);
7520
#endif /* LIBXML_LEGACY_ENABLED */
7521
347k
      } else if ((list == NULL) || (ctxt->inputNr > 0)) {
7522
347k
    xmlNodePtr nw = NULL, cur, next, last,
7523
347k
         firstChild = NULL;
7524
7525
    /*
7526
     * Copy the entity child list and make it the new
7527
     * entity child list. The goal is to make sure any
7528
     * ID or REF referenced will be the one from the
7529
     * document content and not the entity copy.
7530
     */
7531
347k
    cur = ent->children;
7532
347k
    ent->children = NULL;
7533
347k
    last = ent->last;
7534
347k
    ent->last = NULL;
7535
2.51M
    while (cur != NULL) {
7536
2.51M
        next = cur->next;
7537
2.51M
        cur->next = NULL;
7538
2.51M
        cur->parent = NULL;
7539
2.51M
        nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7540
2.51M
        if (nw != NULL) {
7541
2.51M
      if (nw->_private == NULL)
7542
2.51M
          nw->_private = cur->_private;
7543
2.51M
      if (firstChild == NULL){
7544
347k
          firstChild = cur;
7545
347k
      }
7546
2.51M
      xmlAddChild((xmlNodePtr) ent, nw);
7547
2.51M
        }
7548
2.51M
        xmlAddChild(ctxt->node, cur);
7549
2.51M
        if (cur == last)
7550
347k
      break;
7551
2.16M
        cur = next;
7552
2.16M
    }
7553
347k
    if (ent->owner == 0)
7554
732
        ent->owner = 1;
7555
#ifdef LIBXML_LEGACY_ENABLED
7556
    if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7557
      xmlAddEntityReference(ent, firstChild, nw);
7558
#endif /* LIBXML_LEGACY_ENABLED */
7559
347k
      } else {
7560
0
    const xmlChar *nbktext;
7561
7562
    /*
7563
     * the name change is to avoid coalescing of the
7564
     * node with a possible previous text one which
7565
     * would make ent->children a dangling pointer
7566
     */
7567
0
    nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7568
0
          -1);
7569
0
    if (ent->children->type == XML_TEXT_NODE)
7570
0
        ent->children->name = nbktext;
7571
0
    if ((ent->last != ent->children) &&
7572
0
        (ent->last->type == XML_TEXT_NODE))
7573
0
        ent->last->name = nbktext;
7574
0
    xmlAddChildList(ctxt->node, ent->children);
7575
0
      }
7576
7577
      /*
7578
       * This is to avoid a nasty side effect, see
7579
       * characters() in SAX.c
7580
       */
7581
484k
      ctxt->nodemem = 0;
7582
484k
      ctxt->nodelen = 0;
7583
484k
      return;
7584
484k
  }
7585
484k
    }
7586
484k
}
7587
7588
/**
7589
 * xmlParseEntityRef:
7590
 * @ctxt:  an XML parser context
7591
 *
7592
 * DEPRECATED: Internal function, don't use.
7593
 *
7594
 * Parse an entitiy reference. Always consumes '&'.
7595
 *
7596
 * [68] EntityRef ::= '&' Name ';'
7597
 *
7598
 * [ WFC: Entity Declared ]
7599
 * In a document without any DTD, a document with only an internal DTD
7600
 * subset which contains no parameter entity references, or a document
7601
 * with "standalone='yes'", the Name given in the entity reference
7602
 * must match that in an entity declaration, except that well-formed
7603
 * documents need not declare any of the following entities: amp, lt,
7604
 * gt, apos, quot.  The declaration of a parameter entity must precede
7605
 * any reference to it.  Similarly, the declaration of a general entity
7606
 * must precede any reference to it which appears in a default value in an
7607
 * attribute-list declaration. Note that if entities are declared in the
7608
 * external subset or in external parameter entities, a non-validating
7609
 * processor is not obligated to read and process their declarations;
7610
 * for such documents, the rule that an entity must be declared is a
7611
 * well-formedness constraint only if standalone='yes'.
7612
 *
7613
 * [ WFC: Parsed Entity ]
7614
 * An entity reference must not contain the name of an unparsed entity
7615
 *
7616
 * Returns the xmlEntityPtr if found, or NULL otherwise.
7617
 */
7618
xmlEntityPtr
7619
2.49M
xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7620
2.49M
    const xmlChar *name;
7621
2.49M
    xmlEntityPtr ent = NULL;
7622
7623
2.49M
    GROW;
7624
2.49M
    if (ctxt->instate == XML_PARSER_EOF)
7625
0
        return(NULL);
7626
7627
2.49M
    if (RAW != '&')
7628
0
        return(NULL);
7629
2.49M
    NEXT;
7630
2.49M
    name = xmlParseName(ctxt);
7631
2.49M
    if (name == NULL) {
7632
8.80k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7633
8.80k
           "xmlParseEntityRef: no name\n");
7634
8.80k
        return(NULL);
7635
8.80k
    }
7636
2.48M
    if (RAW != ';') {
7637
8.71k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7638
8.71k
  return(NULL);
7639
8.71k
    }
7640
2.47M
    NEXT;
7641
7642
    /*
7643
     * Predefined entities override any extra definition
7644
     */
7645
2.47M
    if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7646
1.27M
        ent = xmlGetPredefinedEntity(name);
7647
1.27M
        if (ent != NULL)
7648
130k
            return(ent);
7649
1.27M
    }
7650
7651
    /*
7652
     * Ask first SAX for entity resolution, otherwise try the
7653
     * entities which may have stored in the parser context.
7654
     */
7655
2.34M
    if (ctxt->sax != NULL) {
7656
2.34M
  if (ctxt->sax->getEntity != NULL)
7657
2.34M
      ent = ctxt->sax->getEntity(ctxt->userData, name);
7658
2.34M
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7659
2.34M
      (ctxt->options & XML_PARSE_OLDSAX))
7660
428
      ent = xmlGetPredefinedEntity(name);
7661
2.34M
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7662
2.34M
      (ctxt->userData==ctxt)) {
7663
3.47k
      ent = xmlSAX2GetEntity(ctxt, name);
7664
3.47k
  }
7665
2.34M
    }
7666
2.34M
    if (ctxt->instate == XML_PARSER_EOF)
7667
0
  return(NULL);
7668
    /*
7669
     * [ WFC: Entity Declared ]
7670
     * In a document without any DTD, a document with only an
7671
     * internal DTD subset which contains no parameter entity
7672
     * references, or a document with "standalone='yes'", the
7673
     * Name given in the entity reference must match that in an
7674
     * entity declaration, except that well-formed documents
7675
     * need not declare any of the following entities: amp, lt,
7676
     * gt, apos, quot.
7677
     * The declaration of a parameter entity must precede any
7678
     * reference to it.
7679
     * Similarly, the declaration of a general entity must
7680
     * precede any reference to it which appears in a default
7681
     * value in an attribute-list declaration. Note that if
7682
     * entities are declared in the external subset or in
7683
     * external parameter entities, a non-validating processor
7684
     * is not obligated to read and process their declarations;
7685
     * for such documents, the rule that an entity must be
7686
     * declared is a well-formedness constraint only if
7687
     * standalone='yes'.
7688
     */
7689
2.34M
    if (ent == NULL) {
7690
76.1k
  if ((ctxt->standalone == 1) ||
7691
76.1k
      ((ctxt->hasExternalSubset == 0) &&
7692
75.8k
       (ctxt->hasPErefs == 0))) {
7693
71.9k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7694
71.9k
         "Entity '%s' not defined\n", name);
7695
71.9k
  } else {
7696
4.16k
      xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7697
4.16k
         "Entity '%s' not defined\n", name);
7698
4.16k
      if ((ctxt->inSubset == 0) &&
7699
4.16k
    (ctxt->sax != NULL) &&
7700
4.16k
    (ctxt->sax->reference != NULL)) {
7701
4.07k
    ctxt->sax->reference(ctxt->userData, name);
7702
4.07k
      }
7703
4.16k
  }
7704
76.1k
  ctxt->valid = 0;
7705
76.1k
    }
7706
7707
    /*
7708
     * [ WFC: Parsed Entity ]
7709
     * An entity reference must not contain the name of an
7710
     * unparsed entity
7711
     */
7712
2.27M
    else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7713
14
  xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7714
14
     "Entity reference to unparsed entity %s\n", name);
7715
14
    }
7716
7717
    /*
7718
     * [ WFC: No External Entity References ]
7719
     * Attribute values cannot contain direct or indirect
7720
     * entity references to external entities.
7721
     */
7722
2.27M
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7723
2.27M
       (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7724
3.80k
  xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7725
3.80k
       "Attribute references external entity '%s'\n", name);
7726
3.80k
    }
7727
    /*
7728
     * [ WFC: No < in Attribute Values ]
7729
     * The replacement text of any entity referred to directly or
7730
     * indirectly in an attribute value (other than "&lt;") must
7731
     * not contain a <.
7732
     */
7733
2.26M
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7734
2.26M
       (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7735
791k
  if ((ent->flags & XML_ENT_CHECKED_LT) == 0) {
7736
5.08k
            if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7737
151
                ent->flags |= XML_ENT_CONTAINS_LT;
7738
5.08k
            ent->flags |= XML_ENT_CHECKED_LT;
7739
5.08k
        }
7740
791k
        if (ent->flags & XML_ENT_CONTAINS_LT)
7741
306
            xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7742
306
                    "'<' in entity '%s' is not allowed in attributes "
7743
306
                    "values\n", name);
7744
791k
    }
7745
7746
    /*
7747
     * Internal check, no parameter entities here ...
7748
     */
7749
1.47M
    else {
7750
1.47M
  switch (ent->etype) {
7751
0
      case XML_INTERNAL_PARAMETER_ENTITY:
7752
0
      case XML_EXTERNAL_PARAMETER_ENTITY:
7753
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7754
0
       "Attempt to reference the parameter entity '%s'\n",
7755
0
            name);
7756
0
      break;
7757
1.47M
      default:
7758
1.47M
      break;
7759
1.47M
  }
7760
1.47M
    }
7761
7762
    /*
7763
     * [ WFC: No Recursion ]
7764
     * A parsed entity must not contain a recursive reference
7765
     * to itself, either directly or indirectly.
7766
     * Done somewhere else
7767
     */
7768
2.34M
    return(ent);
7769
2.34M
}
7770
7771
/**
7772
 * xmlParseStringEntityRef:
7773
 * @ctxt:  an XML parser context
7774
 * @str:  a pointer to an index in the string
7775
 *
7776
 * parse ENTITY references declarations, but this version parses it from
7777
 * a string value.
7778
 *
7779
 * [68] EntityRef ::= '&' Name ';'
7780
 *
7781
 * [ WFC: Entity Declared ]
7782
 * In a document without any DTD, a document with only an internal DTD
7783
 * subset which contains no parameter entity references, or a document
7784
 * with "standalone='yes'", the Name given in the entity reference
7785
 * must match that in an entity declaration, except that well-formed
7786
 * documents need not declare any of the following entities: amp, lt,
7787
 * gt, apos, quot.  The declaration of a parameter entity must precede
7788
 * any reference to it.  Similarly, the declaration of a general entity
7789
 * must precede any reference to it which appears in a default value in an
7790
 * attribute-list declaration. Note that if entities are declared in the
7791
 * external subset or in external parameter entities, a non-validating
7792
 * processor is not obligated to read and process their declarations;
7793
 * for such documents, the rule that an entity must be declared is a
7794
 * well-formedness constraint only if standalone='yes'.
7795
 *
7796
 * [ WFC: Parsed Entity ]
7797
 * An entity reference must not contain the name of an unparsed entity
7798
 *
7799
 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7800
 * is updated to the current location in the string.
7801
 */
7802
static xmlEntityPtr
7803
26.8M
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7804
26.8M
    xmlChar *name;
7805
26.8M
    const xmlChar *ptr;
7806
26.8M
    xmlChar cur;
7807
26.8M
    xmlEntityPtr ent = NULL;
7808
7809
26.8M
    if ((str == NULL) || (*str == NULL))
7810
0
        return(NULL);
7811
26.8M
    ptr = *str;
7812
26.8M
    cur = *ptr;
7813
26.8M
    if (cur != '&')
7814
0
  return(NULL);
7815
7816
26.8M
    ptr++;
7817
26.8M
    name = xmlParseStringName(ctxt, &ptr);
7818
26.8M
    if (name == NULL) {
7819
0
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7820
0
           "xmlParseStringEntityRef: no name\n");
7821
0
  *str = ptr;
7822
0
  return(NULL);
7823
0
    }
7824
26.8M
    if (*ptr != ';') {
7825
0
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7826
0
        xmlFree(name);
7827
0
  *str = ptr;
7828
0
  return(NULL);
7829
0
    }
7830
26.8M
    ptr++;
7831
7832
7833
    /*
7834
     * Predefined entities override any extra definition
7835
     */
7836
26.8M
    if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7837
20.8M
        ent = xmlGetPredefinedEntity(name);
7838
20.8M
        if (ent != NULL) {
7839
288
            xmlFree(name);
7840
288
            *str = ptr;
7841
288
            return(ent);
7842
288
        }
7843
20.8M
    }
7844
7845
    /*
7846
     * Ask first SAX for entity resolution, otherwise try the
7847
     * entities which may have stored in the parser context.
7848
     */
7849
26.8M
    if (ctxt->sax != NULL) {
7850
26.8M
  if (ctxt->sax->getEntity != NULL)
7851
26.8M
      ent = ctxt->sax->getEntity(ctxt->userData, name);
7852
26.8M
  if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7853
763k
      ent = xmlGetPredefinedEntity(name);
7854
26.8M
  if ((ent == NULL) && (ctxt->userData==ctxt)) {
7855
2.81M
      ent = xmlSAX2GetEntity(ctxt, name);
7856
2.81M
  }
7857
26.8M
    }
7858
26.8M
    if (ctxt->instate == XML_PARSER_EOF) {
7859
0
  xmlFree(name);
7860
0
  return(NULL);
7861
0
    }
7862
7863
    /*
7864
     * [ WFC: Entity Declared ]
7865
     * In a document without any DTD, a document with only an
7866
     * internal DTD subset which contains no parameter entity
7867
     * references, or a document with "standalone='yes'", the
7868
     * Name given in the entity reference must match that in an
7869
     * entity declaration, except that well-formed documents
7870
     * need not declare any of the following entities: amp, lt,
7871
     * gt, apos, quot.
7872
     * The declaration of a parameter entity must precede any
7873
     * reference to it.
7874
     * Similarly, the declaration of a general entity must
7875
     * precede any reference to it which appears in a default
7876
     * value in an attribute-list declaration. Note that if
7877
     * entities are declared in the external subset or in
7878
     * external parameter entities, a non-validating processor
7879
     * is not obligated to read and process their declarations;
7880
     * for such documents, the rule that an entity must be
7881
     * declared is a well-formedness constraint only if
7882
     * standalone='yes'.
7883
     */
7884
26.8M
    if (ent == NULL) {
7885
2.81M
  if ((ctxt->standalone == 1) ||
7886
2.81M
      ((ctxt->hasExternalSubset == 0) &&
7887
2.81M
       (ctxt->hasPErefs == 0))) {
7888
2.81M
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7889
2.81M
         "Entity '%s' not defined\n", name);
7890
2.81M
  } else {
7891
435
      xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7892
435
        "Entity '%s' not defined\n",
7893
435
        name);
7894
435
  }
7895
  /* TODO ? check regressions ctxt->valid = 0; */
7896
2.81M
    }
7897
7898
    /*
7899
     * [ WFC: Parsed Entity ]
7900
     * An entity reference must not contain the name of an
7901
     * unparsed entity
7902
     */
7903
24.0M
    else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7904
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7905
0
     "Entity reference to unparsed entity %s\n", name);
7906
0
    }
7907
7908
    /*
7909
     * [ WFC: No External Entity References ]
7910
     * Attribute values cannot contain direct or indirect
7911
     * entity references to external entities.
7912
     */
7913
24.0M
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7914
24.0M
       (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7915
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7916
0
   "Attribute references external entity '%s'\n", name);
7917
0
    }
7918
    /*
7919
     * [ WFC: No < in Attribute Values ]
7920
     * The replacement text of any entity referred to directly or
7921
     * indirectly in an attribute value (other than "&lt;") must
7922
     * not contain a <.
7923
     */
7924
24.0M
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7925
24.0M
       (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7926
24.0M
  if ((ent->flags & XML_ENT_CHECKED_LT) == 0) {
7927
7.63k
            if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7928
63
                ent->flags |= XML_ENT_CONTAINS_LT;
7929
7.63k
            ent->flags |= XML_ENT_CHECKED_LT;
7930
7.63k
        }
7931
24.0M
        if (ent->flags & XML_ENT_CONTAINS_LT)
7932
64.3k
            xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7933
64.3k
                    "'<' in entity '%s' is not allowed in attributes "
7934
64.3k
                    "values\n", name);
7935
24.0M
    }
7936
7937
    /*
7938
     * Internal check, no parameter entities here ...
7939
     */
7940
1.20k
    else {
7941
1.20k
  switch (ent->etype) {
7942
0
      case XML_INTERNAL_PARAMETER_ENTITY:
7943
0
      case XML_EXTERNAL_PARAMETER_ENTITY:
7944
0
    xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7945
0
       "Attempt to reference the parameter entity '%s'\n",
7946
0
          name);
7947
0
      break;
7948
1.20k
      default:
7949
1.20k
      break;
7950
1.20k
  }
7951
1.20k
    }
7952
7953
    /*
7954
     * [ WFC: No Recursion ]
7955
     * A parsed entity must not contain a recursive reference
7956
     * to itself, either directly or indirectly.
7957
     * Done somewhere else
7958
     */
7959
7960
26.8M
    xmlFree(name);
7961
26.8M
    *str = ptr;
7962
26.8M
    return(ent);
7963
26.8M
}
7964
7965
/**
7966
 * xmlParsePEReference:
7967
 * @ctxt:  an XML parser context
7968
 *
7969
 * DEPRECATED: Internal function, don't use.
7970
 *
7971
 * Parse a parameter entity reference. Always consumes '%'.
7972
 *
7973
 * The entity content is handled directly by pushing it's content as
7974
 * a new input stream.
7975
 *
7976
 * [69] PEReference ::= '%' Name ';'
7977
 *
7978
 * [ WFC: No Recursion ]
7979
 * A parsed entity must not contain a recursive
7980
 * reference to itself, either directly or indirectly.
7981
 *
7982
 * [ WFC: Entity Declared ]
7983
 * In a document without any DTD, a document with only an internal DTD
7984
 * subset which contains no parameter entity references, or a document
7985
 * with "standalone='yes'", ...  ... The declaration of a parameter
7986
 * entity must precede any reference to it...
7987
 *
7988
 * [ VC: Entity Declared ]
7989
 * In a document with an external subset or external parameter entities
7990
 * with "standalone='no'", ...  ... The declaration of a parameter entity
7991
 * must precede any reference to it...
7992
 *
7993
 * [ WFC: In DTD ]
7994
 * Parameter-entity references may only appear in the DTD.
7995
 * NOTE: misleading but this is handled.
7996
 */
7997
void
7998
xmlParsePEReference(xmlParserCtxtPtr ctxt)
7999
66.2M
{
8000
66.2M
    const xmlChar *name;
8001
66.2M
    xmlEntityPtr entity = NULL;
8002
66.2M
    xmlParserInputPtr input;
8003
8004
66.2M
    if (RAW != '%')
8005
0
        return;
8006
66.2M
    NEXT;
8007
66.2M
    name = xmlParseName(ctxt);
8008
66.2M
    if (name == NULL) {
8009
5.90k
  xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
8010
5.90k
  return;
8011
5.90k
    }
8012
66.2M
    if (xmlParserDebugEntities)
8013
0
  xmlGenericError(xmlGenericErrorContext,
8014
0
    "PEReference: %s\n", name);
8015
66.2M
    if (RAW != ';') {
8016
397k
  xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
8017
397k
        return;
8018
397k
    }
8019
8020
65.8M
    NEXT;
8021
8022
    /*
8023
     * Request the entity from SAX
8024
     */
8025
65.8M
    if ((ctxt->sax != NULL) &&
8026
65.8M
  (ctxt->sax->getParameterEntity != NULL))
8027
65.8M
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8028
65.8M
    if (ctxt->instate == XML_PARSER_EOF)
8029
0
  return;
8030
65.8M
    if (entity == NULL) {
8031
  /*
8032
   * [ WFC: Entity Declared ]
8033
   * In a document without any DTD, a document with only an
8034
   * internal DTD subset which contains no parameter entity
8035
   * references, or a document with "standalone='yes'", ...
8036
   * ... The declaration of a parameter entity must precede
8037
   * any reference to it...
8038
   */
8039
5.78M
  if ((ctxt->standalone == 1) ||
8040
5.78M
      ((ctxt->hasExternalSubset == 0) &&
8041
5.78M
       (ctxt->hasPErefs == 0))) {
8042
416
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8043
416
            "PEReference: %%%s; not found\n",
8044
416
            name);
8045
5.78M
  } else {
8046
      /*
8047
       * [ VC: Entity Declared ]
8048
       * In a document with an external subset or external
8049
       * parameter entities with "standalone='no'", ...
8050
       * ... The declaration of a parameter entity must
8051
       * precede any reference to it...
8052
       */
8053
5.78M
            if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
8054
1.96k
                xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
8055
1.96k
                                 "PEReference: %%%s; not found\n",
8056
1.96k
                                 name, NULL);
8057
1.96k
            } else
8058
5.78M
                xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8059
5.78M
                              "PEReference: %%%s; not found\n",
8060
5.78M
                              name, NULL);
8061
5.78M
            ctxt->valid = 0;
8062
5.78M
  }
8063
60.0M
    } else {
8064
  /*
8065
   * Internal checking in case the entity quest barfed
8066
   */
8067
60.0M
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8068
60.0M
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8069
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8070
0
      "Internal: %%%s; is not a parameter entity\n",
8071
0
        name, NULL);
8072
60.0M
  } else {
8073
60.0M
            xmlChar start[4];
8074
60.0M
            xmlCharEncoding enc;
8075
60.0M
            unsigned long parentConsumed;
8076
60.0M
            xmlEntityPtr oldEnt;
8077
8078
60.0M
      if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
8079
60.0M
          ((ctxt->options & XML_PARSE_NOENT) == 0) &&
8080
60.0M
    ((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
8081
60.0M
    ((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
8082
60.0M
    ((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
8083
60.0M
    (ctxt->replaceEntities == 0) &&
8084
60.0M
    (ctxt->validate == 0))
8085
47
    return;
8086
8087
60.0M
            if (entity->flags & XML_ENT_EXPANDING) {
8088
69
                xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
8089
69
                xmlHaltParser(ctxt);
8090
69
                return;
8091
69
            }
8092
8093
            /* Must be computed from old input before pushing new input. */
8094
60.0M
            parentConsumed = ctxt->input->parentConsumed;
8095
60.0M
            oldEnt = ctxt->input->entity;
8096
60.0M
            if ((oldEnt == NULL) ||
8097
60.0M
                ((oldEnt->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
8098
58.6M
                 ((oldEnt->flags & XML_ENT_PARSED) == 0))) {
8099
1.69M
                xmlSaturatedAdd(&parentConsumed, ctxt->input->consumed);
8100
1.69M
                xmlSaturatedAddSizeT(&parentConsumed,
8101
1.69M
                                     ctxt->input->cur - ctxt->input->base);
8102
1.69M
            }
8103
8104
60.0M
      input = xmlNewEntityInputStream(ctxt, entity);
8105
60.0M
      if (xmlPushInput(ctxt, input) < 0) {
8106
385
                xmlFreeInputStream(input);
8107
385
    return;
8108
385
            }
8109
8110
60.0M
            entity->flags |= XML_ENT_EXPANDING;
8111
8112
60.0M
            input->parentConsumed = parentConsumed;
8113
8114
60.0M
      if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
8115
                /*
8116
                 * Get the 4 first bytes and decode the charset
8117
                 * if enc != XML_CHAR_ENCODING_NONE
8118
                 * plug some encoding conversion routines.
8119
                 * Note that, since we may have some non-UTF8
8120
                 * encoding (like UTF16, bug 135229), the 'length'
8121
                 * is not known, but we can calculate based upon
8122
                 * the amount of data in the buffer.
8123
                 */
8124
3.10k
                GROW
8125
3.10k
                if (ctxt->instate == XML_PARSER_EOF)
8126
0
                    return;
8127
3.10k
                if ((ctxt->input->end - ctxt->input->cur)>=4) {
8128
3.10k
                    start[0] = RAW;
8129
3.10k
                    start[1] = NXT(1);
8130
3.10k
                    start[2] = NXT(2);
8131
3.10k
                    start[3] = NXT(3);
8132
3.10k
                    enc = xmlDetectCharEncoding(start, 4);
8133
3.10k
                    if (enc != XML_CHAR_ENCODING_NONE) {
8134
3
                        xmlSwitchEncoding(ctxt, enc);
8135
3
                    }
8136
3.10k
                }
8137
8138
3.10k
                if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
8139
3.10k
                    (IS_BLANK_CH(NXT(5)))) {
8140
3
                    xmlParseTextDecl(ctxt);
8141
3
                }
8142
3.10k
            }
8143
60.0M
  }
8144
60.0M
    }
8145
65.8M
    ctxt->hasPErefs = 1;
8146
65.8M
}
8147
8148
/**
8149
 * xmlLoadEntityContent:
8150
 * @ctxt:  an XML parser context
8151
 * @entity: an unloaded system entity
8152
 *
8153
 * Load the original content of the given system entity from the
8154
 * ExternalID/SystemID given. This is to be used for Included in Literal
8155
 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
8156
 *
8157
 * Returns 0 in case of success and -1 in case of failure
8158
 */
8159
static int
8160
695
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
8161
695
    xmlParserInputPtr input;
8162
695
    xmlBufferPtr buf;
8163
695
    int l, c;
8164
695
    int count = 0;
8165
8166
695
    if ((ctxt == NULL) || (entity == NULL) ||
8167
695
        ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
8168
695
   (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
8169
695
  (entity->content != NULL)) {
8170
0
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8171
0
              "xmlLoadEntityContent parameter error");
8172
0
        return(-1);
8173
0
    }
8174
8175
695
    if (xmlParserDebugEntities)
8176
0
  xmlGenericError(xmlGenericErrorContext,
8177
0
    "Reading %s entity content input\n", entity->name);
8178
8179
695
    buf = xmlBufferCreate();
8180
695
    if (buf == NULL) {
8181
0
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8182
0
              "xmlLoadEntityContent parameter error");
8183
0
        return(-1);
8184
0
    }
8185
695
    xmlBufferSetAllocationScheme(buf, XML_BUFFER_ALLOC_DOUBLEIT);
8186
8187
695
    input = xmlNewEntityInputStream(ctxt, entity);
8188
695
    if (input == NULL) {
8189
99
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8190
99
              "xmlLoadEntityContent input error");
8191
99
  xmlBufferFree(buf);
8192
99
        return(-1);
8193
99
    }
8194
8195
    /*
8196
     * Push the entity as the current input, read char by char
8197
     * saving to the buffer until the end of the entity or an error
8198
     */
8199
596
    if (xmlPushInput(ctxt, input) < 0) {
8200
0
        xmlBufferFree(buf);
8201
0
  xmlFreeInputStream(input);
8202
0
  return(-1);
8203
0
    }
8204
8205
596
    GROW;
8206
596
    c = CUR_CHAR(l);
8207
50.0k
    while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
8208
50.0k
           (IS_CHAR(c))) {
8209
49.4k
        xmlBufferAdd(buf, ctxt->input->cur, l);
8210
49.4k
  if (count++ > XML_PARSER_CHUNK_SIZE) {
8211
263
      count = 0;
8212
263
      GROW;
8213
263
            if (ctxt->instate == XML_PARSER_EOF) {
8214
0
                xmlBufferFree(buf);
8215
0
                return(-1);
8216
0
            }
8217
263
  }
8218
49.4k
  NEXTL(l);
8219
49.4k
  c = CUR_CHAR(l);
8220
49.4k
  if (c == 0) {
8221
490
      count = 0;
8222
490
      GROW;
8223
490
            if (ctxt->instate == XML_PARSER_EOF) {
8224
0
                xmlBufferFree(buf);
8225
0
                return(-1);
8226
0
            }
8227
490
      c = CUR_CHAR(l);
8228
490
  }
8229
49.4k
    }
8230
8231
596
    if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
8232
403
        xmlSaturatedAdd(&ctxt->sizeentities, ctxt->input->consumed);
8233
403
        xmlPopInput(ctxt);
8234
403
    } else if (!IS_CHAR(c)) {
8235
193
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8236
193
                          "xmlLoadEntityContent: invalid char value %d\n",
8237
193
                    c);
8238
193
  xmlBufferFree(buf);
8239
193
  return(-1);
8240
193
    }
8241
403
    entity->content = buf->content;
8242
403
    entity->length = buf->use;
8243
403
    buf->content = NULL;
8244
403
    xmlBufferFree(buf);
8245
8246
403
    return(0);
8247
596
}
8248
8249
/**
8250
 * xmlParseStringPEReference:
8251
 * @ctxt:  an XML parser context
8252
 * @str:  a pointer to an index in the string
8253
 *
8254
 * parse PEReference declarations
8255
 *
8256
 * [69] PEReference ::= '%' Name ';'
8257
 *
8258
 * [ WFC: No Recursion ]
8259
 * A parsed entity must not contain a recursive
8260
 * reference to itself, either directly or indirectly.
8261
 *
8262
 * [ WFC: Entity Declared ]
8263
 * In a document without any DTD, a document with only an internal DTD
8264
 * subset which contains no parameter entity references, or a document
8265
 * with "standalone='yes'", ...  ... The declaration of a parameter
8266
 * entity must precede any reference to it...
8267
 *
8268
 * [ VC: Entity Declared ]
8269
 * In a document with an external subset or external parameter entities
8270
 * with "standalone='no'", ...  ... The declaration of a parameter entity
8271
 * must precede any reference to it...
8272
 *
8273
 * [ WFC: In DTD ]
8274
 * Parameter-entity references may only appear in the DTD.
8275
 * NOTE: misleading but this is handled.
8276
 *
8277
 * Returns the entity referenced (an xmlEntityPtr), or NULL on failure.
8278
 *         str is updated to the current value of the index
8279
 */
8280
static xmlEntityPtr
8281
847k
xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8282
847k
    const xmlChar *ptr;
8283
847k
    xmlChar cur;
8284
847k
    xmlChar *name;
8285
847k
    xmlEntityPtr entity = NULL;
8286
8287
847k
    if ((str == NULL) || (*str == NULL)) return(NULL);
8288
847k
    ptr = *str;
8289
847k
    cur = *ptr;
8290
847k
    if (cur != '%')
8291
0
        return(NULL);
8292
847k
    ptr++;
8293
847k
    name = xmlParseStringName(ctxt, &ptr);
8294
847k
    if (name == NULL) {
8295
134
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8296
134
           "xmlParseStringPEReference: no name\n");
8297
134
  *str = ptr;
8298
134
  return(NULL);
8299
134
    }
8300
847k
    cur = *ptr;
8301
847k
    if (cur != ';') {
8302
147
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8303
147
  xmlFree(name);
8304
147
  *str = ptr;
8305
147
  return(NULL);
8306
147
    }
8307
847k
    ptr++;
8308
8309
    /*
8310
     * Request the entity from SAX
8311
     */
8312
847k
    if ((ctxt->sax != NULL) &&
8313
847k
  (ctxt->sax->getParameterEntity != NULL))
8314
847k
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8315
847k
    if (ctxt->instate == XML_PARSER_EOF) {
8316
0
  xmlFree(name);
8317
0
  *str = ptr;
8318
0
  return(NULL);
8319
0
    }
8320
847k
    if (entity == NULL) {
8321
  /*
8322
   * [ WFC: Entity Declared ]
8323
   * In a document without any DTD, a document with only an
8324
   * internal DTD subset which contains no parameter entity
8325
   * references, or a document with "standalone='yes'", ...
8326
   * ... The declaration of a parameter entity must precede
8327
   * any reference to it...
8328
   */
8329
28.3k
  if ((ctxt->standalone == 1) ||
8330
28.3k
      ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8331
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8332
0
     "PEReference: %%%s; not found\n", name);
8333
28.3k
  } else {
8334
      /*
8335
       * [ VC: Entity Declared ]
8336
       * In a document with an external subset or external
8337
       * parameter entities with "standalone='no'", ...
8338
       * ... The declaration of a parameter entity must
8339
       * precede any reference to it...
8340
       */
8341
28.3k
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8342
28.3k
        "PEReference: %%%s; not found\n",
8343
28.3k
        name, NULL);
8344
28.3k
      ctxt->valid = 0;
8345
28.3k
  }
8346
819k
    } else {
8347
  /*
8348
   * Internal checking in case the entity quest barfed
8349
   */
8350
819k
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8351
819k
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8352
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8353
0
        "%%%s; is not a parameter entity\n",
8354
0
        name, NULL);
8355
0
  }
8356
819k
    }
8357
847k
    ctxt->hasPErefs = 1;
8358
847k
    xmlFree(name);
8359
847k
    *str = ptr;
8360
847k
    return(entity);
8361
847k
}
8362
8363
/**
8364
 * xmlParseDocTypeDecl:
8365
 * @ctxt:  an XML parser context
8366
 *
8367
 * DEPRECATED: Internal function, don't use.
8368
 *
8369
 * parse a DOCTYPE declaration
8370
 *
8371
 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8372
 *                      ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8373
 *
8374
 * [ VC: Root Element Type ]
8375
 * The Name in the document type declaration must match the element
8376
 * type of the root element.
8377
 */
8378
8379
void
8380
100k
xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
8381
100k
    const xmlChar *name = NULL;
8382
100k
    xmlChar *ExternalID = NULL;
8383
100k
    xmlChar *URI = NULL;
8384
8385
    /*
8386
     * We know that '<!DOCTYPE' has been detected.
8387
     */
8388
100k
    SKIP(9);
8389
8390
100k
    SKIP_BLANKS;
8391
8392
    /*
8393
     * Parse the DOCTYPE name.
8394
     */
8395
100k
    name = xmlParseName(ctxt);
8396
100k
    if (name == NULL) {
8397
455
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8398
455
           "xmlParseDocTypeDecl : no DOCTYPE name !\n");
8399
455
    }
8400
100k
    ctxt->intSubName = name;
8401
8402
100k
    SKIP_BLANKS;
8403
8404
    /*
8405
     * Check for SystemID and ExternalID
8406
     */
8407
100k
    URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8408
8409
100k
    if ((URI != NULL) || (ExternalID != NULL)) {
8410
47.2k
        ctxt->hasExternalSubset = 1;
8411
47.2k
    }
8412
100k
    ctxt->extSubURI = URI;
8413
100k
    ctxt->extSubSystem = ExternalID;
8414
8415
100k
    SKIP_BLANKS;
8416
8417
    /*
8418
     * Create and update the internal subset.
8419
     */
8420
100k
    if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8421
100k
  (!ctxt->disableSAX))
8422
95.5k
  ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8423
100k
    if (ctxt->instate == XML_PARSER_EOF)
8424
0
  return;
8425
8426
    /*
8427
     * Is there any internal subset declarations ?
8428
     * they are handled separately in xmlParseInternalSubset()
8429
     */
8430
100k
    if (RAW == '[')
8431
73.6k
  return;
8432
8433
    /*
8434
     * We should be at the end of the DOCTYPE declaration.
8435
     */
8436
26.7k
    if (RAW != '>') {
8437
6.57k
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8438
6.57k
    }
8439
26.7k
    NEXT;
8440
26.7k
}
8441
8442
/**
8443
 * xmlParseInternalSubset:
8444
 * @ctxt:  an XML parser context
8445
 *
8446
 * parse the internal subset declaration
8447
 *
8448
 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8449
 */
8450
8451
static void
8452
73.4k
xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8453
    /*
8454
     * Is there any DTD definition ?
8455
     */
8456
73.4k
    if (RAW == '[') {
8457
73.4k
        int baseInputNr = ctxt->inputNr;
8458
73.4k
        ctxt->instate = XML_PARSER_DTD;
8459
73.4k
        NEXT;
8460
  /*
8461
   * Parse the succession of Markup declarations and
8462
   * PEReferences.
8463
   * Subsequence (markupdecl | PEReference | S)*
8464
   */
8465
73.4k
  SKIP_BLANKS;
8466
59.4M
  while (((RAW != ']') || (ctxt->inputNr > baseInputNr)) &&
8467
59.4M
               (ctxt->instate != XML_PARSER_EOF)) {
8468
8469
            /*
8470
             * Conditional sections are allowed from external entities included
8471
             * by PE References in the internal subset.
8472
             */
8473
59.3M
            if ((ctxt->inputNr > 1) && (ctxt->input->filename != NULL) &&
8474
59.3M
                (RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
8475
0
                xmlParseConditionalSections(ctxt);
8476
59.3M
            } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
8477
59.3M
          xmlParseMarkupDecl(ctxt);
8478
59.3M
            } else if (RAW == '%') {
8479
7.78k
          xmlParsePEReference(ctxt);
8480
16.4k
            } else {
8481
16.4k
    xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8482
16.4k
                        "xmlParseInternalSubset: error detected in"
8483
16.4k
                        " Markup declaration\n");
8484
16.4k
                xmlHaltParser(ctxt);
8485
16.4k
                return;
8486
16.4k
            }
8487
59.3M
      SKIP_BLANKS;
8488
59.3M
  }
8489
57.0k
  if (RAW == ']') {
8490
53.2k
      NEXT;
8491
53.2k
      SKIP_BLANKS;
8492
53.2k
  }
8493
57.0k
    }
8494
8495
    /*
8496
     * We should be at the end of the DOCTYPE declaration.
8497
     */
8498
57.0k
    if (RAW != '>') {
8499
4.28k
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8500
4.28k
  return;
8501
4.28k
    }
8502
52.7k
    NEXT;
8503
52.7k
}
8504
8505
#ifdef LIBXML_SAX1_ENABLED
8506
/**
8507
 * xmlParseAttribute:
8508
 * @ctxt:  an XML parser context
8509
 * @value:  a xmlChar ** used to store the value of the attribute
8510
 *
8511
 * DEPRECATED: Internal function, don't use.
8512
 *
8513
 * parse an attribute
8514
 *
8515
 * [41] Attribute ::= Name Eq AttValue
8516
 *
8517
 * [ WFC: No External Entity References ]
8518
 * Attribute values cannot contain direct or indirect entity references
8519
 * to external entities.
8520
 *
8521
 * [ WFC: No < in Attribute Values ]
8522
 * The replacement text of any entity referred to directly or indirectly in
8523
 * an attribute value (other than "&lt;") must not contain a <.
8524
 *
8525
 * [ VC: Attribute Value Type ]
8526
 * The attribute must have been declared; the value must be of the type
8527
 * declared for it.
8528
 *
8529
 * [25] Eq ::= S? '=' S?
8530
 *
8531
 * With namespace:
8532
 *
8533
 * [NS 11] Attribute ::= QName Eq AttValue
8534
 *
8535
 * Also the case QName == xmlns:??? is handled independently as a namespace
8536
 * definition.
8537
 *
8538
 * Returns the attribute name, and the value in *value.
8539
 */
8540
8541
const xmlChar *
8542
593k
xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8543
593k
    const xmlChar *name;
8544
593k
    xmlChar *val;
8545
8546
593k
    *value = NULL;
8547
593k
    GROW;
8548
593k
    name = xmlParseName(ctxt);
8549
593k
    if (name == NULL) {
8550
28.4k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8551
28.4k
                 "error parsing attribute name\n");
8552
28.4k
        return(NULL);
8553
28.4k
    }
8554
8555
    /*
8556
     * read the value
8557
     */
8558
565k
    SKIP_BLANKS;
8559
565k
    if (RAW == '=') {
8560
548k
        NEXT;
8561
548k
  SKIP_BLANKS;
8562
548k
  val = xmlParseAttValue(ctxt);
8563
548k
  ctxt->instate = XML_PARSER_CONTENT;
8564
548k
    } else {
8565
16.6k
  xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8566
16.6k
         "Specification mandates value for attribute %s\n", name);
8567
16.6k
  return(name);
8568
16.6k
    }
8569
8570
    /*
8571
     * Check that xml:lang conforms to the specification
8572
     * No more registered as an error, just generate a warning now
8573
     * since this was deprecated in XML second edition
8574
     */
8575
548k
    if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8576
2.20k
  if (!xmlCheckLanguageID(val)) {
8577
1.19k
      xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8578
1.19k
              "Malformed value for xml:lang : %s\n",
8579
1.19k
        val, NULL);
8580
1.19k
  }
8581
2.20k
    }
8582
8583
    /*
8584
     * Check that xml:space conforms to the specification
8585
     */
8586
548k
    if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8587
217
  if (xmlStrEqual(val, BAD_CAST "default"))
8588
1
      *(ctxt->space) = 0;
8589
216
  else if (xmlStrEqual(val, BAD_CAST "preserve"))
8590
136
      *(ctxt->space) = 1;
8591
80
  else {
8592
80
    xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8593
80
"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8594
80
                                 val, NULL);
8595
80
  }
8596
217
    }
8597
8598
548k
    *value = val;
8599
548k
    return(name);
8600
565k
}
8601
8602
/**
8603
 * xmlParseStartTag:
8604
 * @ctxt:  an XML parser context
8605
 *
8606
 * DEPRECATED: Internal function, don't use.
8607
 *
8608
 * Parse a start tag. Always consumes '<'.
8609
 *
8610
 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8611
 *
8612
 * [ WFC: Unique Att Spec ]
8613
 * No attribute name may appear more than once in the same start-tag or
8614
 * empty-element tag.
8615
 *
8616
 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8617
 *
8618
 * [ WFC: Unique Att Spec ]
8619
 * No attribute name may appear more than once in the same start-tag or
8620
 * empty-element tag.
8621
 *
8622
 * With namespace:
8623
 *
8624
 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8625
 *
8626
 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8627
 *
8628
 * Returns the element name parsed
8629
 */
8630
8631
const xmlChar *
8632
1.32M
xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8633
1.32M
    const xmlChar *name;
8634
1.32M
    const xmlChar *attname;
8635
1.32M
    xmlChar *attvalue;
8636
1.32M
    const xmlChar **atts = ctxt->atts;
8637
1.32M
    int nbatts = 0;
8638
1.32M
    int maxatts = ctxt->maxatts;
8639
1.32M
    int i;
8640
8641
1.32M
    if (RAW != '<') return(NULL);
8642
1.32M
    NEXT1;
8643
8644
1.32M
    name = xmlParseName(ctxt);
8645
1.32M
    if (name == NULL) {
8646
7.87k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8647
7.87k
       "xmlParseStartTag: invalid element name\n");
8648
7.87k
        return(NULL);
8649
7.87k
    }
8650
8651
    /*
8652
     * Now parse the attributes, it ends up with the ending
8653
     *
8654
     * (S Attribute)* S?
8655
     */
8656
1.31M
    SKIP_BLANKS;
8657
1.31M
    GROW;
8658
8659
1.44M
    while (((RAW != '>') &&
8660
1.44M
     ((RAW != '/') || (NXT(1) != '>')) &&
8661
1.44M
     (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
8662
593k
  attname = xmlParseAttribute(ctxt, &attvalue);
8663
593k
        if (attname == NULL) {
8664
28.4k
      xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8665
28.4k
         "xmlParseStartTag: problem parsing attributes\n");
8666
28.4k
      break;
8667
28.4k
  }
8668
565k
        if (attvalue != NULL) {
8669
      /*
8670
       * [ WFC: Unique Att Spec ]
8671
       * No attribute name may appear more than once in the same
8672
       * start-tag or empty-element tag.
8673
       */
8674
675k
      for (i = 0; i < nbatts;i += 2) {
8675
129k
          if (xmlStrEqual(atts[i], attname)) {
8676
434
        xmlErrAttributeDup(ctxt, NULL, attname);
8677
434
        xmlFree(attvalue);
8678
434
        goto failed;
8679
434
    }
8680
129k
      }
8681
      /*
8682
       * Add the pair to atts
8683
       */
8684
546k
      if (atts == NULL) {
8685
28.9k
          maxatts = 22; /* allow for 10 attrs by default */
8686
28.9k
          atts = (const xmlChar **)
8687
28.9k
           xmlMalloc(maxatts * sizeof(xmlChar *));
8688
28.9k
    if (atts == NULL) {
8689
0
        xmlErrMemory(ctxt, NULL);
8690
0
        if (attvalue != NULL)
8691
0
      xmlFree(attvalue);
8692
0
        goto failed;
8693
0
    }
8694
28.9k
    ctxt->atts = atts;
8695
28.9k
    ctxt->maxatts = maxatts;
8696
517k
      } else if (nbatts + 4 > maxatts) {
8697
294
          const xmlChar **n;
8698
8699
294
          maxatts *= 2;
8700
294
          n = (const xmlChar **) xmlRealloc((void *) atts,
8701
294
               maxatts * sizeof(const xmlChar *));
8702
294
    if (n == NULL) {
8703
0
        xmlErrMemory(ctxt, NULL);
8704
0
        if (attvalue != NULL)
8705
0
      xmlFree(attvalue);
8706
0
        goto failed;
8707
0
    }
8708
294
    atts = n;
8709
294
    ctxt->atts = atts;
8710
294
    ctxt->maxatts = maxatts;
8711
294
      }
8712
546k
      atts[nbatts++] = attname;
8713
546k
      atts[nbatts++] = attvalue;
8714
546k
      atts[nbatts] = NULL;
8715
546k
      atts[nbatts + 1] = NULL;
8716
546k
  } else {
8717
18.5k
      if (attvalue != NULL)
8718
0
    xmlFree(attvalue);
8719
18.5k
  }
8720
8721
565k
failed:
8722
8723
565k
  GROW
8724
565k
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8725
434k
      break;
8726
130k
  if (SKIP_BLANKS == 0) {
8727
32.3k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8728
32.3k
         "attributes construct error\n");
8729
32.3k
  }
8730
130k
  SHRINK;
8731
130k
        GROW;
8732
130k
    }
8733
8734
    /*
8735
     * SAX: Start of Element !
8736
     */
8737
1.31M
    if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8738
1.31M
  (!ctxt->disableSAX)) {
8739
1.20M
  if (nbatts > 0)
8740
416k
      ctxt->sax->startElement(ctxt->userData, name, atts);
8741
792k
  else
8742
792k
      ctxt->sax->startElement(ctxt->userData, name, NULL);
8743
1.20M
    }
8744
8745
1.31M
    if (atts != NULL) {
8746
        /* Free only the content strings */
8747
1.75M
        for (i = 1;i < nbatts;i+=2)
8748
546k
      if (atts[i] != NULL)
8749
546k
         xmlFree((xmlChar *) atts[i]);
8750
1.21M
    }
8751
1.31M
    return(name);
8752
1.31M
}
8753
8754
/**
8755
 * xmlParseEndTag1:
8756
 * @ctxt:  an XML parser context
8757
 * @line:  line of the start tag
8758
 * @nsNr:  number of namespaces on the start tag
8759
 *
8760
 * Parse an end tag. Always consumes '</'.
8761
 *
8762
 * [42] ETag ::= '</' Name S? '>'
8763
 *
8764
 * With namespace
8765
 *
8766
 * [NS 9] ETag ::= '</' QName S? '>'
8767
 */
8768
8769
static void
8770
1.10M
xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8771
1.10M
    const xmlChar *name;
8772
8773
1.10M
    GROW;
8774
1.10M
    if ((RAW != '<') || (NXT(1) != '/')) {
8775
0
  xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8776
0
           "xmlParseEndTag: '</' not found\n");
8777
0
  return;
8778
0
    }
8779
1.10M
    SKIP(2);
8780
8781
1.10M
    name = xmlParseNameAndCompare(ctxt,ctxt->name);
8782
8783
    /*
8784
     * We should definitely be at the ending "S? '>'" part
8785
     */
8786
1.10M
    GROW;
8787
1.10M
    SKIP_BLANKS;
8788
1.10M
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8789
8.44k
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8790
8.44k
    } else
8791
1.09M
  NEXT1;
8792
8793
    /*
8794
     * [ WFC: Element Type Match ]
8795
     * The Name in an element's end-tag must match the element type in the
8796
     * start-tag.
8797
     *
8798
     */
8799
1.10M
    if (name != (xmlChar*)1) {
8800
26.5k
        if (name == NULL) name = BAD_CAST "unparsable";
8801
26.5k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8802
26.5k
         "Opening and ending tag mismatch: %s line %d and %s\n",
8803
26.5k
                    ctxt->name, line, name);
8804
26.5k
    }
8805
8806
    /*
8807
     * SAX: End of Tag
8808
     */
8809
1.10M
    if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8810
1.10M
  (!ctxt->disableSAX))
8811
1.00M
        ctxt->sax->endElement(ctxt->userData, ctxt->name);
8812
8813
1.10M
    namePop(ctxt);
8814
1.10M
    spacePop(ctxt);
8815
1.10M
    return;
8816
1.10M
}
8817
8818
/**
8819
 * xmlParseEndTag:
8820
 * @ctxt:  an XML parser context
8821
 *
8822
 * DEPRECATED: Internal function, don't use.
8823
 *
8824
 * parse an end of tag
8825
 *
8826
 * [42] ETag ::= '</' Name S? '>'
8827
 *
8828
 * With namespace
8829
 *
8830
 * [NS 9] ETag ::= '</' QName S? '>'
8831
 */
8832
8833
void
8834
0
xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8835
0
    xmlParseEndTag1(ctxt, 0);
8836
0
}
8837
#endif /* LIBXML_SAX1_ENABLED */
8838
8839
/************************************************************************
8840
 *                  *
8841
 *          SAX 2 specific operations       *
8842
 *                  *
8843
 ************************************************************************/
8844
8845
/*
8846
 * xmlGetNamespace:
8847
 * @ctxt:  an XML parser context
8848
 * @prefix:  the prefix to lookup
8849
 *
8850
 * Lookup the namespace name for the @prefix (which ca be NULL)
8851
 * The prefix must come from the @ctxt->dict dictionary
8852
 *
8853
 * Returns the namespace name or NULL if not bound
8854
 */
8855
static const xmlChar *
8856
2.64M
xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8857
2.64M
    int i;
8858
8859
2.64M
    if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
8860
2.49M
    for (i = ctxt->nsNr - 2;i >= 0;i-=2)
8861
375k
        if (ctxt->nsTab[i] == prefix) {
8862
333k
      if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8863
297
          return(NULL);
8864
333k
      return(ctxt->nsTab[i + 1]);
8865
333k
  }
8866
2.12M
    return(NULL);
8867
2.45M
}
8868
8869
/**
8870
 * xmlParseQName:
8871
 * @ctxt:  an XML parser context
8872
 * @prefix:  pointer to store the prefix part
8873
 *
8874
 * parse an XML Namespace QName
8875
 *
8876
 * [6]  QName  ::= (Prefix ':')? LocalPart
8877
 * [7]  Prefix  ::= NCName
8878
 * [8]  LocalPart  ::= NCName
8879
 *
8880
 * Returns the Name parsed or NULL
8881
 */
8882
8883
static const xmlChar *
8884
3.71M
xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8885
3.71M
    const xmlChar *l, *p;
8886
8887
3.71M
    GROW;
8888
8889
3.71M
    l = xmlParseNCName(ctxt);
8890
3.71M
    if (l == NULL) {
8891
33.0k
        if (CUR == ':') {
8892
4.71k
      l = xmlParseName(ctxt);
8893
4.71k
      if (l != NULL) {
8894
4.71k
          xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8895
4.71k
             "Failed to parse QName '%s'\n", l, NULL, NULL);
8896
4.71k
    *prefix = NULL;
8897
4.71k
    return(l);
8898
4.71k
      }
8899
4.71k
  }
8900
28.2k
        return(NULL);
8901
33.0k
    }
8902
3.68M
    if (CUR == ':') {
8903
390k
        NEXT;
8904
390k
  p = l;
8905
390k
  l = xmlParseNCName(ctxt);
8906
390k
  if (l == NULL) {
8907
2.92k
      xmlChar *tmp;
8908
8909
2.92k
            if (ctxt->instate == XML_PARSER_EOF)
8910
0
                return(NULL);
8911
2.92k
            xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8912
2.92k
               "Failed to parse QName '%s:'\n", p, NULL, NULL);
8913
2.92k
      l = xmlParseNmtoken(ctxt);
8914
2.92k
      if (l == NULL) {
8915
1.89k
                if (ctxt->instate == XML_PARSER_EOF)
8916
0
                    return(NULL);
8917
1.89k
    tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8918
1.89k
            } else {
8919
1.03k
    tmp = xmlBuildQName(l, p, NULL, 0);
8920
1.03k
    xmlFree((char *)l);
8921
1.03k
      }
8922
2.92k
      p = xmlDictLookup(ctxt->dict, tmp, -1);
8923
2.92k
      if (tmp != NULL) xmlFree(tmp);
8924
2.92k
      *prefix = NULL;
8925
2.92k
      return(p);
8926
2.92k
  }
8927
387k
  if (CUR == ':') {
8928
1.91k
      xmlChar *tmp;
8929
8930
1.91k
            xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8931
1.91k
               "Failed to parse QName '%s:%s:'\n", p, l, NULL);
8932
1.91k
      NEXT;
8933
1.91k
      tmp = (xmlChar *) xmlParseName(ctxt);
8934
1.91k
      if (tmp != NULL) {
8935
1.34k
          tmp = xmlBuildQName(tmp, l, NULL, 0);
8936
1.34k
    l = xmlDictLookup(ctxt->dict, tmp, -1);
8937
1.34k
    if (tmp != NULL) xmlFree(tmp);
8938
1.34k
    *prefix = p;
8939
1.34k
    return(l);
8940
1.34k
      }
8941
569
            if (ctxt->instate == XML_PARSER_EOF)
8942
0
                return(NULL);
8943
569
      tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8944
569
      l = xmlDictLookup(ctxt->dict, tmp, -1);
8945
569
      if (tmp != NULL) xmlFree(tmp);
8946
569
      *prefix = p;
8947
569
      return(l);
8948
569
  }
8949
385k
  *prefix = p;
8950
385k
    } else
8951
3.28M
        *prefix = NULL;
8952
3.67M
    return(l);
8953
3.68M
}
8954
8955
/**
8956
 * xmlParseQNameAndCompare:
8957
 * @ctxt:  an XML parser context
8958
 * @name:  the localname
8959
 * @prefix:  the prefix, if any.
8960
 *
8961
 * parse an XML name and compares for match
8962
 * (specialized for endtag parsing)
8963
 *
8964
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8965
 * and the name for mismatch
8966
 */
8967
8968
static const xmlChar *
8969
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8970
121k
                        xmlChar const *prefix) {
8971
121k
    const xmlChar *cmp;
8972
121k
    const xmlChar *in;
8973
121k
    const xmlChar *ret;
8974
121k
    const xmlChar *prefix2;
8975
8976
121k
    if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8977
8978
121k
    GROW;
8979
121k
    in = ctxt->input->cur;
8980
8981
121k
    cmp = prefix;
8982
457k
    while (*in != 0 && *in == *cmp) {
8983
336k
  ++in;
8984
336k
  ++cmp;
8985
336k
    }
8986
121k
    if ((*cmp == 0) && (*in == ':')) {
8987
117k
        in++;
8988
117k
  cmp = name;
8989
1.03M
  while (*in != 0 && *in == *cmp) {
8990
917k
      ++in;
8991
917k
      ++cmp;
8992
917k
  }
8993
117k
  if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8994
      /* success */
8995
108k
            ctxt->input->col += in - ctxt->input->cur;
8996
108k
      ctxt->input->cur = in;
8997
108k
      return((const xmlChar*) 1);
8998
108k
  }
8999
117k
    }
9000
    /*
9001
     * all strings coms from the dictionary, equality can be done directly
9002
     */
9003
12.1k
    ret = xmlParseQName (ctxt, &prefix2);
9004
12.1k
    if ((ret == name) && (prefix == prefix2))
9005
256
  return((const xmlChar*) 1);
9006
11.8k
    return ret;
9007
12.1k
}
9008
9009
/**
9010
 * xmlParseAttValueInternal:
9011
 * @ctxt:  an XML parser context
9012
 * @len:  attribute len result
9013
 * @alloc:  whether the attribute was reallocated as a new string
9014
 * @normalize:  if 1 then further non-CDATA normalization must be done
9015
 *
9016
 * parse a value for an attribute.
9017
 * NOTE: if no normalization is needed, the routine will return pointers
9018
 *       directly from the data buffer.
9019
 *
9020
 * 3.3.3 Attribute-Value Normalization:
9021
 * Before the value of an attribute is passed to the application or
9022
 * checked for validity, the XML processor must normalize it as follows:
9023
 * - a character reference is processed by appending the referenced
9024
 *   character to the attribute value
9025
 * - an entity reference is processed by recursively processing the
9026
 *   replacement text of the entity
9027
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
9028
 *   appending #x20 to the normalized value, except that only a single
9029
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
9030
 *   parsed entity or the literal entity value of an internal parsed entity
9031
 * - other characters are processed by appending them to the normalized value
9032
 * If the declared value is not CDATA, then the XML processor must further
9033
 * process the normalized attribute value by discarding any leading and
9034
 * trailing space (#x20) characters, and by replacing sequences of space
9035
 * (#x20) characters by a single space (#x20) character.
9036
 * All attributes for which no declaration has been read should be treated
9037
 * by a non-validating parser as if declared CDATA.
9038
 *
9039
 * Returns the AttValue parsed or NULL. The value has to be freed by the
9040
 *     caller if it was copied, this can be detected by val[*len] == 0.
9041
 */
9042
9043
#define GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end) \
9044
5.68k
    const xmlChar *oldbase = ctxt->input->base;\
9045
5.68k
    GROW;\
9046
5.68k
    if (ctxt->instate == XML_PARSER_EOF)\
9047
5.68k
        return(NULL);\
9048
5.68k
    if (oldbase != ctxt->input->base) {\
9049
0
        ptrdiff_t delta = ctxt->input->base - oldbase;\
9050
0
        start = start + delta;\
9051
0
        in = in + delta;\
9052
0
    }\
9053
5.68k
    end = ctxt->input->end;
9054
9055
static xmlChar *
9056
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
9057
                         int normalize)
9058
2.12M
{
9059
2.12M
    xmlChar limit = 0;
9060
2.12M
    const xmlChar *in = NULL, *start, *end, *last;
9061
2.12M
    xmlChar *ret = NULL;
9062
2.12M
    int line, col;
9063
2.12M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9064
471k
                    XML_MAX_HUGE_LENGTH :
9065
2.12M
                    XML_MAX_TEXT_LENGTH;
9066
9067
2.12M
    GROW;
9068
2.12M
    in = (xmlChar *) CUR_PTR;
9069
2.12M
    line = ctxt->input->line;
9070
2.12M
    col = ctxt->input->col;
9071
2.12M
    if (*in != '"' && *in != '\'') {
9072
5.26k
        xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
9073
5.26k
        return (NULL);
9074
5.26k
    }
9075
2.12M
    ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
9076
9077
    /*
9078
     * try to handle in this routine the most common case where no
9079
     * allocation of a new string is required and where content is
9080
     * pure ASCII.
9081
     */
9082
2.12M
    limit = *in++;
9083
2.12M
    col++;
9084
2.12M
    end = ctxt->input->end;
9085
2.12M
    start = in;
9086
2.12M
    if (in >= end) {
9087
130
        GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9088
130
    }
9089
2.12M
    if (normalize) {
9090
        /*
9091
   * Skip any leading spaces
9092
   */
9093
207k
  while ((in < end) && (*in != limit) &&
9094
207k
         ((*in == 0x20) || (*in == 0x9) ||
9095
206k
          (*in == 0xA) || (*in == 0xD))) {
9096
11.7k
      if (*in == 0xA) {
9097
4.35k
          line++; col = 1;
9098
7.38k
      } else {
9099
7.38k
          col++;
9100
7.38k
      }
9101
11.7k
      in++;
9102
11.7k
      start = in;
9103
11.7k
      if (in >= end) {
9104
4
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9105
4
                if ((in - start) > maxLength) {
9106
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9107
0
                                   "AttValue length too long\n");
9108
0
                    return(NULL);
9109
0
                }
9110
4
      }
9111
11.7k
  }
9112
1.88M
  while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9113
1.88M
         (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9114
1.69M
      col++;
9115
1.69M
      if ((*in++ == 0x20) && (*in == 0x20)) break;
9116
1.69M
      if (in >= end) {
9117
51
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9118
51
                if ((in - start) > maxLength) {
9119
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9120
0
                                   "AttValue length too long\n");
9121
0
                    return(NULL);
9122
0
                }
9123
51
      }
9124
1.69M
  }
9125
195k
  last = in;
9126
  /*
9127
   * skip the trailing blanks
9128
   */
9129
196k
  while ((last[-1] == 0x20) && (last > start)) last--;
9130
201k
  while ((in < end) && (*in != limit) &&
9131
201k
         ((*in == 0x20) || (*in == 0x9) ||
9132
12.7k
          (*in == 0xA) || (*in == 0xD))) {
9133
5.71k
      if (*in == 0xA) {
9134
3.10k
          line++, col = 1;
9135
3.10k
      } else {
9136
2.60k
          col++;
9137
2.60k
      }
9138
5.71k
      in++;
9139
5.71k
      if (in >= end) {
9140
39
    const xmlChar *oldbase = ctxt->input->base;
9141
39
    GROW;
9142
39
                if (ctxt->instate == XML_PARSER_EOF)
9143
0
                    return(NULL);
9144
39
    if (oldbase != ctxt->input->base) {
9145
0
        ptrdiff_t delta = ctxt->input->base - oldbase;
9146
0
        start = start + delta;
9147
0
        in = in + delta;
9148
0
        last = last + delta;
9149
0
    }
9150
39
    end = ctxt->input->end;
9151
39
                if ((in - start) > maxLength) {
9152
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9153
0
                                   "AttValue length too long\n");
9154
0
                    return(NULL);
9155
0
                }
9156
39
      }
9157
5.71k
  }
9158
195k
        if ((in - start) > maxLength) {
9159
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9160
0
                           "AttValue length too long\n");
9161
0
            return(NULL);
9162
0
        }
9163
195k
  if (*in != limit) goto need_complex;
9164
1.92M
    } else {
9165
34.9M
  while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9166
34.9M
         (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9167
32.9M
      in++;
9168
32.9M
      col++;
9169
32.9M
      if (in >= end) {
9170
5.49k
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9171
5.49k
                if ((in - start) > maxLength) {
9172
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9173
0
                                   "AttValue length too long\n");
9174
0
                    return(NULL);
9175
0
                }
9176
5.49k
      }
9177
32.9M
  }
9178
1.92M
  last = in;
9179
1.92M
        if ((in - start) > maxLength) {
9180
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9181
0
                           "AttValue length too long\n");
9182
0
            return(NULL);
9183
0
        }
9184
1.92M
  if (*in != limit) goto need_complex;
9185
1.92M
    }
9186
2.02M
    in++;
9187
2.02M
    col++;
9188
2.02M
    if (len != NULL) {
9189
1.28M
        if (alloc) *alloc = 0;
9190
1.28M
        *len = last - start;
9191
1.28M
        ret = (xmlChar *) start;
9192
1.28M
    } else {
9193
735k
        if (alloc) *alloc = 1;
9194
735k
        ret = xmlStrndup(start, last - start);
9195
735k
    }
9196
2.02M
    CUR_PTR = in;
9197
2.02M
    ctxt->input->line = line;
9198
2.02M
    ctxt->input->col = col;
9199
2.02M
    return ret;
9200
96.5k
need_complex:
9201
96.5k
    if (alloc) *alloc = 1;
9202
96.5k
    return xmlParseAttValueComplex(ctxt, len, normalize);
9203
2.12M
}
9204
9205
/**
9206
 * xmlParseAttribute2:
9207
 * @ctxt:  an XML parser context
9208
 * @pref:  the element prefix
9209
 * @elem:  the element name
9210
 * @prefix:  a xmlChar ** used to store the value of the attribute prefix
9211
 * @value:  a xmlChar ** used to store the value of the attribute
9212
 * @len:  an int * to save the length of the attribute
9213
 * @alloc:  an int * to indicate if the attribute was allocated
9214
 *
9215
 * parse an attribute in the new SAX2 framework.
9216
 *
9217
 * Returns the attribute name, and the value in *value, .
9218
 */
9219
9220
static const xmlChar *
9221
xmlParseAttribute2(xmlParserCtxtPtr ctxt,
9222
                   const xmlChar * pref, const xmlChar * elem,
9223
                   const xmlChar ** prefix, xmlChar ** value,
9224
                   int *len, int *alloc)
9225
1.35M
{
9226
1.35M
    const xmlChar *name;
9227
1.35M
    xmlChar *val, *internal_val = NULL;
9228
1.35M
    int normalize = 0;
9229
9230
1.35M
    *value = NULL;
9231
1.35M
    GROW;
9232
1.35M
    name = xmlParseQName(ctxt, prefix);
9233
1.35M
    if (name == NULL) {
9234
15.4k
        xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9235
15.4k
                       "error parsing attribute name\n");
9236
15.4k
        return (NULL);
9237
15.4k
    }
9238
9239
    /*
9240
     * get the type if needed
9241
     */
9242
1.34M
    if (ctxt->attsSpecial != NULL) {
9243
505k
        int type;
9244
9245
505k
        type = (int) (ptrdiff_t) xmlHashQLookup2(ctxt->attsSpecial,
9246
505k
                                                 pref, elem, *prefix, name);
9247
505k
        if (type != 0)
9248
195k
            normalize = 1;
9249
505k
    }
9250
9251
    /*
9252
     * read the value
9253
     */
9254
1.34M
    SKIP_BLANKS;
9255
1.34M
    if (RAW == '=') {
9256
1.33M
        NEXT;
9257
1.33M
        SKIP_BLANKS;
9258
1.33M
        val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9259
1.33M
        if (val == NULL)
9260
2.24k
            return (NULL);
9261
1.32M
  if (normalize) {
9262
      /*
9263
       * Sometimes a second normalisation pass for spaces is needed
9264
       * but that only happens if charrefs or entities references
9265
       * have been used in the attribute value, i.e. the attribute
9266
       * value have been extracted in an allocated string already.
9267
       */
9268
195k
      if (*alloc) {
9269
7.14k
          const xmlChar *val2;
9270
9271
7.14k
          val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
9272
7.14k
    if ((val2 != NULL) && (val2 != val)) {
9273
1.58k
        xmlFree(val);
9274
1.58k
        val = (xmlChar *) val2;
9275
1.58k
    }
9276
7.14k
      }
9277
195k
  }
9278
1.32M
        ctxt->instate = XML_PARSER_CONTENT;
9279
1.32M
    } else {
9280
9.70k
        xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
9281
9.70k
                          "Specification mandates value for attribute %s\n",
9282
9.70k
                          name);
9283
9.70k
        return (name);
9284
9.70k
    }
9285
9286
1.32M
    if (*prefix == ctxt->str_xml) {
9287
        /*
9288
         * Check that xml:lang conforms to the specification
9289
         * No more registered as an error, just generate a warning now
9290
         * since this was deprecated in XML second edition
9291
         */
9292
6.62k
        if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
9293
1.66k
            internal_val = xmlStrndup(val, *len);
9294
1.66k
            if (!xmlCheckLanguageID(internal_val)) {
9295
697
                xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9296
697
                              "Malformed value for xml:lang : %s\n",
9297
697
                              internal_val, NULL);
9298
697
            }
9299
1.66k
        }
9300
9301
        /*
9302
         * Check that xml:space conforms to the specification
9303
         */
9304
6.62k
        if (xmlStrEqual(name, BAD_CAST "space")) {
9305
296
            internal_val = xmlStrndup(val, *len);
9306
296
            if (xmlStrEqual(internal_val, BAD_CAST "default"))
9307
3
                *(ctxt->space) = 0;
9308
293
            else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9309
213
                *(ctxt->space) = 1;
9310
80
            else {
9311
80
                xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9312
80
                              "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9313
80
                              internal_val, NULL);
9314
80
            }
9315
296
        }
9316
6.62k
        if (internal_val) {
9317
1.96k
            xmlFree(internal_val);
9318
1.96k
        }
9319
6.62k
    }
9320
9321
1.32M
    *value = val;
9322
1.32M
    return (name);
9323
1.34M
}
9324
/**
9325
 * xmlParseStartTag2:
9326
 * @ctxt:  an XML parser context
9327
 *
9328
 * Parse a start tag. Always consumes '<'.
9329
 *
9330
 * This routine is called when running SAX2 parsing
9331
 *
9332
 * [40] STag ::= '<' Name (S Attribute)* S? '>'
9333
 *
9334
 * [ WFC: Unique Att Spec ]
9335
 * No attribute name may appear more than once in the same start-tag or
9336
 * empty-element tag.
9337
 *
9338
 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9339
 *
9340
 * [ WFC: Unique Att Spec ]
9341
 * No attribute name may appear more than once in the same start-tag or
9342
 * empty-element tag.
9343
 *
9344
 * With namespace:
9345
 *
9346
 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9347
 *
9348
 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9349
 *
9350
 * Returns the element name parsed
9351
 */
9352
9353
static const xmlChar *
9354
xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
9355
2.34M
                  const xmlChar **URI, int *tlen) {
9356
2.34M
    const xmlChar *localname;
9357
2.34M
    const xmlChar *prefix;
9358
2.34M
    const xmlChar *attname;
9359
2.34M
    const xmlChar *aprefix;
9360
2.34M
    const xmlChar *nsname;
9361
2.34M
    xmlChar *attvalue;
9362
2.34M
    const xmlChar **atts = ctxt->atts;
9363
2.34M
    int maxatts = ctxt->maxatts;
9364
2.34M
    int nratts, nbatts, nbdef, inputid;
9365
2.34M
    int i, j, nbNs, attval;
9366
2.34M
    unsigned long cur;
9367
2.34M
    int nsNr = ctxt->nsNr;
9368
9369
2.34M
    if (RAW != '<') return(NULL);
9370
2.34M
    NEXT1;
9371
9372
    /*
9373
     * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
9374
     *       point since the attribute values may be stored as pointers to
9375
     *       the buffer and calling SHRINK would destroy them !
9376
     *       The Shrinking is only possible once the full set of attribute
9377
     *       callbacks have been done.
9378
     */
9379
2.34M
    SHRINK;
9380
2.34M
    cur = ctxt->input->cur - ctxt->input->base;
9381
2.34M
    inputid = ctxt->input->id;
9382
2.34M
    nbatts = 0;
9383
2.34M
    nratts = 0;
9384
2.34M
    nbdef = 0;
9385
2.34M
    nbNs = 0;
9386
2.34M
    attval = 0;
9387
    /* Forget any namespaces added during an earlier parse of this element. */
9388
2.34M
    ctxt->nsNr = nsNr;
9389
9390
2.34M
    localname = xmlParseQName(ctxt, &prefix);
9391
2.34M
    if (localname == NULL) {
9392
12.4k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9393
12.4k
           "StartTag: invalid element name\n");
9394
12.4k
        return(NULL);
9395
12.4k
    }
9396
2.33M
    *tlen = ctxt->input->cur - ctxt->input->base - cur;
9397
9398
    /*
9399
     * Now parse the attributes, it ends up with the ending
9400
     *
9401
     * (S Attribute)* S?
9402
     */
9403
2.33M
    SKIP_BLANKS;
9404
2.33M
    GROW;
9405
9406
2.75M
    while (((RAW != '>') &&
9407
2.75M
     ((RAW != '/') || (NXT(1) != '>')) &&
9408
2.75M
     (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
9409
1.35M
  int len = -1, alloc = 0;
9410
9411
1.35M
  attname = xmlParseAttribute2(ctxt, prefix, localname,
9412
1.35M
                               &aprefix, &attvalue, &len, &alloc);
9413
1.35M
        if (attname == NULL) {
9414
17.7k
      xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9415
17.7k
           "xmlParseStartTag: problem parsing attributes\n");
9416
17.7k
      break;
9417
17.7k
  }
9418
1.33M
        if (attvalue == NULL)
9419
9.70k
            goto next_attr;
9420
1.32M
  if (len < 0) len = xmlStrlen(attvalue);
9421
9422
1.32M
        if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9423
8.88k
            const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9424
8.88k
            xmlURIPtr uri;
9425
9426
8.88k
            if (URL == NULL) {
9427
0
                xmlErrMemory(ctxt, "dictionary allocation failure");
9428
0
                if ((attvalue != NULL) && (alloc != 0))
9429
0
                    xmlFree(attvalue);
9430
0
                localname = NULL;
9431
0
                goto done;
9432
0
            }
9433
8.88k
            if (*URL != 0) {
9434
8.71k
                uri = xmlParseURI((const char *) URL);
9435
8.71k
                if (uri == NULL) {
9436
1.06k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
9437
1.06k
                             "xmlns: '%s' is not a valid URI\n",
9438
1.06k
                                       URL, NULL, NULL);
9439
7.64k
                } else {
9440
7.64k
                    if (uri->scheme == NULL) {
9441
362
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9442
362
                                  "xmlns: URI %s is not absolute\n",
9443
362
                                  URL, NULL, NULL);
9444
362
                    }
9445
7.64k
                    xmlFreeURI(uri);
9446
7.64k
                }
9447
8.71k
                if (URL == ctxt->str_xml_ns) {
9448
0
                    if (attname != ctxt->str_xml) {
9449
0
                        xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9450
0
                     "xml namespace URI cannot be the default namespace\n",
9451
0
                                 NULL, NULL, NULL);
9452
0
                    }
9453
0
                    goto next_attr;
9454
0
                }
9455
8.71k
                if ((len == 29) &&
9456
8.71k
                    (xmlStrEqual(URL,
9457
70
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9458
0
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9459
0
                         "reuse of the xmlns namespace name is forbidden\n",
9460
0
                             NULL, NULL, NULL);
9461
0
                    goto next_attr;
9462
0
                }
9463
8.71k
            }
9464
            /*
9465
             * check that it's not a defined namespace
9466
             */
9467
19.1k
            for (j = 1;j <= nbNs;j++)
9468
10.5k
                if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9469
203
                    break;
9470
8.88k
            if (j <= nbNs)
9471
203
                xmlErrAttributeDup(ctxt, NULL, attname);
9472
8.68k
            else
9473
8.68k
                if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
9474
9475
1.32M
        } else if (aprefix == ctxt->str_xmlns) {
9476
27.9k
            const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9477
27.9k
            xmlURIPtr uri;
9478
9479
27.9k
            if (attname == ctxt->str_xml) {
9480
101
                if (URL != ctxt->str_xml_ns) {
9481
101
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9482
101
                             "xml namespace prefix mapped to wrong URI\n",
9483
101
                             NULL, NULL, NULL);
9484
101
                }
9485
                /*
9486
                 * Do not keep a namespace definition node
9487
                 */
9488
101
                goto next_attr;
9489
101
            }
9490
27.8k
            if (URL == ctxt->str_xml_ns) {
9491
0
                if (attname != ctxt->str_xml) {
9492
0
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9493
0
                             "xml namespace URI mapped to wrong prefix\n",
9494
0
                             NULL, NULL, NULL);
9495
0
                }
9496
0
                goto next_attr;
9497
0
            }
9498
27.8k
            if (attname == ctxt->str_xmlns) {
9499
6
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9500
6
                         "redefinition of the xmlns prefix is forbidden\n",
9501
6
                         NULL, NULL, NULL);
9502
6
                goto next_attr;
9503
6
            }
9504
27.8k
            if ((len == 29) &&
9505
27.8k
                (xmlStrEqual(URL,
9506
452
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9507
0
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9508
0
                         "reuse of the xmlns namespace name is forbidden\n",
9509
0
                         NULL, NULL, NULL);
9510
0
                goto next_attr;
9511
0
            }
9512
27.8k
            if ((URL == NULL) || (URL[0] == 0)) {
9513
97
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9514
97
                         "xmlns:%s: Empty XML namespace is not allowed\n",
9515
97
                              attname, NULL, NULL);
9516
97
                goto next_attr;
9517
27.7k
            } else {
9518
27.7k
                uri = xmlParseURI((const char *) URL);
9519
27.7k
                if (uri == NULL) {
9520
2.78k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
9521
2.78k
                         "xmlns:%s: '%s' is not a valid URI\n",
9522
2.78k
                                       attname, URL, NULL);
9523
24.9k
                } else {
9524
24.9k
                    if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9525
244
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9526
244
                                  "xmlns:%s: URI %s is not absolute\n",
9527
244
                                  attname, URL, NULL);
9528
244
                    }
9529
24.9k
                    xmlFreeURI(uri);
9530
24.9k
                }
9531
27.7k
            }
9532
9533
            /*
9534
             * check that it's not a defined namespace
9535
             */
9536
44.4k
            for (j = 1;j <= nbNs;j++)
9537
16.8k
                if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9538
246
                    break;
9539
27.7k
            if (j <= nbNs)
9540
246
                xmlErrAttributeDup(ctxt, aprefix, attname);
9541
27.5k
            else
9542
27.5k
                if (nsPush(ctxt, attname, URL) > 0) nbNs++;
9543
9544
1.29M
        } else {
9545
            /*
9546
             * Add the pair to atts
9547
             */
9548
1.29M
            if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9549
34.0k
                if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9550
0
                    goto next_attr;
9551
0
                }
9552
34.0k
                maxatts = ctxt->maxatts;
9553
34.0k
                atts = ctxt->atts;
9554
34.0k
            }
9555
1.29M
            ctxt->attallocs[nratts++] = alloc;
9556
1.29M
            atts[nbatts++] = attname;
9557
1.29M
            atts[nbatts++] = aprefix;
9558
            /*
9559
             * The namespace URI field is used temporarily to point at the
9560
             * base of the current input buffer for non-alloced attributes.
9561
             * When the input buffer is reallocated, all the pointers become
9562
             * invalid, but they can be reconstructed later.
9563
             */
9564
1.29M
            if (alloc)
9565
35.3k
                atts[nbatts++] = NULL;
9566
1.25M
            else
9567
1.25M
                atts[nbatts++] = ctxt->input->base;
9568
1.29M
            atts[nbatts++] = attvalue;
9569
1.29M
            attvalue += len;
9570
1.29M
            atts[nbatts++] = attvalue;
9571
            /*
9572
             * tag if some deallocation is needed
9573
             */
9574
1.29M
            if (alloc != 0) attval = 1;
9575
1.29M
            attvalue = NULL; /* moved into atts */
9576
1.29M
        }
9577
9578
1.33M
next_attr:
9579
1.33M
        if ((attvalue != NULL) && (alloc != 0)) {
9580
4.51k
            xmlFree(attvalue);
9581
4.51k
            attvalue = NULL;
9582
4.51k
        }
9583
9584
1.33M
  GROW
9585
1.33M
        if (ctxt->instate == XML_PARSER_EOF)
9586
0
            break;
9587
1.33M
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9588
892k
      break;
9589
447k
  if (SKIP_BLANKS == 0) {
9590
24.9k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9591
24.9k
         "attributes construct error\n");
9592
24.9k
      break;
9593
24.9k
  }
9594
422k
        GROW;
9595
422k
    }
9596
9597
2.33M
    if (ctxt->input->id != inputid) {
9598
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9599
0
                    "Unexpected change of input\n");
9600
0
        localname = NULL;
9601
0
        goto done;
9602
0
    }
9603
9604
    /* Reconstruct attribute value pointers. */
9605
3.62M
    for (i = 0, j = 0; j < nratts; i += 5, j++) {
9606
1.29M
        if (atts[i+2] != NULL) {
9607
            /*
9608
             * Arithmetic on dangling pointers is technically undefined
9609
             * behavior, but well...
9610
             */
9611
1.25M
            const xmlChar *old = atts[i+2];
9612
1.25M
            atts[i+2]  = NULL;    /* Reset repurposed namespace URI */
9613
1.25M
            atts[i+3] = ctxt->input->base + (atts[i+3] - old);  /* value */
9614
1.25M
            atts[i+4] = ctxt->input->base + (atts[i+4] - old);  /* valuend */
9615
1.25M
        }
9616
1.29M
    }
9617
9618
    /*
9619
     * The attributes defaulting
9620
     */
9621
2.33M
    if (ctxt->attsDefault != NULL) {
9622
1.19M
        xmlDefAttrsPtr defaults;
9623
9624
1.19M
  defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9625
1.19M
  if (defaults != NULL) {
9626
383k
      for (i = 0;i < defaults->nbAttrs;i++) {
9627
275k
          attname = defaults->values[5 * i];
9628
275k
    aprefix = defaults->values[5 * i + 1];
9629
9630
                /*
9631
     * special work for namespaces defaulted defs
9632
     */
9633
275k
    if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9634
        /*
9635
         * check that it's not a defined namespace
9636
         */
9637
232
        for (j = 1;j <= nbNs;j++)
9638
165
            if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9639
82
          break;
9640
149
              if (j <= nbNs) continue;
9641
9642
67
        nsname = xmlGetNamespace(ctxt, NULL);
9643
67
        if (nsname != defaults->values[5 * i + 2]) {
9644
67
      if (nsPush(ctxt, NULL,
9645
67
                 defaults->values[5 * i + 2]) > 0)
9646
67
          nbNs++;
9647
67
        }
9648
275k
    } else if (aprefix == ctxt->str_xmlns) {
9649
        /*
9650
         * check that it's not a defined namespace
9651
         */
9652
3.29k
        for (j = 1;j <= nbNs;j++)
9653
2.77k
            if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9654
2.60k
          break;
9655
3.11k
              if (j <= nbNs) continue;
9656
9657
513
        nsname = xmlGetNamespace(ctxt, attname);
9658
513
        if (nsname != defaults->values[5 * i + 2]) {
9659
461
      if (nsPush(ctxt, attname,
9660
461
                 defaults->values[5 * i + 2]) > 0)
9661
461
          nbNs++;
9662
461
        }
9663
272k
    } else {
9664
        /*
9665
         * check that it's not a defined attribute
9666
         */
9667
763k
        for (j = 0;j < nbatts;j+=5) {
9668
491k
      if ((attname == atts[j]) && (aprefix == atts[j+1]))
9669
421
          break;
9670
491k
        }
9671
272k
        if (j < nbatts) continue;
9672
9673
271k
        if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9674
723
      if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9675
0
                            localname = NULL;
9676
0
                            goto done;
9677
0
      }
9678
723
      maxatts = ctxt->maxatts;
9679
723
      atts = ctxt->atts;
9680
723
        }
9681
271k
        atts[nbatts++] = attname;
9682
271k
        atts[nbatts++] = aprefix;
9683
271k
        if (aprefix == NULL)
9684
180k
      atts[nbatts++] = NULL;
9685
91.6k
        else
9686
91.6k
            atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
9687
271k
        atts[nbatts++] = defaults->values[5 * i + 2];
9688
271k
        atts[nbatts++] = defaults->values[5 * i + 3];
9689
271k
        if ((ctxt->standalone == 1) &&
9690
271k
            (defaults->values[5 * i + 4] != NULL)) {
9691
0
      xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9692
0
    "standalone: attribute %s on %s defaulted from external subset\n",
9693
0
                                   attname, localname);
9694
0
        }
9695
271k
        nbdef++;
9696
271k
    }
9697
275k
      }
9698
108k
  }
9699
1.19M
    }
9700
9701
    /*
9702
     * The attributes checkings
9703
     */
9704
3.89M
    for (i = 0; i < nbatts;i += 5) {
9705
        /*
9706
  * The default namespace does not apply to attribute names.
9707
  */
9708
1.56M
  if (atts[i + 1] != NULL) {
9709
222k
      nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9710
222k
      if (nsname == NULL) {
9711
7.63k
    xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9712
7.63k
        "Namespace prefix %s for %s on %s is not defined\n",
9713
7.63k
        atts[i + 1], atts[i], localname);
9714
7.63k
      }
9715
222k
      atts[i + 2] = nsname;
9716
222k
  } else
9717
1.34M
      nsname = NULL;
9718
  /*
9719
   * [ WFC: Unique Att Spec ]
9720
   * No attribute name may appear more than once in the same
9721
   * start-tag or empty-element tag.
9722
   * As extended by the Namespace in XML REC.
9723
   */
9724
2.53M
        for (j = 0; j < i;j += 5) {
9725
968k
      if (atts[i] == atts[j]) {
9726
2.00k
          if (atts[i+1] == atts[j+1]) {
9727
960
        xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9728
960
        break;
9729
960
    }
9730
1.04k
    if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9731
39
        xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9732
39
           "Namespaced Attribute %s in '%s' redefined\n",
9733
39
           atts[i], nsname, NULL);
9734
39
        break;
9735
39
    }
9736
1.04k
      }
9737
968k
  }
9738
1.56M
    }
9739
9740
2.33M
    nsname = xmlGetNamespace(ctxt, prefix);
9741
2.33M
    if ((prefix != NULL) && (nsname == NULL)) {
9742
22.8k
  xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9743
22.8k
           "Namespace prefix %s on %s is not defined\n",
9744
22.8k
     prefix, localname, NULL);
9745
22.8k
    }
9746
2.33M
    *pref = prefix;
9747
2.33M
    *URI = nsname;
9748
9749
    /*
9750
     * SAX: Start of Element !
9751
     */
9752
2.33M
    if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9753
2.33M
  (!ctxt->disableSAX)) {
9754
1.88M
  if (nbNs > 0)
9755
16.0k
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9756
16.0k
        nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9757
16.0k
        nbatts / 5, nbdef, atts);
9758
1.86M
  else
9759
1.86M
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9760
1.86M
                    nsname, 0, NULL, nbatts / 5, nbdef, atts);
9761
1.88M
    }
9762
9763
2.33M
done:
9764
    /*
9765
     * Free up attribute allocated strings if needed
9766
     */
9767
2.33M
    if (attval != 0) {
9768
86.4k
  for (i = 3,j = 0; j < nratts;i += 5,j++)
9769
53.6k
      if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9770
35.3k
          xmlFree((xmlChar *) atts[i]);
9771
32.8k
    }
9772
9773
2.33M
    return(localname);
9774
2.33M
}
9775
9776
/**
9777
 * xmlParseEndTag2:
9778
 * @ctxt:  an XML parser context
9779
 * @line:  line of the start tag
9780
 * @nsNr:  number of namespaces on the start tag
9781
 *
9782
 * Parse an end tag. Always consumes '</'.
9783
 *
9784
 * [42] ETag ::= '</' Name S? '>'
9785
 *
9786
 * With namespace
9787
 *
9788
 * [NS 9] ETag ::= '</' QName S? '>'
9789
 */
9790
9791
static void
9792
1.82M
xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlStartTag *tag) {
9793
1.82M
    const xmlChar *name;
9794
9795
1.82M
    GROW;
9796
1.82M
    if ((RAW != '<') || (NXT(1) != '/')) {
9797
0
  xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9798
0
  return;
9799
0
    }
9800
1.82M
    SKIP(2);
9801
9802
1.82M
    if (tag->prefix == NULL)
9803
1.70M
        name = xmlParseNameAndCompare(ctxt, ctxt->name);
9804
121k
    else
9805
121k
        name = xmlParseQNameAndCompare(ctxt, ctxt->name, tag->prefix);
9806
9807
    /*
9808
     * We should definitely be at the ending "S? '>'" part
9809
     */
9810
1.82M
    GROW;
9811
1.82M
    if (ctxt->instate == XML_PARSER_EOF)
9812
0
        return;
9813
1.82M
    SKIP_BLANKS;
9814
1.82M
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9815
9.95k
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9816
9.95k
    } else
9817
1.81M
  NEXT1;
9818
9819
    /*
9820
     * [ WFC: Element Type Match ]
9821
     * The Name in an element's end-tag must match the element type in the
9822
     * start-tag.
9823
     *
9824
     */
9825
1.82M
    if (name != (xmlChar*)1) {
9826
28.0k
        if (name == NULL) name = BAD_CAST "unparsable";
9827
28.0k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9828
28.0k
         "Opening and ending tag mismatch: %s line %d and %s\n",
9829
28.0k
                    ctxt->name, tag->line, name);
9830
28.0k
    }
9831
9832
    /*
9833
     * SAX: End of Tag
9834
     */
9835
1.82M
    if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9836
1.82M
  (!ctxt->disableSAX))
9837
1.41M
  ctxt->sax->endElementNs(ctxt->userData, ctxt->name, tag->prefix,
9838
1.41M
                                tag->URI);
9839
9840
1.82M
    spacePop(ctxt);
9841
1.82M
    if (tag->nsNr != 0)
9842
6.18k
  nsPop(ctxt, tag->nsNr);
9843
1.82M
}
9844
9845
/**
9846
 * xmlParseCDSect:
9847
 * @ctxt:  an XML parser context
9848
 *
9849
 * DEPRECATED: Internal function, don't use.
9850
 *
9851
 * Parse escaped pure raw content. Always consumes '<!['.
9852
 *
9853
 * [18] CDSect ::= CDStart CData CDEnd
9854
 *
9855
 * [19] CDStart ::= '<![CDATA['
9856
 *
9857
 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9858
 *
9859
 * [21] CDEnd ::= ']]>'
9860
 */
9861
void
9862
5.63k
xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9863
5.63k
    xmlChar *buf = NULL;
9864
5.63k
    int len = 0;
9865
5.63k
    int size = XML_PARSER_BUFFER_SIZE;
9866
5.63k
    int r, rl;
9867
5.63k
    int s, sl;
9868
5.63k
    int cur, l;
9869
5.63k
    int count = 0;
9870
5.63k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9871
881
                    XML_MAX_HUGE_LENGTH :
9872
5.63k
                    XML_MAX_TEXT_LENGTH;
9873
9874
5.63k
    if ((CUR != '<') || (NXT(1) != '!') || (NXT(2) != '['))
9875
0
        return;
9876
5.63k
    SKIP(3);
9877
9878
5.63k
    if (!CMP6(CUR_PTR, 'C', 'D', 'A', 'T', 'A', '['))
9879
0
        return;
9880
5.63k
    SKIP(6);
9881
9882
5.63k
    ctxt->instate = XML_PARSER_CDATA_SECTION;
9883
5.63k
    r = CUR_CHAR(rl);
9884
5.63k
    if (!IS_CHAR(r)) {
9885
44
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9886
44
        goto out;
9887
44
    }
9888
5.59k
    NEXTL(rl);
9889
5.59k
    s = CUR_CHAR(sl);
9890
5.59k
    if (!IS_CHAR(s)) {
9891
39
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9892
39
        goto out;
9893
39
    }
9894
5.55k
    NEXTL(sl);
9895
5.55k
    cur = CUR_CHAR(l);
9896
5.55k
    buf = (xmlChar *) xmlMallocAtomic(size);
9897
5.55k
    if (buf == NULL) {
9898
0
  xmlErrMemory(ctxt, NULL);
9899
0
        goto out;
9900
0
    }
9901
4.12M
    while (IS_CHAR(cur) &&
9902
4.12M
           ((r != ']') || (s != ']') || (cur != '>'))) {
9903
4.11M
  if (len + 5 >= size) {
9904
15.9k
      xmlChar *tmp;
9905
9906
15.9k
      tmp = (xmlChar *) xmlRealloc(buf, size * 2);
9907
15.9k
      if (tmp == NULL) {
9908
0
    xmlErrMemory(ctxt, NULL);
9909
0
                goto out;
9910
0
      }
9911
15.9k
      buf = tmp;
9912
15.9k
      size *= 2;
9913
15.9k
  }
9914
4.11M
  COPY_BUF(rl,buf,len,r);
9915
4.11M
  r = s;
9916
4.11M
  rl = sl;
9917
4.11M
  s = cur;
9918
4.11M
  sl = l;
9919
4.11M
  count++;
9920
4.11M
  if (count > 50) {
9921
77.7k
      SHRINK;
9922
77.7k
      GROW;
9923
77.7k
            if (ctxt->instate == XML_PARSER_EOF) {
9924
0
                goto out;
9925
0
            }
9926
77.7k
      count = 0;
9927
77.7k
  }
9928
4.11M
  NEXTL(l);
9929
4.11M
  cur = CUR_CHAR(l);
9930
4.11M
        if (len > maxLength) {
9931
0
            xmlFatalErrMsg(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9932
0
                           "CData section too big found\n");
9933
0
            goto out;
9934
0
        }
9935
4.11M
    }
9936
5.55k
    buf[len] = 0;
9937
5.55k
    if (cur != '>') {
9938
1.48k
  xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9939
1.48k
                       "CData section not finished\n%.50s\n", buf);
9940
1.48k
        goto out;
9941
1.48k
    }
9942
4.07k
    NEXTL(l);
9943
9944
    /*
9945
     * OK the buffer is to be consumed as cdata.
9946
     */
9947
4.07k
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9948
2.44k
  if (ctxt->sax->cdataBlock != NULL)
9949
1.83k
      ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9950
612
  else if (ctxt->sax->characters != NULL)
9951
612
      ctxt->sax->characters(ctxt->userData, buf, len);
9952
2.44k
    }
9953
9954
5.63k
out:
9955
5.63k
    if (ctxt->instate != XML_PARSER_EOF)
9956
5.63k
        ctxt->instate = XML_PARSER_CONTENT;
9957
5.63k
    xmlFree(buf);
9958
5.63k
}
9959
9960
/**
9961
 * xmlParseContentInternal:
9962
 * @ctxt:  an XML parser context
9963
 *
9964
 * Parse a content sequence. Stops at EOF or '</'. Leaves checking of
9965
 * unexpected EOF to the caller.
9966
 */
9967
9968
static void
9969
56.4k
xmlParseContentInternal(xmlParserCtxtPtr ctxt) {
9970
56.4k
    int nameNr = ctxt->nameNr;
9971
9972
56.4k
    GROW;
9973
6.44M
    while ((RAW != 0) &&
9974
6.44M
     (ctxt->instate != XML_PARSER_EOF)) {
9975
6.40M
  const xmlChar *cur = ctxt->input->cur;
9976
9977
  /*
9978
   * First case : a Processing Instruction.
9979
   */
9980
6.40M
  if ((*cur == '<') && (cur[1] == '?')) {
9981
12.4k
      xmlParsePI(ctxt);
9982
12.4k
  }
9983
9984
  /*
9985
   * Second case : a CDSection
9986
   */
9987
  /* 2.6.0 test was *cur not RAW */
9988
6.39M
  else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9989
5.63k
      xmlParseCDSect(ctxt);
9990
5.63k
  }
9991
9992
  /*
9993
   * Third case :  a comment
9994
   */
9995
6.38M
  else if ((*cur == '<') && (NXT(1) == '!') &&
9996
6.38M
     (NXT(2) == '-') && (NXT(3) == '-')) {
9997
69.0k
      xmlParseComment(ctxt);
9998
69.0k
      ctxt->instate = XML_PARSER_CONTENT;
9999
69.0k
  }
10000
10001
  /*
10002
   * Fourth case :  a sub-element.
10003
   */
10004
6.31M
  else if (*cur == '<') {
10005
2.84M
            if (NXT(1) == '/') {
10006
1.31M
                if (ctxt->nameNr <= nameNr)
10007
14.1k
                    break;
10008
1.30M
          xmlParseElementEnd(ctxt);
10009
1.53M
            } else {
10010
1.53M
          xmlParseElementStart(ctxt);
10011
1.53M
            }
10012
2.84M
  }
10013
10014
  /*
10015
   * Fifth case : a reference. If if has not been resolved,
10016
   *    parsing returns it's Name, create the node
10017
   */
10018
10019
3.47M
  else if (*cur == '&') {
10020
796k
      xmlParseReference(ctxt);
10021
796k
  }
10022
10023
  /*
10024
   * Last case, text. Note that References are handled directly.
10025
   */
10026
2.67M
  else {
10027
2.67M
      xmlParseCharData(ctxt, 0);
10028
2.67M
  }
10029
10030
6.39M
  GROW;
10031
6.39M
  SHRINK;
10032
6.39M
    }
10033
56.4k
}
10034
10035
/**
10036
 * xmlParseContent:
10037
 * @ctxt:  an XML parser context
10038
 *
10039
 * Parse a content sequence. Stops at EOF or '</'.
10040
 *
10041
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10042
 */
10043
10044
void
10045
27.4k
xmlParseContent(xmlParserCtxtPtr ctxt) {
10046
27.4k
    int nameNr = ctxt->nameNr;
10047
10048
27.4k
    xmlParseContentInternal(ctxt);
10049
10050
27.4k
    if ((ctxt->instate != XML_PARSER_EOF) && (ctxt->nameNr > nameNr)) {
10051
183
        const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
10052
183
        int line = ctxt->pushTab[ctxt->nameNr - 1].line;
10053
183
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
10054
183
                "Premature end of data in tag %s line %d\n",
10055
183
    name, line, NULL);
10056
183
    }
10057
27.4k
}
10058
10059
/**
10060
 * xmlParseElement:
10061
 * @ctxt:  an XML parser context
10062
 *
10063
 * DEPRECATED: Internal function, don't use.
10064
 *
10065
 * parse an XML element
10066
 *
10067
 * [39] element ::= EmptyElemTag | STag content ETag
10068
 *
10069
 * [ WFC: Element Type Match ]
10070
 * The Name in an element's end-tag must match the element type in the
10071
 * start-tag.
10072
 *
10073
 */
10074
10075
void
10076
48.4k
xmlParseElement(xmlParserCtxtPtr ctxt) {
10077
48.4k
    if (xmlParseElementStart(ctxt) != 0)
10078
19.4k
        return;
10079
10080
28.9k
    xmlParseContentInternal(ctxt);
10081
28.9k
    if (ctxt->instate == XML_PARSER_EOF)
10082
207
  return;
10083
10084
28.7k
    if (CUR == 0) {
10085
14.7k
        const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
10086
14.7k
        int line = ctxt->pushTab[ctxt->nameNr - 1].line;
10087
14.7k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
10088
14.7k
                "Premature end of data in tag %s line %d\n",
10089
14.7k
    name, line, NULL);
10090
14.7k
        return;
10091
14.7k
    }
10092
10093
13.9k
    xmlParseElementEnd(ctxt);
10094
13.9k
}
10095
10096
/**
10097
 * xmlParseElementStart:
10098
 * @ctxt:  an XML parser context
10099
 *
10100
 * Parse the start of an XML element. Returns -1 in case of error, 0 if an
10101
 * opening tag was parsed, 1 if an empty element was parsed.
10102
 *
10103
 * Always consumes '<'.
10104
 */
10105
static int
10106
1.58M
xmlParseElementStart(xmlParserCtxtPtr ctxt) {
10107
1.58M
    const xmlChar *name;
10108
1.58M
    const xmlChar *prefix = NULL;
10109
1.58M
    const xmlChar *URI = NULL;
10110
1.58M
    xmlParserNodeInfo node_info;
10111
1.58M
    int line, tlen = 0;
10112
1.58M
    xmlNodePtr ret;
10113
1.58M
    int nsNr = ctxt->nsNr;
10114
10115
1.58M
    if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
10116
1.58M
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
10117
0
  xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
10118
0
     "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
10119
0
        xmlParserMaxDepth);
10120
0
  xmlHaltParser(ctxt);
10121
0
  return(-1);
10122
0
    }
10123
10124
    /* Capture start position */
10125
1.58M
    if (ctxt->record_info) {
10126
0
        node_info.begin_pos = ctxt->input->consumed +
10127
0
                          (CUR_PTR - ctxt->input->base);
10128
0
  node_info.begin_line = ctxt->input->line;
10129
0
    }
10130
10131
1.58M
    if (ctxt->spaceNr == 0)
10132
0
  spacePush(ctxt, -1);
10133
1.58M
    else if (*ctxt->space == -2)
10134
80.1k
  spacePush(ctxt, -1);
10135
1.49M
    else
10136
1.49M
  spacePush(ctxt, *ctxt->space);
10137
10138
1.58M
    line = ctxt->input->line;
10139
1.58M
#ifdef LIBXML_SAX1_ENABLED
10140
1.58M
    if (ctxt->sax2)
10141
1.05M
#endif /* LIBXML_SAX1_ENABLED */
10142
1.05M
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
10143
521k
#ifdef LIBXML_SAX1_ENABLED
10144
521k
    else
10145
521k
  name = xmlParseStartTag(ctxt);
10146
1.58M
#endif /* LIBXML_SAX1_ENABLED */
10147
1.58M
    if (ctxt->instate == XML_PARSER_EOF)
10148
132
  return(-1);
10149
1.57M
    if (name == NULL) {
10150
14.9k
  spacePop(ctxt);
10151
14.9k
        return(-1);
10152
14.9k
    }
10153
1.56M
    nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
10154
1.56M
    ret = ctxt->node;
10155
10156
1.56M
#ifdef LIBXML_VALID_ENABLED
10157
    /*
10158
     * [ VC: Root Element Type ]
10159
     * The Name in the document type declaration must match the element
10160
     * type of the root element.
10161
     */
10162
1.56M
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10163
1.56M
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
10164
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
10165
1.56M
#endif /* LIBXML_VALID_ENABLED */
10166
10167
    /*
10168
     * Check for an Empty Element.
10169
     */
10170
1.56M
    if ((RAW == '/') && (NXT(1) == '>')) {
10171
187k
        SKIP(2);
10172
187k
  if (ctxt->sax2) {
10173
141k
      if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
10174
141k
    (!ctxt->disableSAX))
10175
123k
    ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
10176
141k
#ifdef LIBXML_SAX1_ENABLED
10177
141k
  } else {
10178
46.5k
      if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
10179
46.5k
    (!ctxt->disableSAX))
10180
37.9k
    ctxt->sax->endElement(ctxt->userData, name);
10181
46.5k
#endif /* LIBXML_SAX1_ENABLED */
10182
46.5k
  }
10183
187k
  namePop(ctxt);
10184
187k
  spacePop(ctxt);
10185
187k
  if (nsNr != ctxt->nsNr)
10186
345
      nsPop(ctxt, ctxt->nsNr - nsNr);
10187
187k
  if ( ret != NULL && ctxt->record_info ) {
10188
0
     node_info.end_pos = ctxt->input->consumed +
10189
0
            (CUR_PTR - ctxt->input->base);
10190
0
     node_info.end_line = ctxt->input->line;
10191
0
     node_info.node = ret;
10192
0
     xmlParserAddNodeInfo(ctxt, &node_info);
10193
0
  }
10194
187k
  return(1);
10195
187k
    }
10196
1.37M
    if (RAW == '>') {
10197
1.34M
        NEXT1;
10198
1.34M
    } else {
10199
30.3k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
10200
30.3k
         "Couldn't find end of Start Tag %s line %d\n",
10201
30.3k
                    name, line, NULL);
10202
10203
  /*
10204
   * end of parsing of this node.
10205
   */
10206
30.3k
  nodePop(ctxt);
10207
30.3k
  namePop(ctxt);
10208
30.3k
  spacePop(ctxt);
10209
30.3k
  if (nsNr != ctxt->nsNr)
10210
1.76k
      nsPop(ctxt, ctxt->nsNr - nsNr);
10211
10212
  /*
10213
   * Capture end position and add node
10214
   */
10215
30.3k
  if ( ret != NULL && ctxt->record_info ) {
10216
0
     node_info.end_pos = ctxt->input->consumed +
10217
0
            (CUR_PTR - ctxt->input->base);
10218
0
     node_info.end_line = ctxt->input->line;
10219
0
     node_info.node = ret;
10220
0
     xmlParserAddNodeInfo(ctxt, &node_info);
10221
0
  }
10222
30.3k
  return(-1);
10223
30.3k
    }
10224
10225
1.34M
    return(0);
10226
1.37M
}
10227
10228
/**
10229
 * xmlParseElementEnd:
10230
 * @ctxt:  an XML parser context
10231
 *
10232
 * Parse the end of an XML element. Always consumes '</'.
10233
 */
10234
static void
10235
1.31M
xmlParseElementEnd(xmlParserCtxtPtr ctxt) {
10236
1.31M
    xmlParserNodeInfo node_info;
10237
1.31M
    xmlNodePtr ret = ctxt->node;
10238
10239
1.31M
    if (ctxt->nameNr <= 0) {
10240
0
        if ((RAW == '<') && (NXT(1) == '/'))
10241
0
            SKIP(2);
10242
0
        return;
10243
0
    }
10244
10245
    /*
10246
     * parse the end of tag: '</' should be here.
10247
     */
10248
1.31M
    if (ctxt->sax2) {
10249
872k
  xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
10250
872k
  namePop(ctxt);
10251
872k
    }
10252
443k
#ifdef LIBXML_SAX1_ENABLED
10253
443k
    else
10254
443k
  xmlParseEndTag1(ctxt, 0);
10255
1.31M
#endif /* LIBXML_SAX1_ENABLED */
10256
10257
    /*
10258
     * Capture end position and add node
10259
     */
10260
1.31M
    if ( ret != NULL && ctxt->record_info ) {
10261
0
       node_info.end_pos = ctxt->input->consumed +
10262
0
                          (CUR_PTR - ctxt->input->base);
10263
0
       node_info.end_line = ctxt->input->line;
10264
0
       node_info.node = ret;
10265
0
       xmlParserAddNodeInfo(ctxt, &node_info);
10266
0
    }
10267
1.31M
}
10268
10269
/**
10270
 * xmlParseVersionNum:
10271
 * @ctxt:  an XML parser context
10272
 *
10273
 * DEPRECATED: Internal function, don't use.
10274
 *
10275
 * parse the XML version value.
10276
 *
10277
 * [26] VersionNum ::= '1.' [0-9]+
10278
 *
10279
 * In practice allow [0-9].[0-9]+ at that level
10280
 *
10281
 * Returns the string giving the XML version number, or NULL
10282
 */
10283
xmlChar *
10284
101k
xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10285
101k
    xmlChar *buf = NULL;
10286
101k
    int len = 0;
10287
101k
    int size = 10;
10288
101k
    xmlChar cur;
10289
10290
101k
    buf = (xmlChar *) xmlMallocAtomic(size);
10291
101k
    if (buf == NULL) {
10292
0
  xmlErrMemory(ctxt, NULL);
10293
0
  return(NULL);
10294
0
    }
10295
101k
    cur = CUR;
10296
101k
    if (!((cur >= '0') && (cur <= '9'))) {
10297
1.42k
  xmlFree(buf);
10298
1.42k
  return(NULL);
10299
1.42k
    }
10300
99.9k
    buf[len++] = cur;
10301
99.9k
    NEXT;
10302
99.9k
    cur=CUR;
10303
99.9k
    if (cur != '.') {
10304
953
  xmlFree(buf);
10305
953
  return(NULL);
10306
953
    }
10307
99.0k
    buf[len++] = cur;
10308
99.0k
    NEXT;
10309
99.0k
    cur=CUR;
10310
199k
    while ((cur >= '0') && (cur <= '9')) {
10311
100k
  if (len + 1 >= size) {
10312
168
      xmlChar *tmp;
10313
10314
168
      size *= 2;
10315
168
      tmp = (xmlChar *) xmlRealloc(buf, size);
10316
168
      if (tmp == NULL) {
10317
0
          xmlFree(buf);
10318
0
    xmlErrMemory(ctxt, NULL);
10319
0
    return(NULL);
10320
0
      }
10321
168
      buf = tmp;
10322
168
  }
10323
100k
  buf[len++] = cur;
10324
100k
  NEXT;
10325
100k
  cur=CUR;
10326
100k
    }
10327
99.0k
    buf[len] = 0;
10328
99.0k
    return(buf);
10329
99.0k
}
10330
10331
/**
10332
 * xmlParseVersionInfo:
10333
 * @ctxt:  an XML parser context
10334
 *
10335
 * DEPRECATED: Internal function, don't use.
10336
 *
10337
 * parse the XML version.
10338
 *
10339
 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
10340
 *
10341
 * [25] Eq ::= S? '=' S?
10342
 *
10343
 * Returns the version string, e.g. "1.0"
10344
 */
10345
10346
xmlChar *
10347
116k
xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10348
116k
    xmlChar *version = NULL;
10349
10350
116k
    if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
10351
103k
  SKIP(7);
10352
103k
  SKIP_BLANKS;
10353
103k
  if (RAW != '=') {
10354
1.14k
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10355
1.14k
      return(NULL);
10356
1.14k
        }
10357
102k
  NEXT;
10358
102k
  SKIP_BLANKS;
10359
102k
  if (RAW == '"') {
10360
94.5k
      NEXT;
10361
94.5k
      version = xmlParseVersionNum(ctxt);
10362
94.5k
      if (RAW != '"') {
10363
3.51k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10364
3.51k
      } else
10365
91.0k
          NEXT;
10366
94.5k
  } else if (RAW == '\''){
10367
6.85k
      NEXT;
10368
6.85k
      version = xmlParseVersionNum(ctxt);
10369
6.85k
      if (RAW != '\'') {
10370
1.00k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10371
1.00k
      } else
10372
5.84k
          NEXT;
10373
6.85k
  } else {
10374
1.01k
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10375
1.01k
  }
10376
102k
    }
10377
115k
    return(version);
10378
116k
}
10379
10380
/**
10381
 * xmlParseEncName:
10382
 * @ctxt:  an XML parser context
10383
 *
10384
 * DEPRECATED: Internal function, don't use.
10385
 *
10386
 * parse the XML encoding name
10387
 *
10388
 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10389
 *
10390
 * Returns the encoding name value or NULL
10391
 */
10392
xmlChar *
10393
47.3k
xmlParseEncName(xmlParserCtxtPtr ctxt) {
10394
47.3k
    xmlChar *buf = NULL;
10395
47.3k
    int len = 0;
10396
47.3k
    int size = 10;
10397
47.3k
    xmlChar cur;
10398
10399
47.3k
    cur = CUR;
10400
47.3k
    if (((cur >= 'a') && (cur <= 'z')) ||
10401
47.3k
        ((cur >= 'A') && (cur <= 'Z'))) {
10402
47.0k
  buf = (xmlChar *) xmlMallocAtomic(size);
10403
47.0k
  if (buf == NULL) {
10404
0
      xmlErrMemory(ctxt, NULL);
10405
0
      return(NULL);
10406
0
  }
10407
10408
47.0k
  buf[len++] = cur;
10409
47.0k
  NEXT;
10410
47.0k
  cur = CUR;
10411
277k
  while (((cur >= 'a') && (cur <= 'z')) ||
10412
277k
         ((cur >= 'A') && (cur <= 'Z')) ||
10413
277k
         ((cur >= '0') && (cur <= '9')) ||
10414
277k
         (cur == '.') || (cur == '_') ||
10415
277k
         (cur == '-')) {
10416
230k
      if (len + 1 >= size) {
10417
6.72k
          xmlChar *tmp;
10418
10419
6.72k
    size *= 2;
10420
6.72k
    tmp = (xmlChar *) xmlRealloc(buf, size);
10421
6.72k
    if (tmp == NULL) {
10422
0
        xmlErrMemory(ctxt, NULL);
10423
0
        xmlFree(buf);
10424
0
        return(NULL);
10425
0
    }
10426
6.72k
    buf = tmp;
10427
6.72k
      }
10428
230k
      buf[len++] = cur;
10429
230k
      NEXT;
10430
230k
      cur = CUR;
10431
230k
      if (cur == 0) {
10432
164
          SHRINK;
10433
164
    GROW;
10434
164
    cur = CUR;
10435
164
      }
10436
230k
        }
10437
47.0k
  buf[len] = 0;
10438
47.0k
    } else {
10439
252
  xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10440
252
    }
10441
47.3k
    return(buf);
10442
47.3k
}
10443
10444
/**
10445
 * xmlParseEncodingDecl:
10446
 * @ctxt:  an XML parser context
10447
 *
10448
 * DEPRECATED: Internal function, don't use.
10449
 *
10450
 * parse the XML encoding declaration
10451
 *
10452
 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' |  "'" EncName "'")
10453
 *
10454
 * this setups the conversion filters.
10455
 *
10456
 * Returns the encoding value or NULL
10457
 */
10458
10459
const xmlChar *
10460
80.9k
xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10461
80.9k
    xmlChar *encoding = NULL;
10462
10463
80.9k
    SKIP_BLANKS;
10464
80.9k
    if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
10465
47.9k
  SKIP(8);
10466
47.9k
  SKIP_BLANKS;
10467
47.9k
  if (RAW != '=') {
10468
242
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10469
242
      return(NULL);
10470
242
        }
10471
47.6k
  NEXT;
10472
47.6k
  SKIP_BLANKS;
10473
47.6k
  if (RAW == '"') {
10474
46.6k
      NEXT;
10475
46.6k
      encoding = xmlParseEncName(ctxt);
10476
46.6k
      if (RAW != '"') {
10477
1.40k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10478
1.40k
    xmlFree((xmlChar *) encoding);
10479
1.40k
    return(NULL);
10480
1.40k
      } else
10481
45.2k
          NEXT;
10482
46.6k
  } else if (RAW == '\''){
10483
651
      NEXT;
10484
651
      encoding = xmlParseEncName(ctxt);
10485
651
      if (RAW != '\'') {
10486
54
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10487
54
    xmlFree((xmlChar *) encoding);
10488
54
    return(NULL);
10489
54
      } else
10490
597
          NEXT;
10491
651
  } else {
10492
367
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10493
367
  }
10494
10495
        /*
10496
         * Non standard parsing, allowing the user to ignore encoding
10497
         */
10498
46.2k
        if (ctxt->options & XML_PARSE_IGNORE_ENC) {
10499
13.0k
      xmlFree((xmlChar *) encoding);
10500
13.0k
            return(NULL);
10501
13.0k
  }
10502
10503
  /*
10504
   * UTF-16 encoding switch has already taken place at this stage,
10505
   * more over the little-endian/big-endian selection is already done
10506
   */
10507
33.2k
        if ((encoding != NULL) &&
10508
33.2k
      ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10509
32.9k
       (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
10510
      /*
10511
       * If no encoding was passed to the parser, that we are
10512
       * using UTF-16 and no decoder is present i.e. the
10513
       * document is apparently UTF-8 compatible, then raise an
10514
       * encoding mismatch fatal error
10515
       */
10516
0
      if ((ctxt->encoding == NULL) &&
10517
0
          (ctxt->input->buf != NULL) &&
10518
0
          (ctxt->input->buf->encoder == NULL)) {
10519
0
    xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10520
0
      "Document labelled UTF-16 but has UTF-8 content\n");
10521
0
      }
10522
0
      if (ctxt->encoding != NULL)
10523
0
    xmlFree((xmlChar *) ctxt->encoding);
10524
0
      ctxt->encoding = encoding;
10525
0
  }
10526
  /*
10527
   * UTF-8 encoding is handled natively
10528
   */
10529
33.2k
        else if ((encoding != NULL) &&
10530
33.2k
      ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10531
32.9k
       (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
10532
25.8k
      if (ctxt->encoding != NULL)
10533
0
    xmlFree((xmlChar *) ctxt->encoding);
10534
25.8k
      ctxt->encoding = encoding;
10535
25.8k
  }
10536
7.36k
  else if (encoding != NULL) {
10537
7.06k
      xmlCharEncodingHandlerPtr handler;
10538
10539
7.06k
      if (ctxt->input->encoding != NULL)
10540
0
    xmlFree((xmlChar *) ctxt->input->encoding);
10541
7.06k
      ctxt->input->encoding = encoding;
10542
10543
7.06k
            handler = xmlFindCharEncodingHandler((const char *) encoding);
10544
7.06k
      if (handler != NULL) {
10545
6.77k
    if (xmlSwitchToEncoding(ctxt, handler) < 0) {
10546
        /* failed to convert */
10547
3
        ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
10548
3
        return(NULL);
10549
3
    }
10550
6.77k
      } else {
10551
287
    xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
10552
287
      "Unsupported encoding %s\n", encoding);
10553
287
    return(NULL);
10554
287
      }
10555
7.06k
  }
10556
33.2k
    }
10557
65.9k
    return(encoding);
10558
80.9k
}
10559
10560
/**
10561
 * xmlParseSDDecl:
10562
 * @ctxt:  an XML parser context
10563
 *
10564
 * DEPRECATED: Internal function, don't use.
10565
 *
10566
 * parse the XML standalone declaration
10567
 *
10568
 * [32] SDDecl ::= S 'standalone' Eq
10569
 *                 (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10570
 *
10571
 * [ VC: Standalone Document Declaration ]
10572
 * TODO The standalone document declaration must have the value "no"
10573
 * if any external markup declarations contain declarations of:
10574
 *  - attributes with default values, if elements to which these
10575
 *    attributes apply appear in the document without specifications
10576
 *    of values for these attributes, or
10577
 *  - entities (other than amp, lt, gt, apos, quot), if references
10578
 *    to those entities appear in the document, or
10579
 *  - attributes with values subject to normalization, where the
10580
 *    attribute appears in the document with a value which will change
10581
 *    as a result of normalization, or
10582
 *  - element types with element content, if white space occurs directly
10583
 *    within any instance of those types.
10584
 *
10585
 * Returns:
10586
 *   1 if standalone="yes"
10587
 *   0 if standalone="no"
10588
 *  -2 if standalone attribute is missing or invalid
10589
 *    (A standalone value of -2 means that the XML declaration was found,
10590
 *     but no value was specified for the standalone attribute).
10591
 */
10592
10593
int
10594
70.7k
xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10595
70.7k
    int standalone = -2;
10596
10597
70.7k
    SKIP_BLANKS;
10598
70.7k
    if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10599
8.19k
  SKIP(10);
10600
8.19k
        SKIP_BLANKS;
10601
8.19k
  if (RAW != '=') {
10602
147
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10603
147
      return(standalone);
10604
147
        }
10605
8.05k
  NEXT;
10606
8.05k
  SKIP_BLANKS;
10607
8.05k
        if (RAW == '\''){
10608
1.08k
      NEXT;
10609
1.08k
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10610
105
          standalone = 0;
10611
105
                SKIP(2);
10612
975
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10613
975
                 (NXT(2) == 's')) {
10614
855
          standalone = 1;
10615
855
    SKIP(3);
10616
855
            } else {
10617
120
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10618
120
      }
10619
1.08k
      if (RAW != '\'') {
10620
144
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10621
144
      } else
10622
936
          NEXT;
10623
6.97k
  } else if (RAW == '"'){
10624
6.84k
      NEXT;
10625
6.84k
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10626
5.16k
          standalone = 0;
10627
5.16k
    SKIP(2);
10628
5.16k
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10629
1.68k
                 (NXT(2) == 's')) {
10630
1.49k
          standalone = 1;
10631
1.49k
                SKIP(3);
10632
1.49k
            } else {
10633
198
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10634
198
      }
10635
6.84k
      if (RAW != '"') {
10636
345
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10637
345
      } else
10638
6.50k
          NEXT;
10639
6.84k
  } else {
10640
123
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10641
123
        }
10642
8.05k
    }
10643
70.6k
    return(standalone);
10644
70.7k
}
10645
10646
/**
10647
 * xmlParseXMLDecl:
10648
 * @ctxt:  an XML parser context
10649
 *
10650
 * DEPRECATED: Internal function, don't use.
10651
 *
10652
 * parse an XML declaration header
10653
 *
10654
 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10655
 */
10656
10657
void
10658
112k
xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10659
112k
    xmlChar *version;
10660
10661
    /*
10662
     * This value for standalone indicates that the document has an
10663
     * XML declaration but it does not have a standalone attribute.
10664
     * It will be overwritten later if a standalone attribute is found.
10665
     */
10666
112k
    ctxt->input->standalone = -2;
10667
10668
    /*
10669
     * We know that '<?xml' is here.
10670
     */
10671
112k
    SKIP(5);
10672
10673
112k
    if (!IS_BLANK_CH(RAW)) {
10674
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10675
0
                 "Blank needed after '<?xml'\n");
10676
0
    }
10677
112k
    SKIP_BLANKS;
10678
10679
    /*
10680
     * We must have the VersionInfo here.
10681
     */
10682
112k
    version = xmlParseVersionInfo(ctxt);
10683
112k
    if (version == NULL) {
10684
16.9k
  xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10685
95.5k
    } else {
10686
95.5k
  if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10687
      /*
10688
       * Changed here for XML-1.0 5th edition
10689
       */
10690
1.55k
      if (ctxt->options & XML_PARSE_OLD10) {
10691
280
    xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10692
280
                "Unsupported version '%s'\n",
10693
280
                version);
10694
1.27k
      } else {
10695
1.27k
          if ((version[0] == '1') && ((version[1] == '.'))) {
10696
1.03k
        xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10697
1.03k
                      "Unsupported version '%s'\n",
10698
1.03k
          version, NULL);
10699
1.03k
    } else {
10700
236
        xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10701
236
              "Unsupported version '%s'\n",
10702
236
              version);
10703
236
    }
10704
1.27k
      }
10705
1.55k
  }
10706
95.5k
  if (ctxt->version != NULL)
10707
0
      xmlFree((void *) ctxt->version);
10708
95.5k
  ctxt->version = version;
10709
95.5k
    }
10710
10711
    /*
10712
     * We may have the encoding declaration
10713
     */
10714
112k
    if (!IS_BLANK_CH(RAW)) {
10715
55.6k
        if ((RAW == '?') && (NXT(1) == '>')) {
10716
35.3k
      SKIP(2);
10717
35.3k
      return;
10718
35.3k
  }
10719
20.2k
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10720
20.2k
    }
10721
77.0k
    xmlParseEncodingDecl(ctxt);
10722
77.0k
    if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10723
77.0k
         (ctxt->instate == XML_PARSER_EOF)) {
10724
  /*
10725
   * The XML REC instructs us to stop parsing right here
10726
   */
10727
240
        return;
10728
240
    }
10729
10730
    /*
10731
     * We may have the standalone status.
10732
     */
10733
76.8k
    if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10734
6.32k
        if ((RAW == '?') && (NXT(1) == '>')) {
10735
6.07k
      SKIP(2);
10736
6.07k
      return;
10737
6.07k
  }
10738
250
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10739
250
    }
10740
10741
    /*
10742
     * We can grow the input buffer freely at that point
10743
     */
10744
70.7k
    GROW;
10745
10746
70.7k
    SKIP_BLANKS;
10747
70.7k
    ctxt->input->standalone = xmlParseSDDecl(ctxt);
10748
10749
70.7k
    SKIP_BLANKS;
10750
70.7k
    if ((RAW == '?') && (NXT(1) == '>')) {
10751
43.5k
        SKIP(2);
10752
43.5k
    } else if (RAW == '>') {
10753
        /* Deprecated old WD ... */
10754
322
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10755
322
  NEXT;
10756
26.8k
    } else {
10757
26.8k
        int c;
10758
10759
26.8k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10760
909k
        while ((c = CUR) != 0) {
10761
906k
            NEXT;
10762
906k
            if (c == '>')
10763
24.2k
                break;
10764
906k
        }
10765
26.8k
    }
10766
70.7k
}
10767
10768
/**
10769
 * xmlParseMisc:
10770
 * @ctxt:  an XML parser context
10771
 *
10772
 * DEPRECATED: Internal function, don't use.
10773
 *
10774
 * parse an XML Misc* optional field.
10775
 *
10776
 * [27] Misc ::= Comment | PI |  S
10777
 */
10778
10779
void
10780
141k
xmlParseMisc(xmlParserCtxtPtr ctxt) {
10781
164k
    while (ctxt->instate != XML_PARSER_EOF) {
10782
164k
        SKIP_BLANKS;
10783
164k
        GROW;
10784
164k
        if ((RAW == '<') && (NXT(1) == '?')) {
10785
15.5k
      xmlParsePI(ctxt);
10786
148k
        } else if (CMP4(CUR_PTR, '<', '!', '-', '-')) {
10787
7.62k
      xmlParseComment(ctxt);
10788
141k
        } else {
10789
141k
            break;
10790
141k
        }
10791
164k
    }
10792
141k
}
10793
10794
/**
10795
 * xmlParseDocument:
10796
 * @ctxt:  an XML parser context
10797
 *
10798
 * parse an XML document (and build a tree if using the standard SAX
10799
 * interface).
10800
 *
10801
 * [1] document ::= prolog element Misc*
10802
 *
10803
 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10804
 *
10805
 * Returns 0, -1 in case of error. the parser context is augmented
10806
 *                as a result of the parsing.
10807
 */
10808
10809
int
10810
70.4k
xmlParseDocument(xmlParserCtxtPtr ctxt) {
10811
70.4k
    xmlChar start[4];
10812
70.4k
    xmlCharEncoding enc;
10813
10814
70.4k
    xmlInitParser();
10815
10816
70.4k
    if ((ctxt == NULL) || (ctxt->input == NULL))
10817
0
        return(-1);
10818
10819
70.4k
    GROW;
10820
10821
    /*
10822
     * SAX: detecting the level.
10823
     */
10824
70.4k
    xmlDetectSAX2(ctxt);
10825
10826
    /*
10827
     * SAX: beginning of the document processing.
10828
     */
10829
70.4k
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10830
70.4k
        ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10831
70.4k
    if (ctxt->instate == XML_PARSER_EOF)
10832
0
  return(-1);
10833
10834
70.4k
    if ((ctxt->encoding == NULL) &&
10835
70.4k
        ((ctxt->input->end - ctxt->input->cur) >= 4)) {
10836
  /*
10837
   * Get the 4 first bytes and decode the charset
10838
   * if enc != XML_CHAR_ENCODING_NONE
10839
   * plug some encoding conversion routines.
10840
   */
10841
68.5k
  start[0] = RAW;
10842
68.5k
  start[1] = NXT(1);
10843
68.5k
  start[2] = NXT(2);
10844
68.5k
  start[3] = NXT(3);
10845
68.5k
  enc = xmlDetectCharEncoding(&start[0], 4);
10846
68.5k
  if (enc != XML_CHAR_ENCODING_NONE) {
10847
40.6k
      xmlSwitchEncoding(ctxt, enc);
10848
40.6k
  }
10849
68.5k
    }
10850
10851
10852
70.4k
    if (CUR == 0) {
10853
824
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10854
824
  return(-1);
10855
824
    }
10856
10857
    /*
10858
     * Check for the XMLDecl in the Prolog.
10859
     * do not GROW here to avoid the detected encoder to decode more
10860
     * than just the first line, unless the amount of data is really
10861
     * too small to hold "<?xml version="1.0" encoding="foo"
10862
     */
10863
69.5k
    if ((ctxt->input->end - ctxt->input->cur) < 35) {
10864
9.00k
       GROW;
10865
9.00k
    }
10866
69.5k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10867
10868
  /*
10869
   * Note that we will switch encoding on the fly.
10870
   */
10871
37.6k
  xmlParseXMLDecl(ctxt);
10872
37.6k
  if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10873
37.6k
      (ctxt->instate == XML_PARSER_EOF)) {
10874
      /*
10875
       * The XML REC instructs us to stop parsing right here
10876
       */
10877
83
      return(-1);
10878
83
  }
10879
37.5k
  ctxt->standalone = ctxt->input->standalone;
10880
37.5k
  SKIP_BLANKS;
10881
37.5k
    } else {
10882
31.9k
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10883
31.9k
    }
10884
69.5k
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10885
64.1k
        ctxt->sax->startDocument(ctxt->userData);
10886
69.5k
    if (ctxt->instate == XML_PARSER_EOF)
10887
0
  return(-1);
10888
69.5k
    if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10889
69.5k
        (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10890
0
  ctxt->myDoc->compression = ctxt->input->buf->compressed;
10891
0
    }
10892
10893
    /*
10894
     * The Misc part of the Prolog
10895
     */
10896
69.5k
    xmlParseMisc(ctxt);
10897
10898
    /*
10899
     * Then possibly doc type declaration(s) and more Misc
10900
     * (doctypedecl Misc*)?
10901
     */
10902
69.5k
    GROW;
10903
69.5k
    if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10904
10905
35.6k
  ctxt->inSubset = 1;
10906
35.6k
  xmlParseDocTypeDecl(ctxt);
10907
35.6k
  if (RAW == '[') {
10908
27.1k
      ctxt->instate = XML_PARSER_DTD;
10909
27.1k
      xmlParseInternalSubset(ctxt);
10910
27.1k
      if (ctxt->instate == XML_PARSER_EOF)
10911
8.74k
    return(-1);
10912
27.1k
  }
10913
10914
  /*
10915
   * Create and update the external subset.
10916
   */
10917
26.9k
  ctxt->inSubset = 2;
10918
26.9k
  if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10919
26.9k
      (!ctxt->disableSAX))
10920
24.0k
      ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10921
24.0k
                                ctxt->extSubSystem, ctxt->extSubURI);
10922
26.9k
  if (ctxt->instate == XML_PARSER_EOF)
10923
3.77k
      return(-1);
10924
23.1k
  ctxt->inSubset = 0;
10925
10926
23.1k
        xmlCleanSpecialAttr(ctxt);
10927
10928
23.1k
  ctxt->instate = XML_PARSER_PROLOG;
10929
23.1k
  xmlParseMisc(ctxt);
10930
23.1k
    }
10931
10932
    /*
10933
     * Time to start parsing the tree itself
10934
     */
10935
56.9k
    GROW;
10936
56.9k
    if (RAW != '<') {
10937
8.56k
  xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10938
8.56k
           "Start tag expected, '<' not found\n");
10939
48.4k
    } else {
10940
48.4k
  ctxt->instate = XML_PARSER_CONTENT;
10941
48.4k
  xmlParseElement(ctxt);
10942
48.4k
  ctxt->instate = XML_PARSER_EPILOG;
10943
10944
10945
  /*
10946
   * The Misc part at the end
10947
   */
10948
48.4k
  xmlParseMisc(ctxt);
10949
10950
48.4k
  if (RAW != 0) {
10951
15.9k
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10952
15.9k
  }
10953
48.4k
  ctxt->instate = XML_PARSER_EOF;
10954
48.4k
    }
10955
10956
    /*
10957
     * SAX: end of the document processing.
10958
     */
10959
56.9k
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10960
56.9k
        ctxt->sax->endDocument(ctxt->userData);
10961
10962
    /*
10963
     * Remove locally kept entity definitions if the tree was not built
10964
     */
10965
56.9k
    if ((ctxt->myDoc != NULL) &&
10966
56.9k
  (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10967
427
  xmlFreeDoc(ctxt->myDoc);
10968
427
  ctxt->myDoc = NULL;
10969
427
    }
10970
10971
56.9k
    if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10972
6.13k
        ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10973
6.13k
  if (ctxt->valid)
10974
4.82k
      ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10975
6.13k
  if (ctxt->nsWellFormed)
10976
5.58k
      ctxt->myDoc->properties |= XML_DOC_NSVALID;
10977
6.13k
  if (ctxt->options & XML_PARSE_OLD10)
10978
905
      ctxt->myDoc->properties |= XML_DOC_OLD10;
10979
6.13k
    }
10980
56.9k
    if (! ctxt->wellFormed) {
10981
50.8k
  ctxt->valid = 0;
10982
50.8k
  return(-1);
10983
50.8k
    }
10984
6.13k
    return(0);
10985
56.9k
}
10986
10987
/**
10988
 * xmlParseExtParsedEnt:
10989
 * @ctxt:  an XML parser context
10990
 *
10991
 * parse a general parsed entity
10992
 * An external general parsed entity is well-formed if it matches the
10993
 * production labeled extParsedEnt.
10994
 *
10995
 * [78] extParsedEnt ::= TextDecl? content
10996
 *
10997
 * Returns 0, -1 in case of error. the parser context is augmented
10998
 *                as a result of the parsing.
10999
 */
11000
11001
int
11002
0
xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
11003
0
    xmlChar start[4];
11004
0
    xmlCharEncoding enc;
11005
11006
0
    if ((ctxt == NULL) || (ctxt->input == NULL))
11007
0
        return(-1);
11008
11009
0
    xmlDetectSAX2(ctxt);
11010
11011
0
    GROW;
11012
11013
    /*
11014
     * SAX: beginning of the document processing.
11015
     */
11016
0
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11017
0
        ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
11018
11019
    /*
11020
     * Get the 4 first bytes and decode the charset
11021
     * if enc != XML_CHAR_ENCODING_NONE
11022
     * plug some encoding conversion routines.
11023
     */
11024
0
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11025
0
  start[0] = RAW;
11026
0
  start[1] = NXT(1);
11027
0
  start[2] = NXT(2);
11028
0
  start[3] = NXT(3);
11029
0
  enc = xmlDetectCharEncoding(start, 4);
11030
0
  if (enc != XML_CHAR_ENCODING_NONE) {
11031
0
      xmlSwitchEncoding(ctxt, enc);
11032
0
  }
11033
0
    }
11034
11035
11036
0
    if (CUR == 0) {
11037
0
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11038
0
    }
11039
11040
    /*
11041
     * Check for the XMLDecl in the Prolog.
11042
     */
11043
0
    GROW;
11044
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
11045
11046
  /*
11047
   * Note that we will switch encoding on the fly.
11048
   */
11049
0
  xmlParseXMLDecl(ctxt);
11050
0
  if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11051
      /*
11052
       * The XML REC instructs us to stop parsing right here
11053
       */
11054
0
      return(-1);
11055
0
  }
11056
0
  SKIP_BLANKS;
11057
0
    } else {
11058
0
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11059
0
    }
11060
0
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
11061
0
        ctxt->sax->startDocument(ctxt->userData);
11062
0
    if (ctxt->instate == XML_PARSER_EOF)
11063
0
  return(-1);
11064
11065
    /*
11066
     * Doing validity checking on chunk doesn't make sense
11067
     */
11068
0
    ctxt->instate = XML_PARSER_CONTENT;
11069
0
    ctxt->validate = 0;
11070
0
    ctxt->loadsubset = 0;
11071
0
    ctxt->depth = 0;
11072
11073
0
    xmlParseContent(ctxt);
11074
0
    if (ctxt->instate == XML_PARSER_EOF)
11075
0
  return(-1);
11076
11077
0
    if ((RAW == '<') && (NXT(1) == '/')) {
11078
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11079
0
    } else if (RAW != 0) {
11080
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
11081
0
    }
11082
11083
    /*
11084
     * SAX: end of the document processing.
11085
     */
11086
0
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11087
0
        ctxt->sax->endDocument(ctxt->userData);
11088
11089
0
    if (! ctxt->wellFormed) return(-1);
11090
0
    return(0);
11091
0
}
11092
11093
#ifdef LIBXML_PUSH_ENABLED
11094
/************************************************************************
11095
 *                  *
11096
 *    Progressive parsing interfaces        *
11097
 *                  *
11098
 ************************************************************************/
11099
11100
/**
11101
 * xmlParseLookupChar:
11102
 * @ctxt:  an XML parser context
11103
 * @c:  character
11104
 *
11105
 * Check whether the input buffer contains a character.
11106
 */
11107
static int
11108
2.10M
xmlParseLookupChar(xmlParserCtxtPtr ctxt, int c) {
11109
2.10M
    const xmlChar *cur;
11110
11111
2.10M
    if (ctxt->checkIndex == 0) {
11112
2.03M
        cur = ctxt->input->cur + 1;
11113
2.03M
    } else {
11114
66.8k
        cur = ctxt->input->cur + ctxt->checkIndex;
11115
66.8k
    }
11116
11117
2.10M
    if (memchr(cur, c, ctxt->input->end - cur) == NULL) {
11118
71.8k
        ctxt->checkIndex = ctxt->input->end - ctxt->input->cur;
11119
71.8k
        return(0);
11120
2.02M
    } else {
11121
2.02M
        ctxt->checkIndex = 0;
11122
2.02M
        return(1);
11123
2.02M
    }
11124
2.10M
}
11125
11126
/**
11127
 * xmlParseLookupString:
11128
 * @ctxt:  an XML parser context
11129
 * @startDelta: delta to apply at the start
11130
 * @str:  string
11131
 * @strLen:  length of string
11132
 *
11133
 * Check whether the input buffer contains a string.
11134
 */
11135
static const xmlChar *
11136
xmlParseLookupString(xmlParserCtxtPtr ctxt, size_t startDelta,
11137
397k
                     const char *str, size_t strLen) {
11138
397k
    const xmlChar *cur, *term;
11139
11140
397k
    if (ctxt->checkIndex == 0) {
11141
265k
        cur = ctxt->input->cur + startDelta;
11142
265k
    } else {
11143
132k
        cur = ctxt->input->cur + ctxt->checkIndex;
11144
132k
    }
11145
11146
397k
    term = BAD_CAST strstr((const char *) cur, str);
11147
397k
    if (term == NULL) {
11148
173k
        const xmlChar *end = ctxt->input->end;
11149
11150
        /* Rescan (strLen - 1) characters. */
11151
173k
        if ((size_t) (end - cur) < strLen)
11152
4.49k
            end = cur;
11153
168k
        else
11154
168k
            end -= strLen - 1;
11155
173k
        ctxt->checkIndex = end - ctxt->input->cur;
11156
224k
    } else {
11157
224k
        ctxt->checkIndex = 0;
11158
224k
    }
11159
11160
397k
    return(term);
11161
397k
}
11162
11163
/**
11164
 * xmlParseLookupCharData:
11165
 * @ctxt:  an XML parser context
11166
 *
11167
 * Check whether the input buffer contains terminated char data.
11168
 */
11169
static int
11170
3.38M
xmlParseLookupCharData(xmlParserCtxtPtr ctxt) {
11171
3.38M
    const xmlChar *cur = ctxt->input->cur + ctxt->checkIndex;
11172
3.38M
    const xmlChar *end = ctxt->input->end;
11173
11174
81.6M
    while (cur < end) {
11175
81.1M
        if ((*cur == '<') || (*cur == '&')) {
11176
2.85M
            ctxt->checkIndex = 0;
11177
2.85M
            return(1);
11178
2.85M
        }
11179
78.2M
        cur++;
11180
78.2M
    }
11181
11182
534k
    ctxt->checkIndex = cur - ctxt->input->cur;
11183
534k
    return(0);
11184
3.38M
}
11185
11186
/**
11187
 * xmlParseLookupGt:
11188
 * @ctxt:  an XML parser context
11189
 *
11190
 * Check whether there's enough data in the input buffer to finish parsing
11191
 * a start tag. This has to take quotes into account.
11192
 */
11193
static int
11194
2.51M
xmlParseLookupGt(xmlParserCtxtPtr ctxt) {
11195
2.51M
    const xmlChar *cur;
11196
2.51M
    const xmlChar *end = ctxt->input->end;
11197
2.51M
    int state = ctxt->endCheckState;
11198
11199
2.51M
    if (ctxt->checkIndex == 0)
11200
2.08M
        cur = ctxt->input->cur + 1;
11201
428k
    else
11202
428k
        cur = ctxt->input->cur + ctxt->checkIndex;
11203
11204
83.8M
    while (cur < end) {
11205
83.3M
        if (state) {
11206
40.9M
            if (*cur == state)
11207
1.31M
                state = 0;
11208
42.3M
        } else if (*cur == '\'' || *cur == '"') {
11209
1.33M
            state = *cur;
11210
41.0M
        } else if (*cur == '>') {
11211
2.06M
            ctxt->checkIndex = 0;
11212
2.06M
            ctxt->endCheckState = 0;
11213
2.06M
            return(1);
11214
2.06M
        }
11215
81.3M
        cur++;
11216
81.3M
    }
11217
11218
457k
    ctxt->checkIndex = cur - ctxt->input->cur;
11219
457k
    ctxt->endCheckState = state;
11220
457k
    return(0);
11221
2.51M
}
11222
11223
/**
11224
 * xmlParseLookupInternalSubset:
11225
 * @ctxt:  an XML parser context
11226
 *
11227
 * Check whether there's enough data in the input buffer to finish parsing
11228
 * the internal subset.
11229
 */
11230
static int
11231
268k
xmlParseLookupInternalSubset(xmlParserCtxtPtr ctxt) {
11232
    /*
11233
     * Sorry, but progressive parsing of the internal subset is not
11234
     * supported. We first check that the full content of the internal
11235
     * subset is available and parsing is launched only at that point.
11236
     * Internal subset ends with "']' S? '>'" in an unescaped section and
11237
     * not in a ']]>' sequence which are conditional sections.
11238
     */
11239
268k
    const xmlChar *cur, *start;
11240
268k
    const xmlChar *end = ctxt->input->end;
11241
268k
    int state = ctxt->endCheckState;
11242
11243
268k
    if (ctxt->checkIndex == 0) {
11244
47.2k
        cur = ctxt->input->cur + 1;
11245
220k
    } else {
11246
220k
        cur = ctxt->input->cur + ctxt->checkIndex;
11247
220k
    }
11248
268k
    start = cur;
11249
11250
48.8M
    while (cur < end) {
11251
48.6M
        if (state == '-') {
11252
1.19M
            if ((*cur == '-') &&
11253
1.19M
                (cur[1] == '-') &&
11254
1.19M
                (cur[2] == '>')) {
11255
14.0k
                state = 0;
11256
14.0k
                cur += 3;
11257
14.0k
                start = cur;
11258
14.0k
                continue;
11259
14.0k
            }
11260
1.19M
        }
11261
47.4M
        else if (state == ']') {
11262
42.5k
            if (*cur == '>') {
11263
39.6k
                ctxt->checkIndex = 0;
11264
39.6k
                ctxt->endCheckState = 0;
11265
39.6k
                return(1);
11266
39.6k
            }
11267
2.87k
            if (IS_BLANK_CH(*cur)) {
11268
1.07k
                state = ' ';
11269
1.79k
            } else if (*cur != ']') {
11270
642
                state = 0;
11271
642
                start = cur;
11272
642
                continue;
11273
642
            }
11274
2.87k
        }
11275
47.4M
        else if (state == ' ') {
11276
3.62k
            if (*cur == '>') {
11277
479
                ctxt->checkIndex = 0;
11278
479
                ctxt->endCheckState = 0;
11279
479
                return(1);
11280
479
            }
11281
3.14k
            if (!IS_BLANK_CH(*cur)) {
11282
547
                state = 0;
11283
547
                start = cur;
11284
547
                continue;
11285
547
            }
11286
3.14k
        }
11287
47.4M
        else if (state != 0) {
11288
28.9M
            if (*cur == state) {
11289
166k
                state = 0;
11290
166k
                start = cur + 1;
11291
166k
            }
11292
28.9M
        }
11293
18.4M
        else if (*cur == '<') {
11294
390k
            if ((cur[1] == '!') &&
11295
390k
                (cur[2] == '-') &&
11296
390k
                (cur[3] == '-')) {
11297
14.3k
                state = '-';
11298
14.3k
                cur += 4;
11299
                /* Don't treat <!--> as comment */
11300
14.3k
                start = cur;
11301
14.3k
                continue;
11302
14.3k
            }
11303
390k
        }
11304
18.0M
        else if ((*cur == '"') || (*cur == '\'') || (*cur == ']')) {
11305
210k
            state = *cur;
11306
210k
        }
11307
11308
48.5M
        cur++;
11309
48.5M
    }
11310
11311
    /*
11312
     * Rescan the three last characters to detect "<!--" and "-->"
11313
     * split across chunks.
11314
     */
11315
227k
    if ((state == 0) || (state == '-')) {
11316
86.3k
        if (cur - start < 3)
11317
3.02k
            cur = start;
11318
83.2k
        else
11319
83.2k
            cur -= 3;
11320
86.3k
    }
11321
227k
    ctxt->checkIndex = cur - ctxt->input->cur;
11322
227k
    ctxt->endCheckState = state;
11323
227k
    return(0);
11324
268k
}
11325
11326
/**
11327
 * xmlCheckCdataPush:
11328
 * @cur: pointer to the block of characters
11329
 * @len: length of the block in bytes
11330
 * @complete: 1 if complete CDATA block is passed in, 0 if partial block
11331
 *
11332
 * Check that the block of characters is okay as SCdata content [20]
11333
 *
11334
 * Returns the number of bytes to pass if okay, a negative index where an
11335
 *         UTF-8 error occurred otherwise
11336
 */
11337
static int
11338
41.8k
xmlCheckCdataPush(const xmlChar *utf, int len, int complete) {
11339
41.8k
    int ix;
11340
41.8k
    unsigned char c;
11341
41.8k
    int codepoint;
11342
11343
41.8k
    if ((utf == NULL) || (len <= 0))
11344
41
        return(0);
11345
11346
2.90M
    for (ix = 0; ix < len;) {      /* string is 0-terminated */
11347
2.88M
        c = utf[ix];
11348
2.88M
        if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
11349
1.75M
      if (c >= 0x20)
11350
1.69M
    ix++;
11351
56.4k
      else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11352
52.9k
          ix++;
11353
3.49k
      else
11354
3.49k
          return(-ix);
11355
1.75M
  } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
11356
51.2k
      if (ix + 2 > len) return(complete ? -ix : ix);
11357
51.1k
      if ((utf[ix+1] & 0xc0 ) != 0x80)
11358
1.40k
          return(-ix);
11359
49.7k
      codepoint = (utf[ix] & 0x1f) << 6;
11360
49.7k
      codepoint |= utf[ix+1] & 0x3f;
11361
49.7k
      if (!xmlIsCharQ(codepoint))
11362
30
          return(-ix);
11363
49.7k
      ix += 2;
11364
1.08M
  } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
11365
174k
      if (ix + 3 > len) return(complete ? -ix : ix);
11366
173k
      if (((utf[ix+1] & 0xc0) != 0x80) ||
11367
173k
          ((utf[ix+2] & 0xc0) != 0x80))
11368
1.96k
        return(-ix);
11369
171k
      codepoint = (utf[ix] & 0xf) << 12;
11370
171k
      codepoint |= (utf[ix+1] & 0x3f) << 6;
11371
171k
      codepoint |= utf[ix+2] & 0x3f;
11372
171k
      if (!xmlIsCharQ(codepoint))
11373
6
          return(-ix);
11374
171k
      ix += 3;
11375
909k
  } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
11376
900k
      if (ix + 4 > len) return(complete ? -ix : ix);
11377
898k
      if (((utf[ix+1] & 0xc0) != 0x80) ||
11378
898k
          ((utf[ix+2] & 0xc0) != 0x80) ||
11379
898k
    ((utf[ix+3] & 0xc0) != 0x80))
11380
5.16k
        return(-ix);
11381
893k
      codepoint = (utf[ix] & 0x7) << 18;
11382
893k
      codepoint |= (utf[ix+1] & 0x3f) << 12;
11383
893k
      codepoint |= (utf[ix+2] & 0x3f) << 6;
11384
893k
      codepoint |= utf[ix+3] & 0x3f;
11385
893k
      if (!xmlIsCharQ(codepoint))
11386
1.01k
          return(-ix);
11387
892k
      ix += 4;
11388
892k
  } else       /* unknown encoding */
11389
9.02k
      return(-ix);
11390
2.88M
      }
11391
16.7k
      return(ix);
11392
41.8k
}
11393
11394
/**
11395
 * xmlParseTryOrFinish:
11396
 * @ctxt:  an XML parser context
11397
 * @terminate:  last chunk indicator
11398
 *
11399
 * Try to progress on parsing
11400
 *
11401
 * Returns zero if no parsing was possible
11402
 */
11403
static int
11404
1.74M
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11405
1.74M
    int ret = 0;
11406
1.74M
    int avail, tlen;
11407
1.74M
    xmlChar cur, next;
11408
11409
1.74M
    if (ctxt->input == NULL)
11410
0
        return(0);
11411
11412
#ifdef DEBUG_PUSH
11413
    switch (ctxt->instate) {
11414
  case XML_PARSER_EOF:
11415
      xmlGenericError(xmlGenericErrorContext,
11416
        "PP: try EOF\n"); break;
11417
  case XML_PARSER_START:
11418
      xmlGenericError(xmlGenericErrorContext,
11419
        "PP: try START\n"); break;
11420
  case XML_PARSER_MISC:
11421
      xmlGenericError(xmlGenericErrorContext,
11422
        "PP: try MISC\n");break;
11423
  case XML_PARSER_COMMENT:
11424
      xmlGenericError(xmlGenericErrorContext,
11425
        "PP: try COMMENT\n");break;
11426
  case XML_PARSER_PROLOG:
11427
      xmlGenericError(xmlGenericErrorContext,
11428
        "PP: try PROLOG\n");break;
11429
  case XML_PARSER_START_TAG:
11430
      xmlGenericError(xmlGenericErrorContext,
11431
        "PP: try START_TAG\n");break;
11432
  case XML_PARSER_CONTENT:
11433
      xmlGenericError(xmlGenericErrorContext,
11434
        "PP: try CONTENT\n");break;
11435
  case XML_PARSER_CDATA_SECTION:
11436
      xmlGenericError(xmlGenericErrorContext,
11437
        "PP: try CDATA_SECTION\n");break;
11438
  case XML_PARSER_END_TAG:
11439
      xmlGenericError(xmlGenericErrorContext,
11440
        "PP: try END_TAG\n");break;
11441
  case XML_PARSER_ENTITY_DECL:
11442
      xmlGenericError(xmlGenericErrorContext,
11443
        "PP: try ENTITY_DECL\n");break;
11444
  case XML_PARSER_ENTITY_VALUE:
11445
      xmlGenericError(xmlGenericErrorContext,
11446
        "PP: try ENTITY_VALUE\n");break;
11447
  case XML_PARSER_ATTRIBUTE_VALUE:
11448
      xmlGenericError(xmlGenericErrorContext,
11449
        "PP: try ATTRIBUTE_VALUE\n");break;
11450
  case XML_PARSER_DTD:
11451
      xmlGenericError(xmlGenericErrorContext,
11452
        "PP: try DTD\n");break;
11453
  case XML_PARSER_EPILOG:
11454
      xmlGenericError(xmlGenericErrorContext,
11455
        "PP: try EPILOG\n");break;
11456
  case XML_PARSER_PI:
11457
      xmlGenericError(xmlGenericErrorContext,
11458
        "PP: try PI\n");break;
11459
        case XML_PARSER_IGNORE:
11460
            xmlGenericError(xmlGenericErrorContext,
11461
        "PP: try IGNORE\n");break;
11462
    }
11463
#endif
11464
11465
1.74M
    if ((ctxt->input != NULL) &&
11466
1.74M
        (ctxt->input->cur - ctxt->input->base > 4096)) {
11467
33.0k
        xmlParserInputShrink(ctxt->input);
11468
33.0k
    }
11469
11470
14.3M
    while (ctxt->instate != XML_PARSER_EOF) {
11471
14.3M
  if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11472
46.2k
      return(0);
11473
11474
14.2M
  if (ctxt->input == NULL) break;
11475
14.2M
  if (ctxt->input->buf == NULL)
11476
0
      avail = ctxt->input->length -
11477
0
              (ctxt->input->cur - ctxt->input->base);
11478
14.2M
  else {
11479
      /*
11480
       * If we are operating on converted input, try to flush
11481
       * remaining chars to avoid them stalling in the non-converted
11482
       * buffer. But do not do this in document start where
11483
       * encoding="..." may not have been read and we work on a
11484
       * guessed encoding.
11485
       */
11486
14.2M
      if ((ctxt->instate != XML_PARSER_START) &&
11487
14.2M
          (ctxt->input->buf->raw != NULL) &&
11488
14.2M
    (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
11489
7.45k
                size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11490
7.45k
                                                 ctxt->input);
11491
7.45k
    size_t current = ctxt->input->cur - ctxt->input->base;
11492
11493
7.45k
    xmlParserInputBufferPush(ctxt->input->buf, 0, "");
11494
7.45k
                xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11495
7.45k
                                      base, current);
11496
7.45k
      }
11497
14.2M
      avail = xmlBufUse(ctxt->input->buf->buffer) -
11498
14.2M
        (ctxt->input->cur - ctxt->input->base);
11499
14.2M
  }
11500
14.2M
        if (avail < 1)
11501
132k
      goto done;
11502
14.1M
        switch (ctxt->instate) {
11503
0
            case XML_PARSER_EOF:
11504
          /*
11505
     * Document parsing is done !
11506
     */
11507
0
          goto done;
11508
266k
            case XML_PARSER_START:
11509
266k
    if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11510
105k
        xmlChar start[4];
11511
105k
        xmlCharEncoding enc;
11512
11513
        /*
11514
         * Very first chars read from the document flow.
11515
         */
11516
105k
        if (avail < 4)
11517
9.50k
      goto done;
11518
11519
        /*
11520
         * Get the 4 first bytes and decode the charset
11521
         * if enc != XML_CHAR_ENCODING_NONE
11522
         * plug some encoding conversion routines,
11523
         * else xmlSwitchEncoding will set to (default)
11524
         * UTF8.
11525
         */
11526
96.3k
        start[0] = RAW;
11527
96.3k
        start[1] = NXT(1);
11528
96.3k
        start[2] = NXT(2);
11529
96.3k
        start[3] = NXT(3);
11530
96.3k
        enc = xmlDetectCharEncoding(start, 4);
11531
96.3k
        xmlSwitchEncoding(ctxt, enc);
11532
96.3k
        break;
11533
105k
    }
11534
11535
160k
    if (avail < 2)
11536
719
        goto done;
11537
159k
    cur = ctxt->input->cur[0];
11538
159k
    next = ctxt->input->cur[1];
11539
159k
    if (cur == 0) {
11540
1.12k
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11541
1.12k
      ctxt->sax->setDocumentLocator(ctxt->userData,
11542
1.12k
                  &xmlDefaultSAXLocator);
11543
1.12k
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11544
1.12k
        xmlHaltParser(ctxt);
11545
#ifdef DEBUG_PUSH
11546
        xmlGenericError(xmlGenericErrorContext,
11547
          "PP: entering EOF\n");
11548
#endif
11549
1.12k
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11550
1.12k
      ctxt->sax->endDocument(ctxt->userData);
11551
1.12k
        goto done;
11552
1.12k
    }
11553
158k
          if ((cur == '<') && (next == '?')) {
11554
        /* PI or XML decl */
11555
104k
        if (avail < 5) goto done;
11556
104k
        if ((!terminate) &&
11557
104k
                        (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11558
24.0k
      goto done;
11559
80.3k
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11560
80.3k
      ctxt->sax->setDocumentLocator(ctxt->userData,
11561
80.3k
                  &xmlDefaultSAXLocator);
11562
80.3k
        if ((ctxt->input->cur[2] == 'x') &&
11563
80.3k
      (ctxt->input->cur[3] == 'm') &&
11564
80.3k
      (ctxt->input->cur[4] == 'l') &&
11565
80.3k
      (IS_BLANK_CH(ctxt->input->cur[5]))) {
11566
74.8k
      ret += 5;
11567
#ifdef DEBUG_PUSH
11568
      xmlGenericError(xmlGenericErrorContext,
11569
        "PP: Parsing XML Decl\n");
11570
#endif
11571
74.8k
      xmlParseXMLDecl(ctxt);
11572
74.8k
      if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11573
          /*
11574
           * The XML REC instructs us to stop parsing right
11575
           * here
11576
           */
11577
157
          xmlHaltParser(ctxt);
11578
157
          return(0);
11579
157
      }
11580
74.6k
      ctxt->standalone = ctxt->input->standalone;
11581
74.6k
      if ((ctxt->encoding == NULL) &&
11582
74.6k
          (ctxt->input->encoding != NULL))
11583
4.50k
          ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11584
74.6k
      if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11585
74.6k
          (!ctxt->disableSAX))
11586
64.4k
          ctxt->sax->startDocument(ctxt->userData);
11587
74.6k
      ctxt->instate = XML_PARSER_MISC;
11588
#ifdef DEBUG_PUSH
11589
      xmlGenericError(xmlGenericErrorContext,
11590
        "PP: entering MISC\n");
11591
#endif
11592
74.6k
        } else {
11593
5.53k
      ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11594
5.53k
      if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11595
5.53k
          (!ctxt->disableSAX))
11596
5.53k
          ctxt->sax->startDocument(ctxt->userData);
11597
5.53k
      ctxt->instate = XML_PARSER_MISC;
11598
#ifdef DEBUG_PUSH
11599
      xmlGenericError(xmlGenericErrorContext,
11600
        "PP: entering MISC\n");
11601
#endif
11602
5.53k
        }
11603
80.3k
    } else {
11604
54.0k
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11605
54.0k
      ctxt->sax->setDocumentLocator(ctxt->userData,
11606
54.0k
                  &xmlDefaultSAXLocator);
11607
54.0k
        ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11608
54.0k
        if (ctxt->version == NULL) {
11609
0
            xmlErrMemory(ctxt, NULL);
11610
0
      break;
11611
0
        }
11612
54.0k
        if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11613
54.0k
            (!ctxt->disableSAX))
11614
54.0k
      ctxt->sax->startDocument(ctxt->userData);
11615
54.0k
        ctxt->instate = XML_PARSER_MISC;
11616
#ifdef DEBUG_PUSH
11617
        xmlGenericError(xmlGenericErrorContext,
11618
          "PP: entering MISC\n");
11619
#endif
11620
54.0k
    }
11621
134k
    break;
11622
2.52M
            case XML_PARSER_START_TAG: {
11623
2.52M
          const xmlChar *name;
11624
2.52M
    const xmlChar *prefix = NULL;
11625
2.52M
    const xmlChar *URI = NULL;
11626
2.52M
                int line = ctxt->input->line;
11627
2.52M
    int nsNr = ctxt->nsNr;
11628
11629
2.52M
    if ((avail < 2) && (ctxt->inputNr == 1))
11630
0
        goto done;
11631
2.52M
    cur = ctxt->input->cur[0];
11632
2.52M
          if (cur != '<') {
11633
7.59k
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11634
7.59k
        xmlHaltParser(ctxt);
11635
7.59k
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11636
7.59k
      ctxt->sax->endDocument(ctxt->userData);
11637
7.59k
        goto done;
11638
7.59k
    }
11639
2.51M
    if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11640
429k
                    goto done;
11641
2.08M
    if (ctxt->spaceNr == 0)
11642
7.10k
        spacePush(ctxt, -1);
11643
2.08M
    else if (*ctxt->space == -2)
11644
101k
        spacePush(ctxt, -1);
11645
1.97M
    else
11646
1.97M
        spacePush(ctxt, *ctxt->space);
11647
2.08M
#ifdef LIBXML_SAX1_ENABLED
11648
2.08M
    if (ctxt->sax2)
11649
1.28M
#endif /* LIBXML_SAX1_ENABLED */
11650
1.28M
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
11651
801k
#ifdef LIBXML_SAX1_ENABLED
11652
801k
    else
11653
801k
        name = xmlParseStartTag(ctxt);
11654
2.08M
#endif /* LIBXML_SAX1_ENABLED */
11655
2.08M
    if (ctxt->instate == XML_PARSER_EOF)
11656
247
        goto done;
11657
2.08M
    if (name == NULL) {
11658
5.41k
        spacePop(ctxt);
11659
5.41k
        xmlHaltParser(ctxt);
11660
5.41k
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11661
5.41k
      ctxt->sax->endDocument(ctxt->userData);
11662
5.41k
        goto done;
11663
5.41k
    }
11664
2.08M
#ifdef LIBXML_VALID_ENABLED
11665
    /*
11666
     * [ VC: Root Element Type ]
11667
     * The Name in the document type declaration must match
11668
     * the element type of the root element.
11669
     */
11670
2.08M
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11671
2.08M
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
11672
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11673
2.08M
#endif /* LIBXML_VALID_ENABLED */
11674
11675
    /*
11676
     * Check for an Empty Element.
11677
     */
11678
2.08M
    if ((RAW == '/') && (NXT(1) == '>')) {
11679
334k
        SKIP(2);
11680
11681
334k
        if (ctxt->sax2) {
11682
253k
      if ((ctxt->sax != NULL) &&
11683
253k
          (ctxt->sax->endElementNs != NULL) &&
11684
253k
          (!ctxt->disableSAX))
11685
253k
          ctxt->sax->endElementNs(ctxt->userData, name,
11686
253k
                                  prefix, URI);
11687
253k
      if (ctxt->nsNr - nsNr > 0)
11688
569
          nsPop(ctxt, ctxt->nsNr - nsNr);
11689
253k
#ifdef LIBXML_SAX1_ENABLED
11690
253k
        } else {
11691
80.5k
      if ((ctxt->sax != NULL) &&
11692
80.5k
          (ctxt->sax->endElement != NULL) &&
11693
80.5k
          (!ctxt->disableSAX))
11694
80.4k
          ctxt->sax->endElement(ctxt->userData, name);
11695
80.5k
#endif /* LIBXML_SAX1_ENABLED */
11696
80.5k
        }
11697
334k
        if (ctxt->instate == XML_PARSER_EOF)
11698
0
      goto done;
11699
334k
        spacePop(ctxt);
11700
334k
        if (ctxt->nameNr == 0) {
11701
4.69k
      ctxt->instate = XML_PARSER_EPILOG;
11702
329k
        } else {
11703
329k
      ctxt->instate = XML_PARSER_CONTENT;
11704
329k
        }
11705
334k
        break;
11706
334k
    }
11707
1.74M
    if (RAW == '>') {
11708
1.68M
        NEXT;
11709
1.68M
    } else {
11710
63.1k
        xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11711
63.1k
           "Couldn't find end of Start Tag %s\n",
11712
63.1k
           name);
11713
63.1k
        nodePop(ctxt);
11714
63.1k
        spacePop(ctxt);
11715
63.1k
    }
11716
1.74M
                nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
11717
11718
1.74M
    ctxt->instate = XML_PARSER_CONTENT;
11719
1.74M
                break;
11720
2.08M
      }
11721
9.05M
            case XML_PARSER_CONTENT: {
11722
9.05M
    if ((avail < 2) && (ctxt->inputNr == 1))
11723
45.0k
        goto done;
11724
9.01M
    cur = ctxt->input->cur[0];
11725
9.01M
    next = ctxt->input->cur[1];
11726
11727
9.01M
    if ((cur == '<') && (next == '/')) {
11728
1.61M
        ctxt->instate = XML_PARSER_END_TAG;
11729
1.61M
        break;
11730
7.39M
          } else if ((cur == '<') && (next == '?')) {
11731
30.2k
        if ((!terminate) &&
11732
30.2k
            (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11733
12.8k
      goto done;
11734
17.4k
        xmlParsePI(ctxt);
11735
17.4k
        ctxt->instate = XML_PARSER_CONTENT;
11736
7.36M
    } else if ((cur == '<') && (next != '!')) {
11737
2.00M
        ctxt->instate = XML_PARSER_START_TAG;
11738
2.00M
        break;
11739
5.35M
    } else if ((cur == '<') && (next == '!') &&
11740
5.35M
               (ctxt->input->cur[2] == '-') &&
11741
5.35M
         (ctxt->input->cur[3] == '-')) {
11742
136k
        if ((!terminate) &&
11743
136k
            (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11744
45.8k
      goto done;
11745
90.5k
        xmlParseComment(ctxt);
11746
90.5k
        ctxt->instate = XML_PARSER_CONTENT;
11747
5.21M
    } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11748
5.21M
        (ctxt->input->cur[2] == '[') &&
11749
5.21M
        (ctxt->input->cur[3] == 'C') &&
11750
5.21M
        (ctxt->input->cur[4] == 'D') &&
11751
5.21M
        (ctxt->input->cur[5] == 'A') &&
11752
5.21M
        (ctxt->input->cur[6] == 'T') &&
11753
5.21M
        (ctxt->input->cur[7] == 'A') &&
11754
5.21M
        (ctxt->input->cur[8] == '[')) {
11755
7.73k
        SKIP(9);
11756
7.73k
        ctxt->instate = XML_PARSER_CDATA_SECTION;
11757
7.73k
        break;
11758
5.21M
    } else if ((cur == '<') && (next == '!') &&
11759
5.21M
               (avail < 9)) {
11760
1.92k
        goto done;
11761
5.20M
    } else if (cur == '<') {
11762
15.4k
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11763
15.4k
                    "detected an error in element content\n");
11764
15.4k
                    SKIP(1);
11765
5.19M
    } else if (cur == '&') {
11766
894k
        if ((!terminate) && (!xmlParseLookupChar(ctxt, ';')))
11767
24.8k
      goto done;
11768
869k
        xmlParseReference(ctxt);
11769
4.30M
    } else {
11770
        /* TODO Avoid the extra copy, handle directly !!! */
11771
        /*
11772
         * Goal of the following test is:
11773
         *  - minimize calls to the SAX 'character' callback
11774
         *    when they are mergeable
11775
         *  - handle a problem for isBlank when we only parse
11776
         *    a sequence of blank chars and the next one is
11777
         *    not available to check against '<' presence.
11778
         *  - tries to homogenize the differences in SAX
11779
         *    callbacks between the push and pull versions
11780
         *    of the parser.
11781
         */
11782
4.30M
        if ((ctxt->inputNr == 1) &&
11783
4.30M
            (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11784
3.53M
      if ((!terminate) && (!xmlParseLookupCharData(ctxt)))
11785
534k
          goto done;
11786
3.53M
                    }
11787
3.76M
                    ctxt->checkIndex = 0;
11788
3.76M
        xmlParseCharData(ctxt, 0);
11789
3.76M
    }
11790
4.75M
    break;
11791
9.01M
      }
11792
4.75M
            case XML_PARSER_END_TAG:
11793
1.66M
    if (avail < 2)
11794
0
        goto done;
11795
1.66M
    if ((!terminate) && (!xmlParseLookupChar(ctxt, '>')))
11796
46.9k
        goto done;
11797
1.61M
    if (ctxt->sax2) {
11798
956k
              xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
11799
956k
        nameNsPop(ctxt);
11800
956k
    }
11801
660k
#ifdef LIBXML_SAX1_ENABLED
11802
660k
      else
11803
660k
        xmlParseEndTag1(ctxt, 0);
11804
1.61M
#endif /* LIBXML_SAX1_ENABLED */
11805
1.61M
    if (ctxt->instate == XML_PARSER_EOF) {
11806
        /* Nothing */
11807
1.61M
    } else if (ctxt->nameNr == 0) {
11808
15.6k
        ctxt->instate = XML_PARSER_EPILOG;
11809
1.60M
    } else {
11810
1.60M
        ctxt->instate = XML_PARSER_CONTENT;
11811
1.60M
    }
11812
1.61M
    break;
11813
76.3k
            case XML_PARSER_CDATA_SECTION: {
11814
          /*
11815
     * The Push mode need to have the SAX callback for
11816
     * cdataBlock merge back contiguous callbacks.
11817
     */
11818
76.3k
    const xmlChar *term;
11819
11820
76.3k
                if (terminate) {
11821
                    /*
11822
                     * Don't call xmlParseLookupString. If 'terminate'
11823
                     * is set, checkIndex is invalid.
11824
                     */
11825
1.91k
                    term = BAD_CAST strstr((const char *) ctxt->input->cur,
11826
1.91k
                                           "]]>");
11827
74.4k
                } else {
11828
74.4k
        term = xmlParseLookupString(ctxt, 0, "]]>", 3);
11829
74.4k
                }
11830
11831
76.3k
    if (term == NULL) {
11832
65.4k
        int tmp, size;
11833
11834
65.4k
                    if (terminate) {
11835
                        /* Unfinished CDATA section */
11836
1.40k
                        size = ctxt->input->end - ctxt->input->cur;
11837
64.0k
                    } else {
11838
64.0k
                        if (avail < XML_PARSER_BIG_BUFFER_SIZE + 2)
11839
34.4k
                            goto done;
11840
29.6k
                        ctxt->checkIndex = 0;
11841
                        /* XXX: Why don't we pass the full buffer? */
11842
29.6k
                        size = XML_PARSER_BIG_BUFFER_SIZE;
11843
29.6k
                    }
11844
31.0k
                    tmp = xmlCheckCdataPush(ctxt->input->cur, size, 0);
11845
31.0k
                    if (tmp <= 0) {
11846
15.7k
                        tmp = -tmp;
11847
15.7k
                        ctxt->input->cur += tmp;
11848
15.7k
                        goto encoding_error;
11849
15.7k
                    }
11850
15.2k
                    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11851
15.2k
                        if (ctxt->sax->cdataBlock != NULL)
11852
10.8k
                            ctxt->sax->cdataBlock(ctxt->userData,
11853
10.8k
                                                  ctxt->input->cur, tmp);
11854
4.41k
                        else if (ctxt->sax->characters != NULL)
11855
4.41k
                            ctxt->sax->characters(ctxt->userData,
11856
4.41k
                                                  ctxt->input->cur, tmp);
11857
15.2k
                    }
11858
15.2k
                    if (ctxt->instate == XML_PARSER_EOF)
11859
0
                        goto done;
11860
15.2k
                    SKIPL(tmp);
11861
15.2k
    } else {
11862
10.8k
                    int base = term - CUR_PTR;
11863
10.8k
        int tmp;
11864
11865
10.8k
        tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1);
11866
10.8k
        if ((tmp < 0) || (tmp != base)) {
11867
7.18k
      tmp = -tmp;
11868
7.18k
      ctxt->input->cur += tmp;
11869
7.18k
      goto encoding_error;
11870
7.18k
        }
11871
3.64k
        if ((ctxt->sax != NULL) && (base == 0) &&
11872
3.64k
            (ctxt->sax->cdataBlock != NULL) &&
11873
3.64k
            (!ctxt->disableSAX)) {
11874
      /*
11875
       * Special case to provide identical behaviour
11876
       * between pull and push parsers on empty CDATA
11877
       * sections
11878
       */
11879
28
       if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11880
28
           (!strncmp((const char *)&ctxt->input->cur[-9],
11881
28
                     "<![CDATA[", 9)))
11882
23
           ctxt->sax->cdataBlock(ctxt->userData,
11883
23
                                 BAD_CAST "", 0);
11884
3.61k
        } else if ((ctxt->sax != NULL) && (base > 0) &&
11885
3.61k
      (!ctxt->disableSAX)) {
11886
3.60k
      if (ctxt->sax->cdataBlock != NULL)
11887
2.97k
          ctxt->sax->cdataBlock(ctxt->userData,
11888
2.97k
              ctxt->input->cur, base);
11889
625
      else if (ctxt->sax->characters != NULL)
11890
625
          ctxt->sax->characters(ctxt->userData,
11891
625
              ctxt->input->cur, base);
11892
3.60k
        }
11893
3.64k
        if (ctxt->instate == XML_PARSER_EOF)
11894
0
      goto done;
11895
3.64k
        SKIPL(base + 3);
11896
3.64k
        ctxt->instate = XML_PARSER_CONTENT;
11897
#ifdef DEBUG_PUSH
11898
        xmlGenericError(xmlGenericErrorContext,
11899
          "PP: entering CONTENT\n");
11900
#endif
11901
3.64k
    }
11902
18.9k
    break;
11903
76.3k
      }
11904
176k
            case XML_PARSER_MISC:
11905
246k
            case XML_PARSER_PROLOG:
11906
268k
            case XML_PARSER_EPILOG:
11907
268k
    SKIP_BLANKS;
11908
268k
    if (ctxt->input->buf == NULL)
11909
0
        avail = ctxt->input->length -
11910
0
                (ctxt->input->cur - ctxt->input->base);
11911
268k
    else
11912
268k
        avail = xmlBufUse(ctxt->input->buf->buffer) -
11913
268k
                (ctxt->input->cur - ctxt->input->base);
11914
268k
    if (avail < 2)
11915
14.9k
        goto done;
11916
253k
    cur = ctxt->input->cur[0];
11917
253k
    next = ctxt->input->cur[1];
11918
253k
          if ((cur == '<') && (next == '?')) {
11919
33.8k
        if ((!terminate) &&
11920
33.8k
                        (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11921
7.88k
      goto done;
11922
#ifdef DEBUG_PUSH
11923
        xmlGenericError(xmlGenericErrorContext,
11924
          "PP: Parsing PI\n");
11925
#endif
11926
25.9k
        xmlParsePI(ctxt);
11927
25.9k
        if (ctxt->instate == XML_PARSER_EOF)
11928
0
      goto done;
11929
219k
    } else if ((cur == '<') && (next == '!') &&
11930
219k
        (ctxt->input->cur[2] == '-') &&
11931
219k
        (ctxt->input->cur[3] == '-')) {
11932
31.9k
        if ((!terminate) &&
11933
31.9k
                        (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11934
18.6k
      goto done;
11935
#ifdef DEBUG_PUSH
11936
        xmlGenericError(xmlGenericErrorContext,
11937
          "PP: Parsing Comment\n");
11938
#endif
11939
13.3k
        xmlParseComment(ctxt);
11940
13.3k
        if (ctxt->instate == XML_PARSER_EOF)
11941
0
      goto done;
11942
187k
    } else if ((ctxt->instate == XML_PARSER_MISC) &&
11943
187k
                    (cur == '<') && (next == '!') &&
11944
187k
        (ctxt->input->cur[2] == 'D') &&
11945
187k
        (ctxt->input->cur[3] == 'O') &&
11946
187k
        (ctxt->input->cur[4] == 'C') &&
11947
187k
        (ctxt->input->cur[5] == 'T') &&
11948
187k
        (ctxt->input->cur[6] == 'Y') &&
11949
187k
        (ctxt->input->cur[7] == 'P') &&
11950
187k
        (ctxt->input->cur[8] == 'E')) {
11951
92.2k
        if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11952
27.4k
                        goto done;
11953
#ifdef DEBUG_PUSH
11954
        xmlGenericError(xmlGenericErrorContext,
11955
          "PP: Parsing internal subset\n");
11956
#endif
11957
64.8k
        ctxt->inSubset = 1;
11958
64.8k
        xmlParseDocTypeDecl(ctxt);
11959
64.8k
        if (ctxt->instate == XML_PARSER_EOF)
11960
0
      goto done;
11961
64.8k
        if (RAW == '[') {
11962
49.4k
      ctxt->instate = XML_PARSER_DTD;
11963
#ifdef DEBUG_PUSH
11964
      xmlGenericError(xmlGenericErrorContext,
11965
        "PP: entering DTD\n");
11966
#endif
11967
49.4k
        } else {
11968
      /*
11969
       * Create and update the external subset.
11970
       */
11971
15.4k
      ctxt->inSubset = 2;
11972
15.4k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11973
15.4k
          (ctxt->sax->externalSubset != NULL))
11974
14.1k
          ctxt->sax->externalSubset(ctxt->userData,
11975
14.1k
            ctxt->intSubName, ctxt->extSubSystem,
11976
14.1k
            ctxt->extSubURI);
11977
15.4k
      ctxt->inSubset = 0;
11978
15.4k
      xmlCleanSpecialAttr(ctxt);
11979
15.4k
      ctxt->instate = XML_PARSER_PROLOG;
11980
#ifdef DEBUG_PUSH
11981
      xmlGenericError(xmlGenericErrorContext,
11982
        "PP: entering PROLOG\n");
11983
#endif
11984
15.4k
        }
11985
95.5k
    } else if ((cur == '<') && (next == '!') &&
11986
95.5k
               (avail <
11987
1.93k
                            (ctxt->instate == XML_PARSER_MISC ? 9 : 4))) {
11988
697
        goto done;
11989
94.8k
    } else if (ctxt->instate == XML_PARSER_EPILOG) {
11990
2.41k
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11991
2.41k
        xmlHaltParser(ctxt);
11992
#ifdef DEBUG_PUSH
11993
        xmlGenericError(xmlGenericErrorContext,
11994
          "PP: entering EOF\n");
11995
#endif
11996
2.41k
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11997
2.41k
      ctxt->sax->endDocument(ctxt->userData);
11998
2.41k
        goto done;
11999
92.4k
                } else {
12000
92.4k
        ctxt->instate = XML_PARSER_START_TAG;
12001
#ifdef DEBUG_PUSH
12002
        xmlGenericError(xmlGenericErrorContext,
12003
          "PP: entering START_TAG\n");
12004
#endif
12005
92.4k
    }
12006
196k
    break;
12007
274k
            case XML_PARSER_DTD: {
12008
274k
                if ((!terminate) && (!xmlParseLookupInternalSubset(ctxt)))
12009
227k
                    goto done;
12010
46.3k
    xmlParseInternalSubset(ctxt);
12011
46.3k
    if (ctxt->instate == XML_PARSER_EOF)
12012
11.4k
        goto done;
12013
34.8k
    ctxt->inSubset = 2;
12014
34.8k
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
12015
34.8k
        (ctxt->sax->externalSubset != NULL))
12016
33.3k
        ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
12017
33.3k
          ctxt->extSubSystem, ctxt->extSubURI);
12018
34.8k
    ctxt->inSubset = 0;
12019
34.8k
    xmlCleanSpecialAttr(ctxt);
12020
34.8k
    if (ctxt->instate == XML_PARSER_EOF)
12021
4.55k
        goto done;
12022
30.3k
    ctxt->instate = XML_PARSER_PROLOG;
12023
#ifdef DEBUG_PUSH
12024
    xmlGenericError(xmlGenericErrorContext,
12025
      "PP: entering PROLOG\n");
12026
#endif
12027
30.3k
                break;
12028
34.8k
      }
12029
0
            case XML_PARSER_COMMENT:
12030
0
    xmlGenericError(xmlGenericErrorContext,
12031
0
      "PP: internal error, state == COMMENT\n");
12032
0
    ctxt->instate = XML_PARSER_CONTENT;
12033
#ifdef DEBUG_PUSH
12034
    xmlGenericError(xmlGenericErrorContext,
12035
      "PP: entering CONTENT\n");
12036
#endif
12037
0
    break;
12038
0
            case XML_PARSER_IGNORE:
12039
0
    xmlGenericError(xmlGenericErrorContext,
12040
0
      "PP: internal error, state == IGNORE");
12041
0
          ctxt->instate = XML_PARSER_DTD;
12042
#ifdef DEBUG_PUSH
12043
    xmlGenericError(xmlGenericErrorContext,
12044
      "PP: entering DTD\n");
12045
#endif
12046
0
          break;
12047
0
            case XML_PARSER_PI:
12048
0
    xmlGenericError(xmlGenericErrorContext,
12049
0
      "PP: internal error, state == PI\n");
12050
0
    ctxt->instate = XML_PARSER_CONTENT;
12051
#ifdef DEBUG_PUSH
12052
    xmlGenericError(xmlGenericErrorContext,
12053
      "PP: entering CONTENT\n");
12054
#endif
12055
0
    break;
12056
0
            case XML_PARSER_ENTITY_DECL:
12057
0
    xmlGenericError(xmlGenericErrorContext,
12058
0
      "PP: internal error, state == ENTITY_DECL\n");
12059
0
    ctxt->instate = XML_PARSER_DTD;
12060
#ifdef DEBUG_PUSH
12061
    xmlGenericError(xmlGenericErrorContext,
12062
      "PP: entering DTD\n");
12063
#endif
12064
0
    break;
12065
0
            case XML_PARSER_ENTITY_VALUE:
12066
0
    xmlGenericError(xmlGenericErrorContext,
12067
0
      "PP: internal error, state == ENTITY_VALUE\n");
12068
0
    ctxt->instate = XML_PARSER_CONTENT;
12069
#ifdef DEBUG_PUSH
12070
    xmlGenericError(xmlGenericErrorContext,
12071
      "PP: entering DTD\n");
12072
#endif
12073
0
    break;
12074
0
            case XML_PARSER_ATTRIBUTE_VALUE:
12075
0
    xmlGenericError(xmlGenericErrorContext,
12076
0
      "PP: internal error, state == ATTRIBUTE_VALUE\n");
12077
0
    ctxt->instate = XML_PARSER_START_TAG;
12078
#ifdef DEBUG_PUSH
12079
    xmlGenericError(xmlGenericErrorContext,
12080
      "PP: entering START_TAG\n");
12081
#endif
12082
0
    break;
12083
0
            case XML_PARSER_SYSTEM_LITERAL:
12084
0
    xmlGenericError(xmlGenericErrorContext,
12085
0
      "PP: internal error, state == SYSTEM_LITERAL\n");
12086
0
    ctxt->instate = XML_PARSER_START_TAG;
12087
#ifdef DEBUG_PUSH
12088
    xmlGenericError(xmlGenericErrorContext,
12089
      "PP: entering START_TAG\n");
12090
#endif
12091
0
    break;
12092
0
            case XML_PARSER_PUBLIC_LITERAL:
12093
0
    xmlGenericError(xmlGenericErrorContext,
12094
0
      "PP: internal error, state == PUBLIC_LITERAL\n");
12095
0
    ctxt->instate = XML_PARSER_START_TAG;
12096
#ifdef DEBUG_PUSH
12097
    xmlGenericError(xmlGenericErrorContext,
12098
      "PP: entering START_TAG\n");
12099
#endif
12100
0
    break;
12101
14.1M
  }
12102
14.1M
    }
12103
1.67M
done:
12104
#ifdef DEBUG_PUSH
12105
    xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
12106
#endif
12107
1.67M
    return(ret);
12108
22.9k
encoding_error:
12109
22.9k
    {
12110
22.9k
        char buffer[150];
12111
12112
22.9k
  snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12113
22.9k
      ctxt->input->cur[0], ctxt->input->cur[1],
12114
22.9k
      ctxt->input->cur[2], ctxt->input->cur[3]);
12115
22.9k
  __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12116
22.9k
         "Input is not proper UTF-8, indicate encoding !\n%s",
12117
22.9k
         BAD_CAST buffer, NULL);
12118
22.9k
    }
12119
22.9k
    return(0);
12120
1.74M
}
12121
12122
/**
12123
 * xmlParseChunk:
12124
 * @ctxt:  an XML parser context
12125
 * @chunk:  an char array
12126
 * @size:  the size in byte of the chunk
12127
 * @terminate:  last chunk indicator
12128
 *
12129
 * Parse a Chunk of memory
12130
 *
12131
 * Returns zero if no error, the xmlParserErrors otherwise.
12132
 */
12133
int
12134
xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
12135
3.12M
              int terminate) {
12136
3.12M
    int end_in_lf = 0;
12137
3.12M
    int remain = 0;
12138
12139
3.12M
    if (ctxt == NULL)
12140
0
        return(XML_ERR_INTERNAL_ERROR);
12141
3.12M
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12142
1.38M
        return(ctxt->errNo);
12143
1.74M
    if (ctxt->instate == XML_PARSER_EOF)
12144
238
        return(-1);
12145
1.74M
    if (ctxt->input == NULL)
12146
0
        return(-1);
12147
12148
1.74M
    ctxt->progressive = 1;
12149
1.74M
    if (ctxt->instate == XML_PARSER_START)
12150
173k
        xmlDetectSAX2(ctxt);
12151
1.74M
    if ((size > 0) && (chunk != NULL) && (!terminate) &&
12152
1.74M
        (chunk[size - 1] == '\r')) {
12153
2.18k
  end_in_lf = 1;
12154
2.18k
  size--;
12155
2.18k
    }
12156
12157
1.74M
xmldecl_done:
12158
12159
1.74M
    if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12160
1.74M
        (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF))  {
12161
1.65M
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12162
1.65M
  size_t cur = ctxt->input->cur - ctxt->input->base;
12163
1.65M
  int res;
12164
12165
        /*
12166
         * Specific handling if we autodetected an encoding, we should not
12167
         * push more than the first line ... which depend on the encoding
12168
         * And only push the rest once the final encoding was detected
12169
         */
12170
1.65M
        if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
12171
1.65M
            (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
12172
2.19k
            unsigned int len = 45;
12173
12174
2.19k
            if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12175
2.19k
                               BAD_CAST "UTF-16")) ||
12176
2.19k
                (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12177
897
                               BAD_CAST "UTF16")))
12178
1.29k
                len = 90;
12179
897
            else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12180
897
                                    BAD_CAST "UCS-4")) ||
12181
897
                     (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12182
671
                                    BAD_CAST "UCS4")))
12183
226
                len = 180;
12184
12185
2.19k
            if (ctxt->input->buf->rawconsumed < len)
12186
1.89k
                len -= ctxt->input->buf->rawconsumed;
12187
12188
            /*
12189
             * Change size for reading the initial declaration only
12190
             * if size is greater than len. Otherwise, memmove in xmlBufferAdd
12191
             * will blindly copy extra bytes from memory.
12192
             */
12193
2.19k
            if ((unsigned int) size > len) {
12194
836
                remain = size - len;
12195
836
                size = len;
12196
1.35k
            } else {
12197
1.35k
                remain = 0;
12198
1.35k
            }
12199
2.19k
        }
12200
1.65M
  res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12201
1.65M
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12202
1.65M
  if (res < 0) {
12203
492
      ctxt->errNo = XML_PARSER_EOF;
12204
492
      xmlHaltParser(ctxt);
12205
492
      return (XML_PARSER_EOF);
12206
492
  }
12207
#ifdef DEBUG_PUSH
12208
  xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12209
#endif
12210
12211
1.65M
    } else if (ctxt->instate != XML_PARSER_EOF) {
12212
91.9k
  if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
12213
91.9k
      xmlParserInputBufferPtr in = ctxt->input->buf;
12214
91.9k
      if ((in->encoder != NULL) && (in->buffer != NULL) &&
12215
91.9k
        (in->raw != NULL)) {
12216
4.66k
    int nbchars;
12217
4.66k
    size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
12218
4.66k
    size_t current = ctxt->input->cur - ctxt->input->base;
12219
12220
4.66k
    nbchars = xmlCharEncInput(in, terminate);
12221
4.66k
    xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
12222
4.66k
    if (nbchars < 0) {
12223
        /* TODO 2.6.0 */
12224
546
        xmlGenericError(xmlGenericErrorContext,
12225
546
            "xmlParseChunk: encoder error\n");
12226
546
                    xmlHaltParser(ctxt);
12227
546
        return(XML_ERR_INVALID_ENCODING);
12228
546
    }
12229
4.66k
      }
12230
91.9k
  }
12231
91.9k
    }
12232
12233
1.74M
    if (remain != 0) {
12234
737
        xmlParseTryOrFinish(ctxt, 0);
12235
1.74M
    } else {
12236
1.74M
        xmlParseTryOrFinish(ctxt, terminate);
12237
1.74M
    }
12238
1.74M
    if (ctxt->instate == XML_PARSER_EOF)
12239
33.5k
        return(ctxt->errNo);
12240
12241
1.71M
    if ((ctxt->input != NULL) &&
12242
1.71M
         (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
12243
1.71M
         ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
12244
1.71M
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
12245
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
12246
0
        xmlHaltParser(ctxt);
12247
0
    }
12248
1.71M
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12249
48.1k
        return(ctxt->errNo);
12250
12251
1.66M
    if (remain != 0) {
12252
434
        chunk += size;
12253
434
        size = remain;
12254
434
        remain = 0;
12255
434
        goto xmldecl_done;
12256
434
    }
12257
1.66M
    if ((end_in_lf == 1) && (ctxt->input != NULL) &&
12258
1.66M
        (ctxt->input->buf != NULL)) {
12259
1.91k
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
12260
1.91k
           ctxt->input);
12261
1.91k
  size_t current = ctxt->input->cur - ctxt->input->base;
12262
12263
1.91k
  xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
12264
12265
1.91k
  xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
12266
1.91k
            base, current);
12267
1.91k
    }
12268
1.66M
    if (terminate) {
12269
  /*
12270
   * Check for termination
12271
   */
12272
43.5k
  int cur_avail = 0;
12273
12274
43.5k
  if (ctxt->input != NULL) {
12275
43.5k
      if (ctxt->input->buf == NULL)
12276
0
    cur_avail = ctxt->input->length -
12277
0
          (ctxt->input->cur - ctxt->input->base);
12278
43.5k
      else
12279
43.5k
    cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
12280
43.5k
                    (ctxt->input->cur - ctxt->input->base);
12281
43.5k
  }
12282
12283
43.5k
  if ((ctxt->instate != XML_PARSER_EOF) &&
12284
43.5k
      (ctxt->instate != XML_PARSER_EPILOG)) {
12285
27.6k
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12286
27.6k
  }
12287
43.5k
  if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) {
12288
317
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12289
317
  }
12290
43.5k
  if (ctxt->instate != XML_PARSER_EOF) {
12291
43.5k
      if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
12292
43.5k
    ctxt->sax->endDocument(ctxt->userData);
12293
43.5k
  }
12294
43.5k
  ctxt->instate = XML_PARSER_EOF;
12295
43.5k
    }
12296
1.66M
    if (ctxt->wellFormed == 0)
12297
639k
  return((xmlParserErrors) ctxt->errNo);
12298
1.02M
    else
12299
1.02M
        return(0);
12300
1.66M
}
12301
12302
/************************************************************************
12303
 *                  *
12304
 *    I/O front end functions to the parser     *
12305
 *                  *
12306
 ************************************************************************/
12307
12308
/**
12309
 * xmlCreatePushParserCtxt:
12310
 * @sax:  a SAX handler
12311
 * @user_data:  The user data returned on SAX callbacks
12312
 * @chunk:  a pointer to an array of chars
12313
 * @size:  number of chars in the array
12314
 * @filename:  an optional file name or URI
12315
 *
12316
 * Create a parser context for using the XML parser in push mode.
12317
 * If @buffer and @size are non-NULL, the data is used to detect
12318
 * the encoding.  The remaining characters will be parsed so they
12319
 * don't need to be fed in again through xmlParseChunk.
12320
 * To allow content encoding detection, @size should be >= 4
12321
 * The value of @filename is used for fetching external entities
12322
 * and error/warning reports.
12323
 *
12324
 * Returns the new parser context or NULL
12325
 */
12326
12327
xmlParserCtxtPtr
12328
xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12329
142k
                        const char *chunk, int size, const char *filename) {
12330
142k
    xmlParserCtxtPtr ctxt;
12331
142k
    xmlParserInputPtr inputStream;
12332
142k
    xmlParserInputBufferPtr buf;
12333
142k
    xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12334
12335
    /*
12336
     * plug some encoding conversion routines
12337
     */
12338
142k
    if ((chunk != NULL) && (size >= 4))
12339
68.5k
  enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12340
12341
142k
    buf = xmlAllocParserInputBuffer(enc);
12342
142k
    if (buf == NULL) return(NULL);
12343
12344
142k
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
12345
142k
    if (ctxt == NULL) {
12346
0
        xmlErrMemory(NULL, "creating parser: out of memory\n");
12347
0
  xmlFreeParserInputBuffer(buf);
12348
0
  return(NULL);
12349
0
    }
12350
142k
    ctxt->dictNames = 1;
12351
142k
    if (filename == NULL) {
12352
71.4k
  ctxt->directory = NULL;
12353
71.4k
    } else {
12354
71.4k
        ctxt->directory = xmlParserGetDirectory(filename);
12355
71.4k
    }
12356
12357
142k
    inputStream = xmlNewInputStream(ctxt);
12358
142k
    if (inputStream == NULL) {
12359
0
  xmlFreeParserCtxt(ctxt);
12360
0
  xmlFreeParserInputBuffer(buf);
12361
0
  return(NULL);
12362
0
    }
12363
12364
142k
    if (filename == NULL)
12365
71.4k
  inputStream->filename = NULL;
12366
71.4k
    else {
12367
71.4k
  inputStream->filename = (char *)
12368
71.4k
      xmlCanonicPath((const xmlChar *) filename);
12369
71.4k
  if (inputStream->filename == NULL) {
12370
0
            xmlFreeInputStream(inputStream);
12371
0
      xmlFreeParserCtxt(ctxt);
12372
0
      xmlFreeParserInputBuffer(buf);
12373
0
      return(NULL);
12374
0
  }
12375
71.4k
    }
12376
142k
    inputStream->buf = buf;
12377
142k
    xmlBufResetInput(inputStream->buf->buffer, inputStream);
12378
142k
    inputPush(ctxt, inputStream);
12379
12380
    /*
12381
     * If the caller didn't provide an initial 'chunk' for determining
12382
     * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12383
     * that it can be automatically determined later
12384
     */
12385
142k
    ctxt->charset = XML_CHAR_ENCODING_NONE;
12386
12387
142k
    if ((size != 0) && (chunk != NULL) &&
12388
142k
        (ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
12389
68.5k
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12390
68.5k
  size_t cur = ctxt->input->cur - ctxt->input->base;
12391
12392
68.5k
  xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12393
12394
68.5k
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12395
#ifdef DEBUG_PUSH
12396
  xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12397
#endif
12398
68.5k
    }
12399
12400
142k
    if (enc != XML_CHAR_ENCODING_NONE) {
12401
40.6k
        xmlSwitchEncoding(ctxt, enc);
12402
40.6k
    }
12403
12404
142k
    return(ctxt);
12405
142k
}
12406
#endif /* LIBXML_PUSH_ENABLED */
12407
12408
/**
12409
 * xmlHaltParser:
12410
 * @ctxt:  an XML parser context
12411
 *
12412
 * Blocks further parser processing don't override error
12413
 * for internal use
12414
 */
12415
static void
12416
124k
xmlHaltParser(xmlParserCtxtPtr ctxt) {
12417
124k
    if (ctxt == NULL)
12418
0
        return;
12419
124k
    ctxt->instate = XML_PARSER_EOF;
12420
124k
    ctxt->disableSAX = 1;
12421
138k
    while (ctxt->inputNr > 1)
12422
13.5k
        xmlFreeInputStream(inputPop(ctxt));
12423
124k
    if (ctxt->input != NULL) {
12424
        /*
12425
   * in case there was a specific allocation deallocate before
12426
   * overriding base
12427
   */
12428
124k
        if (ctxt->input->free != NULL) {
12429
0
      ctxt->input->free((xmlChar *) ctxt->input->base);
12430
0
      ctxt->input->free = NULL;
12431
0
  }
12432
124k
        if (ctxt->input->buf != NULL) {
12433
111k
            xmlFreeParserInputBuffer(ctxt->input->buf);
12434
111k
            ctxt->input->buf = NULL;
12435
111k
        }
12436
124k
  ctxt->input->cur = BAD_CAST"";
12437
124k
        ctxt->input->length = 0;
12438
124k
  ctxt->input->base = ctxt->input->cur;
12439
124k
        ctxt->input->end = ctxt->input->cur;
12440
124k
    }
12441
124k
}
12442
12443
/**
12444
 * xmlStopParser:
12445
 * @ctxt:  an XML parser context
12446
 *
12447
 * Blocks further parser processing
12448
 */
12449
void
12450
71.8k
xmlStopParser(xmlParserCtxtPtr ctxt) {
12451
71.8k
    if (ctxt == NULL)
12452
0
        return;
12453
71.8k
    xmlHaltParser(ctxt);
12454
71.8k
    ctxt->errNo = XML_ERR_USER_STOP;
12455
71.8k
}
12456
12457
/**
12458
 * xmlCreateIOParserCtxt:
12459
 * @sax:  a SAX handler
12460
 * @user_data:  The user data returned on SAX callbacks
12461
 * @ioread:  an I/O read function
12462
 * @ioclose:  an I/O close function
12463
 * @ioctx:  an I/O handler
12464
 * @enc:  the charset encoding if known
12465
 *
12466
 * Create a parser context for using the XML parser with an existing
12467
 * I/O stream
12468
 *
12469
 * Returns the new parser context or NULL
12470
 */
12471
xmlParserCtxtPtr
12472
xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12473
  xmlInputReadCallback   ioread, xmlInputCloseCallback  ioclose,
12474
0
  void *ioctx, xmlCharEncoding enc) {
12475
0
    xmlParserCtxtPtr ctxt;
12476
0
    xmlParserInputPtr inputStream;
12477
0
    xmlParserInputBufferPtr buf;
12478
12479
0
    if (ioread == NULL) return(NULL);
12480
12481
0
    buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
12482
0
    if (buf == NULL) {
12483
0
        if (ioclose != NULL)
12484
0
            ioclose(ioctx);
12485
0
        return (NULL);
12486
0
    }
12487
12488
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
12489
0
    if (ctxt == NULL) {
12490
0
  xmlFreeParserInputBuffer(buf);
12491
0
  return(NULL);
12492
0
    }
12493
12494
0
    inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12495
0
    if (inputStream == NULL) {
12496
0
  xmlFreeParserCtxt(ctxt);
12497
0
  return(NULL);
12498
0
    }
12499
0
    inputPush(ctxt, inputStream);
12500
12501
0
    return(ctxt);
12502
0
}
12503
12504
#ifdef LIBXML_VALID_ENABLED
12505
/************************************************************************
12506
 *                  *
12507
 *    Front ends when parsing a DTD       *
12508
 *                  *
12509
 ************************************************************************/
12510
12511
/**
12512
 * xmlIOParseDTD:
12513
 * @sax:  the SAX handler block or NULL
12514
 * @input:  an Input Buffer
12515
 * @enc:  the charset encoding if known
12516
 *
12517
 * Load and parse a DTD
12518
 *
12519
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12520
 * @input will be freed by the function in any case.
12521
 */
12522
12523
xmlDtdPtr
12524
xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12525
0
        xmlCharEncoding enc) {
12526
0
    xmlDtdPtr ret = NULL;
12527
0
    xmlParserCtxtPtr ctxt;
12528
0
    xmlParserInputPtr pinput = NULL;
12529
0
    xmlChar start[4];
12530
12531
0
    if (input == NULL)
12532
0
  return(NULL);
12533
12534
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12535
0
    if (ctxt == NULL) {
12536
0
        xmlFreeParserInputBuffer(input);
12537
0
  return(NULL);
12538
0
    }
12539
12540
    /* We are loading a DTD */
12541
0
    ctxt->options |= XML_PARSE_DTDLOAD;
12542
12543
0
    xmlDetectSAX2(ctxt);
12544
12545
    /*
12546
     * generate a parser input from the I/O handler
12547
     */
12548
12549
0
    pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12550
0
    if (pinput == NULL) {
12551
0
        xmlFreeParserInputBuffer(input);
12552
0
  xmlFreeParserCtxt(ctxt);
12553
0
  return(NULL);
12554
0
    }
12555
12556
    /*
12557
     * plug some encoding conversion routines here.
12558
     */
12559
0
    if (xmlPushInput(ctxt, pinput) < 0) {
12560
0
  xmlFreeParserCtxt(ctxt);
12561
0
  return(NULL);
12562
0
    }
12563
0
    if (enc != XML_CHAR_ENCODING_NONE) {
12564
0
        xmlSwitchEncoding(ctxt, enc);
12565
0
    }
12566
12567
0
    pinput->filename = NULL;
12568
0
    pinput->line = 1;
12569
0
    pinput->col = 1;
12570
0
    pinput->base = ctxt->input->cur;
12571
0
    pinput->cur = ctxt->input->cur;
12572
0
    pinput->free = NULL;
12573
12574
    /*
12575
     * let's parse that entity knowing it's an external subset.
12576
     */
12577
0
    ctxt->inSubset = 2;
12578
0
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12579
0
    if (ctxt->myDoc == NULL) {
12580
0
  xmlErrMemory(ctxt, "New Doc failed");
12581
0
  return(NULL);
12582
0
    }
12583
0
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
12584
0
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12585
0
                                 BAD_CAST "none", BAD_CAST "none");
12586
12587
0
    if ((enc == XML_CHAR_ENCODING_NONE) &&
12588
0
        ((ctxt->input->end - ctxt->input->cur) >= 4)) {
12589
  /*
12590
   * Get the 4 first bytes and decode the charset
12591
   * if enc != XML_CHAR_ENCODING_NONE
12592
   * plug some encoding conversion routines.
12593
   */
12594
0
  start[0] = RAW;
12595
0
  start[1] = NXT(1);
12596
0
  start[2] = NXT(2);
12597
0
  start[3] = NXT(3);
12598
0
  enc = xmlDetectCharEncoding(start, 4);
12599
0
  if (enc != XML_CHAR_ENCODING_NONE) {
12600
0
      xmlSwitchEncoding(ctxt, enc);
12601
0
  }
12602
0
    }
12603
12604
0
    xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12605
12606
0
    if (ctxt->myDoc != NULL) {
12607
0
  if (ctxt->wellFormed) {
12608
0
      ret = ctxt->myDoc->extSubset;
12609
0
      ctxt->myDoc->extSubset = NULL;
12610
0
      if (ret != NULL) {
12611
0
    xmlNodePtr tmp;
12612
12613
0
    ret->doc = NULL;
12614
0
    tmp = ret->children;
12615
0
    while (tmp != NULL) {
12616
0
        tmp->doc = NULL;
12617
0
        tmp = tmp->next;
12618
0
    }
12619
0
      }
12620
0
  } else {
12621
0
      ret = NULL;
12622
0
  }
12623
0
        xmlFreeDoc(ctxt->myDoc);
12624
0
        ctxt->myDoc = NULL;
12625
0
    }
12626
0
    xmlFreeParserCtxt(ctxt);
12627
12628
0
    return(ret);
12629
0
}
12630
12631
/**
12632
 * xmlSAXParseDTD:
12633
 * @sax:  the SAX handler block
12634
 * @ExternalID:  a NAME* containing the External ID of the DTD
12635
 * @SystemID:  a NAME* containing the URL to the DTD
12636
 *
12637
 * DEPRECATED: Don't use.
12638
 *
12639
 * Load and parse an external subset.
12640
 *
12641
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12642
 */
12643
12644
xmlDtdPtr
12645
xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12646
0
                          const xmlChar *SystemID) {
12647
0
    xmlDtdPtr ret = NULL;
12648
0
    xmlParserCtxtPtr ctxt;
12649
0
    xmlParserInputPtr input = NULL;
12650
0
    xmlCharEncoding enc;
12651
0
    xmlChar* systemIdCanonic;
12652
12653
0
    if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12654
12655
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12656
0
    if (ctxt == NULL) {
12657
0
  return(NULL);
12658
0
    }
12659
12660
    /* We are loading a DTD */
12661
0
    ctxt->options |= XML_PARSE_DTDLOAD;
12662
12663
    /*
12664
     * Canonicalise the system ID
12665
     */
12666
0
    systemIdCanonic = xmlCanonicPath(SystemID);
12667
0
    if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
12668
0
  xmlFreeParserCtxt(ctxt);
12669
0
  return(NULL);
12670
0
    }
12671
12672
    /*
12673
     * Ask the Entity resolver to load the damn thing
12674
     */
12675
12676
0
    if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
12677
0
  input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12678
0
                                   systemIdCanonic);
12679
0
    if (input == NULL) {
12680
0
  xmlFreeParserCtxt(ctxt);
12681
0
  if (systemIdCanonic != NULL)
12682
0
      xmlFree(systemIdCanonic);
12683
0
  return(NULL);
12684
0
    }
12685
12686
    /*
12687
     * plug some encoding conversion routines here.
12688
     */
12689
0
    if (xmlPushInput(ctxt, input) < 0) {
12690
0
  xmlFreeParserCtxt(ctxt);
12691
0
  if (systemIdCanonic != NULL)
12692
0
      xmlFree(systemIdCanonic);
12693
0
  return(NULL);
12694
0
    }
12695
0
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12696
0
  enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12697
0
  xmlSwitchEncoding(ctxt, enc);
12698
0
    }
12699
12700
0
    if (input->filename == NULL)
12701
0
  input->filename = (char *) systemIdCanonic;
12702
0
    else
12703
0
  xmlFree(systemIdCanonic);
12704
0
    input->line = 1;
12705
0
    input->col = 1;
12706
0
    input->base = ctxt->input->cur;
12707
0
    input->cur = ctxt->input->cur;
12708
0
    input->free = NULL;
12709
12710
    /*
12711
     * let's parse that entity knowing it's an external subset.
12712
     */
12713
0
    ctxt->inSubset = 2;
12714
0
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12715
0
    if (ctxt->myDoc == NULL) {
12716
0
  xmlErrMemory(ctxt, "New Doc failed");
12717
0
  xmlFreeParserCtxt(ctxt);
12718
0
  return(NULL);
12719
0
    }
12720
0
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
12721
0
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12722
0
                                 ExternalID, SystemID);
12723
0
    xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12724
12725
0
    if (ctxt->myDoc != NULL) {
12726
0
  if (ctxt->wellFormed) {
12727
0
      ret = ctxt->myDoc->extSubset;
12728
0
      ctxt->myDoc->extSubset = NULL;
12729
0
      if (ret != NULL) {
12730
0
    xmlNodePtr tmp;
12731
12732
0
    ret->doc = NULL;
12733
0
    tmp = ret->children;
12734
0
    while (tmp != NULL) {
12735
0
        tmp->doc = NULL;
12736
0
        tmp = tmp->next;
12737
0
    }
12738
0
      }
12739
0
  } else {
12740
0
      ret = NULL;
12741
0
  }
12742
0
        xmlFreeDoc(ctxt->myDoc);
12743
0
        ctxt->myDoc = NULL;
12744
0
    }
12745
0
    xmlFreeParserCtxt(ctxt);
12746
12747
0
    return(ret);
12748
0
}
12749
12750
12751
/**
12752
 * xmlParseDTD:
12753
 * @ExternalID:  a NAME* containing the External ID of the DTD
12754
 * @SystemID:  a NAME* containing the URL to the DTD
12755
 *
12756
 * Load and parse an external subset.
12757
 *
12758
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12759
 */
12760
12761
xmlDtdPtr
12762
0
xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12763
0
    return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12764
0
}
12765
#endif /* LIBXML_VALID_ENABLED */
12766
12767
/************************************************************************
12768
 *                  *
12769
 *    Front ends when parsing an Entity     *
12770
 *                  *
12771
 ************************************************************************/
12772
12773
/**
12774
 * xmlParseCtxtExternalEntity:
12775
 * @ctx:  the existing parsing context
12776
 * @URL:  the URL for the entity to load
12777
 * @ID:  the System ID for the entity to load
12778
 * @lst:  the return value for the set of parsed nodes
12779
 *
12780
 * Parse an external general entity within an existing parsing context
12781
 * An external general parsed entity is well-formed if it matches the
12782
 * production labeled extParsedEnt.
12783
 *
12784
 * [78] extParsedEnt ::= TextDecl? content
12785
 *
12786
 * Returns 0 if the entity is well formed, -1 in case of args problem and
12787
 *    the parser error code otherwise
12788
 */
12789
12790
int
12791
xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
12792
0
                 const xmlChar *ID, xmlNodePtr *lst) {
12793
0
    void *userData;
12794
12795
0
    if (ctx == NULL) return(-1);
12796
    /*
12797
     * If the user provided their own SAX callbacks, then reuse the
12798
     * userData callback field, otherwise the expected setup in a
12799
     * DOM builder is to have userData == ctxt
12800
     */
12801
0
    if (ctx->userData == ctx)
12802
0
        userData = NULL;
12803
0
    else
12804
0
        userData = ctx->userData;
12805
0
    return xmlParseExternalEntityPrivate(ctx->myDoc, ctx, ctx->sax,
12806
0
                                         userData, ctx->depth + 1,
12807
0
                                         URL, ID, lst);
12808
0
}
12809
12810
/**
12811
 * xmlParseExternalEntityPrivate:
12812
 * @doc:  the document the chunk pertains to
12813
 * @oldctxt:  the previous parser context if available
12814
 * @sax:  the SAX handler block (possibly NULL)
12815
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12816
 * @depth:  Used for loop detection, use 0
12817
 * @URL:  the URL for the entity to load
12818
 * @ID:  the System ID for the entity to load
12819
 * @list:  the return value for the set of parsed nodes
12820
 *
12821
 * Private version of xmlParseExternalEntity()
12822
 *
12823
 * Returns 0 if the entity is well formed, -1 in case of args problem and
12824
 *    the parser error code otherwise
12825
 */
12826
12827
static xmlParserErrors
12828
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
12829
                xmlSAXHandlerPtr sax,
12830
          void *user_data, int depth, const xmlChar *URL,
12831
78.4k
          const xmlChar *ID, xmlNodePtr *list) {
12832
78.4k
    xmlParserCtxtPtr ctxt;
12833
78.4k
    xmlDocPtr newDoc;
12834
78.4k
    xmlNodePtr newRoot;
12835
78.4k
    xmlParserErrors ret = XML_ERR_OK;
12836
78.4k
    xmlChar start[4];
12837
78.4k
    xmlCharEncoding enc;
12838
12839
78.4k
    if (((depth > 40) &&
12840
78.4k
  ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12841
78.4k
  (depth > 100)) {
12842
0
  xmlFatalErrMsg(oldctxt, XML_ERR_ENTITY_LOOP,
12843
0
                       "Maximum entity nesting depth exceeded");
12844
0
        return(XML_ERR_ENTITY_LOOP);
12845
0
    }
12846
12847
78.4k
    if (list != NULL)
12848
10.8k
        *list = NULL;
12849
78.4k
    if ((URL == NULL) && (ID == NULL))
12850
58
  return(XML_ERR_INTERNAL_ERROR);
12851
78.3k
    if (doc == NULL)
12852
0
  return(XML_ERR_INTERNAL_ERROR);
12853
12854
78.3k
    ctxt = xmlCreateEntityParserCtxtInternal(sax, user_data, URL, ID, NULL,
12855
78.3k
                                             oldctxt);
12856
78.3k
    if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
12857
17.6k
    if (oldctxt != NULL) {
12858
17.6k
        ctxt->nbErrors = oldctxt->nbErrors;
12859
17.6k
        ctxt->nbWarnings = oldctxt->nbWarnings;
12860
17.6k
    }
12861
17.6k
    xmlDetectSAX2(ctxt);
12862
12863
17.6k
    newDoc = xmlNewDoc(BAD_CAST "1.0");
12864
17.6k
    if (newDoc == NULL) {
12865
0
  xmlFreeParserCtxt(ctxt);
12866
0
  return(XML_ERR_INTERNAL_ERROR);
12867
0
    }
12868
17.6k
    newDoc->properties = XML_DOC_INTERNAL;
12869
17.6k
    if (doc) {
12870
17.6k
        newDoc->intSubset = doc->intSubset;
12871
17.6k
        newDoc->extSubset = doc->extSubset;
12872
17.6k
        if (doc->dict) {
12873
12.9k
            newDoc->dict = doc->dict;
12874
12.9k
            xmlDictReference(newDoc->dict);
12875
12.9k
        }
12876
17.6k
        if (doc->URL != NULL) {
12877
11.5k
            newDoc->URL = xmlStrdup(doc->URL);
12878
11.5k
        }
12879
17.6k
    }
12880
17.6k
    newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12881
17.6k
    if (newRoot == NULL) {
12882
0
  if (sax != NULL)
12883
0
  xmlFreeParserCtxt(ctxt);
12884
0
  newDoc->intSubset = NULL;
12885
0
  newDoc->extSubset = NULL;
12886
0
        xmlFreeDoc(newDoc);
12887
0
  return(XML_ERR_INTERNAL_ERROR);
12888
0
    }
12889
17.6k
    xmlAddChild((xmlNodePtr) newDoc, newRoot);
12890
17.6k
    nodePush(ctxt, newDoc->children);
12891
17.6k
    if (doc == NULL) {
12892
0
        ctxt->myDoc = newDoc;
12893
17.6k
    } else {
12894
17.6k
        ctxt->myDoc = doc;
12895
17.6k
        newRoot->doc = doc;
12896
17.6k
    }
12897
12898
    /*
12899
     * Get the 4 first bytes and decode the charset
12900
     * if enc != XML_CHAR_ENCODING_NONE
12901
     * plug some encoding conversion routines.
12902
     */
12903
17.6k
    GROW;
12904
17.6k
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12905
17.5k
  start[0] = RAW;
12906
17.5k
  start[1] = NXT(1);
12907
17.5k
  start[2] = NXT(2);
12908
17.5k
  start[3] = NXT(3);
12909
17.5k
  enc = xmlDetectCharEncoding(start, 4);
12910
17.5k
  if (enc != XML_CHAR_ENCODING_NONE) {
12911
524
      xmlSwitchEncoding(ctxt, enc);
12912
524
  }
12913
17.5k
    }
12914
12915
    /*
12916
     * Parse a possible text declaration first
12917
     */
12918
17.6k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
12919
476
  xmlParseTextDecl(ctxt);
12920
        /*
12921
         * An XML-1.0 document can't reference an entity not XML-1.0
12922
         */
12923
476
        if ((xmlStrEqual(oldctxt->version, BAD_CAST "1.0")) &&
12924
476
            (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
12925
36
            xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
12926
36
                           "Version mismatch between document and entity\n");
12927
36
        }
12928
476
    }
12929
12930
17.6k
    ctxt->instate = XML_PARSER_CONTENT;
12931
17.6k
    ctxt->depth = depth;
12932
17.6k
    if (oldctxt != NULL) {
12933
17.6k
  ctxt->_private = oldctxt->_private;
12934
17.6k
  ctxt->loadsubset = oldctxt->loadsubset;
12935
17.6k
  ctxt->validate = oldctxt->validate;
12936
17.6k
  ctxt->valid = oldctxt->valid;
12937
17.6k
  ctxt->replaceEntities = oldctxt->replaceEntities;
12938
17.6k
        if (oldctxt->validate) {
12939
6.16k
            ctxt->vctxt.error = oldctxt->vctxt.error;
12940
6.16k
            ctxt->vctxt.warning = oldctxt->vctxt.warning;
12941
6.16k
            ctxt->vctxt.userData = oldctxt->vctxt.userData;
12942
6.16k
            ctxt->vctxt.flags = oldctxt->vctxt.flags;
12943
6.16k
        }
12944
17.6k
  ctxt->external = oldctxt->external;
12945
17.6k
        if (ctxt->dict) xmlDictFree(ctxt->dict);
12946
17.6k
        ctxt->dict = oldctxt->dict;
12947
17.6k
        ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12948
17.6k
        ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12949
17.6k
        ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12950
17.6k
        ctxt->dictNames = oldctxt->dictNames;
12951
17.6k
        ctxt->attsDefault = oldctxt->attsDefault;
12952
17.6k
        ctxt->attsSpecial = oldctxt->attsSpecial;
12953
17.6k
        ctxt->linenumbers = oldctxt->linenumbers;
12954
17.6k
  ctxt->record_info = oldctxt->record_info;
12955
17.6k
  ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
12956
17.6k
  ctxt->node_seq.length = oldctxt->node_seq.length;
12957
17.6k
  ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
12958
17.6k
    } else {
12959
  /*
12960
   * Doing validity checking on chunk without context
12961
   * doesn't make sense
12962
   */
12963
0
  ctxt->_private = NULL;
12964
0
  ctxt->validate = 0;
12965
0
  ctxt->external = 2;
12966
0
  ctxt->loadsubset = 0;
12967
0
    }
12968
12969
17.6k
    xmlParseContent(ctxt);
12970
12971
17.6k
    if ((RAW == '<') && (NXT(1) == '/')) {
12972
195
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12973
17.4k
    } else if (RAW != 0) {
12974
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12975
0
    }
12976
17.6k
    if (ctxt->node != newDoc->children) {
12977
880
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12978
880
    }
12979
12980
17.6k
    if (!ctxt->wellFormed) {
12981
6.16k
  ret = (xmlParserErrors)ctxt->errNo;
12982
6.16k
        if (oldctxt != NULL) {
12983
6.16k
            oldctxt->errNo = ctxt->errNo;
12984
6.16k
            oldctxt->wellFormed = 0;
12985
6.16k
            xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12986
6.16k
        }
12987
11.4k
    } else {
12988
11.4k
  if (list != NULL) {
12989
2.89k
      xmlNodePtr cur;
12990
12991
      /*
12992
       * Return the newly created nodeset after unlinking it from
12993
       * they pseudo parent.
12994
       */
12995
2.89k
      cur = newDoc->children->children;
12996
2.89k
      *list = cur;
12997
379k
      while (cur != NULL) {
12998
376k
    cur->parent = NULL;
12999
376k
    cur = cur->next;
13000
376k
      }
13001
2.89k
            newDoc->children->children = NULL;
13002
2.89k
  }
13003
11.4k
  ret = XML_ERR_OK;
13004
11.4k
    }
13005
13006
    /*
13007
     * Also record the size of the entity parsed
13008
     */
13009
17.6k
    if (ctxt->input != NULL && oldctxt != NULL) {
13010
17.6k
        unsigned long consumed = ctxt->input->consumed;
13011
13012
17.6k
        xmlSaturatedAddSizeT(&consumed, ctxt->input->cur - ctxt->input->base);
13013
13014
17.6k
        xmlSaturatedAdd(&oldctxt->sizeentities, consumed);
13015
17.6k
        xmlSaturatedAdd(&oldctxt->sizeentities, ctxt->sizeentities);
13016
13017
17.6k
        xmlSaturatedAdd(&oldctxt->sizeentcopy, consumed);
13018
17.6k
        xmlSaturatedAdd(&oldctxt->sizeentcopy, ctxt->sizeentcopy);
13019
17.6k
    }
13020
13021
17.6k
    if (oldctxt != NULL) {
13022
17.6k
        ctxt->dict = NULL;
13023
17.6k
        ctxt->attsDefault = NULL;
13024
17.6k
        ctxt->attsSpecial = NULL;
13025
17.6k
        oldctxt->nbErrors = ctxt->nbErrors;
13026
17.6k
        oldctxt->nbWarnings = ctxt->nbWarnings;
13027
17.6k
        oldctxt->validate = ctxt->validate;
13028
17.6k
        oldctxt->valid = ctxt->valid;
13029
17.6k
        oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
13030
17.6k
        oldctxt->node_seq.length = ctxt->node_seq.length;
13031
17.6k
        oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
13032
17.6k
    }
13033
17.6k
    ctxt->node_seq.maximum = 0;
13034
17.6k
    ctxt->node_seq.length = 0;
13035
17.6k
    ctxt->node_seq.buffer = NULL;
13036
17.6k
    xmlFreeParserCtxt(ctxt);
13037
17.6k
    newDoc->intSubset = NULL;
13038
17.6k
    newDoc->extSubset = NULL;
13039
17.6k
    xmlFreeDoc(newDoc);
13040
13041
17.6k
    return(ret);
13042
17.6k
}
13043
13044
#ifdef LIBXML_SAX1_ENABLED
13045
/**
13046
 * xmlParseExternalEntity:
13047
 * @doc:  the document the chunk pertains to
13048
 * @sax:  the SAX handler block (possibly NULL)
13049
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13050
 * @depth:  Used for loop detection, use 0
13051
 * @URL:  the URL for the entity to load
13052
 * @ID:  the System ID for the entity to load
13053
 * @lst:  the return value for the set of parsed nodes
13054
 *
13055
 * Parse an external general entity
13056
 * An external general parsed entity is well-formed if it matches the
13057
 * production labeled extParsedEnt.
13058
 *
13059
 * [78] extParsedEnt ::= TextDecl? content
13060
 *
13061
 * Returns 0 if the entity is well formed, -1 in case of args problem and
13062
 *    the parser error code otherwise
13063
 */
13064
13065
int
13066
xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
13067
0
    int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
13068
0
    return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
13069
0
                           ID, lst));
13070
0
}
13071
13072
/**
13073
 * xmlParseBalancedChunkMemory:
13074
 * @doc:  the document the chunk pertains to (must not be NULL)
13075
 * @sax:  the SAX handler block (possibly NULL)
13076
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13077
 * @depth:  Used for loop detection, use 0
13078
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13079
 * @lst:  the return value for the set of parsed nodes
13080
 *
13081
 * Parse a well-balanced chunk of an XML document
13082
 * called by the parser
13083
 * The allowed sequence for the Well Balanced Chunk is the one defined by
13084
 * the content production in the XML grammar:
13085
 *
13086
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13087
 *
13088
 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13089
 *    the parser error code otherwise
13090
 */
13091
13092
int
13093
xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13094
0
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
13095
0
    return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
13096
0
                                                depth, string, lst, 0 );
13097
0
}
13098
#endif /* LIBXML_SAX1_ENABLED */
13099
13100
/**
13101
 * xmlParseBalancedChunkMemoryInternal:
13102
 * @oldctxt:  the existing parsing context
13103
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13104
 * @user_data:  the user data field for the parser context
13105
 * @lst:  the return value for the set of parsed nodes
13106
 *
13107
 *
13108
 * Parse a well-balanced chunk of an XML document
13109
 * called by the parser
13110
 * The allowed sequence for the Well Balanced Chunk is the one defined by
13111
 * the content production in the XML grammar:
13112
 *
13113
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13114
 *
13115
 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13116
 * error code otherwise
13117
 *
13118
 * In case recover is set to 1, the nodelist will not be empty even if
13119
 * the parsed chunk is not well balanced.
13120
 */
13121
static xmlParserErrors
13122
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
13123
9.98k
  const xmlChar *string, void *user_data, xmlNodePtr *lst) {
13124
9.98k
    xmlParserCtxtPtr ctxt;
13125
9.98k
    xmlDocPtr newDoc = NULL;
13126
9.98k
    xmlNodePtr newRoot;
13127
9.98k
    xmlSAXHandlerPtr oldsax = NULL;
13128
9.98k
    xmlNodePtr content = NULL;
13129
9.98k
    xmlNodePtr last = NULL;
13130
9.98k
    int size;
13131
9.98k
    xmlParserErrors ret = XML_ERR_OK;
13132
9.98k
#ifdef SAX2
13133
9.98k
    int i;
13134
9.98k
#endif
13135
13136
9.98k
    if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13137
9.98k
        (oldctxt->depth >  100)) {
13138
39
  xmlFatalErrMsg(oldctxt, XML_ERR_ENTITY_LOOP,
13139
39
                       "Maximum entity nesting depth exceeded");
13140
39
  return(XML_ERR_ENTITY_LOOP);
13141
39
    }
13142
13143
13144
9.94k
    if (lst != NULL)
13145
9.94k
        *lst = NULL;
13146
9.94k
    if (string == NULL)
13147
16
        return(XML_ERR_INTERNAL_ERROR);
13148
13149
9.93k
    size = xmlStrlen(string);
13150
13151
9.93k
    ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13152
9.93k
    if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13153
9.86k
    ctxt->nbErrors = oldctxt->nbErrors;
13154
9.86k
    ctxt->nbWarnings = oldctxt->nbWarnings;
13155
9.86k
    if (user_data != NULL)
13156
0
  ctxt->userData = user_data;
13157
9.86k
    else
13158
9.86k
  ctxt->userData = ctxt;
13159
9.86k
    if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13160
9.86k
    ctxt->dict = oldctxt->dict;
13161
9.86k
    ctxt->input_id = oldctxt->input_id;
13162
9.86k
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13163
9.86k
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13164
9.86k
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13165
13166
9.86k
#ifdef SAX2
13167
    /* propagate namespaces down the entity */
13168
9.86k
    for (i = 0;i < oldctxt->nsNr;i += 2) {
13169
0
        nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
13170
0
    }
13171
9.86k
#endif
13172
13173
9.86k
    oldsax = ctxt->sax;
13174
9.86k
    ctxt->sax = oldctxt->sax;
13175
9.86k
    xmlDetectSAX2(ctxt);
13176
9.86k
    ctxt->replaceEntities = oldctxt->replaceEntities;
13177
9.86k
    ctxt->options = oldctxt->options;
13178
13179
9.86k
    ctxt->_private = oldctxt->_private;
13180
9.86k
    if (oldctxt->myDoc == NULL) {
13181
0
  newDoc = xmlNewDoc(BAD_CAST "1.0");
13182
0
  if (newDoc == NULL) {
13183
0
      ctxt->sax = oldsax;
13184
0
      ctxt->dict = NULL;
13185
0
      xmlFreeParserCtxt(ctxt);
13186
0
      return(XML_ERR_INTERNAL_ERROR);
13187
0
  }
13188
0
  newDoc->properties = XML_DOC_INTERNAL;
13189
0
  newDoc->dict = ctxt->dict;
13190
0
  xmlDictReference(newDoc->dict);
13191
0
  ctxt->myDoc = newDoc;
13192
9.86k
    } else {
13193
9.86k
  ctxt->myDoc = oldctxt->myDoc;
13194
9.86k
        content = ctxt->myDoc->children;
13195
9.86k
  last = ctxt->myDoc->last;
13196
9.86k
    }
13197
9.86k
    newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
13198
9.86k
    if (newRoot == NULL) {
13199
0
  ctxt->sax = oldsax;
13200
0
  ctxt->dict = NULL;
13201
0
  xmlFreeParserCtxt(ctxt);
13202
0
  if (newDoc != NULL) {
13203
0
      xmlFreeDoc(newDoc);
13204
0
  }
13205
0
  return(XML_ERR_INTERNAL_ERROR);
13206
0
    }
13207
9.86k
    ctxt->myDoc->children = NULL;
13208
9.86k
    ctxt->myDoc->last = NULL;
13209
9.86k
    xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
13210
9.86k
    nodePush(ctxt, ctxt->myDoc->children);
13211
9.86k
    ctxt->instate = XML_PARSER_CONTENT;
13212
9.86k
    ctxt->depth = oldctxt->depth;
13213
13214
9.86k
    ctxt->validate = 0;
13215
9.86k
    ctxt->loadsubset = oldctxt->loadsubset;
13216
9.86k
    if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
13217
  /*
13218
   * ID/IDREF registration will be done in xmlValidateElement below
13219
   */
13220
7.75k
  ctxt->loadsubset |= XML_SKIP_IDS;
13221
7.75k
    }
13222
9.86k
    ctxt->dictNames = oldctxt->dictNames;
13223
9.86k
    ctxt->attsDefault = oldctxt->attsDefault;
13224
9.86k
    ctxt->attsSpecial = oldctxt->attsSpecial;
13225
13226
9.86k
    xmlParseContent(ctxt);
13227
9.86k
    if ((RAW == '<') && (NXT(1) == '/')) {
13228
52
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13229
9.81k
    } else if (RAW != 0) {
13230
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13231
0
    }
13232
9.86k
    if (ctxt->node != ctxt->myDoc->children) {
13233
118
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13234
118
    }
13235
13236
9.86k
    if (!ctxt->wellFormed) {
13237
4.18k
  ret = (xmlParserErrors)ctxt->errNo;
13238
4.18k
        oldctxt->errNo = ctxt->errNo;
13239
4.18k
        oldctxt->wellFormed = 0;
13240
4.18k
        xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13241
5.68k
    } else {
13242
5.68k
        ret = XML_ERR_OK;
13243
5.68k
    }
13244
13245
9.86k
    if ((lst != NULL) && (ret == XML_ERR_OK)) {
13246
5.68k
  xmlNodePtr cur;
13247
13248
  /*
13249
   * Return the newly created nodeset after unlinking it from
13250
   * they pseudo parent.
13251
   */
13252
5.68k
  cur = ctxt->myDoc->children->children;
13253
5.68k
  *lst = cur;
13254
14.7k
  while (cur != NULL) {
13255
9.09k
#ifdef LIBXML_VALID_ENABLED
13256
9.09k
      if ((oldctxt->validate) && (oldctxt->wellFormed) &&
13257
9.09k
    (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
13258
9.09k
    (cur->type == XML_ELEMENT_NODE)) {
13259
558
    oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
13260
558
      oldctxt->myDoc, cur);
13261
558
      }
13262
9.09k
#endif /* LIBXML_VALID_ENABLED */
13263
9.09k
      cur->parent = NULL;
13264
9.09k
      cur = cur->next;
13265
9.09k
  }
13266
5.68k
  ctxt->myDoc->children->children = NULL;
13267
5.68k
    }
13268
9.86k
    if (ctxt->myDoc != NULL) {
13269
9.86k
  xmlFreeNode(ctxt->myDoc->children);
13270
9.86k
        ctxt->myDoc->children = content;
13271
9.86k
        ctxt->myDoc->last = last;
13272
9.86k
    }
13273
13274
    /*
13275
     * Also record the size of the entity parsed
13276
     */
13277
9.86k
    if (ctxt->input != NULL && oldctxt != NULL) {
13278
9.86k
        unsigned long consumed = ctxt->input->consumed;
13279
13280
9.86k
        xmlSaturatedAddSizeT(&consumed, ctxt->input->cur - ctxt->input->base);
13281
13282
9.86k
        xmlSaturatedAdd(&oldctxt->sizeentcopy, consumed);
13283
9.86k
        xmlSaturatedAdd(&oldctxt->sizeentcopy, ctxt->sizeentcopy);
13284
9.86k
    }
13285
13286
9.86k
    oldctxt->nbErrors = ctxt->nbErrors;
13287
9.86k
    oldctxt->nbWarnings = ctxt->nbWarnings;
13288
9.86k
    ctxt->sax = oldsax;
13289
9.86k
    ctxt->dict = NULL;
13290
9.86k
    ctxt->attsDefault = NULL;
13291
9.86k
    ctxt->attsSpecial = NULL;
13292
9.86k
    xmlFreeParserCtxt(ctxt);
13293
9.86k
    if (newDoc != NULL) {
13294
0
  xmlFreeDoc(newDoc);
13295
0
    }
13296
13297
9.86k
    return(ret);
13298
9.86k
}
13299
13300
/**
13301
 * xmlParseInNodeContext:
13302
 * @node:  the context node
13303
 * @data:  the input string
13304
 * @datalen:  the input string length in bytes
13305
 * @options:  a combination of xmlParserOption
13306
 * @lst:  the return value for the set of parsed nodes
13307
 *
13308
 * Parse a well-balanced chunk of an XML document
13309
 * within the context (DTD, namespaces, etc ...) of the given node.
13310
 *
13311
 * The allowed sequence for the data is a Well Balanced Chunk defined by
13312
 * the content production in the XML grammar:
13313
 *
13314
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13315
 *
13316
 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13317
 * error code otherwise
13318
 */
13319
xmlParserErrors
13320
xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13321
0
                      int options, xmlNodePtr *lst) {
13322
0
#ifdef SAX2
13323
0
    xmlParserCtxtPtr ctxt;
13324
0
    xmlDocPtr doc = NULL;
13325
0
    xmlNodePtr fake, cur;
13326
0
    int nsnr = 0;
13327
13328
0
    xmlParserErrors ret = XML_ERR_OK;
13329
13330
    /*
13331
     * check all input parameters, grab the document
13332
     */
13333
0
    if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13334
0
        return(XML_ERR_INTERNAL_ERROR);
13335
0
    switch (node->type) {
13336
0
        case XML_ELEMENT_NODE:
13337
0
        case XML_ATTRIBUTE_NODE:
13338
0
        case XML_TEXT_NODE:
13339
0
        case XML_CDATA_SECTION_NODE:
13340
0
        case XML_ENTITY_REF_NODE:
13341
0
        case XML_PI_NODE:
13342
0
        case XML_COMMENT_NODE:
13343
0
        case XML_DOCUMENT_NODE:
13344
0
        case XML_HTML_DOCUMENT_NODE:
13345
0
      break;
13346
0
  default:
13347
0
      return(XML_ERR_INTERNAL_ERROR);
13348
13349
0
    }
13350
0
    while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13351
0
           (node->type != XML_DOCUMENT_NODE) &&
13352
0
     (node->type != XML_HTML_DOCUMENT_NODE))
13353
0
  node = node->parent;
13354
0
    if (node == NULL)
13355
0
  return(XML_ERR_INTERNAL_ERROR);
13356
0
    if (node->type == XML_ELEMENT_NODE)
13357
0
  doc = node->doc;
13358
0
    else
13359
0
        doc = (xmlDocPtr) node;
13360
0
    if (doc == NULL)
13361
0
  return(XML_ERR_INTERNAL_ERROR);
13362
13363
    /*
13364
     * allocate a context and set-up everything not related to the
13365
     * node position in the tree
13366
     */
13367
0
    if (doc->type == XML_DOCUMENT_NODE)
13368
0
  ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13369
0
#ifdef LIBXML_HTML_ENABLED
13370
0
    else if (doc->type == XML_HTML_DOCUMENT_NODE) {
13371
0
  ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
13372
        /*
13373
         * When parsing in context, it makes no sense to add implied
13374
         * elements like html/body/etc...
13375
         */
13376
0
        options |= HTML_PARSE_NOIMPLIED;
13377
0
    }
13378
0
#endif
13379
0
    else
13380
0
        return(XML_ERR_INTERNAL_ERROR);
13381
13382
0
    if (ctxt == NULL)
13383
0
        return(XML_ERR_NO_MEMORY);
13384
13385
    /*
13386
     * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13387
     * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13388
     * we must wait until the last moment to free the original one.
13389
     */
13390
0
    if (doc->dict != NULL) {
13391
0
        if (ctxt->dict != NULL)
13392
0
      xmlDictFree(ctxt->dict);
13393
0
  ctxt->dict = doc->dict;
13394
0
    } else
13395
0
        options |= XML_PARSE_NODICT;
13396
13397
0
    if (doc->encoding != NULL) {
13398
0
        xmlCharEncodingHandlerPtr hdlr;
13399
13400
0
        if (ctxt->encoding != NULL)
13401
0
      xmlFree((xmlChar *) ctxt->encoding);
13402
0
        ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13403
13404
0
        hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding);
13405
0
        if (hdlr != NULL) {
13406
0
            xmlSwitchToEncoding(ctxt, hdlr);
13407
0
  } else {
13408
0
            return(XML_ERR_UNSUPPORTED_ENCODING);
13409
0
        }
13410
0
    }
13411
13412
0
    xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13413
0
    xmlDetectSAX2(ctxt);
13414
0
    ctxt->myDoc = doc;
13415
    /* parsing in context, i.e. as within existing content */
13416
0
    ctxt->input_id = 2;
13417
0
    ctxt->instate = XML_PARSER_CONTENT;
13418
13419
0
    fake = xmlNewDocComment(node->doc, NULL);
13420
0
    if (fake == NULL) {
13421
0
        xmlFreeParserCtxt(ctxt);
13422
0
  return(XML_ERR_NO_MEMORY);
13423
0
    }
13424
0
    xmlAddChild(node, fake);
13425
13426
0
    if (node->type == XML_ELEMENT_NODE) {
13427
0
  nodePush(ctxt, node);
13428
  /*
13429
   * initialize the SAX2 namespaces stack
13430
   */
13431
0
  cur = node;
13432
0
  while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13433
0
      xmlNsPtr ns = cur->nsDef;
13434
0
      const xmlChar *iprefix, *ihref;
13435
13436
0
      while (ns != NULL) {
13437
0
    if (ctxt->dict) {
13438
0
        iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13439
0
        ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13440
0
    } else {
13441
0
        iprefix = ns->prefix;
13442
0
        ihref = ns->href;
13443
0
    }
13444
13445
0
          if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13446
0
        nsPush(ctxt, iprefix, ihref);
13447
0
        nsnr++;
13448
0
    }
13449
0
    ns = ns->next;
13450
0
      }
13451
0
      cur = cur->parent;
13452
0
  }
13453
0
    }
13454
13455
0
    if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13456
  /*
13457
   * ID/IDREF registration will be done in xmlValidateElement below
13458
   */
13459
0
  ctxt->loadsubset |= XML_SKIP_IDS;
13460
0
    }
13461
13462
0
#ifdef LIBXML_HTML_ENABLED
13463
0
    if (doc->type == XML_HTML_DOCUMENT_NODE)
13464
0
        __htmlParseContent(ctxt);
13465
0
    else
13466
0
#endif
13467
0
  xmlParseContent(ctxt);
13468
13469
0
    nsPop(ctxt, nsnr);
13470
0
    if ((RAW == '<') && (NXT(1) == '/')) {
13471
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13472
0
    } else if (RAW != 0) {
13473
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13474
0
    }
13475
0
    if ((ctxt->node != NULL) && (ctxt->node != node)) {
13476
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13477
0
  ctxt->wellFormed = 0;
13478
0
    }
13479
13480
0
    if (!ctxt->wellFormed) {
13481
0
        if (ctxt->errNo == 0)
13482
0
      ret = XML_ERR_INTERNAL_ERROR;
13483
0
  else
13484
0
      ret = (xmlParserErrors)ctxt->errNo;
13485
0
    } else {
13486
0
        ret = XML_ERR_OK;
13487
0
    }
13488
13489
    /*
13490
     * Return the newly created nodeset after unlinking it from
13491
     * the pseudo sibling.
13492
     */
13493
13494
0
    cur = fake->next;
13495
0
    fake->next = NULL;
13496
0
    node->last = fake;
13497
13498
0
    if (cur != NULL) {
13499
0
  cur->prev = NULL;
13500
0
    }
13501
13502
0
    *lst = cur;
13503
13504
0
    while (cur != NULL) {
13505
0
  cur->parent = NULL;
13506
0
  cur = cur->next;
13507
0
    }
13508
13509
0
    xmlUnlinkNode(fake);
13510
0
    xmlFreeNode(fake);
13511
13512
13513
0
    if (ret != XML_ERR_OK) {
13514
0
        xmlFreeNodeList(*lst);
13515
0
  *lst = NULL;
13516
0
    }
13517
13518
0
    if (doc->dict != NULL)
13519
0
        ctxt->dict = NULL;
13520
0
    xmlFreeParserCtxt(ctxt);
13521
13522
0
    return(ret);
13523
#else /* !SAX2 */
13524
    return(XML_ERR_INTERNAL_ERROR);
13525
#endif
13526
0
}
13527
13528
#ifdef LIBXML_SAX1_ENABLED
13529
/**
13530
 * xmlParseBalancedChunkMemoryRecover:
13531
 * @doc:  the document the chunk pertains to (must not be NULL)
13532
 * @sax:  the SAX handler block (possibly NULL)
13533
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13534
 * @depth:  Used for loop detection, use 0
13535
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13536
 * @lst:  the return value for the set of parsed nodes
13537
 * @recover: return nodes even if the data is broken (use 0)
13538
 *
13539
 *
13540
 * Parse a well-balanced chunk of an XML document
13541
 * called by the parser
13542
 * The allowed sequence for the Well Balanced Chunk is the one defined by
13543
 * the content production in the XML grammar:
13544
 *
13545
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13546
 *
13547
 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13548
 *    the parser error code otherwise
13549
 *
13550
 * In case recover is set to 1, the nodelist will not be empty even if
13551
 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13552
 * some extent.
13553
 */
13554
int
13555
xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13556
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
13557
0
     int recover) {
13558
0
    xmlParserCtxtPtr ctxt;
13559
0
    xmlDocPtr newDoc;
13560
0
    xmlSAXHandlerPtr oldsax = NULL;
13561
0
    xmlNodePtr content, newRoot;
13562
0
    int size;
13563
0
    int ret = 0;
13564
13565
0
    if (depth > 40) {
13566
0
  return(XML_ERR_ENTITY_LOOP);
13567
0
    }
13568
13569
13570
0
    if (lst != NULL)
13571
0
        *lst = NULL;
13572
0
    if (string == NULL)
13573
0
        return(-1);
13574
13575
0
    size = xmlStrlen(string);
13576
13577
0
    ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13578
0
    if (ctxt == NULL) return(-1);
13579
0
    ctxt->userData = ctxt;
13580
0
    if (sax != NULL) {
13581
0
  oldsax = ctxt->sax;
13582
0
        ctxt->sax = sax;
13583
0
  if (user_data != NULL)
13584
0
      ctxt->userData = user_data;
13585
0
    }
13586
0
    newDoc = xmlNewDoc(BAD_CAST "1.0");
13587
0
    if (newDoc == NULL) {
13588
0
  xmlFreeParserCtxt(ctxt);
13589
0
  return(-1);
13590
0
    }
13591
0
    newDoc->properties = XML_DOC_INTERNAL;
13592
0
    if ((doc != NULL) && (doc->dict != NULL)) {
13593
0
        xmlDictFree(ctxt->dict);
13594
0
  ctxt->dict = doc->dict;
13595
0
  xmlDictReference(ctxt->dict);
13596
0
  ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13597
0
  ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13598
0
  ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13599
0
  ctxt->dictNames = 1;
13600
0
    } else {
13601
0
  xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
13602
0
    }
13603
    /* doc == NULL is only supported for historic reasons */
13604
0
    if (doc != NULL) {
13605
0
  newDoc->intSubset = doc->intSubset;
13606
0
  newDoc->extSubset = doc->extSubset;
13607
0
    }
13608
0
    newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13609
0
    if (newRoot == NULL) {
13610
0
  if (sax != NULL)
13611
0
      ctxt->sax = oldsax;
13612
0
  xmlFreeParserCtxt(ctxt);
13613
0
  newDoc->intSubset = NULL;
13614
0
  newDoc->extSubset = NULL;
13615
0
        xmlFreeDoc(newDoc);
13616
0
  return(-1);
13617
0
    }
13618
0
    xmlAddChild((xmlNodePtr) newDoc, newRoot);
13619
0
    nodePush(ctxt, newRoot);
13620
    /* doc == NULL is only supported for historic reasons */
13621
0
    if (doc == NULL) {
13622
0
  ctxt->myDoc = newDoc;
13623
0
    } else {
13624
0
  ctxt->myDoc = newDoc;
13625
0
  newDoc->children->doc = doc;
13626
  /* Ensure that doc has XML spec namespace */
13627
0
  xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13628
0
  newDoc->oldNs = doc->oldNs;
13629
0
    }
13630
0
    ctxt->instate = XML_PARSER_CONTENT;
13631
0
    ctxt->input_id = 2;
13632
0
    ctxt->depth = depth;
13633
13634
    /*
13635
     * Doing validity checking on chunk doesn't make sense
13636
     */
13637
0
    ctxt->validate = 0;
13638
0
    ctxt->loadsubset = 0;
13639
0
    xmlDetectSAX2(ctxt);
13640
13641
0
    if ( doc != NULL ){
13642
0
        content = doc->children;
13643
0
        doc->children = NULL;
13644
0
        xmlParseContent(ctxt);
13645
0
        doc->children = content;
13646
0
    }
13647
0
    else {
13648
0
        xmlParseContent(ctxt);
13649
0
    }
13650
0
    if ((RAW == '<') && (NXT(1) == '/')) {
13651
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13652
0
    } else if (RAW != 0) {
13653
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13654
0
    }
13655
0
    if (ctxt->node != newDoc->children) {
13656
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13657
0
    }
13658
13659
0
    if (!ctxt->wellFormed) {
13660
0
        if (ctxt->errNo == 0)
13661
0
      ret = 1;
13662
0
  else
13663
0
      ret = ctxt->errNo;
13664
0
    } else {
13665
0
      ret = 0;
13666
0
    }
13667
13668
0
    if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13669
0
  xmlNodePtr cur;
13670
13671
  /*
13672
   * Return the newly created nodeset after unlinking it from
13673
   * the pseudo parent.
13674
   */
13675
0
  cur = newDoc->children->children;
13676
0
  *lst = cur;
13677
0
  while (cur != NULL) {
13678
0
      xmlSetTreeDoc(cur, doc);
13679
0
      cur->parent = NULL;
13680
0
      cur = cur->next;
13681
0
  }
13682
0
  newDoc->children->children = NULL;
13683
0
    }
13684
13685
0
    if (sax != NULL)
13686
0
  ctxt->sax = oldsax;
13687
0
    xmlFreeParserCtxt(ctxt);
13688
0
    newDoc->intSubset = NULL;
13689
0
    newDoc->extSubset = NULL;
13690
    /* This leaks the namespace list if doc == NULL */
13691
0
    newDoc->oldNs = NULL;
13692
0
    xmlFreeDoc(newDoc);
13693
13694
0
    return(ret);
13695
0
}
13696
13697
/**
13698
 * xmlSAXParseEntity:
13699
 * @sax:  the SAX handler block
13700
 * @filename:  the filename
13701
 *
13702
 * DEPRECATED: Don't use.
13703
 *
13704
 * parse an XML external entity out of context and build a tree.
13705
 * It uses the given SAX function block to handle the parsing callbacks.
13706
 * If sax is NULL, fall back to the default DOM tree building routines.
13707
 *
13708
 * [78] extParsedEnt ::= TextDecl? content
13709
 *
13710
 * This corresponds to a "Well Balanced" chunk
13711
 *
13712
 * Returns the resulting document tree
13713
 */
13714
13715
xmlDocPtr
13716
0
xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13717
0
    xmlDocPtr ret;
13718
0
    xmlParserCtxtPtr ctxt;
13719
13720
0
    ctxt = xmlCreateFileParserCtxt(filename);
13721
0
    if (ctxt == NULL) {
13722
0
  return(NULL);
13723
0
    }
13724
0
    if (sax != NULL) {
13725
0
  if (ctxt->sax != NULL)
13726
0
      xmlFree(ctxt->sax);
13727
0
        ctxt->sax = sax;
13728
0
        ctxt->userData = NULL;
13729
0
    }
13730
13731
0
    xmlParseExtParsedEnt(ctxt);
13732
13733
0
    if (ctxt->wellFormed)
13734
0
  ret = ctxt->myDoc;
13735
0
    else {
13736
0
        ret = NULL;
13737
0
        xmlFreeDoc(ctxt->myDoc);
13738
0
        ctxt->myDoc = NULL;
13739
0
    }
13740
0
    if (sax != NULL)
13741
0
        ctxt->sax = NULL;
13742
0
    xmlFreeParserCtxt(ctxt);
13743
13744
0
    return(ret);
13745
0
}
13746
13747
/**
13748
 * xmlParseEntity:
13749
 * @filename:  the filename
13750
 *
13751
 * parse an XML external entity out of context and build a tree.
13752
 *
13753
 * [78] extParsedEnt ::= TextDecl? content
13754
 *
13755
 * This corresponds to a "Well Balanced" chunk
13756
 *
13757
 * Returns the resulting document tree
13758
 */
13759
13760
xmlDocPtr
13761
0
xmlParseEntity(const char *filename) {
13762
0
    return(xmlSAXParseEntity(NULL, filename));
13763
0
}
13764
#endif /* LIBXML_SAX1_ENABLED */
13765
13766
/**
13767
 * xmlCreateEntityParserCtxtInternal:
13768
 * @URL:  the entity URL
13769
 * @ID:  the entity PUBLIC ID
13770
 * @base:  a possible base for the target URI
13771
 * @pctx:  parser context used to set options on new context
13772
 *
13773
 * Create a parser context for an external entity
13774
 * Automatic support for ZLIB/Compress compressed document is provided
13775
 * by default if found at compile-time.
13776
 *
13777
 * Returns the new parser context or NULL
13778
 */
13779
static xmlParserCtxtPtr
13780
xmlCreateEntityParserCtxtInternal(xmlSAXHandlerPtr sax, void *userData,
13781
        const xmlChar *URL, const xmlChar *ID, const xmlChar *base,
13782
78.3k
        xmlParserCtxtPtr pctx) {
13783
78.3k
    xmlParserCtxtPtr ctxt;
13784
78.3k
    xmlParserInputPtr inputStream;
13785
78.3k
    char *directory = NULL;
13786
78.3k
    xmlChar *uri;
13787
13788
78.3k
    ctxt = xmlNewSAXParserCtxt(sax, userData);
13789
78.3k
    if (ctxt == NULL) {
13790
0
  return(NULL);
13791
0
    }
13792
13793
78.3k
    if (pctx != NULL) {
13794
78.3k
        ctxt->options = pctx->options;
13795
78.3k
        ctxt->_private = pctx->_private;
13796
78.3k
  ctxt->input_id = pctx->input_id;
13797
78.3k
    }
13798
13799
    /* Don't read from stdin. */
13800
78.3k
    if (xmlStrcmp(URL, BAD_CAST "-") == 0)
13801
0
        URL = BAD_CAST "./-";
13802
13803
78.3k
    uri = xmlBuildURI(URL, base);
13804
13805
78.3k
    if (uri == NULL) {
13806
151
  inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
13807
151
  if (inputStream == NULL) {
13808
151
      xmlFreeParserCtxt(ctxt);
13809
151
      return(NULL);
13810
151
  }
13811
13812
0
  inputPush(ctxt, inputStream);
13813
13814
0
  if ((ctxt->directory == NULL) && (directory == NULL))
13815
0
      directory = xmlParserGetDirectory((char *)URL);
13816
0
  if ((ctxt->directory == NULL) && (directory != NULL))
13817
0
      ctxt->directory = directory;
13818
78.2k
    } else {
13819
78.2k
  inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
13820
78.2k
  if (inputStream == NULL) {
13821
60.6k
      xmlFree(uri);
13822
60.6k
      xmlFreeParserCtxt(ctxt);
13823
60.6k
      return(NULL);
13824
60.6k
  }
13825
13826
17.6k
  inputPush(ctxt, inputStream);
13827
13828
17.6k
  if ((ctxt->directory == NULL) && (directory == NULL))
13829
17.6k
      directory = xmlParserGetDirectory((char *)uri);
13830
17.6k
  if ((ctxt->directory == NULL) && (directory != NULL))
13831
17.6k
      ctxt->directory = directory;
13832
17.6k
  xmlFree(uri);
13833
17.6k
    }
13834
17.6k
    return(ctxt);
13835
78.3k
}
13836
13837
/**
13838
 * xmlCreateEntityParserCtxt:
13839
 * @URL:  the entity URL
13840
 * @ID:  the entity PUBLIC ID
13841
 * @base:  a possible base for the target URI
13842
 *
13843
 * Create a parser context for an external entity
13844
 * Automatic support for ZLIB/Compress compressed document is provided
13845
 * by default if found at compile-time.
13846
 *
13847
 * Returns the new parser context or NULL
13848
 */
13849
xmlParserCtxtPtr
13850
xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
13851
0
                    const xmlChar *base) {
13852
0
    return xmlCreateEntityParserCtxtInternal(NULL, NULL, URL, ID, base, NULL);
13853
13854
0
}
13855
13856
/************************************************************************
13857
 *                  *
13858
 *    Front ends when parsing from a file     *
13859
 *                  *
13860
 ************************************************************************/
13861
13862
/**
13863
 * xmlCreateURLParserCtxt:
13864
 * @filename:  the filename or URL
13865
 * @options:  a combination of xmlParserOption
13866
 *
13867
 * Create a parser context for a file or URL content.
13868
 * Automatic support for ZLIB/Compress compressed document is provided
13869
 * by default if found at compile-time and for file accesses
13870
 *
13871
 * Returns the new parser context or NULL
13872
 */
13873
xmlParserCtxtPtr
13874
xmlCreateURLParserCtxt(const char *filename, int options)
13875
0
{
13876
0
    xmlParserCtxtPtr ctxt;
13877
0
    xmlParserInputPtr inputStream;
13878
0
    char *directory = NULL;
13879
13880
0
    ctxt = xmlNewParserCtxt();
13881
0
    if (ctxt == NULL) {
13882
0
  xmlErrMemory(NULL, "cannot allocate parser context");
13883
0
  return(NULL);
13884
0
    }
13885
13886
0
    if (options)
13887
0
  xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13888
0
    ctxt->linenumbers = 1;
13889
13890
0
    inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
13891
0
    if (inputStream == NULL) {
13892
0
  xmlFreeParserCtxt(ctxt);
13893
0
  return(NULL);
13894
0
    }
13895
13896
0
    inputPush(ctxt, inputStream);
13897
0
    if ((ctxt->directory == NULL) && (directory == NULL))
13898
0
        directory = xmlParserGetDirectory(filename);
13899
0
    if ((ctxt->directory == NULL) && (directory != NULL))
13900
0
        ctxt->directory = directory;
13901
13902
0
    return(ctxt);
13903
0
}
13904
13905
/**
13906
 * xmlCreateFileParserCtxt:
13907
 * @filename:  the filename
13908
 *
13909
 * Create a parser context for a file content.
13910
 * Automatic support for ZLIB/Compress compressed document is provided
13911
 * by default if found at compile-time.
13912
 *
13913
 * Returns the new parser context or NULL
13914
 */
13915
xmlParserCtxtPtr
13916
xmlCreateFileParserCtxt(const char *filename)
13917
0
{
13918
0
    return(xmlCreateURLParserCtxt(filename, 0));
13919
0
}
13920
13921
#ifdef LIBXML_SAX1_ENABLED
13922
/**
13923
 * xmlSAXParseFileWithData:
13924
 * @sax:  the SAX handler block
13925
 * @filename:  the filename
13926
 * @recovery:  work in recovery mode, i.e. tries to read non-well-formed
13927
 *             documents
13928
 * @data:  the userdata
13929
 *
13930
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
13931
 *
13932
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13933
 * compressed document is provided by default if found at compile-time.
13934
 * It uses the given SAX function block to handle the parsing callbacks.
13935
 * If sax is NULL, fall back to the default DOM tree building routines.
13936
 *
13937
 * User data (void *) is stored within the parser context in the
13938
 * context's _private member, so it is available nearly everywhere in libxml
13939
 *
13940
 * Returns the resulting document tree
13941
 */
13942
13943
xmlDocPtr
13944
xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
13945
0
                        int recovery, void *data) {
13946
0
    xmlDocPtr ret;
13947
0
    xmlParserCtxtPtr ctxt;
13948
13949
0
    xmlInitParser();
13950
13951
0
    ctxt = xmlCreateFileParserCtxt(filename);
13952
0
    if (ctxt == NULL) {
13953
0
  return(NULL);
13954
0
    }
13955
0
    if (sax != NULL) {
13956
0
  if (ctxt->sax != NULL)
13957
0
      xmlFree(ctxt->sax);
13958
0
        ctxt->sax = sax;
13959
0
    }
13960
0
    xmlDetectSAX2(ctxt);
13961
0
    if (data!=NULL) {
13962
0
  ctxt->_private = data;
13963
0
    }
13964
13965
0
    if (ctxt->directory == NULL)
13966
0
        ctxt->directory = xmlParserGetDirectory(filename);
13967
13968
0
    ctxt->recovery = recovery;
13969
13970
0
    xmlParseDocument(ctxt);
13971
13972
0
    if ((ctxt->wellFormed) || recovery) {
13973
0
        ret = ctxt->myDoc;
13974
0
  if ((ret != NULL) && (ctxt->input->buf != NULL)) {
13975
0
      if (ctxt->input->buf->compressed > 0)
13976
0
    ret->compression = 9;
13977
0
      else
13978
0
    ret->compression = ctxt->input->buf->compressed;
13979
0
  }
13980
0
    }
13981
0
    else {
13982
0
       ret = NULL;
13983
0
       xmlFreeDoc(ctxt->myDoc);
13984
0
       ctxt->myDoc = NULL;
13985
0
    }
13986
0
    if (sax != NULL)
13987
0
        ctxt->sax = NULL;
13988
0
    xmlFreeParserCtxt(ctxt);
13989
13990
0
    return(ret);
13991
0
}
13992
13993
/**
13994
 * xmlSAXParseFile:
13995
 * @sax:  the SAX handler block
13996
 * @filename:  the filename
13997
 * @recovery:  work in recovery mode, i.e. tries to read non-well-formed
13998
 *             documents
13999
 *
14000
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
14001
 *
14002
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14003
 * compressed document is provided by default if found at compile-time.
14004
 * It uses the given SAX function block to handle the parsing callbacks.
14005
 * If sax is NULL, fall back to the default DOM tree building routines.
14006
 *
14007
 * Returns the resulting document tree
14008
 */
14009
14010
xmlDocPtr
14011
xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
14012
0
                          int recovery) {
14013
0
    return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
14014
0
}
14015
14016
/**
14017
 * xmlRecoverDoc:
14018
 * @cur:  a pointer to an array of xmlChar
14019
 *
14020
 * DEPRECATED: Use xmlReadDoc with XML_PARSE_RECOVER.
14021
 *
14022
 * parse an XML in-memory document and build a tree.
14023
 * In the case the document is not Well Formed, an attempt to build a
14024
 * tree is tried anyway
14025
 *
14026
 * Returns the resulting document tree or NULL in case of failure
14027
 */
14028
14029
xmlDocPtr
14030
0
xmlRecoverDoc(const xmlChar *cur) {
14031
0
    return(xmlSAXParseDoc(NULL, cur, 1));
14032
0
}
14033
14034
/**
14035
 * xmlParseFile:
14036
 * @filename:  the filename
14037
 *
14038
 * DEPRECATED: Use xmlReadFile.
14039
 *
14040
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14041
 * compressed document is provided by default if found at compile-time.
14042
 *
14043
 * Returns the resulting document tree if the file was wellformed,
14044
 * NULL otherwise.
14045
 */
14046
14047
xmlDocPtr
14048
0
xmlParseFile(const char *filename) {
14049
0
    return(xmlSAXParseFile(NULL, filename, 0));
14050
0
}
14051
14052
/**
14053
 * xmlRecoverFile:
14054
 * @filename:  the filename
14055
 *
14056
 * DEPRECATED: Use xmlReadFile with XML_PARSE_RECOVER.
14057
 *
14058
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14059
 * compressed document is provided by default if found at compile-time.
14060
 * In the case the document is not Well Formed, it attempts to build
14061
 * a tree anyway
14062
 *
14063
 * Returns the resulting document tree or NULL in case of failure
14064
 */
14065
14066
xmlDocPtr
14067
0
xmlRecoverFile(const char *filename) {
14068
0
    return(xmlSAXParseFile(NULL, filename, 1));
14069
0
}
14070
14071
14072
/**
14073
 * xmlSetupParserForBuffer:
14074
 * @ctxt:  an XML parser context
14075
 * @buffer:  a xmlChar * buffer
14076
 * @filename:  a file name
14077
 *
14078
 * DEPRECATED: Don't use.
14079
 *
14080
 * Setup the parser context to parse a new buffer; Clears any prior
14081
 * contents from the parser context. The buffer parameter must not be
14082
 * NULL, but the filename parameter can be
14083
 */
14084
void
14085
xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
14086
                             const char* filename)
14087
0
{
14088
0
    xmlParserInputPtr input;
14089
14090
0
    if ((ctxt == NULL) || (buffer == NULL))
14091
0
        return;
14092
14093
0
    input = xmlNewInputStream(ctxt);
14094
0
    if (input == NULL) {
14095
0
        xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
14096
0
        xmlClearParserCtxt(ctxt);
14097
0
        return;
14098
0
    }
14099
14100
0
    xmlClearParserCtxt(ctxt);
14101
0
    if (filename != NULL)
14102
0
        input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
14103
0
    input->base = buffer;
14104
0
    input->cur = buffer;
14105
0
    input->end = &buffer[xmlStrlen(buffer)];
14106
0
    inputPush(ctxt, input);
14107
0
}
14108
14109
/**
14110
 * xmlSAXUserParseFile:
14111
 * @sax:  a SAX handler
14112
 * @user_data:  The user data returned on SAX callbacks
14113
 * @filename:  a file name
14114
 *
14115
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
14116
 *
14117
 * parse an XML file and call the given SAX handler routines.
14118
 * Automatic support for ZLIB/Compress compressed document is provided
14119
 *
14120
 * Returns 0 in case of success or an error number otherwise
14121
 */
14122
int
14123
xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
14124
0
                    const char *filename) {
14125
0
    int ret = 0;
14126
0
    xmlParserCtxtPtr ctxt;
14127
14128
0
    ctxt = xmlCreateFileParserCtxt(filename);
14129
0
    if (ctxt == NULL) return -1;
14130
0
    if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14131
0
  xmlFree(ctxt->sax);
14132
0
    ctxt->sax = sax;
14133
0
    xmlDetectSAX2(ctxt);
14134
14135
0
    if (user_data != NULL)
14136
0
  ctxt->userData = user_data;
14137
14138
0
    xmlParseDocument(ctxt);
14139
14140
0
    if (ctxt->wellFormed)
14141
0
  ret = 0;
14142
0
    else {
14143
0
        if (ctxt->errNo != 0)
14144
0
      ret = ctxt->errNo;
14145
0
  else
14146
0
      ret = -1;
14147
0
    }
14148
0
    if (sax != NULL)
14149
0
  ctxt->sax = NULL;
14150
0
    if (ctxt->myDoc != NULL) {
14151
0
        xmlFreeDoc(ctxt->myDoc);
14152
0
  ctxt->myDoc = NULL;
14153
0
    }
14154
0
    xmlFreeParserCtxt(ctxt);
14155
14156
0
    return ret;
14157
0
}
14158
#endif /* LIBXML_SAX1_ENABLED */
14159
14160
/************************************************************************
14161
 *                  *
14162
 *    Front ends when parsing from memory     *
14163
 *                  *
14164
 ************************************************************************/
14165
14166
/**
14167
 * xmlCreateMemoryParserCtxt:
14168
 * @buffer:  a pointer to a char array
14169
 * @size:  the size of the array
14170
 *
14171
 * Create a parser context for an XML in-memory document.
14172
 *
14173
 * Returns the new parser context or NULL
14174
 */
14175
xmlParserCtxtPtr
14176
81.3k
xmlCreateMemoryParserCtxt(const char *buffer, int size) {
14177
81.3k
    xmlParserCtxtPtr ctxt;
14178
81.3k
    xmlParserInputPtr input;
14179
81.3k
    xmlParserInputBufferPtr buf;
14180
14181
81.3k
    if (buffer == NULL)
14182
0
  return(NULL);
14183
81.3k
    if (size <= 0)
14184
1.10k
  return(NULL);
14185
14186
80.2k
    ctxt = xmlNewParserCtxt();
14187
80.2k
    if (ctxt == NULL)
14188
0
  return(NULL);
14189
14190
80.2k
    buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14191
80.2k
    if (buf == NULL) {
14192
0
  xmlFreeParserCtxt(ctxt);
14193
0
  return(NULL);
14194
0
    }
14195
14196
80.2k
    input = xmlNewInputStream(ctxt);
14197
80.2k
    if (input == NULL) {
14198
0
  xmlFreeParserInputBuffer(buf);
14199
0
  xmlFreeParserCtxt(ctxt);
14200
0
  return(NULL);
14201
0
    }
14202
14203
80.2k
    input->filename = NULL;
14204
80.2k
    input->buf = buf;
14205
80.2k
    xmlBufResetInput(input->buf->buffer, input);
14206
14207
80.2k
    inputPush(ctxt, input);
14208
80.2k
    return(ctxt);
14209
80.2k
}
14210
14211
#ifdef LIBXML_SAX1_ENABLED
14212
/**
14213
 * xmlSAXParseMemoryWithData:
14214
 * @sax:  the SAX handler block
14215
 * @buffer:  a pointer to a char array
14216
 * @size:  the size of the array
14217
 * @recovery:  work in recovery mode, i.e. tries to read non-well-formed
14218
 *             documents
14219
 * @data:  the userdata
14220
 *
14221
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14222
 *
14223
 * parse an XML in-memory block and use the given SAX function block
14224
 * to handle the parsing callback. If sax is NULL, fall back to the default
14225
 * DOM tree building routines.
14226
 *
14227
 * User data (void *) is stored within the parser context in the
14228
 * context's _private member, so it is available nearly everywhere in libxml
14229
 *
14230
 * Returns the resulting document tree
14231
 */
14232
14233
xmlDocPtr
14234
xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14235
0
            int size, int recovery, void *data) {
14236
0
    xmlDocPtr ret;
14237
0
    xmlParserCtxtPtr ctxt;
14238
14239
0
    xmlInitParser();
14240
14241
0
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14242
0
    if (ctxt == NULL) return(NULL);
14243
0
    if (sax != NULL) {
14244
0
  if (ctxt->sax != NULL)
14245
0
      xmlFree(ctxt->sax);
14246
0
        ctxt->sax = sax;
14247
0
    }
14248
0
    xmlDetectSAX2(ctxt);
14249
0
    if (data!=NULL) {
14250
0
  ctxt->_private=data;
14251
0
    }
14252
14253
0
    ctxt->recovery = recovery;
14254
14255
0
    xmlParseDocument(ctxt);
14256
14257
0
    if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14258
0
    else {
14259
0
       ret = NULL;
14260
0
       xmlFreeDoc(ctxt->myDoc);
14261
0
       ctxt->myDoc = NULL;
14262
0
    }
14263
0
    if (sax != NULL)
14264
0
  ctxt->sax = NULL;
14265
0
    xmlFreeParserCtxt(ctxt);
14266
14267
0
    return(ret);
14268
0
}
14269
14270
/**
14271
 * xmlSAXParseMemory:
14272
 * @sax:  the SAX handler block
14273
 * @buffer:  a pointer to a char array
14274
 * @size:  the size of the array
14275
 * @recovery:  work in recovery mode, i.e. tries to read non-well-formed
14276
 *             documents
14277
 *
14278
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14279
 *
14280
 * parse an XML in-memory block and use the given SAX function block
14281
 * to handle the parsing callback. If sax is NULL, fall back to the default
14282
 * DOM tree building routines.
14283
 *
14284
 * Returns the resulting document tree
14285
 */
14286
xmlDocPtr
14287
xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14288
0
            int size, int recovery) {
14289
0
    return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
14290
0
}
14291
14292
/**
14293
 * xmlParseMemory:
14294
 * @buffer:  a pointer to a char array
14295
 * @size:  the size of the array
14296
 *
14297
 * DEPRECATED: Use xmlReadMemory.
14298
 *
14299
 * parse an XML in-memory block and build a tree.
14300
 *
14301
 * Returns the resulting document tree
14302
 */
14303
14304
0
xmlDocPtr xmlParseMemory(const char *buffer, int size) {
14305
0
   return(xmlSAXParseMemory(NULL, buffer, size, 0));
14306
0
}
14307
14308
/**
14309
 * xmlRecoverMemory:
14310
 * @buffer:  a pointer to a char array
14311
 * @size:  the size of the array
14312
 *
14313
 * DEPRECATED: Use xmlReadMemory with XML_PARSE_RECOVER.
14314
 *
14315
 * parse an XML in-memory block and build a tree.
14316
 * In the case the document is not Well Formed, an attempt to
14317
 * build a tree is tried anyway
14318
 *
14319
 * Returns the resulting document tree or NULL in case of error
14320
 */
14321
14322
0
xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
14323
0
   return(xmlSAXParseMemory(NULL, buffer, size, 1));
14324
0
}
14325
14326
/**
14327
 * xmlSAXUserParseMemory:
14328
 * @sax:  a SAX handler
14329
 * @user_data:  The user data returned on SAX callbacks
14330
 * @buffer:  an in-memory XML document input
14331
 * @size:  the length of the XML document in bytes
14332
 *
14333
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14334
 *
14335
 * parse an XML in-memory buffer and call the given SAX handler routines.
14336
 *
14337
 * Returns 0 in case of success or an error number otherwise
14338
 */
14339
int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
14340
0
        const char *buffer, int size) {
14341
0
    int ret = 0;
14342
0
    xmlParserCtxtPtr ctxt;
14343
14344
0
    xmlInitParser();
14345
14346
0
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14347
0
    if (ctxt == NULL) return -1;
14348
0
    if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14349
0
        xmlFree(ctxt->sax);
14350
0
    ctxt->sax = sax;
14351
0
    xmlDetectSAX2(ctxt);
14352
14353
0
    if (user_data != NULL)
14354
0
  ctxt->userData = user_data;
14355
14356
0
    xmlParseDocument(ctxt);
14357
14358
0
    if (ctxt->wellFormed)
14359
0
  ret = 0;
14360
0
    else {
14361
0
        if (ctxt->errNo != 0)
14362
0
      ret = ctxt->errNo;
14363
0
  else
14364
0
      ret = -1;
14365
0
    }
14366
0
    if (sax != NULL)
14367
0
        ctxt->sax = NULL;
14368
0
    if (ctxt->myDoc != NULL) {
14369
0
        xmlFreeDoc(ctxt->myDoc);
14370
0
  ctxt->myDoc = NULL;
14371
0
    }
14372
0
    xmlFreeParserCtxt(ctxt);
14373
14374
0
    return ret;
14375
0
}
14376
#endif /* LIBXML_SAX1_ENABLED */
14377
14378
/**
14379
 * xmlCreateDocParserCtxt:
14380
 * @cur:  a pointer to an array of xmlChar
14381
 *
14382
 * Creates a parser context for an XML in-memory document.
14383
 *
14384
 * Returns the new parser context or NULL
14385
 */
14386
xmlParserCtxtPtr
14387
0
xmlCreateDocParserCtxt(const xmlChar *cur) {
14388
0
    int len;
14389
14390
0
    if (cur == NULL)
14391
0
  return(NULL);
14392
0
    len = xmlStrlen(cur);
14393
0
    return(xmlCreateMemoryParserCtxt((const char *)cur, len));
14394
0
}
14395
14396
#ifdef LIBXML_SAX1_ENABLED
14397
/**
14398
 * xmlSAXParseDoc:
14399
 * @sax:  the SAX handler block
14400
 * @cur:  a pointer to an array of xmlChar
14401
 * @recovery:  work in recovery mode, i.e. tries to read non-well-formed
14402
 *             documents
14403
 *
14404
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadDoc.
14405
 *
14406
 * parse an XML in-memory document and build a tree.
14407
 * It uses the given SAX function block to handle the parsing callbacks.
14408
 * If sax is NULL, fall back to the default DOM tree building routines.
14409
 *
14410
 * Returns the resulting document tree
14411
 */
14412
14413
xmlDocPtr
14414
0
xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
14415
0
    xmlDocPtr ret;
14416
0
    xmlParserCtxtPtr ctxt;
14417
0
    xmlSAXHandlerPtr oldsax = NULL;
14418
14419
0
    if (cur == NULL) return(NULL);
14420
14421
14422
0
    ctxt = xmlCreateDocParserCtxt(cur);
14423
0
    if (ctxt == NULL) return(NULL);
14424
0
    if (sax != NULL) {
14425
0
        oldsax = ctxt->sax;
14426
0
        ctxt->sax = sax;
14427
0
        ctxt->userData = NULL;
14428
0
    }
14429
0
    xmlDetectSAX2(ctxt);
14430
14431
0
    xmlParseDocument(ctxt);
14432
0
    if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14433
0
    else {
14434
0
       ret = NULL;
14435
0
       xmlFreeDoc(ctxt->myDoc);
14436
0
       ctxt->myDoc = NULL;
14437
0
    }
14438
0
    if (sax != NULL)
14439
0
  ctxt->sax = oldsax;
14440
0
    xmlFreeParserCtxt(ctxt);
14441
14442
0
    return(ret);
14443
0
}
14444
14445
/**
14446
 * xmlParseDoc:
14447
 * @cur:  a pointer to an array of xmlChar
14448
 *
14449
 * DEPRECATED: Use xmlReadDoc.
14450
 *
14451
 * parse an XML in-memory document and build a tree.
14452
 *
14453
 * Returns the resulting document tree
14454
 */
14455
14456
xmlDocPtr
14457
0
xmlParseDoc(const xmlChar *cur) {
14458
0
    return(xmlSAXParseDoc(NULL, cur, 0));
14459
0
}
14460
#endif /* LIBXML_SAX1_ENABLED */
14461
14462
#ifdef LIBXML_LEGACY_ENABLED
14463
/************************************************************************
14464
 *                  *
14465
 *  Specific function to keep track of entities references    *
14466
 *  and used by the XSLT debugger         *
14467
 *                  *
14468
 ************************************************************************/
14469
14470
static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14471
14472
/**
14473
 * xmlAddEntityReference:
14474
 * @ent : A valid entity
14475
 * @firstNode : A valid first node for children of entity
14476
 * @lastNode : A valid last node of children entity
14477
 *
14478
 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14479
 */
14480
static void
14481
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14482
                      xmlNodePtr lastNode)
14483
{
14484
    if (xmlEntityRefFunc != NULL) {
14485
        (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14486
    }
14487
}
14488
14489
14490
/**
14491
 * xmlSetEntityReferenceFunc:
14492
 * @func: A valid function
14493
 *
14494
 * Set the function to call back when an XML reference has been made
14495
 */
14496
void
14497
xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14498
{
14499
    xmlEntityRefFunc = func;
14500
}
14501
#endif /* LIBXML_LEGACY_ENABLED */
14502
14503
/************************************************************************
14504
 *                  *
14505
 *        Miscellaneous       *
14506
 *                  *
14507
 ************************************************************************/
14508
14509
/* Non-zero once xmlInitParser() has run; cleared by xmlCleanupParser(). */
static int xmlParserInitialized = 0;

/**
 * xmlInitParser:
 *
 * Initialization function for the XML parser.
 * This is not reentrant. Call once before processing in case of
 * use in multithreaded programs.
 *
 * Subsequent calls return immediately once the library is initialized.
 */

void
xmlInitParser(void) {
    /*
     * Note that the initialization code must not make memory allocations.
     */

    /* Fast path: everything below already ran. */
    if (xmlParserInitialized)
	return;

#ifdef LIBXML_THREAD_ENABLED
    __xmlGlobalInitMutexLock();
    /* Test again now that the global init mutex is held. */
    if (!xmlParserInitialized) {
#endif
#if defined(_WIN32) && (!defined(LIBXML_STATIC) || defined(LIBXML_STATIC_FOR_DLL))
        /* Only schedule the cleanup when the default deallocator is used. */
        if (xmlFree == free)
            atexit(xmlCleanupParser);
#endif

        /* Module initializers, in the order the library expects. */
	xmlInitThreadsInternal();
	xmlInitGlobalsInternal();
	xmlInitMemoryInternal();
        __xmlInitializeDict();
	xmlInitEncodingInternal();
	xmlRegisterDefaultInputCallbacks();
#ifdef LIBXML_OUTPUT_ENABLED
	xmlRegisterDefaultOutputCallbacks();
#endif /* LIBXML_OUTPUT_ENABLED */
#if defined(LIBXML_XPATH_ENABLED) || defined(LIBXML_SCHEMAS_ENABLED)
	xmlInitXPathInternal();
#endif
	xmlParserInitialized = 1;
#ifdef LIBXML_THREAD_ENABLED
    }
    __xmlGlobalInitMutexUnlock();
#endif
}
14554
14555
/**
14556
 * xmlCleanupParser:
14557
 *
14558
 * This function name is somewhat misleading. It does not clean up
14559
 * parser state, it cleans up memory allocated by the library itself.
14560
 * It is a cleanup function for the XML library. It tries to reclaim all
14561
 * related global memory allocated for the library processing.
14562
 * It doesn't deallocate any document related memory. One should
14563
 * call xmlCleanupParser() only when the process has finished using
14564
 * the library and all XML/HTML documents built with it.
14565
 * See also xmlInitParser() which has the opposite function of preparing
14566
 * the library for operations.
14567
 *
14568
 * WARNING: if your application is multithreaded or has plugin support
14569
 *          calling this may crash the application if another thread or
14570
 *          a plugin is still using libxml2. It's sometimes very hard to
14571
 *          guess if libxml2 is in use in the application, some libraries
14572
 *          or plugins may use it without notice. In case of doubt abstain
14573
 *          from calling this function or do it just before calling exit()
14574
 *          to avoid leak reports from valgrind !
14575
 */
14576
14577
void
14578
0
xmlCleanupParser(void) {
14579
0
    if (!xmlParserInitialized)
14580
0
  return;
14581
14582
0
    xmlCleanupCharEncodingHandlers();
14583
0
#ifdef LIBXML_CATALOG_ENABLED
14584
0
    xmlCatalogCleanup();
14585
0
#endif
14586
0
    xmlCleanupDictInternal();
14587
0
    xmlCleanupInputCallbacks();
14588
0
#ifdef LIBXML_OUTPUT_ENABLED
14589
0
    xmlCleanupOutputCallbacks();
14590
0
#endif
14591
0
#ifdef LIBXML_SCHEMAS_ENABLED
14592
0
    xmlSchemaCleanupTypes();
14593
0
    xmlRelaxNGCleanupTypes();
14594
0
#endif
14595
0
    xmlCleanupGlobalsInternal();
14596
0
    xmlCleanupThreadsInternal();
14597
0
    xmlCleanupMemoryInternal();
14598
0
    xmlParserInitialized = 0;
14599
0
}
14600
14601
#if defined(HAVE_ATTRIBUTE_DESTRUCTOR) && !defined(LIBXML_STATIC) && \
    !defined(_WIN32)
/*
 * Destructor hook for non-static, non-Windows builds: releases the
 * library's global memory through xmlCleanupParser().
 */
static void
ATTRIBUTE_DESTRUCTOR
xmlDestructor(void) {
    /*
     * Calling custom deallocation functions in a destructor can cause
     * problems, for example with Nokogiri.
     */
    if (xmlFree != free)
        return;

    xmlCleanupParser();
}
#endif
14614
14615
/************************************************************************
14616
 *                  *
14617
 *  New set (2.6.0) of simpler and more flexible APIs   *
14618
 *                  *
14619
 ************************************************************************/
14620
14621
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named "dict" must be visible where the
 * macro is expanded; a NULL @str is ignored.
 *
 * The expansion is a single statement (do/while(0)), so the macro must
 * be followed by a semicolon and is safe inside unbraced if/else.
 * Note that @str is evaluated more than once.
 */
#define DICT_FREE(str)						\
    do {							\
        if ((str) && ((!dict) ||				\
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
            xmlFree((char *)(str));				\
    } while (0)
14632
14633
/**
14634
 * xmlCtxtReset:
14635
 * @ctxt: an XML parser context
14636
 *
14637
 * Reset a parser context
14638
 */
14639
void
14640
xmlCtxtReset(xmlParserCtxtPtr ctxt)
14641
0
{
14642
0
    xmlParserInputPtr input;
14643
0
    xmlDictPtr dict;
14644
14645
0
    if (ctxt == NULL)
14646
0
        return;
14647
14648
0
    dict = ctxt->dict;
14649
14650
0
    while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14651
0
        xmlFreeInputStream(input);
14652
0
    }
14653
0
    ctxt->inputNr = 0;
14654
0
    ctxt->input = NULL;
14655
14656
0
    ctxt->spaceNr = 0;
14657
0
    if (ctxt->spaceTab != NULL) {
14658
0
  ctxt->spaceTab[0] = -1;
14659
0
  ctxt->space = &ctxt->spaceTab[0];
14660
0
    } else {
14661
0
        ctxt->space = NULL;
14662
0
    }
14663
14664
14665
0
    ctxt->nodeNr = 0;
14666
0
    ctxt->node = NULL;
14667
14668
0
    ctxt->nameNr = 0;
14669
0
    ctxt->name = NULL;
14670
14671
0
    ctxt->nsNr = 0;
14672
14673
0
    DICT_FREE(ctxt->version);
14674
0
    ctxt->version = NULL;
14675
0
    DICT_FREE(ctxt->encoding);
14676
0
    ctxt->encoding = NULL;
14677
0
    DICT_FREE(ctxt->directory);
14678
0
    ctxt->directory = NULL;
14679
0
    DICT_FREE(ctxt->extSubURI);
14680
0
    ctxt->extSubURI = NULL;
14681
0
    DICT_FREE(ctxt->extSubSystem);
14682
0
    ctxt->extSubSystem = NULL;
14683
0
    if (ctxt->myDoc != NULL)
14684
0
        xmlFreeDoc(ctxt->myDoc);
14685
0
    ctxt->myDoc = NULL;
14686
14687
0
    ctxt->standalone = -1;
14688
0
    ctxt->hasExternalSubset = 0;
14689
0
    ctxt->hasPErefs = 0;
14690
0
    ctxt->html = 0;
14691
0
    ctxt->external = 0;
14692
0
    ctxt->instate = XML_PARSER_START;
14693
0
    ctxt->token = 0;
14694
14695
0
    ctxt->wellFormed = 1;
14696
0
    ctxt->nsWellFormed = 1;
14697
0
    ctxt->disableSAX = 0;
14698
0
    ctxt->valid = 1;
14699
#if 0
14700
    ctxt->vctxt.userData = ctxt;
14701
    ctxt->vctxt.error = xmlParserValidityError;
14702
    ctxt->vctxt.warning = xmlParserValidityWarning;
14703
#endif
14704
0
    ctxt->record_info = 0;
14705
0
    ctxt->checkIndex = 0;
14706
0
    ctxt->endCheckState = 0;
14707
0
    ctxt->inSubset = 0;
14708
0
    ctxt->errNo = XML_ERR_OK;
14709
0
    ctxt->depth = 0;
14710
0
    ctxt->charset = XML_CHAR_ENCODING_UTF8;
14711
0
    ctxt->catalogs = NULL;
14712
0
    ctxt->sizeentities = 0;
14713
0
    ctxt->sizeentcopy = 0;
14714
0
    xmlInitNodeInfoSeq(&ctxt->node_seq);
14715
14716
0
    if (ctxt->attsDefault != NULL) {
14717
0
        xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
14718
0
        ctxt->attsDefault = NULL;
14719
0
    }
14720
0
    if (ctxt->attsSpecial != NULL) {
14721
0
        xmlHashFree(ctxt->attsSpecial, NULL);
14722
0
        ctxt->attsSpecial = NULL;
14723
0
    }
14724
14725
0
#ifdef LIBXML_CATALOG_ENABLED
14726
0
    if (ctxt->catalogs != NULL)
14727
0
  xmlCatalogFreeLocal(ctxt->catalogs);
14728
0
#endif
14729
0
    ctxt->nbErrors = 0;
14730
0
    ctxt->nbWarnings = 0;
14731
0
    if (ctxt->lastError.code != XML_ERR_OK)
14732
0
        xmlResetError(&ctxt->lastError);
14733
0
}
14734
14735
/**
14736
 * xmlCtxtResetPush:
14737
 * @ctxt: an XML parser context
14738
 * @chunk:  a pointer to an array of chars
14739
 * @size:  number of chars in the array
14740
 * @filename:  an optional file name or URI
14741
 * @encoding:  the document encoding, or NULL
14742
 *
14743
 * Reset a push parser context
14744
 *
14745
 * Returns 0 in case of success and 1 in case of error
14746
 */
14747
int
14748
xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14749
                 int size, const char *filename, const char *encoding)
14750
0
{
14751
0
    xmlParserInputPtr inputStream;
14752
0
    xmlParserInputBufferPtr buf;
14753
0
    xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14754
14755
0
    if (ctxt == NULL)
14756
0
        return(1);
14757
14758
0
    if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14759
0
        enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14760
14761
0
    buf = xmlAllocParserInputBuffer(enc);
14762
0
    if (buf == NULL)
14763
0
        return(1);
14764
14765
0
    if (ctxt == NULL) {
14766
0
        xmlFreeParserInputBuffer(buf);
14767
0
        return(1);
14768
0
    }
14769
14770
0
    xmlCtxtReset(ctxt);
14771
14772
0
    if (filename == NULL) {
14773
0
        ctxt->directory = NULL;
14774
0
    } else {
14775
0
        ctxt->directory = xmlParserGetDirectory(filename);
14776
0
    }
14777
14778
0
    inputStream = xmlNewInputStream(ctxt);
14779
0
    if (inputStream == NULL) {
14780
0
        xmlFreeParserInputBuffer(buf);
14781
0
        return(1);
14782
0
    }
14783
14784
0
    if (filename == NULL)
14785
0
        inputStream->filename = NULL;
14786
0
    else
14787
0
        inputStream->filename = (char *)
14788
0
            xmlCanonicPath((const xmlChar *) filename);
14789
0
    inputStream->buf = buf;
14790
0
    xmlBufResetInput(buf->buffer, inputStream);
14791
14792
0
    inputPush(ctxt, inputStream);
14793
14794
0
    if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14795
0
        (ctxt->input->buf != NULL)) {
14796
0
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
14797
0
        size_t cur = ctxt->input->cur - ctxt->input->base;
14798
14799
0
        xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14800
14801
0
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
14802
#ifdef DEBUG_PUSH
14803
        xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14804
#endif
14805
0
    }
14806
14807
0
    if (encoding != NULL) {
14808
0
        xmlCharEncodingHandlerPtr hdlr;
14809
14810
0
        if (ctxt->encoding != NULL)
14811
0
      xmlFree((xmlChar *) ctxt->encoding);
14812
0
        ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14813
14814
0
        hdlr = xmlFindCharEncodingHandler(encoding);
14815
0
        if (hdlr != NULL) {
14816
0
            xmlSwitchToEncoding(ctxt, hdlr);
14817
0
  } else {
14818
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14819
0
            "Unsupported encoding %s\n", BAD_CAST encoding);
14820
0
        }
14821
0
    } else if (enc != XML_CHAR_ENCODING_NONE) {
14822
0
        xmlSwitchEncoding(ctxt, enc);
14823
0
    }
14824
14825
0
    return(0);
14826
0
}
14827
14828
14829
/**
14830
 * xmlCtxtUseOptionsInternal:
14831
 * @ctxt: an XML parser context
14832
 * @options:  a combination of xmlParserOption
14833
 * @encoding:  the user provided encoding to use
14834
 *
14835
 * Applies the options to the parser context
14836
 *
14837
 * Returns 0 in case of success, the set of unknown or unimplemented options
14838
 *         in case of error.
14839
 */
14840
static int
14841
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
14842
213k
{
14843
213k
    if (ctxt == NULL)
14844
0
        return(-1);
14845
213k
    if (encoding != NULL) {
14846
0
        if (ctxt->encoding != NULL)
14847
0
      xmlFree((xmlChar *) ctxt->encoding);
14848
0
        ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14849
0
    }
14850
213k
    if (options & XML_PARSE_RECOVER) {
14851
89.4k
        ctxt->recovery = 1;
14852
89.4k
        options -= XML_PARSE_RECOVER;
14853
89.4k
  ctxt->options |= XML_PARSE_RECOVER;
14854
89.4k
    } else
14855
123k
        ctxt->recovery = 0;
14856
213k
    if (options & XML_PARSE_DTDLOAD) {
14857
163k
        ctxt->loadsubset = XML_DETECT_IDS;
14858
163k
        options -= XML_PARSE_DTDLOAD;
14859
163k
  ctxt->options |= XML_PARSE_DTDLOAD;
14860
163k
    } else
14861
49.6k
        ctxt->loadsubset = 0;
14862
213k
    if (options & XML_PARSE_DTDATTR) {
14863
53.8k
        ctxt->loadsubset |= XML_COMPLETE_ATTRS;
14864
53.8k
        options -= XML_PARSE_DTDATTR;
14865
53.8k
  ctxt->options |= XML_PARSE_DTDATTR;
14866
53.8k
    }
14867
213k
    if (options & XML_PARSE_NOENT) {
14868
150k
        ctxt->replaceEntities = 1;
14869
        /* ctxt->loadsubset |= XML_DETECT_IDS; */
14870
150k
        options -= XML_PARSE_NOENT;
14871
150k
  ctxt->options |= XML_PARSE_NOENT;
14872
150k
    } else
14873
62.8k
        ctxt->replaceEntities = 0;
14874
213k
    if (options & XML_PARSE_PEDANTIC) {
14875
31.6k
        ctxt->pedantic = 1;
14876
31.6k
        options -= XML_PARSE_PEDANTIC;
14877
31.6k
  ctxt->options |= XML_PARSE_PEDANTIC;
14878
31.6k
    } else
14879
181k
        ctxt->pedantic = 0;
14880
213k
    if (options & XML_PARSE_NOBLANKS) {
14881
55.5k
        ctxt->keepBlanks = 0;
14882
55.5k
        ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
14883
55.5k
        options -= XML_PARSE_NOBLANKS;
14884
55.5k
  ctxt->options |= XML_PARSE_NOBLANKS;
14885
55.5k
    } else
14886
157k
        ctxt->keepBlanks = 1;
14887
213k
    if (options & XML_PARSE_DTDVALID) {
14888
42.9k
        ctxt->validate = 1;
14889
42.9k
        if (options & XML_PARSE_NOWARNING)
14890
33.3k
            ctxt->vctxt.warning = NULL;
14891
42.9k
        if (options & XML_PARSE_NOERROR)
14892
31.5k
            ctxt->vctxt.error = NULL;
14893
42.9k
        options -= XML_PARSE_DTDVALID;
14894
42.9k
  ctxt->options |= XML_PARSE_DTDVALID;
14895
42.9k
    } else
14896
170k
        ctxt->validate = 0;
14897
213k
    if (options & XML_PARSE_NOWARNING) {
14898
64.9k
        ctxt->sax->warning = NULL;
14899
64.9k
        options -= XML_PARSE_NOWARNING;
14900
64.9k
    }
14901
213k
    if (options & XML_PARSE_NOERROR) {
14902
75.8k
        ctxt->sax->error = NULL;
14903
75.8k
        ctxt->sax->fatalError = NULL;
14904
75.8k
        options -= XML_PARSE_NOERROR;
14905
75.8k
    }
14906
213k
#ifdef LIBXML_SAX1_ENABLED
14907
213k
    if (options & XML_PARSE_SAX1) {
14908
73.2k
        ctxt->sax->startElement = xmlSAX2StartElement;
14909
73.2k
        ctxt->sax->endElement = xmlSAX2EndElement;
14910
73.2k
        ctxt->sax->startElementNs = NULL;
14911
73.2k
        ctxt->sax->endElementNs = NULL;
14912
73.2k
        ctxt->sax->initialized = 1;
14913
73.2k
        options -= XML_PARSE_SAX1;
14914
73.2k
  ctxt->options |= XML_PARSE_SAX1;
14915
73.2k
    }
14916
213k
#endif /* LIBXML_SAX1_ENABLED */
14917
213k
    if (options & XML_PARSE_NODICT) {
14918
63.1k
        ctxt->dictNames = 0;
14919
63.1k
        options -= XML_PARSE_NODICT;
14920
63.1k
  ctxt->options |= XML_PARSE_NODICT;
14921
150k
    } else {
14922
150k
        ctxt->dictNames = 1;
14923
150k
    }
14924
213k
    if (options & XML_PARSE_NOCDATA) {
14925
72.1k
        ctxt->sax->cdataBlock = NULL;
14926
72.1k
        options -= XML_PARSE_NOCDATA;
14927
72.1k
  ctxt->options |= XML_PARSE_NOCDATA;
14928
72.1k
    }
14929
213k
    if (options & XML_PARSE_NSCLEAN) {
14930
82.9k
  ctxt->options |= XML_PARSE_NSCLEAN;
14931
82.9k
        options -= XML_PARSE_NSCLEAN;
14932
82.9k
    }
14933
213k
    if (options & XML_PARSE_NONET) {
14934
58.4k
  ctxt->options |= XML_PARSE_NONET;
14935
58.4k
        options -= XML_PARSE_NONET;
14936
58.4k
    }
14937
213k
    if (options & XML_PARSE_COMPACT) {
14938
107k
  ctxt->options |= XML_PARSE_COMPACT;
14939
107k
        options -= XML_PARSE_COMPACT;
14940
107k
    }
14941
213k
    if (options & XML_PARSE_OLD10) {
14942
73.0k
  ctxt->options |= XML_PARSE_OLD10;
14943
73.0k
        options -= XML_PARSE_OLD10;
14944
73.0k
    }
14945
213k
    if (options & XML_PARSE_NOBASEFIX) {
14946
65.4k
  ctxt->options |= XML_PARSE_NOBASEFIX;
14947
65.4k
        options -= XML_PARSE_NOBASEFIX;
14948
65.4k
    }
14949
213k
    if (options & XML_PARSE_HUGE) {
14950
62.7k
  ctxt->options |= XML_PARSE_HUGE;
14951
62.7k
        options -= XML_PARSE_HUGE;
14952
62.7k
        if (ctxt->dict != NULL)
14953
62.7k
            xmlDictSetLimit(ctxt->dict, 0);
14954
62.7k
    }
14955
213k
    if (options & XML_PARSE_OLDSAX) {
14956
50.4k
  ctxt->options |= XML_PARSE_OLDSAX;
14957
50.4k
        options -= XML_PARSE_OLDSAX;
14958
50.4k
    }
14959
213k
    if (options & XML_PARSE_IGNORE_ENC) {
14960
82.8k
  ctxt->options |= XML_PARSE_IGNORE_ENC;
14961
82.8k
        options -= XML_PARSE_IGNORE_ENC;
14962
82.8k
    }
14963
213k
    if (options & XML_PARSE_BIG_LINES) {
14964
71.3k
  ctxt->options |= XML_PARSE_BIG_LINES;
14965
71.3k
        options -= XML_PARSE_BIG_LINES;
14966
71.3k
    }
14967
213k
    ctxt->linenumbers = 1;
14968
213k
    return (options);
14969
213k
}
14970
14971
/**
14972
 * xmlCtxtUseOptions:
14973
 * @ctxt: an XML parser context
14974
 * @options:  a combination of xmlParserOption
14975
 *
14976
 * Applies the options to the parser context
14977
 *
14978
 * Returns 0 in case of success, the set of unknown or unimplemented options
14979
 *         in case of error.
14980
 */
14981
int
14982
xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
14983
142k
{
14984
142k
   return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
14985
142k
}
14986
14987
/**
14988
 * xmlDoRead:
14989
 * @ctxt:  an XML parser context
14990
 * @URL:  the base URL to use for the document
14991
 * @encoding:  the document encoding, or NULL
14992
 * @options:  a combination of xmlParserOption
14993
 * @reuse:  keep the context for reuse
14994
 *
14995
 * Common front-end for the xmlRead functions
14996
 *
14997
 * Returns the resulting document tree or NULL
14998
 */
14999
static xmlDocPtr
15000
xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
15001
          int options, int reuse)
15002
70.4k
{
15003
70.4k
    xmlDocPtr ret;
15004
15005
70.4k
    xmlCtxtUseOptionsInternal(ctxt, options, encoding);
15006
70.4k
    if (encoding != NULL) {
15007
0
        xmlCharEncodingHandlerPtr hdlr;
15008
15009
0
  hdlr = xmlFindCharEncodingHandler(encoding);
15010
0
  if (hdlr != NULL)
15011
0
      xmlSwitchToEncoding(ctxt, hdlr);
15012
0
    }
15013
70.4k
    if ((URL != NULL) && (ctxt->input != NULL) &&
15014
70.4k
        (ctxt->input->filename == NULL))
15015
70.4k
        ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
15016
70.4k
    xmlParseDocument(ctxt);
15017
70.4k
    if ((ctxt->wellFormed) || ctxt->recovery)
15018
34.7k
        ret = ctxt->myDoc;
15019
35.6k
    else {
15020
35.6k
        ret = NULL;
15021
35.6k
  if (ctxt->myDoc != NULL) {
15022
29.9k
      xmlFreeDoc(ctxt->myDoc);
15023
29.9k
  }
15024
35.6k
    }
15025
70.4k
    ctxt->myDoc = NULL;
15026
70.4k
    if (!reuse) {
15027
70.4k
  xmlFreeParserCtxt(ctxt);
15028
70.4k
    }
15029
15030
70.4k
    return (ret);
15031
70.4k
}
15032
15033
/**
15034
 * xmlReadDoc:
15035
 * @cur:  a pointer to a zero terminated string
15036
 * @URL:  the base URL to use for the document
15037
 * @encoding:  the document encoding, or NULL
15038
 * @options:  a combination of xmlParserOption
15039
 *
15040
 * parse an XML in-memory document and build a tree.
15041
 *
15042
 * Returns the resulting document tree
15043
 */
15044
xmlDocPtr
15045
xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
15046
0
{
15047
0
    xmlParserCtxtPtr ctxt;
15048
15049
0
    if (cur == NULL)
15050
0
        return (NULL);
15051
0
    xmlInitParser();
15052
15053
0
    ctxt = xmlCreateDocParserCtxt(cur);
15054
0
    if (ctxt == NULL)
15055
0
        return (NULL);
15056
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15057
0
}
15058
15059
/**
15060
 * xmlReadFile:
15061
 * @filename:  a file or URL
15062
 * @encoding:  the document encoding, or NULL
15063
 * @options:  a combination of xmlParserOption
15064
 *
15065
 * parse an XML file from the filesystem or the network.
15066
 *
15067
 * Returns the resulting document tree
15068
 */
15069
xmlDocPtr
15070
xmlReadFile(const char *filename, const char *encoding, int options)
15071
0
{
15072
0
    xmlParserCtxtPtr ctxt;
15073
15074
0
    xmlInitParser();
15075
0
    ctxt = xmlCreateURLParserCtxt(filename, options);
15076
0
    if (ctxt == NULL)
15077
0
        return (NULL);
15078
0
    return (xmlDoRead(ctxt, NULL, encoding, options, 0));
15079
0
}
15080
15081
/**
15082
 * xmlReadMemory:
15083
 * @buffer:  a pointer to a char array
15084
 * @size:  the size of the array
15085
 * @URL:  the base URL to use for the document
15086
 * @encoding:  the document encoding, or NULL
15087
 * @options:  a combination of xmlParserOption
15088
 *
15089
 * parse an XML in-memory document and build a tree.
15090
 *
15091
 * Returns the resulting document tree
15092
 */
15093
xmlDocPtr
15094
xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
15095
71.4k
{
15096
71.4k
    xmlParserCtxtPtr ctxt;
15097
15098
71.4k
    xmlInitParser();
15099
71.4k
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
15100
71.4k
    if (ctxt == NULL)
15101
1.03k
        return (NULL);
15102
70.4k
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15103
71.4k
}
15104
15105
/**
15106
 * xmlReadFd:
15107
 * @fd:  an open file descriptor
15108
 * @URL:  the base URL to use for the document
15109
 * @encoding:  the document encoding, or NULL
15110
 * @options:  a combination of xmlParserOption
15111
 *
15112
 * parse an XML from a file descriptor and build a tree.
15113
 * NOTE that the file descriptor will not be closed when the
15114
 *      reader is closed or reset.
15115
 *
15116
 * Returns the resulting document tree
15117
 */
15118
xmlDocPtr
15119
xmlReadFd(int fd, const char *URL, const char *encoding, int options)
15120
0
{
15121
0
    xmlParserCtxtPtr ctxt;
15122
0
    xmlParserInputBufferPtr input;
15123
0
    xmlParserInputPtr stream;
15124
15125
0
    if (fd < 0)
15126
0
        return (NULL);
15127
0
    xmlInitParser();
15128
15129
0
    input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15130
0
    if (input == NULL)
15131
0
        return (NULL);
15132
0
    input->closecallback = NULL;
15133
0
    ctxt = xmlNewParserCtxt();
15134
0
    if (ctxt == NULL) {
15135
0
        xmlFreeParserInputBuffer(input);
15136
0
        return (NULL);
15137
0
    }
15138
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15139
0
    if (stream == NULL) {
15140
0
        xmlFreeParserInputBuffer(input);
15141
0
  xmlFreeParserCtxt(ctxt);
15142
0
        return (NULL);
15143
0
    }
15144
0
    inputPush(ctxt, stream);
15145
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15146
0
}
15147
15148
/**
15149
 * xmlReadIO:
15150
 * @ioread:  an I/O read function
15151
 * @ioclose:  an I/O close function
15152
 * @ioctx:  an I/O handler
15153
 * @URL:  the base URL to use for the document
15154
 * @encoding:  the document encoding, or NULL
15155
 * @options:  a combination of xmlParserOption
15156
 *
15157
 * parse an XML document from I/O functions and source and build a tree.
15158
 *
15159
 * Returns the resulting document tree
15160
 */
15161
xmlDocPtr
15162
xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
15163
          void *ioctx, const char *URL, const char *encoding, int options)
15164
0
{
15165
0
    xmlParserCtxtPtr ctxt;
15166
0
    xmlParserInputBufferPtr input;
15167
0
    xmlParserInputPtr stream;
15168
15169
0
    if (ioread == NULL)
15170
0
        return (NULL);
15171
0
    xmlInitParser();
15172
15173
0
    input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15174
0
                                         XML_CHAR_ENCODING_NONE);
15175
0
    if (input == NULL) {
15176
0
        if (ioclose != NULL)
15177
0
            ioclose(ioctx);
15178
0
        return (NULL);
15179
0
    }
15180
0
    ctxt = xmlNewParserCtxt();
15181
0
    if (ctxt == NULL) {
15182
0
        xmlFreeParserInputBuffer(input);
15183
0
        return (NULL);
15184
0
    }
15185
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15186
0
    if (stream == NULL) {
15187
0
        xmlFreeParserInputBuffer(input);
15188
0
  xmlFreeParserCtxt(ctxt);
15189
0
        return (NULL);
15190
0
    }
15191
0
    inputPush(ctxt, stream);
15192
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15193
0
}
15194
15195
/**
15196
 * xmlCtxtReadDoc:
15197
 * @ctxt:  an XML parser context
15198
 * @cur:  a pointer to a zero terminated string
15199
 * @URL:  the base URL to use for the document
15200
 * @encoding:  the document encoding, or NULL
15201
 * @options:  a combination of xmlParserOption
15202
 *
15203
 * parse an XML in-memory document and build a tree.
15204
 * This reuses the existing @ctxt parser context
15205
 *
15206
 * Returns the resulting document tree
15207
 */
15208
xmlDocPtr
15209
xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
15210
               const char *URL, const char *encoding, int options)
15211
0
{
15212
0
    if (cur == NULL)
15213
0
        return (NULL);
15214
0
    return (xmlCtxtReadMemory(ctxt, (const char *) cur, xmlStrlen(cur), URL,
15215
0
                              encoding, options));
15216
0
}
15217
15218
/**
15219
 * xmlCtxtReadFile:
15220
 * @ctxt:  an XML parser context
15221
 * @filename:  a file or URL
15222
 * @encoding:  the document encoding, or NULL
15223
 * @options:  a combination of xmlParserOption
15224
 *
15225
 * parse an XML file from the filesystem or the network.
15226
 * This reuses the existing @ctxt parser context
15227
 *
15228
 * Returns the resulting document tree
15229
 */
15230
xmlDocPtr
15231
xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15232
                const char *encoding, int options)
15233
0
{
15234
0
    xmlParserInputPtr stream;
15235
15236
0
    if (filename == NULL)
15237
0
        return (NULL);
15238
0
    if (ctxt == NULL)
15239
0
        return (NULL);
15240
0
    xmlInitParser();
15241
15242
0
    xmlCtxtReset(ctxt);
15243
15244
0
    stream = xmlLoadExternalEntity(filename, NULL, ctxt);
15245
0
    if (stream == NULL) {
15246
0
        return (NULL);
15247
0
    }
15248
0
    inputPush(ctxt, stream);
15249
0
    return (xmlDoRead(ctxt, NULL, encoding, options, 1));
15250
0
}
15251
15252
/**
15253
 * xmlCtxtReadMemory:
15254
 * @ctxt:  an XML parser context
15255
 * @buffer:  a pointer to a char array
15256
 * @size:  the size of the array
15257
 * @URL:  the base URL to use for the document
15258
 * @encoding:  the document encoding, or NULL
15259
 * @options:  a combination of xmlParserOption
15260
 *
15261
 * parse an XML in-memory document and build a tree.
15262
 * This reuses the existing @ctxt parser context
15263
 *
15264
 * Returns the resulting document tree
15265
 */
15266
xmlDocPtr
15267
xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
15268
                  const char *URL, const char *encoding, int options)
15269
0
{
15270
0
    xmlParserInputBufferPtr input;
15271
0
    xmlParserInputPtr stream;
15272
15273
0
    if (ctxt == NULL)
15274
0
        return (NULL);
15275
0
    if (buffer == NULL)
15276
0
        return (NULL);
15277
0
    xmlInitParser();
15278
15279
0
    xmlCtxtReset(ctxt);
15280
15281
0
    input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
15282
0
    if (input == NULL) {
15283
0
  return(NULL);
15284
0
    }
15285
15286
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15287
0
    if (stream == NULL) {
15288
0
  xmlFreeParserInputBuffer(input);
15289
0
  return(NULL);
15290
0
    }
15291
15292
0
    inputPush(ctxt, stream);
15293
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15294
0
}
15295
15296
/**
15297
 * xmlCtxtReadFd:
15298
 * @ctxt:  an XML parser context
15299
 * @fd:  an open file descriptor
15300
 * @URL:  the base URL to use for the document
15301
 * @encoding:  the document encoding, or NULL
15302
 * @options:  a combination of xmlParserOption
15303
 *
15304
 * parse an XML from a file descriptor and build a tree.
15305
 * This reuses the existing @ctxt parser context
15306
 * NOTE that the file descriptor will not be closed when the
15307
 *      reader is closed or reset.
15308
 *
15309
 * Returns the resulting document tree
15310
 */
15311
xmlDocPtr
15312
xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15313
              const char *URL, const char *encoding, int options)
15314
0
{
15315
0
    xmlParserInputBufferPtr input;
15316
0
    xmlParserInputPtr stream;
15317
15318
0
    if (fd < 0)
15319
0
        return (NULL);
15320
0
    if (ctxt == NULL)
15321
0
        return (NULL);
15322
0
    xmlInitParser();
15323
15324
0
    xmlCtxtReset(ctxt);
15325
15326
15327
0
    input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15328
0
    if (input == NULL)
15329
0
        return (NULL);
15330
0
    input->closecallback = NULL;
15331
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15332
0
    if (stream == NULL) {
15333
0
        xmlFreeParserInputBuffer(input);
15334
0
        return (NULL);
15335
0
    }
15336
0
    inputPush(ctxt, stream);
15337
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15338
0
}
15339
15340
/**
15341
 * xmlCtxtReadIO:
15342
 * @ctxt:  an XML parser context
15343
 * @ioread:  an I/O read function
15344
 * @ioclose:  an I/O close function
15345
 * @ioctx:  an I/O handler
15346
 * @URL:  the base URL to use for the document
15347
 * @encoding:  the document encoding, or NULL
15348
 * @options:  a combination of xmlParserOption
15349
 *
15350
 * parse an XML document from I/O functions and source and build a tree.
15351
 * This reuses the existing @ctxt parser context
15352
 *
15353
 * Returns the resulting document tree
15354
 */
15355
xmlDocPtr
15356
xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15357
              xmlInputCloseCallback ioclose, void *ioctx,
15358
        const char *URL,
15359
              const char *encoding, int options)
15360
0
{
15361
0
    xmlParserInputBufferPtr input;
15362
0
    xmlParserInputPtr stream;
15363
15364
0
    if (ioread == NULL)
15365
0
        return (NULL);
15366
0
    if (ctxt == NULL)
15367
0
        return (NULL);
15368
0
    xmlInitParser();
15369
15370
0
    xmlCtxtReset(ctxt);
15371
15372
0
    input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15373
0
                                         XML_CHAR_ENCODING_NONE);
15374
0
    if (input == NULL) {
15375
0
        if (ioclose != NULL)
15376
0
            ioclose(ioctx);
15377
0
        return (NULL);
15378
0
    }
15379
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15380
0
    if (stream == NULL) {
15381
0
        xmlFreeParserInputBuffer(input);
15382
0
        return (NULL);
15383
0
    }
15384
0
    inputPush(ctxt, stream);
15385
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15386
0
}
15387